Home | History | Annotate | Download | only in AArch64
      1 ; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
      2 ; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
      3 ; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=DISABLE
      4 
      5 ; CodeGenPrepare should move the zext into the block with the load
      6 ; so that SelectionDAG can select it with the load.
      7 ;
      8 ; OPTALL-LABEL: @foo
      9 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
     10 ; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
     11 ; OPTALL: store i32 [[ZEXT]], i32* %q
     12 ; OPTALL: ret
     13 define void @foo(i8* %p, i32* %q) {
     14 entry:
     15   %t = load i8, i8* %p  ; narrow load; its zext is defined in %true
     16   %a = icmp slt i8 %t, 20  ; %t is also used at its original i8 width
     17   br i1 %a, label %true, label %false
     18 true:
     19   %s = zext i8 %t to i32  ; extension sits in a different block than the load
     20   store i32 %s, i32* %q
     21   ret void
     22 false:
     23   ret void
     24 }
     25 
     26 ; Check that we manage to form a zextload if an operation with only one
     27 ; argument to explicitly extend is in the way.
     28 ; OPTALL-LABEL: @promoteOneArg
     29 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
     30 ; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
     31 ; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
     32 ; Make sure the operation is not promoted when the promotion pass is disabled.
     33 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
     34 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
     35 ; OPTALL: store i32 [[RES]], i32* %q
     36 ; OPTALL: ret
     37 define void @promoteOneArg(i8* %p, i32* %q) {
     38 entry:
     39   %t = load i8, i8* %p
     40   %add = add nuw i8 %t, 2  ; only non-constant operand is the load itself
     41   %a = icmp slt i8 %t, 20  ; %t is also used at its original i8 width
     42   br i1 %a, label %true, label %false
     43 true:
     44   %s = zext i8 %add to i32  ; extends the add result, not the load directly
     45   store i32 %s, i32* %q
     46   ret void
     47 false:
     48   ret void
     49 }
     50 
     51 ; Check that we manage to form a sextload if an operation with only one
     52 ; argument to explicitly extend is in the way.
     53 ; Version with sext.
     54 ; OPTALL-LABEL: @promoteOneArgSExt
     55 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
     56 ; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
     57 ; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
     58 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
     59 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
     60 ; OPTALL: store i32 [[RES]], i32* %q
     61 ; OPTALL: ret
     62 define void @promoteOneArgSExt(i8* %p, i32* %q) {
     63 entry:
     64   %t = load i8, i8* %p
     65   %add = add nsw i8 %t, 2  ; nsw add whose only non-constant operand is the load
     66   %a = icmp slt i8 %t, 20  ; %t is also used at its original i8 width
     67   br i1 %a, label %true, label %false
     68 true:
     69   %s = sext i8 %add to i32  ; sign-extends the add result, not the load directly
     70   store i32 %s, i32* %q
     71   ret void
     72 false:
     73   ret void
     74 }
     75 
     76 ; Check that we manage to form a zextload if an operation with two
     77 ; arguments to explicitly extend is in the way.
     78 ; Extending %add will create two extensions:
     79 ; 1. One for %b.
     80 ; 2. One for %t.
     81 ; #1 will not be removed as we do not know anything about %b.
     82 ; #2 may not be merged with the load because %t is used in a comparison.
     83 ; Since two extensions may be emitted in the end instead of one before the
     84 ; transformation, the regular heuristic does not apply the optimization.
     85 ;
     86 ; OPTALL-LABEL: @promoteTwoArgZext
     87 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
     88 ;
     89 ; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
     90 ; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
     91 ; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
     92 ;
     93 ; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
     94 ; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
     95 ;
     96 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
     97 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
     98 ;
     99 ; OPTALL: store i32 [[RES]], i32* %q
    100 ; OPTALL: ret
    101 define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
    102 entry:
    103   %t = load i8, i8* %p
    104   %add = add nuw i8 %t, %b  ; two non-constant i8 operands: the load and argument %b
    105   %a = icmp slt i8 %t, 20  ; %t is also used in a comparison at i8 width
    106   br i1 %a, label %true, label %false
    107 true:
    108   %s = zext i8 %add to i32  ; extends the two-operand add
    109   store i32 %s, i32* %q
    110   ret void
    111 false:
    112   ret void
    113 }
    114 
    115 ; Check that we manage to form a sextload if an operation with two
    116 ; arguments to explicitly extend is in the way.
    117 ; Version with sext.
    118 ; OPTALL-LABEL: @promoteTwoArgSExt
    119 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
    120 ;
    121 ; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
    122 ; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
    123 ; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
    124 ;
    125 ; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
    126 ; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
    127 ;
    128 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
    129 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
    130 ; OPTALL: store i32 [[RES]], i32* %q
    131 ; OPTALL: ret
    132 define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
    133 entry:
    134   %t = load i8, i8* %p
    135   %add = add nsw i8 %t, %b  ; two non-constant i8 operands: the load and argument %b
    136   %a = icmp slt i8 %t, 20  ; %t is also used in a comparison at i8 width
    137   br i1 %a, label %true, label %false
    138 true:
    139   %s = sext i8 %add to i32  ; sign-extends the two-operand add
    140   store i32 %s, i32* %q
    141   ret void
    142 false:
    143   ret void
    144 }
    145 
    146 ; Check that we do not form a zextload if we need to introduce more than
    147 ; one additional extension.
    148 ; OPTALL-LABEL: @promoteThreeArgZext
    149 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
    150 ;
    151 ; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    152 ; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
    153 ; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
    154 ; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
    155 ; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
    156 ;
    157 ; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
    158 ; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
    159 ; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
    160 ;
    161 ; DISABLE: add nuw i8
    162 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
    163 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
    164 ;
    165 ; OPTALL: store i32 [[RES]], i32* %q
    166 ; OPTALL: ret
    167 define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
    168 entry:
    169   %t = load i8, i8* %p
    170   %tmp = add nuw i8 %t, %b  ; first add of the chain: load + %b
    171   %add = add nuw i8 %tmp, %c  ; second add brings in a third i8 input, %c
    172   %a = icmp slt i8 %t, 20  ; %t is also used in a comparison at i8 width
    173   br i1 %a, label %true, label %false
    174 true:
    175   %s = zext i8 %add to i32  ; extends the end of the two-add chain
    176   store i32 %s, i32* %q
    177   ret void
    178 false:
    179   ret void
    180 }
    181 
    182 ; Check that we manage to form a zextload after promoting and merging
    183 ; two extensions.
    184 ; OPTALL-LABEL: @promoteMergeExtArgZExt
    185 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
    186 ;
    187 ; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    188 ; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
    189 ; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
    190 ;
    191 ; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
    192 ; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
    193 ; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
    194 ;
    195 ; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
    196 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
    197 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
    198 ;
    199 ; OPTALL: store i32 [[RES]], i32* %q
    200 ; OPTALL: ret
    201 define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
    202 entry:
    203   %t = load i8, i8* %p
    204   %ext = zext i8 %t to i16  ; first extension: i8 -> i16
    205   %add = add nuw i16 %ext, %b  ; i16 add of the extended load and argument %b
    206   %a = icmp slt i8 %t, 20  ; %t is also used at its original i8 width
    207   br i1 %a, label %true, label %false
    208 true:
    209   %s = zext i16 %add to i32  ; second extension: i16 -> i32
    210   store i32 %s, i32* %q
    211   ret void
    212 false:
    213   ret void
    214 }
    215 
    216 ; Check that we manage to form a sextload after promoting and merging
    217 ; two extensions.
    218 ; Version with sext.
    219 ; OPTALL-LABEL: @promoteMergeExtArgSExt
    220 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
    221 ;
    222 ; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    223 ; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
    224 ; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[ZEXTB]]
    225 ;
    226 ; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
    227 ; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
    228 ; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
    229 ;
    230 ; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
    231 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
    232 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
    233 ; OPTALL: store i32 [[RES]], i32* %q
    234 ; OPTALL: ret
    235 define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
    236 entry:
    237   %t = load i8, i8* %p
    238   %ext = zext i8 %t to i16  ; first extension is a zext: i8 -> i16
    239   %add = add nsw i16 %ext, %b  ; i16 add of the extended load and argument %b
    240   %a = icmp slt i8 %t, 20  ; %t is also used at its original i8 width
    241   br i1 %a, label %true, label %false
    242 true:
    243   %s = sext i16 %add to i32  ; second extension is a sext: i16 -> i32
    244   store i32 %s, i32* %q
    245   ret void
    246 false:
    247   ret void
    248 }
    249 
    250 ; Check that we manage to catch all the extload opportunities that are exposed
    251 ; by the different iterations of codegen prepare.
    252 ; Moreover, check that we do not promote more than we need to.
    253 ; Here is what is happening in this test (not necessarily in this order):
    254 ; 1. We try to promote the operand of %sextadd.
    255 ;    a. This creates one sext of %ld2 and one of %zextld
    256 ;    b. The sext of %ld2 can be combined with %ld2, so we removed one sext but
    257 ;       introduced one. This is fine with the current heuristic: neutral.
    258 ;    => We have one zext of %zextld left and we created one sext of %ld2.
    259 ; 2. We try to promote the operand of %sextaddza.
    260 ;    a. This creates one sext of %zexta and one of %zextld
    261 ;    b. The sext of %zexta can be combined with the zext of %a.
    262 ;    c. The sext of %zextld leads to %ld and can be combined with it. This is
    263 ;       done by promoting %zextld. This is fine with the current heuristic:
    264 ;       neutral.
    265 ;    => We have created a new zext of %ld and we created one sext of %zexta.
    266 ; 3. We try to promote the operand of %sextaddb.
    267 ;    a. This creates one sext of %b and one of %zextld
    268 ;    b. The sext of %b is a dead-end, nothing to be done.
    269 ;    c. Same thing as 2.c. happens.
    270 ;    => We have created a new zext of %ld and we created one sext of %b.
    271 ; 4. We try to promote the operand of the zext of %zextld introduced in #1.
    272 ;    a. Same thing as 2.c. happens.
    273 ;    b. %zextld does not have any other uses. It is dead code and is removed.
    274 ;    => We have created a new zext of %ld and we removed a zext of %zextld and
    275 ;       a zext of %ld.
    276 ; Currently we do not try to reuse existing extensions, so in the end we have
    277 ; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
    278 ;
    279 ; OPTALL-LABEL: @severalPromotions
    280 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
    281 ; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
    282 ; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
    283 ; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
    284 ; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
    285 ; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
    286 ; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_3]]
    287 ; OPT-NEXT: [[ZEXTLD1_4:%[a-zA-Z_0-9-]+]] = zext i8 %a to i64
    288 ; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXTLD1_4]], [[ZEXTLD1_2]]
    289 ; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
    290 ; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_1]]
    291 ;
    292 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
    293 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]]  = sext i32 [[ADD]] to i64
    294 ; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
    295 ; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]]  = sext i32 [[ADDZA]] to i64
    296 ; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
    297 ; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]]  = sext i32 [[ADDB]] to i64
    298 ;
    299 ; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
    300 ; OPTALL: ret
    301 define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
    302   %ld = load i8, i8* %addr1
    303   %zextld = zext i8 %ld to i32  ; shared operand of %add, %addza and %addb
    304   %ld2 = load i32, i32* %addr2
    305   %add = add nsw i32 %ld2, %zextld  ; other operand is a second load
    306   %sextadd = sext i32 %add to i64
    307   %zexta = zext i8 %a to i32
    308   %addza = add nsw i32 %zexta, %zextld  ; other operand is a zext of argument %a
    309   %sextaddza = sext i32 %addza to i64
    310   %addb = add nsw i32 %b, %zextld  ; other operand is the plain argument %b
    311   %sextaddb = sext i32 %addb to i64
    312   call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)  ; consumes all three i64 results
    313   ret void
    314 }
    315 
    316 declare void @dummy(i64, i64, i64)  ; external callee; @severalPromotions passes its three sign-extended sums to it
    317 
    318 ; Make sure we do not try to promote vector types since the type promotion
    319 ; helper does not support them for now.
    320 ; OPTALL-LABEL: @vectorPromotion
    321 ; OPTALL: [[SHL:%[a-zA-Z_0-9-]+]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
    322 ; OPTALL: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext <2 x i32> [[SHL]] to <2 x i64>
    323 ; OPTALL: ret
    324 define void @vectorPromotion() {
    325 entry:
    326   %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>  ; vector shift feeding the zext
    327   %b = zext <2 x i32> %a to <2 x i64>  ; vector zext; %b has no users in this function
    328   ret void
    329 }
    330 
    331 @a = common global i32 0, align 4  ; its address is compared against an element of @c in @promotionOfArgEndsUpInValue
    332 @c = common global [2 x i32] zeroinitializer, align 4  ; the address of element 1 appears in that same icmp constant expression
    333 
    334 ; Make sure we support promotion of operands that produces a Value as opposed
    335 ; to an instruction.
    336 ; This used to cause a crash.
    337 ; OPTALL-LABEL: @promotionOfArgEndsUpInValue
    338 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr
    339 ;
    340 ; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
    341 ; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
    342 ;
    343 ; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
    344 ; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
    345 ;
    346 ; OPTALL-NEXT: ret i32 [[RES]]
    347 define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
    348 entry:
    349   %val = load i16, i16* %addr
    350   %add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)  ; second operand is a constant expression, not an instruction
    351   %conv3 = sext i16 %add to i32  ; extension of an add with a constant-expression operand
    352   ret i32 %conv3
    353 }
    354 
    355 ; Check that we see that one zext can be derived from the other for free.
    356 ; OPTALL-LABEL: @promoteTwoArgZextWithSourceExtendedTwice
    357 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
    358 ;
    359 ; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
    360 ; OPT-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    361 ; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
    362 ; OPT-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], 12
    363 ; OPT-NEXT: store i32 [[RES32]], i32* %addr
    364 ; OPT-NEXT: store i64 [[RES64]], i64* %q
    365 ;
    366 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    367 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
    368 ; DISABLE-NEXT: [[RES2_32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], 12
    369 ; DISABLE-NEXT: store i32 [[RES32]], i32* %addr
    370 ; DISABLE-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES2_32]] to i64
    371 ; DISABLE-NEXT: store i64 [[ZEXT64]], i64* %q
    372 ;
    373 ; OPTALL-NEXT: ret void
    374 define void @promoteTwoArgZextWithSourceExtendedTwice(i8* %p, i64* %q, i32 %b, i32* %addr) {
    375 entry:
    376   %t = load i8, i8* %p
    377   %zextt = zext i8 %t to i32  ; shared by %add and %add2
    378   %add = add nuw i32 %zextt, %b  ; stays an i32 value; stored to %addr
    379   %add2 = add nuw i32 %zextt, 12  ; this one is extended again to i64 below
    380   store i32 %add, i32 *%addr
    381   %s = zext i32 %add2 to i64  ; second extension of a value derived from the load
    382   store i64 %s, i64* %q
    383   ret void
    384 }
    385 
    386 ; Check that we do not increase the cost of the code.
    387 ; The input has one free zext and one free sext. If we would have promoted
    388 ; all the way through the load we would end up with a free zext and a
    389 ; non-free sext (of %b).
    390 ; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode
    391 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
    392 ;
    393 ; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
    394 ; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
    395 ; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
    396 ; STRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
    397 ;
    398 ; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    399 ; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
    400 ; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
    401 ;
    402 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    403 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
    404 ; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
    405 ;
    406 ; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
    407 ; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
    408 ; OPTALL-NEXT: ret void
    409 define void @doNotPromoteFreeSExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
    410 entry:
    411   %t = load i8, i8* %p
    412   %zextt = zext i8 %t to i32
    413   %add = add nsw i32 %zextt, %b  ; used both as the stored value and, extended, as the GEP index
    414   %idx64 = sext i32 %add to i64  ; only use is the GEP index below
    415   %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64  ; GEP over i32 elements
    416   store i32 %add, i32 *%staddr
    417   ret void
    418 }
    419 
    420 ; Check that we do not increase the cost of the code.
    421 ; The input has one free zext and one free sext. If we would have promoted
    422 ; all the way through the load we would end up with a free zext and a
    423 ; non-free sext (of %b).
    424 ; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode64
    425 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
    426 ;
    427 ; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
    428 ; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
    429 ; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
    430 ;
    431 ; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    432 ; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
    433 ; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
    434 ;
    435 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    436 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
    437 ; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
    438 ;
    439 ; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i64, i64* %addr, i64 [[IDX64]]
    440 ; OPTALL-NEXT: store i64 %stuff, i64* [[GEP]]
    441 ; OPTALL-NEXT: ret void
    442 define void @doNotPromoteFreeSExtFromAddrMode64(i8* %p, i32 %b, i64* %addr, i64 %stuff) {
    443 entry:
    444   %t = load i8, i8* %p
    445   %zextt = zext i8 %t to i32
    446   %add = add nsw i32 %zextt, %b  ; only feeds the index computation here
    447   %idx64 = sext i32 %add to i64  ; only use is the GEP index below
    448   %staddr = getelementptr inbounds i64, i64* %addr, i64 %idx64  ; GEP over i64 elements
    449   store i64 %stuff, i64 *%staddr  ; stored value is the unrelated argument %stuff
    450   ret void
    451 }
    452 
    453 ; Check that we do not increase the cost of the code.
    454 ; The input has one free zext and one free sext. If we would have promoted
    455 ; all the way through the load we would end up with a free zext and a
    456 ; non-free sext (of %b).
    457 ; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode128
    458 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
    459 ;
    460 ; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
    461 ; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
    462 ; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
    463 ;
    464 ; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    465 ; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
    466 ; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
    467 ;
    468 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    469 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
    470 ; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
    471 ;
    472 ; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i128, i128* %addr, i64 [[IDX64]]
    473 ; OPTALL-NEXT: store i128 %stuff, i128* [[GEP]]
    474 ; OPTALL-NEXT: ret void
    475 define void @doNotPromoteFreeSExtFromAddrMode128(i8* %p, i32 %b, i128* %addr, i128 %stuff) {
    476 entry:
    477   %t = load i8, i8* %p
    478   %zextt = zext i8 %t to i32
    479   %add = add nsw i32 %zextt, %b  ; only feeds the index computation here
    480   %idx64 = sext i32 %add to i64  ; only use is the GEP index below
    481   %staddr = getelementptr inbounds i128, i128* %addr, i64 %idx64  ; GEP over i128 elements
    482   store i128 %stuff, i128 *%staddr  ; stored value is the unrelated argument %stuff
    483   ret void
    484 }
    485 
    486 
    487 ; Check that we do not increase the cost of the code.
    488 ; The input has one free zext and one free sext. If we would have promoted
    489 ; all the way through the load we would end up with a free zext and a
    490 ; non-free sext (of %b).
    491 ; OPTALL-LABEL: @promoteSExtFromAddrMode256
    492 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
    493 ;
    494 ; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
    495 ; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
    496 ; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
    497 ;
    498 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    499 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
    500 ; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
    501 ;
    502 ; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i256, i256* %addr, i64 [[IDX64]]
    503 ; OPTALL-NEXT: store i256 %stuff, i256* [[GEP]]
    504 ; OPTALL-NEXT: ret void
    505 define void @promoteSExtFromAddrMode256(i8* %p, i32 %b, i256* %addr, i256 %stuff) {
    506 entry:
    507   %t = load i8, i8* %p
    508   %zextt = zext i8 %t to i32
    509   %add = add nsw i32 %zextt, %b  ; only feeds the index computation here
    510   %idx64 = sext i32 %add to i64  ; only use is the GEP index below
    511   %staddr = getelementptr inbounds i256, i256* %addr, i64 %idx64  ; GEP over i256 elements
    512   store i256 %stuff, i256 *%staddr  ; stored value is the unrelated argument %stuff
    513   ret void
    514 }
    515 
    516 ; Check that we do not increase the cost of the code.
    517 ; The input has one free zext and one free zext.
    518 ; When we promote all the way through the load, we end up with
    519 ; a free zext and a non-free zext (of %b).
    520 ; However, the current target lowering says zext i32 to i64 is free
    521 ; so the promotion happens because the cost did not change and may
    522 ; expose more opportunities.
    523 ; This would need to be fixed at some point.
    524 ; OPTALL-LABEL: @doNotPromoteFreeZExtFromAddrMode
    525 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
    526 ;
    527 ; This transformation should really happen only for stress mode.
    528 ; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
    529 ; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
    530 ; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
    531 ; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
    532 ;
    533 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    534 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
    535 ; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
    536 ;
    537 ; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
    538 ; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
    539 ; OPTALL-NEXT: ret void
    540 define void @doNotPromoteFreeZExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
    541 entry:
    542   %t = load i8, i8* %p
    543   %zextt = zext i8 %t to i32
    544   %add = add nuw i32 %zextt, %b  ; used both as the stored value and, extended, as the GEP index
    545   %idx64 = zext i32 %add to i64  ; zero-extended index; only use is the GEP below
    546   %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64  ; GEP over i32 elements
    547   store i32 %add, i32 *%staddr
    548   ret void
    549 }
    550 
    551 ; OPTALL-LABEL: @doNotPromoteFreeSExtFromShift
    552 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
    553 ;
    554 ; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
    555 ; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
    556 ; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
    557 ;
    558 ; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    559 ; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
    560 ; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
    561 ;
    562 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    563 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
    564 ; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
    565 ;
    566 ; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
    567 ; OPTALL-NEXT: ret i64 [[RES64]]
    568 define i64 @doNotPromoteFreeSExtFromShift(i8* %p, i32 %b) {
    569 entry:
    570   %t = load i8, i8* %p
    571   %zextt = zext i8 %t to i32
    572   %add = add nsw i32 %zextt, %b
    573   %idx64 = sext i32 %add to i64  ; only use is the shift below
    574   %staddr = shl i64 %idx64, 12  ; the extension feeds a shift by a constant instead of a GEP
    575   ret i64 %staddr
    576 }
    577 
    578 ; Same comment as doNotPromoteFreeZExtFromAddrMode.
    579 ; OPTALL-LABEL: @doNotPromoteFreeZExtFromShift
    580 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
    581 ;
    582 ; This transformation should really happen only for stress mode.
    583 ; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
    584 ; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
    585 ; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
    586 ;
    587 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
    588 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
    589 ; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
    590 ;
    591 ; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
    592 ; OPTALL-NEXT: ret i64 [[RES64]]
    593 define i64 @doNotPromoteFreeZExtFromShift(i8* %p, i32 %b) {
    594 entry:
    595   %t = load i8, i8* %p
    596   %zextt = zext i8 %t to i32
    597   %add = add nuw i32 %zextt, %b
    598   %idx64 = zext i32 %add to i64  ; only use is the shift below
    599   %staddr = shl i64 %idx64, 12  ; the extension feeds a shift by a constant instead of a GEP
    600   ret i64 %staddr
    601 }
    602 
    603 ; The input has one free zext and one non-free sext.
    604 ; When we promote all the way through to the load, we end up with
    605 ; a free zext, a free sext (%ld1), and a non-free sext (of %cst).
    606 ; However, when we generate a load pair, the free sext (%ld1) becomes
    607 ; non-free. So technically, we trade one non-free sext for two non-free
    608 ; sexts.
    609 ; This would need to be fixed at some point.
    610 ; OPTALL-LABEL: @doNotPromoteBecauseOfPairedLoad
    611 ; OPTALL: [[LD0:%[a-zA-Z_0-9-]+]] = load i32, i32* %p
    612 ; OPTALL: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %p, i64 1
    613 ; OPTALL: [[LD1:%[a-zA-Z_0-9-]+]] = load i32, i32* [[GEP]]
    614 ;
    615 ; This transformation should really happen only for stress mode.
    616 ; OPT-NEXT: [[SEXTLD1:%[a-zA-Z_0-9-]+]] = sext i32 [[LD1]] to i64
    617 ; OPT-NEXT: [[SEXTCST:%[a-zA-Z_0-9-]+]] = sext i32 %cst to i64
    618 ; OPT-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD1]], [[SEXTCST]]
    619 ;
    620 ; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[LD1]], %cst
    621 ; DISABLE-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = sext i32 [[RES]] to i64
    622 ;
    623 ; OPTALL-NEXT: [[ZEXTLD0:%[a-zA-Z_0-9-]+]] = zext i32 [[LD0]] to i64
    624 ; OPTALL-NEXT: [[FINAL:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTRES]], [[ZEXTLD0]]
    625 ; OPTALL-NEXT: ret i64 [[FINAL]]
    626 define i64 @doNotPromoteBecauseOfPairedLoad(i32* %p, i32 %cst) {
    627   %ld0 = load i32, i32* %p  ; first i32 element
    628   %idxLd1 = getelementptr inbounds i32, i32* %p, i64 1  ; address of the i32 element right after %p
    629   %ld1 = load i32, i32* %idxLd1  ; second, adjacent i32 element
    630   %res = add nsw i32 %ld1, %cst
    631   %sextres = sext i32 %res to i64  ; sext of an add whose operands are a load and an argument
    632   %zextLd0 = zext i32 %ld0 to i64  ; zext applied directly to the first load
    633   %final = add i64 %sextres, %zextLd0
    634   ret i64 %final
    635 }
    636 
    637 define i64 @promoteZextShl(i1 %c, i16* %P) {
    638 entry:
    639 ; OPTALL-LABEL: @promoteZextShl
    640 ; OPTALL: entry:
    641 ; OPT: %[[LD:.*]] = load i16, i16* %P
    642 ; OPT: %[[EXT:.*]] = zext i16 %[[LD]] to i64
    643 ; OPT: if.then:
    644 ; OPT: shl nsw i64 %[[EXT]], 1
    645 ; DISABLE: if.then:
    646 ; DISABLE: %r = sext i32 %shl2 to i64
    647   %ld = load i16, i16* %P  ; load is in the entry block
    648   br i1 %c, label %end, label %if.then
    649 if.then:
    650   %z = zext i16 %ld to i32  ; extension of the load, defined in a different block
    651   %shl2 = shl nsw i32 %z, 1  ; nsw shift sitting between the zext and the sext
    652   %r = sext i32 %shl2 to i64  ; sext of a shift of the zero-extended load
    653   ret i64 %r
    654 end:
    655   ret i64 0
    656 }
    657