; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=DISABLE

; The zext sits in a different block than the load it extends. CodeGenPrepare
; is expected to move it next to the load so that SelectionDAG can select both
; as a single extending load.
;
; OPTALL-LABEL: @foo
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPTALL: store i32 [[ZEXT]], i32* %q
; OPTALL: ret
define void @foo(i8* %p, i32* %q) {
entry:
  %byte = load i8, i8* %p
  %cmp = icmp slt i8 %byte, 20
  br i1 %cmp, label %true, label %false
true:
  %wide = zext i8 %byte to i32
  store i32 %wide, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload when an operation with only one
; argument to explicitly extend is in the way.
; OPTALL-LABEL: @promoteOneArg
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
; Make sure the operation is not promoted when the promotion pass is disabled.
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArg(i8* %p, i32* %q) {
entry:
  %byte = load i8, i8* %p
  %sum = add nuw i8 %byte, 2
  %cmp = icmp slt i8 %byte, 20
  br i1 %cmp, label %true, label %false
true:
  %wide = zext i8 %sum to i32
  store i32 %wide, i32* %q
  ret void
false:
  ret void
}

; Signed counterpart of the previous test: check that we manage to form a
; sextload when an operation with only one argument to explicitly extend is
; in the way.
; OPTALL-LABEL: @promoteOneArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArgSExt(i8* %p, i32* %q) {
entry:
  %byte = load i8, i8* %p
  %sum = add nsw i8 %byte, 2
  %cmp = icmp slt i8 %byte, 20
  br i1 %cmp, label %true, label %false
true:
  %wide = sext i8 %sum to i32
  store i32 %wide, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload when an operation with two
; arguments to explicitly extend is in the way.
; Extending %add will create two extensions:
; 1. One for %b.
; 2. One for %t.
; #1 will not be removed as we do not know anything about %b.
; #2 may not be merged with the load because %t is used in a comparison.
; Since two extensions may be emitted in the end instead of one before the
; transformation, the regular heuristic does not apply the optimization.
;
; OPTALL-LABEL: @promoteTwoArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
entry:
  %t = load i8, i8* %p
  %add = add nuw i8 %t, %b
  %cmp = icmp slt i8 %t, 20
  br i1 %cmp, label %true, label %false
true:
  %wide = zext i8 %add to i32
  store i32 %wide, i32* %q
  ret void
false:
  ret void
}

; Signed counterpart of the previous test: check that we manage to form a
; sextload when an operation with two arguments to explicitly extend is in
; the way.
; OPTALL-LABEL: @promoteTwoArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
entry:
  %byte = load i8, i8* %p
  %sum = add nsw i8 %byte, %b
  %cmp = icmp slt i8 %byte, 20
  br i1 %cmp, label %true, label %false
true:
  %wide = sext i8 %sum to i32
  store i32 %wide, i32* %q
  ret void
false:
  ret void
}

; Check that we do not form a zextload if we need to introduce more than
; one additional extension.
; OPTALL-LABEL: @promoteThreeArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
;
; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: add nuw i8
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
entry:
  %byte = load i8, i8* %p
  %partial = add nuw i8 %byte, %b
  %sum = add nuw i8 %partial, %c
  %cmp = icmp slt i8 %byte, 20
  br i1 %cmp, label %true, label %false
true:
  %wide = zext i8 %sum to i32
  store i32 %wide, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload after promoting and merging
; two extensions.
; OPTALL-LABEL: @promoteMergeExtArgZExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
entry:
  %byte = load i8, i8* %p
  %z16 = zext i8 %byte to i16
  %sum = add nuw i16 %z16, %b
  %cmp = icmp slt i8 %byte, 20
  br i1 %cmp, label %true, label %false
true:
  %wide = zext i16 %sum to i32
  store i32 %wide, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload after promoting and merging
; two extensions when the outermost extension is a sext.
; Version with sext.
; OPTALL-LABEL: @promoteMergeExtArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[SEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
entry:
  %t = load i8, i8* %p
  %ext = zext i8 %t to i16
  %add = add nsw i16 %ext, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i16 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to catch all the extload opportunities that are exposed
; by the different iterations of codegen prepare.
; Moreover, check that we do not promote more than we need to.
; Here is what is happening in this test (not necessarily in this order):
; 1. We try to promote the operand of %sextadd.
;    a. This creates one sext of %ld2 and one of %zextld
;    b. The sext of %ld2 can be combined with %ld2, so we removed one sext but
;       introduced one. This is fine with the current heuristic: neutral.
;    => We have one zext of %zextld left and we created one sext of %ld2.
; 2. We try to promote the operand of %sextaddza.
;    a. This creates one sext of %zexta and one of %zextld
;    b. The sext of %zexta can be combined with the zext of %a.
;    c. The sext of %zextld leads to %ld and can be combined with it. This is
;       done by promoting %zextld. This is fine with the current heuristic:
;       neutral.
;    => We have created a new zext of %ld and we created one sext of %zexta.
; 3. We try to promote the operand of %sextaddb.
;    a. This creates one sext of %b and one of %zextld
;    b. The sext of %b is a dead-end, nothing to be done.
;    c. Same thing as 2.c. happens.
;    => We have created a new zext of %ld and we created one sext of %b.
; 4. We try to promote the operand of the zext of %zextld introduced in #1.
;    a. Same thing as 2.c. happens.
;    b. %zextld does not have any other uses. It is removed as dead code.
;    => We have created a new zext of %ld and we removed a zext of %zextld and
;       a zext of %ld.
; Currently we do not try to reuse existing extensions, so in the end we have
; 3 identical zexts of %ld. The extensions will be CSE'ed by SDag.
;
; OPTALL-LABEL: @severalPromotions
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_3]]
; OPT-NEXT: [[ZEXTA:%[a-zA-Z_0-9-]+]] = zext i8 %a to i64
; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXTA]], [[ZEXTLD1_2]]
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_1]]
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDZA]] to i64
; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDB]] to i64
;
; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
; OPTALL: ret
define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
  %ld = load i8, i8* %addr1
  %zextld = zext i8 %ld to i32
  %ld2 = load i32, i32* %addr2
  %add = add nsw i32 %ld2, %zextld
  %sextadd = sext i32 %add to i64
  %zexta = zext i8 %a to i32
  %addza = add nsw i32 %zexta, %zextld
  %sextaddza = sext i32 %addza to i64
  %addb = add nsw i32 %b, %zextld
  %sextaddb = sext i32 %addb to i64
  call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
  ret void
}

; External sink that keeps the three extended results of @severalPromotions
; alive.
declare void @dummy(i64, i64, i64)

; Make sure we do not try to promote vector types since the type promotion
; helper does not support them for now.
; OPTALL-LABEL: @vectorPromotion
; OPTALL: [[SHL:%[a-zA-Z_0-9-]+]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
; OPTALL: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext <2 x i32> [[SHL]] to <2 x i64>
; OPTALL: ret
define void @vectorPromotion() {
entry:
  %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
  %b = zext <2 x i32> %a to <2 x i64>
  ret void
}

@a = common global i32 0, align 4
@c = common global [2 x i32] zeroinitializer, align 4

; Make sure we support promotion of operands that produce a Value as opposed
; to an instruction.
; This used to cause a crash.
; OPTALL-LABEL: @promotionOfArgEndsUpInValue
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr
;
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
;
; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; OPTALL-NEXT: ret i32 [[RES]]
define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
entry:
  %half = load i16, i16* %addr
  %sum = add nuw nsw i16 %half, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
  %wide = sext i16 %sum to i32
  ret i32 %wide
}

; Check that we notice that one zext can be derived from the other for free.
; OPTALL-LABEL: @promoteTwoArgZextWithSourceExtendedTwice
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; OPT-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], 12
; OPT-NEXT: store i32 [[RES32]], i32* %addr
; OPT-NEXT: store i64 [[RES64]], i64* %q
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[RES2_32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], 12
; DISABLE-NEXT: store i32 [[RES32]], i32* %addr
; DISABLE-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES2_32]] to i64
; DISABLE-NEXT: store i64 [[ZEXT64]], i64* %q
;
; OPTALL-NEXT: ret void
define void @promoteTwoArgZextWithSourceExtendedTwice(i8* %p, i64* %q, i32 %b, i32* %addr) {
entry:
  %byte = load i8, i8* %p
  %z32 = zext i8 %byte to i32
  %sum = add nuw i32 %z32, %b
  %sum12 = add nuw i32 %z32, 12
  store i32 %sum, i32* %addr
  %z64 = zext i32 %sum12 to i64
  store i64 %z64, i64* %q
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. Had we promoted all the way
; through the load, we would end up with a free zext and a non-free sext
; (of %b).
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
; STRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeSExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
entry:
  %byte = load i8, i8* %p
  %z32 = zext i8 %byte to i32
  %sum = add nsw i32 %z32, %b
  %idx = sext i32 %sum to i64
  %slot = getelementptr inbounds i32, i32* %addr, i64 %idx
  store i32 %sum, i32* %slot
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. Had we promoted all the way
; through the load, we would end up with a free zext and a non-free sext
; (of %b).
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode64
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i64, i64* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i64 %stuff, i64* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeSExtFromAddrMode64(i8* %p, i32 %b, i64* %addr, i64 %stuff) {
entry:
  %byte = load i8, i8* %p
  %z32 = zext i8 %byte to i32
  %sum = add nsw i32 %z32, %b
  %idx = sext i32 %sum to i64
  %slot = getelementptr inbounds i64, i64* %addr, i64 %idx
  store i64 %stuff, i64* %slot
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. Had we promoted all the way
; through the load, we would end up with a free zext and a non-free sext
; (of %b).
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode128
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i128, i128* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i128 %stuff, i128* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeSExtFromAddrMode128(i8* %p, i32 %b, i128* %addr, i128 %stuff) {
entry:
  %byte = load i8, i8* %p
  %z32 = zext i8 %byte to i32
  %sum = add nsw i32 %z32, %b
  %idx = sext i32 %sum to i64
  %slot = getelementptr inbounds i128, i128* %addr, i64 %idx
  store i128 %stuff, i128* %slot
  ret void
}


; Same input shape as the doNotPromoteFreeSExtFromAddrMode* tests, but the
; index feeds the address of an i256 store. Here the sext is expected to be
; promoted all the way through the add to the load, even without the stress
; option, leaving a zext of the load and a sext of %b.
; NOTE(review): presumably the sext is not considered free for this access
; width, unlike the narrower variants - confirm against the target hooks.
; OPTALL-LABEL: @promoteSExtFromAddrMode256
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i256, i256* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i256 %stuff, i256* [[GEP]]
; OPTALL-NEXT: ret void
define void @promoteSExtFromAddrMode256(i8* %p, i32 %b, i256* %addr, i256 %stuff) {
entry:
  %byte = load i8, i8* %p
  %z32 = zext i8 %byte to i32
  %sum = add nsw i32 %z32, %b
  %idx = sext i32 %sum to i64
  %slot = getelementptr inbounds i256, i256* %addr, i64 %idx
  store i256 %stuff, i256* %slot
  ret void
}

; Check that we do not increase the cost of the code.
; The input has two free zexts.
; When we promote all the way through the load, we end up with
; a free zext and a non-free zext (of %b).
; However, the current target lowering says zext i32 to i64 is free
; so the promotion happens because the cost did not change and may
; expose more opportunities.
; This would need to be fixed at some point.
; OPTALL-LABEL: @doNotPromoteFreeZExtFromAddrMode
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; This transformation should really happen only for stress mode.
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeZExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  %idx64 = zext i32 %add to i64
  %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
  store i32 %add, i32* %staddr
  ret void
}

; Same input shape as doNotPromoteFreeSExtFromAddrMode, except that the
; sign-extended value feeds a shift instead of an address computation.
; The returned value is matched through the captured shift result rather than
; through a hard-coded value name.
; OPTALL-LABEL: @doNotPromoteFreeSExtFromShift
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
; OPTALL-NEXT: ret i64 [[RES64]]
define i64 @doNotPromoteFreeSExtFromShift(i8* %p, i32 %b) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = shl i64 %idx64, 12
  ret i64 %staddr
}

; Same comment as doNotPromoteFreeZExtFromAddrMode.
; OPTALL-LABEL: @doNotPromoteFreeZExtFromShift
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; This transformation should really happen only for stress mode.
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
; OPTALL-NEXT: ret i64 [[RES64]]
define i64 @doNotPromoteFreeZExtFromShift(i8* %p, i32 %b) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  %idx64 = zext i32 %add to i64
  %staddr = shl i64 %idx64, 12
  ret i64 %staddr
}

; The input has one free zext and one non-free sext.
; When we promote all the way through to the load, we end up with
; a free zext, a free sext (%ld1), and a non-free sext (of %cst).
; However, when we generate a load pair, the free sext (%ld1) becomes
; non-free. So technically, we trade one non-free sext for two non-free
; sexts.
; This would need to be fixed at some point.
; OPTALL-LABEL: @doNotPromoteBecauseOfPairedLoad
; OPTALL: [[LD0:%[a-zA-Z_0-9-]+]] = load i32, i32* %p
; OPTALL: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %p, i64 1
; OPTALL: [[LD1:%[a-zA-Z_0-9-]+]] = load i32, i32* [[GEP]]
;
; This transformation should really happen only for stress mode.
; OPT-NEXT: [[SEXTLD1:%[a-zA-Z_0-9-]+]] = sext i32 [[LD1]] to i64
; OPT-NEXT: [[SEXTCST:%[a-zA-Z_0-9-]+]] = sext i32 %cst to i64
; OPT-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD1]], [[SEXTCST]]
;
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[LD1]], %cst
; DISABLE-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = sext i32 [[RES]] to i64
;
; OPTALL-NEXT: [[ZEXTLD0:%[a-zA-Z_0-9-]+]] = zext i32 [[LD0]] to i64
; OPTALL-NEXT: [[FINAL:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTRES]], [[ZEXTLD0]]
; OPTALL-NEXT: ret i64 [[FINAL]]
define i64 @doNotPromoteBecauseOfPairedLoad(i32* %p, i32 %cst) {
  %ld0 = load i32, i32* %p
  %idxLd1 = getelementptr inbounds i32, i32* %p, i64 1
  %ld1 = load i32, i32* %idxLd1
  %res = add nsw i32 %ld1, %cst
  %sextres = sext i32 %res to i64
  %zextLd0 = zext i32 %ld0 to i64
  %final = add i64 %sextres, %zextLd0
  ret i64 %final
}

; Check that the sext in if.then is promoted through the shl nsw, so that the
; i16 load in the entry block is zero-extended directly to i64 and the shift is
; performed on i64. With promotion disabled the sext of the i32 shift result
; must stay in if.then.
define i64 @promoteZextShl(i1 %c, i16* %P) {
entry:
; OPTALL-LABEL: @promoteZextShl
; OPTALL: entry:
; OPT: %[[LD:.*]] = load i16, i16* %P
; OPT: %[[EXT:.*]] = zext i16 %[[LD]] to i64
; OPT: if.then:
; OPT: shl nsw i64 %[[EXT]], 1
; DISABLE: if.then:
; DISABLE: %r = sext i32 %shl2 to i64
  %ld = load i16, i16* %P
  br i1 %c, label %end, label %if.then
if.then:
  %z = zext i16 %ld to i32
  %shl2 = shl nsw i32 %z, 1
  %r = sext i32 %shl2 to i64
  ret i64 %r
end:
  ret i64 0
}