; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=DISABLE

; Check prefixes used in this file (see the RUN lines above):
;  - OPTALL is enabled in all three runs.
;  - OPT is enabled whenever the promotion is enabled (regular and stress runs).
;  - NONSTRESS is enabled only in the regular run.
;  - STRESS is enabled only with -stress-cgp-ext-ld-promotion.
;  - DISABLE is enabled only with -disable-cgp-ext-ld-promotion.

; CodeGenPrepare should move the zext into the block with the load
; so that SelectionDAG can select it with the load.
;
; OPTALL-LABEL: @foo
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPTALL: store i32 [[ZEXT]], i32* %q
; OPTALL: ret
define void @foo(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %t to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload when an operation with only one
; argument to explicitly extend is in the way.
; OPTALL-LABEL: @promoteOneArg
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
; Make sure the operation is not promoted when the promotion pass is disabled.
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArg(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  %add = add nuw i8 %t, 2
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload when an operation with only one
; argument to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteOneArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArgSExt(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  %add = add nsw i8 %t, 2
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload when an operation with two
; arguments to explicitly extend is in the way.
; Extending %add will create two extensions:
; 1. One for %b.
; 2. One for %t.
; #1 will not be removed as we do not know anything about %b.
; #2 may not be merged with the load because %t is used in a comparison.
; Since two extensions may be emitted in the end instead of one before the
; transformation, the regular heuristic does not apply the optimization.
;
; OPTALL-LABEL: @promoteTwoArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
entry:
  %t = load i8, i8* %p
  %add = add nuw i8 %t, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload when an operation with two
; arguments to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteTwoArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
entry:
  %t = load i8, i8* %p
  %add = add nsw i8 %t, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we do not form a zextload if we need to introduce more than
; one additional extension.
; OPTALL-LABEL: @promoteThreeArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
;
; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: add nuw i8
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
entry:
  %t = load i8, i8* %p
  %tmp = add nuw i8 %t, %b
  %add = add nuw i8 %tmp, %c
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload after promoting and merging
; two extensions.
; OPTALL-LABEL: @promoteMergeExtArgZExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
entry:
  %t = load i8, i8* %p
  %ext = zext i8 %t to i16
  %add = add nuw i16 %ext, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i16 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form an extload after promoting and merging
; two extensions.
; Version where the outer extension is a sext; the load itself is still
; zero-extended in the input.
; OPTALL-LABEL: @promoteMergeExtArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[SEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
entry:
  %t = load i8, i8* %p
  %ext = zext i8 %t to i16
  %add = add nsw i16 %ext, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i16 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to catch all the extload opportunities that are exposed
; by the different iterations of codegen prepare.
; Moreover, check that we do not promote more than we need to.
; Here is what is happening in this test (not necessarily in this order):
; 1. We try to promote the operand of %sextadd.
;    a. This creates one sext of %ld2 and one of %zextld
;    b. The sext of %ld2 can be combined with %ld2, so we remove one sext but
;       introduced one. This is fine with the current heuristic: neutral.
;    => We have one zext of %zextld left and we created one sext of %ld2.
; 2. We try to promote the operand of %sextaddza.
;    a. This creates one sext of %zexta and one of %zextld
;    b. The sext of %zexta does not lead to any load, it stays here, even if it
;       could have been combined with the zext of %a.
;    c. The sext of %zextld leads to %ld and can be combined with it. This is
;       done by promoting %zextld. This is fine with the current heuristic:
;       neutral.
;    => We have created a new zext of %ld and we created one sext of %zexta.
; 3. We try to promote the operand of %sextaddb.
;    a. This creates one sext of %b and one of %zextld
;    b. The sext of %b is a dead-end, nothing to be done.
;    c. Same thing as 2.c. happens.
;    => We have created a new zext of %ld and we created one sext of %b.
; 4. We try to promote the operand of the zext of %zextld introduced in #1.
;    a. Same thing as 2.c. happens.
;    b. %zextld does not have any other uses. It is dead-code eliminated.
;    => We have created a new zext of %ld and we removed a zext of %zextld and
;       a zext of %ld.
; Currently we do not try to reuse existing extensions, so in the end we have
; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
;
; OPTALL-LABEL: @severalPromotions
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_1]]
; We do not combine this one: see 2.b.
; OPT-NEXT: [[ZEXTA:%[a-zA-Z_0-9-]+]] = zext i8 %a to i32
; OPT-NEXT: [[SEXTZEXTA:%[a-zA-Z_0-9-]+]] = sext i32 [[ZEXTA]] to i64
; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTZEXTA]], [[ZEXTLD1_3]]
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_2]]
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDZA]] to i64
; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDB]] to i64
;
; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
; OPTALL: ret
define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
  %ld = load i8, i8* %addr1
  %zextld = zext i8 %ld to i32
  %ld2 = load i32, i32* %addr2
  %add = add nsw i32 %ld2, %zextld
  %sextadd = sext i32 %add to i64
  %zexta = zext i8 %a to i32
  %addza = add nsw i32 %zexta, %zextld
  %sextaddza = sext i32 %addza to i64
  %addb = add nsw i32 %b, %zextld
  %sextaddb = sext i32 %addb to i64
  call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
  ret void
}

; Sink for the three results above; the call keeps them used.
declare void @dummy(i64, i64, i64)

; Make sure we do not try to promote vector types since the type promotion
; helper does not support them for now.
; OPTALL-LABEL: @vectorPromotion
; OPTALL: [[SHL:%[a-zA-Z_0-9-]+]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
; OPTALL: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext <2 x i32> [[SHL]] to <2 x i64>
; OPTALL: ret
define void @vectorPromotion() {
entry:
  %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
  %b = zext <2 x i32> %a to <2 x i64>
  ret void
}

; Globals referenced by the constant expression in
; @promotionOfArgEndsUpInValue.
@a = common global i32 0, align 4
@c = common global [2 x i32] zeroinitializer, align 4

; Make sure we support promotion of operands that produces a Value as opposed
; to an instruction.
; This used to cause a crash.
; OPTALL-LABEL: @promotionOfArgEndsUpInValue
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr
;
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
;
; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; OPTALL-NEXT: ret i32 [[RES]]
define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
entry:
  %val = load i16, i16* %addr
  %add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
  %conv3 = sext i16 %add to i32
  ret i32 %conv3
}

; Check that we see that one zext can be derived from the other for free.
; OPTALL-LABEL: @promoteTwoArgZextWithSourceExtendedTwice
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; OPT-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], 12
; OPT-NEXT: store i32 [[RES32]], i32* %addr
; OPT-NEXT: store i64 [[RES64]], i64* %q
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[RES2_32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], 12
; DISABLE-NEXT: store i32 [[RES32]], i32* %addr
; DISABLE-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES2_32]] to i64
; DISABLE-NEXT: store i64 [[ZEXT64]], i64* %q
;
; OPTALL-NEXT: ret void
define void @promoteTwoArgZextWithSourceExtendedTwice(i8* %p, i64* %q, i32 %b, i32* %addr) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  %add2 = add nuw i32 %zextt, 12
  store i32 %add, i32 *%addr
  %s = zext i32 %add2 to i64
  store i64 %s, i64* %q
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
; STRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeSExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
  store i32 %add, i32 *%staddr
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode64
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i64, i64* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i64 %stuff, i64* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeSExtFromAddrMode64(i8* %p, i32 %b, i64* %addr, i64 %stuff) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i64, i64* %addr, i64 %idx64
  store i64 %stuff, i64 *%staddr
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode128
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i128, i128* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i128 %stuff, i128* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeSExtFromAddrMode128(i8* %p, i32 %b, i128* %addr, i128 %stuff) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i128, i128* %addr, i64 %idx64
  store i128 %stuff, i128 *%staddr
  ret void
}


; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; NOTE(review): for the i256 variant that follows, the checks expect the
; promotion in both the regular and the stress runs, which does not match the
; wording above. Presumably the sext is not free for an i256 access; confirm
; and reword.
; OPTALL-LABEL: @promoteSExtFromAddrMode256
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i256, i256* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i256 %stuff, i256* [[GEP]]
; OPTALL-NEXT: ret void
define void @promoteSExtFromAddrMode256(i8* %p, i32 %b, i256* %addr, i256 %stuff) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i256, i256* %addr, i64 %idx64
  store i256 %stuff, i256 *%staddr
  ret void
}

; Check that we do not increase the cost of the code.
; The input has two free zexts.
; When we promote all the way through the load, we end up with
; a free zext and a non-free zext (of %b).
; However, the current target lowering says zext i32 to i64 is free
; so the promotion happens because the cost did not change and may
; expose more opportunities.
; This would need to be fixed at some point.
; OPTALL-LABEL: @doNotPromoteFreeZExtFromAddrMode
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; This transformation should really happen only for stress mode.
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeZExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  %idx64 = zext i32 %add to i64
  %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
  store i32 %add, i32 *%staddr
  ret void
}

; Same input as @doNotPromoteFreeSExtFromAddrMode, except that the sext
; result feeds a shift instead of an address computation.
; The regular heuristic is expected to leave the sext in place; only the
; stress run promotes it.
; OPTALL-LABEL: @doNotPromoteFreeSExtFromShift
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; Match the returned value through the captured shift result rather than a
; hard-coded value name.
; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
; OPTALL-NEXT: ret i64 [[RES64]]
define i64 @doNotPromoteFreeSExtFromShift(i8* %p, i32 %b) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = shl i64 %idx64, 12
  ret i64 %staddr
}

; Same comment as doNotPromoteFreeZExtFromAddrMode.
; OPTALL-LABEL: @doNotPromoteFreeZExtFromShift
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; This transformation should really happen only for stress mode.
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
;
; Match the returned value through the captured shift result rather than a
; hard-coded value name.
; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
; OPTALL-NEXT: ret i64 [[RES64]]
define i64 @doNotPromoteFreeZExtFromShift(i8* %p, i32 %b) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  %idx64 = zext i32 %add to i64
  %staddr = shl i64 %idx64, 12
  ret i64 %staddr
}

; The input has one free zext and one non-free sext.
; When we promote all the way through to the load, we end up with
; a free zext, a free sext (%ld1), and a non-free sext (of %cst).
; However, when we generate a load pair, the free sext (of %ld1) becomes
; non-free. So technically, we trade one non-free sext for two non-free
; sexts.
; This would need to be fixed at some point.
; OPTALL-LABEL: @doNotPromoteBecauseOfPairedLoad
; OPTALL: [[LD0:%[a-zA-Z_0-9-]+]] = load i32, i32* %p
; OPTALL: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %p, i64 1
; OPTALL: [[LD1:%[a-zA-Z_0-9-]+]] = load i32, i32* [[GEP]]
;
; This transformation should really happen only for stress mode.
; OPT-NEXT: [[SEXTLD1:%[a-zA-Z_0-9-]+]] = sext i32 [[LD1]] to i64
; OPT-NEXT: [[SEXTCST:%[a-zA-Z_0-9-]+]] = sext i32 %cst to i64
; OPT-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD1]], [[SEXTCST]]
;
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[LD1]], %cst
; DISABLE-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = sext i32 [[RES]] to i64
;
; OPTALL-NEXT: [[ZEXTLD0:%[a-zA-Z_0-9-]+]] = zext i32 [[LD0]] to i64
; OPTALL-NEXT: [[FINAL:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTRES]], [[ZEXTLD0]]
; OPTALL-NEXT: ret i64 [[FINAL]]
define i64 @doNotPromoteBecauseOfPairedLoad(i32* %p, i32 %cst) {
  %ld0 = load i32, i32* %p
  %idxLd1 = getelementptr inbounds i32, i32* %p, i64 1
  %ld1 = load i32, i32* %idxLd1
  %res = add nsw i32 %ld1, %cst
  %sextres = sext i32 %res to i64
  %zextLd0 = zext i32 %ld0 to i64
  %final = add i64 %sextres, %zextLd0
  ret i64 %final
}