; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-win64 | FileCheck %s
; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S -stress-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S -disable-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=DISABLE

; Prefix usage, as set up by the run lines above:
;   - the default prefix is checked by the two llc runs (Linux and Win64);
;   - OPTALL is checked by all three opt runs;
;   - OPT is checked by the plain and the stress opt runs;
;   - NONSTRESS, STRESS and DISABLE are each checked by exactly one opt run.

; rdar://7304838
; CodeGenPrepare should move the zext into the block with the load
; so that SelectionDAG can select it with the load.
;
; The register alternation below covers both llc runs: the pointer argument
; is expected in %rdi for the Linux triple and in %rcx for the Win64 triple.
;
; CHECK-LABEL: foo:
; CHECK: movsbl ({{%rdi|%rcx}}), %eax
;
; OPTALL-LABEL: @foo
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPTALL: store i32 [[ZEXT]], i32* %q
; OPTALL: ret
define void @foo(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  %a = icmp slt i8 %t, 20               ; %t is also used by this compare
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %t to i32                ; extension sits in a different block than the load
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload if an operation with only one
; argument to explicitly extend is in the way.
; OPTALL-LABEL: @promoteOneArg
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
; Make sure the operation is not promoted when the promotion pass is disabled.
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArg(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  %add = add nuw i8 %t, 2               ; i8 add between the load and the zext
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload if an operation with only one
; argument to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteOneArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArgSExt(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  %add = add nsw i8 %t, 2               ; i8 add between the load and the sext
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload if an operation with two
; arguments to explicitly extend is in the way.
; Extending %add will create two extensions:
; 1. One for %b.
; 2. One for %t.
; #1 will not be removed as we do not know anything about %b.
; #2 may not be merged with the load because %t is used in a comparison.
; Since two extensions may be emitted in the end instead of one before the
; transformation, the regular heuristic does not apply the optimization.
;
; Expected shapes below: only the stress run promotes the add to i32; the
; plain run and the disabled run both keep the i8 add followed by the zext.
;
; OPTALL-LABEL: @promoteTwoArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
entry:
  %t = load i8, i8* %p
  %add = add nuw i8 %t, %b              ; second operand is a function argument
  %a = icmp slt i8 %t, 20               ; extra use of the loaded value
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload if an operation with two
; arguments to explicitly extend is in the way.
; Version with sext.
; Expected shapes below: only the stress run promotes the add to i32; the
; plain run and the disabled run both keep the i8 add followed by the sext.
; OPTALL-LABEL: @promoteTwoArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
entry:
  %t = load i8, i8* %p
  %add = add nsw i8 %t, %b              ; second operand is a function argument
  %a = icmp slt i8 %t, 20               ; extra use of the loaded value
  br i1 %a, label %true, label %false
true:
  %s = sext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we do not form a zextload if we need to introduce more than
; one additional extension.
; Expected shapes below: the stress run promotes both adds to i32 (extending
; the load, %b and %c); the plain run and the disabled run keep both i8 adds
; and a single zext of the final sum.
; OPTALL-LABEL: @promoteThreeArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
;
; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: add nuw i8
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
entry:
  %t = load i8, i8* %p
  %tmp = add nuw i8 %t, %b              ; first i8 add, uses argument %b
  %add = add nuw i8 %tmp, %c            ; second i8 add, uses argument %c
  %a = icmp slt i8 %t, 20               ; extra use of the loaded value
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload after promoting and merging
; two extensions.
; Expected shapes below: the stress run extends the load straight to i32 and
; extends %b from i16; the plain run and the disabled run keep the original
; i8 -> i16 zext, the i16 add, and the i16 -> i32 zext.
; OPTALL-LABEL: @promoteMergeExtArgZExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
entry:
  %t = load i8, i8* %p
  %ext = zext i8 %t to i16              ; first (inner) extension: i8 -> i16
  %add = add nuw i16 %ext, %b
  %a = icmp slt i8 %t, 20               ; extra use of the loaded value
  br i1 %a, label %true, label %false
true:
  %s = zext i16 %add to i32             ; second (outer) extension: i16 -> i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload after promoting and merging
; two extensions.
; Version with sext.
; Note: in this function the load itself is extended with a zext (see %ext)
; and only the outer extension is a sext, so the stress lines below expect a
; zext of the load and a sext of %b.
; OPTALL-LABEL: @promoteMergeExtArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
entry:
  %t = load i8, i8* %p
  %ext = zext i8 %t to i16              ; inner extension is a zext
  %add = add nsw i16 %ext, %b
  %a = icmp slt i8 %t, 20               ; extra use of the loaded value
  br i1 %a, label %true, label %false
true:
  %s = sext i16 %add to i32             ; outer extension is a sext
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to catch all the extload opportunities that are exposed
; by the different iterations of codegen prepare.
; Moreover, check that we do not promote more than we need to.
; Here is what is happening in this test (not necessarily in this order):
; 1. We try to promote the operand of %sextadd.
;    a. This creates one sext of %ld2 and one of %zextld
;    b. The sext of %ld2 can be combined with %ld2, so we remove one sext but
;       introduce one. This is fine with the current heuristic: neutral.
;    => We have one zext of %zextld left and we created one sext of %ld2.
; 2. We try to promote the operand of %sextaddza.
;    a. This creates one sext of %zexta and one of %zextld
;    b. The sext of %zexta can be combined with the zext of %a.
;    c. The sext of %zextld leads to %ld and can be combined with it. This is
;       done by promoting %zextld. This is fine with the current heuristic:
;       neutral.
;    => We have created a new zext of %ld and we created one sext of %zexta.
; 3. We try to promote the operand of %sextaddb.
;    a. This creates one sext of %b and one of %zextld
;    b. The sext of %b is a dead-end, nothing to be done.
;    c. Same thing as 2.c. happens.
;    => We have created a new zext of %ld and we created one sext of %b.
; 4. We try to promote the operand of the zext of %zextld introduced in #1.
;    a. Same thing as 2.c. happens.
;    b. %zextld does not have any other uses. It is dead code.
;    => We have created a new zext of %ld and we removed a zext of %zextld and
;       a zext of %ld.
; Currently we do not try to reuse existing extensions, so in the end we have
; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
;
; Note on the pattern variables below: ZEXTLD1_1, ZEXTLD1_2 and ZEXTLD1_3
; capture extensions of the load, whereas ZEXTLD1_4 captures the extension of
; the argument %a.
;
; OPTALL-LABEL: @severalPromotions
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_3]]
; OPT-NEXT: [[ZEXTLD1_4:%[a-zA-Z_0-9-]+]] = zext i8 %a to i64
; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXTLD1_4]], [[ZEXTLD1_2]]
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_1]]
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDZA]] to i64
; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDB]] to i64
;
; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
; OPTALL: ret
define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
  %ld = load i8, i8* %addr1
  %zextld = zext i8 %ld to i32          ; shared by the three adds below
  %ld2 = load i32, i32* %addr2
  %add = add nsw i32 %ld2, %zextld      ; other operand is a load
  %sextadd = sext i32 %add to i64
  %zexta = zext i8 %a to i32
  %addza = add nsw i32 %zexta, %zextld  ; other operand is a zext of an argument
  %sextaddza = sext i32 %addza to i64
  %addb = add nsw i32 %b, %zextld       ; other operand is a plain argument
  %sextaddb = sext i32 %addb to i64
  call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
  ret void
}

; Callee that receives the three i64 results computed in @severalPromotions.
declare void @dummy(i64, i64, i64)

; Make sure we do not try to promote vector types since the type promotion
; helper does not support them for now.
; OPTALL-LABEL: @vectorPromotion
; OPTALL: [[SHL:%[a-zA-Z_0-9-]+]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
; OPTALL: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext <2 x i32> [[SHL]] to <2 x i64>
; OPTALL: ret
define void @vectorPromotion() {
entry:
  %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
  %b = zext <2 x i32> %a to <2 x i64>   ; vector extension, expected to be left as is
  ret void
}

; Globals referenced by the constant expression in
; @promotionOfArgEndsUpInValue.
@a = common global i32 0, align 4
@c = common global [2 x i32] zeroinitializer, align 4

; PR21978.
; Make sure we support promotion of operands that produce a Value as opposed
; to an instruction.
; This used to cause a crash.
; The second operand of the add is a constant expression rather than an
; instruction; when promotion is enabled the checks expect that constant
; expression to be widened to i32 as well.
; OPTALL-LABEL: @promotionOfArgEndsUpInValue
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr

; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
;
; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; OPTALL-NEXT: ret i32 [[RES]]
define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
entry:
  %val = load i16, i16* %addr
  ; The right-hand operand is a constant expression comparing two global addresses.
  %add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
  %conv3 = sext i16 %add to i32
  ret i32 %conv3
}

; Check that we see that one zext can be derived from the other for free.
; With promotion enabled the checks expect two extensions of the load (one to
; i64, one to i32) and an i64 add; with promotion disabled they expect the
; original i32 computation followed by a zext to i64 before the second store.
; OPTALL-LABEL: @promoteTwoArgZextWithSourceExtendedTwice
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p

; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; OPT-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], 12
; OPT-NEXT: store i32 [[RES32]], i32* %addr
; OPT-NEXT: store i64 [[RES64]], i64* %q
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[RES2_32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], 12
; DISABLE-NEXT: store i32 [[RES32]], i32* %addr
; DISABLE-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES2_32]] to i64
; DISABLE-NEXT: store i64 [[ZEXT64]], i64* %q
;
; OPTALL-NEXT: ret void
define void @promoteTwoArgZextWithSourceExtendedTwice(i8* %p, i64* %q, i32 %b, i32* %addr) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32            ; used by both adds below
  %add = add nuw i32 %zextt, %b         ; stays i32, stored to %addr
  %add2 = add nuw i32 %zextt, 12        ; feeds the zext to i64 below
  store i32 %add, i32 *%addr
  %s = zext i32 %add2 to i64
  store i64 %s, i64* %q
  ret void
}