; RUN: llc %s -o - -enable-shrink-wrap=true -pass-remarks-output=%t | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
; RUN: cat %t | FileCheck %s --check-prefix=REMARKS
; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
;
; Note: Lots of tests use inline asm instead of regular calls.
; This gives better control over what the register allocation will do.
; Otherwise, we may get a spill right in the entry block, defeating
; shrink-wrapping. Moreover, some of the inline asm statements (nop)
; are here to ensure that the related paths do not end up as critical
; edges.
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "x86_64-apple-macosx"


; Initial motivating example: Simple diamond with a call just on one side.
; CHECK-LABEL: foo:
;
; Compare the arguments and jump to exit.
; No prologue needed.
; ENABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
; ENABLE-NEXT: cmpl %esi, %edi
; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; (What we push does not matter. It should be some random scratch register.)
; CHECK: pushq
;
; Compare the arguments and jump to exit.
; After the prologue is set.
; DISABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
; DISABLE-NEXT: cmpl %esi, %edi
; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
;
; Store %a in the alloca.
; CHECK: movl [[ARG0CPY]], 4(%rsp)
; Set the alloca address in the second argument.
; CHECK-NEXT: leaq 4(%rsp), %rsi
; Set the first argument to zero.
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq _doSomething
;
; With shrink-wrapping, epilogue is just after the call.
; ENABLE-NEXT: addq $8, %rsp
;
; CHECK: [[EXIT_LABEL]]:
;
; Without shrink-wrapping, epilogue is in the exit block.
; Epilogue code. (What we pop does not matter.)
; DISABLE-NEXT: popq
;
; CHECK-NEXT: retq
define i32 @foo(i32 %a, i32 %b) {
  %tmp = alloca i32, align 4
  %tmp2 = icmp slt i32 %a, %b
  br i1 %tmp2, label %true, label %false

true:
  store i32 %a, i32* %tmp, align 4
  %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
  br label %false

false:
  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
  ret i32 %tmp.0
}

; External function called by @foo, @regmask and @tlsCall.
declare i32 @doSomething(i32, i32*)


; Check that we do not perform the restore inside the loop whereas the save
; is outside.
; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
;
; Shrink-wrapping allows skipping the prologue in the else case.
; ENABLE: testl %edi, %edi
; ENABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; SUM is in %esi because it is coalesced with the second
; argument on the else path.
; CHECK: xorl [[SUM:%esi]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
;
; Next BB.
; CHECK: [[LOOP:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP]]
;
; Next BB.
; SUM << 3.
; CHECK: shll $3, [[SUM]]
;
; Jump to epilogue.
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader

for.preheader:
  ; The nop keeps this block non-empty (see the note at the top of the file
  ; about avoiding critical edges).
  tail call void asm "nop", ""()
  br label %for.body

for.body:                                         ; preds = %for.preheader, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
  ; The inline asm clobbers ebx, a callee-saved register.
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %shl = shl i32 %add, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

declare i32 @something(...)

; Check that we do not perform the shrink-wrapping inside the loop even
; though that would be legal. The cost model must prevent that.
; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
; CHECK: nop
; CHECK: xorl [[SUM:%e[a-z]+]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
; Next BB.
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: ## %for.exit
; CHECK: nop
; CHECK: popq %rbx
; CHECK-NEXT: retq
define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
entry:
  br label %for.preheader

for.preheader:
  tail call void asm "nop", ""()
  br label %for.body

for.body:                                         ; preds = %for.body, %for.preheader
  %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
  %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
  ; The inline asm clobbers ebx, a callee-saved register.
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.03
  %inc = add nuw nsw i32 %i.04, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.exit, label %for.body

for.exit:
  tail call void asm "nop", ""()
  br label %for.end

for.end:                                          ; preds = %for.exit
  ret i32 %add
}

; Check with a more complex case that we do not have save within the loop and
; restore outside.
; CHECK-LABEL: loopInfoSaveOutsideLoop:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: nop
; CHECK: xorl [[SUM:%esi]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: nop
; CHECK: shll $3, [[SUM]]
;
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader

for.preheader:
  tail call void asm "nop", ""()
  br label %for.body

for.body:                                         ; preds = %for.preheader, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
  ; First ebx clobber: inside the loop.
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ; Second ebx clobber: after the loop.
  tail call void asm "nop", "~{ebx}"()
  %shl = shl i32 %add, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

; Check with a more complex case that we do not have restore within the loop and
; save outside.
; CHECK-LABEL: loopInfoRestoreOutsideLoop:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: nop
; CHECK: xorl [[SUM:%esi]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: shll $3, [[SUM]]
;
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else

; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  ; First ebx clobber: before the loop.
  tail call void asm "nop", "~{ebx}"()
  br label %for.body

for.body:                                         ; preds = %for.body, %if.then
  %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
  %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  ; Second ebx clobber: inside the loop.
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %shl = shl i32 %add, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

; Check that we handle functions with no frame information correctly.
; CHECK-LABEL: emptyFrame:
; CHECK: ## %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
define i32 @emptyFrame() {
entry:
  ret i32 0
}

; Check that we handle inline asm correctly.
; CHECK-LABEL: inlineAsm:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: nop
; CHECK: movl $10, [[IV:%e[a-z]+]]
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; Inline asm statement.
; CHECK: addl $1, %ebx
; CHECK: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: nop
; CHECK: xorl %esi, %esi
;
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @inlineAsm(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader

for.preheader:
  tail call void asm "nop", ""()
  br label %for.body

for.body:                                         ; preds = %for.preheader, %for.body
  %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  ; The asm text names ebx directly and lists it as a clobber.
  tail call void asm "addl $$1, %ebx", "~{ebx}"()
  %inc = add nuw nsw i32 %i.03, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.exit, label %for.body

for.exit:
  tail call void asm "nop", ""()
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %for.exit, %if.else
  %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
  ret i32 %sum.0
}

; Check that we handle calls to variadic functions correctly.
; CHECK-LABEL: callVariadicFunc:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; CHECK: pushq
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Setup of the varargs.
; CHECK: movl %esi, (%rsp)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movl %esi, %edi
; CHECK-NEXT: movl %esi, %edx
; CHECK-NEXT: movl %esi, %ecx
; CHECK-NEXT: movl %esi, %r8d
; CHECK-NEXT: movl %esi, %r9d
; CHECK-NEXT: callq _someVariadicFunc
; CHECK-NEXT: movl %eax, %esi
; CHECK-NEXT: shll $3, %esi
;
; ENABLE-NEXT: addq $8, %rsp
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
;
; DISABLE: jmp [[IFEND_LABEL:LBB[0-9_]+]]
;
; CHECK: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; CHECK: addl %esi, %esi
;
; DISABLE: [[IFEND_LABEL]]: ## %if.end
;
; Epilogue code.
; CHECK-NEXT: movl %esi, %eax
; DISABLE-NEXT: popq
; CHECK-NEXT: retq
define i32 @callVariadicFunc(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  ; Seven i32 arguments are passed to the variadic callee.
  %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
  %shl = shl i32 %call, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
  ret i32 %sum.0
}

declare i32 @someVariadicFunc(i32, ...)

; Check that we use LEA not to clobber EFLAGS.
%struct.temp_slot = type { %struct.temp_slot*, %struct.rtx_def*, %struct.rtx_def*, i32, i64, %union.tree_node*, %union.tree_node*, i8, i8, i32, i32, i64, i64 }
%union.tree_node = type { %struct.tree_decl }
%struct.tree_decl = type { %struct.tree_common, i8*, i32, i32, %union.tree_node*, i48, %union.anon, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %union.anon.1, %union.tree_node*, %union.tree_node*, %union.tree_node*, i64, %struct.lang_decl* }
%struct.tree_common = type { %union.tree_node*, %union.tree_node*, i32 }
%union.anon = type { i64 }
%union.anon.1 = type { %struct.function* }
%struct.function = type { %struct.eh_status*, %struct.stmt_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, i8*, %union.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.ix86_args, %struct.rtx_def*, %struct.rtx_def*, i8*, %struct.initial_value_struct*, i32, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i64, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.rtx_def**, %struct.temp_slot*, i32, i32, i32, %struct.var_refs_queue*, i32, i32, i8*, %union.tree_node*, %struct.rtx_def*, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.rtx_def*, i24 }
%struct.eh_status = type opaque
%struct.stmt_status = type opaque
%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.sequence_stack*, i32, i32, i8*, i32, i8*, %union.tree_node**, %struct.rtx_def** }
%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.sequence_stack* }
%struct.varasm_status = type opaque
%struct.ix86_args = type { i32, i32, i32, i32, i32, i32, i32 }
%struct.initial_value_struct = type opaque
%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
%struct.machine_function = type opaque
%struct.language_function = type opaque
%struct.lang_decl = type opaque
%struct.rtx_def = type { i32, [1 x %union.rtunion_def] }
%union.rtunion_def = type { i64 }

declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* readonly)

; CHECK-LABEL: useLEA:
; DISABLE: pushq
;
; CHECK: testq %rdi, %rdi
; CHECK-NEXT: je [[CLEANUP:LBB[0-9_]+]]
;
; CHECK: movzwl (%rdi), [[BF_LOAD:%e[a-z]+]]
; CHECK-NEXT: cmpl $66, [[BF_LOAD]]
; CHECK-NEXT: jne [[CLEANUP]]
;
; CHECK: movq 8(%rdi), %rdi
; CHECK-NEXT: movzwl (%rdi), %e[[BF_LOAD2:[a-z]+]]
; CHECK-NEXT: leal -54(%r[[BF_LOAD2]]), [[TMP:%e[a-z]+]]
; CHECK-NEXT: cmpl $14, [[TMP]]
; CHECK-NEXT: ja [[LOR_LHS_FALSE:LBB[0-9_]+]]
;
; CHECK: movl $24599, [[TMP2:%e[a-z]+]]
; CHECK-NEXT: btl [[TMP]], [[TMP2]]
; Reuse (do not redefine) the label captured by the range check just before
; this, so that both branches are verified to share one target.
; CHECK-NEXT: jae [[LOR_LHS_FALSE]]
;
; CHECK: [[CLEANUP]]: ## %cleanup
; DISABLE: popq
; CHECK-NEXT: retq
;
; CHECK: [[LOR_LHS_FALSE]]: ## %lor.lhs.false
; CHECK: cmpl $134, %e[[BF_LOAD2]]
; CHECK-NEXT: je [[CLEANUP]]
;
; CHECK: cmpl $140, %e[[BF_LOAD2]]
; CHECK-NEXT: je [[CLEANUP]]
;
; ENABLE: pushq
; CHECK: callq _find_temp_slot_from_address
; CHECK-NEXT: testq %rax, %rax
;
; The adjustment must use LEA here (or be moved above the test).
; ENABLE-NEXT: leaq 8(%rsp), %rsp
;
; CHECK-NEXT: je [[CLEANUP]]
;
; CHECK: movb $1, 57(%rax)
define void @useLEA(%struct.rtx_def* readonly %x) {
entry:
  %cmp = icmp eq %struct.rtx_def* %x, null
  br i1 %cmp, label %cleanup, label %if.end

if.end:                                           ; preds = %entry
  %tmp = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %x, i64 0, i32 0
  %bf.load = load i32, i32* %tmp, align 8
  %bf.clear = and i32 %bf.load, 65535
  %cmp1 = icmp eq i32 %bf.clear, 66
  br i1 %cmp1, label %lor.lhs.false, label %cleanup

lor.lhs.false:                                    ; preds = %if.end
  %arrayidx = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %x, i64 0, i32 1, i64 0
  %rtx = bitcast %union.rtunion_def* %arrayidx to %struct.rtx_def**
  %tmp1 = load %struct.rtx_def*, %struct.rtx_def** %rtx, align 8
  %tmp2 = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %tmp1, i64 0, i32 0
  %bf.load2 = load i32, i32* %tmp2, align 8
  %bf.clear3 = and i32 %bf.load2, 65535
  switch i32 %bf.clear3, label %if.end.55 [
    i32 67, label %cleanup
    i32 68, label %cleanup
    i32 54, label %cleanup
    i32 55, label %cleanup
    i32 58, label %cleanup
    i32 134, label %cleanup
    i32 56, label %cleanup
    i32 140, label %cleanup
  ]

if.end.55:                                        ; preds = %lor.lhs.false
  ; NOTE(review): attribute group #2 has no definition in the visible part of
  ; this file - confirm that this is intentional.
  %call = tail call fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* %tmp1) #2
  %cmp59 = icmp eq %struct.temp_slot* %call, null
  br i1 %cmp59, label %cleanup, label %if.then.60

if.then.60:                                       ; preds = %if.end.55
  %addr_taken = getelementptr inbounds %struct.temp_slot, %struct.temp_slot* %call, i64 0, i32 8
  store i8 1, i8* %addr_taken, align 1
  br label %cleanup

cleanup:                                          ; preds = %if.then.60, %if.end.55, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %if.end, %entry
  ret void
}

; Make sure we do not insert unreachable code after noreturn function.
; Although this is not incorrect to insert such code, it is useless
; and it hurts the binary size.
;
; CHECK-LABEL: noreturn:
; DISABLE: pushq
;
; CHECK: testb %dil, %dil
; CHECK-NEXT: jne [[ABORT:LBB[0-9_]+]]
;
; CHECK: movl $42, %eax
;
; DISABLE-NEXT: popq
;
; CHECK-NEXT: retq
;
; CHECK: [[ABORT]]: ## %if.abort
;
; ENABLE: pushq
;
; CHECK: callq _abort
; ENABLE-NOT: popq
define i32 @noreturn(i8 signext %bad_thing) {
entry:
  %tobool = icmp eq i8 %bad_thing, 0
  br i1 %tobool, label %if.end, label %if.abort

if.abort:
  tail call void @abort() #0
  unreachable

if.end:
  ret i32 42
}

declare void @abort() #0

attributes #0 = { noreturn nounwind }


; Make sure that we handle infinite loops properly. When checking that the Save
; and Restore blocks are control flow equivalent, the loop searches for the
; immediate (post) dominator for the (restore) save blocks. When either the Save
; or Restore block is located in an infinite loop the only immediate (post)
; dominator is itself. In this case, we cannot perform shrink wrapping, but we
; should return gracefully and continue compilation.
; The only condition for this test is the compilation finishes correctly.
;
; CHECK-LABEL: infiniteloop:
; CHECK: retq
define void @infiniteloop() {
entry:
  br i1 undef, label %if.then, label %if.end

if.then:
  %ptr = alloca i32, i32 4
  br label %for.body

for.body:                                         ; preds = %for.body, %if.then
  ; Single-block loop with no exit edge.
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br label %for.body

if.end:
  ret void
}

; Another infinite loop test this time with a body bigger than just one block.
; CHECK-LABEL: infiniteloop2:
; CHECK: retq
define void @infiniteloop2() {
entry:
  br i1 undef, label %if.then, label %if.end

if.then:
  %ptr = alloca i32, i32 4
  br label %for.body

for.body:                                         ; preds = %body2, %body1, %if.then
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2]
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br i1 undef, label %body1, label %body2

body1:
  tail call void asm sideeffect "nop", "~{ebx}"()
  br label %for.body

body2:
  tail call void asm sideeffect "nop", "~{ebx}"()
  br label %for.body

if.end:
  ret void
}

; Another infinite loop test this time with two nested infinite loops.
; CHECK-LABEL: infiniteloop3:
; CHECK: retq
define void @infiniteloop3() {
entry:
  br i1 undef, label %loop2a, label %body

body:                                             ; preds = %entry
  br i1 undef, label %loop2a, label %end

loop1:                                            ; preds = %loop2a, %loop2b
  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
  %0 = icmp eq i32* %var, null
  %next.load = load i32*, i32** undef
  br i1 %0, label %loop2a, label %loop2b

loop2a:                                           ; preds = %loop1, %body, %entry
  %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
  %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
  br label %loop1

loop2b:                                           ; preds = %loop1
  %gep1 = bitcast i32* %var.phi to i32*
  %next.ptr = bitcast i32* %gep1 to i32**
  store i32* %next.phi, i32** %next.ptr
  br label %loop1

end:
  ret void
}

; Check that we just don't bail out on RegMask.
; In this case, the RegMask does not touch a CSR so we are good to go!
; CHECK-LABEL: regmask:
;
; Compare the arguments and jump to exit.
; No prologue needed.
; ENABLE: cmpl %esi, %edi
; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; (What we push does not matter. It should be some random scratch register.)
; CHECK: pushq
;
; Compare the arguments and jump to exit.
; After the prologue is set.
; DISABLE: cmpl %esi, %edi
; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
;
; CHECK: nop
; Set the first argument to zero.
; CHECK: xorl %edi, %edi
; Set the second argument to addr.
; CHECK-NEXT: movq %rdx, %rsi
; CHECK-NEXT: callq _doSomething
; CHECK-NEXT: popq
; CHECK-NEXT: retq
;
; CHECK: [[EXIT_LABEL]]:
; Set the first argument to 6.
; CHECK-NEXT: movl $6, %edi
; Set the second argument to addr.
; CHECK-NEXT: movq %rdx, %rsi
;
; Without shrink-wrapping, we need to restore the stack before
; making the tail call.
; Epilogue code.
; DISABLE-NEXT: popq
;
; CHECK-NEXT: jmp _doSomething
define i32 @regmask(i32 %a, i32 %b, i32* %addr) {
  %tmp2 = icmp slt i32 %a, %b
  br i1 %tmp2, label %true, label %false

true:
  ; Clobber a CSR so that we check something on the regmask
  ; of the tail call.
  tail call void asm sideeffect "nop", "~{ebx}"()
  %tmp4 = call i32 @doSomething(i32 0, i32* %addr)
  br label %end

false:
  %tmp5 = tail call i32 @doSomething(i32 6, i32* %addr)
  br label %end

end:
  %tmp.0 = phi i32 [ %tmp4, %true ], [ %tmp5, %false ]
  ret i32 %tmp.0
}

@b = internal unnamed_addr global i1 false
@c = internal unnamed_addr global i8 0, align 1
@a = common global i32 0, align 4

; Make sure the prologue does not clobber the EFLAGS when
; it is live across.
; PR25629.
; Note: The registers may change in the following patterns, but
; because they imply register hierarchy (e.g., eax, al) this is
; tricky to write robust patterns.
;
; CHECK-LABEL: useLEAForPrologue:
;
; Prologue is at the beginning of the function when shrink-wrapping
; is disabled.
; DISABLE: pushq
; The stack adjustment can use SUB instr because we do not need to
; preserve the EFLAGS at this point.
; DISABLE-NEXT: subq $16, %rsp
;
; Load the value of b.
; Create the zero value for the select assignment.
; CHECK: xorl [[CMOVE_VAL:%eax]], [[CMOVE_VAL]]
; CHECK-NEXT: cmpb $0, _b(%rip)
; CHECK-NEXT: jne [[STOREC_LABEL:LBB[0-9_]+]]
;
; CHECK: movb $48, [[CMOVE_VAL:%al]]
;
; CHECK: [[STOREC_LABEL]]:
;
; ENABLE-NEXT: pushq
; For the stack adjustment, we need to preserve the EFLAGS.
; ENABLE-NEXT: leaq -16(%rsp), %rsp
;
; Technically, we should use CMOVE_VAL here or its subregister.
; CHECK-NEXT: movb %al, _c(%rip)
; The earlier cmpb against _b set the EFLAGS read here.
; CHECK-NEXT: je [[VARFUNC_CALL:LBB[0-9_]+]]
;
; The code of the loop is not interesting.
; [...]
;
; CHECK: [[VARFUNC_CALL]]:
; Set the null parameter.
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq _varfunc
;
; Set the return value.
; CHECK-NEXT: xorl %eax, %eax
;
; Epilogue code.
; CHECK-NEXT: addq $16, %rsp
; CHECK-NEXT: popq
; CHECK-NEXT: retq
define i32 @useLEAForPrologue(i32 %d, i32 %a, i8 %c) #3 {
entry:
  %tmp = alloca i3
  %.b = load i1, i1* @b, align 1
  %bool = select i1 %.b, i8 0, i8 48
  store i8 %bool, i8* @c, align 1
  br i1 %.b, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  ; Clobber ebx (a CSR) on the loop path only.
  tail call void asm sideeffect "nop", "~{ebx}"()
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %inc6 = phi i8 [ %c, %for.body.lr.ph ], [ %inc, %for.body ]
  %cond5 = phi i32 [ %a, %for.body.lr.ph ], [ %conv3, %for.body ]
  %cmp2 = icmp slt i32 %d, %cond5
  %conv3 = zext i1 %cmp2 to i32
  %inc = add i8 %inc6, 1
  %cmp = icmp slt i8 %inc, 45
  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge

for.cond.for.end_crit_edge:                       ; preds = %for.body
  store i32 %conv3, i32* @a, align 4
  br label %for.end

for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
  %call = tail call i32 (i8*) @varfunc(i8* null)
  ret i32 0
}

declare i32 @varfunc(i8* nocapture readonly)

@sum1 = external hidden thread_local global i32, align 4


; Function Attrs: nounwind
; Make sure the TLS call used to access @sum1 happens after the prologue
; and before the epilogue.
; TLS calls used to be wrongly modeled and shrink-wrapping would have inserted
; the prologue and epilogue just around the call to doSomething.
; PR25820.
;
; CHECK-LABEL: tlsCall:
; CHECK: pushq
; CHECK: testb $1, %dil
; CHECK: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; master bb
; CHECK: movq _sum1@TLVP(%rip), %rdi
; CHECK-NEXT: callq *(%rdi)
; CHECK: jmp [[EXIT_LABEL:LBB[0-9_]+]]
;
; [[ELSE_LABEL]]:
; CHECK: callq _doSomething
;
; [[EXIT_LABEL]]:
; CHECK: popq
; CHECK-NEXT: retq
define i32 @tlsCall(i1 %bool1, i32 %arg, i32* readonly dereferenceable(4) %sum1) #3 {
entry:
  br i1 %bool1, label %master, label %else

master:
  %tmp1 = load i32, i32* %sum1, align 4
  ; Store to the thread_local global @sum1.
  store i32 %tmp1, i32* @sum1, align 4
  br label %exit

else:
  %call = call i32 @doSomething(i32 0, i32* null)
  br label %exit

exit:
  %res = phi i32 [ %arg, %master], [ %call, %else ]
  ret i32 %res
}

attributes #3 = { nounwind }

@irreducibleCFGa = common global i32 0, align 4
@irreducibleCFGf = common global i8 0, align 1
@irreducibleCFGb = common global i32 0, align 4

; Check that we do not run shrink-wrapping on irreducible CFGs until
; it is actually supported.
; At the moment, on those CFGs the loop information may be incorrect
; and since we use that information to do the placement, we may end up
; inserting the prologue/epilogue at incorrect places.
; PR25988.
;
; CHECK-LABEL: irreducibleCFG:
; CHECK: %entry
; Make sure the prologue happens in the entry block.
; CHECK-NEXT: pushq
; ...
; Make sure the epilogue happens in the exit block.
; CHECK-NOT: popq
; CHECK: popq
; CHECK-NEXT: popq
; CHECK-NEXT: retq
; Make sure we emit missed optimization remarks for this.
; REMARKS: Pass: shrink-wrap
; REMARKS-NEXT: Name: UnsupportedIrreducibleCFG
; REMARKS-NEXT: Function: irreducibleCFG
; REMARKS-NEXT: Args:
; REMARKS-NEXT: - String: Irreducible CFGs are not supported yet

define i32 @irreducibleCFG() #4 {
entry:
  %i0 = load i32, i32* @irreducibleCFGa, align 4
  %.pr = load i8, i8* @irreducibleCFGf, align 1
  %bool = icmp eq i8 %.pr, 0
  br i1 %bool, label %split, label %preheader

preheader:
  br label %preheader

split:
  %i1 = load i32, i32* @irreducibleCFGb, align 4
  %tobool1.i = icmp ne i32 %i1, 0
  br i1 %tobool1.i, label %for.body4.i, label %for.cond8.i.preheader

for.body4.i:
  %call.i = tail call i32 (...) @something(i32 %i0)
  br label %for.cond8

for.cond8:
  %p1 = phi i32 [ %inc18.i, %for.inc ], [ 0, %for.body4.i ]
  %.pr1.pr = load i32, i32* @irreducibleCFGb, align 4
  br label %for.cond8.i.preheader

for.cond8.i.preheader:
  ; Reached both from %for.cond8 and directly from %split.
  %.pr1 = phi i32 [ %.pr1.pr, %for.cond8 ], [ %i1, %split ]
  %p13 = phi i32 [ %p1, %for.cond8 ], [ 0, %split ]
  br label %for.inc

fn1.exit:
  ret i32 0

for.inc:
  %inc18.i = add nuw nsw i32 %p13, 1
  %cmp = icmp slt i32 %inc18.i, 7
  br i1 %cmp, label %for.cond8, label %fn1.exit
}

attributes #4 = { "no-frame-pointer-elim"="true" }

@x = external global i32, align 4
@y = external global i32, align 4

; The post-dominator tree does not include the branch containing the infinite
; loop, which can result in a misplacement of the restore block, if we're
; looking for the nearest common post-dominator of an "unreachable" block.

; CHECK-LABEL: infiniteLoopNoSuccessor:
; CHECK: ## %bb.0:
; Make sure the prologue happens in the entry block.
; CHECK-NEXT: pushq %rbp
; ...
; Make sure we don't shrink-wrap.
; CHECK: ## %bb.1
; CHECK-NOT: pushq %rbp
; ...
; Make sure the epilogue happens in the exit block.
; CHECK: ## %bb.5
; CHECK: popq %rbp
; CHECK-NEXT: retq
define void @infiniteLoopNoSuccessor() #5 {
  %1 = load i32, i32* @x, align 4
  %2 = icmp ne i32 %1, 0
  br i1 %2, label %3, label %4

; <label>:3:
  store i32 0, i32* @x, align 4
  br label %4

; <label>:4:
  call void (...) @somethingElse()
  %5 = load i32, i32* @y, align 4
  %6 = icmp ne i32 %5, 0
  br i1 %6, label %10, label %7

; <label>:7:
  %8 = call i32 (...) @something()
  br label %9

; <label>:9:
  ; This block branches only to itself: an infinite loop with no successor.
  call void (...) @somethingElse()
  br label %9

; <label>:10:
  ret void
}

declare void @somethingElse(...)

attributes #5 = { nounwind "no-frame-pointer-elim-non-leaf" }