1 2 /*---------------------------------------------------------------*/ 3 /*--- begin host_reg_alloc2.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2011 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 #include "libvex_basictypes.h" 37 #include "libvex.h" 38 39 #include "main_util.h" 40 #include "host_generic_regs.h" 41 42 /* Set to 1 for lots of debugging output. */ 43 #define DEBUG_REGALLOC 0 44 45 46 /* TODO 27 Oct 04: 47 48 Better consistency checking from what isMove tells us. 49 50 We can possibly do V-V coalescing even when the src is spilled, 51 providing we can arrange for the dst to have the same spill slot. 52 53 Note that state[].hreg is the same as the available real regs. 54 55 Generally rationalise data structures. */ 56 57 58 /* Records information on virtual register live ranges. Computed once 59 and remains unchanged after that. */ 60 typedef 61 struct { 62 /* Becomes live for the first time after this insn ... */ 63 Short live_after; 64 /* Becomes dead for the last time before this insn ... */ 65 Short dead_before; 66 /* The "home" spill slot, if needed. Never changes. */ 67 Short spill_offset; 68 Short spill_size; 69 /* What kind of register this is. */ 70 HRegClass reg_class; 71 } 72 VRegLR; 73 74 75 /* Records information on real-register live ranges. Computed once 76 and remains unchanged after that. */ 77 typedef 78 struct { 79 HReg rreg; 80 /* Becomes live after this insn ... */ 81 Short live_after; 82 /* Becomes dead before this insn ... */ 83 Short dead_before; 84 } 85 RRegLR; 86 87 88 /* An array of the following structs (rreg_state) comprises the 89 running state of the allocator. It indicates what the current 90 disposition of each allocatable real register is. The array gets 91 updated as the allocator processes instructions. */ 92 typedef 93 struct { 94 /* ------ FIELDS WHICH DO NOT CHANGE ------ */ 95 /* Which rreg is this for? */ 96 HReg rreg; 97 /* Is this involved in any HLRs? (only an optimisation hint) */ 98 Bool has_hlrs; 99 /* ------ FIELDS WHICH DO CHANGE ------ */ 100 /* 6 May 07: rearranged fields below so the whole struct fits 101 into 16 bytes on both x86 and amd64. */ 102 /* Used when .disp == Bound and we are looking for vregs to 103 spill. */ 104 Bool is_spill_cand; 105 /* Optimisation: used when .disp == Bound. Indicates when the 106 rreg has the same value as the spill slot for the associated 107 vreg. Is safely left at False, and becomes True after a 108 spill store or reload for this rreg. */ 109 Bool eq_spill_slot; 110 /* What's it's current disposition? */ 111 enum { Free, /* available for use */ 112 Unavail, /* in a real-reg live range */ 113 Bound /* in use (holding value of some vreg) */ 114 } 115 disp; 116 /* If .disp == Bound, what vreg is it bound to? */ 117 HReg vreg; 118 } 119 RRegState; 120 121 122 /* The allocator also maintains a redundant array of indexes 123 (vreg_state) from vreg numbers back to entries in rreg_state. It 124 is redundant because iff vreg_state[i] == j then 125 hregNumber(rreg_state[j].vreg) == i -- that is, the two entries 126 point at each other. The purpose of this is to speed up activities 127 which involve looking for a particular vreg: there is no need to 128 scan the rreg_state looking for it, just index directly into 129 vreg_state. The FAQ "does this vreg already have an associated 130 rreg" is the main beneficiary. 131 132 To indicate, in vreg_state[i], that a given vreg is not currently 133 associated with any rreg, that entry can be set to INVALID_RREG_NO. 134 135 Because the vreg_state entries are signed Shorts, the max number 136 of vregs that can be handed by regalloc is 32767. 137 */ 138 139 #define INVALID_RREG_NO ((Short)(-1)) 140 141 #define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs) 142 #define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs) 143 144 145 /* Does this instruction mention a particular reg? */ 146 static Bool instrMentionsReg ( 147 void (*getRegUsage) (HRegUsage*, HInstr*, Bool), 148 HInstr* instr, 149 HReg r, 150 Bool mode64 151 ) 152 { 153 Int i; 154 HRegUsage reg_usage; 155 (*getRegUsage)(®_usage, instr, mode64); 156 for (i = 0; i < reg_usage.n_used; i++) 157 if (reg_usage.hreg[i] == r) 158 return True; 159 return False; 160 } 161 162 163 /* Search forward from some given point in the incoming instruction 164 sequence. Point is to select a virtual register to spill, by 165 finding the vreg which is mentioned as far ahead as possible, in 166 the hope that this will minimise the number of consequent reloads. 167 168 Only do the search for vregs which are Bound in the running state, 169 and for which the .is_spill_cand field is set. This allows the 170 caller to arbitrarily restrict the set of spill candidates to be 171 considered. 172 173 Returns an index into the state array indicating the (v,r) pair to 174 spill, or -1 if none was found. */ 175 static 176 Int findMostDistantlyMentionedVReg ( 177 void (*getRegUsage) (HRegUsage*, HInstr*, Bool), 178 HInstrArray* instrs_in, 179 Int search_from_instr, 180 RRegState* state, 181 Int n_state, 182 Bool mode64 183 ) 184 { 185 Int k, m; 186 Int furthest_k = -1; 187 Int furthest = -1; 188 vassert(search_from_instr >= 0); 189 for (k = 0; k < n_state; k++) { 190 if (!state[k].is_spill_cand) 191 continue; 192 vassert(state[k].disp == Bound); 193 for (m = search_from_instr; m < instrs_in->arr_used; m++) { 194 if (instrMentionsReg(getRegUsage, 195 instrs_in->arr[m], state[k].vreg, mode64)) 196 break; 197 } 198 if (m > furthest) { 199 furthest = m; 200 furthest_k = k; 201 } 202 } 203 return furthest_k; 204 } 205 206 207 /* Check that this vreg has been assigned a sane spill offset. */ 208 static inline void sanity_check_spill_offset ( VRegLR* vreg ) 209 { 210 if (vreg->reg_class == HRcVec128 || vreg->reg_class == HRcFlt64) { 211 vassert(0 == ((UShort)vreg->spill_offset % 16)); 212 } else { 213 vassert(0 == ((UShort)vreg->spill_offset % 8)); 214 } 215 } 216 217 218 /* Double the size of the real-reg live-range array, if needed. */ 219 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used ) 220 { 221 Int k; 222 RRegLR* arr2; 223 if (used < *size) return; 224 if (0) 225 vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size); 226 vassert(used == *size); 227 arr2 = LibVEX_Alloc(2 * *size * sizeof(RRegLR)); 228 for (k = 0; k < *size; k++) 229 arr2[k] = (*info)[k]; 230 *size *= 2; 231 *info = arr2; 232 } 233 234 235 /* Sort an array of RRegLR entries by either the .live_after or 236 .dead_before fields. This is performance-critical. */ 237 static void sortRRLRarray ( RRegLR* arr, 238 Int size, Bool by_live_after ) 239 { 240 Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 241 9841, 29524, 88573, 265720, 242 797161, 2391484 }; 243 Int lo = 0; 244 Int hi = size-1; 245 Int i, j, h, bigN, hp; 246 RRegLR v; 247 248 vassert(size >= 0); 249 if (size == 0) 250 return; 251 252 bigN = hi - lo + 1; if (bigN < 2) return; 253 hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--; 254 255 if (by_live_after) { 256 257 for ( ; hp >= 0; hp--) { 258 h = incs[hp]; 259 for (i = lo + h; i <= hi; i++) { 260 v = arr[i]; 261 j = i; 262 while (arr[j-h].live_after > v.live_after) { 263 arr[j] = arr[j-h]; 264 j = j - h; 265 if (j <= (lo + h - 1)) break; 266 } 267 arr[j] = v; 268 } 269 } 270 271 } else { 272 273 for ( ; hp >= 0; hp--) { 274 h = incs[hp]; 275 for (i = lo + h; i <= hi; i++) { 276 v = arr[i]; 277 j = i; 278 while (arr[j-h].dead_before > v.dead_before) { 279 arr[j] = arr[j-h]; 280 j = j - h; 281 if (j <= (lo + h - 1)) break; 282 } 283 arr[j] = v; 284 } 285 } 286 287 } 288 } 289 290 291 /* A target-independent register allocator. Requires various 292 functions which it uses to deal abstractly with instructions and 293 registers, since it cannot have any target-specific knowledge. 294 295 Returns a new list of instructions, which, as a result of the 296 behaviour of mapRegs, will be in-place modifications of the 297 original instructions. 298 299 Requires that the incoming code has been generated using 300 vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside 301 that range is a checked run-time error. 302 303 Takes an expandable array of pointers to unallocated insns. 304 Returns an expandable array of pointers to allocated insns. 305 */ 306 HInstrArray* doRegisterAllocation ( 307 308 /* Incoming virtual-registerised code. */ 309 HInstrArray* instrs_in, 310 311 /* An array listing all the real registers the allocator may use, 312 in no particular order. */ 313 HReg* available_real_regs, 314 Int n_available_real_regs, 315 316 /* Return True iff the given insn is a reg-reg move, in which 317 case also return the src and dst regs. */ 318 Bool (*isMove) ( HInstr*, HReg*, HReg* ), 319 320 /* Get info about register usage in this insn. */ 321 void (*getRegUsage) ( HRegUsage*, HInstr*, Bool ), 322 323 /* Apply a reg-reg mapping to an insn. */ 324 void (*mapRegs) ( HRegRemap*, HInstr*, Bool ), 325 326 /* Return one, or, if we're unlucky, two insn(s) to spill/restore a 327 real reg to a spill slot byte offset. The two leading HInstr** 328 args are out parameters, through which the generated insns are 329 returned. Also (optionally) a 'directReload' function, which 330 attempts to replace a given instruction by one which reads 331 directly from a specified spill slot. May be NULL, in which 332 case the optimisation is not attempted. */ 333 void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ), 334 void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ), 335 HInstr* (*directReload) ( HInstr*, HReg, Short ), 336 Int guest_sizeB, 337 338 /* For debug printing only. */ 339 void (*ppInstr) ( HInstr*, Bool ), 340 void (*ppReg) ( HReg ), 341 342 /* 32/64bit mode */ 343 Bool mode64 344 ) 345 { 346 # define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) 347 348 const Bool eq_spill_opt = True; 349 350 /* Iterators and temporaries. */ 351 Int ii, j, k, m, spillee, k_suboptimal; 352 HReg rreg, vreg, vregS, vregD; 353 HRegUsage reg_usage; 354 355 /* Info on vregs and rregs. Computed once and remains 356 unchanged. */ 357 Int n_vregs; 358 VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */ 359 360 /* We keep two copies of the real-reg live range info, one sorted 361 by .live_after and the other by .dead_before. First the 362 unsorted info is created in the _la variant is copied into the 363 _db variant. Once that's done both of them are sorted. 364 We also need two integer cursors which record the next 365 location in the two arrays to consider. */ 366 RRegLR* rreg_lrs_la; 367 RRegLR* rreg_lrs_db; 368 Int rreg_lrs_size; 369 Int rreg_lrs_used; 370 Int rreg_lrs_la_next; 371 Int rreg_lrs_db_next; 372 373 /* Used when constructing vreg_lrs (for allocating stack 374 slots). */ 375 Int ss_busy_until_before[N_SPILL64S]; 376 377 /* Used when constructing rreg_lrs. */ 378 Int* rreg_live_after; 379 Int* rreg_dead_before; 380 381 /* Running state of the core allocation algorithm. */ 382 RRegState* rreg_state; /* [0 .. n_rregs-1] */ 383 Int n_rregs; 384 385 /* .. and the redundant backward map */ 386 /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO. 387 This inplies n_rregs must be <= 32768. */ 388 Short* vreg_state; /* [0 .. n_vregs-1] */ 389 390 /* The vreg -> rreg map constructed and then applied to each 391 instr. */ 392 HRegRemap remap; 393 394 /* The output array of instructions. */ 395 HInstrArray* instrs_out; 396 397 /* Sanity checks are expensive. They are only done periodically, 398 not at each insn processed. */ 399 Bool do_sanity_check; 400 401 vassert(0 == (guest_sizeB % 16)); 402 vassert(0 == (LibVEX_N_SPILL_BYTES % 16)); 403 vassert(0 == (N_SPILL64S % 2)); 404 405 /* The live range numbers are signed shorts, and so limiting the 406 number of insns to 10000 comfortably guards against them 407 overflowing 32k. */ 408 vassert(instrs_in->arr_used <= 10000); 409 410 # define INVALID_INSTRNO (-2) 411 412 # define EMIT_INSTR(_instr) \ 413 do { \ 414 HInstr* _tmp = (_instr); \ 415 if (DEBUG_REGALLOC) { \ 416 vex_printf("** "); \ 417 (*ppInstr)(_tmp, mode64); \ 418 vex_printf("\n\n"); \ 419 } \ 420 addHInstr ( instrs_out, _tmp ); \ 421 } while (0) 422 423 # define PRINT_STATE \ 424 do { \ 425 Int z, q; \ 426 for (z = 0; z < n_rregs; z++) { \ 427 vex_printf(" rreg_state[%2d] = ", z); \ 428 (*ppReg)(rreg_state[z].rreg); \ 429 vex_printf(" \t"); \ 430 switch (rreg_state[z].disp) { \ 431 case Free: vex_printf("Free\n"); break; \ 432 case Unavail: vex_printf("Unavail\n"); break; \ 433 case Bound: vex_printf("BoundTo "); \ 434 (*ppReg)(rreg_state[z].vreg); \ 435 vex_printf("\n"); break; \ 436 } \ 437 } \ 438 vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \ 439 q = 0; \ 440 for (z = 0; z < n_vregs; z++) { \ 441 if (vreg_state[z] == INVALID_RREG_NO) \ 442 continue; \ 443 vex_printf("[%d] -> %d ", z, vreg_state[z]); \ 444 q++; \ 445 if (q > 0 && (q % 6) == 0) \ 446 vex_printf("\n "); \ 447 } \ 448 vex_printf("\n"); \ 449 } while (0) 450 451 452 /* --------- Stage 0: set up output array --------- */ 453 /* --------- and allocate/initialise running state. --------- */ 454 455 instrs_out = newHInstrArray(); 456 457 /* ... and initialise running state. */ 458 /* n_rregs is no more than a short name for n_available_real_regs. */ 459 n_rregs = n_available_real_regs; 460 n_vregs = instrs_in->n_vregs; 461 462 /* If this is not so, vreg_state entries will overflow. */ 463 vassert(n_vregs < 32767); 464 465 rreg_state = LibVEX_Alloc(n_rregs * sizeof(RRegState)); 466 vreg_state = LibVEX_Alloc(n_vregs * sizeof(Short)); 467 468 for (j = 0; j < n_rregs; j++) { 469 rreg_state[j].rreg = available_real_regs[j]; 470 rreg_state[j].has_hlrs = False; 471 rreg_state[j].disp = Free; 472 rreg_state[j].vreg = INVALID_HREG; 473 rreg_state[j].is_spill_cand = False; 474 rreg_state[j].eq_spill_slot = False; 475 } 476 477 for (j = 0; j < n_vregs; j++) 478 vreg_state[j] = INVALID_RREG_NO; 479 480 481 /* --------- Stage 1: compute vreg live ranges. --------- */ 482 /* --------- Stage 2: compute rreg live ranges. --------- */ 483 484 /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */ 485 486 /* This is relatively simple, because (1) we only seek the complete 487 end-to-end live range of each vreg, and are not interested in 488 any holes in it, and (2) the vregs are conveniently numbered 0 489 .. n_vregs-1, so we can just dump the results in a 490 pre-allocated array. */ 491 492 vreg_lrs = NULL; 493 if (n_vregs > 0) 494 vreg_lrs = LibVEX_Alloc(sizeof(VRegLR) * n_vregs); 495 496 for (j = 0; j < n_vregs; j++) { 497 vreg_lrs[j].live_after = INVALID_INSTRNO; 498 vreg_lrs[j].dead_before = INVALID_INSTRNO; 499 vreg_lrs[j].spill_offset = 0; 500 vreg_lrs[j].spill_size = 0; 501 vreg_lrs[j].reg_class = HRcINVALID; 502 } 503 504 /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */ 505 506 /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */ 507 508 /* This is more complex than Stage 1, because we need to compute 509 exactly all the live ranges of all the allocatable real regs, 510 and we don't know in advance how many there will be. */ 511 512 rreg_lrs_used = 0; 513 rreg_lrs_size = 4; 514 rreg_lrs_la = LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR)); 515 rreg_lrs_db = NULL; /* we'll create this later */ 516 517 /* We'll need to track live range start/end points seperately for 518 each rreg. Sigh. */ 519 vassert(n_available_real_regs > 0); 520 rreg_live_after = LibVEX_Alloc(n_available_real_regs * sizeof(Int)); 521 rreg_dead_before = LibVEX_Alloc(n_available_real_regs * sizeof(Int)); 522 523 for (j = 0; j < n_available_real_regs; j++) { 524 rreg_live_after[j] = 525 rreg_dead_before[j] = INVALID_INSTRNO; 526 } 527 528 /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */ 529 530 /* ------ start of ITERATE OVER INSNS ------ */ 531 532 for (ii = 0; ii < instrs_in->arr_used; ii++) { 533 534 (*getRegUsage)( ®_usage, instrs_in->arr[ii], mode64 ); 535 536 # if 0 537 vex_printf("\n%d stage1: ", ii); 538 (*ppInstr)(instrs_in->arr[ii], mode64); 539 vex_printf("\n"); 540 ppHRegUsage(®_usage); 541 # endif 542 543 /* ------ start of DEAL WITH VREG LIVE RANGES ------ */ 544 545 /* for each reg mentioned in the insn ... */ 546 for (j = 0; j < reg_usage.n_used; j++) { 547 548 vreg = reg_usage.hreg[j]; 549 /* only interested in virtual registers right now. */ 550 if (!hregIsVirtual(vreg)) 551 continue; 552 k = hregNumber(vreg); 553 if (k < 0 || k >= n_vregs) { 554 vex_printf("\n"); 555 (*ppInstr)(instrs_in->arr[ii], mode64); 556 vex_printf("\n"); 557 vex_printf("vreg %d, n_vregs %d\n", k, n_vregs); 558 vpanic("doRegisterAllocation: out-of-range vreg"); 559 } 560 561 /* Take the opportunity to note its regclass. We'll need 562 that when allocating spill slots. */ 563 if (vreg_lrs[k].reg_class == HRcINVALID) { 564 /* First mention of this vreg. */ 565 vreg_lrs[k].reg_class = hregClass(vreg); 566 } else { 567 /* Seen it before, so check for consistency. */ 568 vassert(vreg_lrs[k].reg_class == hregClass(vreg)); 569 } 570 571 /* Now consider live ranges. */ 572 switch (reg_usage.mode[j]) { 573 case HRmRead: 574 if (vreg_lrs[k].live_after == INVALID_INSTRNO) { 575 vex_printf("\n\nOFFENDING VREG = %d\n", k); 576 vpanic("doRegisterAllocation: " 577 "first event for vreg is Read"); 578 } 579 vreg_lrs[k].dead_before = toShort(ii + 1); 580 break; 581 case HRmWrite: 582 if (vreg_lrs[k].live_after == INVALID_INSTRNO) 583 vreg_lrs[k].live_after = toShort(ii); 584 vreg_lrs[k].dead_before = toShort(ii + 1); 585 break; 586 case HRmModify: 587 if (vreg_lrs[k].live_after == INVALID_INSTRNO) { 588 vex_printf("\n\nOFFENDING VREG = %d\n", k); 589 vpanic("doRegisterAllocation: " 590 "first event for vreg is Modify"); 591 } 592 vreg_lrs[k].dead_before = toShort(ii + 1); 593 break; 594 default: 595 vpanic("doRegisterAllocation(1)"); 596 } /* switch */ 597 598 } /* iterate over registers */ 599 600 /* ------ end of DEAL WITH VREG LIVE RANGES ------ */ 601 602 /* ------ start of DEAL WITH RREG LIVE RANGES ------ */ 603 604 /* for each reg mentioned in the insn ... */ 605 for (j = 0; j < reg_usage.n_used; j++) { 606 607 /* Dummy initialisations of flush_la and flush_db to avoid 608 possible bogus uninit-var warnings from gcc. */ 609 Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO; 610 Bool flush; 611 612 rreg = reg_usage.hreg[j]; 613 614 /* only interested in real registers right now. */ 615 if (hregIsVirtual(rreg)) 616 continue; 617 618 /* Furthermore, we're not interested in this rreg unless it's 619 one of the allocatable ones. For example, it could be a 620 stack pointer register, or some other register beyond our 621 control, in which case we should just ignore it. */ 622 for (k = 0; k < n_available_real_regs; k++) 623 if (available_real_regs[k] == rreg) 624 break; 625 if (k == n_available_real_regs) 626 continue; /* not found -- ignore. */ 627 flush = False; 628 switch (reg_usage.mode[j]) { 629 case HRmWrite: 630 flush_la = rreg_live_after[k]; 631 flush_db = rreg_dead_before[k]; 632 if (flush_la != INVALID_INSTRNO 633 && flush_db != INVALID_INSTRNO) 634 flush = True; 635 rreg_live_after[k] = ii; 636 rreg_dead_before[k] = ii+1; 637 break; 638 case HRmRead: 639 if (rreg_live_after[k] == INVALID_INSTRNO) { 640 vex_printf("\nOFFENDING RREG = "); 641 (*ppReg)(available_real_regs[k]); 642 vex_printf("\n"); 643 vex_printf("\nOFFENDING instr = "); 644 (*ppInstr)(instrs_in->arr[ii], mode64); 645 vex_printf("\n"); 646 vpanic("doRegisterAllocation: " 647 "first event for rreg is Read"); 648 } 649 rreg_dead_before[k] = ii+1; 650 break; 651 case HRmModify: 652 if (rreg_live_after[k] == INVALID_INSTRNO) { 653 vex_printf("\nOFFENDING RREG = "); 654 (*ppReg)(available_real_regs[k]); 655 vex_printf("\n"); 656 vex_printf("\nOFFENDING instr = "); 657 (*ppInstr)(instrs_in->arr[ii], mode64); 658 vex_printf("\n"); 659 vpanic("doRegisterAllocation: " 660 "first event for rreg is Modify"); 661 } 662 rreg_dead_before[k] = ii+1; 663 break; 664 default: 665 vpanic("doRegisterAllocation(2)"); 666 } 667 668 if (flush) { 669 vassert(flush_la != INVALID_INSTRNO); 670 vassert(flush_db != INVALID_INSTRNO); 671 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); 672 if (0) 673 vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db); 674 rreg_lrs_la[rreg_lrs_used].rreg = rreg; 675 rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la); 676 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db); 677 rreg_lrs_used++; 678 } 679 680 } /* iterate over regs in the instr */ 681 682 /* ------ end of DEAL WITH RREG LIVE RANGES ------ */ 683 684 } /* iterate over insns */ 685 686 /* ------ end of ITERATE OVER INSNS ------ */ 687 688 /* ------ start of FINALISE RREG LIVE RANGES ------ */ 689 690 /* Now finish up any live ranges left over. */ 691 for (j = 0; j < n_available_real_regs; j++) { 692 693 # if 0 694 vex_printf("residual %d: %d %d\n", j, rreg_live_after[j], 695 rreg_dead_before[j]); 696 # endif 697 vassert( (rreg_live_after[j] == INVALID_INSTRNO 698 && rreg_dead_before[j] == INVALID_INSTRNO) 699 || 700 (rreg_live_after[j] != INVALID_INSTRNO 701 && rreg_dead_before[j] != INVALID_INSTRNO) 702 ); 703 704 if (rreg_live_after[j] == INVALID_INSTRNO) 705 continue; 706 707 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); 708 if (0) 709 vex_printf("FLUSH 2 (%d,%d)\n", 710 rreg_live_after[j], rreg_dead_before[j]); 711 rreg_lrs_la[rreg_lrs_used].rreg = available_real_regs[j]; 712 rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]); 713 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]); 714 rreg_lrs_used++; 715 } 716 717 /* Compute summary hints for choosing real regs. If a real reg is 718 involved in a hard live range, record that fact in the fixed 719 part of the running rreg_state. Later, when offered a choice between 720 rregs, it's better to choose one which is not marked as having 721 any HLRs, since ones with HLRs may need to be spilled around 722 their HLRs. Correctness of final assignment is unaffected by 723 this mechanism -- it is only an optimisation. */ 724 725 for (j = 0; j < rreg_lrs_used; j++) { 726 rreg = rreg_lrs_la[j].rreg; 727 vassert(!hregIsVirtual(rreg)); 728 /* rreg is involved in a HLR. Record this info in the array, if 729 there is space. */ 730 for (k = 0; k < n_rregs; k++) 731 if (rreg_state[k].rreg == rreg) 732 break; 733 vassert(k < n_rregs); /* else rreg was not found in rreg_state?! */ 734 rreg_state[k].has_hlrs = True; 735 } 736 if (0) { 737 for (j = 0; j < n_rregs; j++) { 738 if (!rreg_state[j].has_hlrs) 739 continue; 740 ppReg(rreg_state[j].rreg); 741 vex_printf(" hinted\n"); 742 } 743 } 744 745 /* Finally, copy the _la variant into the _db variant and 746 sort both by their respective fields. */ 747 rreg_lrs_db = LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR)); 748 for (j = 0; j < rreg_lrs_used; j++) 749 rreg_lrs_db[j] = rreg_lrs_la[j]; 750 751 sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ ); 752 sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ ); 753 754 /* And set up the cursors. */ 755 rreg_lrs_la_next = 0; 756 rreg_lrs_db_next = 0; 757 758 for (j = 1; j < rreg_lrs_used; j++) { 759 vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after); 760 vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before); 761 } 762 763 /* ------ end of FINALISE RREG LIVE RANGES ------ */ 764 765 # if DEBUG_REGALLOC 766 for (j = 0; j < n_vregs; j++) { 767 vex_printf("vreg %d: la = %d, db = %d\n", 768 j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before ); 769 } 770 # endif 771 772 # if DEBUG_REGALLOC 773 vex_printf("RRegLRs by LA:\n"); 774 for (j = 0; j < rreg_lrs_used; j++) { 775 vex_printf(" "); 776 (*ppReg)(rreg_lrs_la[j].rreg); 777 vex_printf(" la = %d, db = %d\n", 778 rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before ); 779 } 780 vex_printf("RRegLRs by DB:\n"); 781 for (j = 0; j < rreg_lrs_used; j++) { 782 vex_printf(" "); 783 (*ppReg)(rreg_lrs_db[j].rreg); 784 vex_printf(" la = %d, db = %d\n", 785 rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before ); 786 } 787 # endif 788 789 /* --------- Stage 3: allocate spill slots. --------- */ 790 791 /* Each spill slot is 8 bytes long. For vregs which take more than 792 64 bits to spill (classes Flt64 and Vec128), we have to allocate 793 two spill slots. 794 795 For Vec128-class on PowerPC, the spill slot's actual address 796 must be 16-byte aligned. Since the spill slot's address is 797 computed as an offset from the guest state pointer, and since 798 the user of the generated code must set that pointer to a 799 16-aligned value, we have the residual obligation here of 800 choosing a 16-aligned spill slot offset for Vec128-class values. 801 Since each spill slot is 8 bytes long, that means for 802 Vec128-class values we must allocated a spill slot number which 803 is zero mod 2. 804 805 Do a rank-based allocation of vregs to spill slot numbers. We 806 put as few values as possible in spill slots, but nevertheless 807 need to have a spill slot available for all vregs, just in case. 808 */ 809 /* max_ss_no = -1; */ 810 811 for (j = 0; j < N_SPILL64S; j++) 812 ss_busy_until_before[j] = 0; 813 814 for (j = 0; j < n_vregs; j++) { 815 816 /* True iff this vreg is unused. In which case we also expect 817 that the reg_class field for it has not been set. */ 818 if (vreg_lrs[j].live_after == INVALID_INSTRNO) { 819 vassert(vreg_lrs[j].reg_class == HRcINVALID); 820 continue; 821 } 822 823 /* The spill slots are 64 bits in size. As per the comment on 824 definition of HRegClass in host_generic_regs.h, that means, to 825 spill a vreg of class Flt64 or Vec128, we'll need to find two 826 adjacent spill slots to use. Note, this logic needs to kept 827 in sync with the size info on the definition of HRegClass. */ 828 829 if (vreg_lrs[j].reg_class == HRcVec128 830 || vreg_lrs[j].reg_class == HRcFlt64) { 831 832 /* Find two adjacent free slots in which between them provide 833 up to 128 bits in which to spill the vreg. Since we are 834 trying to find an even:odd pair, move along in steps of 2 835 (slots). */ 836 837 for (k = 0; k < N_SPILL64S-1; k += 2) 838 if (ss_busy_until_before[k] <= vreg_lrs[j].live_after 839 && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after) 840 break; 841 if (k >= N_SPILL64S-1) { 842 vpanic("LibVEX_N_SPILL_BYTES is too low. " 843 "Increase and recompile."); 844 } 845 if (0) vex_printf("16-byte spill offset in spill slot %d\n", (Int)k); 846 ss_busy_until_before[k+0] = vreg_lrs[j].dead_before; 847 ss_busy_until_before[k+1] = vreg_lrs[j].dead_before; 848 849 } else { 850 851 /* The ordinary case -- just find a single spill slot. */ 852 853 /* Find the lowest-numbered spill slot which is available at 854 the start point of this interval, and assign the interval 855 to it. */ 856 for (k = 0; k < N_SPILL64S; k++) 857 if (ss_busy_until_before[k] <= vreg_lrs[j].live_after) 858 break; 859 if (k == N_SPILL64S) { 860 vpanic("LibVEX_N_SPILL_BYTES is too low. " 861 "Increase and recompile."); 862 } 863 ss_busy_until_before[k] = vreg_lrs[j].dead_before; 864 865 } 866 867 /* This reflects LibVEX's hard-wired knowledge of the baseBlock 868 layout: the guest state, then two equal sized areas following 869 it for two sets of shadow state, and then the spill area. */ 870 vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8); 871 872 /* Independent check that we've made a sane choice of slot */ 873 sanity_check_spill_offset( &vreg_lrs[j] ); 874 /* if (j > max_ss_no) */ 875 /* max_ss_no = j; */ 876 } 877 878 # if 0 879 vex_printf("\n\n"); 880 for (j = 0; j < n_vregs; j++) 881 vex_printf("vreg %d --> spill offset %d\n", 882 j, vreg_lrs[j].spill_offset); 883 # endif 884 885 /* --------- Stage 4: establish rreg preferences --------- */ 886 887 /* It may be advantageous to allocating certain vregs to specific 888 rregs, as a way of avoiding reg-reg moves later. Here we 889 establish which, if any, rreg each vreg would prefer to be in. 890 Note that this constrains the allocator -- ideally we end up 891 with as few as possible vregs expressing a preference. 892 893 This is an optimisation: if the .preferred_rreg field is never 894 set to anything different from INVALID_HREG, the allocator still 895 works. */ 896 897 /* 30 Dec 04: removed this mechanism as it does not seem to 898 help. */ 899 900 /* --------- Stage 5: process instructions --------- */ 901 902 /* This is the main loop of the allocator. First, we need to 903 correctly set up our running state, which tracks the status of 904 each real register. */ 905 906 /* ------ BEGIN: Process each insn in turn. ------ */ 907 908 for (ii = 0; ii < instrs_in->arr_used; ii++) { 909 910 # if DEBUG_REGALLOC 911 vex_printf("\n====----====---- Insn %d ----====----====\n", ii); 912 vex_printf("---- "); 913 (*ppInstr)(instrs_in->arr[ii], mode64); 914 vex_printf("\n\nInitial state:\n"); 915 PRINT_STATE; 916 vex_printf("\n"); 917 # endif 918 919 /* ------------ Sanity checks ------------ */ 920 921 /* Sanity checks are expensive. So they are done only once 922 every 7 instructions, and just before the last 923 instruction. */ 924 do_sanity_check 925 = toBool( 926 False /* Set to True for sanity checking of all insns. */ 927 || ii == instrs_in->arr_used-1 928 || (ii > 0 && (ii % 7) == 0) 929 ); 930 931 if (do_sanity_check) { 932 933 /* Sanity check 1: all rregs with a hard live range crossing 934 this insn must be marked as unavailable in the running 935 state. */ 936 for (j = 0; j < rreg_lrs_used; j++) { 937 if (rreg_lrs_la[j].live_after < ii 938 && ii < rreg_lrs_la[j].dead_before) { 939 /* ii is the middle of a hard live range for some real 940 reg. Check it's marked as such in the running 941 state. */ 942 943 # if 0 944 vex_printf("considering la %d .. db %d reg = ", 945 rreg_lrs[j].live_after, 946 rreg_lrs[j].dead_before); 947 (*ppReg)(rreg_lrs[j].rreg); 948 vex_printf("\n"); 949 # endif 950 951 /* find the state entry for this rreg */ 952 for (k = 0; k < n_rregs; k++) 953 if (rreg_state[k].rreg == rreg_lrs_la[j].rreg) 954 break; 955 956 /* and assert that this rreg is marked as unavailable */ 957 vassert(rreg_state[k].disp == Unavail); 958 } 959 } 960 961 /* Sanity check 2: conversely, all rregs marked as 962 unavailable in the running rreg_state must have a 963 corresponding hard live range entry in the rreg_lrs 964 array. */ 965 for (j = 0; j < n_available_real_regs; j++) { 966 vassert(rreg_state[j].disp == Bound 967 || rreg_state[j].disp == Free 968 || rreg_state[j].disp == Unavail); 969 if (rreg_state[j].disp != Unavail) 970 continue; 971 for (k = 0; k < rreg_lrs_used; k++) 972 if (rreg_lrs_la[k].rreg == rreg_state[j].rreg 973 && rreg_lrs_la[k].live_after < ii 974 && ii < rreg_lrs_la[k].dead_before) 975 break; 976 /* If this vassertion fails, we couldn't find a 977 corresponding HLR. */ 978 vassert(k < rreg_lrs_used); 979 } 980 981 /* Sanity check 3: all vreg-rreg bindings must bind registers 982 of the same class. */ 983 for (j = 0; j < n_rregs; j++) { 984 if (rreg_state[j].disp != Bound) { 985 vassert(rreg_state[j].eq_spill_slot == False); 986 continue; 987 } 988 vassert(hregClass(rreg_state[j].rreg) 989 == hregClass(rreg_state[j].vreg)); 990 vassert( hregIsVirtual(rreg_state[j].vreg)); 991 vassert(!hregIsVirtual(rreg_state[j].rreg)); 992 } 993 994 /* Sanity check 4: the vreg_state and rreg_state 995 mutually-redundant mappings are consistent. If 996 rreg_state[j].vreg points at some vreg_state entry then 997 that vreg_state entry should point back at 998 rreg_state[j]. */ 999 for (j = 0; j < n_rregs; j++) { 1000 if (rreg_state[j].disp != Bound) 1001 continue; 1002 k = hregNumber(rreg_state[j].vreg); 1003 vassert(IS_VALID_VREGNO(k)); 1004 vassert(vreg_state[k] == j); 1005 } 1006 for (j = 0; j < n_vregs; j++) { 1007 k = vreg_state[j]; 1008 if (k == INVALID_RREG_NO) 1009 continue; 1010 vassert(IS_VALID_RREGNO(k)); 1011 vassert(rreg_state[k].disp == Bound); 1012 vassert(hregNumber(rreg_state[k].vreg) == j); 1013 } 1014 1015 } /* if (do_sanity_check) */ 1016 1017 /* ------------ end of Sanity checks ------------ */ 1018 1019 /* Do various optimisations pertaining to register coalescing 1020 and preferencing: 1021 MOV v <-> v coalescing (done here). 1022 MOV v <-> r coalescing (not yet, if ever) 1023 */ 1024 /* If doing a reg-reg move between two vregs, and the src's live 1025 range ends here and the dst's live range starts here, bind 1026 the dst to the src's rreg, and that's all. */ 1027 if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) { 1028 if (!hregIsVirtual(vregS)) goto cannot_coalesce; 1029 if (!hregIsVirtual(vregD)) goto cannot_coalesce; 1030 /* Check that *isMove is not telling us a bunch of lies ... */ 1031 vassert(hregClass(vregS) == hregClass(vregD)); 1032 k = hregNumber(vregS); 1033 m = hregNumber(vregD); 1034 vassert(IS_VALID_VREGNO(k)); 1035 vassert(IS_VALID_VREGNO(m)); 1036 if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce; 1037 if (vreg_lrs[m].live_after != ii) goto cannot_coalesce; 1038 # if DEBUG_REGALLOC 1039 vex_printf("COALESCE "); 1040 (*ppReg)(vregS); 1041 vex_printf(" -> "); 1042 (*ppReg)(vregD); 1043 vex_printf("\n\n"); 1044 # endif 1045 /* Find the state entry for vregS. */ 1046 for (m = 0; m < n_rregs; m++) 1047 if (rreg_state[m].disp == Bound && rreg_state[m].vreg == vregS) 1048 break; 1049 if (m == n_rregs) 1050 /* We failed to find a binding for vregS, which means it's 1051 currently not in a register. So we can't do the 1052 coalescing. Give up. */ 1053 goto cannot_coalesce; 1054 1055 /* Finally, we can do the coalescing. It's trivial -- merely 1056 claim vregS's register for vregD. */ 1057 rreg_state[m].vreg = vregD; 1058 vassert(IS_VALID_VREGNO(hregNumber(vregD))); 1059 vassert(IS_VALID_VREGNO(hregNumber(vregS))); 1060 vreg_state[hregNumber(vregD)] = toShort(m); 1061 vreg_state[hregNumber(vregS)] = INVALID_RREG_NO; 1062 1063 /* This rreg has become associated with a different vreg and 1064 hence with a different spill slot. Play safe. */ 1065 rreg_state[m].eq_spill_slot = False; 1066 1067 /* Move on to the next insn. We skip the post-insn stuff for 1068 fixed registers, since this move should not interact with 1069 them in any way. */ 1070 continue; 1071 } 1072 cannot_coalesce: 1073 1074 /* ------ Free up rregs bound to dead vregs ------ */ 1075 1076 /* Look for vregs whose live range has just ended, and 1077 mark the associated rreg as free. */ 1078 1079 for (j = 0; j < n_rregs; j++) { 1080 if (rreg_state[j].disp != Bound) 1081 continue; 1082 vreg = hregNumber(rreg_state[j].vreg); 1083 vassert(IS_VALID_VREGNO(vreg)); 1084 if (vreg_lrs[vreg].dead_before <= ii) { 1085 rreg_state[j].disp = Free; 1086 rreg_state[j].eq_spill_slot = False; 1087 m = hregNumber(rreg_state[j].vreg); 1088 vassert(IS_VALID_VREGNO(m)); 1089 vreg_state[m] = INVALID_RREG_NO; 1090 if (DEBUG_REGALLOC) { 1091 vex_printf("free up "); 1092 (*ppReg)(rreg_state[j].rreg); 1093 vex_printf("\n"); 1094 } 1095 } 1096 } 1097 1098 /* ------ Pre-instruction actions for fixed rreg uses ------ */ 1099 1100 /* Now we have to deal with rregs which are about to be made 1101 live by this instruction -- in other words, are entering into 1102 one of their live ranges. If any such rreg holds a vreg, we 1103 will have to free up the rreg. The simplest solution which 1104 is correct is to spill the rreg. 1105 1106 Note we could do better: 1107 * Could move it into some other free rreg, if one is available 1108 1109 Do this efficiently, by incrementally stepping along an array 1110 of rreg HLRs that are known to be sorted by start point 1111 (their .live_after field). 1112 */ 1113 while (True) { 1114 vassert(rreg_lrs_la_next >= 0); 1115 vassert(rreg_lrs_la_next <= rreg_lrs_used); 1116 if (rreg_lrs_la_next == rreg_lrs_used) 1117 break; /* no more real reg live ranges to consider */ 1118 if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after) 1119 break; /* next live range does not yet start */ 1120 vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after); 1121 /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up. 1122 Find the associated rreg_state entry. */ 1123 /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after. 1124 Real register live ranges are guaranteed to be well-formed 1125 in that they start with a write to the register -- Stage 2 1126 rejects any code not satisfying this. So the correct 1127 question to ask is whether 1128 rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is, 1129 whether the reg becomes live after this insn -- rather 1130 than before it. */ 1131 # if DEBUG_REGALLOC 1132 vex_printf("need to free up rreg: "); 1133 (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg); 1134 vex_printf("\n\n"); 1135 # endif 1136 for (k = 0; k < n_rregs; k++) 1137 if (rreg_state[k].rreg == rreg_lrs_la[rreg_lrs_la_next].rreg) 1138 break; 1139 /* If this fails, we don't have an entry for this rreg. 1140 Which we should. */ 1141 vassert(IS_VALID_RREGNO(k)); 1142 m = hregNumber(rreg_state[k].vreg); 1143 if (rreg_state[k].disp == Bound) { 1144 /* Yes, there is an associated vreg. Spill it if it's 1145 still live. */ 1146 vassert(IS_VALID_VREGNO(m)); 1147 vreg_state[m] = INVALID_RREG_NO; 1148 if (vreg_lrs[m].dead_before > ii) { 1149 vassert(vreg_lrs[m].reg_class != HRcINVALID); 1150 if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) { 1151 HInstr* spill1 = NULL; 1152 HInstr* spill2 = NULL; 1153 (*genSpill)( &spill1, &spill2, rreg_state[k].rreg, 1154 vreg_lrs[m].spill_offset, mode64 ); 1155 vassert(spill1 || spill2); /* can't both be NULL */ 1156 if (spill1) 1157 EMIT_INSTR(spill1); 1158 if (spill2) 1159 EMIT_INSTR(spill2); 1160 } 1161 rreg_state[k].eq_spill_slot = True; 1162 } 1163 } 1164 rreg_state[k].disp = Unavail; 1165 rreg_state[k].vreg = INVALID_HREG; 1166 rreg_state[k].eq_spill_slot = False; 1167 1168 /* check for further rregs entering HLRs at this point */ 1169 rreg_lrs_la_next++; 1170 } 1171 1172 1173 # if DEBUG_REGALLOC 1174 vex_printf("After pre-insn actions for fixed regs:\n"); 1175 PRINT_STATE; 1176 vex_printf("\n"); 1177 # endif 1178 1179 1180 /* ------ Deal with the current instruction. ------ */ 1181 1182 /* Finally we can begin the processing of this instruction 1183 itself. The aim is to free up enough rregs for this insn. 1184 This may generate spill stores since we may have to evict 1185 some vregs currently in rregs. Also generates spill loads. 1186 We also build up the final vreg->rreg mapping to be applied 1187 to the insn. */ 1188 1189 (*getRegUsage)( ®_usage, instrs_in->arr[ii], mode64 ); 1190 1191 initHRegRemap(&remap); 1192 1193 /* ------------ BEGIN directReload optimisation ----------- */ 1194 1195 /* If the instruction reads exactly one vreg which is currently 1196 in a spill slot, and this is last use of that vreg, see if we 1197 can convert the instruction into one reads directly from the 1198 spill slot. This is clearly only possible for x86 and amd64 1199 targets, since ppc and arm load-store architectures. If 1200 successful, replace instrs_in->arr[ii] with this new 1201 instruction, and recompute its reg usage, so that the change 1202 is invisible to the standard-case handling that follows. */ 1203 1204 if (directReload && reg_usage.n_used <= 2) { 1205 Bool debug_direct_reload = True && False; 1206 HReg cand = INVALID_HREG; 1207 Bool nreads = 0; 1208 Short spilloff = 0; 1209 1210 for (j = 0; j < reg_usage.n_used; j++) { 1211 1212 vreg = reg_usage.hreg[j]; 1213 1214 if (!hregIsVirtual(vreg)) 1215 continue; 1216 1217 if (reg_usage.mode[j] == HRmRead) { 1218 nreads++; 1219 m = hregNumber(vreg); 1220 vassert(IS_VALID_VREGNO(m)); 1221 k = vreg_state[m]; 1222 if (!IS_VALID_RREGNO(k)) { 1223 /* ok, it is spilled. Now, is this its last use? */ 1224 vassert(vreg_lrs[m].dead_before >= ii+1); 1225 if (vreg_lrs[m].dead_before == ii+1 1226 && cand == INVALID_HREG) { 1227 spilloff = vreg_lrs[m].spill_offset; 1228 cand = vreg; 1229 } 1230 } 1231 } 1232 } 1233 1234 if (nreads == 1 && cand != INVALID_HREG) { 1235 HInstr* reloaded; 1236 if (reg_usage.n_used == 2) 1237 vassert(reg_usage.hreg[0] != reg_usage.hreg[1]); 1238 1239 reloaded = directReload ( instrs_in->arr[ii], cand, spilloff ); 1240 if (debug_direct_reload && !reloaded) { 1241 vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" "); 1242 ppInstr(instrs_in->arr[ii], mode64); 1243 } 1244 if (reloaded) { 1245 /* Update info about the insn, so it looks as if it had 1246 been in this form all along. */ 1247 instrs_in->arr[ii] = reloaded; 1248 (*getRegUsage)( ®_usage, instrs_in->arr[ii], mode64 ); 1249 if (debug_direct_reload && !reloaded) { 1250 vex_printf(" --> "); 1251 ppInstr(reloaded, mode64); 1252 } 1253 } 1254 1255 if (debug_direct_reload && !reloaded) 1256 vex_printf("\n"); 1257 } 1258 1259 } 1260 1261 /* ------------ END directReload optimisation ------------ */ 1262 1263 /* for each reg mentioned in the insn ... */ 1264 for (j = 0; j < reg_usage.n_used; j++) { 1265 1266 vreg = reg_usage.hreg[j]; 1267 1268 /* only interested in virtual registers right now. */ 1269 if (!hregIsVirtual(vreg)) 1270 continue; 1271 1272 # if 0 1273 vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n"); 1274 # endif 1275 1276 /* Now we're trying to find a rreg for "vreg". First of all, 1277 if it already has an rreg assigned, we don't need to do 1278 anything more. Search the current state to find out. */ 1279 m = hregNumber(vreg); 1280 vassert(IS_VALID_VREGNO(m)); 1281 k = vreg_state[m]; 1282 if (IS_VALID_RREGNO(k)) { 1283 vassert(rreg_state[k].disp == Bound); 1284 addToHRegRemap(&remap, vreg, rreg_state[k].rreg); 1285 /* If this rreg is written or modified, mark it as different 1286 from any spill slot value. */ 1287 if (reg_usage.mode[j] != HRmRead) 1288 rreg_state[k].eq_spill_slot = False; 1289 continue; 1290 } else { 1291 vassert(k == INVALID_RREG_NO); 1292 } 1293 1294 /* No luck. The next thing to do is see if there is a 1295 currently free rreg available, of the correct class. If 1296 so, bag it. NOTE, we could improve this by selecting an 1297 rreg for which the next live-range event is as far ahead 1298 as possible. */ 1299 k_suboptimal = -1; 1300 for (k = 0; k < n_rregs; k++) { 1301 if (rreg_state[k].disp != Free 1302 || hregClass(rreg_state[k].rreg) != hregClass(vreg)) 1303 continue; 1304 if (rreg_state[k].has_hlrs) { 1305 /* Well, at least we can use k_suboptimal if we really 1306 have to. Keep on looking for a better candidate. */ 1307 k_suboptimal = k; 1308 } else { 1309 /* Found a preferable reg. Use it. */ 1310 k_suboptimal = -1; 1311 break; 1312 } 1313 } 1314 if (k_suboptimal >= 0) 1315 k = k_suboptimal; 1316 1317 if (k < n_rregs) { 1318 rreg_state[k].disp = Bound; 1319 rreg_state[k].vreg = vreg; 1320 m = hregNumber(vreg); 1321 vassert(IS_VALID_VREGNO(m)); 1322 vreg_state[m] = toShort(k); 1323 addToHRegRemap(&remap, vreg, rreg_state[k].rreg); 1324 /* Generate a reload if needed. This only creates needed 1325 reloads because the live range builder for vregs will 1326 guarantee that the first event for a vreg is a write. 1327 Hence, if this reference is not a write, it cannot be 1328 the first reference for this vreg, and so a reload is 1329 indeed needed. */ 1330 if (reg_usage.mode[j] != HRmWrite) { 1331 vassert(vreg_lrs[m].reg_class != HRcINVALID); 1332 HInstr* reload1 = NULL; 1333 HInstr* reload2 = NULL; 1334 (*genReload)( &reload1, &reload2, rreg_state[k].rreg, 1335 vreg_lrs[m].spill_offset, mode64 ); 1336 vassert(reload1 || reload2); /* can't both be NULL */ 1337 if (reload1) 1338 EMIT_INSTR(reload1); 1339 if (reload2) 1340 EMIT_INSTR(reload2); 1341 /* This rreg is read or modified by the instruction. 1342 If it's merely read we can claim it now equals the 1343 spill slot, but not so if it is modified. */ 1344 if (reg_usage.mode[j] == HRmRead) { 1345 rreg_state[k].eq_spill_slot = True; 1346 } else { 1347 vassert(reg_usage.mode[j] == HRmModify); 1348 rreg_state[k].eq_spill_slot = False; 1349 } 1350 } else { 1351 rreg_state[k].eq_spill_slot = False; 1352 } 1353 1354 continue; 1355 } 1356 1357 /* Well, now we have no option but to spill a vreg. It's 1358 important to make a good choice of vreg to spill, and of 1359 course we need to be careful not to spill a vreg which is 1360 needed by this insn. */ 1361 1362 /* First, mark in the rreg_state, those rregs which are not spill 1363 candidates, due to holding a vreg mentioned by this 1364 instruction. Or being of the wrong class. */ 1365 for (k = 0; k < n_rregs; k++) { 1366 rreg_state[k].is_spill_cand = False; 1367 if (rreg_state[k].disp != Bound) 1368 continue; 1369 if (hregClass(rreg_state[k].rreg) != hregClass(vreg)) 1370 continue; 1371 rreg_state[k].is_spill_cand = True; 1372 for (m = 0; m < reg_usage.n_used; m++) { 1373 if (rreg_state[k].vreg == reg_usage.hreg[m]) { 1374 rreg_state[k].is_spill_cand = False; 1375 break; 1376 } 1377 } 1378 } 1379 1380 /* We can choose to spill any rreg satisfying 1381 rreg_state[r].is_spill_cand (so to speak). Choose r so that 1382 the next use of its associated vreg is as far ahead as 1383 possible, in the hope that this will minimise the number 1384 of consequent reloads required. */ 1385 spillee 1386 = findMostDistantlyMentionedVReg ( 1387 getRegUsage, instrs_in, ii+1, rreg_state, n_rregs, mode64 ); 1388 1389 if (spillee == -1) { 1390 /* Hmmmmm. There don't appear to be any spill candidates. 1391 We're hosed. */ 1392 vex_printf("reg_alloc: can't find a register in class: "); 1393 ppHRegClass(hregClass(vreg)); 1394 vex_printf("\n"); 1395 vpanic("reg_alloc: can't create a free register."); 1396 } 1397 1398 /* Right. So we're going to spill rreg_state[spillee]. */ 1399 vassert(IS_VALID_RREGNO(spillee)); 1400 vassert(rreg_state[spillee].disp == Bound); 1401 /* check it's the right class */ 1402 vassert(hregClass(rreg_state[spillee].rreg) == hregClass(vreg)); 1403 /* check we're not ejecting the vreg for which we are trying 1404 to free up a register. */ 1405 vassert(rreg_state[spillee].vreg != vreg); 1406 1407 m = hregNumber(rreg_state[spillee].vreg); 1408 vassert(IS_VALID_VREGNO(m)); 1409 1410 /* So here's the spill store. Assert that we're spilling a 1411 live vreg. */ 1412 vassert(vreg_lrs[m].dead_before > ii); 1413 vassert(vreg_lrs[m].reg_class != HRcINVALID); 1414 if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) { 1415 HInstr* spill1 = NULL; 1416 HInstr* spill2 = NULL; 1417 (*genSpill)( &spill1, &spill2, rreg_state[spillee].rreg, 1418 vreg_lrs[m].spill_offset, mode64 ); 1419 vassert(spill1 || spill2); /* can't both be NULL */ 1420 if (spill1) 1421 EMIT_INSTR(spill1); 1422 if (spill2) 1423 EMIT_INSTR(spill2); 1424 } 1425 1426 /* Update the rreg_state to reflect the new assignment for this 1427 rreg. */ 1428 rreg_state[spillee].vreg = vreg; 1429 vreg_state[m] = INVALID_RREG_NO; 1430 1431 rreg_state[spillee].eq_spill_slot = False; /* be safe */ 1432 1433 m = hregNumber(vreg); 1434 vassert(IS_VALID_VREGNO(m)); 1435 vreg_state[m] = toShort(spillee); 1436 1437 /* Now, if this vreg is being read or modified (as opposed to 1438 written), we have to generate a reload for it. */ 1439 if (reg_usage.mode[j] != HRmWrite) { 1440 vassert(vreg_lrs[m].reg_class != HRcINVALID); 1441 HInstr* reload1 = NULL; 1442 HInstr* reload2 = NULL; 1443 (*genReload)( &reload1, &reload2, rreg_state[spillee].rreg, 1444 vreg_lrs[m].spill_offset, mode64 ); 1445 vassert(reload1 || reload2); /* can't both be NULL */ 1446 if (reload1) 1447 EMIT_INSTR(reload1); 1448 if (reload2) 1449 EMIT_INSTR(reload2); 1450 /* This rreg is read or modified by the instruction. 1451 If it's merely read we can claim it now equals the 1452 spill slot, but not so if it is modified. */ 1453 if (reg_usage.mode[j] == HRmRead) { 1454 rreg_state[spillee].eq_spill_slot = True; 1455 } else { 1456 vassert(reg_usage.mode[j] == HRmModify); 1457 rreg_state[spillee].eq_spill_slot = False; 1458 } 1459 } 1460 1461 /* So after much twisting and turning, we have vreg mapped to 1462 rreg_state[spillee].rreg. Note that in the map. */ 1463 addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg); 1464 1465 } /* iterate over registers in this instruction. */ 1466 1467 /* We've finished clowning around with registers in this instruction. 1468 Three results: 1469 - the running rreg_state[] has been updated 1470 - a suitable vreg->rreg mapping for this instruction has been 1471 constructed 1472 - spill and reload instructions may have been emitted. 1473 1474 The final step is to apply the mapping to the instruction, 1475 and emit that. 1476 */ 1477 1478 /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */ 1479 (*mapRegs)( &remap, instrs_in->arr[ii], mode64 ); 1480 EMIT_INSTR( instrs_in->arr[ii] ); 1481 1482 # if DEBUG_REGALLOC 1483 vex_printf("After dealing with current insn:\n"); 1484 PRINT_STATE; 1485 vex_printf("\n"); 1486 # endif 1487 1488 /* ------ Post-instruction actions for fixed rreg uses ------ */ 1489 1490 /* Now we need to check for rregs exiting fixed live ranges 1491 after this instruction, and if so mark them as free. */ 1492 while (True) { 1493 vassert(rreg_lrs_db_next >= 0); 1494 vassert(rreg_lrs_db_next <= rreg_lrs_used); 1495 if (rreg_lrs_db_next == rreg_lrs_used) 1496 break; /* no more real reg live ranges to consider */ 1497 if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before) 1498 break; /* next live range does not yet start */ 1499 vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before); 1500 /* rreg_lrs_db[[rreg_lrs_db_next].rreg is exiting a hard live 1501 range. Mark it as such in the main rreg_state array. */ 1502 for (k = 0; k < n_rregs; k++) 1503 if (rreg_state[k].rreg == rreg_lrs_db[rreg_lrs_db_next].rreg) 1504 break; 1505 /* If this vassertion fails, we don't have an entry for 1506 this rreg. Which we should. */ 1507 vassert(k < n_rregs); 1508 vassert(rreg_state[k].disp == Unavail); 1509 rreg_state[k].disp = Free; 1510 rreg_state[k].vreg = INVALID_HREG; 1511 rreg_state[k].eq_spill_slot = False; 1512 1513 /* check for further rregs leaving HLRs at this point */ 1514 rreg_lrs_db_next++; 1515 } 1516 1517 # if DEBUG_REGALLOC 1518 vex_printf("After post-insn actions for fixed regs:\n"); 1519 PRINT_STATE; 1520 vex_printf("\n"); 1521 # endif 1522 1523 } /* iterate over insns */ 1524 1525 /* ------ END: Process each insn in turn. ------ */ 1526 1527 /* free(rreg_state); */ 1528 /* free(rreg_lrs); */ 1529 /* if (vreg_lrs) free(vreg_lrs); */ 1530 1531 /* Paranoia */ 1532 for (j = 0; j < n_rregs; j++) 1533 vassert(rreg_state[j].rreg == available_real_regs[j]); 1534 1535 vassert(rreg_lrs_la_next == rreg_lrs_used); 1536 vassert(rreg_lrs_db_next == rreg_lrs_used); 1537 1538 return instrs_out; 1539 1540 # undef INVALID_INSTRNO 1541 # undef EMIT_INSTR 1542 # undef PRINT_STATE 1543 } 1544 1545 1546 1547 /*---------------------------------------------------------------*/ 1548 /*--- host_reg_alloc2.c ---*/ 1549 /*---------------------------------------------------------------*/ 1550