/*---------------------------------------------------------------*/
/*--- begin                                 host_reg_alloc2.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"

#include "main_util.h"
#include "host_generic_regs.h"

/* Set to 1 for lots of debugging output. */
#define DEBUG_REGALLOC 0


/* TODO 27 Oct 04:

   Better consistency checking from what isMove tells us.

   We can possibly do V-V coalescing even when the src is spilled,
   providing we can arrange for the dst to have the same spill slot.

   Note that state[].hreg is the same as the available real regs.

   Generally rationalise data structures.  */


/* Records information on virtual register live ranges.  Computed once
   and remains unchanged after that. */
typedef
   struct {
      /* Becomes live for the first time after this insn ... */
      Short live_after;
      /* Becomes dead for the last time before this insn ... */
      Short dead_before;
      /* The "home" spill slot, if needed.  Never changes. */
      Short spill_offset;
      Short spill_size;
      /* What kind of register this is. */
      HRegClass reg_class;
   }
   VRegLR;


/* Records information on real-register live ranges.  Computed once
   and remains unchanged after that. */
typedef
   struct {
      /* Which real register this range describes. */
      HReg rreg;
      /* Becomes live after this insn ... */
      Short live_after;
      /* Becomes dead before this insn ... */
      Short dead_before;
   }
   RRegLR;


/* An array of the following structs (rreg_state) comprises the
   running state of the allocator.  It indicates what the current
   disposition of each allocatable real register is.  The array gets
   updated as the allocator processes instructions. */
typedef
   struct {
      /* ------ FIELDS WHICH DO NOT CHANGE ------ */
      /* Which rreg is this for? */
      HReg rreg;
      /* Is this involved in any HLRs?  (only an optimisation hint) */
      Bool has_hlrs;
      /* ------ FIELDS WHICH DO CHANGE ------ */
      /* 6 May 07: rearranged fields below so the whole struct fits
         into 16 bytes on both x86 and amd64. */
      /* Used when .disp == Bound and we are looking for vregs to
         spill. */
      Bool is_spill_cand;
      /* Optimisation: used when .disp == Bound.  Indicates when the
         rreg has the same value as the spill slot for the associated
         vreg.  Is safely left at False, and becomes True after a
         spill store or reload for this rreg. */
      Bool eq_spill_slot;
      /* What's its current disposition? */
      enum { Free,     /* available for use */
             Unavail,  /* in a real-reg live range */
             Bound     /* in use (holding value of some vreg) */
           }
           disp;
      /* If .disp == Bound, what vreg is it bound to? */
      HReg vreg;
   }
   RRegState;


/* The allocator also maintains a redundant array of indexes
   (vreg_state) from vreg numbers back to entries in rreg_state.  It
   is redundant because iff vreg_state[i] == j then
   hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
   point at each other.  The purpose of this is to speed up activities
   which involve looking for a particular vreg: there is no need to
   scan the rreg_state looking for it, just index directly into
   vreg_state.  The FAQ "does this vreg already have an associated
   rreg" is the main beneficiary.

   To indicate, in vreg_state[i], that a given vreg is not currently
   associated with any rreg, that entry can be set to INVALID_RREG_NO.

   Because the vreg_state entries are signed Shorts, the max number
   of vregs that can be handled by regalloc is 32767. */

#define INVALID_RREG_NO ((Short)(-1))

/* Range checks for vreg/rreg indexes.  NB: these reference n_vregs /
   n_rregs, which must be in scope at the point of use. */
#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)


/* Does this instruction mention a particular reg?  Determined by
   querying the instruction's register usage via the target-supplied
   getRegUsage callback. */
static Bool instrMentionsReg ( 
   void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
   HInstr* instr, 
   HReg r, 
   Bool mode64
)
{
   Int i;
   HRegUsage reg_usage;
   (*getRegUsage)(&reg_usage, instr, mode64);
   for (i = 0; i < reg_usage.n_used; i++)
      if (sameHReg(reg_usage.hreg[i], r))
         return True;
   return False;
}


/* Search forward from some given point in the incoming instruction
   sequence.  Point is to select a virtual register to spill, by
   finding the vreg which is mentioned as far ahead as possible, in
   the hope that this will minimise the number of consequent reloads.

   Only do the search for vregs which are Bound in the running state,
   and for which the .is_spill_cand field is set.  This allows the
   caller to arbitrarily restrict the set of spill candidates to be
   considered.
172 173 Returns an index into the state array indicating the (v,r) pair to 174 spill, or -1 if none was found. */ 175 static 176 Int findMostDistantlyMentionedVReg ( 177 void (*getRegUsage) (HRegUsage*, HInstr*, Bool), 178 HInstrArray* instrs_in, 179 Int search_from_instr, 180 RRegState* state, 181 Int n_state, 182 Bool mode64 183 ) 184 { 185 Int k, m; 186 Int furthest_k = -1; 187 Int furthest = -1; 188 vassert(search_from_instr >= 0); 189 for (k = 0; k < n_state; k++) { 190 if (!state[k].is_spill_cand) 191 continue; 192 vassert(state[k].disp == Bound); 193 for (m = search_from_instr; m < instrs_in->arr_used; m++) { 194 if (instrMentionsReg(getRegUsage, 195 instrs_in->arr[m], state[k].vreg, mode64)) 196 break; 197 } 198 if (m > furthest) { 199 furthest = m; 200 furthest_k = k; 201 } 202 } 203 return furthest_k; 204 } 205 206 207 /* Check that this vreg has been assigned a sane spill offset. */ 208 static inline void sanity_check_spill_offset ( VRegLR* vreg ) 209 { 210 switch (vreg->reg_class) { 211 case HRcVec128: case HRcFlt64: 212 vassert(0 == ((UShort)vreg->spill_offset % 16)); break; 213 default: 214 vassert(0 == ((UShort)vreg->spill_offset % 8)); break; 215 } 216 } 217 218 219 /* Double the size of the real-reg live-range array, if needed. */ 220 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used ) 221 { 222 Int k; 223 RRegLR* arr2; 224 if (used < *size) return; 225 if (0) 226 vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size); 227 vassert(used == *size); 228 arr2 = LibVEX_Alloc(2 * *size * sizeof(RRegLR)); 229 for (k = 0; k < *size; k++) 230 arr2[k] = (*info)[k]; 231 *size *= 2; 232 *info = arr2; 233 } 234 235 236 /* Sort an array of RRegLR entries by either the .live_after or 237 .dead_before fields. This is performance-critical. 
*/ 238 static void sortRRLRarray ( RRegLR* arr, 239 Int size, Bool by_live_after ) 240 { 241 Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 242 9841, 29524, 88573, 265720, 243 797161, 2391484 }; 244 Int lo = 0; 245 Int hi = size-1; 246 Int i, j, h, bigN, hp; 247 RRegLR v; 248 249 vassert(size >= 0); 250 if (size == 0) 251 return; 252 253 bigN = hi - lo + 1; if (bigN < 2) return; 254 hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--; 255 256 if (by_live_after) { 257 258 for ( ; hp >= 0; hp--) { 259 h = incs[hp]; 260 for (i = lo + h; i <= hi; i++) { 261 v = arr[i]; 262 j = i; 263 while (arr[j-h].live_after > v.live_after) { 264 arr[j] = arr[j-h]; 265 j = j - h; 266 if (j <= (lo + h - 1)) break; 267 } 268 arr[j] = v; 269 } 270 } 271 272 } else { 273 274 for ( ; hp >= 0; hp--) { 275 h = incs[hp]; 276 for (i = lo + h; i <= hi; i++) { 277 v = arr[i]; 278 j = i; 279 while (arr[j-h].dead_before > v.dead_before) { 280 arr[j] = arr[j-h]; 281 j = j - h; 282 if (j <= (lo + h - 1)) break; 283 } 284 arr[j] = v; 285 } 286 } 287 288 } 289 } 290 291 292 /* A target-independent register allocator. Requires various 293 functions which it uses to deal abstractly with instructions and 294 registers, since it cannot have any target-specific knowledge. 295 296 Returns a new list of instructions, which, as a result of the 297 behaviour of mapRegs, will be in-place modifications of the 298 original instructions. 299 300 Requires that the incoming code has been generated using 301 vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside 302 that range is a checked run-time error. 303 304 Takes an expandable array of pointers to unallocated insns. 305 Returns an expandable array of pointers to allocated insns. 306 */ 307 HInstrArray* doRegisterAllocation ( 308 309 /* Incoming virtual-registerised code. */ 310 HInstrArray* instrs_in, 311 312 /* An array listing all the real registers the allocator may use, 313 in no particular order. 
*/ 314 HReg* available_real_regs, 315 Int n_available_real_regs, 316 317 /* Return True iff the given insn is a reg-reg move, in which 318 case also return the src and dst regs. */ 319 Bool (*isMove) ( HInstr*, HReg*, HReg* ), 320 321 /* Get info about register usage in this insn. */ 322 void (*getRegUsage) ( HRegUsage*, HInstr*, Bool ), 323 324 /* Apply a reg-reg mapping to an insn. */ 325 void (*mapRegs) ( HRegRemap*, HInstr*, Bool ), 326 327 /* Return one, or, if we're unlucky, two insn(s) to spill/restore a 328 real reg to a spill slot byte offset. The two leading HInstr** 329 args are out parameters, through which the generated insns are 330 returned. Also (optionally) a 'directReload' function, which 331 attempts to replace a given instruction by one which reads 332 directly from a specified spill slot. May be NULL, in which 333 case the optimisation is not attempted. */ 334 void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ), 335 void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ), 336 HInstr* (*directReload) ( HInstr*, HReg, Short ), 337 Int guest_sizeB, 338 339 /* For debug printing only. */ 340 void (*ppInstr) ( HInstr*, Bool ), 341 void (*ppReg) ( HReg ), 342 343 /* 32/64bit mode */ 344 Bool mode64 345 ) 346 { 347 # define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) 348 349 const Bool eq_spill_opt = True; 350 351 /* Iterators and temporaries. */ 352 Int ii, j, k, m, spillee, k_suboptimal; 353 HReg rreg, vreg, vregS, vregD; 354 HRegUsage reg_usage; 355 356 /* Info on vregs and rregs. Computed once and remains 357 unchanged. */ 358 Int n_vregs; 359 VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */ 360 361 /* We keep two copies of the real-reg live range info, one sorted 362 by .live_after and the other by .dead_before. First the 363 unsorted info is created in the _la variant is copied into the 364 _db variant. Once that's done both of them are sorted. 365 We also need two integer cursors which record the next 366 location in the two arrays to consider. 
*/ 367 RRegLR* rreg_lrs_la; 368 RRegLR* rreg_lrs_db; 369 Int rreg_lrs_size; 370 Int rreg_lrs_used; 371 Int rreg_lrs_la_next; 372 Int rreg_lrs_db_next; 373 374 /* Used when constructing vreg_lrs (for allocating stack 375 slots). */ 376 Int ss_busy_until_before[N_SPILL64S]; 377 378 /* Used when constructing rreg_lrs. */ 379 Int* rreg_live_after; 380 Int* rreg_dead_before; 381 382 /* Running state of the core allocation algorithm. */ 383 RRegState* rreg_state; /* [0 .. n_rregs-1] */ 384 Int n_rregs; 385 386 /* .. and the redundant backward map */ 387 /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO. 388 This inplies n_rregs must be <= 32768. */ 389 Short* vreg_state; /* [0 .. n_vregs-1] */ 390 391 /* The vreg -> rreg map constructed and then applied to each 392 instr. */ 393 HRegRemap remap; 394 395 /* The output array of instructions. */ 396 HInstrArray* instrs_out; 397 398 /* Sanity checks are expensive. They are only done periodically, 399 not at each insn processed. */ 400 Bool do_sanity_check; 401 402 vassert(0 == (guest_sizeB % 16)); 403 vassert(0 == (LibVEX_N_SPILL_BYTES % 16)); 404 vassert(0 == (N_SPILL64S % 2)); 405 406 /* The live range numbers are signed shorts, and so limiting the 407 number of insns to 15000 comfortably guards against them 408 overflowing 32k. 
*/ 409 vassert(instrs_in->arr_used <= 15000); 410 411 # define INVALID_INSTRNO (-2) 412 413 # define EMIT_INSTR(_instr) \ 414 do { \ 415 HInstr* _tmp = (_instr); \ 416 if (DEBUG_REGALLOC) { \ 417 vex_printf("** "); \ 418 (*ppInstr)(_tmp, mode64); \ 419 vex_printf("\n\n"); \ 420 } \ 421 addHInstr ( instrs_out, _tmp ); \ 422 } while (0) 423 424 # define PRINT_STATE \ 425 do { \ 426 Int z, q; \ 427 for (z = 0; z < n_rregs; z++) { \ 428 vex_printf(" rreg_state[%2d] = ", z); \ 429 (*ppReg)(rreg_state[z].rreg); \ 430 vex_printf(" \t"); \ 431 switch (rreg_state[z].disp) { \ 432 case Free: vex_printf("Free\n"); break; \ 433 case Unavail: vex_printf("Unavail\n"); break; \ 434 case Bound: vex_printf("BoundTo "); \ 435 (*ppReg)(rreg_state[z].vreg); \ 436 vex_printf("\n"); break; \ 437 } \ 438 } \ 439 vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \ 440 q = 0; \ 441 for (z = 0; z < n_vregs; z++) { \ 442 if (vreg_state[z] == INVALID_RREG_NO) \ 443 continue; \ 444 vex_printf("[%d] -> %d ", z, vreg_state[z]); \ 445 q++; \ 446 if (q > 0 && (q % 6) == 0) \ 447 vex_printf("\n "); \ 448 } \ 449 vex_printf("\n"); \ 450 } while (0) 451 452 453 /* --------- Stage 0: set up output array --------- */ 454 /* --------- and allocate/initialise running state. --------- */ 455 456 instrs_out = newHInstrArray(); 457 458 /* ... and initialise running state. */ 459 /* n_rregs is no more than a short name for n_available_real_regs. */ 460 n_rregs = n_available_real_regs; 461 n_vregs = instrs_in->n_vregs; 462 463 /* If this is not so, vreg_state entries will overflow. 
*/ 464 vassert(n_vregs < 32767); 465 466 rreg_state = LibVEX_Alloc(n_rregs * sizeof(RRegState)); 467 vreg_state = LibVEX_Alloc(n_vregs * sizeof(Short)); 468 469 for (j = 0; j < n_rregs; j++) { 470 rreg_state[j].rreg = available_real_regs[j]; 471 rreg_state[j].has_hlrs = False; 472 rreg_state[j].disp = Free; 473 rreg_state[j].vreg = INVALID_HREG; 474 rreg_state[j].is_spill_cand = False; 475 rreg_state[j].eq_spill_slot = False; 476 } 477 478 for (j = 0; j < n_vregs; j++) 479 vreg_state[j] = INVALID_RREG_NO; 480 481 482 /* --------- Stage 1: compute vreg live ranges. --------- */ 483 /* --------- Stage 2: compute rreg live ranges. --------- */ 484 485 /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */ 486 487 /* This is relatively simple, because (1) we only seek the complete 488 end-to-end live range of each vreg, and are not interested in 489 any holes in it, and (2) the vregs are conveniently numbered 0 490 .. n_vregs-1, so we can just dump the results in a 491 pre-allocated array. */ 492 493 vreg_lrs = NULL; 494 if (n_vregs > 0) 495 vreg_lrs = LibVEX_Alloc(sizeof(VRegLR) * n_vregs); 496 497 for (j = 0; j < n_vregs; j++) { 498 vreg_lrs[j].live_after = INVALID_INSTRNO; 499 vreg_lrs[j].dead_before = INVALID_INSTRNO; 500 vreg_lrs[j].spill_offset = 0; 501 vreg_lrs[j].spill_size = 0; 502 vreg_lrs[j].reg_class = HRcINVALID; 503 } 504 505 /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */ 506 507 /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */ 508 509 /* This is more complex than Stage 1, because we need to compute 510 exactly all the live ranges of all the allocatable real regs, 511 and we don't know in advance how many there will be. */ 512 513 rreg_lrs_used = 0; 514 rreg_lrs_size = 4; 515 rreg_lrs_la = LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR)); 516 rreg_lrs_db = NULL; /* we'll create this later */ 517 518 /* We'll need to track live range start/end points seperately for 519 each rreg. Sigh. 
*/ 520 vassert(n_available_real_regs > 0); 521 rreg_live_after = LibVEX_Alloc(n_available_real_regs * sizeof(Int)); 522 rreg_dead_before = LibVEX_Alloc(n_available_real_regs * sizeof(Int)); 523 524 for (j = 0; j < n_available_real_regs; j++) { 525 rreg_live_after[j] = 526 rreg_dead_before[j] = INVALID_INSTRNO; 527 } 528 529 /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */ 530 531 /* ------ start of ITERATE OVER INSNS ------ */ 532 533 for (ii = 0; ii < instrs_in->arr_used; ii++) { 534 535 (*getRegUsage)( ®_usage, instrs_in->arr[ii], mode64 ); 536 537 # if 0 538 vex_printf("\n%d stage1: ", ii); 539 (*ppInstr)(instrs_in->arr[ii], mode64); 540 vex_printf("\n"); 541 ppHRegUsage(®_usage); 542 # endif 543 544 /* ------ start of DEAL WITH VREG LIVE RANGES ------ */ 545 546 /* for each reg mentioned in the insn ... */ 547 for (j = 0; j < reg_usage.n_used; j++) { 548 549 vreg = reg_usage.hreg[j]; 550 /* only interested in virtual registers right now. */ 551 if (!hregIsVirtual(vreg)) 552 continue; 553 k = hregNumber(vreg); 554 if (k < 0 || k >= n_vregs) { 555 vex_printf("\n"); 556 (*ppInstr)(instrs_in->arr[ii], mode64); 557 vex_printf("\n"); 558 vex_printf("vreg %d, n_vregs %d\n", k, n_vregs); 559 vpanic("doRegisterAllocation: out-of-range vreg"); 560 } 561 562 /* Take the opportunity to note its regclass. We'll need 563 that when allocating spill slots. */ 564 if (vreg_lrs[k].reg_class == HRcINVALID) { 565 /* First mention of this vreg. */ 566 vreg_lrs[k].reg_class = hregClass(vreg); 567 } else { 568 /* Seen it before, so check for consistency. */ 569 vassert(vreg_lrs[k].reg_class == hregClass(vreg)); 570 } 571 572 /* Now consider live ranges. 
*/ 573 switch (reg_usage.mode[j]) { 574 case HRmRead: 575 if (vreg_lrs[k].live_after == INVALID_INSTRNO) { 576 vex_printf("\n\nOFFENDING VREG = %d\n", k); 577 vpanic("doRegisterAllocation: " 578 "first event for vreg is Read"); 579 } 580 vreg_lrs[k].dead_before = toShort(ii + 1); 581 break; 582 case HRmWrite: 583 if (vreg_lrs[k].live_after == INVALID_INSTRNO) 584 vreg_lrs[k].live_after = toShort(ii); 585 vreg_lrs[k].dead_before = toShort(ii + 1); 586 break; 587 case HRmModify: 588 if (vreg_lrs[k].live_after == INVALID_INSTRNO) { 589 vex_printf("\n\nOFFENDING VREG = %d\n", k); 590 vpanic("doRegisterAllocation: " 591 "first event for vreg is Modify"); 592 } 593 vreg_lrs[k].dead_before = toShort(ii + 1); 594 break; 595 default: 596 vpanic("doRegisterAllocation(1)"); 597 } /* switch */ 598 599 } /* iterate over registers */ 600 601 /* ------ end of DEAL WITH VREG LIVE RANGES ------ */ 602 603 /* ------ start of DEAL WITH RREG LIVE RANGES ------ */ 604 605 /* for each reg mentioned in the insn ... */ 606 for (j = 0; j < reg_usage.n_used; j++) { 607 608 /* Dummy initialisations of flush_la and flush_db to avoid 609 possible bogus uninit-var warnings from gcc. */ 610 Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO; 611 Bool flush; 612 613 rreg = reg_usage.hreg[j]; 614 615 /* only interested in real registers right now. */ 616 if (hregIsVirtual(rreg)) 617 continue; 618 619 /* Furthermore, we're not interested in this rreg unless it's 620 one of the allocatable ones. For example, it could be a 621 stack pointer register, or some other register beyond our 622 control, in which case we should just ignore it. */ 623 for (k = 0; k < n_available_real_regs; k++) 624 if (sameHReg(available_real_regs[k], rreg)) 625 break; 626 if (k == n_available_real_regs) 627 continue; /* not found -- ignore. 
*/ 628 flush = False; 629 switch (reg_usage.mode[j]) { 630 case HRmWrite: 631 flush_la = rreg_live_after[k]; 632 flush_db = rreg_dead_before[k]; 633 if (flush_la != INVALID_INSTRNO 634 && flush_db != INVALID_INSTRNO) 635 flush = True; 636 rreg_live_after[k] = ii; 637 rreg_dead_before[k] = ii+1; 638 break; 639 case HRmRead: 640 if (rreg_live_after[k] == INVALID_INSTRNO) { 641 vex_printf("\nOFFENDING RREG = "); 642 (*ppReg)(available_real_regs[k]); 643 vex_printf("\n"); 644 vex_printf("\nOFFENDING instr = "); 645 (*ppInstr)(instrs_in->arr[ii], mode64); 646 vex_printf("\n"); 647 vpanic("doRegisterAllocation: " 648 "first event for rreg is Read"); 649 } 650 rreg_dead_before[k] = ii+1; 651 break; 652 case HRmModify: 653 if (rreg_live_after[k] == INVALID_INSTRNO) { 654 vex_printf("\nOFFENDING RREG = "); 655 (*ppReg)(available_real_regs[k]); 656 vex_printf("\n"); 657 vex_printf("\nOFFENDING instr = "); 658 (*ppInstr)(instrs_in->arr[ii], mode64); 659 vex_printf("\n"); 660 vpanic("doRegisterAllocation: " 661 "first event for rreg is Modify"); 662 } 663 rreg_dead_before[k] = ii+1; 664 break; 665 default: 666 vpanic("doRegisterAllocation(2)"); 667 } 668 669 if (flush) { 670 vassert(flush_la != INVALID_INSTRNO); 671 vassert(flush_db != INVALID_INSTRNO); 672 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); 673 if (0) 674 vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db); 675 rreg_lrs_la[rreg_lrs_used].rreg = rreg; 676 rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la); 677 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db); 678 rreg_lrs_used++; 679 } 680 681 } /* iterate over regs in the instr */ 682 683 /* ------ end of DEAL WITH RREG LIVE RANGES ------ */ 684 685 } /* iterate over insns */ 686 687 /* ------ end of ITERATE OVER INSNS ------ */ 688 689 /* ------ start of FINALISE RREG LIVE RANGES ------ */ 690 691 /* Now finish up any live ranges left over. 
*/ 692 for (j = 0; j < n_available_real_regs; j++) { 693 694 # if 0 695 vex_printf("residual %d: %d %d\n", j, rreg_live_after[j], 696 rreg_dead_before[j]); 697 # endif 698 vassert( (rreg_live_after[j] == INVALID_INSTRNO 699 && rreg_dead_before[j] == INVALID_INSTRNO) 700 || 701 (rreg_live_after[j] != INVALID_INSTRNO 702 && rreg_dead_before[j] != INVALID_INSTRNO) 703 ); 704 705 if (rreg_live_after[j] == INVALID_INSTRNO) 706 continue; 707 708 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); 709 if (0) 710 vex_printf("FLUSH 2 (%d,%d)\n", 711 rreg_live_after[j], rreg_dead_before[j]); 712 rreg_lrs_la[rreg_lrs_used].rreg = available_real_regs[j]; 713 rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]); 714 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]); 715 rreg_lrs_used++; 716 } 717 718 /* Compute summary hints for choosing real regs. If a real reg is 719 involved in a hard live range, record that fact in the fixed 720 part of the running rreg_state. Later, when offered a choice between 721 rregs, it's better to choose one which is not marked as having 722 any HLRs, since ones with HLRs may need to be spilled around 723 their HLRs. Correctness of final assignment is unaffected by 724 this mechanism -- it is only an optimisation. */ 725 726 for (j = 0; j < rreg_lrs_used; j++) { 727 rreg = rreg_lrs_la[j].rreg; 728 vassert(!hregIsVirtual(rreg)); 729 /* rreg is involved in a HLR. Record this info in the array, if 730 there is space. */ 731 for (k = 0; k < n_rregs; k++) 732 if (sameHReg(rreg_state[k].rreg, rreg)) 733 break; 734 vassert(k < n_rregs); /* else rreg was not found in rreg_state?! */ 735 rreg_state[k].has_hlrs = True; 736 } 737 if (0) { 738 for (j = 0; j < n_rregs; j++) { 739 if (!rreg_state[j].has_hlrs) 740 continue; 741 ppReg(rreg_state[j].rreg); 742 vex_printf(" hinted\n"); 743 } 744 } 745 746 /* Finally, copy the _la variant into the _db variant and 747 sort both by their respective fields. 
*/ 748 rreg_lrs_db = LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR)); 749 for (j = 0; j < rreg_lrs_used; j++) 750 rreg_lrs_db[j] = rreg_lrs_la[j]; 751 752 sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ ); 753 sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ ); 754 755 /* And set up the cursors. */ 756 rreg_lrs_la_next = 0; 757 rreg_lrs_db_next = 0; 758 759 for (j = 1; j < rreg_lrs_used; j++) { 760 vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after); 761 vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before); 762 } 763 764 /* ------ end of FINALISE RREG LIVE RANGES ------ */ 765 766 # if DEBUG_REGALLOC 767 for (j = 0; j < n_vregs; j++) { 768 vex_printf("vreg %d: la = %d, db = %d\n", 769 j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before ); 770 } 771 # endif 772 773 # if DEBUG_REGALLOC 774 vex_printf("RRegLRs by LA:\n"); 775 for (j = 0; j < rreg_lrs_used; j++) { 776 vex_printf(" "); 777 (*ppReg)(rreg_lrs_la[j].rreg); 778 vex_printf(" la = %d, db = %d\n", 779 rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before ); 780 } 781 vex_printf("RRegLRs by DB:\n"); 782 for (j = 0; j < rreg_lrs_used; j++) { 783 vex_printf(" "); 784 (*ppReg)(rreg_lrs_db[j].rreg); 785 vex_printf(" la = %d, db = %d\n", 786 rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before ); 787 } 788 # endif 789 790 /* --------- Stage 3: allocate spill slots. --------- */ 791 792 /* Each spill slot is 8 bytes long. For vregs which take more than 793 64 bits to spill (classes Flt64 and Vec128), we have to allocate 794 two consecutive spill slots. For 256 bit registers (class 795 Vec256), we have to allocate four consecutive spill slots. 796 797 For Vec128-class on PowerPC, the spill slot's actual address 798 must be 16-byte aligned. 
Since the spill slot's address is 799 computed as an offset from the guest state pointer, and since 800 the user of the generated code must set that pointer to a 801 32-aligned value, we have the residual obligation here of 802 choosing a 16-aligned spill slot offset for Vec128-class values. 803 Since each spill slot is 8 bytes long, that means for 804 Vec128-class values we must allocated a spill slot number which 805 is zero mod 2. 806 807 Similarly, for Vec256 calss on amd64, find a spill slot number 808 which is zero mod 4. This guarantees it will be 32 byte 809 aligned, which isn't actually necessary on amd64 (we use movUpd 810 etc to spill), but seems like good practice. 811 812 Do a rank-based allocation of vregs to spill slot numbers. We 813 put as few values as possible in spill slots, but nevertheless 814 need to have a spill slot available for all vregs, just in case. 815 */ 816 /* max_ss_no = -1; */ 817 818 for (j = 0; j < N_SPILL64S; j++) 819 ss_busy_until_before[j] = 0; 820 821 for (j = 0; j < n_vregs; j++) { 822 823 /* True iff this vreg is unused. In which case we also expect 824 that the reg_class field for it has not been set. */ 825 if (vreg_lrs[j].live_after == INVALID_INSTRNO) { 826 vassert(vreg_lrs[j].reg_class == HRcINVALID); 827 continue; 828 } 829 830 /* The spill slots are 64 bits in size. As per the comment on 831 definition of HRegClass in host_generic_regs.h, that means, 832 to spill a vreg of class Flt64 or Vec128, we'll need to find 833 two adjacent spill slots to use. For Vec256, we'll need to 834 find four adjacent slots to use. Note, this logic needs to 835 kept in sync with the size info on the definition of 836 HRegClass. */ 837 switch (vreg_lrs[j].reg_class) { 838 839 case HRcVec128: case HRcFlt64: 840 /* Find two adjacent free slots in which between them 841 provide up to 128 bits in which to spill the vreg. 842 Since we are trying to find an even:odd pair, move 843 along in steps of 2 (slots). 
*/ 844 for (k = 0; k < N_SPILL64S-1; k += 2) 845 if (ss_busy_until_before[k+0] <= vreg_lrs[j].live_after 846 && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after) 847 break; 848 if (k >= N_SPILL64S-1) { 849 vpanic("LibVEX_N_SPILL_BYTES is too low. " 850 "Increase and recompile."); 851 } 852 if (0) vex_printf("16-byte spill offset in spill slot %d\n", 853 (Int)k); 854 ss_busy_until_before[k+0] = vreg_lrs[j].dead_before; 855 ss_busy_until_before[k+1] = vreg_lrs[j].dead_before; 856 break; 857 858 default: 859 /* The ordinary case -- just find a single spill slot. */ 860 /* Find the lowest-numbered spill slot which is available 861 at the start point of this interval, and assign the 862 interval to it. */ 863 for (k = 0; k < N_SPILL64S; k++) 864 if (ss_busy_until_before[k] <= vreg_lrs[j].live_after) 865 break; 866 if (k == N_SPILL64S) { 867 vpanic("LibVEX_N_SPILL_BYTES is too low. " 868 "Increase and recompile."); 869 } 870 ss_busy_until_before[k] = vreg_lrs[j].dead_before; 871 break; 872 873 } /* switch (vreg_lrs[j].reg_class) { */ 874 875 /* This reflects LibVEX's hard-wired knowledge of the baseBlock 876 layout: the guest state, then two equal sized areas following 877 it for two sets of shadow state, and then the spill area. */ 878 vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8); 879 880 /* Independent check that we've made a sane choice of slot */ 881 sanity_check_spill_offset( &vreg_lrs[j] ); 882 /* if (j > max_ss_no) */ 883 /* max_ss_no = j; */ 884 } 885 886 # if 0 887 vex_printf("\n\n"); 888 for (j = 0; j < n_vregs; j++) 889 vex_printf("vreg %d --> spill offset %d\n", 890 j, vreg_lrs[j].spill_offset); 891 # endif 892 893 /* --------- Stage 4: establish rreg preferences --------- */ 894 895 /* It may be advantageous to allocating certain vregs to specific 896 rregs, as a way of avoiding reg-reg moves later. Here we 897 establish which, if any, rreg each vreg would prefer to be in. 
898 Note that this constrains the allocator -- ideally we end up 899 with as few as possible vregs expressing a preference. 900 901 This is an optimisation: if the .preferred_rreg field is never 902 set to anything different from INVALID_HREG, the allocator still 903 works. */ 904 905 /* 30 Dec 04: removed this mechanism as it does not seem to 906 help. */ 907 908 /* --------- Stage 5: process instructions --------- */ 909 910 /* This is the main loop of the allocator. First, we need to 911 correctly set up our running state, which tracks the status of 912 each real register. */ 913 914 /* ------ BEGIN: Process each insn in turn. ------ */ 915 916 for (ii = 0; ii < instrs_in->arr_used; ii++) { 917 918 # if DEBUG_REGALLOC 919 vex_printf("\n====----====---- Insn %d ----====----====\n", ii); 920 vex_printf("---- "); 921 (*ppInstr)(instrs_in->arr[ii], mode64); 922 vex_printf("\n\nInitial state:\n"); 923 PRINT_STATE; 924 vex_printf("\n"); 925 # endif 926 927 /* ------------ Sanity checks ------------ */ 928 929 /* Sanity checks are expensive. So they are done only once 930 every 7 instructions, and just before the last 931 instruction. */ 932 do_sanity_check 933 = toBool( 934 False /* Set to True for sanity checking of all insns. */ 935 || ii == instrs_in->arr_used-1 936 || (ii > 0 && (ii % 7) == 0) 937 ); 938 939 if (do_sanity_check) { 940 941 /* Sanity check 1: all rregs with a hard live range crossing 942 this insn must be marked as unavailable in the running 943 state. */ 944 for (j = 0; j < rreg_lrs_used; j++) { 945 if (rreg_lrs_la[j].live_after < ii 946 && ii < rreg_lrs_la[j].dead_before) { 947 /* ii is the middle of a hard live range for some real 948 reg. Check it's marked as such in the running 949 state. */ 950 951 # if 0 952 vex_printf("considering la %d .. 
db %d reg = ", 953 rreg_lrs[j].live_after, 954 rreg_lrs[j].dead_before); 955 (*ppReg)(rreg_lrs[j].rreg); 956 vex_printf("\n"); 957 # endif 958 959 /* find the state entry for this rreg */ 960 for (k = 0; k < n_rregs; k++) 961 if (sameHReg(rreg_state[k].rreg, rreg_lrs_la[j].rreg)) 962 break; 963 964 /* and assert that this rreg is marked as unavailable */ 965 vassert(rreg_state[k].disp == Unavail); 966 } 967 } 968 969 /* Sanity check 2: conversely, all rregs marked as 970 unavailable in the running rreg_state must have a 971 corresponding hard live range entry in the rreg_lrs 972 array. */ 973 for (j = 0; j < n_available_real_regs; j++) { 974 vassert(rreg_state[j].disp == Bound 975 || rreg_state[j].disp == Free 976 || rreg_state[j].disp == Unavail); 977 if (rreg_state[j].disp != Unavail) 978 continue; 979 for (k = 0; k < rreg_lrs_used; k++) 980 if (sameHReg(rreg_lrs_la[k].rreg, rreg_state[j].rreg) 981 && rreg_lrs_la[k].live_after < ii 982 && ii < rreg_lrs_la[k].dead_before) 983 break; 984 /* If this vassertion fails, we couldn't find a 985 corresponding HLR. */ 986 vassert(k < rreg_lrs_used); 987 } 988 989 /* Sanity check 3: all vreg-rreg bindings must bind registers 990 of the same class. */ 991 for (j = 0; j < n_rregs; j++) { 992 if (rreg_state[j].disp != Bound) { 993 vassert(rreg_state[j].eq_spill_slot == False); 994 continue; 995 } 996 vassert(hregClass(rreg_state[j].rreg) 997 == hregClass(rreg_state[j].vreg)); 998 vassert( hregIsVirtual(rreg_state[j].vreg)); 999 vassert(!hregIsVirtual(rreg_state[j].rreg)); 1000 } 1001 1002 /* Sanity check 4: the vreg_state and rreg_state 1003 mutually-redundant mappings are consistent. If 1004 rreg_state[j].vreg points at some vreg_state entry then 1005 that vreg_state entry should point back at 1006 rreg_state[j]. 
*/ 1007 for (j = 0; j < n_rregs; j++) { 1008 if (rreg_state[j].disp != Bound) 1009 continue; 1010 k = hregNumber(rreg_state[j].vreg); 1011 vassert(IS_VALID_VREGNO(k)); 1012 vassert(vreg_state[k] == j); 1013 } 1014 for (j = 0; j < n_vregs; j++) { 1015 k = vreg_state[j]; 1016 if (k == INVALID_RREG_NO) 1017 continue; 1018 vassert(IS_VALID_RREGNO(k)); 1019 vassert(rreg_state[k].disp == Bound); 1020 vassert(hregNumber(rreg_state[k].vreg) == j); 1021 } 1022 1023 } /* if (do_sanity_check) */ 1024 1025 /* ------------ end of Sanity checks ------------ */ 1026 1027 /* Do various optimisations pertaining to register coalescing 1028 and preferencing: 1029 MOV v <-> v coalescing (done here). 1030 MOV v <-> r coalescing (not yet, if ever) 1031 */ 1032 /* If doing a reg-reg move between two vregs, and the src's live 1033 range ends here and the dst's live range starts here, bind 1034 the dst to the src's rreg, and that's all. */ 1035 if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) { 1036 if (!hregIsVirtual(vregS)) goto cannot_coalesce; 1037 if (!hregIsVirtual(vregD)) goto cannot_coalesce; 1038 /* Check that *isMove is not telling us a bunch of lies ... */ 1039 vassert(hregClass(vregS) == hregClass(vregD)); 1040 k = hregNumber(vregS); 1041 m = hregNumber(vregD); 1042 vassert(IS_VALID_VREGNO(k)); 1043 vassert(IS_VALID_VREGNO(m)); 1044 if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce; 1045 if (vreg_lrs[m].live_after != ii) goto cannot_coalesce; 1046 # if DEBUG_REGALLOC 1047 vex_printf("COALESCE "); 1048 (*ppReg)(vregS); 1049 vex_printf(" -> "); 1050 (*ppReg)(vregD); 1051 vex_printf("\n\n"); 1052 # endif 1053 /* Find the state entry for vregS. */ 1054 for (m = 0; m < n_rregs; m++) 1055 if (rreg_state[m].disp == Bound 1056 && sameHReg(rreg_state[m].vreg, vregS)) 1057 break; 1058 if (m == n_rregs) 1059 /* We failed to find a binding for vregS, which means it's 1060 currently not in a register. So we can't do the 1061 coalescing. Give up. 
*/ 1062 goto cannot_coalesce; 1063 1064 /* Finally, we can do the coalescing. It's trivial -- merely 1065 claim vregS's register for vregD. */ 1066 rreg_state[m].vreg = vregD; 1067 vassert(IS_VALID_VREGNO(hregNumber(vregD))); 1068 vassert(IS_VALID_VREGNO(hregNumber(vregS))); 1069 vreg_state[hregNumber(vregD)] = toShort(m); 1070 vreg_state[hregNumber(vregS)] = INVALID_RREG_NO; 1071 1072 /* This rreg has become associated with a different vreg and 1073 hence with a different spill slot. Play safe. */ 1074 rreg_state[m].eq_spill_slot = False; 1075 1076 /* Move on to the next insn. We skip the post-insn stuff for 1077 fixed registers, since this move should not interact with 1078 them in any way. */ 1079 continue; 1080 } 1081 cannot_coalesce: 1082 1083 /* ------ Free up rregs bound to dead vregs ------ */ 1084 1085 /* Look for vregs whose live range has just ended, and 1086 mark the associated rreg as free. */ 1087 1088 for (j = 0; j < n_rregs; j++) { 1089 if (rreg_state[j].disp != Bound) 1090 continue; 1091 UInt vregno = hregNumber(rreg_state[j].vreg); 1092 vassert(IS_VALID_VREGNO(vregno)); 1093 if (vreg_lrs[vregno].dead_before <= ii) { 1094 rreg_state[j].disp = Free; 1095 rreg_state[j].eq_spill_slot = False; 1096 m = hregNumber(rreg_state[j].vreg); 1097 vassert(IS_VALID_VREGNO(m)); 1098 vreg_state[m] = INVALID_RREG_NO; 1099 if (DEBUG_REGALLOC) { 1100 vex_printf("free up "); 1101 (*ppReg)(rreg_state[j].rreg); 1102 vex_printf("\n"); 1103 } 1104 } 1105 } 1106 1107 /* ------ Pre-instruction actions for fixed rreg uses ------ */ 1108 1109 /* Now we have to deal with rregs which are about to be made 1110 live by this instruction -- in other words, are entering into 1111 one of their live ranges. If any such rreg holds a vreg, we 1112 will have to free up the rreg. The simplest solution which 1113 is correct is to spill the rreg. 
1114 1115 Note we could do better: 1116 * Could move it into some other free rreg, if one is available 1117 1118 Do this efficiently, by incrementally stepping along an array 1119 of rreg HLRs that are known to be sorted by start point 1120 (their .live_after field). 1121 */ 1122 while (True) { 1123 vassert(rreg_lrs_la_next >= 0); 1124 vassert(rreg_lrs_la_next <= rreg_lrs_used); 1125 if (rreg_lrs_la_next == rreg_lrs_used) 1126 break; /* no more real reg live ranges to consider */ 1127 if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after) 1128 break; /* next live range does not yet start */ 1129 vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after); 1130 /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up. 1131 Find the associated rreg_state entry. */ 1132 /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after. 1133 Real register live ranges are guaranteed to be well-formed 1134 in that they start with a write to the register -- Stage 2 1135 rejects any code not satisfying this. So the correct 1136 question to ask is whether 1137 rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is, 1138 whether the reg becomes live after this insn -- rather 1139 than before it. */ 1140 # if DEBUG_REGALLOC 1141 vex_printf("need to free up rreg: "); 1142 (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg); 1143 vex_printf("\n\n"); 1144 # endif 1145 for (k = 0; k < n_rregs; k++) 1146 if (sameHReg(rreg_state[k].rreg, rreg_lrs_la[rreg_lrs_la_next].rreg)) 1147 break; 1148 /* If this fails, we don't have an entry for this rreg. 1149 Which we should. */ 1150 vassert(IS_VALID_RREGNO(k)); 1151 m = hregNumber(rreg_state[k].vreg); 1152 if (rreg_state[k].disp == Bound) { 1153 /* Yes, there is an associated vreg. Spill it if it's 1154 still live. 
*/ 1155 vassert(IS_VALID_VREGNO(m)); 1156 vreg_state[m] = INVALID_RREG_NO; 1157 if (vreg_lrs[m].dead_before > ii) { 1158 vassert(vreg_lrs[m].reg_class != HRcINVALID); 1159 if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) { 1160 HInstr* spill1 = NULL; 1161 HInstr* spill2 = NULL; 1162 (*genSpill)( &spill1, &spill2, rreg_state[k].rreg, 1163 vreg_lrs[m].spill_offset, mode64 ); 1164 vassert(spill1 || spill2); /* can't both be NULL */ 1165 if (spill1) 1166 EMIT_INSTR(spill1); 1167 if (spill2) 1168 EMIT_INSTR(spill2); 1169 } 1170 rreg_state[k].eq_spill_slot = True; 1171 } 1172 } 1173 rreg_state[k].disp = Unavail; 1174 rreg_state[k].vreg = INVALID_HREG; 1175 rreg_state[k].eq_spill_slot = False; 1176 1177 /* check for further rregs entering HLRs at this point */ 1178 rreg_lrs_la_next++; 1179 } 1180 1181 1182 # if DEBUG_REGALLOC 1183 vex_printf("After pre-insn actions for fixed regs:\n"); 1184 PRINT_STATE; 1185 vex_printf("\n"); 1186 # endif 1187 1188 1189 /* ------ Deal with the current instruction. ------ */ 1190 1191 /* Finally we can begin the processing of this instruction 1192 itself. The aim is to free up enough rregs for this insn. 1193 This may generate spill stores since we may have to evict 1194 some vregs currently in rregs. Also generates spill loads. 1195 We also build up the final vreg->rreg mapping to be applied 1196 to the insn. */ 1197 1198 (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 ); 1199 1200 initHRegRemap(&remap); 1201 1202 /* ------------ BEGIN directReload optimisation ----------- */ 1203 1204 /* If the instruction reads exactly one vreg which is currently 1205 in a spill slot, and this is the last use of that vreg, see if we 1206 can convert the instruction into one that reads directly from the 1207 spill slot. This is clearly only possible for x86 and amd64 1208 targets, since ppc and arm are load-store architectures. 
If 1209 successful, replace instrs_in->arr[ii] with this new 1210 instruction, and recompute its reg usage, so that the change 1211 is invisible to the standard-case handling that follows. */ 1212 1213 if (directReload && reg_usage.n_used <= 2) { 1214 Bool debug_direct_reload = True && False; 1215 HReg cand = INVALID_HREG; 1216 Bool nreads = 0; 1217 Short spilloff = 0; 1218 1219 for (j = 0; j < reg_usage.n_used; j++) { 1220 1221 vreg = reg_usage.hreg[j]; 1222 1223 if (!hregIsVirtual(vreg)) 1224 continue; 1225 1226 if (reg_usage.mode[j] == HRmRead) { 1227 nreads++; 1228 m = hregNumber(vreg); 1229 vassert(IS_VALID_VREGNO(m)); 1230 k = vreg_state[m]; 1231 if (!IS_VALID_RREGNO(k)) { 1232 /* ok, it is spilled. Now, is this its last use? */ 1233 vassert(vreg_lrs[m].dead_before >= ii+1); 1234 if (vreg_lrs[m].dead_before == ii+1 1235 && hregIsInvalid(cand)) { 1236 spilloff = vreg_lrs[m].spill_offset; 1237 cand = vreg; 1238 } 1239 } 1240 } 1241 } 1242 1243 if (nreads == 1 && ! hregIsInvalid(cand)) { 1244 HInstr* reloaded; 1245 if (reg_usage.n_used == 2) 1246 vassert(! sameHReg(reg_usage.hreg[0], reg_usage.hreg[1])); 1247 1248 reloaded = directReload ( instrs_in->arr[ii], cand, spilloff ); 1249 if (debug_direct_reload && !reloaded) { 1250 vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" "); 1251 ppInstr(instrs_in->arr[ii], mode64); 1252 } 1253 if (reloaded) { 1254 /* Update info about the insn, so it looks as if it had 1255 been in this form all along. */ 1256 instrs_in->arr[ii] = reloaded; 1257 (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 ); 1258 if (debug_direct_reload && !reloaded) { 1259 vex_printf(" --> "); 1260 ppInstr(reloaded, mode64); 1261 } 1262 } 1263 1264 if (debug_direct_reload && !reloaded) 1265 vex_printf("\n"); 1266 } 1267 1268 } 1269 1270 /* ------------ END directReload optimisation ------------ */ 1271 1272 /* for each reg mentioned in the insn. 
*/ 1273 for (j = 0; j < reg_usage.n_used; j++) { 1274 1275 vreg = reg_usage.hreg[j]; 1276 1277 /* only interested in virtual registers right now. */ 1278 if (!hregIsVirtual(vreg)) 1279 continue; 1280 1281 # if 0 1282 vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n"); 1283 # endif 1284 1285 /* Now we're trying to find a rreg for "vreg". First of all, 1286 if it already has an rreg assigned, we don't need to do 1287 anything more. Search the current state to find out. */ 1288 m = hregNumber(vreg); 1289 vassert(IS_VALID_VREGNO(m)); 1290 k = vreg_state[m]; 1291 if (IS_VALID_RREGNO(k)) { 1292 vassert(rreg_state[k].disp == Bound); 1293 addToHRegRemap(&remap, vreg, rreg_state[k].rreg); 1294 /* If this rreg is written or modified, mark it as different 1295 from any spill slot value. */ 1296 if (reg_usage.mode[j] != HRmRead) 1297 rreg_state[k].eq_spill_slot = False; 1298 continue; 1299 } else { 1300 vassert(k == INVALID_RREG_NO); 1301 } 1302 1303 /* No luck. The next thing to do is see if there is a 1304 currently free rreg available, of the correct class. If 1305 so, bag it. NOTE, we could improve this by selecting an 1306 rreg for which the next live-range event is as far ahead 1307 as possible. */ 1308 k_suboptimal = -1; 1309 for (k = 0; k < n_rregs; k++) { 1310 if (rreg_state[k].disp != Free 1311 || hregClass(rreg_state[k].rreg) != hregClass(vreg)) 1312 continue; 1313 if (rreg_state[k].has_hlrs) { 1314 /* Well, at least we can use k_suboptimal if we really 1315 have to. Keep on looking for a better candidate. */ 1316 k_suboptimal = k; 1317 } else { 1318 /* Found a preferable reg. Use it. 
*/ 1319 k_suboptimal = -1; 1320 break; 1321 } 1322 } 1323 if (k_suboptimal >= 0) 1324 k = k_suboptimal; 1325 1326 if (k < n_rregs) { 1327 rreg_state[k].disp = Bound; 1328 rreg_state[k].vreg = vreg; 1329 m = hregNumber(vreg); 1330 vassert(IS_VALID_VREGNO(m)); 1331 vreg_state[m] = toShort(k); 1332 addToHRegRemap(&remap, vreg, rreg_state[k].rreg); 1333 /* Generate a reload if needed. This only creates needed 1334 reloads because the live range builder for vregs will 1335 guarantee that the first event for a vreg is a write. 1336 Hence, if this reference is not a write, it cannot be 1337 the first reference for this vreg, and so a reload is 1338 indeed needed. */ 1339 if (reg_usage.mode[j] != HRmWrite) { 1340 vassert(vreg_lrs[m].reg_class != HRcINVALID); 1341 HInstr* reload1 = NULL; 1342 HInstr* reload2 = NULL; 1343 (*genReload)( &reload1, &reload2, rreg_state[k].rreg, 1344 vreg_lrs[m].spill_offset, mode64 ); 1345 vassert(reload1 || reload2); /* can't both be NULL */ 1346 if (reload1) 1347 EMIT_INSTR(reload1); 1348 if (reload2) 1349 EMIT_INSTR(reload2); 1350 /* This rreg is read or modified by the instruction. 1351 If it's merely read we can claim it now equals the 1352 spill slot, but not so if it is modified. */ 1353 if (reg_usage.mode[j] == HRmRead) { 1354 rreg_state[k].eq_spill_slot = True; 1355 } else { 1356 vassert(reg_usage.mode[j] == HRmModify); 1357 rreg_state[k].eq_spill_slot = False; 1358 } 1359 } else { 1360 rreg_state[k].eq_spill_slot = False; 1361 } 1362 1363 continue; 1364 } 1365 1366 /* Well, now we have no option but to spill a vreg. It's 1367 important to make a good choice of vreg to spill, and of 1368 course we need to be careful not to spill a vreg which is 1369 needed by this insn. */ 1370 1371 /* First, mark in the rreg_state, those rregs which are not spill 1372 candidates, due to holding a vreg mentioned by this 1373 instruction. Or being of the wrong class. 
*/ 1374 for (k = 0; k < n_rregs; k++) { 1375 rreg_state[k].is_spill_cand = False; 1376 if (rreg_state[k].disp != Bound) 1377 continue; 1378 if (hregClass(rreg_state[k].rreg) != hregClass(vreg)) 1379 continue; 1380 rreg_state[k].is_spill_cand = True; 1381 for (m = 0; m < reg_usage.n_used; m++) { 1382 if (sameHReg(rreg_state[k].vreg, reg_usage.hreg[m])) { 1383 rreg_state[k].is_spill_cand = False; 1384 break; 1385 } 1386 } 1387 } 1388 1389 /* We can choose to spill any rreg satisfying 1390 rreg_state[r].is_spill_cand (so to speak). Choose r so that 1391 the next use of its associated vreg is as far ahead as 1392 possible, in the hope that this will minimise the number 1393 of consequent reloads required. */ 1394 spillee 1395 = findMostDistantlyMentionedVReg ( 1396 getRegUsage, instrs_in, ii+1, rreg_state, n_rregs, mode64 ); 1397 1398 if (spillee == -1) { 1399 /* Hmmmmm. There don't appear to be any spill candidates. 1400 We're hosed. */ 1401 vex_printf("reg_alloc: can't find a register in class: "); 1402 ppHRegClass(hregClass(vreg)); 1403 vex_printf("\n"); 1404 vpanic("reg_alloc: can't create a free register."); 1405 } 1406 1407 /* Right. So we're going to spill rreg_state[spillee]. */ 1408 vassert(IS_VALID_RREGNO(spillee)); 1409 vassert(rreg_state[spillee].disp == Bound); 1410 /* check it's the right class */ 1411 vassert(hregClass(rreg_state[spillee].rreg) == hregClass(vreg)); 1412 /* check we're not ejecting the vreg for which we are trying 1413 to free up a register. */ 1414 vassert(! sameHReg(rreg_state[spillee].vreg, vreg)); 1415 1416 m = hregNumber(rreg_state[spillee].vreg); 1417 vassert(IS_VALID_VREGNO(m)); 1418 1419 /* So here's the spill store. Assert that we're spilling a 1420 live vreg. 
*/ 1421 vassert(vreg_lrs[m].dead_before > ii); 1422 vassert(vreg_lrs[m].reg_class != HRcINVALID); 1423 if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) { 1424 HInstr* spill1 = NULL; 1425 HInstr* spill2 = NULL; 1426 (*genSpill)( &spill1, &spill2, rreg_state[spillee].rreg, 1427 vreg_lrs[m].spill_offset, mode64 ); 1428 vassert(spill1 || spill2); /* can't both be NULL */ 1429 if (spill1) 1430 EMIT_INSTR(spill1); 1431 if (spill2) 1432 EMIT_INSTR(spill2); 1433 } 1434 1435 /* Update the rreg_state to reflect the new assignment for this 1436 rreg. */ 1437 rreg_state[spillee].vreg = vreg; 1438 vreg_state[m] = INVALID_RREG_NO; 1439 1440 rreg_state[spillee].eq_spill_slot = False; /* be safe */ 1441 1442 m = hregNumber(vreg); 1443 vassert(IS_VALID_VREGNO(m)); 1444 vreg_state[m] = toShort(spillee); 1445 1446 /* Now, if this vreg is being read or modified (as opposed to 1447 written), we have to generate a reload for it. */ 1448 if (reg_usage.mode[j] != HRmWrite) { 1449 vassert(vreg_lrs[m].reg_class != HRcINVALID); 1450 HInstr* reload1 = NULL; 1451 HInstr* reload2 = NULL; 1452 (*genReload)( &reload1, &reload2, rreg_state[spillee].rreg, 1453 vreg_lrs[m].spill_offset, mode64 ); 1454 vassert(reload1 || reload2); /* can't both be NULL */ 1455 if (reload1) 1456 EMIT_INSTR(reload1); 1457 if (reload2) 1458 EMIT_INSTR(reload2); 1459 /* This rreg is read or modified by the instruction. 1460 If it's merely read we can claim it now equals the 1461 spill slot, but not so if it is modified. */ 1462 if (reg_usage.mode[j] == HRmRead) { 1463 rreg_state[spillee].eq_spill_slot = True; 1464 } else { 1465 vassert(reg_usage.mode[j] == HRmModify); 1466 rreg_state[spillee].eq_spill_slot = False; 1467 } 1468 } 1469 1470 /* So after much twisting and turning, we have vreg mapped to 1471 rreg_state[spillee].rreg. Note that in the map. */ 1472 addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg); 1473 1474 } /* iterate over registers in this instruction. 
*/ 1475 1476 /* We've finished clowning around with registers in this instruction. 1477 Three results: 1478 - the running rreg_state[] has been updated 1479 - a suitable vreg->rreg mapping for this instruction has been 1480 constructed 1481 - spill and reload instructions may have been emitted. 1482 1483 The final step is to apply the mapping to the instruction, 1484 and emit that. 1485 */ 1486 1487 /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */ 1488 (*mapRegs)( &remap, instrs_in->arr[ii], mode64 ); 1489 EMIT_INSTR( instrs_in->arr[ii] ); 1490 1491 # if DEBUG_REGALLOC 1492 vex_printf("After dealing with current insn:\n"); 1493 PRINT_STATE; 1494 vex_printf("\n"); 1495 # endif 1496 1497 /* ------ Post-instruction actions for fixed rreg uses ------ */ 1498 1499 /* Now we need to check for rregs exiting fixed live ranges 1500 after this instruction, and if so mark them as free. */ 1501 while (True) { 1502 vassert(rreg_lrs_db_next >= 0); 1503 vassert(rreg_lrs_db_next <= rreg_lrs_used); 1504 if (rreg_lrs_db_next == rreg_lrs_used) 1505 break; /* no more real reg live ranges to consider */ 1506 if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before) 1507 break; /* next live range has not yet ended */ 1508 vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before); 1509 /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live 1510 range. Mark it as such in the main rreg_state array. */ 1511 for (k = 0; k < n_rregs; k++) 1512 if (sameHReg(rreg_state[k].rreg, rreg_lrs_db[rreg_lrs_db_next].rreg)) 1513 break; 1514 /* If this vassertion fails, we don't have an entry for 1515 this rreg. Which we should. 
*/ 1516 vassert(k < n_rregs); 1517 vassert(rreg_state[k].disp == Unavail); 1518 rreg_state[k].disp = Free; 1519 rreg_state[k].vreg = INVALID_HREG; 1520 rreg_state[k].eq_spill_slot = False; 1521 1522 /* check for further rregs leaving HLRs at this point */ 1523 rreg_lrs_db_next++; 1524 } 1525 1526 # if DEBUG_REGALLOC 1527 vex_printf("After post-insn actions for fixed regs:\n"); 1528 PRINT_STATE; 1529 vex_printf("\n"); 1530 # endif 1531 1532 } /* iterate over insns */ 1533 1534 /* ------ END: Process each insn in turn. ------ */ 1535 1536 /* free(rreg_state); */ 1537 /* free(rreg_lrs); */ 1538 /* if (vreg_lrs) free(vreg_lrs); */ 1539 1540 /* Paranoia */ 1541 for (j = 0; j < n_rregs; j++) 1542 vassert(sameHReg(rreg_state[j].rreg, available_real_regs[j])); 1543 1544 vassert(rreg_lrs_la_next == rreg_lrs_used); 1545 vassert(rreg_lrs_db_next == rreg_lrs_used); 1546 1547 return instrs_out; 1548 1549 # undef INVALID_INSTRNO 1550 # undef EMIT_INSTR 1551 # undef PRINT_STATE 1552 } 1553 1554 1555 1556 /*---------------------------------------------------------------*/ 1557 /*--- host_reg_alloc2.c ---*/ 1558 /*---------------------------------------------------------------*/ 1559