1 2 /*---------------------------------------------------------------*/ 3 /*--- begin host_reg_alloc2.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2012 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 #include "libvex_basictypes.h" 37 #include "libvex.h" 38 39 #include "main_util.h" 40 #include "host_generic_regs.h" 41 42 /* Set to 1 for lots of debugging output. */ 43 #define DEBUG_REGALLOC 0 44 45 46 /* TODO 27 Oct 04: 47 48 Better consistency checking from what isMove tells us. 49 50 We can possibly do V-V coalescing even when the src is spilled, 51 providing we can arrange for the dst to have the same spill slot. 52 53 Note that state[].hreg is the same as the available real regs. 54 55 Generally rationalise data structures. */ 56 57 58 /* Records information on virtual register live ranges. Computed once 59 and remains unchanged after that. */ 60 typedef 61 struct { 62 /* Becomes live for the first time after this insn ... */ 63 Short live_after; 64 /* Becomes dead for the last time before this insn ... */ 65 Short dead_before; 66 /* The "home" spill slot, if needed. Never changes. */ 67 Short spill_offset; 68 Short spill_size; 69 /* What kind of register this is. */ 70 HRegClass reg_class; 71 } 72 VRegLR; 73 74 75 /* Records information on real-register live ranges. Computed once 76 and remains unchanged after that. */ 77 typedef 78 struct { 79 HReg rreg; 80 /* Becomes live after this insn ... */ 81 Short live_after; 82 /* Becomes dead before this insn ... */ 83 Short dead_before; 84 } 85 RRegLR; 86 87 88 /* An array of the following structs (rreg_state) comprises the 89 running state of the allocator. It indicates what the current 90 disposition of each allocatable real register is. The array gets 91 updated as the allocator processes instructions. */ 92 typedef 93 struct { 94 /* ------ FIELDS WHICH DO NOT CHANGE ------ */ 95 /* Which rreg is this for? */ 96 HReg rreg; 97 /* Is this involved in any HLRs? (only an optimisation hint) */ 98 Bool has_hlrs; 99 /* ------ FIELDS WHICH DO CHANGE ------ */ 100 /* 6 May 07: rearranged fields below so the whole struct fits 101 into 16 bytes on both x86 and amd64. */ 102 /* Used when .disp == Bound and we are looking for vregs to 103 spill. */ 104 Bool is_spill_cand; 105 /* Optimisation: used when .disp == Bound. Indicates when the 106 rreg has the same value as the spill slot for the associated 107 vreg. Is safely left at False, and becomes True after a 108 spill store or reload for this rreg. */ 109 Bool eq_spill_slot; 110 /* What's it's current disposition? */ 111 enum { Free, /* available for use */ 112 Unavail, /* in a real-reg live range */ 113 Bound /* in use (holding value of some vreg) */ 114 } 115 disp; 116 /* If .disp == Bound, what vreg is it bound to? */ 117 HReg vreg; 118 } 119 RRegState; 120 121 122 /* The allocator also maintains a redundant array of indexes 123 (vreg_state) from vreg numbers back to entries in rreg_state. It 124 is redundant because iff vreg_state[i] == j then 125 hregNumber(rreg_state[j].vreg) == i -- that is, the two entries 126 point at each other. The purpose of this is to speed up activities 127 which involve looking for a particular vreg: there is no need to 128 scan the rreg_state looking for it, just index directly into 129 vreg_state. The FAQ "does this vreg already have an associated 130 rreg" is the main beneficiary. 131 132 To indicate, in vreg_state[i], that a given vreg is not currently 133 associated with any rreg, that entry can be set to INVALID_RREG_NO. 134 135 Because the vreg_state entries are signed Shorts, the max number 136 of vregs that can be handed by regalloc is 32767. 137 */ 138 139 #define INVALID_RREG_NO ((Short)(-1)) 140 141 #define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs) 142 #define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs) 143 144 145 /* Does this instruction mention a particular reg? */ 146 static Bool instrMentionsReg ( 147 void (*getRegUsage) (HRegUsage*, HInstr*, Bool), 148 HInstr* instr, 149 HReg r, 150 Bool mode64 151 ) 152 { 153 Int i; 154 HRegUsage reg_usage; 155 (*getRegUsage)(®_usage, instr, mode64); 156 for (i = 0; i < reg_usage.n_used; i++) 157 if (reg_usage.hreg[i] == r) 158 return True; 159 return False; 160 } 161 162 163 /* Search forward from some given point in the incoming instruction 164 sequence. Point is to select a virtual register to spill, by 165 finding the vreg which is mentioned as far ahead as possible, in 166 the hope that this will minimise the number of consequent reloads. 167 168 Only do the search for vregs which are Bound in the running state, 169 and for which the .is_spill_cand field is set. This allows the 170 caller to arbitrarily restrict the set of spill candidates to be 171 considered. 172 173 Returns an index into the state array indicating the (v,r) pair to 174 spill, or -1 if none was found. */ 175 static 176 Int findMostDistantlyMentionedVReg ( 177 void (*getRegUsage) (HRegUsage*, HInstr*, Bool), 178 HInstrArray* instrs_in, 179 Int search_from_instr, 180 RRegState* state, 181 Int n_state, 182 Bool mode64 183 ) 184 { 185 Int k, m; 186 Int furthest_k = -1; 187 Int furthest = -1; 188 vassert(search_from_instr >= 0); 189 for (k = 0; k < n_state; k++) { 190 if (!state[k].is_spill_cand) 191 continue; 192 vassert(state[k].disp == Bound); 193 for (m = search_from_instr; m < instrs_in->arr_used; m++) { 194 if (instrMentionsReg(getRegUsage, 195 instrs_in->arr[m], state[k].vreg, mode64)) 196 break; 197 } 198 if (m > furthest) { 199 furthest = m; 200 furthest_k = k; 201 } 202 } 203 return furthest_k; 204 } 205 206 207 /* Check that this vreg has been assigned a sane spill offset. */ 208 static inline void sanity_check_spill_offset ( VRegLR* vreg ) 209 { 210 switch (vreg->reg_class) { 211 case HRcVec128: case HRcFlt64: 212 vassert(0 == ((UShort)vreg->spill_offset % 16)); break; 213 default: 214 vassert(0 == ((UShort)vreg->spill_offset % 8)); break; 215 } 216 } 217 218 219 /* Double the size of the real-reg live-range array, if needed. */ 220 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used ) 221 { 222 Int k; 223 RRegLR* arr2; 224 if (used < *size) return; 225 if (0) 226 vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size); 227 vassert(used == *size); 228 arr2 = LibVEX_Alloc(2 * *size * sizeof(RRegLR)); 229 for (k = 0; k < *size; k++) 230 arr2[k] = (*info)[k]; 231 *size *= 2; 232 *info = arr2; 233 } 234 235 236 /* Sort an array of RRegLR entries by either the .live_after or 237 .dead_before fields. This is performance-critical. */ 238 static void sortRRLRarray ( RRegLR* arr, 239 Int size, Bool by_live_after ) 240 { 241 Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 242 9841, 29524, 88573, 265720, 243 797161, 2391484 }; 244 Int lo = 0; 245 Int hi = size-1; 246 Int i, j, h, bigN, hp; 247 RRegLR v; 248 249 vassert(size >= 0); 250 if (size == 0) 251 return; 252 253 bigN = hi - lo + 1; if (bigN < 2) return; 254 hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--; 255 256 if (by_live_after) { 257 258 for ( ; hp >= 0; hp--) { 259 h = incs[hp]; 260 for (i = lo + h; i <= hi; i++) { 261 v = arr[i]; 262 j = i; 263 while (arr[j-h].live_after > v.live_after) { 264 arr[j] = arr[j-h]; 265 j = j - h; 266 if (j <= (lo + h - 1)) break; 267 } 268 arr[j] = v; 269 } 270 } 271 272 } else { 273 274 for ( ; hp >= 0; hp--) { 275 h = incs[hp]; 276 for (i = lo + h; i <= hi; i++) { 277 v = arr[i]; 278 j = i; 279 while (arr[j-h].dead_before > v.dead_before) { 280 arr[j] = arr[j-h]; 281 j = j - h; 282 if (j <= (lo + h - 1)) break; 283 } 284 arr[j] = v; 285 } 286 } 287 288 } 289 } 290 291 292 /* A target-independent register allocator. Requires various 293 functions which it uses to deal abstractly with instructions and 294 registers, since it cannot have any target-specific knowledge. 295 296 Returns a new list of instructions, which, as a result of the 297 behaviour of mapRegs, will be in-place modifications of the 298 original instructions. 299 300 Requires that the incoming code has been generated using 301 vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside 302 that range is a checked run-time error. 303 304 Takes an expandable array of pointers to unallocated insns. 305 Returns an expandable array of pointers to allocated insns. 306 */ 307 HInstrArray* doRegisterAllocation ( 308 309 /* Incoming virtual-registerised code. */ 310 HInstrArray* instrs_in, 311 312 /* An array listing all the real registers the allocator may use, 313 in no particular order. */ 314 HReg* available_real_regs, 315 Int n_available_real_regs, 316 317 /* Return True iff the given insn is a reg-reg move, in which 318 case also return the src and dst regs. */ 319 Bool (*isMove) ( HInstr*, HReg*, HReg* ), 320 321 /* Get info about register usage in this insn. */ 322 void (*getRegUsage) ( HRegUsage*, HInstr*, Bool ), 323 324 /* Apply a reg-reg mapping to an insn. */ 325 void (*mapRegs) ( HRegRemap*, HInstr*, Bool ), 326 327 /* Return one, or, if we're unlucky, two insn(s) to spill/restore a 328 real reg to a spill slot byte offset. The two leading HInstr** 329 args are out parameters, through which the generated insns are 330 returned. Also (optionally) a 'directReload' function, which 331 attempts to replace a given instruction by one which reads 332 directly from a specified spill slot. May be NULL, in which 333 case the optimisation is not attempted. */ 334 void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ), 335 void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ), 336 HInstr* (*directReload) ( HInstr*, HReg, Short ), 337 Int guest_sizeB, 338 339 /* For debug printing only. */ 340 void (*ppInstr) ( HInstr*, Bool ), 341 void (*ppReg) ( HReg ), 342 343 /* 32/64bit mode */ 344 Bool mode64 345 ) 346 { 347 # define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) 348 349 const Bool eq_spill_opt = True; 350 351 /* Iterators and temporaries. */ 352 Int ii, j, k, m, spillee, k_suboptimal; 353 HReg rreg, vreg, vregS, vregD; 354 HRegUsage reg_usage; 355 356 /* Info on vregs and rregs. Computed once and remains 357 unchanged. */ 358 Int n_vregs; 359 VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */ 360 361 /* We keep two copies of the real-reg live range info, one sorted 362 by .live_after and the other by .dead_before. First the 363 unsorted info is created in the _la variant is copied into the 364 _db variant. Once that's done both of them are sorted. 365 We also need two integer cursors which record the next 366 location in the two arrays to consider. */ 367 RRegLR* rreg_lrs_la; 368 RRegLR* rreg_lrs_db; 369 Int rreg_lrs_size; 370 Int rreg_lrs_used; 371 Int rreg_lrs_la_next; 372 Int rreg_lrs_db_next; 373 374 /* Used when constructing vreg_lrs (for allocating stack 375 slots). */ 376 Int ss_busy_until_before[N_SPILL64S]; 377 378 /* Used when constructing rreg_lrs. */ 379 Int* rreg_live_after; 380 Int* rreg_dead_before; 381 382 /* Running state of the core allocation algorithm. */ 383 RRegState* rreg_state; /* [0 .. n_rregs-1] */ 384 Int n_rregs; 385 386 /* .. and the redundant backward map */ 387 /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO. 388 This inplies n_rregs must be <= 32768. */ 389 Short* vreg_state; /* [0 .. n_vregs-1] */ 390 391 /* The vreg -> rreg map constructed and then applied to each 392 instr. */ 393 HRegRemap remap; 394 395 /* The output array of instructions. */ 396 HInstrArray* instrs_out; 397 398 /* Sanity checks are expensive. They are only done periodically, 399 not at each insn processed. */ 400 Bool do_sanity_check; 401 402 vassert(0 == (guest_sizeB % 32)); 403 vassert(0 == (LibVEX_N_SPILL_BYTES % 32)); 404 vassert(0 == (N_SPILL64S % 4)); 405 406 /* The live range numbers are signed shorts, and so limiting the 407 number of insns to 15000 comfortably guards against them 408 overflowing 32k. */ 409 vassert(instrs_in->arr_used <= 15000); 410 411 # define INVALID_INSTRNO (-2) 412 413 # define EMIT_INSTR(_instr) \ 414 do { \ 415 HInstr* _tmp = (_instr); \ 416 if (DEBUG_REGALLOC) { \ 417 vex_printf("** "); \ 418 (*ppInstr)(_tmp, mode64); \ 419 vex_printf("\n\n"); \ 420 } \ 421 addHInstr ( instrs_out, _tmp ); \ 422 } while (0) 423 424 # define PRINT_STATE \ 425 do { \ 426 Int z, q; \ 427 for (z = 0; z < n_rregs; z++) { \ 428 vex_printf(" rreg_state[%2d] = ", z); \ 429 (*ppReg)(rreg_state[z].rreg); \ 430 vex_printf(" \t"); \ 431 switch (rreg_state[z].disp) { \ 432 case Free: vex_printf("Free\n"); break; \ 433 case Unavail: vex_printf("Unavail\n"); break; \ 434 case Bound: vex_printf("BoundTo "); \ 435 (*ppReg)(rreg_state[z].vreg); \ 436 vex_printf("\n"); break; \ 437 } \ 438 } \ 439 vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \ 440 q = 0; \ 441 for (z = 0; z < n_vregs; z++) { \ 442 if (vreg_state[z] == INVALID_RREG_NO) \ 443 continue; \ 444 vex_printf("[%d] -> %d ", z, vreg_state[z]); \ 445 q++; \ 446 if (q > 0 && (q % 6) == 0) \ 447 vex_printf("\n "); \ 448 } \ 449 vex_printf("\n"); \ 450 } while (0) 451 452 453 /* --------- Stage 0: set up output array --------- */ 454 /* --------- and allocate/initialise running state. --------- */ 455 456 instrs_out = newHInstrArray(); 457 458 /* ... and initialise running state. */ 459 /* n_rregs is no more than a short name for n_available_real_regs. */ 460 n_rregs = n_available_real_regs; 461 n_vregs = instrs_in->n_vregs; 462 463 /* If this is not so, vreg_state entries will overflow. */ 464 vassert(n_vregs < 32767); 465 466 rreg_state = LibVEX_Alloc(n_rregs * sizeof(RRegState)); 467 vreg_state = LibVEX_Alloc(n_vregs * sizeof(Short)); 468 469 for (j = 0; j < n_rregs; j++) { 470 rreg_state[j].rreg = available_real_regs[j]; 471 rreg_state[j].has_hlrs = False; 472 rreg_state[j].disp = Free; 473 rreg_state[j].vreg = INVALID_HREG; 474 rreg_state[j].is_spill_cand = False; 475 rreg_state[j].eq_spill_slot = False; 476 } 477 478 for (j = 0; j < n_vregs; j++) 479 vreg_state[j] = INVALID_RREG_NO; 480 481 482 /* --------- Stage 1: compute vreg live ranges. --------- */ 483 /* --------- Stage 2: compute rreg live ranges. --------- */ 484 485 /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */ 486 487 /* This is relatively simple, because (1) we only seek the complete 488 end-to-end live range of each vreg, and are not interested in 489 any holes in it, and (2) the vregs are conveniently numbered 0 490 .. n_vregs-1, so we can just dump the results in a 491 pre-allocated array. */ 492 493 vreg_lrs = NULL; 494 if (n_vregs > 0) 495 vreg_lrs = LibVEX_Alloc(sizeof(VRegLR) * n_vregs); 496 497 for (j = 0; j < n_vregs; j++) { 498 vreg_lrs[j].live_after = INVALID_INSTRNO; 499 vreg_lrs[j].dead_before = INVALID_INSTRNO; 500 vreg_lrs[j].spill_offset = 0; 501 vreg_lrs[j].spill_size = 0; 502 vreg_lrs[j].reg_class = HRcINVALID; 503 } 504 505 /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */ 506 507 /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */ 508 509 /* This is more complex than Stage 1, because we need to compute 510 exactly all the live ranges of all the allocatable real regs, 511 and we don't know in advance how many there will be. */ 512 513 rreg_lrs_used = 0; 514 rreg_lrs_size = 4; 515 rreg_lrs_la = LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR)); 516 rreg_lrs_db = NULL; /* we'll create this later */ 517 518 /* We'll need to track live range start/end points seperately for 519 each rreg. Sigh. */ 520 vassert(n_available_real_regs > 0); 521 rreg_live_after = LibVEX_Alloc(n_available_real_regs * sizeof(Int)); 522 rreg_dead_before = LibVEX_Alloc(n_available_real_regs * sizeof(Int)); 523 524 for (j = 0; j < n_available_real_regs; j++) { 525 rreg_live_after[j] = 526 rreg_dead_before[j] = INVALID_INSTRNO; 527 } 528 529 /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */ 530 531 /* ------ start of ITERATE OVER INSNS ------ */ 532 533 for (ii = 0; ii < instrs_in->arr_used; ii++) { 534 535 (*getRegUsage)( ®_usage, instrs_in->arr[ii], mode64 ); 536 537 # if 0 538 vex_printf("\n%d stage1: ", ii); 539 (*ppInstr)(instrs_in->arr[ii], mode64); 540 vex_printf("\n"); 541 ppHRegUsage(®_usage); 542 # endif 543 544 /* ------ start of DEAL WITH VREG LIVE RANGES ------ */ 545 546 /* for each reg mentioned in the insn ... */ 547 for (j = 0; j < reg_usage.n_used; j++) { 548 549 vreg = reg_usage.hreg[j]; 550 /* only interested in virtual registers right now. */ 551 if (!hregIsVirtual(vreg)) 552 continue; 553 k = hregNumber(vreg); 554 if (k < 0 || k >= n_vregs) { 555 vex_printf("\n"); 556 (*ppInstr)(instrs_in->arr[ii], mode64); 557 vex_printf("\n"); 558 vex_printf("vreg %d, n_vregs %d\n", k, n_vregs); 559 vpanic("doRegisterAllocation: out-of-range vreg"); 560 } 561 562 /* Take the opportunity to note its regclass. We'll need 563 that when allocating spill slots. */ 564 if (vreg_lrs[k].reg_class == HRcINVALID) { 565 /* First mention of this vreg. */ 566 vreg_lrs[k].reg_class = hregClass(vreg); 567 } else { 568 /* Seen it before, so check for consistency. */ 569 vassert(vreg_lrs[k].reg_class == hregClass(vreg)); 570 } 571 572 /* Now consider live ranges. */ 573 switch (reg_usage.mode[j]) { 574 case HRmRead: 575 if (vreg_lrs[k].live_after == INVALID_INSTRNO) { 576 vex_printf("\n\nOFFENDING VREG = %d\n", k); 577 vpanic("doRegisterAllocation: " 578 "first event for vreg is Read"); 579 } 580 vreg_lrs[k].dead_before = toShort(ii + 1); 581 break; 582 case HRmWrite: 583 if (vreg_lrs[k].live_after == INVALID_INSTRNO) 584 vreg_lrs[k].live_after = toShort(ii); 585 vreg_lrs[k].dead_before = toShort(ii + 1); 586 break; 587 case HRmModify: 588 if (vreg_lrs[k].live_after == INVALID_INSTRNO) { 589 vex_printf("\n\nOFFENDING VREG = %d\n", k); 590 vpanic("doRegisterAllocation: " 591 "first event for vreg is Modify"); 592 } 593 vreg_lrs[k].dead_before = toShort(ii + 1); 594 break; 595 default: 596 vpanic("doRegisterAllocation(1)"); 597 } /* switch */ 598 599 } /* iterate over registers */ 600 601 /* ------ end of DEAL WITH VREG LIVE RANGES ------ */ 602 603 /* ------ start of DEAL WITH RREG LIVE RANGES ------ */ 604 605 /* for each reg mentioned in the insn ... */ 606 for (j = 0; j < reg_usage.n_used; j++) { 607 608 /* Dummy initialisations of flush_la and flush_db to avoid 609 possible bogus uninit-var warnings from gcc. */ 610 Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO; 611 Bool flush; 612 613 rreg = reg_usage.hreg[j]; 614 615 /* only interested in real registers right now. */ 616 if (hregIsVirtual(rreg)) 617 continue; 618 619 /* Furthermore, we're not interested in this rreg unless it's 620 one of the allocatable ones. For example, it could be a 621 stack pointer register, or some other register beyond our 622 control, in which case we should just ignore it. */ 623 for (k = 0; k < n_available_real_regs; k++) 624 if (available_real_regs[k] == rreg) 625 break; 626 if (k == n_available_real_regs) 627 continue; /* not found -- ignore. */ 628 flush = False; 629 switch (reg_usage.mode[j]) { 630 case HRmWrite: 631 flush_la = rreg_live_after[k]; 632 flush_db = rreg_dead_before[k]; 633 if (flush_la != INVALID_INSTRNO 634 && flush_db != INVALID_INSTRNO) 635 flush = True; 636 rreg_live_after[k] = ii; 637 rreg_dead_before[k] = ii+1; 638 break; 639 case HRmRead: 640 if (rreg_live_after[k] == INVALID_INSTRNO) { 641 vex_printf("\nOFFENDING RREG = "); 642 (*ppReg)(available_real_regs[k]); 643 vex_printf("\n"); 644 vex_printf("\nOFFENDING instr = "); 645 (*ppInstr)(instrs_in->arr[ii], mode64); 646 vex_printf("\n"); 647 vpanic("doRegisterAllocation: " 648 "first event for rreg is Read"); 649 } 650 rreg_dead_before[k] = ii+1; 651 break; 652 case HRmModify: 653 if (rreg_live_after[k] == INVALID_INSTRNO) { 654 vex_printf("\nOFFENDING RREG = "); 655 (*ppReg)(available_real_regs[k]); 656 vex_printf("\n"); 657 vex_printf("\nOFFENDING instr = "); 658 (*ppInstr)(instrs_in->arr[ii], mode64); 659 vex_printf("\n"); 660 vpanic("doRegisterAllocation: " 661 "first event for rreg is Modify"); 662 } 663 rreg_dead_before[k] = ii+1; 664 break; 665 default: 666 vpanic("doRegisterAllocation(2)"); 667 } 668 669 if (flush) { 670 vassert(flush_la != INVALID_INSTRNO); 671 vassert(flush_db != INVALID_INSTRNO); 672 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); 673 if (0) 674 vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db); 675 rreg_lrs_la[rreg_lrs_used].rreg = rreg; 676 rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la); 677 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db); 678 rreg_lrs_used++; 679 } 680 681 } /* iterate over regs in the instr */ 682 683 /* ------ end of DEAL WITH RREG LIVE RANGES ------ */ 684 685 } /* iterate over insns */ 686 687 /* ------ end of ITERATE OVER INSNS ------ */ 688 689 /* ------ start of FINALISE RREG LIVE RANGES ------ */ 690 691 /* Now finish up any live ranges left over. */ 692 for (j = 0; j < n_available_real_regs; j++) { 693 694 # if 0 695 vex_printf("residual %d: %d %d\n", j, rreg_live_after[j], 696 rreg_dead_before[j]); 697 # endif 698 vassert( (rreg_live_after[j] == INVALID_INSTRNO 699 && rreg_dead_before[j] == INVALID_INSTRNO) 700 || 701 (rreg_live_after[j] != INVALID_INSTRNO 702 && rreg_dead_before[j] != INVALID_INSTRNO) 703 ); 704 705 if (rreg_live_after[j] == INVALID_INSTRNO) 706 continue; 707 708 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); 709 if (0) 710 vex_printf("FLUSH 2 (%d,%d)\n", 711 rreg_live_after[j], rreg_dead_before[j]); 712 rreg_lrs_la[rreg_lrs_used].rreg = available_real_regs[j]; 713 rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]); 714 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]); 715 rreg_lrs_used++; 716 } 717 718 /* Compute summary hints for choosing real regs. If a real reg is 719 involved in a hard live range, record that fact in the fixed 720 part of the running rreg_state. Later, when offered a choice between 721 rregs, it's better to choose one which is not marked as having 722 any HLRs, since ones with HLRs may need to be spilled around 723 their HLRs. Correctness of final assignment is unaffected by 724 this mechanism -- it is only an optimisation. */ 725 726 for (j = 0; j < rreg_lrs_used; j++) { 727 rreg = rreg_lrs_la[j].rreg; 728 vassert(!hregIsVirtual(rreg)); 729 /* rreg is involved in a HLR. Record this info in the array, if 730 there is space. */ 731 for (k = 0; k < n_rregs; k++) 732 if (rreg_state[k].rreg == rreg) 733 break; 734 vassert(k < n_rregs); /* else rreg was not found in rreg_state?! */ 735 rreg_state[k].has_hlrs = True; 736 } 737 if (0) { 738 for (j = 0; j < n_rregs; j++) { 739 if (!rreg_state[j].has_hlrs) 740 continue; 741 ppReg(rreg_state[j].rreg); 742 vex_printf(" hinted\n"); 743 } 744 } 745 746 /* Finally, copy the _la variant into the _db variant and 747 sort both by their respective fields. */ 748 rreg_lrs_db = LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR)); 749 for (j = 0; j < rreg_lrs_used; j++) 750 rreg_lrs_db[j] = rreg_lrs_la[j]; 751 752 sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ ); 753 sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ ); 754 755 /* And set up the cursors. */ 756 rreg_lrs_la_next = 0; 757 rreg_lrs_db_next = 0; 758 759 for (j = 1; j < rreg_lrs_used; j++) { 760 vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after); 761 vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before); 762 } 763 764 /* ------ end of FINALISE RREG LIVE RANGES ------ */ 765 766 # if DEBUG_REGALLOC 767 for (j = 0; j < n_vregs; j++) { 768 vex_printf("vreg %d: la = %d, db = %d\n", 769 j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before ); 770 } 771 # endif 772 773 # if DEBUG_REGALLOC 774 vex_printf("RRegLRs by LA:\n"); 775 for (j = 0; j < rreg_lrs_used; j++) { 776 vex_printf(" "); 777 (*ppReg)(rreg_lrs_la[j].rreg); 778 vex_printf(" la = %d, db = %d\n", 779 rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before ); 780 } 781 vex_printf("RRegLRs by DB:\n"); 782 for (j = 0; j < rreg_lrs_used; j++) { 783 vex_printf(" "); 784 (*ppReg)(rreg_lrs_db[j].rreg); 785 vex_printf(" la = %d, db = %d\n", 786 rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before ); 787 } 788 # endif 789 790 /* --------- Stage 3: allocate spill slots. --------- */ 791 792 /* Each spill slot is 8 bytes long. For vregs which take more than 793 64 bits to spill (classes Flt64 and Vec128), we have to allocate 794 two consecutive spill slots. For 256 bit registers (class 795 Vec256), we have to allocate four consecutive spill slots. 796 797 For Vec128-class on PowerPC, the spill slot's actual address 798 must be 16-byte aligned. Since the spill slot's address is 799 computed as an offset from the guest state pointer, and since 800 the user of the generated code must set that pointer to a 801 32-aligned value, we have the residual obligation here of 802 choosing a 16-aligned spill slot offset for Vec128-class values. 803 Since each spill slot is 8 bytes long, that means for 804 Vec128-class values we must allocated a spill slot number which 805 is zero mod 2. 806 807 Similarly, for Vec256 calss on amd64, find a spill slot number 808 which is zero mod 4. This guarantees it will be 32 byte 809 aligned, which isn't actually necessary on amd64 (we use movUpd 810 etc to spill), but seems like good practice. 811 812 Do a rank-based allocation of vregs to spill slot numbers. We 813 put as few values as possible in spill slots, but nevertheless 814 need to have a spill slot available for all vregs, just in case. 815 */ 816 /* max_ss_no = -1; */ 817 818 for (j = 0; j < N_SPILL64S; j++) 819 ss_busy_until_before[j] = 0; 820 821 for (j = 0; j < n_vregs; j++) { 822 823 /* True iff this vreg is unused. In which case we also expect 824 that the reg_class field for it has not been set. */ 825 if (vreg_lrs[j].live_after == INVALID_INSTRNO) { 826 vassert(vreg_lrs[j].reg_class == HRcINVALID); 827 continue; 828 } 829 830 /* The spill slots are 64 bits in size. As per the comment on 831 definition of HRegClass in host_generic_regs.h, that means, 832 to spill a vreg of class Flt64 or Vec128, we'll need to find 833 two adjacent spill slots to use. For Vec256, we'll need to 834 find four adjacent slots to use. Note, this logic needs to 835 kept in sync with the size info on the definition of 836 HRegClass. */ 837 switch (vreg_lrs[j].reg_class) { 838 839 case HRcVec128: case HRcFlt64: 840 /* Find two adjacent free slots in which between them 841 provide up to 128 bits in which to spill the vreg. 842 Since we are trying to find an even:odd pair, move 843 along in steps of 2 (slots). */ 844 for (k = 0; k < N_SPILL64S-1; k += 2) 845 if (ss_busy_until_before[k+0] <= vreg_lrs[j].live_after 846 && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after) 847 break; 848 if (k >= N_SPILL64S-1) { 849 vpanic("LibVEX_N_SPILL_BYTES is too low. " 850 "Increase and recompile."); 851 } 852 if (0) vex_printf("16-byte spill offset in spill slot %d\n", 853 (Int)k); 854 ss_busy_until_before[k+0] = vreg_lrs[j].dead_before; 855 ss_busy_until_before[k+1] = vreg_lrs[j].dead_before; 856 break; 857 858 default: 859 /* The ordinary case -- just find a single spill slot. */ 860 /* Find the lowest-numbered spill slot which is available 861 at the start point of this interval, and assign the 862 interval to it. */ 863 for (k = 0; k < N_SPILL64S; k++) 864 if (ss_busy_until_before[k] <= vreg_lrs[j].live_after) 865 break; 866 if (k == N_SPILL64S) { 867 vpanic("LibVEX_N_SPILL_BYTES is too low. " 868 "Increase and recompile."); 869 } 870 ss_busy_until_before[k] = vreg_lrs[j].dead_before; 871 break; 872 873 } /* switch (vreg_lrs[j].reg_class) { */ 874 875 /* This reflects LibVEX's hard-wired knowledge of the baseBlock 876 layout: the guest state, then two equal sized areas following 877 it for two sets of shadow state, and then the spill area. */ 878 vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8); 879 880 /* Independent check that we've made a sane choice of slot */ 881 sanity_check_spill_offset( &vreg_lrs[j] ); 882 /* if (j > max_ss_no) */ 883 /* max_ss_no = j; */ 884 } 885 886 # if 0 887 vex_printf("\n\n"); 888 for (j = 0; j < n_vregs; j++) 889 vex_printf("vreg %d --> spill offset %d\n", 890 j, vreg_lrs[j].spill_offset); 891 # endif 892 893 /* --------- Stage 4: establish rreg preferences --------- */ 894 895 /* It may be advantageous to allocating certain vregs to specific 896 rregs, as a way of avoiding reg-reg moves later. Here we 897 establish which, if any, rreg each vreg would prefer to be in. 898 Note that this constrains the allocator -- ideally we end up 899 with as few as possible vregs expressing a preference. 900 901 This is an optimisation: if the .preferred_rreg field is never 902 set to anything different from INVALID_HREG, the allocator still 903 works. */ 904 905 /* 30 Dec 04: removed this mechanism as it does not seem to 906 help. */ 907 908 /* --------- Stage 5: process instructions --------- */ 909 910 /* This is the main loop of the allocator. First, we need to 911 correctly set up our running state, which tracks the status of 912 each real register. */ 913 914 /* ------ BEGIN: Process each insn in turn. ------ */ 915 916 for (ii = 0; ii < instrs_in->arr_used; ii++) { 917 918 # if DEBUG_REGALLOC 919 vex_printf("\n====----====---- Insn %d ----====----====\n", ii); 920 vex_printf("---- "); 921 (*ppInstr)(instrs_in->arr[ii], mode64); 922 vex_printf("\n\nInitial state:\n"); 923 PRINT_STATE; 924 vex_printf("\n"); 925 # endif 926 927 /* ------------ Sanity checks ------------ */ 928 929 /* Sanity checks are expensive. So they are done only once 930 every 7 instructions, and just before the last 931 instruction. */ 932 do_sanity_check 933 = toBool( 934 False /* Set to True for sanity checking of all insns. */ 935 || ii == instrs_in->arr_used-1 936 || (ii > 0 && (ii % 7) == 0) 937 ); 938 939 if (do_sanity_check) { 940 941 /* Sanity check 1: all rregs with a hard live range crossing 942 this insn must be marked as unavailable in the running 943 state. */ 944 for (j = 0; j < rreg_lrs_used; j++) { 945 if (rreg_lrs_la[j].live_after < ii 946 && ii < rreg_lrs_la[j].dead_before) { 947 /* ii is the middle of a hard live range for some real 948 reg. Check it's marked as such in the running 949 state. */ 950 951 # if 0 952 vex_printf("considering la %d .. db %d reg = ", 953 rreg_lrs[j].live_after, 954 rreg_lrs[j].dead_before); 955 (*ppReg)(rreg_lrs[j].rreg); 956 vex_printf("\n"); 957 # endif 958 959 /* find the state entry for this rreg */ 960 for (k = 0; k < n_rregs; k++) 961 if (rreg_state[k].rreg == rreg_lrs_la[j].rreg) 962 break; 963 964 /* and assert that this rreg is marked as unavailable */ 965 vassert(rreg_state[k].disp == Unavail); 966 } 967 } 968 969 /* Sanity check 2: conversely, all rregs marked as 970 unavailable in the running rreg_state must have a 971 corresponding hard live range entry in the rreg_lrs 972 array. */ 973 for (j = 0; j < n_available_real_regs; j++) { 974 vassert(rreg_state[j].disp == Bound 975 || rreg_state[j].disp == Free 976 || rreg_state[j].disp == Unavail); 977 if (rreg_state[j].disp != Unavail) 978 continue; 979 for (k = 0; k < rreg_lrs_used; k++) 980 if (rreg_lrs_la[k].rreg == rreg_state[j].rreg 981 && rreg_lrs_la[k].live_after < ii 982 && ii < rreg_lrs_la[k].dead_before) 983 break; 984 /* If this vassertion fails, we couldn't find a 985 corresponding HLR. */ 986 vassert(k < rreg_lrs_used); 987 } 988 989 /* Sanity check 3: all vreg-rreg bindings must bind registers 990 of the same class. */ 991 for (j = 0; j < n_rregs; j++) { 992 if (rreg_state[j].disp != Bound) { 993 vassert(rreg_state[j].eq_spill_slot == False); 994 continue; 995 } 996 vassert(hregClass(rreg_state[j].rreg) 997 == hregClass(rreg_state[j].vreg)); 998 vassert( hregIsVirtual(rreg_state[j].vreg)); 999 vassert(!hregIsVirtual(rreg_state[j].rreg)); 1000 } 1001 1002 /* Sanity check 4: the vreg_state and rreg_state 1003 mutually-redundant mappings are consistent. If 1004 rreg_state[j].vreg points at some vreg_state entry then 1005 that vreg_state entry should point back at 1006 rreg_state[j]. */ 1007 for (j = 0; j < n_rregs; j++) { 1008 if (rreg_state[j].disp != Bound) 1009 continue; 1010 k = hregNumber(rreg_state[j].vreg); 1011 vassert(IS_VALID_VREGNO(k)); 1012 vassert(vreg_state[k] == j); 1013 } 1014 for (j = 0; j < n_vregs; j++) { 1015 k = vreg_state[j]; 1016 if (k == INVALID_RREG_NO) 1017 continue; 1018 vassert(IS_VALID_RREGNO(k)); 1019 vassert(rreg_state[k].disp == Bound); 1020 vassert(hregNumber(rreg_state[k].vreg) == j); 1021 } 1022 1023 } /* if (do_sanity_check) */ 1024 1025 /* ------------ end of Sanity checks ------------ */ 1026 1027 /* Do various optimisations pertaining to register coalescing 1028 and preferencing: 1029 MOV v <-> v coalescing (done here). 1030 MOV v <-> r coalescing (not yet, if ever) 1031 */ 1032 /* If doing a reg-reg move between two vregs, and the src's live 1033 range ends here and the dst's live range starts here, bind 1034 the dst to the src's rreg, and that's all. */ 1035 if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) { 1036 if (!hregIsVirtual(vregS)) goto cannot_coalesce; 1037 if (!hregIsVirtual(vregD)) goto cannot_coalesce; 1038 /* Check that *isMove is not telling us a bunch of lies ... */ 1039 vassert(hregClass(vregS) == hregClass(vregD)); 1040 k = hregNumber(vregS); 1041 m = hregNumber(vregD); 1042 vassert(IS_VALID_VREGNO(k)); 1043 vassert(IS_VALID_VREGNO(m)); 1044 if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce; 1045 if (vreg_lrs[m].live_after != ii) goto cannot_coalesce; 1046 # if DEBUG_REGALLOC 1047 vex_printf("COALESCE "); 1048 (*ppReg)(vregS); 1049 vex_printf(" -> "); 1050 (*ppReg)(vregD); 1051 vex_printf("\n\n"); 1052 # endif 1053 /* Find the state entry for vregS. */ 1054 for (m = 0; m < n_rregs; m++) 1055 if (rreg_state[m].disp == Bound && rreg_state[m].vreg == vregS) 1056 break; 1057 if (m == n_rregs) 1058 /* We failed to find a binding for vregS, which means it's 1059 currently not in a register. So we can't do the 1060 coalescing. Give up. */ 1061 goto cannot_coalesce; 1062 1063 /* Finally, we can do the coalescing. It's trivial -- merely 1064 claim vregS's register for vregD. */ 1065 rreg_state[m].vreg = vregD; 1066 vassert(IS_VALID_VREGNO(hregNumber(vregD))); 1067 vassert(IS_VALID_VREGNO(hregNumber(vregS))); 1068 vreg_state[hregNumber(vregD)] = toShort(m); 1069 vreg_state[hregNumber(vregS)] = INVALID_RREG_NO; 1070 1071 /* This rreg has become associated with a different vreg and 1072 hence with a different spill slot. Play safe. */ 1073 rreg_state[m].eq_spill_slot = False; 1074 1075 /* Move on to the next insn. We skip the post-insn stuff for 1076 fixed registers, since this move should not interact with 1077 them in any way. */ 1078 continue; 1079 } 1080 cannot_coalesce: 1081 1082 /* ------ Free up rregs bound to dead vregs ------ */ 1083 1084 /* Look for vregs whose live range has just ended, and 1085 mark the associated rreg as free. */ 1086 1087 for (j = 0; j < n_rregs; j++) { 1088 if (rreg_state[j].disp != Bound) 1089 continue; 1090 vreg = hregNumber(rreg_state[j].vreg); 1091 vassert(IS_VALID_VREGNO(vreg)); 1092 if (vreg_lrs[vreg].dead_before <= ii) { 1093 rreg_state[j].disp = Free; 1094 rreg_state[j].eq_spill_slot = False; 1095 m = hregNumber(rreg_state[j].vreg); 1096 vassert(IS_VALID_VREGNO(m)); 1097 vreg_state[m] = INVALID_RREG_NO; 1098 if (DEBUG_REGALLOC) { 1099 vex_printf("free up "); 1100 (*ppReg)(rreg_state[j].rreg); 1101 vex_printf("\n"); 1102 } 1103 } 1104 } 1105 1106 /* ------ Pre-instruction actions for fixed rreg uses ------ */ 1107 1108 /* Now we have to deal with rregs which are about to be made 1109 live by this instruction -- in other words, are entering into 1110 one of their live ranges. If any such rreg holds a vreg, we 1111 will have to free up the rreg. The simplest solution which 1112 is correct is to spill the rreg. 1113 1114 Note we could do better: 1115 * Could move it into some other free rreg, if one is available 1116 1117 Do this efficiently, by incrementally stepping along an array 1118 of rreg HLRs that are known to be sorted by start point 1119 (their .live_after field). 1120 */ 1121 while (True) { 1122 vassert(rreg_lrs_la_next >= 0); 1123 vassert(rreg_lrs_la_next <= rreg_lrs_used); 1124 if (rreg_lrs_la_next == rreg_lrs_used) 1125 break; /* no more real reg live ranges to consider */ 1126 if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after) 1127 break; /* next live range does not yet start */ 1128 vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after); 1129 /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up. 1130 Find the associated rreg_state entry. */ 1131 /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after. 1132 Real register live ranges are guaranteed to be well-formed 1133 in that they start with a write to the register -- Stage 2 1134 rejects any code not satisfying this. So the correct 1135 question to ask is whether 1136 rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is, 1137 whether the reg becomes live after this insn -- rather 1138 than before it. */ 1139 # if DEBUG_REGALLOC 1140 vex_printf("need to free up rreg: "); 1141 (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg); 1142 vex_printf("\n\n"); 1143 # endif 1144 for (k = 0; k < n_rregs; k++) 1145 if (rreg_state[k].rreg == rreg_lrs_la[rreg_lrs_la_next].rreg) 1146 break; 1147 /* If this fails, we don't have an entry for this rreg. 1148 Which we should. */ 1149 vassert(IS_VALID_RREGNO(k)); 1150 m = hregNumber(rreg_state[k].vreg); 1151 if (rreg_state[k].disp == Bound) { 1152 /* Yes, there is an associated vreg. Spill it if it's 1153 still live. */ 1154 vassert(IS_VALID_VREGNO(m)); 1155 vreg_state[m] = INVALID_RREG_NO; 1156 if (vreg_lrs[m].dead_before > ii) { 1157 vassert(vreg_lrs[m].reg_class != HRcINVALID); 1158 if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) { 1159 HInstr* spill1 = NULL; 1160 HInstr* spill2 = NULL; 1161 (*genSpill)( &spill1, &spill2, rreg_state[k].rreg, 1162 vreg_lrs[m].spill_offset, mode64 ); 1163 vassert(spill1 || spill2); /* can't both be NULL */ 1164 if (spill1) 1165 EMIT_INSTR(spill1); 1166 if (spill2) 1167 EMIT_INSTR(spill2); 1168 } 1169 rreg_state[k].eq_spill_slot = True; 1170 } 1171 } 1172 rreg_state[k].disp = Unavail; 1173 rreg_state[k].vreg = INVALID_HREG; 1174 rreg_state[k].eq_spill_slot = False; 1175 1176 /* check for further rregs entering HLRs at this point */ 1177 rreg_lrs_la_next++; 1178 } 1179 1180 1181 # if DEBUG_REGALLOC 1182 vex_printf("After pre-insn actions for fixed regs:\n"); 1183 PRINT_STATE; 1184 vex_printf("\n"); 1185 # endif 1186 1187 1188 /* ------ Deal with the current instruction. ------ */ 1189 1190 /* Finally we can begin the processing of this instruction 1191 itself. The aim is to free up enough rregs for this insn. 1192 This may generate spill stores since we may have to evict 1193 some vregs currently in rregs. Also generates spill loads. 1194 We also build up the final vreg->rreg mapping to be applied 1195 to the insn. */ 1196 1197 (*getRegUsage)( ®_usage, instrs_in->arr[ii], mode64 ); 1198 1199 initHRegRemap(&remap); 1200 1201 /* ------------ BEGIN directReload optimisation ----------- */ 1202 1203 /* If the instruction reads exactly one vreg which is currently 1204 in a spill slot, and this is last use of that vreg, see if we 1205 can convert the instruction into one reads directly from the 1206 spill slot. This is clearly only possible for x86 and amd64 1207 targets, since ppc and arm load-store architectures. If 1208 successful, replace instrs_in->arr[ii] with this new 1209 instruction, and recompute its reg usage, so that the change 1210 is invisible to the standard-case handling that follows. */ 1211 1212 if (directReload && reg_usage.n_used <= 2) { 1213 Bool debug_direct_reload = True && False; 1214 HReg cand = INVALID_HREG; 1215 Bool nreads = 0; 1216 Short spilloff = 0; 1217 1218 for (j = 0; j < reg_usage.n_used; j++) { 1219 1220 vreg = reg_usage.hreg[j]; 1221 1222 if (!hregIsVirtual(vreg)) 1223 continue; 1224 1225 if (reg_usage.mode[j] == HRmRead) { 1226 nreads++; 1227 m = hregNumber(vreg); 1228 vassert(IS_VALID_VREGNO(m)); 1229 k = vreg_state[m]; 1230 if (!IS_VALID_RREGNO(k)) { 1231 /* ok, it is spilled. Now, is this its last use? */ 1232 vassert(vreg_lrs[m].dead_before >= ii+1); 1233 if (vreg_lrs[m].dead_before == ii+1 1234 && cand == INVALID_HREG) { 1235 spilloff = vreg_lrs[m].spill_offset; 1236 cand = vreg; 1237 } 1238 } 1239 } 1240 } 1241 1242 if (nreads == 1 && cand != INVALID_HREG) { 1243 HInstr* reloaded; 1244 if (reg_usage.n_used == 2) 1245 vassert(reg_usage.hreg[0] != reg_usage.hreg[1]); 1246 1247 reloaded = directReload ( instrs_in->arr[ii], cand, spilloff ); 1248 if (debug_direct_reload && !reloaded) { 1249 vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" "); 1250 ppInstr(instrs_in->arr[ii], mode64); 1251 } 1252 if (reloaded) { 1253 /* Update info about the insn, so it looks as if it had 1254 been in this form all along. */ 1255 instrs_in->arr[ii] = reloaded; 1256 (*getRegUsage)( ®_usage, instrs_in->arr[ii], mode64 ); 1257 if (debug_direct_reload && !reloaded) { 1258 vex_printf(" --> "); 1259 ppInstr(reloaded, mode64); 1260 } 1261 } 1262 1263 if (debug_direct_reload && !reloaded) 1264 vex_printf("\n"); 1265 } 1266 1267 } 1268 1269 /* ------------ END directReload optimisation ------------ */ 1270 1271 /* for each reg mentioned in the insn ... */ 1272 for (j = 0; j < reg_usage.n_used; j++) { 1273 1274 vreg = reg_usage.hreg[j]; 1275 1276 /* only interested in virtual registers right now. */ 1277 if (!hregIsVirtual(vreg)) 1278 continue; 1279 1280 # if 0 1281 vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n"); 1282 # endif 1283 1284 /* Now we're trying to find a rreg for "vreg". First of all, 1285 if it already has an rreg assigned, we don't need to do 1286 anything more. Search the current state to find out. */ 1287 m = hregNumber(vreg); 1288 vassert(IS_VALID_VREGNO(m)); 1289 k = vreg_state[m]; 1290 if (IS_VALID_RREGNO(k)) { 1291 vassert(rreg_state[k].disp == Bound); 1292 addToHRegRemap(&remap, vreg, rreg_state[k].rreg); 1293 /* If this rreg is written or modified, mark it as different 1294 from any spill slot value. */ 1295 if (reg_usage.mode[j] != HRmRead) 1296 rreg_state[k].eq_spill_slot = False; 1297 continue; 1298 } else { 1299 vassert(k == INVALID_RREG_NO); 1300 } 1301 1302 /* No luck. The next thing to do is see if there is a 1303 currently free rreg available, of the correct class. If 1304 so, bag it. NOTE, we could improve this by selecting an 1305 rreg for which the next live-range event is as far ahead 1306 as possible. */ 1307 k_suboptimal = -1; 1308 for (k = 0; k < n_rregs; k++) { 1309 if (rreg_state[k].disp != Free 1310 || hregClass(rreg_state[k].rreg) != hregClass(vreg)) 1311 continue; 1312 if (rreg_state[k].has_hlrs) { 1313 /* Well, at least we can use k_suboptimal if we really 1314 have to. Keep on looking for a better candidate. */ 1315 k_suboptimal = k; 1316 } else { 1317 /* Found a preferable reg. Use it. */ 1318 k_suboptimal = -1; 1319 break; 1320 } 1321 } 1322 if (k_suboptimal >= 0) 1323 k = k_suboptimal; 1324 1325 if (k < n_rregs) { 1326 rreg_state[k].disp = Bound; 1327 rreg_state[k].vreg = vreg; 1328 m = hregNumber(vreg); 1329 vassert(IS_VALID_VREGNO(m)); 1330 vreg_state[m] = toShort(k); 1331 addToHRegRemap(&remap, vreg, rreg_state[k].rreg); 1332 /* Generate a reload if needed. This only creates needed 1333 reloads because the live range builder for vregs will 1334 guarantee that the first event for a vreg is a write. 1335 Hence, if this reference is not a write, it cannot be 1336 the first reference for this vreg, and so a reload is 1337 indeed needed. */ 1338 if (reg_usage.mode[j] != HRmWrite) { 1339 vassert(vreg_lrs[m].reg_class != HRcINVALID); 1340 HInstr* reload1 = NULL; 1341 HInstr* reload2 = NULL; 1342 (*genReload)( &reload1, &reload2, rreg_state[k].rreg, 1343 vreg_lrs[m].spill_offset, mode64 ); 1344 vassert(reload1 || reload2); /* can't both be NULL */ 1345 if (reload1) 1346 EMIT_INSTR(reload1); 1347 if (reload2) 1348 EMIT_INSTR(reload2); 1349 /* This rreg is read or modified by the instruction. 1350 If it's merely read we can claim it now equals the 1351 spill slot, but not so if it is modified. */ 1352 if (reg_usage.mode[j] == HRmRead) { 1353 rreg_state[k].eq_spill_slot = True; 1354 } else { 1355 vassert(reg_usage.mode[j] == HRmModify); 1356 rreg_state[k].eq_spill_slot = False; 1357 } 1358 } else { 1359 rreg_state[k].eq_spill_slot = False; 1360 } 1361 1362 continue; 1363 } 1364 1365 /* Well, now we have no option but to spill a vreg. It's 1366 important to make a good choice of vreg to spill, and of 1367 course we need to be careful not to spill a vreg which is 1368 needed by this insn. */ 1369 1370 /* First, mark in the rreg_state, those rregs which are not spill 1371 candidates, due to holding a vreg mentioned by this 1372 instruction. Or being of the wrong class. */ 1373 for (k = 0; k < n_rregs; k++) { 1374 rreg_state[k].is_spill_cand = False; 1375 if (rreg_state[k].disp != Bound) 1376 continue; 1377 if (hregClass(rreg_state[k].rreg) != hregClass(vreg)) 1378 continue; 1379 rreg_state[k].is_spill_cand = True; 1380 for (m = 0; m < reg_usage.n_used; m++) { 1381 if (rreg_state[k].vreg == reg_usage.hreg[m]) { 1382 rreg_state[k].is_spill_cand = False; 1383 break; 1384 } 1385 } 1386 } 1387 1388 /* We can choose to spill any rreg satisfying 1389 rreg_state[r].is_spill_cand (so to speak). Choose r so that 1390 the next use of its associated vreg is as far ahead as 1391 possible, in the hope that this will minimise the number 1392 of consequent reloads required. */ 1393 spillee 1394 = findMostDistantlyMentionedVReg ( 1395 getRegUsage, instrs_in, ii+1, rreg_state, n_rregs, mode64 ); 1396 1397 if (spillee == -1) { 1398 /* Hmmmmm. There don't appear to be any spill candidates. 1399 We're hosed. */ 1400 vex_printf("reg_alloc: can't find a register in class: "); 1401 ppHRegClass(hregClass(vreg)); 1402 vex_printf("\n"); 1403 vpanic("reg_alloc: can't create a free register."); 1404 } 1405 1406 /* Right. So we're going to spill rreg_state[spillee]. */ 1407 vassert(IS_VALID_RREGNO(spillee)); 1408 vassert(rreg_state[spillee].disp == Bound); 1409 /* check it's the right class */ 1410 vassert(hregClass(rreg_state[spillee].rreg) == hregClass(vreg)); 1411 /* check we're not ejecting the vreg for which we are trying 1412 to free up a register. */ 1413 vassert(rreg_state[spillee].vreg != vreg); 1414 1415 m = hregNumber(rreg_state[spillee].vreg); 1416 vassert(IS_VALID_VREGNO(m)); 1417 1418 /* So here's the spill store. Assert that we're spilling a 1419 live vreg. */ 1420 vassert(vreg_lrs[m].dead_before > ii); 1421 vassert(vreg_lrs[m].reg_class != HRcINVALID); 1422 if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) { 1423 HInstr* spill1 = NULL; 1424 HInstr* spill2 = NULL; 1425 (*genSpill)( &spill1, &spill2, rreg_state[spillee].rreg, 1426 vreg_lrs[m].spill_offset, mode64 ); 1427 vassert(spill1 || spill2); /* can't both be NULL */ 1428 if (spill1) 1429 EMIT_INSTR(spill1); 1430 if (spill2) 1431 EMIT_INSTR(spill2); 1432 } 1433 1434 /* Update the rreg_state to reflect the new assignment for this 1435 rreg. */ 1436 rreg_state[spillee].vreg = vreg; 1437 vreg_state[m] = INVALID_RREG_NO; 1438 1439 rreg_state[spillee].eq_spill_slot = False; /* be safe */ 1440 1441 m = hregNumber(vreg); 1442 vassert(IS_VALID_VREGNO(m)); 1443 vreg_state[m] = toShort(spillee); 1444 1445 /* Now, if this vreg is being read or modified (as opposed to 1446 written), we have to generate a reload for it. */ 1447 if (reg_usage.mode[j] != HRmWrite) { 1448 vassert(vreg_lrs[m].reg_class != HRcINVALID); 1449 HInstr* reload1 = NULL; 1450 HInstr* reload2 = NULL; 1451 (*genReload)( &reload1, &reload2, rreg_state[spillee].rreg, 1452 vreg_lrs[m].spill_offset, mode64 ); 1453 vassert(reload1 || reload2); /* can't both be NULL */ 1454 if (reload1) 1455 EMIT_INSTR(reload1); 1456 if (reload2) 1457 EMIT_INSTR(reload2); 1458 /* This rreg is read or modified by the instruction. 1459 If it's merely read we can claim it now equals the 1460 spill slot, but not so if it is modified. */ 1461 if (reg_usage.mode[j] == HRmRead) { 1462 rreg_state[spillee].eq_spill_slot = True; 1463 } else { 1464 vassert(reg_usage.mode[j] == HRmModify); 1465 rreg_state[spillee].eq_spill_slot = False; 1466 } 1467 } 1468 1469 /* So after much twisting and turning, we have vreg mapped to 1470 rreg_state[spillee].rreg. Note that in the map. */ 1471 addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg); 1472 1473 } /* iterate over registers in this instruction. */ 1474 1475 /* We've finished clowning around with registers in this instruction. 1476 Three results: 1477 - the running rreg_state[] has been updated 1478 - a suitable vreg->rreg mapping for this instruction has been 1479 constructed 1480 - spill and reload instructions may have been emitted. 1481 1482 The final step is to apply the mapping to the instruction, 1483 and emit that. 1484 */ 1485 1486 /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */ 1487 (*mapRegs)( &remap, instrs_in->arr[ii], mode64 ); 1488 EMIT_INSTR( instrs_in->arr[ii] ); 1489 1490 # if DEBUG_REGALLOC 1491 vex_printf("After dealing with current insn:\n"); 1492 PRINT_STATE; 1493 vex_printf("\n"); 1494 # endif 1495 1496 /* ------ Post-instruction actions for fixed rreg uses ------ */ 1497 1498 /* Now we need to check for rregs exiting fixed live ranges 1499 after this instruction, and if so mark them as free. */ 1500 while (True) { 1501 vassert(rreg_lrs_db_next >= 0); 1502 vassert(rreg_lrs_db_next <= rreg_lrs_used); 1503 if (rreg_lrs_db_next == rreg_lrs_used) 1504 break; /* no more real reg live ranges to consider */ 1505 if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before) 1506 break; /* next live range does not yet start */ 1507 vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before); 1508 /* rreg_lrs_db[[rreg_lrs_db_next].rreg is exiting a hard live 1509 range. Mark it as such in the main rreg_state array. */ 1510 for (k = 0; k < n_rregs; k++) 1511 if (rreg_state[k].rreg == rreg_lrs_db[rreg_lrs_db_next].rreg) 1512 break; 1513 /* If this vassertion fails, we don't have an entry for 1514 this rreg. Which we should. */ 1515 vassert(k < n_rregs); 1516 vassert(rreg_state[k].disp == Unavail); 1517 rreg_state[k].disp = Free; 1518 rreg_state[k].vreg = INVALID_HREG; 1519 rreg_state[k].eq_spill_slot = False; 1520 1521 /* check for further rregs leaving HLRs at this point */ 1522 rreg_lrs_db_next++; 1523 } 1524 1525 # if DEBUG_REGALLOC 1526 vex_printf("After post-insn actions for fixed regs:\n"); 1527 PRINT_STATE; 1528 vex_printf("\n"); 1529 # endif 1530 1531 } /* iterate over insns */ 1532 1533 /* ------ END: Process each insn in turn. ------ */ 1534 1535 /* free(rreg_state); */ 1536 /* free(rreg_lrs); */ 1537 /* if (vreg_lrs) free(vreg_lrs); */ 1538 1539 /* Paranoia */ 1540 for (j = 0; j < n_rregs; j++) 1541 vassert(rreg_state[j].rreg == available_real_regs[j]); 1542 1543 vassert(rreg_lrs_la_next == rreg_lrs_used); 1544 vassert(rreg_lrs_db_next == rreg_lrs_used); 1545 1546 return instrs_out; 1547 1548 # undef INVALID_INSTRNO 1549 # undef EMIT_INSTR 1550 # undef PRINT_STATE 1551 } 1552 1553 1554 1555 /*---------------------------------------------------------------*/ 1556 /*--- host_reg_alloc2.c ---*/ 1557 /*---------------------------------------------------------------*/ 1558