/*---------------------------------------------------------------*/
/*--- begin                                 host_reg_alloc2.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"

#include "main_util.h"
#include "host_generic_regs.h"

/* Set to 1 for lots of debugging output. */
#define DEBUG_REGALLOC 0


/* TODO 27 Oct 04:

   Better consistency checking from what isMove tells us.

   We can possibly do V-V coalescing even when the src is spilled,
   providing we can arrange for the dst to have the same spill slot.

   Note that state[].hreg is the same as the available real regs.

   Generally rationalise data structures.  */


/* Records information on virtual register live ranges.  Computed once
   and remains unchanged after that. */
typedef
   struct {
      /* Becomes live for the first time after this insn ... */
      Short live_after;
      /* Becomes dead for the last time before this insn ... */
      Short dead_before;
      /* The "home" spill slot, if needed.  Never changes. */
      Short spill_offset;
      Short spill_size;
      /* What kind of register this is. */
      HRegClass reg_class;
   }
   VRegLR;


/* Records information on real-register live ranges.  Computed once
   and remains unchanged after that. */
typedef
   struct {
      HReg rreg;
      /* Becomes live after this insn ... */
      Short live_after;
      /* Becomes dead before this insn ... */
      Short dead_before;
   }
   RRegLR;
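
/* Editorial aside: a minimal sketch (not used by the allocator) of
   the live-range convention just defined.  Assuming a vreg first
   written by insn 3 and last read by insn 7, its record has
   live_after == 3 and dead_before == 8, so it is "live across"
   insn ii exactly when live_after < ii && ii < dead_before. */
static inline __attribute__((unused))
Bool VRegLR__isLiveAcross ( const VRegLR* lr, Int ii )
{
   return toBool(lr->live_after < ii && ii < lr->dead_before);
}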

/* An array of the following structs (rreg_state) comprises the
   running state of the allocator.  It indicates what the current
   disposition of each allocatable real register is.  The array gets
   updated as the allocator processes instructions.  The identity of
   the register is not recorded here, because the index of this
   structure in doRegisterAllocation()'s |rreg_state| is the index
   number of the register, and the register itself can be extracted
   from the RRegUniverse supplied to doRegisterAllocation(). */
typedef
   struct {
      /* ------ FIELDS WHICH DO NOT CHANGE ------ */
      /* Is this involved in any HLRs?  (only an optimisation hint) */
      Bool has_hlrs;
      /* ------ FIELDS WHICH DO CHANGE ------ */
      /* 6 May 07: rearranged fields below so the whole struct fits
         into 16 bytes on both x86 and amd64. */
      /* Used when .disp == Bound and we are looking for vregs to
         spill. */
      Bool is_spill_cand;
      /* Optimisation: used when .disp == Bound.  Indicates when the
         rreg has the same value as the spill slot for the associated
         vreg.  Is safely left at False, and becomes True after a
         spill store or reload for this rreg. */
      Bool eq_spill_slot;
      /* What's its current disposition? */
      enum { Free,     /* available for use */
             Unavail,  /* in a real-reg live range */
             Bound     /* in use (holding value of some vreg) */
           }
           disp;
      /* If .disp == Bound, what vreg is it bound to? */
      HReg vreg;
   }
   RRegState;


/* The allocator also maintains a redundant array of indexes
   (vreg_state) from vreg numbers back to entries in rreg_state.  It
   is redundant because vreg_state[i] == j holds if and only if
   hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
   point at each other.  The purpose of this is to speed up activities
   which involve looking for a particular vreg: there is no need to
   scan the rreg_state looking for it, just index directly into
   vreg_state.  The FAQ "does this vreg already have an associated
   rreg" is the main beneficiary.

   To indicate, in vreg_state[i], that a given vreg is not currently
   associated with any rreg, that entry can be set to INVALID_RREG_NO.

   Because the vreg_state entries are signed Shorts, the max number
   of vregs that can be handled by regalloc is 32767.
*/

#define INVALID_RREG_NO ((Short)(-1))

#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)


/* Search forward from some given point in the incoming instruction
   sequence.  Point is to select a virtual register to spill, by
   finding the vreg which is mentioned as far ahead as possible, in
   the hope that this will minimise the number of consequent reloads.

   Only do the search for vregs which are Bound in the running state,
   and for which the .is_spill_cand field is set.  This allows the
   caller to arbitrarily restrict the set of spill candidates to be
   considered.

   To do this we don't actually need to see the incoming instruction
   stream.  Rather, what we need is the HRegUsage records for the
   incoming instruction stream.  Hence that is passed in.

   Returns an index into the state array indicating the (v,r) pair to
   spill, or -1 if none was found.  */
static
Int findMostDistantlyMentionedVReg (
   HRegUsage* reg_usages_in,
   Int        search_from_instr,
   Int        num_instrs,
   RRegState* state,
   Int        n_state
)
{
   Int k, m;
   Int furthest_k = -1;
   Int furthest   = -1;
   vassert(search_from_instr >= 0);
   for (k = 0; k < n_state; k++) {
      if (!state[k].is_spill_cand)
         continue;
      vassert(state[k].disp == Bound);
      for (m = search_from_instr; m < num_instrs; m++) {
         if (HRegUsage__contains(&reg_usages_in[m], state[k].vreg))
            break;
      }
      if (m > furthest) {
         furthest   = m;
         furthest_k = k;
      }
   }
   return furthest_k;
}
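
/* Editorial aside: the vreg_state/rreg_state mutual-redundancy
   invariant described above, restated as a hypothetical standalone
   checker.  It is a sketch only -- the allocator's real checks live
   in the per-insn sanity-check block of doRegisterAllocation(). */
static __attribute__((unused))
Bool statesAreMutuallyConsistent ( const RRegState* rstate, Int nr,
                                   const Short* vstate, Int nv )
{
   for (Int j = 0; j < nr; j++) {
      if (rstate[j].disp != Bound)
         continue;
      /* A Bound rreg must be pointed back at by its vreg's entry. */
      Int k = (Int)hregIndex(rstate[j].vreg);
      if (k < 0 || k >= nv || vstate[k] != j)
         return False;
   }
   for (Int j = 0; j < nv; j++) {
      Short k = vstate[j];
      if (k == INVALID_RREG_NO)
         continue;
      /* A mapped vreg must denote a Bound rreg holding that vreg. */
      if (k < 0 || k >= nr || rstate[k].disp != Bound
          || (Int)hregIndex(rstate[k].vreg) != j)
         return False;
   }
   return True;
}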

/* Check that this vreg has been assigned a sane spill offset. */
inline
static void sanity_check_spill_offset ( VRegLR* vreg )
{
   switch (vreg->reg_class) {
      case HRcVec128: case HRcFlt64:
         vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
      default:
         vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
   }
}


/* Double the size of the real-reg live-range array, if needed. */
__attribute__((noinline))
static void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used )
{
   Int     k;
   RRegLR* arr2;
   if (0)
      vex_printf("ensureRRLRspace: %d -> %d\n", *size, 2 * *size);
   vassert(used == *size);
   arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR));
   for (k = 0; k < *size; k++)
      arr2[k] = (*info)[k];
   *size *= 2;
   *info = arr2;
}
inline
static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
{
   if (LIKELY(used < *size)) return;
   ensureRRLRspace_SLOW(info, size, used);
}


/* Sort an array of RRegLR entries by either the .live_after or
   .dead_before fields.  This is performance-critical. */
static void sortRRLRarray ( RRegLR* arr,
                            Int size, Bool by_live_after )
{
   Int    incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
                       9841, 29524, 88573, 265720,
                       797161, 2391484 };
   Int    lo = 0;
   Int    hi = size-1;
   Int    i, j, h, bigN, hp;
   RRegLR v;

   vassert(size >= 0);
   if (size == 0)
      return;

   bigN = hi - lo + 1; if (bigN < 2) return;
   hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;

   if (by_live_after) {

      for ( ; hp >= 0; hp--) {
         h = incs[hp];
         for (i = lo + h; i <= hi; i++) {
            v = arr[i];
            j = i;
            while (arr[j-h].live_after > v.live_after) {
               arr[j] = arr[j-h];
               j = j - h;
               if (j <= (lo + h - 1)) break;
            }
            arr[j] = v;
         }
      }

   } else {

      for ( ; hp >= 0; hp--) {
         h = incs[hp];
         for (i = lo + h; i <= hi; i++) {
            v = arr[i];
            j = i;
            while (arr[j-h].dead_before > v.dead_before) {
               arr[j] = arr[j-h];
               j = j - h;
               if (j <= (lo + h - 1)) break;
            }
            arr[j] = v;
         }
      }

   }
}


/* Compute the index of the highest and lowest 1 in a ULong,
   respectively.  Results are undefined if the argument is zero.
   Don't pass it zero :) */
static inline UInt ULong__maxIndex ( ULong w64 ) {
   return 63 - __builtin_clzll(w64);
}

static inline UInt ULong__minIndex ( ULong w64 ) {
   return __builtin_ctzll(w64);
}
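
/* Editorial aside: a quick worked example of the two helpers above,
   as a compiled-but-unused self-check sketch.  For w64 == 0x90
   (binary 10010000) the highest set bit is bit 7 and the lowest is
   bit 4. */
static __attribute__((unused)) void ULong__indexExamples ( void )
{
   vassert(ULong__maxIndex(0x90ULL) == 7);
   vassert(ULong__minIndex(0x90ULL) == 4);
}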

/* Vectorised memset, copied from Valgrind's m_libcbase.c. */
static void* local_memset ( void *destV, Int c, SizeT sz )
{
#  define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3)))

   UInt   c4;
   UChar* d  = destV;
   UChar  uc = c;

   while ((!IS_4_ALIGNED(d)) && sz >= 1) {
      d[0] = uc;
      d++;
      sz--;
   }
   if (sz == 0)
      return destV;
   c4 = uc;
   c4 |= (c4 << 8);
   c4 |= (c4 << 16);
   while (sz >= 16) {
      ((UInt*)d)[0] = c4;
      ((UInt*)d)[1] = c4;
      ((UInt*)d)[2] = c4;
      ((UInt*)d)[3] = c4;
      d  += 16;
      sz -= 16;
   }
   while (sz >= 4) {
      ((UInt*)d)[0] = c4;
      d  += 4;
      sz -= 4;
   }
   while (sz >= 1) {
      d[0] = uc;
      d++;
      sz--;
   }
   return destV;

#  undef IS_4_ALIGNED
}


/* A target-independent register allocator.  Requires various
   functions which it uses to deal abstractly with instructions and
   registers, since it cannot have any target-specific knowledge.

   Returns a new list of instructions, which, as a result of the
   behaviour of mapRegs, will be in-place modifications of the
   original instructions.

   Requires that the incoming code has been generated using
   vreg numbers 0, 1 .. n_vregs-1.  Appearance of a vreg outside
   that range is a checked run-time error.

   Takes an expandable array of pointers to unallocated insns.
   Returns an expandable array of pointers to allocated insns.
*/
HInstrArray* doRegisterAllocation (

   /* Incoming virtual-registerised code. */
   HInstrArray* instrs_in,

   /* The real-register universe to use.  This contains facts about
      real registers, one of which is the set of registers available
      for allocation. */
   const RRegUniverse* univ,

   /* Return True iff the given insn is a reg-reg move, in which
      case also return the src and dst regs. */
   Bool (*isMove) ( const HInstr*, HReg*, HReg* ),

   /* Get info about register usage in this insn. */
   void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ),

   /* Apply a reg-reg mapping to an insn. */
   void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),

   /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
      real reg to a spill slot byte offset.  The two leading HInstr**
      args are out parameters, through which the generated insns are
      returned.  Also (optionally) a 'directReload' function, which
      attempts to replace a given instruction by one which reads
      directly from a specified spill slot.  May be NULL, in which
      case the optimisation is not attempted. */
   void    (*genSpill)  ( HInstr**, HInstr**, HReg, Int, Bool ),
   void    (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
   HInstr* (*directReload) ( HInstr*, HReg, Short ),
   Int     guest_sizeB,

   /* For debug printing only. */
   void (*ppInstr) ( const HInstr*, Bool ),
   void (*ppReg) ( HReg ),

   /* 32/64bit mode */
   Bool mode64
)
{
#  define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)

   const Bool eq_spill_opt = True;

   /* Info on vregs and rregs.  Computed once and remains
      unchanged. */
   Int     n_vregs;
   VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */

   /* We keep two copies of the real-reg live range info, one sorted
      by .live_after and the other by .dead_before.  First the
      unsorted info is created in the _la variant and then copied
      into the _db variant.  Once that's done both of them are
      sorted.  We also need two integer cursors which record the
      next location in the two arrays to consider. */
   RRegLR* rreg_lrs_la;
   RRegLR* rreg_lrs_db;
   Int     rreg_lrs_size;
   Int     rreg_lrs_used;
   Int     rreg_lrs_la_next;
   Int     rreg_lrs_db_next;

   /* Info on register usage in the incoming instruction array.
      Computed once and remains unchanged, more or less; updated
      sometimes by the direct-reload optimisation. */
   HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */

   /* Used when constructing vreg_lrs (for allocating stack
      slots). */
   Short ss_busy_until_before[N_SPILL64S];

   /* Used when constructing rreg_lrs. */
   Int* rreg_live_after;
   Int* rreg_dead_before;

   /* Running state of the core allocation algorithm. */
   RRegState* rreg_state; /* [0 .. n_rregs-1] */
   Int        n_rregs;

   /* .. and the redundant backward map */
   /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
      This implies n_rregs must be <= 32768. */
   Short* vreg_state; /* [0 .. n_vregs-1] */

   /* The vreg -> rreg map constructed and then applied to each
      instr. */
   HRegRemap remap;

   /* The output array of instructions. */
   HInstrArray* instrs_out;

   /* Sanity checks are expensive.  They are only done periodically,
      not at each insn processed. */
   Bool do_sanity_check;

   vassert(0 == (guest_sizeB % LibVEX_GUEST_STATE_ALIGN));
   vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN));
   vassert(0 == (N_SPILL64S % 2));

   /* The live range numbers are signed shorts, and so limiting the
      number of insns to 15000 comfortably guards against them
      overflowing 32k. */
   vassert(instrs_in->arr_used <= 15000);

#  define INVALID_INSTRNO (-2)

#  define EMIT_INSTR(_instr)                  \
      do {                                    \
        HInstr* _tmp = (_instr);              \
        if (DEBUG_REGALLOC) {                 \
           vex_printf("**  ");                \
           (*ppInstr)(_tmp, mode64);          \
           vex_printf("\n\n");                \
        }                                     \
        addHInstr ( instrs_out, _tmp );       \
      } while (0)

#  define PRINT_STATE                                             \
      do {                                                        \
         Int z, q;                                                \
         for (z = 0; z < n_rregs; z++) {                          \
            vex_printf("  rreg_state[%2d] = ", z);                \
            (*ppReg)(univ->regs[z]);                              \
            vex_printf("  \t");                                   \
            switch (rreg_state[z].disp) {                         \
               case Free:    vex_printf("Free\n"); break;         \
               case Unavail: vex_printf("Unavail\n"); break;      \
               case Bound:   vex_printf("BoundTo ");              \
                             (*ppReg)(rreg_state[z].vreg);        \
                             vex_printf("\n"); break;             \
            }                                                     \
         }                                                        \
         vex_printf("\n  vreg_state[0 .. %d]:\n    ", n_vregs-1); \
         q = 0;                                                   \
         for (z = 0; z < n_vregs; z++) {                          \
            if (vreg_state[z] == INVALID_RREG_NO)                 \
               continue;                                          \
            vex_printf("[%d] -> %d   ", z, vreg_state[z]);        \
            q++;                                                  \
            if (q > 0 && (q % 6) == 0)                            \
               vex_printf("\n    ");                              \
         }                                                        \
         vex_printf("\n");                                        \
      } while (0)


   /* --------- Stage 0: set up output array --------- */
   /* ---------          and allocate/initialise running state. --------- */

   instrs_out = newHInstrArray();

   /* ... and initialise running state. */
   /* n_rregs is no more than a short name for n_available_real_regs. */
   n_rregs = univ->allocable;
   n_vregs = instrs_in->n_vregs;

   /* If this is not so, vreg_state entries will overflow. */
   vassert(n_vregs < 32767);

   /* If this is not so, the universe we have is nonsensical. */
   vassert(n_rregs > 0);

   rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));
   vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short));

   for (Int j = 0; j < n_rregs; j++) {
      rreg_state[j].has_hlrs      = False;
      rreg_state[j].disp          = Free;
      rreg_state[j].vreg          = INVALID_HREG;
      rreg_state[j].is_spill_cand = False;
      rreg_state[j].eq_spill_slot = False;
   }

   for (Int j = 0; j < n_vregs; j++)
      vreg_state[j] = INVALID_RREG_NO;


   /* --------- Stage 1: compute vreg live ranges. --------- */
   /* --------- Stage 2: compute rreg live ranges. --------- */

   /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */

   /* This is relatively simple, because (1) we only seek the complete
      end-to-end live range of each vreg, and are not interested in
      any holes in it, and (2) the vregs are conveniently numbered 0
      .. n_vregs-1, so we can just dump the results in a
      pre-allocated array. */

   vreg_lrs = NULL;
   if (n_vregs > 0)
      vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs);

   for (Int j = 0; j < n_vregs; j++) {
      vreg_lrs[j].live_after   = INVALID_INSTRNO;
      vreg_lrs[j].dead_before  = INVALID_INSTRNO;
      vreg_lrs[j].spill_offset = 0;
      vreg_lrs[j].spill_size   = 0;
      vreg_lrs[j].reg_class    = HRcINVALID;
   }

   /* An array to hold the reg-usage info for the incoming
      instructions. */
   reg_usage_arr
      = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used);

   /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */

   /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */

   /* This is more complex than Stage 1, because we need to compute
      exactly all the live ranges of all the allocatable real regs,
      and we don't know in advance how many there will be. */

   rreg_lrs_used = 0;
   rreg_lrs_size = 4;
   rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR));
   rreg_lrs_db = NULL; /* we'll create this later */

   /* We'll need to track live range start/end points separately for
      each rreg.  Sigh. */
   vassert(n_rregs > 0);
   rreg_live_after  = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
   rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int));

   for (Int j = 0; j < n_rregs; j++) {
      rreg_live_after[j] =
      rreg_dead_before[j] = INVALID_INSTRNO;
   }

   /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */

   /* ------ start of ITERATE OVER INSNS ------ */

   for (Int ii = 0; ii < instrs_in->arr_used; ii++) {

      (*getRegUsage)( &reg_usage_arr[ii], instrs_in->arr[ii], mode64 );

      if (0) {
         vex_printf("\n%d  stage1: ", ii);
         (*ppInstr)(instrs_in->arr[ii], mode64);
         vex_printf("\n");
         ppHRegUsage(univ, &reg_usage_arr[ii]);
      }

      /* ------ start of DEAL WITH VREG LIVE RANGES ------ */

      /* for each virtual reg mentioned in the insn ... */
      for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {

         HReg vreg = reg_usage_arr[ii].vRegs[j];
         vassert(hregIsVirtual(vreg));

         Int k = hregIndex(vreg);
         if (k < 0 || k >= n_vregs) {
            vex_printf("\n");
            (*ppInstr)(instrs_in->arr[ii], mode64);
            vex_printf("\n");
            vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
            vpanic("doRegisterAllocation: out-of-range vreg");
         }

         /* Take the opportunity to note its regclass.  We'll need
            that when allocating spill slots. */
         if (vreg_lrs[k].reg_class == HRcINVALID) {
            /* First mention of this vreg. */
            vreg_lrs[k].reg_class = hregClass(vreg);
         } else {
            /* Seen it before, so check for consistency. */
            vassert(vreg_lrs[k].reg_class == hregClass(vreg));
         }
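
         /* Editorial aside: a worked instance of the update rules in
            the switch below.  Assume (hypothetically) vreg v2 is
            written by insn 4, modified by insn 6 and read by insn 9:
            the write sets live_after = 4, and each event drags
            dead_before forward, ending at 10.  A Read or Modify
            before any write is rejected, since the range builder
            guarantees a vreg's first event is a write. */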

         /* Now consider live ranges. */
         switch (reg_usage_arr[ii].vMode[j]) {
            case HRmRead:
               if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
                  vex_printf("\n\nOFFENDING VREG = %d\n", k);
                  vpanic("doRegisterAllocation: "
                         "first event for vreg is Read");
               }
               vreg_lrs[k].dead_before = toShort(ii + 1);
               break;
            case HRmWrite:
               if (vreg_lrs[k].live_after == INVALID_INSTRNO)
                  vreg_lrs[k].live_after = toShort(ii);
               vreg_lrs[k].dead_before = toShort(ii + 1);
               break;
            case HRmModify:
               if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
                  vex_printf("\n\nOFFENDING VREG = %d\n", k);
                  vpanic("doRegisterAllocation: "
                         "first event for vreg is Modify");
               }
               vreg_lrs[k].dead_before = toShort(ii + 1);
               break;
            default:
               vpanic("doRegisterAllocation(1)");
         } /* switch */

      } /* iterate over virtual registers */

      /* ------ end of DEAL WITH VREG LIVE RANGES ------ */

      /* ------ start of DEAL WITH RREG LIVE RANGES ------ */

      /* If this doesn't hold, the following iteration over real registers
         will fail miserably. */
      vassert(N_RREGUNIVERSE_REGS == 64);

      const ULong rRead      = reg_usage_arr[ii].rRead;
      const ULong rWritten   = reg_usage_arr[ii].rWritten;
      const ULong rMentioned = rRead | rWritten;

      UInt rReg_minIndex;
      UInt rReg_maxIndex;
      if (rMentioned == 0) {
         /* There are no real register uses in this insn.  Set
            rReg_{min,max}Index so that the following loop doesn't iterate
            at all, so as to avoid wasting time. */
         rReg_minIndex = 1;
         rReg_maxIndex = 0;
      } else {
         rReg_minIndex = ULong__minIndex(rMentioned);
         rReg_maxIndex = ULong__maxIndex(rMentioned);
         /* Don't bother to look at registers which are not available
            to the allocator.  We asserted above that n_rregs > 0, so
            n_rregs-1 is safe. */
         if (rReg_maxIndex >= n_rregs)
            rReg_maxIndex = n_rregs-1;
      }

      /* for each allocator-available real reg mentioned in the insn ... */
      /* Note.  We are allocating only over the real regs available to
         the allocator.  Others, eg the stack or baseblock pointers,
         are unavailable to allocation and so we never visit them.
         Hence the iteration is cut off at n_rregs-1, since n_rregs ==
         univ->allocable. */
      for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) {

         const ULong jMask = 1ULL << j;
         if (LIKELY((rMentioned & jMask) == 0))
            continue;

         const Bool isR = (rRead    & jMask) != 0;
         const Bool isW = (rWritten & jMask) != 0;

         /* Dummy initialisations of flush_la and flush_db to avoid
            possible bogus uninit-var warnings from gcc. */
         Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
         Bool flush = False;
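
         /* Editorial aside: a worked instance of the cases below.
            Suppose (hypothetically) rreg %r3 is written by insn 3,
            read by insn 5, and then written again -- a pure write --
            by insn 9.  The pure write at insn 9 ends the old hard
            live range, so (live_after 3, dead_before 6) is flushed
            to rreg_lrs_la, and a fresh range starts at insn 9. */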

         if (isW && !isR) {
            flush_la = rreg_live_after[j];
            flush_db = rreg_dead_before[j];
            if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO)
               flush = True;
            rreg_live_after[j]  = ii;
            rreg_dead_before[j] = ii+1;
         } else if (!isW && isR) {
            if (rreg_live_after[j] == INVALID_INSTRNO) {
               vex_printf("\nOFFENDING RREG = ");
               (*ppReg)(univ->regs[j]);
               vex_printf("\n");
               vex_printf("\nOFFENDING instr = ");
               (*ppInstr)(instrs_in->arr[ii], mode64);
               vex_printf("\n");
               vpanic("doRegisterAllocation: "
                      "first event for rreg is Read");
            }
            rreg_dead_before[j] = ii+1;
         } else {
            vassert(isR && isW);
            if (rreg_live_after[j] == INVALID_INSTRNO) {
               vex_printf("\nOFFENDING RREG = ");
               (*ppReg)(univ->regs[j]);
               vex_printf("\n");
               vex_printf("\nOFFENDING instr = ");
               (*ppInstr)(instrs_in->arr[ii], mode64);
               vex_printf("\n");
               vpanic("doRegisterAllocation: "
                      "first event for rreg is Modify");
            }
            rreg_dead_before[j] = ii+1;
         }

         if (flush) {
            vassert(flush_la != INVALID_INSTRNO);
            vassert(flush_db != INVALID_INSTRNO);
            ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
            if (0)
               vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
            rreg_lrs_la[rreg_lrs_used].rreg        = univ->regs[j];
            rreg_lrs_la[rreg_lrs_used].live_after  = toShort(flush_la);
            rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
            rreg_lrs_used++;
         }

      } /* iterate over rregs in the instr */

      /* ------ end of DEAL WITH RREG LIVE RANGES ------ */

   } /* iterate over insns */

   /* ------ end of ITERATE OVER INSNS ------ */

   /* ------ start of FINALISE RREG LIVE RANGES ------ */

   /* Now finish up any live ranges left over. */
   for (Int j = 0; j < n_rregs; j++) {

      if (0) {
         vex_printf("residual %d:  %d %d\n", j, rreg_live_after[j],
                                                rreg_dead_before[j]);
      }
      vassert( (rreg_live_after[j] == INVALID_INSTRNO
                && rreg_dead_before[j] == INVALID_INSTRNO)
              ||
               (rreg_live_after[j] != INVALID_INSTRNO
                && rreg_dead_before[j] != INVALID_INSTRNO)
             );

      if (rreg_live_after[j] == INVALID_INSTRNO)
         continue;

      ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
      if (0)
         vex_printf("FLUSH 2 (%d,%d)\n",
                    rreg_live_after[j], rreg_dead_before[j]);
      rreg_lrs_la[rreg_lrs_used].rreg        = univ->regs[j];
      rreg_lrs_la[rreg_lrs_used].live_after  = toShort(rreg_live_after[j]);
      rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
      rreg_lrs_used++;
   }

   /* Compute summary hints for choosing real regs.  If a real reg is
      involved in a hard live range, record that fact in the fixed
      part of the running rreg_state.  Later, when offered a choice between
      rregs, it's better to choose one which is not marked as having
      any HLRs, since ones with HLRs may need to be spilled around
      their HLRs.  Correctness of final assignment is unaffected by
      this mechanism -- it is only an optimisation. */

   for (Int j = 0; j < rreg_lrs_used; j++) {
      HReg rreg = rreg_lrs_la[j].rreg;
      vassert(!hregIsVirtual(rreg));
      /* rreg is involved in a HLR.  Record this info in the array, if
         there is space. */
      UInt ix = hregIndex(rreg);
      vassert(ix < n_rregs);
      rreg_state[ix].has_hlrs = True;
   }
   if (0) {
      for (Int j = 0; j < n_rregs; j++) {
         if (!rreg_state[j].has_hlrs)
            continue;
         ppReg(univ->regs[j]);
         vex_printf(" hinted\n");
      }
   }

   /* Finally, copy the _la variant into the _db variant and
      sort both by their respective fields. */
   rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR));
   for (Int j = 0; j < rreg_lrs_used; j++)
      rreg_lrs_db[j] = rreg_lrs_la[j];

   sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True  /* by .live_after */ );
   sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False /* by .dead_before */ );

   /* And set up the cursors. */
   rreg_lrs_la_next = 0;
   rreg_lrs_db_next = 0;

   for (Int j = 1; j < rreg_lrs_used; j++) {
      vassert(rreg_lrs_la[j-1].live_after  <= rreg_lrs_la[j].live_after);
      vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
   }

   /* ------ end of FINALISE RREG LIVE RANGES ------ */

   if (DEBUG_REGALLOC) {
      for (Int j = 0; j < n_vregs; j++) {
         vex_printf("vreg %d:  la = %d,  db = %d\n",
                    j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
      }
   }

   if (DEBUG_REGALLOC) {
      vex_printf("RRegLRs by LA:\n");
      for (Int j = 0; j < rreg_lrs_used; j++) {
         vex_printf("  ");
         (*ppReg)(rreg_lrs_la[j].rreg);
         vex_printf("      la = %d,  db = %d\n",
                    rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
      }
      vex_printf("RRegLRs by DB:\n");
      for (Int j = 0; j < rreg_lrs_used; j++) {
         vex_printf("  ");
         (*ppReg)(rreg_lrs_db[j].rreg);
         vex_printf("      la = %d,  db = %d\n",
                    rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
      }
   }

   /* --------- Stage 3: allocate spill slots. --------- */

   /* Each spill slot is 8 bytes long.  For vregs which take more than
      64 bits to spill (classes Flt64 and Vec128), we have to allocate
      two consecutive spill slots.  For 256 bit registers (class
      Vec256), we have to allocate four consecutive spill slots.

      For Vec128-class on PowerPC, the spill slot's actual address
      must be 16-byte aligned.  Since the spill slot's address is
      computed as an offset from the guest state pointer, and since
      the user of the generated code must set that pointer to a
      32-byte-aligned value, we have the residual obligation here of
      choosing a 16-byte-aligned spill slot offset for Vec128-class
      values.  Since each spill slot is 8 bytes long, that means for
      Vec128-class values we must allocate a spill slot number which
      is zero mod 2.

      Similarly, for Vec256 class on amd64, find a spill slot number
      which is zero mod 4.  This guarantees it will be 32 byte
      aligned, which isn't actually necessary on amd64 (we use movUpd
      etc to spill), but seems like good practice.

      Do a rank-based allocation of vregs to spill slot numbers.  We
      put as few values as possible in spill slots, but nevertheless
      need to have a spill slot available for all vregs, just in case.
   */
   /* Int max_ss_no = -1; */

   local_memset(ss_busy_until_before, 0, sizeof(ss_busy_until_before));

   for (Int j = 0; j < n_vregs; j++) {

      /* True iff this vreg is unused.  In which case we also expect
         that the reg_class field for it has not been set.  */
      if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
         vassert(vreg_lrs[j].reg_class == HRcINVALID);
         continue;
      }

      /* The spill slots are 64 bits in size.  As per the comment on
         the definition of HRegClass in host_generic_regs.h, that
         means, to spill a vreg of class Flt64 or Vec128, we'll need
         to find two adjacent spill slots to use.  For Vec256, we'll
         need to find four adjacent slots to use.  Note, this logic
         needs to be kept in sync with the size info on the
         definition of HRegClass. */
      Int ss_no = -1;
      switch (vreg_lrs[j].reg_class) {

         case HRcVec128: case HRcFlt64:
            /* Find two adjacent free slots which between them
               provide up to 128 bits in which to spill the vreg.
               Since we are trying to find an even:odd pair, move
               along in steps of 2 (slots). */
            for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2)
               if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after
                   && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after)
                  break;
            if (ss_no >= N_SPILL64S-1) {
               vpanic("LibVEX_N_SPILL_BYTES is too low.  "
                      "Increase and recompile.");
            }
            ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before;
            ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before;
            break;

         default:
            /* The ordinary case -- just find a single spill slot. */
            /* Find the lowest-numbered spill slot which is available
               at the start point of this interval, and assign the
               interval to it. */
            for (ss_no = 0; ss_no < N_SPILL64S; ss_no++)
               if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after)
                  break;
            if (ss_no == N_SPILL64S) {
               vpanic("LibVEX_N_SPILL_BYTES is too low.  "
                      "Increase and recompile.");
            }
            ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before;
            break;

      } /* switch (vreg_lrs[j].reg_class) */

      /* This reflects LibVEX's hard-wired knowledge of the baseBlock
         layout: the guest state, then two equal sized areas following
         it for two sets of shadow state, and then the spill area. */
      vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + ss_no * 8);

      /* Independent check that we've made a sane choice of slot */
      sanity_check_spill_offset( &vreg_lrs[j] );
      /* if (j > max_ss_no) */
      /*    max_ss_no = j; */
   }

   if (0) {
      vex_printf("\n\n");
      for (Int j = 0; j < n_vregs; j++)
         vex_printf("vreg %d    --> spill offset %d\n",
                    j, vreg_lrs[j].spill_offset);
   }
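
   /* Editorial aside: the offset arithmetic above, worked through
      with assumed numbers.  If guest_sizeB is 1024, the spill area
      begins at offset 3 * 1024 = 3072 from the guest state pointer.
      A Vec128 vreg given (even) slot number 6 then gets
      spill_offset = 3072 + 6 * 8 = 3120, which is 0 mod 16 --
      exactly what sanity_check_spill_offset() demands for that
      class. */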

   /* --------- Stage 4: establish rreg preferences --------- */

   /* It may be advantageous to allocate certain vregs to specific
      rregs, as a way of avoiding reg-reg moves later.  Here we
      establish which, if any, rreg each vreg would prefer to be in.
      Note that this constrains the allocator -- ideally we end up
      with as few as possible vregs expressing a preference.

      This is an optimisation: if the .preferred_rreg field is never
      set to anything different from INVALID_HREG, the allocator still
      works. */

   /* 30 Dec 04: removed this mechanism as it does not seem to
      help. */

   /* --------- Stage 5: process instructions --------- */

   /* This is the main loop of the allocator.  First, we need to
      correctly set up our running state, which tracks the status of
      each real register. */

   /* ------ BEGIN: Process each insn in turn. ------ */

   for (Int ii = 0; ii < instrs_in->arr_used; ii++) {

      if (DEBUG_REGALLOC) {
         vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
         vex_printf("---- ");
         (*ppInstr)(instrs_in->arr[ii], mode64);
         vex_printf("\n\nInitial state:\n");
         PRINT_STATE;
         vex_printf("\n");
      }

      /* ------------ Sanity checks ------------ */

      /* Sanity checks are expensive.  So they are done only once
         every 13 instructions, and just before the last
         instruction. */
      do_sanity_check
         = toBool(
              False /* Set to True for sanity checking of all insns. */
              || ii == instrs_in->arr_used-1
              || (ii > 0 && (ii % 13) == 0)
           );

      if (do_sanity_check) {

         /* Sanity check 1: all rregs with a hard live range crossing
            this insn must be marked as unavailable in the running
            state. */
         for (Int j = 0; j < rreg_lrs_used; j++) {
            if (rreg_lrs_la[j].live_after < ii
                && ii < rreg_lrs_la[j].dead_before) {
               /* ii is the middle of a hard live range for some real
                  reg.  Check it's marked as such in the running
                  state. */
               HReg reg = rreg_lrs_la[j].rreg;

               if (0) {
                  vex_printf("considering la %d .. db %d   reg = ",
                             rreg_lrs_la[j].live_after,
                             rreg_lrs_la[j].dead_before);
                  (*ppReg)(reg);
                  vex_printf("\n");
               }

               /* assert that this rreg is marked as unavailable */
               vassert(!hregIsVirtual(reg));
               vassert(rreg_state[hregIndex(reg)].disp == Unavail);
            }
         }

         /* Sanity check 2: conversely, all rregs marked as
            unavailable in the running rreg_state must have a
            corresponding hard live range entry in the rreg_lrs
            array. */
         for (Int j = 0; j < n_rregs; j++) {
            vassert(rreg_state[j].disp == Bound
                    || rreg_state[j].disp == Free
                    || rreg_state[j].disp == Unavail);
            if (rreg_state[j].disp != Unavail)
               continue;
            Int k;
            for (k = 0; k < rreg_lrs_used; k++) {
               HReg reg = rreg_lrs_la[k].rreg;
               vassert(!hregIsVirtual(reg));
               if (hregIndex(reg) == j
                   && rreg_lrs_la[k].live_after < ii
                   && ii < rreg_lrs_la[k].dead_before)
                  break;
            }
            /* If this vassertion fails, we couldn't find a
               corresponding HLR. */
            vassert(k < rreg_lrs_used);
         }

         /* Sanity check 3: all vreg-rreg bindings must bind registers
            of the same class. */
         for (Int j = 0; j < n_rregs; j++) {
            if (rreg_state[j].disp != Bound) {
               vassert(rreg_state[j].eq_spill_slot == False);
               continue;
            }
            vassert(hregClass(univ->regs[j])
                    == hregClass(rreg_state[j].vreg));
            vassert( hregIsVirtual(rreg_state[j].vreg));
         }

         /* Sanity check 4: the vreg_state and rreg_state
            mutually-redundant mappings are consistent.  If
            rreg_state[j].vreg points at some vreg_state entry then
            that vreg_state entry should point back at
            rreg_state[j]. */
         for (Int j = 0; j < n_rregs; j++) {
            if (rreg_state[j].disp != Bound)
               continue;
            Int k = hregIndex(rreg_state[j].vreg);
            vassert(IS_VALID_VREGNO(k));
            vassert(vreg_state[k] == j);
         }
         for (Int j = 0; j < n_vregs; j++) {
            Int k = vreg_state[j];
            if (k == INVALID_RREG_NO)
               continue;
            vassert(IS_VALID_RREGNO(k));
            vassert(rreg_state[k].disp == Bound);
            vassert(hregIndex(rreg_state[k].vreg) == j);
         }

      } /* if (do_sanity_check) */

      /* ------------ end of Sanity checks ------------ */

      /* Do various optimisations pertaining to register coalescing
         and preferencing:
            MOV  v <-> v   coalescing (done here).
            MOV  v <-> r   coalescing (not yet, if ever)
      */
      /* If doing a reg-reg move between two vregs, and the src's live
         range ends here and the dst's live range starts here, bind
         the dst to the src's rreg, and that's all. */
      HReg vregS = INVALID_HREG;
      HReg vregD = INVALID_HREG;
      if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
         if (!hregIsVirtual(vregS)) goto cannot_coalesce;
         if (!hregIsVirtual(vregD)) goto cannot_coalesce;
         /* Check that *isMove is not telling us a bunch of lies ... */
         vassert(hregClass(vregS) == hregClass(vregD));
         Int k = hregIndex(vregS);
         Int m = hregIndex(vregD);
         vassert(IS_VALID_VREGNO(k));
         vassert(IS_VALID_VREGNO(m));
         if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
         if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
         if (DEBUG_REGALLOC) {
            vex_printf("COALESCE ");
            (*ppReg)(vregS);
            vex_printf(" -> ");
            (*ppReg)(vregD);
            vex_printf("\n\n");
         }
         /* Find the state entry for vregS. */
         Int n = vreg_state[k]; /* k is the index of vregS */
         if (n == INVALID_RREG_NO) {
            /* vregS is not currently in a real register.  So we can't
               do the coalescing.  Give up. */
            goto cannot_coalesce;
         }
         vassert(IS_VALID_RREGNO(n));

         /* Finally, we can do the coalescing.  It's trivial -- merely
            claim vregS's register for vregD. */
         rreg_state[n].vreg = vregD;
         vassert(IS_VALID_VREGNO(hregIndex(vregD)));
         vassert(IS_VALID_VREGNO(hregIndex(vregS)));
         vreg_state[hregIndex(vregD)] = toShort(n);
         vreg_state[hregIndex(vregS)] = INVALID_RREG_NO;

         /* This rreg has become associated with a different vreg and
            hence with a different spill slot.  Play safe. */
         rreg_state[n].eq_spill_slot = False;

         /* Move on to the next insn.  We skip the post-insn stuff for
            fixed registers, since this move should not interact with
            them in any way. */
         continue;
      }
     cannot_coalesce:
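
      /* Editorial aside: the two live-range tests above, worked
         through.  Suppose insn 10 is (hypothetically) "mov v3, v7".
         Coalescing requires v3's last use to be exactly here
         (dead_before == 11) and v7 to be born here (live_after ==
         10); then v7 simply inherits v3's rreg, and no move
         instruction survives into the output. */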

      /* ------ Free up rregs bound to dead vregs ------ */

      /* Look for vregs whose live range has just ended, and
         mark the associated rreg as free. */

      for (Int j = 0; j < n_rregs; j++) {
         if (rreg_state[j].disp != Bound)
            continue;
         UInt vregno = hregIndex(rreg_state[j].vreg);
         vassert(IS_VALID_VREGNO(vregno));
         if (vreg_lrs[vregno].dead_before <= ii) {
            rreg_state[j].disp = Free;
            rreg_state[j].eq_spill_slot = False;
            Int m = hregIndex(rreg_state[j].vreg);
            vassert(IS_VALID_VREGNO(m));
            vreg_state[m] = INVALID_RREG_NO;
            if (DEBUG_REGALLOC) {
               vex_printf("free up ");
               (*ppReg)(univ->regs[j]);
               vex_printf("\n");
            }
         }
      }

      /* ------ Pre-instruction actions for fixed rreg uses ------ */

      /* Now we have to deal with rregs which are about to be made
         live by this instruction -- in other words, are entering into
         one of their live ranges.  If any such rreg holds a vreg, we
         will have to free up the rreg.  The simplest solution which
         is correct is to spill the rreg.

         Note we could do better:
         * Could move it into some other free rreg, if one is available

         Do this efficiently, by incrementally stepping along an array
         of rreg HLRs that are known to be sorted by start point
         (their .live_after field).
      */
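
      /* Editorial aside: the two sorted cursors at work, on assumed
         numbers.  Suppose some rreg has the hard live range
         (live_after 2, dead_before 5).  The loop below claims it
         (disp -> Unavail) when ii == 2, spilling any vreg it holds;
         the matching loop after the instruction releases it (disp ->
         Free) when ii+1 == 5, i.e. once insn 4 has been emitted.
         Each cursor only ever moves forward, so the whole sweep is
         linear in rreg_lrs_used across all insns. */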
      while (True) {
         vassert(rreg_lrs_la_next >= 0);
         vassert(rreg_lrs_la_next <= rreg_lrs_used);
         if (rreg_lrs_la_next == rreg_lrs_used)
            break; /* no more real reg live ranges to consider */
         if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
            break; /* next live range does not yet start */
         vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
         /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
            Find the associated rreg_state entry. */
         /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
            Real register live ranges are guaranteed to be well-formed
            in that they start with a write to the register -- Stage 2
            rejects any code not satisfying this.  So the correct
            question to ask is whether
            rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
            whether the reg becomes live after this insn -- rather
            than before it. */
         if (DEBUG_REGALLOC) {
            vex_printf("need to free up rreg: ");
            (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
            vex_printf("\n\n");
         }
         Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg);

         /* If this fails, we don't have an entry for this rreg.
            Which we should. */
         vassert(IS_VALID_RREGNO(k));
         Int m = hregIndex(rreg_state[k].vreg);
         if (rreg_state[k].disp == Bound) {
            /* Yes, there is an associated vreg.  Spill it if it's
               still live. */
            vassert(IS_VALID_VREGNO(m));
            vreg_state[m] = INVALID_RREG_NO;
            if (vreg_lrs[m].dead_before > ii) {
               vassert(vreg_lrs[m].reg_class != HRcINVALID);
               if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
                  HInstr* spill1 = NULL;
                  HInstr* spill2 = NULL;
                  (*genSpill)( &spill1, &spill2, univ->regs[k],
                               vreg_lrs[m].spill_offset, mode64 );
                  vassert(spill1 || spill2); /* can't both be NULL */
                  if (spill1)
                     EMIT_INSTR(spill1);
                  if (spill2)
                     EMIT_INSTR(spill2);
               }
               rreg_state[k].eq_spill_slot = True;
            }
         }
         rreg_state[k].disp = Unavail;
         rreg_state[k].vreg = INVALID_HREG;
         rreg_state[k].eq_spill_slot = False;

         /* check for further rregs entering HLRs at this point */
         rreg_lrs_la_next++;
      }

      if (DEBUG_REGALLOC) {
         vex_printf("After pre-insn actions for fixed regs:\n");
         PRINT_STATE;
         vex_printf("\n");
      }

      /* ------ Deal with the current instruction. ------ */

      /* Finally we can begin the processing of this instruction
         itself.  The aim is to free up enough rregs for this insn.
         This may generate spill stores since we may have to evict
         some vregs currently in rregs.  Also generates spill loads.
         We also build up the final vreg->rreg mapping to be applied
         to the insn. */

      initHRegRemap(&remap);
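
      /* Editorial aside: a hypothetical illustration of the
         direct-reload rewrite that follows (x86-ish syntax, made-up
         offsets).  If insn ii is "add %v7, %v9", %v7 currently lives
         only in its spill slot at offset 0x130, and this is %v7's
         final use, the target backend may rewrite the insn as
         "add 0x130(%baseblock), %v9", saving both a reload
         instruction and a scratch register. */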

      /* ------------ BEGIN directReload optimisation ----------- */

      /* If the instruction reads exactly one vreg which is currently
         in a spill slot, and this is the last use of that vreg, see
         if we can convert the instruction into one that reads
         directly from the spill slot.  This is clearly only possible
         for x86 and amd64 targets, since ppc and arm are load-store
         architectures.  If successful, replace instrs_in->arr[ii]
         with this new instruction, and recompute its reg usage, so
         that the change is invisible to the standard-case handling
         that follows. */

      if (directReload && reg_usage_arr[ii].n_vRegs <= 2) {
         Bool  debug_direct_reload = False;
         HReg  cand     = INVALID_HREG;
         Int   nreads   = 0;
         Short spilloff = 0;

         for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {

            HReg vreg = reg_usage_arr[ii].vRegs[j];
            vassert(hregIsVirtual(vreg));

            if (reg_usage_arr[ii].vMode[j] == HRmRead) {
               nreads++;
               Int m = hregIndex(vreg);
               vassert(IS_VALID_VREGNO(m));
               Int k = vreg_state[m];
               if (!IS_VALID_RREGNO(k)) {
                  /* ok, it is spilled.  Now, is this its last use? */
                  vassert(vreg_lrs[m].dead_before >= ii+1);
                  if (vreg_lrs[m].dead_before == ii+1
                      && hregIsInvalid(cand)) {
                     spilloff = vreg_lrs[m].spill_offset;
                     cand = vreg;
                  }
               }
            }
         }

         if (nreads == 1 && ! hregIsInvalid(cand)) {
            HInstr* reloaded;
            if (reg_usage_arr[ii].n_vRegs == 2)
               vassert(! sameHReg(reg_usage_arr[ii].vRegs[0],
                                  reg_usage_arr[ii].vRegs[1]));

            reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
            if (debug_direct_reload) {
               vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
               ppInstr(instrs_in->arr[ii], mode64);
            }
            if (reloaded) {
               /* Update info about the insn, so it looks as if it had
                  been in this form all along. */
               instrs_in->arr[ii] = reloaded;
               (*getRegUsage)( &reg_usage_arr[ii], instrs_in->arr[ii],
                               mode64 );
               if (debug_direct_reload) {
                  vex_printf("  -->  ");
                  ppInstr(reloaded, mode64);
               }
            }

            if (debug_direct_reload)
               vex_printf("\n");
         }

      }

      /* ------------ END directReload optimisation ------------ */

      /* for each virtual reg mentioned in the insn ... */
      for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {

         HReg vreg = reg_usage_arr[ii].vRegs[j];
         vassert(hregIsVirtual(vreg));

         if (0) {
            vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
         }

         /* Now we're trying to find a rreg for "vreg".  First of all,
            if it already has an rreg assigned, we don't need to do
            anything more.  Inspect the current state to find out. */
         Int m = hregIndex(vreg);
         vassert(IS_VALID_VREGNO(m));
         Int n = vreg_state[m];
         if (IS_VALID_RREGNO(n)) {
            vassert(rreg_state[n].disp == Bound);
            addToHRegRemap(&remap, vreg, univ->regs[n]);
            /* If this rreg is written or modified, mark it as different
               from any spill slot value. */
            if (reg_usage_arr[ii].vMode[j] != HRmRead)
               rreg_state[n].eq_spill_slot = False;
            continue;
         } else {
            vassert(n == INVALID_RREG_NO);
         }

         /* No luck.  The next thing to do is see if there is a
            currently free rreg available, of the correct class.  If
            so, bag it.  NOTE, we could improve this by selecting an
            rreg for which the next live-range event is as far ahead
            as possible. */
         Int k_suboptimal = -1;
         Int k;
         for (k = 0; k < n_rregs; k++) {
            if (rreg_state[k].disp != Free
                || hregClass(univ->regs[k]) != hregClass(vreg))
               continue;
            if (rreg_state[k].has_hlrs) {
               /* Well, at least we can use k_suboptimal if we really
                  have to.  Keep on looking for a better candidate. */
               k_suboptimal = k;
            } else {
               /* Found a preferable reg.  Use it. */
               k_suboptimal = -1;
               break;
            }
         }
         if (k_suboptimal >= 0)
            k = k_suboptimal;
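
         /* Editorial aside: why the search above demotes has_hlrs
            registers.  Suppose (hypothetically) both %r8 and %rax
            are Free and of the right class, but %rax has hard live
            ranges because helper calls clobber it.  Binding the vreg
            to %r8 is preferred: had we picked %rax, the vreg might
            later have to be spilled around each of %rax's hard live
            ranges. */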

         if (k < n_rregs) {
            rreg_state[k].disp = Bound;
            rreg_state[k].vreg = vreg;
            Int p = hregIndex(vreg);
            vassert(IS_VALID_VREGNO(p));
            vreg_state[p] = toShort(k);
            addToHRegRemap(&remap, vreg, univ->regs[k]);
            /* Generate a reload if needed.  This only creates needed
               reloads because the live range builder for vregs will
               guarantee that the first event for a vreg is a write.
               Hence, if this reference is not a write, it cannot be
               the first reference for this vreg, and so a reload is
               indeed needed. */
            if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
               vassert(vreg_lrs[p].reg_class != HRcINVALID);
               HInstr* reload1 = NULL;
               HInstr* reload2 = NULL;
               (*genReload)( &reload1, &reload2, univ->regs[k],
                             vreg_lrs[p].spill_offset, mode64 );
               vassert(reload1 || reload2); /* can't both be NULL */
               if (reload1)
                  EMIT_INSTR(reload1);
               if (reload2)
                  EMIT_INSTR(reload2);
               /* This rreg is read or modified by the instruction.
                  If it's merely read we can claim it now equals the
                  spill slot, but not so if it is modified. */
               if (reg_usage_arr[ii].vMode[j] == HRmRead) {
                  rreg_state[k].eq_spill_slot = True;
               } else {
                  vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
                  rreg_state[k].eq_spill_slot = False;
               }
            } else {
               rreg_state[k].eq_spill_slot = False;
            }

            continue;
         }

         /* Well, now we have no option but to spill a vreg.  It's
            important to make a good choice of vreg to spill, and of
            course we need to be careful not to spill a vreg which is
            needed by this insn. */

         /* First, mark in the rreg_state, those rregs which are not spill
            candidates, due to holding a vreg mentioned by this
            instruction.  Or being of the wrong class. */
         for (k = 0; k < n_rregs; k++) {
            rreg_state[k].is_spill_cand = False;
            if (rreg_state[k].disp != Bound)
               continue;
            if (hregClass(univ->regs[k]) != hregClass(vreg))
               continue;
            rreg_state[k].is_spill_cand = True;
            /* Note, the following loop visits only the virtual regs
               mentioned by the instruction. */
            for (m = 0; m < reg_usage_arr[ii].n_vRegs; m++) {
               if (sameHReg(rreg_state[k].vreg, reg_usage_arr[ii].vRegs[m])) {
                  rreg_state[k].is_spill_cand = False;
                  break;
               }
            }
         }

         /* We can choose to spill any rreg satisfying
            rreg_state[r].is_spill_cand (so to speak).  Choose r so that
            the next use of its associated vreg is as far ahead as
            possible, in the hope that this will minimise the number
            of consequent reloads required. */
         Int spillee
            = findMostDistantlyMentionedVReg (
                 reg_usage_arr, ii+1, instrs_in->arr_used, rreg_state, n_rregs );

         if (spillee == -1) {
            /* Hmmmmm.  There don't appear to be any spill candidates.
               We're hosed. */
            vex_printf("reg_alloc: can't find a register in class: ");
            ppHRegClass(hregClass(vreg));
            vex_printf("\n");
            vpanic("reg_alloc: can't create a free register.");
         }

         /* Right.  So we're going to spill rreg_state[spillee]. */
         vassert(IS_VALID_RREGNO(spillee));
         vassert(rreg_state[spillee].disp == Bound);
         /* check it's the right class */
         vassert(hregClass(univ->regs[spillee]) == hregClass(vreg));
         /* check we're not ejecting the vreg for which we are trying
            to free up a register. */
         vassert(! sameHReg(rreg_state[spillee].vreg, vreg));

         m = hregIndex(rreg_state[spillee].vreg);
         vassert(IS_VALID_VREGNO(m));

         /* So here's the spill store.  Assert that we're spilling a
            live vreg. */
         vassert(vreg_lrs[m].dead_before > ii);
         vassert(vreg_lrs[m].reg_class != HRcINVALID);
         if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
            HInstr* spill1 = NULL;
            HInstr* spill2 = NULL;
            (*genSpill)( &spill1, &spill2, univ->regs[spillee],
                         vreg_lrs[m].spill_offset, mode64 );
            vassert(spill1 || spill2); /* can't both be NULL */
            if (spill1)
               EMIT_INSTR(spill1);
            if (spill2)
               EMIT_INSTR(spill2);
         }

         /* Update the rreg_state to reflect the new assignment for this
            rreg. */
         rreg_state[spillee].vreg = vreg;
         vreg_state[m] = INVALID_RREG_NO;

         rreg_state[spillee].eq_spill_slot = False; /* be safe */

         m = hregIndex(vreg);
         vassert(IS_VALID_VREGNO(m));
         vreg_state[m] = toShort(spillee);

         /* Now, if this vreg is being read or modified (as opposed to
            written), we have to generate a reload for it. */
         if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
            vassert(vreg_lrs[m].reg_class != HRcINVALID);
            HInstr* reload1 = NULL;
            HInstr* reload2 = NULL;
            (*genReload)( &reload1, &reload2, univ->regs[spillee],
                          vreg_lrs[m].spill_offset, mode64 );
            vassert(reload1 || reload2); /* can't both be NULL */
            if (reload1)
               EMIT_INSTR(reload1);
            if (reload2)
               EMIT_INSTR(reload2);
            /* This rreg is read or modified by the instruction.
               If it's merely read we can claim it now equals the
               spill slot, but not so if it is modified. */
            if (reg_usage_arr[ii].vMode[j] == HRmRead) {
               rreg_state[spillee].eq_spill_slot = True;
            } else {
               vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
               rreg_state[spillee].eq_spill_slot = False;
            }
         }

         /* So after much twisting and turning, we have vreg mapped to
            rreg_state[spillee].rreg.  Note that in the map. */
         addToHRegRemap(&remap, vreg, univ->regs[spillee]);

      } /* iterate over virtual registers in this instruction. */
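
      /* Editorial aside: a worked run of the eq_spill_slot
         optimisation used above.  Assume vreg v5 is reloaded
         (read-only) at insn 20: its rreg now carries
         eq_spill_slot == True.  If that rreg must be vacated at insn
         23 and v5 was not written in between, the spill store is
         skipped entirely -- the slot already holds the right value.
         Any write or modify of v5 clears the flag, forcing a real
         store next time. */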

      /* We've finished clowning around with registers in this instruction.
         Three results:
         - the running rreg_state[] has been updated
         - a suitable vreg->rreg mapping for this instruction has been
           constructed
         - spill and reload instructions may have been emitted.

        The final step is to apply the mapping to the instruction,
        and emit that.
      */

      /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
      (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
      EMIT_INSTR( instrs_in->arr[ii] );

      if (DEBUG_REGALLOC) {
         vex_printf("After dealing with current insn:\n");
         PRINT_STATE;
         vex_printf("\n");
      }

      /* ------ Post-instruction actions for fixed rreg uses ------ */

      /* Now we need to check for rregs exiting fixed live ranges
         after this instruction, and if so mark them as free. */
      while (True) {
         vassert(rreg_lrs_db_next >= 0);
         vassert(rreg_lrs_db_next <= rreg_lrs_used);
         if (rreg_lrs_db_next == rreg_lrs_used)
            break; /* no more real reg live ranges to consider */
         if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
            break; /* next live range has not yet ended */
         vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
         /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live
            range.  Mark it as such in the main rreg_state array. */
         HReg reg = rreg_lrs_db[rreg_lrs_db_next].rreg;
         vassert(!hregIsVirtual(reg));
         Int k = hregIndex(reg);
         vassert(IS_VALID_RREGNO(k));
         vassert(rreg_state[k].disp == Unavail);
         rreg_state[k].disp = Free;
         rreg_state[k].vreg = INVALID_HREG;
         rreg_state[k].eq_spill_slot = False;

         /* check for further rregs leaving HLRs at this point */
         rreg_lrs_db_next++;
      }

      if (DEBUG_REGALLOC) {
         vex_printf("After post-insn actions for fixed regs:\n");
         PRINT_STATE;
         vex_printf("\n");
      }

   } /* iterate over insns */
------ */ 1594 1595 /* free(rreg_state); */ 1596 /* free(rreg_lrs); */ 1597 /* if (vreg_lrs) free(vreg_lrs); */ 1598 1599 /* Paranoia */ 1600 vassert(rreg_lrs_la_next == rreg_lrs_used); 1601 vassert(rreg_lrs_db_next == rreg_lrs_used); 1602 1603 return instrs_out; 1604 1605 # undef INVALID_INSTRNO 1606 # undef EMIT_INSTR 1607 # undef PRINT_STATE 1608 } 1609 1610 1611 1612 /*---------------------------------------------------------------*/ 1613 /*--- host_reg_alloc2.c ---*/ 1614 /*---------------------------------------------------------------*/ 1615