Home | History | Annotate | Download | only in priv
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                 host_reg_alloc2.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2011 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex.h"
     38 
     39 #include "main_util.h"
     40 #include "host_generic_regs.h"
     41 
     42 /* Set to 1 for lots of debugging output. */
     43 #define DEBUG_REGALLOC 0
     44 
     45 
     46 /* TODO 27 Oct 04:
     47 
     48    Better consistency checking from what isMove tells us.
     49 
     50    We can possibly do V-V coalescing even when the src is spilled,
     51    providing we can arrange for the dst to have the same spill slot.
     52 
     53    Note that state[].hreg is the same as the available real regs.
     54 
     55    Generally rationalise data structures.  */
     56 
     57 
     58 /* Records information on virtual register live ranges.  Computed once
     59    and remains unchanged after that. */
     60 typedef
     61    struct {
     62       /* Becomes live for the first time after this insn ... */
     63       Short live_after;
     64       /* Becomes dead for the last time before this insn ... */
     65       Short dead_before;
     66       /* The "home" spill slot, if needed.  Never changes. */
     67       Short spill_offset;
     68       Short spill_size;
     69       /* What kind of register this is. */
     70       HRegClass reg_class;
     71    }
     72    VRegLR;
     73 
     74 
     75 /* Records information on real-register live ranges.  Computed once
     76    and remains unchanged after that. */
     77 typedef
     78    struct {
     79       HReg rreg;
     80       /* Becomes live after this insn ... */
     81       Short live_after;
     82       /* Becomes dead before this insn ... */
     83       Short dead_before;
     84    }
     85    RRegLR;
     86 
     87 
     88 /* An array of the following structs (rreg_state) comprises the
     89    running state of the allocator.  It indicates what the current
     90    disposition of each allocatable real register is.  The array gets
     91    updated as the allocator processes instructions. */
     92 typedef
     93    struct {
     94       /* ------ FIELDS WHICH DO NOT CHANGE ------ */
     95       /* Which rreg is this for? */
     96       HReg rreg;
     97       /* Is this involved in any HLRs?  (only an optimisation hint) */
     98       Bool has_hlrs;
     99       /* ------ FIELDS WHICH DO CHANGE ------ */
    100       /* 6 May 07: rearranged fields below so the whole struct fits
    101          into 16 bytes on both x86 and amd64. */
    102       /* Used when .disp == Bound and we are looking for vregs to
    103          spill. */
    104       Bool is_spill_cand;
    105       /* Optimisation: used when .disp == Bound.  Indicates when the
    106          rreg has the same value as the spill slot for the associated
    107          vreg.  Is safely left at False, and becomes True after a
    108          spill store or reload for this rreg. */
    109       Bool eq_spill_slot;
    110       /* What's it's current disposition? */
    111       enum { Free,     /* available for use */
    112              Unavail,  /* in a real-reg live range */
    113              Bound     /* in use (holding value of some vreg) */
    114            }
    115            disp;
    116       /* If .disp == Bound, what vreg is it bound to? */
    117       HReg vreg;
    118    }
    119    RRegState;
    120 
    121 
    122 /* The allocator also maintains a redundant array of indexes
    123    (vreg_state) from vreg numbers back to entries in rreg_state.  It
    124    is redundant because iff vreg_state[i] == j then
    125    hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
    126    point at each other.  The purpose of this is to speed up activities
    127    which involve looking for a particular vreg: there is no need to
    128    scan the rreg_state looking for it, just index directly into
    129    vreg_state.  The FAQ "does this vreg already have an associated
    130    rreg" is the main beneficiary.
    131 
    132    To indicate, in vreg_state[i], that a given vreg is not currently
    133    associated with any rreg, that entry can be set to INVALID_RREG_NO.
    134 
    135    Because the vreg_state entries are signed Shorts, the max number
    136    of vregs that can be handed by regalloc is 32767.
    137 */
    138 
    139 #define INVALID_RREG_NO ((Short)(-1))
    140 
    141 #define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
    142 #define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
    143 
    144 
    145 /* Does this instruction mention a particular reg? */
    146 static Bool instrMentionsReg (
    147    void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
    148    HInstr* instr,
    149    HReg r,
    150    Bool mode64
    151 )
    152 {
    153    Int       i;
    154    HRegUsage reg_usage;
    155    (*getRegUsage)(&reg_usage, instr, mode64);
    156    for (i = 0; i < reg_usage.n_used; i++)
    157       if (reg_usage.hreg[i] == r)
    158          return True;
    159    return False;
    160 }
    161 
    162 
    163 /* Search forward from some given point in the incoming instruction
    164    sequence.  Point is to select a virtual register to spill, by
    165    finding the vreg which is mentioned as far ahead as possible, in
    166    the hope that this will minimise the number of consequent reloads.
    167 
    168    Only do the search for vregs which are Bound in the running state,
    169    and for which the .is_spill_cand field is set.  This allows the
    170    caller to arbitrarily restrict the set of spill candidates to be
    171    considered.
    172 
    173    Returns an index into the state array indicating the (v,r) pair to
    174    spill, or -1 if none was found.  */
    175 static
    176 Int findMostDistantlyMentionedVReg (
    177    void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
    178    HInstrArray* instrs_in,
    179    Int          search_from_instr,
    180    RRegState*   state,
    181    Int          n_state,
    182    Bool         mode64
    183 )
    184 {
    185    Int k, m;
    186    Int furthest_k = -1;
    187    Int furthest   = -1;
    188    vassert(search_from_instr >= 0);
    189    for (k = 0; k < n_state; k++) {
    190       if (!state[k].is_spill_cand)
    191          continue;
    192       vassert(state[k].disp == Bound);
    193       for (m = search_from_instr; m < instrs_in->arr_used; m++) {
    194          if (instrMentionsReg(getRegUsage,
    195                               instrs_in->arr[m], state[k].vreg, mode64))
    196             break;
    197       }
    198       if (m > furthest) {
    199          furthest   = m;
    200          furthest_k = k;
    201       }
    202    }
    203    return furthest_k;
    204 }
    205 
    206 
    207 /* Check that this vreg has been assigned a sane spill offset. */
    208 static inline void sanity_check_spill_offset ( VRegLR* vreg )
    209 {
    210    if (vreg->reg_class == HRcVec128 || vreg->reg_class == HRcFlt64) {
    211       vassert(0 == ((UShort)vreg->spill_offset % 16));
    212    } else {
    213       vassert(0 == ((UShort)vreg->spill_offset % 8));
    214    }
    215 }
    216 
    217 
    218 /* Double the size of the real-reg live-range array, if needed. */
    219 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
    220 {
    221    Int     k;
    222    RRegLR* arr2;
    223    if (used < *size) return;
    224    if (0)
    225       vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size);
    226    vassert(used == *size);
    227    arr2 = LibVEX_Alloc(2 * *size * sizeof(RRegLR));
    228    for (k = 0; k < *size; k++)
    229       arr2[k] = (*info)[k];
    230    *size *= 2;
    231    *info = arr2;
    232 }
    233 
    234 
    235 /* Sort an array of RRegLR entries by either the .live_after or
    236    .dead_before fields.  This is performance-critical. */
    237 static void sortRRLRarray ( RRegLR* arr,
    238                             Int size, Bool by_live_after )
    239 {
    240    Int    incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
    241                        9841, 29524, 88573, 265720,
    242                        797161, 2391484 };
    243    Int    lo = 0;
    244    Int    hi = size-1;
    245    Int    i, j, h, bigN, hp;
    246    RRegLR v;
    247 
    248    vassert(size >= 0);
    249    if (size == 0)
    250       return;
    251 
    252    bigN = hi - lo + 1; if (bigN < 2) return;
    253    hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
    254 
    255    if (by_live_after) {
    256 
    257       for ( ; hp >= 0; hp--) {
    258          h = incs[hp];
    259          for (i = lo + h; i <= hi; i++) {
    260             v = arr[i];
    261             j = i;
    262             while (arr[j-h].live_after > v.live_after) {
    263                arr[j] = arr[j-h];
    264                j = j - h;
    265                if (j <= (lo + h - 1)) break;
    266             }
    267             arr[j] = v;
    268          }
    269       }
    270 
    271    } else {
    272 
    273       for ( ; hp >= 0; hp--) {
    274          h = incs[hp];
    275          for (i = lo + h; i <= hi; i++) {
    276             v = arr[i];
    277             j = i;
    278             while (arr[j-h].dead_before > v.dead_before) {
    279                arr[j] = arr[j-h];
    280                j = j - h;
    281                if (j <= (lo + h - 1)) break;
    282             }
    283             arr[j] = v;
    284          }
    285       }
    286 
    287    }
    288 }
    289 
    290 
    291 /* A target-independent register allocator.  Requires various
    292    functions which it uses to deal abstractly with instructions and
    293    registers, since it cannot have any target-specific knowledge.
    294 
    295    Returns a new list of instructions, which, as a result of the
    296    behaviour of mapRegs, will be in-place modifications of the
    297    original instructions.
    298 
    299    Requires that the incoming code has been generated using
    300    vreg numbers 0, 1 .. n_vregs-1.  Appearance of a vreg outside
    301    that range is a checked run-time error.
    302 
    303    Takes an expandable array of pointers to unallocated insns.
    304    Returns an expandable array of pointers to allocated insns.
    305 */
    306 HInstrArray* doRegisterAllocation (
    307 
    308    /* Incoming virtual-registerised code. */
    309    HInstrArray* instrs_in,
    310 
    311    /* An array listing all the real registers the allocator may use,
    312       in no particular order. */
    313    HReg* available_real_regs,
    314    Int   n_available_real_regs,
    315 
    316    /* Return True iff the given insn is a reg-reg move, in which
    317       case also return the src and dst regs. */
    318    Bool (*isMove) ( HInstr*, HReg*, HReg* ),
    319 
    320    /* Get info about register usage in this insn. */
    321    void (*getRegUsage) ( HRegUsage*, HInstr*, Bool ),
    322 
    323    /* Apply a reg-reg mapping to an insn. */
    324    void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
    325 
    326    /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
    327       real reg to a spill slot byte offset.  The two leading HInstr**
    328       args are out parameters, through which the generated insns are
    329       returned.  Also (optionally) a 'directReload' function, which
    330       attempts to replace a given instruction by one which reads
    331       directly from a specified spill slot.  May be NULL, in which
    332       case the optimisation is not attempted. */
    333    void    (*genSpill)  ( HInstr**, HInstr**, HReg, Int, Bool ),
    334    void    (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
    335    HInstr* (*directReload) ( HInstr*, HReg, Short ),
    336    Int     guest_sizeB,
    337 
    338    /* For debug printing only. */
    339    void (*ppInstr) ( HInstr*, Bool ),
    340    void (*ppReg) ( HReg ),
    341 
    342    /* 32/64bit mode */
    343    Bool mode64
    344 )
    345 {
    346 #  define N_SPILL64S  (LibVEX_N_SPILL_BYTES / 8)
    347 
    348    const Bool eq_spill_opt = True;
    349 
    350    /* Iterators and temporaries. */
    351    Int       ii, j, k, m, spillee, k_suboptimal;
    352    HReg      rreg, vreg, vregS, vregD;
    353    HRegUsage reg_usage;
    354 
    355    /* Info on vregs and rregs.  Computed once and remains
    356       unchanged. */
    357    Int     n_vregs;
    358    VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
    359 
    360    /* We keep two copies of the real-reg live range info, one sorted
    361       by .live_after and the other by .dead_before.  First the
    362       unsorted info is created in the _la variant is copied into the
    363       _db variant.  Once that's done both of them are sorted.
    364       We also need two integer cursors which record the next
    365       location in the two arrays to consider. */
    366    RRegLR* rreg_lrs_la;
    367    RRegLR* rreg_lrs_db;
    368    Int     rreg_lrs_size;
    369    Int     rreg_lrs_used;
    370    Int     rreg_lrs_la_next;
    371    Int     rreg_lrs_db_next;
    372 
    373    /* Used when constructing vreg_lrs (for allocating stack
    374       slots). */
    375    Int ss_busy_until_before[N_SPILL64S];
    376 
    377    /* Used when constructing rreg_lrs. */
    378    Int* rreg_live_after;
    379    Int* rreg_dead_before;
    380 
    381    /* Running state of the core allocation algorithm. */
    382    RRegState* rreg_state;  /* [0 .. n_rregs-1] */
    383    Int        n_rregs;
    384 
    385    /* .. and the redundant backward map */
    386    /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
    387       This inplies n_rregs must be <= 32768. */
    388    Short*     vreg_state;  /* [0 .. n_vregs-1] */
    389 
    390    /* The vreg -> rreg map constructed and then applied to each
    391       instr. */
    392    HRegRemap remap;
    393 
    394    /* The output array of instructions. */
    395    HInstrArray* instrs_out;
    396 
    397    /* Sanity checks are expensive.  They are only done periodically,
    398       not at each insn processed. */
    399    Bool do_sanity_check;
    400 
    401    vassert(0 == (guest_sizeB % 16));
    402    vassert(0 == (LibVEX_N_SPILL_BYTES % 16));
    403    vassert(0 == (N_SPILL64S % 2));
    404 
    405    /* The live range numbers are signed shorts, and so limiting the
    406       number of insns to 10000 comfortably guards against them
    407       overflowing 32k. */
    408    vassert(instrs_in->arr_used <= 10000);
    409 
    410 #  define INVALID_INSTRNO (-2)
    411 
    412 #  define EMIT_INSTR(_instr)                  \
    413       do {                                    \
    414         HInstr* _tmp = (_instr);              \
    415         if (DEBUG_REGALLOC) {                 \
    416            vex_printf("**  ");                \
    417            (*ppInstr)(_tmp, mode64);          \
    418            vex_printf("\n\n");                \
    419         }                                     \
    420         addHInstr ( instrs_out, _tmp );       \
    421       } while (0)
    422 
    423 #   define PRINT_STATE						   \
    424       do {							   \
    425          Int z, q;						   \
    426          for (z = 0; z < n_rregs; z++) {			   \
    427             vex_printf("  rreg_state[%2d] = ", z);		   \
    428             (*ppReg)(rreg_state[z].rreg);	       		   \
    429             vex_printf("  \t");					   \
    430             switch (rreg_state[z].disp) {			   \
    431                case Free:    vex_printf("Free\n"); break;	   \
    432                case Unavail: vex_printf("Unavail\n"); break;	   \
    433                case Bound:   vex_printf("BoundTo "); 		   \
    434                              (*ppReg)(rreg_state[z].vreg);	   \
    435                              vex_printf("\n"); break;		   \
    436             }							   \
    437          }							   \
    438          vex_printf("\n  vreg_state[0 .. %d]:\n    ", n_vregs-1);  \
    439          q = 0;                                                    \
    440          for (z = 0; z < n_vregs; z++) {                           \
    441             if (vreg_state[z] == INVALID_RREG_NO)                  \
    442                continue;                                           \
    443             vex_printf("[%d] -> %d   ", z, vreg_state[z]);         \
    444             q++;                                                   \
    445             if (q > 0 && (q % 6) == 0)                             \
    446                vex_printf("\n    ");                               \
    447          }                                                         \
    448          vex_printf("\n");                                         \
    449       } while (0)
    450 
    451 
    452    /* --------- Stage 0: set up output array --------- */
    453    /* --------- and allocate/initialise running state. --------- */
    454 
    455    instrs_out = newHInstrArray();
    456 
    457    /* ... and initialise running state. */
    458    /* n_rregs is no more than a short name for n_available_real_regs. */
    459    n_rregs = n_available_real_regs;
    460    n_vregs = instrs_in->n_vregs;
    461 
    462    /* If this is not so, vreg_state entries will overflow. */
    463    vassert(n_vregs < 32767);
    464 
    465    rreg_state = LibVEX_Alloc(n_rregs * sizeof(RRegState));
    466    vreg_state = LibVEX_Alloc(n_vregs * sizeof(Short));
    467 
    468    for (j = 0; j < n_rregs; j++) {
    469       rreg_state[j].rreg          = available_real_regs[j];
    470       rreg_state[j].has_hlrs      = False;
    471       rreg_state[j].disp          = Free;
    472       rreg_state[j].vreg          = INVALID_HREG;
    473       rreg_state[j].is_spill_cand = False;
    474       rreg_state[j].eq_spill_slot = False;
    475    }
    476 
    477    for (j = 0; j < n_vregs; j++)
    478       vreg_state[j] = INVALID_RREG_NO;
    479 
    480 
    481    /* --------- Stage 1: compute vreg live ranges. --------- */
    482    /* --------- Stage 2: compute rreg live ranges. --------- */
    483 
    484    /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
    485 
    486    /* This is relatively simple, because (1) we only seek the complete
    487       end-to-end live range of each vreg, and are not interested in
    488       any holes in it, and (2) the vregs are conveniently numbered 0
    489       .. n_vregs-1, so we can just dump the results in a
    490       pre-allocated array. */
    491 
    492    vreg_lrs = NULL;
    493    if (n_vregs > 0)
    494       vreg_lrs = LibVEX_Alloc(sizeof(VRegLR) * n_vregs);
    495 
    496    for (j = 0; j < n_vregs; j++) {
    497       vreg_lrs[j].live_after     = INVALID_INSTRNO;
    498       vreg_lrs[j].dead_before    = INVALID_INSTRNO;
    499       vreg_lrs[j].spill_offset   = 0;
    500       vreg_lrs[j].spill_size     = 0;
    501       vreg_lrs[j].reg_class      = HRcINVALID;
    502    }
    503 
    504    /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
    505 
    506    /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
    507 
    508    /* This is more complex than Stage 1, because we need to compute
    509       exactly all the live ranges of all the allocatable real regs,
    510       and we don't know in advance how many there will be. */
    511 
    512    rreg_lrs_used = 0;
    513    rreg_lrs_size = 4;
    514    rreg_lrs_la = LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR));
    515    rreg_lrs_db = NULL; /* we'll create this later */
    516 
    517    /* We'll need to track live range start/end points seperately for
    518       each rreg.  Sigh. */
    519    vassert(n_available_real_regs > 0);
    520    rreg_live_after  = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
    521    rreg_dead_before = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
    522 
    523    for (j = 0; j < n_available_real_regs; j++) {
    524       rreg_live_after[j] =
    525       rreg_dead_before[j] = INVALID_INSTRNO;
    526    }
    527 
    528    /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
    529 
    530    /* ------ start of ITERATE OVER INSNS ------ */
    531 
    532    for (ii = 0; ii < instrs_in->arr_used; ii++) {
    533 
    534       (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
    535 
    536 #     if 0
    537       vex_printf("\n%d  stage1: ", ii);
    538       (*ppInstr)(instrs_in->arr[ii], mode64);
    539       vex_printf("\n");
    540       ppHRegUsage(&reg_usage);
    541 #     endif
    542 
    543       /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
    544 
    545       /* for each reg mentioned in the insn ... */
    546       for (j = 0; j < reg_usage.n_used; j++) {
    547 
    548          vreg = reg_usage.hreg[j];
    549          /* only interested in virtual registers right now. */
    550          if (!hregIsVirtual(vreg))
    551             continue;
    552          k = hregNumber(vreg);
    553          if (k < 0 || k >= n_vregs) {
    554             vex_printf("\n");
    555             (*ppInstr)(instrs_in->arr[ii], mode64);
    556             vex_printf("\n");
    557             vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
    558             vpanic("doRegisterAllocation: out-of-range vreg");
    559          }
    560 
    561          /* Take the opportunity to note its regclass.  We'll need
    562             that when allocating spill slots. */
    563          if (vreg_lrs[k].reg_class == HRcINVALID) {
    564             /* First mention of this vreg. */
    565             vreg_lrs[k].reg_class = hregClass(vreg);
    566          } else {
    567             /* Seen it before, so check for consistency. */
    568             vassert(vreg_lrs[k].reg_class == hregClass(vreg));
    569          }
    570 
    571          /* Now consider live ranges. */
    572          switch (reg_usage.mode[j]) {
    573             case HRmRead:
    574                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
    575                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
    576                   vpanic("doRegisterAllocation: "
    577                          "first event for vreg is Read");
    578                }
    579                vreg_lrs[k].dead_before = toShort(ii + 1);
    580                break;
    581             case HRmWrite:
    582                if (vreg_lrs[k].live_after == INVALID_INSTRNO)
    583                   vreg_lrs[k].live_after = toShort(ii);
    584                vreg_lrs[k].dead_before = toShort(ii + 1);
    585                break;
    586             case HRmModify:
    587                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
    588                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
    589                   vpanic("doRegisterAllocation: "
    590                          "first event for vreg is Modify");
    591                }
    592                vreg_lrs[k].dead_before = toShort(ii + 1);
    593                break;
    594             default:
    595                vpanic("doRegisterAllocation(1)");
    596          } /* switch */
    597 
    598       } /* iterate over registers */
    599 
    600       /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
    601 
    602       /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
    603 
    604       /* for each reg mentioned in the insn ... */
    605       for (j = 0; j < reg_usage.n_used; j++) {
    606 
    607          /* Dummy initialisations of flush_la and flush_db to avoid
    608             possible bogus uninit-var warnings from gcc. */
    609          Int  flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
    610          Bool flush;
    611 
    612          rreg = reg_usage.hreg[j];
    613 
    614          /* only interested in real registers right now. */
    615          if (hregIsVirtual(rreg))
    616             continue;
    617 
    618          /* Furthermore, we're not interested in this rreg unless it's
    619             one of the allocatable ones.  For example, it could be a
    620             stack pointer register, or some other register beyond our
    621             control, in which case we should just ignore it. */
    622          for (k = 0; k < n_available_real_regs; k++)
    623             if (available_real_regs[k] == rreg)
    624                break;
    625          if (k == n_available_real_regs)
    626             continue; /* not found -- ignore. */
    627          flush = False;
    628          switch (reg_usage.mode[j]) {
    629             case HRmWrite:
    630                flush_la = rreg_live_after[k];
    631                flush_db = rreg_dead_before[k];
    632                if (flush_la != INVALID_INSTRNO
    633                    && flush_db != INVALID_INSTRNO)
    634                   flush = True;
    635                rreg_live_after[k]  = ii;
    636                rreg_dead_before[k] = ii+1;
    637                break;
    638             case HRmRead:
    639                if (rreg_live_after[k] == INVALID_INSTRNO) {
    640                   vex_printf("\nOFFENDING RREG = ");
    641                   (*ppReg)(available_real_regs[k]);
    642                   vex_printf("\n");
    643                   vex_printf("\nOFFENDING instr = ");
    644                   (*ppInstr)(instrs_in->arr[ii], mode64);
    645                   vex_printf("\n");
    646                   vpanic("doRegisterAllocation: "
    647                          "first event for rreg is Read");
    648                }
    649                rreg_dead_before[k] = ii+1;
    650                break;
    651             case HRmModify:
    652                if (rreg_live_after[k] == INVALID_INSTRNO) {
    653                   vex_printf("\nOFFENDING RREG = ");
    654                   (*ppReg)(available_real_regs[k]);
    655                   vex_printf("\n");
    656                   vex_printf("\nOFFENDING instr = ");
    657                   (*ppInstr)(instrs_in->arr[ii], mode64);
    658                   vex_printf("\n");
    659                   vpanic("doRegisterAllocation: "
    660                          "first event for rreg is Modify");
    661                }
    662                rreg_dead_before[k] = ii+1;
    663                break;
    664             default:
    665                vpanic("doRegisterAllocation(2)");
    666          }
    667 
    668          if (flush) {
    669             vassert(flush_la != INVALID_INSTRNO);
    670             vassert(flush_db != INVALID_INSTRNO);
    671             ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
    672             if (0)
    673                vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
    674             rreg_lrs_la[rreg_lrs_used].rreg        = rreg;
    675             rreg_lrs_la[rreg_lrs_used].live_after  = toShort(flush_la);
    676             rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
    677             rreg_lrs_used++;
    678          }
    679 
    680       } /* iterate over regs in the instr */
    681 
    682       /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
    683 
    684    } /* iterate over insns */
    685 
    686    /* ------ end of ITERATE OVER INSNS ------ */
    687 
    688    /* ------ start of FINALISE RREG LIVE RANGES ------ */
    689 
    690    /* Now finish up any live ranges left over. */
    691    for (j = 0; j < n_available_real_regs; j++) {
    692 
    693 #     if 0
    694       vex_printf("residual %d:  %d %d\n", j, rreg_live_after[j],
    695                                              rreg_dead_before[j]);
    696 #     endif
    697       vassert( (rreg_live_after[j] == INVALID_INSTRNO
    698                && rreg_dead_before[j] == INVALID_INSTRNO)
    699               ||
    700               (rreg_live_after[j] != INVALID_INSTRNO
    701                && rreg_dead_before[j] != INVALID_INSTRNO)
    702             );
    703 
    704       if (rreg_live_after[j] == INVALID_INSTRNO)
    705          continue;
    706 
    707       ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
    708       if (0)
    709          vex_printf("FLUSH 2 (%d,%d)\n",
    710                     rreg_live_after[j], rreg_dead_before[j]);
    711       rreg_lrs_la[rreg_lrs_used].rreg        = available_real_regs[j];
    712       rreg_lrs_la[rreg_lrs_used].live_after  = toShort(rreg_live_after[j]);
    713       rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
    714       rreg_lrs_used++;
    715    }
    716 
    717    /* Compute summary hints for choosing real regs.  If a real reg is
    718       involved in a hard live range, record that fact in the fixed
    719       part of the running rreg_state.  Later, when offered a choice between
    720       rregs, it's better to choose one which is not marked as having
    721       any HLRs, since ones with HLRs may need to be spilled around
    722       their HLRs.  Correctness of final assignment is unaffected by
    723       this mechanism -- it is only an optimisation. */
    724 
    725    for (j = 0; j < rreg_lrs_used; j++) {
    726       rreg = rreg_lrs_la[j].rreg;
    727       vassert(!hregIsVirtual(rreg));
    728       /* rreg is involved in a HLR.  Record this info in the array, if
    729          there is space. */
    730       for (k = 0; k < n_rregs; k++)
    731          if (rreg_state[k].rreg == rreg)
    732             break;
    733       vassert(k < n_rregs); /* else rreg was not found in rreg_state?! */
    734       rreg_state[k].has_hlrs = True;
    735    }
    736    if (0) {
    737       for (j = 0; j < n_rregs; j++) {
    738          if (!rreg_state[j].has_hlrs)
    739             continue;
    740          ppReg(rreg_state[j].rreg);
    741          vex_printf(" hinted\n");
    742       }
    743    }
    744 
    745    /* Finally, copy the _la variant into the _db variant and
    746       sort both by their respective fields. */
    747    rreg_lrs_db = LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR));
    748    for (j = 0; j < rreg_lrs_used; j++)
    749       rreg_lrs_db[j] = rreg_lrs_la[j];
    750 
    751    sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/  );
    752    sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
    753 
    754    /* And set up the cursors. */
    755    rreg_lrs_la_next = 0;
    756    rreg_lrs_db_next = 0;
    757 
    758    for (j = 1; j < rreg_lrs_used; j++) {
    759       vassert(rreg_lrs_la[j-1].live_after  <= rreg_lrs_la[j].live_after);
    760       vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
    761    }
    762 
    763    /* ------ end of FINALISE RREG LIVE RANGES ------ */
    764 
    765 #  if DEBUG_REGALLOC
    766    for (j = 0; j < n_vregs; j++) {
    767       vex_printf("vreg %d:  la = %d,  db = %d\n",
    768                  j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
    769    }
    770 #  endif
    771 
    772 #  if DEBUG_REGALLOC
    773    vex_printf("RRegLRs by LA:\n");
    774    for (j = 0; j < rreg_lrs_used; j++) {
    775       vex_printf("  ");
    776       (*ppReg)(rreg_lrs_la[j].rreg);
    777       vex_printf("      la = %d,  db = %d\n",
    778                  rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
    779    }
    780    vex_printf("RRegLRs by DB:\n");
    781    for (j = 0; j < rreg_lrs_used; j++) {
    782       vex_printf("  ");
    783       (*ppReg)(rreg_lrs_db[j].rreg);
    784       vex_printf("      la = %d,  db = %d\n",
    785                  rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
    786    }
    787 #  endif
    788 
    789    /* --------- Stage 3: allocate spill slots. --------- */
    790 
    791    /* Each spill slot is 8 bytes long.  For vregs which take more than
    792       64 bits to spill (classes Flt64 and Vec128), we have to allocate
    793       two spill slots.
    794 
    795       For Vec128-class on PowerPC, the spill slot's actual address
    796       must be 16-byte aligned.  Since the spill slot's address is
    797       computed as an offset from the guest state pointer, and since
    798       the user of the generated code must set that pointer to a
    799       16-aligned value, we have the residual obligation here of
    800       choosing a 16-aligned spill slot offset for Vec128-class values.
    801       Since each spill slot is 8 bytes long, that means for
    802       Vec128-class values we must allocated a spill slot number which
    803       is zero mod 2.
    804 
    805       Do a rank-based allocation of vregs to spill slot numbers.  We
    806       put as few values as possible in spill slots, but nevertheless
    807       need to have a spill slot available for all vregs, just in case.
    808    */
    809    /* max_ss_no = -1; */
    810 
    811    for (j = 0; j < N_SPILL64S; j++)
    812       ss_busy_until_before[j] = 0;
    813 
    814    for (j = 0; j < n_vregs; j++) {
    815 
    816       /* True iff this vreg is unused.  In which case we also expect
    817          that the reg_class field for it has not been set.  */
    818       if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
    819          vassert(vreg_lrs[j].reg_class == HRcINVALID);
    820          continue;
    821       }
    822 
    823       /* The spill slots are 64 bits in size.  As per the comment on
    824          definition of HRegClass in host_generic_regs.h, that means, to
    825          spill a vreg of class Flt64 or Vec128, we'll need to find two
    826          adjacent spill slots to use.  Note, this logic needs to kept
    827          in sync with the size info on the definition of HRegClass. */
    828 
    829       if (vreg_lrs[j].reg_class == HRcVec128
    830           || vreg_lrs[j].reg_class == HRcFlt64) {
    831 
    832          /* Find two adjacent free slots in which between them provide
    833             up to 128 bits in which to spill the vreg.  Since we are
    834             trying to find an even:odd pair, move along in steps of 2
    835             (slots). */
    836 
    837          for (k = 0; k < N_SPILL64S-1; k += 2)
    838             if (ss_busy_until_before[k] <= vreg_lrs[j].live_after
    839                 && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
    840                break;
    841          if (k >= N_SPILL64S-1) {
    842             vpanic("LibVEX_N_SPILL_BYTES is too low.  "
    843                    "Increase and recompile.");
    844          }
    845          if (0) vex_printf("16-byte spill offset in spill slot %d\n", (Int)k);
    846          ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
    847          ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
    848 
    849       } else {
    850 
    851          /* The ordinary case -- just find a single spill slot. */
    852 
    853          /* Find the lowest-numbered spill slot which is available at
    854             the start point of this interval, and assign the interval
    855             to it. */
    856          for (k = 0; k < N_SPILL64S; k++)
    857             if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
    858                break;
    859          if (k == N_SPILL64S) {
    860             vpanic("LibVEX_N_SPILL_BYTES is too low.  "
    861                    "Increase and recompile.");
    862          }
    863          ss_busy_until_before[k] = vreg_lrs[j].dead_before;
    864 
    865       }
    866 
    867       /* This reflects LibVEX's hard-wired knowledge of the baseBlock
    868          layout: the guest state, then two equal sized areas following
    869          it for two sets of shadow state, and then the spill area. */
    870       vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8);
    871 
    872       /* Independent check that we've made a sane choice of slot */
    873       sanity_check_spill_offset( &vreg_lrs[j] );
    874       /* if (j > max_ss_no) */
    875       /*    max_ss_no = j; */
    876    }
    877 
    878 #  if 0
    879    vex_printf("\n\n");
    880    for (j = 0; j < n_vregs; j++)
    881       vex_printf("vreg %d    --> spill offset %d\n",
    882                  j, vreg_lrs[j].spill_offset);
    883 #  endif
    884 
    885    /* --------- Stage 4: establish rreg preferences --------- */
    886 
    887    /* It may be advantageous to allocating certain vregs to specific
    888       rregs, as a way of avoiding reg-reg moves later.  Here we
    889       establish which, if any, rreg each vreg would prefer to be in.
    890       Note that this constrains the allocator -- ideally we end up
    891       with as few as possible vregs expressing a preference.
    892 
    893       This is an optimisation: if the .preferred_rreg field is never
    894       set to anything different from INVALID_HREG, the allocator still
    895       works. */
    896 
    897    /* 30 Dec 04: removed this mechanism as it does not seem to
    898       help. */
    899 
    900    /* --------- Stage 5: process instructions --------- */
    901 
    902    /* This is the main loop of the allocator.  First, we need to
    903       correctly set up our running state, which tracks the status of
    904       each real register. */
    905 
    906    /* ------ BEGIN: Process each insn in turn. ------ */
    907 
    908    for (ii = 0; ii < instrs_in->arr_used; ii++) {
    909 
    910 #     if DEBUG_REGALLOC
    911       vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
    912       vex_printf("---- ");
    913       (*ppInstr)(instrs_in->arr[ii], mode64);
    914       vex_printf("\n\nInitial state:\n");
    915       PRINT_STATE;
    916       vex_printf("\n");
    917 #     endif
    918 
    919       /* ------------ Sanity checks ------------ */
    920 
    921       /* Sanity checks are expensive.  So they are done only once
    922          every 7 instructions, and just before the last
    923          instruction. */
    924       do_sanity_check
    925          = toBool(
    926               False  /* Set to True for sanity checking of all insns. */
    927               || ii == instrs_in->arr_used-1
    928               || (ii > 0 && (ii % 7) == 0)
    929            );
    930 
    931       if (do_sanity_check) {
    932 
    933          /* Sanity check 1: all rregs with a hard live range crossing
    934             this insn must be marked as unavailable in the running
    935             state. */
    936          for (j = 0; j < rreg_lrs_used; j++) {
    937             if (rreg_lrs_la[j].live_after < ii
    938                 && ii < rreg_lrs_la[j].dead_before) {
    939                /* ii is the middle of a hard live range for some real
    940                   reg.  Check it's marked as such in the running
    941                   state. */
    942 
    943 #              if 0
    944                vex_printf("considering la %d .. db %d   reg = ",
    945                           rreg_lrs[j].live_after,
    946                           rreg_lrs[j].dead_before);
    947                (*ppReg)(rreg_lrs[j].rreg);
    948                vex_printf("\n");
    949 #              endif
    950 
    951                /* find the state entry for this rreg */
    952                for (k = 0; k < n_rregs; k++)
    953                   if (rreg_state[k].rreg == rreg_lrs_la[j].rreg)
    954                      break;
    955 
    956                /* and assert that this rreg is marked as unavailable */
    957                vassert(rreg_state[k].disp == Unavail);
    958             }
    959          }
    960 
    961          /* Sanity check 2: conversely, all rregs marked as
    962             unavailable in the running rreg_state must have a
    963             corresponding hard live range entry in the rreg_lrs
    964             array. */
    965          for (j = 0; j < n_available_real_regs; j++) {
    966             vassert(rreg_state[j].disp == Bound
    967                     || rreg_state[j].disp == Free
    968                     || rreg_state[j].disp == Unavail);
    969             if (rreg_state[j].disp != Unavail)
    970                continue;
    971             for (k = 0; k < rreg_lrs_used; k++)
    972                if (rreg_lrs_la[k].rreg == rreg_state[j].rreg
    973                    && rreg_lrs_la[k].live_after < ii
    974                    && ii < rreg_lrs_la[k].dead_before)
    975                   break;
    976             /* If this vassertion fails, we couldn't find a
    977                corresponding HLR. */
    978             vassert(k < rreg_lrs_used);
    979          }
    980 
    981          /* Sanity check 3: all vreg-rreg bindings must bind registers
    982             of the same class. */
    983          for (j = 0; j < n_rregs; j++) {
    984             if (rreg_state[j].disp != Bound) {
    985                vassert(rreg_state[j].eq_spill_slot == False);
    986                continue;
    987             }
    988             vassert(hregClass(rreg_state[j].rreg)
    989                     == hregClass(rreg_state[j].vreg));
    990             vassert( hregIsVirtual(rreg_state[j].vreg));
    991             vassert(!hregIsVirtual(rreg_state[j].rreg));
    992          }
    993 
    994          /* Sanity check 4: the vreg_state and rreg_state
    995             mutually-redundant mappings are consistent.  If
    996             rreg_state[j].vreg points at some vreg_state entry then
    997             that vreg_state entry should point back at
    998             rreg_state[j]. */
    999          for (j = 0; j < n_rregs; j++) {
   1000             if (rreg_state[j].disp != Bound)
   1001                continue;
   1002             k = hregNumber(rreg_state[j].vreg);
   1003             vassert(IS_VALID_VREGNO(k));
   1004             vassert(vreg_state[k] == j);
   1005          }
   1006          for (j = 0; j < n_vregs; j++) {
   1007             k = vreg_state[j];
   1008             if (k == INVALID_RREG_NO)
   1009                continue;
   1010             vassert(IS_VALID_RREGNO(k));
   1011             vassert(rreg_state[k].disp == Bound);
   1012             vassert(hregNumber(rreg_state[k].vreg) == j);
   1013          }
   1014 
   1015       } /* if (do_sanity_check) */
   1016 
   1017       /* ------------ end of Sanity checks ------------ */
   1018 
   1019       /* Do various optimisations pertaining to register coalescing
   1020          and preferencing:
   1021             MOV  v <-> v   coalescing (done here).
   1022             MOV  v <-> r   coalescing (not yet, if ever)
   1023       */
   1024       /* If doing a reg-reg move between two vregs, and the src's live
   1025          range ends here and the dst's live range starts here, bind
   1026          the dst to the src's rreg, and that's all. */
   1027       if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
   1028          if (!hregIsVirtual(vregS)) goto cannot_coalesce;
   1029          if (!hregIsVirtual(vregD)) goto cannot_coalesce;
   1030          /* Check that *isMove is not telling us a bunch of lies ... */
   1031          vassert(hregClass(vregS) == hregClass(vregD));
   1032          k = hregNumber(vregS);
   1033          m = hregNumber(vregD);
   1034          vassert(IS_VALID_VREGNO(k));
   1035          vassert(IS_VALID_VREGNO(m));
   1036          if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
   1037          if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
   1038 #        if DEBUG_REGALLOC
   1039          vex_printf("COALESCE ");
   1040          (*ppReg)(vregS);
   1041          vex_printf(" -> ");
   1042          (*ppReg)(vregD);
   1043          vex_printf("\n\n");
   1044 #        endif
   1045          /* Find the state entry for vregS. */
   1046          for (m = 0; m < n_rregs; m++)
   1047             if (rreg_state[m].disp == Bound && rreg_state[m].vreg == vregS)
   1048                break;
   1049          if (m == n_rregs)
   1050             /* We failed to find a binding for vregS, which means it's
   1051                currently not in a register.  So we can't do the
   1052                coalescing.  Give up. */
   1053             goto cannot_coalesce;
   1054 
   1055          /* Finally, we can do the coalescing.  It's trivial -- merely
   1056             claim vregS's register for vregD. */
   1057          rreg_state[m].vreg = vregD;
   1058          vassert(IS_VALID_VREGNO(hregNumber(vregD)));
   1059          vassert(IS_VALID_VREGNO(hregNumber(vregS)));
   1060          vreg_state[hregNumber(vregD)] = toShort(m);
   1061          vreg_state[hregNumber(vregS)] = INVALID_RREG_NO;
   1062 
   1063          /* This rreg has become associated with a different vreg and
   1064             hence with a different spill slot.  Play safe. */
   1065          rreg_state[m].eq_spill_slot = False;
   1066 
   1067          /* Move on to the next insn.  We skip the post-insn stuff for
   1068             fixed registers, since this move should not interact with
   1069             them in any way. */
   1070          continue;
   1071       }
   1072      cannot_coalesce:
   1073 
   1074       /* ------ Free up rregs bound to dead vregs ------ */
   1075 
   1076       /* Look for vregs whose live range has just ended, and
   1077 	 mark the associated rreg as free. */
   1078 
   1079       for (j = 0; j < n_rregs; j++) {
   1080          if (rreg_state[j].disp != Bound)
   1081             continue;
   1082          vreg = hregNumber(rreg_state[j].vreg);
   1083          vassert(IS_VALID_VREGNO(vreg));
   1084          if (vreg_lrs[vreg].dead_before <= ii) {
   1085             rreg_state[j].disp = Free;
   1086             rreg_state[j].eq_spill_slot = False;
   1087             m = hregNumber(rreg_state[j].vreg);
   1088             vassert(IS_VALID_VREGNO(m));
   1089             vreg_state[m] = INVALID_RREG_NO;
   1090             if (DEBUG_REGALLOC) {
   1091                vex_printf("free up ");
   1092                (*ppReg)(rreg_state[j].rreg);
   1093                vex_printf("\n");
   1094             }
   1095          }
   1096       }
   1097 
   1098       /* ------ Pre-instruction actions for fixed rreg uses ------ */
   1099 
   1100       /* Now we have to deal with rregs which are about to be made
   1101          live by this instruction -- in other words, are entering into
   1102          one of their live ranges.  If any such rreg holds a vreg, we
   1103          will have to free up the rreg.  The simplest solution which
   1104          is correct is to spill the rreg.
   1105 
   1106          Note we could do better:
   1107          * Could move it into some other free rreg, if one is available
   1108 
   1109          Do this efficiently, by incrementally stepping along an array
   1110          of rreg HLRs that are known to be sorted by start point
   1111          (their .live_after field).
   1112       */
   1113       while (True) {
   1114          vassert(rreg_lrs_la_next >= 0);
   1115          vassert(rreg_lrs_la_next <= rreg_lrs_used);
   1116          if (rreg_lrs_la_next == rreg_lrs_used)
   1117             break; /* no more real reg live ranges to consider */
   1118          if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
   1119             break; /* next live range does not yet start */
   1120          vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
   1121          /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
   1122             Find the associated rreg_state entry. */
   1123          /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
   1124             Real register live ranges are guaranteed to be well-formed
   1125             in that they start with a write to the register -- Stage 2
   1126             rejects any code not satisfying this.  So the correct
   1127             question to ask is whether
   1128             rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
   1129             whether the reg becomes live after this insn -- rather
   1130             than before it. */
   1131 #        if DEBUG_REGALLOC
   1132          vex_printf("need to free up rreg: ");
   1133          (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
   1134          vex_printf("\n\n");
   1135 #        endif
   1136          for (k = 0; k < n_rregs; k++)
   1137             if (rreg_state[k].rreg == rreg_lrs_la[rreg_lrs_la_next].rreg)
   1138                break;
   1139          /* If this fails, we don't have an entry for this rreg.
   1140             Which we should. */
   1141          vassert(IS_VALID_RREGNO(k));
   1142          m = hregNumber(rreg_state[k].vreg);
   1143          if (rreg_state[k].disp == Bound) {
   1144             /* Yes, there is an associated vreg.  Spill it if it's
   1145                still live. */
   1146             vassert(IS_VALID_VREGNO(m));
   1147             vreg_state[m] = INVALID_RREG_NO;
   1148             if (vreg_lrs[m].dead_before > ii) {
   1149                vassert(vreg_lrs[m].reg_class != HRcINVALID);
   1150                if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
   1151                   HInstr* spill1 = NULL;
   1152                   HInstr* spill2 = NULL;
   1153                   (*genSpill)( &spill1, &spill2, rreg_state[k].rreg,
   1154                                vreg_lrs[m].spill_offset, mode64 );
   1155                   vassert(spill1 || spill2); /* can't both be NULL */
   1156                   if (spill1)
   1157                      EMIT_INSTR(spill1);
   1158                   if (spill2)
   1159                      EMIT_INSTR(spill2);
   1160                }
   1161                rreg_state[k].eq_spill_slot = True;
   1162             }
   1163          }
   1164          rreg_state[k].disp = Unavail;
   1165          rreg_state[k].vreg = INVALID_HREG;
   1166          rreg_state[k].eq_spill_slot = False;
   1167 
   1168          /* check for further rregs entering HLRs at this point */
   1169          rreg_lrs_la_next++;
   1170       }
   1171 
   1172 
   1173 #     if DEBUG_REGALLOC
   1174       vex_printf("After pre-insn actions for fixed regs:\n");
   1175       PRINT_STATE;
   1176       vex_printf("\n");
   1177 #     endif
   1178 
   1179 
   1180       /* ------ Deal with the current instruction. ------ */
   1181 
   1182       /* Finally we can begin the processing of this instruction
   1183          itself.  The aim is to free up enough rregs for this insn.
   1184          This may generate spill stores since we may have to evict
   1185          some vregs currently in rregs.  Also generates spill loads.
   1186          We also build up the final vreg->rreg mapping to be applied
   1187          to the insn. */
   1188 
   1189       (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
   1190 
   1191       initHRegRemap(&remap);
   1192 
   1193       /* ------------ BEGIN directReload optimisation ----------- */
   1194 
   1195       /* If the instruction reads exactly one vreg which is currently
   1196          in a spill slot, and this is last use of that vreg, see if we
   1197          can convert the instruction into one reads directly from the
   1198          spill slot.  This is clearly only possible for x86 and amd64
   1199          targets, since ppc and arm load-store architectures.  If
   1200          successful, replace instrs_in->arr[ii] with this new
   1201          instruction, and recompute its reg usage, so that the change
   1202          is invisible to the standard-case handling that follows. */
   1203 
   1204       if (directReload && reg_usage.n_used <= 2) {
   1205          Bool  debug_direct_reload = True && False;
   1206          HReg  cand     = INVALID_HREG;
   1207          Bool  nreads   = 0;
   1208          Short spilloff = 0;
   1209 
   1210          for (j = 0; j < reg_usage.n_used; j++) {
   1211 
   1212             vreg = reg_usage.hreg[j];
   1213 
   1214             if (!hregIsVirtual(vreg))
   1215                continue;
   1216 
   1217             if (reg_usage.mode[j] == HRmRead) {
   1218                nreads++;
   1219                m = hregNumber(vreg);
   1220                vassert(IS_VALID_VREGNO(m));
   1221                k = vreg_state[m];
   1222                if (!IS_VALID_RREGNO(k)) {
   1223                   /* ok, it is spilled.  Now, is this its last use? */
   1224                   vassert(vreg_lrs[m].dead_before >= ii+1);
   1225                   if (vreg_lrs[m].dead_before == ii+1
   1226                       && cand == INVALID_HREG) {
   1227                      spilloff = vreg_lrs[m].spill_offset;
   1228                      cand = vreg;
   1229                   }
   1230                }
   1231             }
   1232          }
   1233 
   1234          if (nreads == 1 && cand != INVALID_HREG) {
   1235             HInstr* reloaded;
   1236             if (reg_usage.n_used == 2)
   1237                vassert(reg_usage.hreg[0] != reg_usage.hreg[1]);
   1238 
   1239             reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
   1240             if (debug_direct_reload && !reloaded) {
   1241                vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
   1242                ppInstr(instrs_in->arr[ii], mode64);
   1243             }
   1244             if (reloaded) {
   1245                /* Update info about the insn, so it looks as if it had
   1246                   been in this form all along. */
   1247                instrs_in->arr[ii] = reloaded;
   1248                (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
   1249                if (debug_direct_reload && !reloaded) {
   1250                   vex_printf("  -->  ");
   1251                   ppInstr(reloaded, mode64);
   1252                }
   1253             }
   1254 
   1255             if (debug_direct_reload && !reloaded)
   1256                vex_printf("\n");
   1257          }
   1258 
   1259       }
   1260 
   1261       /* ------------ END directReload optimisation ------------ */
   1262 
   1263       /* for each reg mentioned in the insn ... */
   1264       for (j = 0; j < reg_usage.n_used; j++) {
   1265 
   1266          vreg = reg_usage.hreg[j];
   1267 
   1268          /* only interested in virtual registers right now. */
   1269          if (!hregIsVirtual(vreg))
   1270             continue;
   1271 
   1272 #        if 0
   1273          vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
   1274 #        endif
   1275 
   1276          /* Now we're trying to find a rreg for "vreg".  First of all,
   1277             if it already has an rreg assigned, we don't need to do
   1278             anything more.  Search the current state to find out. */
   1279          m = hregNumber(vreg);
   1280          vassert(IS_VALID_VREGNO(m));
   1281          k = vreg_state[m];
   1282          if (IS_VALID_RREGNO(k)) {
   1283             vassert(rreg_state[k].disp == Bound);
   1284             addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
   1285             /* If this rreg is written or modified, mark it as different
   1286                from any spill slot value. */
   1287             if (reg_usage.mode[j] != HRmRead)
   1288                rreg_state[k].eq_spill_slot = False;
   1289             continue;
   1290          } else {
   1291             vassert(k == INVALID_RREG_NO);
   1292          }
   1293 
   1294          /* No luck.  The next thing to do is see if there is a
   1295             currently free rreg available, of the correct class.  If
   1296             so, bag it.  NOTE, we could improve this by selecting an
   1297             rreg for which the next live-range event is as far ahead
   1298             as possible. */
   1299          k_suboptimal = -1;
   1300          for (k = 0; k < n_rregs; k++) {
   1301             if (rreg_state[k].disp != Free
   1302                 || hregClass(rreg_state[k].rreg) != hregClass(vreg))
   1303                continue;
   1304             if (rreg_state[k].has_hlrs) {
   1305                /* Well, at least we can use k_suboptimal if we really
   1306                   have to.  Keep on looking for a better candidate. */
   1307                k_suboptimal = k;
   1308             } else {
   1309                /* Found a preferable reg.  Use it. */
   1310                k_suboptimal = -1;
   1311                break;
   1312             }
   1313          }
   1314          if (k_suboptimal >= 0)
   1315             k = k_suboptimal;
   1316 
   1317          if (k < n_rregs) {
   1318             rreg_state[k].disp = Bound;
   1319             rreg_state[k].vreg = vreg;
   1320             m = hregNumber(vreg);
   1321             vassert(IS_VALID_VREGNO(m));
   1322             vreg_state[m] = toShort(k);
   1323             addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
   1324             /* Generate a reload if needed.  This only creates needed
   1325                reloads because the live range builder for vregs will
   1326                guarantee that the first event for a vreg is a write.
   1327                Hence, if this reference is not a write, it cannot be
   1328                the first reference for this vreg, and so a reload is
   1329                indeed needed. */
   1330             if (reg_usage.mode[j] != HRmWrite) {
   1331                vassert(vreg_lrs[m].reg_class != HRcINVALID);
   1332                HInstr* reload1 = NULL;
   1333                HInstr* reload2 = NULL;
   1334                (*genReload)( &reload1, &reload2, rreg_state[k].rreg,
   1335                              vreg_lrs[m].spill_offset, mode64 );
   1336                vassert(reload1 || reload2); /* can't both be NULL */
   1337                if (reload1)
   1338                   EMIT_INSTR(reload1);
   1339                if (reload2)
   1340                   EMIT_INSTR(reload2);
   1341                /* This rreg is read or modified by the instruction.
   1342                   If it's merely read we can claim it now equals the
   1343                   spill slot, but not so if it is modified. */
   1344                if (reg_usage.mode[j] == HRmRead) {
   1345                   rreg_state[k].eq_spill_slot = True;
   1346                } else {
   1347                   vassert(reg_usage.mode[j] == HRmModify);
   1348                   rreg_state[k].eq_spill_slot = False;
   1349                }
   1350             } else {
   1351                rreg_state[k].eq_spill_slot = False;
   1352             }
   1353 
   1354             continue;
   1355          }
   1356 
   1357          /* Well, now we have no option but to spill a vreg.  It's
   1358             important to make a good choice of vreg to spill, and of
   1359             course we need to be careful not to spill a vreg which is
   1360             needed by this insn. */
   1361 
   1362          /* First, mark in the rreg_state, those rregs which are not spill
   1363             candidates, due to holding a vreg mentioned by this
   1364             instruction.  Or being of the wrong class. */
   1365          for (k = 0; k < n_rregs; k++) {
   1366             rreg_state[k].is_spill_cand = False;
   1367             if (rreg_state[k].disp != Bound)
   1368                continue;
   1369             if (hregClass(rreg_state[k].rreg) != hregClass(vreg))
   1370                continue;
   1371             rreg_state[k].is_spill_cand = True;
   1372             for (m = 0; m < reg_usage.n_used; m++) {
   1373                if (rreg_state[k].vreg == reg_usage.hreg[m]) {
   1374                   rreg_state[k].is_spill_cand = False;
   1375                   break;
   1376                }
   1377             }
   1378          }
   1379 
   1380          /* We can choose to spill any rreg satisfying
   1381             rreg_state[r].is_spill_cand (so to speak).  Choose r so that
   1382             the next use of its associated vreg is as far ahead as
   1383             possible, in the hope that this will minimise the number
   1384             of consequent reloads required. */
   1385          spillee
   1386             = findMostDistantlyMentionedVReg (
   1387                  getRegUsage, instrs_in, ii+1, rreg_state, n_rregs, mode64 );
   1388 
   1389          if (spillee == -1) {
   1390             /* Hmmmmm.  There don't appear to be any spill candidates.
   1391                We're hosed. */
   1392             vex_printf("reg_alloc: can't find a register in class: ");
   1393             ppHRegClass(hregClass(vreg));
   1394             vex_printf("\n");
   1395             vpanic("reg_alloc: can't create a free register.");
   1396          }
   1397 
   1398          /* Right.  So we're going to spill rreg_state[spillee]. */
   1399          vassert(IS_VALID_RREGNO(spillee));
   1400          vassert(rreg_state[spillee].disp == Bound);
   1401          /* check it's the right class */
   1402          vassert(hregClass(rreg_state[spillee].rreg) == hregClass(vreg));
   1403          /* check we're not ejecting the vreg for which we are trying
   1404             to free up a register. */
   1405          vassert(rreg_state[spillee].vreg != vreg);
   1406 
   1407          m = hregNumber(rreg_state[spillee].vreg);
   1408          vassert(IS_VALID_VREGNO(m));
   1409 
   1410          /* So here's the spill store.  Assert that we're spilling a
   1411             live vreg. */
   1412          vassert(vreg_lrs[m].dead_before > ii);
   1413          vassert(vreg_lrs[m].reg_class != HRcINVALID);
   1414          if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
   1415             HInstr* spill1 = NULL;
   1416             HInstr* spill2 = NULL;
   1417             (*genSpill)( &spill1, &spill2, rreg_state[spillee].rreg,
   1418                          vreg_lrs[m].spill_offset, mode64 );
   1419             vassert(spill1 || spill2); /* can't both be NULL */
   1420             if (spill1)
   1421                EMIT_INSTR(spill1);
   1422             if (spill2)
   1423                EMIT_INSTR(spill2);
   1424          }
   1425 
   1426          /* Update the rreg_state to reflect the new assignment for this
   1427             rreg. */
   1428          rreg_state[spillee].vreg = vreg;
   1429          vreg_state[m] = INVALID_RREG_NO;
   1430 
   1431          rreg_state[spillee].eq_spill_slot = False; /* be safe */
   1432 
   1433          m = hregNumber(vreg);
   1434          vassert(IS_VALID_VREGNO(m));
   1435          vreg_state[m] = toShort(spillee);
   1436 
   1437          /* Now, if this vreg is being read or modified (as opposed to
   1438             written), we have to generate a reload for it. */
   1439          if (reg_usage.mode[j] != HRmWrite) {
   1440             vassert(vreg_lrs[m].reg_class != HRcINVALID);
   1441             HInstr* reload1 = NULL;
   1442             HInstr* reload2 = NULL;
   1443             (*genReload)( &reload1, &reload2, rreg_state[spillee].rreg,
   1444                           vreg_lrs[m].spill_offset, mode64 );
   1445             vassert(reload1 || reload2); /* can't both be NULL */
   1446             if (reload1)
   1447                EMIT_INSTR(reload1);
   1448             if (reload2)
   1449                EMIT_INSTR(reload2);
   1450             /* This rreg is read or modified by the instruction.
   1451                If it's merely read we can claim it now equals the
   1452                spill slot, but not so if it is modified. */
   1453             if (reg_usage.mode[j] == HRmRead) {
   1454                rreg_state[spillee].eq_spill_slot = True;
   1455             } else {
   1456                vassert(reg_usage.mode[j] == HRmModify);
   1457                rreg_state[spillee].eq_spill_slot = False;
   1458             }
   1459          }
   1460 
   1461          /* So after much twisting and turning, we have vreg mapped to
   1462             rreg_state[spillee].rreg.  Note that in the map. */
   1463          addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg);
   1464 
   1465       } /* iterate over registers in this instruction. */
   1466 
   1467       /* We've finished clowning around with registers in this instruction.
   1468          Three results:
   1469          - the running rreg_state[] has been updated
   1470          - a suitable vreg->rreg mapping for this instruction has been
   1471            constructed
   1472          - spill and reload instructions may have been emitted.
   1473 
   1474         The final step is to apply the mapping to the instruction,
   1475         and emit that.
   1476       */
   1477 
   1478       /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
   1479       (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
   1480       EMIT_INSTR( instrs_in->arr[ii] );
   1481 
   1482 #     if DEBUG_REGALLOC
   1483       vex_printf("After dealing with current insn:\n");
   1484       PRINT_STATE;
   1485       vex_printf("\n");
   1486 #     endif
   1487 
   1488       /* ------ Post-instruction actions for fixed rreg uses ------ */
   1489 
   1490       /* Now we need to check for rregs exiting fixed live ranges
   1491          after this instruction, and if so mark them as free. */
   1492       while (True) {
   1493          vassert(rreg_lrs_db_next >= 0);
   1494          vassert(rreg_lrs_db_next <= rreg_lrs_used);
   1495          if (rreg_lrs_db_next == rreg_lrs_used)
   1496             break; /* no more real reg live ranges to consider */
   1497          if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
   1498             break; /* next live range does not yet start */
   1499          vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
   1500          /* rreg_lrs_db[[rreg_lrs_db_next].rreg is exiting a hard live
   1501             range.  Mark it as such in the main rreg_state array. */
   1502          for (k = 0; k < n_rregs; k++)
   1503             if (rreg_state[k].rreg == rreg_lrs_db[rreg_lrs_db_next].rreg)
   1504                break;
   1505          /* If this vassertion fails, we don't have an entry for
   1506             this rreg.  Which we should. */
   1507          vassert(k < n_rregs);
   1508          vassert(rreg_state[k].disp == Unavail);
   1509          rreg_state[k].disp = Free;
   1510          rreg_state[k].vreg = INVALID_HREG;
   1511          rreg_state[k].eq_spill_slot = False;
   1512 
   1513          /* check for further rregs leaving HLRs at this point */
   1514          rreg_lrs_db_next++;
   1515       }
   1516 
   1517 #     if DEBUG_REGALLOC
   1518       vex_printf("After post-insn actions for fixed regs:\n");
   1519       PRINT_STATE;
   1520       vex_printf("\n");
   1521 #     endif
   1522 
   1523    } /* iterate over insns */
   1524 
   1525    /* ------ END: Process each insn in turn. ------ */
   1526 
   1527    /* free(rreg_state); */
   1528    /* free(rreg_lrs); */
   1529    /* if (vreg_lrs) free(vreg_lrs); */
   1530 
   1531    /* Paranoia */
   1532    for (j = 0; j < n_rregs; j++)
   1533       vassert(rreg_state[j].rreg == available_real_regs[j]);
   1534 
   1535    vassert(rreg_lrs_la_next == rreg_lrs_used);
   1536    vassert(rreg_lrs_db_next == rreg_lrs_used);
   1537 
   1538    return instrs_out;
   1539 
   1540 #  undef INVALID_INSTRNO
   1541 #  undef EMIT_INSTR
   1542 #  undef PRINT_STATE
   1543 }
   1544 
   1545 
   1546 
   1547 /*---------------------------------------------------------------*/
   1548 /*---                                       host_reg_alloc2.c ---*/
   1549 /*---------------------------------------------------------------*/
   1550