      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                 host_reg_alloc2.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2012 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex.h"
     38 
     39 #include "main_util.h"
     40 #include "host_generic_regs.h"
     41 
     42 /* Set to 1 for lots of debugging output. */
     43 #define DEBUG_REGALLOC 0
     44 
     45 
     46 /* TODO 27 Oct 04:
     47 
     48    Better consistency checking from what isMove tells us.
     49 
     50    We can possibly do V-V coalescing even when the src is spilled,
     51    providing we can arrange for the dst to have the same spill slot.
     52 
      53    Note that rreg_state[].rreg is the same as the available real regs.
     54 
     55    Generally rationalise data structures.  */
     56 
     57 
     58 /* Records information on virtual register live ranges.  Computed once
     59    and remains unchanged after that. */
     60 typedef
     61    struct {
     62       /* Becomes live for the first time after this insn ... */
     63       Short live_after;
     64       /* Becomes dead for the last time before this insn ... */
     65       Short dead_before;
     66       /* The "home" spill slot, if needed.  Never changes. */
     67       Short spill_offset;
     68       Short spill_size;
     69       /* What kind of register this is. */
     70       HRegClass reg_class;
     71    }
     72    VRegLR;
     73 
     74 
     75 /* Records information on real-register live ranges.  Computed once
     76    and remains unchanged after that. */
     77 typedef
     78    struct {
     79       HReg rreg;
     80       /* Becomes live after this insn ... */
     81       Short live_after;
     82       /* Becomes dead before this insn ... */
     83       Short dead_before;
     84    }
     85    RRegLR;
     86 
     87 
     88 /* An array of the following structs (rreg_state) comprises the
     89    running state of the allocator.  It indicates what the current
     90    disposition of each allocatable real register is.  The array gets
     91    updated as the allocator processes instructions. */
     92 typedef
     93    struct {
     94       /* ------ FIELDS WHICH DO NOT CHANGE ------ */
     95       /* Which rreg is this for? */
     96       HReg rreg;
     97       /* Is this involved in any HLRs?  (only an optimisation hint) */
     98       Bool has_hlrs;
     99       /* ------ FIELDS WHICH DO CHANGE ------ */
    100       /* 6 May 07: rearranged fields below so the whole struct fits
    101          into 16 bytes on both x86 and amd64. */
    102       /* Used when .disp == Bound and we are looking for vregs to
    103          spill. */
    104       Bool is_spill_cand;
    105       /* Optimisation: used when .disp == Bound.  Indicates when the
    106          rreg has the same value as the spill slot for the associated
    107          vreg.  Is safely left at False, and becomes True after a
    108          spill store or reload for this rreg. */
    109       Bool eq_spill_slot;
     110       /* What's its current disposition? */
    111       enum { Free,     /* available for use */
    112              Unavail,  /* in a real-reg live range */
    113              Bound     /* in use (holding value of some vreg) */
    114            }
    115            disp;
    116       /* If .disp == Bound, what vreg is it bound to? */
    117       HReg vreg;
    118    }
    119    RRegState;
    120 
    121 
    122 /* The allocator also maintains a redundant array of indexes
    123    (vreg_state) from vreg numbers back to entries in rreg_state.  It
    124    is redundant because iff vreg_state[i] == j then
    125    hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
    126    point at each other.  The purpose of this is to speed up activities
    127    which involve looking for a particular vreg: there is no need to
    128    scan the rreg_state looking for it, just index directly into
    129    vreg_state.  The FAQ "does this vreg already have an associated
    130    rreg" is the main beneficiary.
    131 
    132    To indicate, in vreg_state[i], that a given vreg is not currently
    133    associated with any rreg, that entry can be set to INVALID_RREG_NO.
    134 
    135    Because the vreg_state entries are signed Shorts, the max number
     136    of vregs that can be handled by regalloc is 32767.
    137 */
    138 
    139 #define INVALID_RREG_NO ((Short)(-1))
    140 
    141 #define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
    142 #define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
    143 
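/* Purely illustrative sketch (defined but never called): one way the
   mutual consistency of the two maps described above could be
   checked.  The allocator performs an equivalent check inline, as
   "Sanity check 4" in doRegisterAllocation below.  The array lengths
   are passed in because n_vregs and n_rregs are locals of
   doRegisterAllocation, and the function name is made up for this
   sketch. */
static inline Bool maps_are_consistent_sketch (
   RRegState* rreg_state, Int n_rregs,
   Short*     vreg_state, Int n_vregs
)
{
   Int j, k;
   for (j = 0; j < n_rregs; j++) {
      if (rreg_state[j].disp != Bound)
         continue;
      k = hregNumber(rreg_state[j].vreg);
      if (k < 0 || k >= n_vregs || vreg_state[k] != j)
         return False; /* bound rreg whose vreg does not point back */
   }
   for (j = 0; j < n_vregs; j++) {
      k = vreg_state[j];
      if (k == INVALID_RREG_NO)
         continue;
      if (k < 0 || k >= n_rregs
          || rreg_state[k].disp != Bound
          || hregNumber(rreg_state[k].vreg) != j)
         return False; /* mapped vreg whose rreg does not point back */
   }
   return True;
}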
    144 
    145 /* Does this instruction mention a particular reg? */
    146 static Bool instrMentionsReg (
    147    void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
    148    HInstr* instr,
    149    HReg r,
    150    Bool mode64
    151 )
    152 {
    153    Int       i;
    154    HRegUsage reg_usage;
    155    (*getRegUsage)(&reg_usage, instr, mode64);
    156    for (i = 0; i < reg_usage.n_used; i++)
    157       if (reg_usage.hreg[i] == r)
    158          return True;
    159    return False;
    160 }
    161 
    162 
    163 /* Search forward from some given point in the incoming instruction
    164    sequence.  Point is to select a virtual register to spill, by
    165    finding the vreg which is mentioned as far ahead as possible, in
    166    the hope that this will minimise the number of consequent reloads.
    167 
    168    Only do the search for vregs which are Bound in the running state,
    169    and for which the .is_spill_cand field is set.  This allows the
    170    caller to arbitrarily restrict the set of spill candidates to be
    171    considered.
    172 
    173    Returns an index into the state array indicating the (v,r) pair to
    174    spill, or -1 if none was found.  */
    175 static
    176 Int findMostDistantlyMentionedVReg (
    177    void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
    178    HInstrArray* instrs_in,
    179    Int          search_from_instr,
    180    RRegState*   state,
    181    Int          n_state,
    182    Bool         mode64
    183 )
    184 {
    185    Int k, m;
    186    Int furthest_k = -1;
    187    Int furthest   = -1;
    188    vassert(search_from_instr >= 0);
    189    for (k = 0; k < n_state; k++) {
    190       if (!state[k].is_spill_cand)
    191          continue;
    192       vassert(state[k].disp == Bound);
    193       for (m = search_from_instr; m < instrs_in->arr_used; m++) {
    194          if (instrMentionsReg(getRegUsage,
    195                               instrs_in->arr[m], state[k].vreg, mode64))
    196             break;
    197       }
    198       if (m > furthest) {
    199          furthest   = m;
    200          furthest_k = k;
    201       }
    202    }
    203    return furthest_k;
    204 }
    205 
    206 
    207 /* Check that this vreg has been assigned a sane spill offset. */
    208 static inline void sanity_check_spill_offset ( VRegLR* vreg )
    209 {
    210    switch (vreg->reg_class) {
    211       case HRcVec128: case HRcFlt64:
    212          vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
    213       default:
    214          vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
    215    }
    216 }
    217 
    218 
    219 /* Double the size of the real-reg live-range array, if needed. */
    220 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
    221 {
    222    Int     k;
    223    RRegLR* arr2;
    224    if (used < *size) return;
    225    if (0)
    226       vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size);
    227    vassert(used == *size);
    228    arr2 = LibVEX_Alloc(2 * *size * sizeof(RRegLR));
    229    for (k = 0; k < *size; k++)
    230       arr2[k] = (*info)[k];
    231    *size *= 2;
    232    *info = arr2;
    233 }
    234 
    235 
    236 /* Sort an array of RRegLR entries by either the .live_after or
    237    .dead_before fields.  This is performance-critical. */
    238 static void sortRRLRarray ( RRegLR* arr,
    239                             Int size, Bool by_live_after )
    240 {
    241    Int    incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
    242                        9841, 29524, 88573, 265720,
    243                        797161, 2391484 };
    244    Int    lo = 0;
    245    Int    hi = size-1;
    246    Int    i, j, h, bigN, hp;
    247    RRegLR v;
    248 
    249    vassert(size >= 0);
    250    if (size == 0)
    251       return;
    252 
    253    bigN = hi - lo + 1; if (bigN < 2) return;
    254    hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
    255 
    256    if (by_live_after) {
    257 
    258       for ( ; hp >= 0; hp--) {
    259          h = incs[hp];
    260          for (i = lo + h; i <= hi; i++) {
    261             v = arr[i];
    262             j = i;
    263             while (arr[j-h].live_after > v.live_after) {
    264                arr[j] = arr[j-h];
    265                j = j - h;
    266                if (j <= (lo + h - 1)) break;
    267             }
    268             arr[j] = v;
    269          }
    270       }
    271 
    272    } else {
    273 
    274       for ( ; hp >= 0; hp--) {
    275          h = incs[hp];
    276          for (i = lo + h; i <= hi; i++) {
    277             v = arr[i];
    278             j = i;
    279             while (arr[j-h].dead_before > v.dead_before) {
    280                arr[j] = arr[j-h];
    281                j = j - h;
    282                if (j <= (lo + h - 1)) break;
    283             }
    284             arr[j] = v;
    285          }
    286       }
    287 
    288    }
    289 }
    290 
    291 
    292 /* A target-independent register allocator.  Requires various
    293    functions which it uses to deal abstractly with instructions and
    294    registers, since it cannot have any target-specific knowledge.
    295 
    296    Returns a new list of instructions, which, as a result of the
    297    behaviour of mapRegs, will be in-place modifications of the
    298    original instructions.
    299 
    300    Requires that the incoming code has been generated using
    301    vreg numbers 0, 1 .. n_vregs-1.  Appearance of a vreg outside
    302    that range is a checked run-time error.
    303 
    304    Takes an expandable array of pointers to unallocated insns.
    305    Returns an expandable array of pointers to allocated insns.
    306 */
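/* For orientation, a sketch of how a backend driver might plug these
   hooks in, loosely modelled on the x86 backend's entry points.  The
   function-pointer casts and surrounding driver code are elided, and
   "vcode"/"rcode" are just placeholder names for the unallocated
   input and allocated output, so treat this purely as an illustration
   of the calling convention (the final False argument is mode64):

      HReg* rregs;
      Int   n_rregs;
      getAllocableRegs_X86 ( &n_rregs, &rregs );
      rcode = doRegisterAllocation (
                 vcode, rregs, n_rregs,
                 isMove_X86Instr, getRegUsage_X86Instr, mapRegs_X86Instr,
                 genSpill_X86, genReload_X86, directReload_X86,
                 sizeof(VexGuestX86State),
                 ppX86Instr, ppHRegX86, False );
*/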
    307 HInstrArray* doRegisterAllocation (
    308 
    309    /* Incoming virtual-registerised code. */
    310    HInstrArray* instrs_in,
    311 
    312    /* An array listing all the real registers the allocator may use,
    313       in no particular order. */
    314    HReg* available_real_regs,
    315    Int   n_available_real_regs,
    316 
    317    /* Return True iff the given insn is a reg-reg move, in which
    318       case also return the src and dst regs. */
    319    Bool (*isMove) ( HInstr*, HReg*, HReg* ),
    320 
    321    /* Get info about register usage in this insn. */
    322    void (*getRegUsage) ( HRegUsage*, HInstr*, Bool ),
    323 
    324    /* Apply a reg-reg mapping to an insn. */
    325    void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
    326 
    327    /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
    328       real reg to a spill slot byte offset.  The two leading HInstr**
    329       args are out parameters, through which the generated insns are
    330       returned.  Also (optionally) a 'directReload' function, which
    331       attempts to replace a given instruction by one which reads
    332       directly from a specified spill slot.  May be NULL, in which
    333       case the optimisation is not attempted. */
    334    void    (*genSpill)  ( HInstr**, HInstr**, HReg, Int, Bool ),
    335    void    (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
    336    HInstr* (*directReload) ( HInstr*, HReg, Short ),
    337    Int     guest_sizeB,
    338 
    339    /* For debug printing only. */
    340    void (*ppInstr) ( HInstr*, Bool ),
    341    void (*ppReg) ( HReg ),
    342 
    343    /* 32/64bit mode */
    344    Bool mode64
    345 )
    346 {
    347 #  define N_SPILL64S  (LibVEX_N_SPILL_BYTES / 8)
    348 
    349    const Bool eq_spill_opt = True;
    350 
    351    /* Iterators and temporaries. */
    352    Int       ii, j, k, m, spillee, k_suboptimal;
    353    HReg      rreg, vreg, vregS, vregD;
    354    HRegUsage reg_usage;
    355 
    356    /* Info on vregs and rregs.  Computed once and remains
    357       unchanged. */
    358    Int     n_vregs;
    359    VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
    360 
    361    /* We keep two copies of the real-reg live range info, one sorted
    362       by .live_after and the other by .dead_before.  First the
     363       unsorted info is created in the _la variant and then copied into
     364       the _db variant.  Once that's done, both of them are sorted.
    365       We also need two integer cursors which record the next
    366       location in the two arrays to consider. */
    367    RRegLR* rreg_lrs_la;
    368    RRegLR* rreg_lrs_db;
    369    Int     rreg_lrs_size;
    370    Int     rreg_lrs_used;
    371    Int     rreg_lrs_la_next;
    372    Int     rreg_lrs_db_next;
    373 
    374    /* Used when constructing vreg_lrs (for allocating stack
    375       slots). */
    376    Int ss_busy_until_before[N_SPILL64S];
    377 
    378    /* Used when constructing rreg_lrs. */
    379    Int* rreg_live_after;
    380    Int* rreg_dead_before;
    381 
    382    /* Running state of the core allocation algorithm. */
    383    RRegState* rreg_state;  /* [0 .. n_rregs-1] */
    384    Int        n_rregs;
    385 
    386    /* .. and the redundant backward map */
    387    /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
     388       This implies n_rregs must be <= 32768. */
    389    Short*     vreg_state;  /* [0 .. n_vregs-1] */
    390 
    391    /* The vreg -> rreg map constructed and then applied to each
    392       instr. */
    393    HRegRemap remap;
    394 
    395    /* The output array of instructions. */
    396    HInstrArray* instrs_out;
    397 
    398    /* Sanity checks are expensive.  They are only done periodically,
    399       not at each insn processed. */
    400    Bool do_sanity_check;
    401 
    402    vassert(0 == (guest_sizeB % 32));
    403    vassert(0 == (LibVEX_N_SPILL_BYTES % 32));
    404    vassert(0 == (N_SPILL64S % 4));
    405 
    406    /* The live range numbers are signed shorts, and so limiting the
    407       number of insns to 15000 comfortably guards against them
    408       overflowing 32k. */
    409    vassert(instrs_in->arr_used <= 15000);
    410 
    411 #  define INVALID_INSTRNO (-2)
    412 
    413 #  define EMIT_INSTR(_instr)                  \
    414       do {                                    \
    415         HInstr* _tmp = (_instr);              \
    416         if (DEBUG_REGALLOC) {                 \
    417            vex_printf("**  ");                \
    418            (*ppInstr)(_tmp, mode64);          \
    419            vex_printf("\n\n");                \
    420         }                                     \
    421         addHInstr ( instrs_out, _tmp );       \
    422       } while (0)
    423 
    424 #   define PRINT_STATE						   \
    425       do {							   \
    426          Int z, q;						   \
    427          for (z = 0; z < n_rregs; z++) {			   \
    428             vex_printf("  rreg_state[%2d] = ", z);		   \
    429             (*ppReg)(rreg_state[z].rreg);	       		   \
    430             vex_printf("  \t");					   \
    431             switch (rreg_state[z].disp) {			   \
    432                case Free:    vex_printf("Free\n"); break;	   \
    433                case Unavail: vex_printf("Unavail\n"); break;	   \
    434                case Bound:   vex_printf("BoundTo "); 		   \
    435                              (*ppReg)(rreg_state[z].vreg);	   \
    436                              vex_printf("\n"); break;		   \
    437             }							   \
    438          }							   \
    439          vex_printf("\n  vreg_state[0 .. %d]:\n    ", n_vregs-1);  \
    440          q = 0;                                                    \
    441          for (z = 0; z < n_vregs; z++) {                           \
    442             if (vreg_state[z] == INVALID_RREG_NO)                  \
    443                continue;                                           \
    444             vex_printf("[%d] -> %d   ", z, vreg_state[z]);         \
    445             q++;                                                   \
    446             if (q > 0 && (q % 6) == 0)                             \
    447                vex_printf("\n    ");                               \
    448          }                                                         \
    449          vex_printf("\n");                                         \
    450       } while (0)
    451 
    452 
    453    /* --------- Stage 0: set up output array --------- */
    454    /* --------- and allocate/initialise running state. --------- */
    455 
    456    instrs_out = newHInstrArray();
    457 
    458    /* ... and initialise running state. */
    459    /* n_rregs is no more than a short name for n_available_real_regs. */
    460    n_rregs = n_available_real_regs;
    461    n_vregs = instrs_in->n_vregs;
    462 
    463    /* If this is not so, vreg_state entries will overflow. */
    464    vassert(n_vregs < 32767);
    465 
    466    rreg_state = LibVEX_Alloc(n_rregs * sizeof(RRegState));
    467    vreg_state = LibVEX_Alloc(n_vregs * sizeof(Short));
    468 
    469    for (j = 0; j < n_rregs; j++) {
    470       rreg_state[j].rreg          = available_real_regs[j];
    471       rreg_state[j].has_hlrs      = False;
    472       rreg_state[j].disp          = Free;
    473       rreg_state[j].vreg          = INVALID_HREG;
    474       rreg_state[j].is_spill_cand = False;
    475       rreg_state[j].eq_spill_slot = False;
    476    }
    477 
    478    for (j = 0; j < n_vregs; j++)
    479       vreg_state[j] = INVALID_RREG_NO;
    480 
    481 
    482    /* --------- Stage 1: compute vreg live ranges. --------- */
    483    /* --------- Stage 2: compute rreg live ranges. --------- */
    484 
    485    /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
    486 
    487    /* This is relatively simple, because (1) we only seek the complete
    488       end-to-end live range of each vreg, and are not interested in
    489       any holes in it, and (2) the vregs are conveniently numbered 0
    490       .. n_vregs-1, so we can just dump the results in a
    491       pre-allocated array. */
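   /* Worked example (numbers are illustrative only): a vreg first
      written at insn 3 and last read at insn 7 ends up with
      live_after == 3 and dead_before == 8, since the HRmWrite case
      below sets live_after to the insn number of the first write and
      every mention pushes dead_before to one past that insn. */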
    492 
    493    vreg_lrs = NULL;
    494    if (n_vregs > 0)
    495       vreg_lrs = LibVEX_Alloc(sizeof(VRegLR) * n_vregs);
    496 
    497    for (j = 0; j < n_vregs; j++) {
    498       vreg_lrs[j].live_after     = INVALID_INSTRNO;
    499       vreg_lrs[j].dead_before    = INVALID_INSTRNO;
    500       vreg_lrs[j].spill_offset   = 0;
    501       vreg_lrs[j].spill_size     = 0;
    502       vreg_lrs[j].reg_class      = HRcINVALID;
    503    }
    504 
    505    /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
    506 
    507    /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
    508 
    509    /* This is more complex than Stage 1, because we need to compute
    510       exactly all the live ranges of all the allocatable real regs,
    511       and we don't know in advance how many there will be. */
    512 
    513    rreg_lrs_used = 0;
    514    rreg_lrs_size = 4;
    515    rreg_lrs_la = LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR));
    516    rreg_lrs_db = NULL; /* we'll create this later */
    517 
     518    /* We'll need to track live range start/end points separately for
    519       each rreg.  Sigh. */
    520    vassert(n_available_real_regs > 0);
    521    rreg_live_after  = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
    522    rreg_dead_before = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
    523 
    524    for (j = 0; j < n_available_real_regs; j++) {
    525       rreg_live_after[j] =
    526       rreg_dead_before[j] = INVALID_INSTRNO;
    527    }
    528 
    529    /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
    530 
    531    /* ------ start of ITERATE OVER INSNS ------ */
    532 
    533    for (ii = 0; ii < instrs_in->arr_used; ii++) {
    534 
    535       (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
    536 
    537 #     if 0
    538       vex_printf("\n%d  stage1: ", ii);
    539       (*ppInstr)(instrs_in->arr[ii], mode64);
    540       vex_printf("\n");
    541       ppHRegUsage(&reg_usage);
    542 #     endif
    543 
    544       /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
    545 
    546       /* for each reg mentioned in the insn ... */
    547       for (j = 0; j < reg_usage.n_used; j++) {
    548 
    549          vreg = reg_usage.hreg[j];
    550          /* only interested in virtual registers right now. */
    551          if (!hregIsVirtual(vreg))
    552             continue;
    553          k = hregNumber(vreg);
    554          if (k < 0 || k >= n_vregs) {
    555             vex_printf("\n");
    556             (*ppInstr)(instrs_in->arr[ii], mode64);
    557             vex_printf("\n");
    558             vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
    559             vpanic("doRegisterAllocation: out-of-range vreg");
    560          }
    561 
    562          /* Take the opportunity to note its regclass.  We'll need
    563             that when allocating spill slots. */
    564          if (vreg_lrs[k].reg_class == HRcINVALID) {
    565             /* First mention of this vreg. */
    566             vreg_lrs[k].reg_class = hregClass(vreg);
    567          } else {
    568             /* Seen it before, so check for consistency. */
    569             vassert(vreg_lrs[k].reg_class == hregClass(vreg));
    570          }
    571 
    572          /* Now consider live ranges. */
    573          switch (reg_usage.mode[j]) {
    574             case HRmRead:
    575                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
    576                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
    577                   vpanic("doRegisterAllocation: "
    578                          "first event for vreg is Read");
    579                }
    580                vreg_lrs[k].dead_before = toShort(ii + 1);
    581                break;
    582             case HRmWrite:
    583                if (vreg_lrs[k].live_after == INVALID_INSTRNO)
    584                   vreg_lrs[k].live_after = toShort(ii);
    585                vreg_lrs[k].dead_before = toShort(ii + 1);
    586                break;
    587             case HRmModify:
    588                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
    589                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
    590                   vpanic("doRegisterAllocation: "
    591                          "first event for vreg is Modify");
    592                }
    593                vreg_lrs[k].dead_before = toShort(ii + 1);
    594                break;
    595             default:
    596                vpanic("doRegisterAllocation(1)");
    597          } /* switch */
    598 
    599       } /* iterate over registers */
    600 
    601       /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
    602 
    603       /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
    604 
    605       /* for each reg mentioned in the insn ... */
    606       for (j = 0; j < reg_usage.n_used; j++) {
    607 
    608          /* Dummy initialisations of flush_la and flush_db to avoid
    609             possible bogus uninit-var warnings from gcc. */
    610          Int  flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
    611          Bool flush;
    612 
    613          rreg = reg_usage.hreg[j];
    614 
    615          /* only interested in real registers right now. */
    616          if (hregIsVirtual(rreg))
    617             continue;
    618 
    619          /* Furthermore, we're not interested in this rreg unless it's
    620             one of the allocatable ones.  For example, it could be a
    621             stack pointer register, or some other register beyond our
    622             control, in which case we should just ignore it. */
    623          for (k = 0; k < n_available_real_regs; k++)
    624             if (available_real_regs[k] == rreg)
    625                break;
    626          if (k == n_available_real_regs)
    627             continue; /* not found -- ignore. */
    628          flush = False;
    629          switch (reg_usage.mode[j]) {
    630             case HRmWrite:
    631                flush_la = rreg_live_after[k];
    632                flush_db = rreg_dead_before[k];
    633                if (flush_la != INVALID_INSTRNO
    634                    && flush_db != INVALID_INSTRNO)
    635                   flush = True;
    636                rreg_live_after[k]  = ii;
    637                rreg_dead_before[k] = ii+1;
    638                break;
    639             case HRmRead:
    640                if (rreg_live_after[k] == INVALID_INSTRNO) {
    641                   vex_printf("\nOFFENDING RREG = ");
    642                   (*ppReg)(available_real_regs[k]);
    643                   vex_printf("\n");
    644                   vex_printf("\nOFFENDING instr = ");
    645                   (*ppInstr)(instrs_in->arr[ii], mode64);
    646                   vex_printf("\n");
    647                   vpanic("doRegisterAllocation: "
    648                          "first event for rreg is Read");
    649                }
    650                rreg_dead_before[k] = ii+1;
    651                break;
    652             case HRmModify:
    653                if (rreg_live_after[k] == INVALID_INSTRNO) {
    654                   vex_printf("\nOFFENDING RREG = ");
    655                   (*ppReg)(available_real_regs[k]);
    656                   vex_printf("\n");
    657                   vex_printf("\nOFFENDING instr = ");
    658                   (*ppInstr)(instrs_in->arr[ii], mode64);
    659                   vex_printf("\n");
    660                   vpanic("doRegisterAllocation: "
    661                          "first event for rreg is Modify");
    662                }
    663                rreg_dead_before[k] = ii+1;
    664                break;
    665             default:
    666                vpanic("doRegisterAllocation(2)");
    667          }
    668 
    669          if (flush) {
    670             vassert(flush_la != INVALID_INSTRNO);
    671             vassert(flush_db != INVALID_INSTRNO);
    672             ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
    673             if (0)
    674                vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
    675             rreg_lrs_la[rreg_lrs_used].rreg        = rreg;
    676             rreg_lrs_la[rreg_lrs_used].live_after  = toShort(flush_la);
    677             rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
    678             rreg_lrs_used++;
    679          }
    680 
    681       } /* iterate over regs in the instr */
    682 
    683       /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
    684 
    685    } /* iterate over insns */
    686 
    687    /* ------ end of ITERATE OVER INSNS ------ */
    688 
    689    /* ------ start of FINALISE RREG LIVE RANGES ------ */
    690 
    691    /* Now finish up any live ranges left over. */
    692    for (j = 0; j < n_available_real_regs; j++) {
    693 
    694 #     if 0
    695       vex_printf("residual %d:  %d %d\n", j, rreg_live_after[j],
    696                                              rreg_dead_before[j]);
    697 #     endif
    698       vassert( (rreg_live_after[j] == INVALID_INSTRNO
    699                && rreg_dead_before[j] == INVALID_INSTRNO)
    700               ||
    701               (rreg_live_after[j] != INVALID_INSTRNO
    702                && rreg_dead_before[j] != INVALID_INSTRNO)
    703             );
    704 
    705       if (rreg_live_after[j] == INVALID_INSTRNO)
    706          continue;
    707 
    708       ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
    709       if (0)
    710          vex_printf("FLUSH 2 (%d,%d)\n",
    711                     rreg_live_after[j], rreg_dead_before[j]);
    712       rreg_lrs_la[rreg_lrs_used].rreg        = available_real_regs[j];
    713       rreg_lrs_la[rreg_lrs_used].live_after  = toShort(rreg_live_after[j]);
    714       rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
    715       rreg_lrs_used++;
    716    }
    717 
    718    /* Compute summary hints for choosing real regs.  If a real reg is
    719       involved in a hard live range, record that fact in the fixed
    720       part of the running rreg_state.  Later, when offered a choice between
    721       rregs, it's better to choose one which is not marked as having
    722       any HLRs, since ones with HLRs may need to be spilled around
    723       their HLRs.  Correctness of final assignment is unaffected by
    724       this mechanism -- it is only an optimisation. */
    725 
    726    for (j = 0; j < rreg_lrs_used; j++) {
    727       rreg = rreg_lrs_la[j].rreg;
    728       vassert(!hregIsVirtual(rreg));
    729       /* rreg is involved in a HLR.  Record this info in the array, if
    730          there is space. */
    731       for (k = 0; k < n_rregs; k++)
    732          if (rreg_state[k].rreg == rreg)
    733             break;
    734       vassert(k < n_rregs); /* else rreg was not found in rreg_state?! */
    735       rreg_state[k].has_hlrs = True;
    736    }
    737    if (0) {
    738       for (j = 0; j < n_rregs; j++) {
    739          if (!rreg_state[j].has_hlrs)
    740             continue;
    741          ppReg(rreg_state[j].rreg);
    742          vex_printf(" hinted\n");
    743       }
    744    }
    745 
    746    /* Finally, copy the _la variant into the _db variant and
    747       sort both by their respective fields. */
    748    rreg_lrs_db = LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR));
    749    for (j = 0; j < rreg_lrs_used; j++)
    750       rreg_lrs_db[j] = rreg_lrs_la[j];
    751 
    752    sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/  );
    753    sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
    754 
    755    /* And set up the cursors. */
    756    rreg_lrs_la_next = 0;
    757    rreg_lrs_db_next = 0;
    758 
    759    for (j = 1; j < rreg_lrs_used; j++) {
    760       vassert(rreg_lrs_la[j-1].live_after  <= rreg_lrs_la[j].live_after);
    761       vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
    762    }
    763 
    764    /* ------ end of FINALISE RREG LIVE RANGES ------ */
    765 
    766 #  if DEBUG_REGALLOC
    767    for (j = 0; j < n_vregs; j++) {
    768       vex_printf("vreg %d:  la = %d,  db = %d\n",
    769                  j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
    770    }
    771 #  endif
    772 
    773 #  if DEBUG_REGALLOC
    774    vex_printf("RRegLRs by LA:\n");
    775    for (j = 0; j < rreg_lrs_used; j++) {
    776       vex_printf("  ");
    777       (*ppReg)(rreg_lrs_la[j].rreg);
    778       vex_printf("      la = %d,  db = %d\n",
    779                  rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
    780    }
    781    vex_printf("RRegLRs by DB:\n");
    782    for (j = 0; j < rreg_lrs_used; j++) {
    783       vex_printf("  ");
    784       (*ppReg)(rreg_lrs_db[j].rreg);
    785       vex_printf("      la = %d,  db = %d\n",
    786                  rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
    787    }
    788 #  endif
    789 
    790    /* --------- Stage 3: allocate spill slots. --------- */
    791 
    792    /* Each spill slot is 8 bytes long.  For vregs which take more than
    793       64 bits to spill (classes Flt64 and Vec128), we have to allocate
    794       two consecutive spill slots.  For 256 bit registers (class
    795       Vec256), we have to allocate four consecutive spill slots.
    796 
    797       For Vec128-class on PowerPC, the spill slot's actual address
    798       must be 16-byte aligned.  Since the spill slot's address is
    799       computed as an offset from the guest state pointer, and since
    800       the user of the generated code must set that pointer to a
    801       32-aligned value, we have the residual obligation here of
    802       choosing a 16-aligned spill slot offset for Vec128-class values.
    803       Since each spill slot is 8 bytes long, that means for
     804       Vec128-class values we must allocate a spill slot number which
    805       is zero mod 2.
    806 
     807       Similarly, for Vec256 class on amd64, find a spill slot number
    808       which is zero mod 4.  This guarantees it will be 32 byte
    809       aligned, which isn't actually necessary on amd64 (we use movUpd
    810       etc to spill), but seems like good practice.
    811 
    812       Do a rank-based allocation of vregs to spill slot numbers.  We
    813       put as few values as possible in spill slots, but nevertheless
    814       need to have a spill slot available for all vregs, just in case.
    815    */
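   /* Worked example (numbers are illustrative only): slots are 8
      bytes each and end up at offset guest_sizeB * 3 + slot * 8 (see
      the spill_offset assignment below).  Assuming a hypothetical
      guest_sizeB of 1024, a Vec128 vreg given the even slot pair
      (6,7) gets offset 3*1024 + 6*8 = 3120, which is 16-aligned as
      required; an Int-class vreg given slot 9 gets 3072 + 72 = 3144,
      which only has to be 8-aligned. */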
    816    /* max_ss_no = -1; */
    817 
    818    for (j = 0; j < N_SPILL64S; j++)
    819       ss_busy_until_before[j] = 0;
    820 
    821    for (j = 0; j < n_vregs; j++) {
    822 
    823       /* True iff this vreg is unused.  In which case we also expect
    824          that the reg_class field for it has not been set.  */
    825       if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
    826          vassert(vreg_lrs[j].reg_class == HRcINVALID);
    827          continue;
    828       }
    829 
    830       /* The spill slots are 64 bits in size.  As per the comment on
    831          definition of HRegClass in host_generic_regs.h, that means,
    832          to spill a vreg of class Flt64 or Vec128, we'll need to find
    833          two adjacent spill slots to use.  For Vec256, we'll need to
    834          find four adjacent slots to use.  Note, this logic needs to
    835          kept in sync with the size info on the definition of
    836          HRegClass. */
    837       switch (vreg_lrs[j].reg_class) {
    838 
    839          case HRcVec128: case HRcFlt64:
     840             /* Find two adjacent free slots which between them provide
     841                up to 128 bits in which to spill the vreg.
    842                Since we are trying to find an even:odd pair, move
    843                along in steps of 2 (slots). */
    844             for (k = 0; k < N_SPILL64S-1; k += 2)
    845                if (ss_busy_until_before[k+0] <= vreg_lrs[j].live_after
    846                    && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
    847                   break;
    848             if (k >= N_SPILL64S-1) {
    849                vpanic("LibVEX_N_SPILL_BYTES is too low.  "
    850                       "Increase and recompile.");
    851             }
    852             if (0) vex_printf("16-byte spill offset in spill slot %d\n",
    853                               (Int)k);
    854             ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
    855             ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
    856             break;
    857 
    858          default:
    859             /* The ordinary case -- just find a single spill slot. */
    860             /* Find the lowest-numbered spill slot which is available
    861                at the start point of this interval, and assign the
    862                interval to it. */
    863             for (k = 0; k < N_SPILL64S; k++)
    864                if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
    865                   break;
    866             if (k == N_SPILL64S) {
    867                vpanic("LibVEX_N_SPILL_BYTES is too low.  "
    868                       "Increase and recompile.");
    869             }
    870             ss_busy_until_before[k] = vreg_lrs[j].dead_before;
    871             break;
    872 
    873       } /* switch (vreg_lrs[j].reg_class) { */
    874 
    875       /* This reflects LibVEX's hard-wired knowledge of the baseBlock
    876          layout: the guest state, then two equal sized areas following
    877          it for two sets of shadow state, and then the spill area. */
    878       vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8);
    879 
    880       /* Independent check that we've made a sane choice of slot */
    881       sanity_check_spill_offset( &vreg_lrs[j] );
    882       /* if (j > max_ss_no) */
    883       /*    max_ss_no = j; */
    884    }
    885 
    886 #  if 0
    887    vex_printf("\n\n");
    888    for (j = 0; j < n_vregs; j++)
    889       vex_printf("vreg %d    --> spill offset %d\n",
    890                  j, vreg_lrs[j].spill_offset);
    891 #  endif
    892 
    893    /* --------- Stage 4: establish rreg preferences --------- */
    894 
     895    /* It may be advantageous to allocate certain vregs to specific
    896       rregs, as a way of avoiding reg-reg moves later.  Here we
    897       establish which, if any, rreg each vreg would prefer to be in.
    898       Note that this constrains the allocator -- ideally we end up
    899       with as few as possible vregs expressing a preference.
    900 
    901       This is an optimisation: if the .preferred_rreg field is never
    902       set to anything different from INVALID_HREG, the allocator still
    903       works. */
    904 
    905    /* 30 Dec 04: removed this mechanism as it does not seem to
    906       help. */
    907 
    908    /* --------- Stage 5: process instructions --------- */
    909 
    910    /* This is the main loop of the allocator.  First, we need to
    911       correctly set up our running state, which tracks the status of
    912       each real register. */
    913 
    914    /* ------ BEGIN: Process each insn in turn. ------ */
    915 
    916    for (ii = 0; ii < instrs_in->arr_used; ii++) {
    917 
    918 #     if DEBUG_REGALLOC
    919       vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
    920       vex_printf("---- ");
    921       (*ppInstr)(instrs_in->arr[ii], mode64);
    922       vex_printf("\n\nInitial state:\n");
    923       PRINT_STATE;
    924       vex_printf("\n");
    925 #     endif
    926 
    927       /* ------------ Sanity checks ------------ */
    928 
    929       /* Sanity checks are expensive.  So they are done only once
    930          every 7 instructions, and just before the last
    931          instruction. */
    932       do_sanity_check
    933          = toBool(
    934               False  /* Set to True for sanity checking of all insns. */
    935               || ii == instrs_in->arr_used-1
    936               || (ii > 0 && (ii % 7) == 0)
    937            );
    938 
    939       if (do_sanity_check) {
    940 
    941          /* Sanity check 1: all rregs with a hard live range crossing
    942             this insn must be marked as unavailable in the running
    943             state. */
    944          for (j = 0; j < rreg_lrs_used; j++) {
    945             if (rreg_lrs_la[j].live_after < ii
    946                 && ii < rreg_lrs_la[j].dead_before) {
    947                /* ii is the middle of a hard live range for some real
    948                   reg.  Check it's marked as such in the running
    949                   state. */
    950 
    951 #              if 0
    952                vex_printf("considering la %d .. db %d   reg = ",
     953                           rreg_lrs_la[j].live_after,
     954                           rreg_lrs_la[j].dead_before);
     955                (*ppReg)(rreg_lrs_la[j].rreg);
    956                vex_printf("\n");
    957 #              endif
    958 
    959                /* find the state entry for this rreg */
    960                for (k = 0; k < n_rregs; k++)
    961                   if (rreg_state[k].rreg == rreg_lrs_la[j].rreg)
    962                      break;
    963 
    964                /* and assert that this rreg is marked as unavailable */
    965                vassert(rreg_state[k].disp == Unavail);
    966             }
    967          }
    968 
    969          /* Sanity check 2: conversely, all rregs marked as
    970             unavailable in the running rreg_state must have a
    971             corresponding hard live range entry in the rreg_lrs
    972             array. */
    973          for (j = 0; j < n_available_real_regs; j++) {
    974             vassert(rreg_state[j].disp == Bound
    975                     || rreg_state[j].disp == Free
    976                     || rreg_state[j].disp == Unavail);
    977             if (rreg_state[j].disp != Unavail)
    978                continue;
    979             for (k = 0; k < rreg_lrs_used; k++)
    980                if (rreg_lrs_la[k].rreg == rreg_state[j].rreg
    981                    && rreg_lrs_la[k].live_after < ii
    982                    && ii < rreg_lrs_la[k].dead_before)
    983                   break;
    984             /* If this vassertion fails, we couldn't find a
    985                corresponding HLR. */
    986             vassert(k < rreg_lrs_used);
    987          }
    988 
    989          /* Sanity check 3: all vreg-rreg bindings must bind registers
    990             of the same class. */
    991          for (j = 0; j < n_rregs; j++) {
    992             if (rreg_state[j].disp != Bound) {
    993                vassert(rreg_state[j].eq_spill_slot == False);
    994                continue;
    995             }
    996             vassert(hregClass(rreg_state[j].rreg)
    997                     == hregClass(rreg_state[j].vreg));
    998             vassert( hregIsVirtual(rreg_state[j].vreg));
    999             vassert(!hregIsVirtual(rreg_state[j].rreg));
   1000          }
   1001 
   1002          /* Sanity check 4: the vreg_state and rreg_state
   1003             mutually-redundant mappings are consistent.  If
   1004             rreg_state[j].vreg points at some vreg_state entry then
   1005             that vreg_state entry should point back at
   1006             rreg_state[j]. */
   1007          for (j = 0; j < n_rregs; j++) {
   1008             if (rreg_state[j].disp != Bound)
   1009                continue;
   1010             k = hregNumber(rreg_state[j].vreg);
   1011             vassert(IS_VALID_VREGNO(k));
   1012             vassert(vreg_state[k] == j);
   1013          }
   1014          for (j = 0; j < n_vregs; j++) {
   1015             k = vreg_state[j];
   1016             if (k == INVALID_RREG_NO)
   1017                continue;
   1018             vassert(IS_VALID_RREGNO(k));
   1019             vassert(rreg_state[k].disp == Bound);
   1020             vassert(hregNumber(rreg_state[k].vreg) == j);
   1021          }
   1022 
   1023       } /* if (do_sanity_check) */
   1024 
   1025       /* ------------ end of Sanity checks ------------ */
   1026 
   1027       /* Do various optimisations pertaining to register coalescing
   1028          and preferencing:
   1029             MOV  v <-> v   coalescing (done here).
   1030             MOV  v <-> r   coalescing (not yet, if ever)
   1031       */
   1032       /* If doing a reg-reg move between two vregs, and the src's live
   1033          range ends here and the dst's live range starts here, bind
   1034          the dst to the src's rreg, and that's all. */
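      /* Worked example (vreg numbers are hypothetical): for a move
         v5 -> v9 at insn ii, coalescing applies when
         vreg_lrs[5].dead_before == ii+1 (v5 dies here),
         vreg_lrs[9].live_after == ii (v9 is born here) and v5 is
         currently Bound to some rreg; v9 then simply inherits that
         rreg and v5's vreg_state entry is invalidated. */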
   1035       if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
   1036          if (!hregIsVirtual(vregS)) goto cannot_coalesce;
   1037          if (!hregIsVirtual(vregD)) goto cannot_coalesce;
   1038          /* Check that *isMove is not telling us a bunch of lies ... */
   1039          vassert(hregClass(vregS) == hregClass(vregD));
   1040          k = hregNumber(vregS);
   1041          m = hregNumber(vregD);
   1042          vassert(IS_VALID_VREGNO(k));
   1043          vassert(IS_VALID_VREGNO(m));
   1044          if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
   1045          if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
   1046 #        if DEBUG_REGALLOC
   1047          vex_printf("COALESCE ");
   1048          (*ppReg)(vregS);
   1049          vex_printf(" -> ");
   1050          (*ppReg)(vregD);
   1051          vex_printf("\n\n");
   1052 #        endif
   1053          /* Find the state entry for vregS. */
   1054          for (m = 0; m < n_rregs; m++)
   1055             if (rreg_state[m].disp == Bound && rreg_state[m].vreg == vregS)
   1056                break;
   1057          if (m == n_rregs)
   1058             /* We failed to find a binding for vregS, which means it's
   1059                currently not in a register.  So we can't do the
   1060                coalescing.  Give up. */
   1061             goto cannot_coalesce;
   1062 
   1063          /* Finally, we can do the coalescing.  It's trivial -- merely
   1064             claim vregS's register for vregD. */
   1065          rreg_state[m].vreg = vregD;
   1066          vassert(IS_VALID_VREGNO(hregNumber(vregD)));
   1067          vassert(IS_VALID_VREGNO(hregNumber(vregS)));
   1068          vreg_state[hregNumber(vregD)] = toShort(m);
   1069          vreg_state[hregNumber(vregS)] = INVALID_RREG_NO;
   1070 
   1071          /* This rreg has become associated with a different vreg and
   1072             hence with a different spill slot.  Play safe. */
   1073          rreg_state[m].eq_spill_slot = False;
   1074 
   1075          /* Move on to the next insn.  We skip the post-insn stuff for
   1076             fixed registers, since this move should not interact with
   1077             them in any way. */
   1078          continue;
   1079       }
   1080      cannot_coalesce:
   1081 
   1082       /* ------ Free up rregs bound to dead vregs ------ */
   1083 
   1084       /* Look for vregs whose live range has just ended, and
    1085          mark the associated rreg as free. */
   1086 
   1087       for (j = 0; j < n_rregs; j++) {
   1088          if (rreg_state[j].disp != Bound)
   1089             continue;
   1090          vreg = hregNumber(rreg_state[j].vreg);
   1091          vassert(IS_VALID_VREGNO(vreg));
   1092          if (vreg_lrs[vreg].dead_before <= ii) {
   1093             rreg_state[j].disp = Free;
   1094             rreg_state[j].eq_spill_slot = False;
   1095             m = hregNumber(rreg_state[j].vreg);
   1096             vassert(IS_VALID_VREGNO(m));
   1097             vreg_state[m] = INVALID_RREG_NO;
   1098             if (DEBUG_REGALLOC) {
   1099                vex_printf("free up ");
   1100                (*ppReg)(rreg_state[j].rreg);
   1101                vex_printf("\n");
   1102             }
   1103          }
   1104       }
   1105 
   1106       /* ------ Pre-instruction actions for fixed rreg uses ------ */
   1107 
   1108       /* Now we have to deal with rregs which are about to be made
   1109          live by this instruction -- in other words, are entering into
   1110          one of their live ranges.  If any such rreg holds a vreg, we
   1111          will have to free up the rreg.  The simplest solution which
   1112          is correct is to spill the rreg.
   1113 
   1114          Note we could do better:
   1115          * Could move it into some other free rreg, if one is available
   1116 
   1117          Do this efficiently, by incrementally stepping along an array
   1118          of rreg HLRs that are known to be sorted by start point
   1119          (their .live_after field).
   1120       */
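      /* Worked example (register name purely illustrative): suppose
         the next entry in the sorted array says rreg R becomes live
         after this insn (live_after == ii) and R is currently Bound
         to a still-live vreg.  The loop below spills that vreg
         (unless its value already equals its spill slot), marks R as
         Unavail, and advances rreg_lrs_la_next; several HLRs can
         start at the same insn, hence the while loop. */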
   1121       while (True) {
   1122          vassert(rreg_lrs_la_next >= 0);
   1123          vassert(rreg_lrs_la_next <= rreg_lrs_used);
   1124          if (rreg_lrs_la_next == rreg_lrs_used)
   1125             break; /* no more real reg live ranges to consider */
   1126          if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
   1127             break; /* next live range does not yet start */
   1128          vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
   1129          /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
   1130             Find the associated rreg_state entry. */
   1131          /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
   1132             Real register live ranges are guaranteed to be well-formed
   1133             in that they start with a write to the register -- Stage 2
   1134             rejects any code not satisfying this.  So the correct
   1135             question to ask is whether
   1136             rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
   1137             whether the reg becomes live after this insn -- rather
   1138             than before it. */
   1139 #        if DEBUG_REGALLOC
   1140          vex_printf("need to free up rreg: ");
   1141          (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
   1142          vex_printf("\n\n");
   1143 #        endif
   1144          for (k = 0; k < n_rregs; k++)
   1145             if (rreg_state[k].rreg == rreg_lrs_la[rreg_lrs_la_next].rreg)
   1146                break;
   1147          /* If this fails, we don't have an entry for this rreg.
   1148             Which we should. */
   1149          vassert(IS_VALID_RREGNO(k));
   1150          m = hregNumber(rreg_state[k].vreg);
   1151          if (rreg_state[k].disp == Bound) {
   1152             /* Yes, there is an associated vreg.  Spill it if it's
   1153                still live. */
   1154             vassert(IS_VALID_VREGNO(m));
   1155             vreg_state[m] = INVALID_RREG_NO;
   1156             if (vreg_lrs[m].dead_before > ii) {
   1157                vassert(vreg_lrs[m].reg_class != HRcINVALID);
   1158                if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
   1159                   HInstr* spill1 = NULL;
   1160                   HInstr* spill2 = NULL;
   1161                   (*genSpill)( &spill1, &spill2, rreg_state[k].rreg,
   1162                                vreg_lrs[m].spill_offset, mode64 );
   1163                   vassert(spill1 || spill2); /* can't both be NULL */
   1164                   if (spill1)
   1165                      EMIT_INSTR(spill1);
   1166                   if (spill2)
   1167                      EMIT_INSTR(spill2);
   1168                }
   1169                rreg_state[k].eq_spill_slot = True;
   1170             }
   1171          }
   1172          rreg_state[k].disp = Unavail;
   1173          rreg_state[k].vreg = INVALID_HREG;
   1174          rreg_state[k].eq_spill_slot = False;
   1175 
   1176          /* check for further rregs entering HLRs at this point */
   1177          rreg_lrs_la_next++;
   1178       }
   1179 
   1180 
   1181 #     if DEBUG_REGALLOC
   1182       vex_printf("After pre-insn actions for fixed regs:\n");
   1183       PRINT_STATE;
   1184       vex_printf("\n");
   1185 #     endif
   1186 
   1187 
   1188       /* ------ Deal with the current instruction. ------ */
   1189 
   1190       /* Finally we can begin the processing of this instruction
   1191          itself.  The aim is to free up enough rregs for this insn.
   1192          This may generate spill stores since we may have to evict
   1193          some vregs currently in rregs.  Also generates spill loads.
   1194          We also build up the final vreg->rreg mapping to be applied
   1195          to the insn. */
   1196 
   1197       (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
   1198 
   1199       initHRegRemap(&remap);
   1200 
   1201       /* ------------ BEGIN directReload optimisation ----------- */
   1202 
   1203       /* If the instruction reads exactly one vreg which is currently
    1204          in a spill slot, and this is the last use of that vreg, see if we
    1205          can convert the instruction into one which reads directly from the
    1206          spill slot.  This is clearly only possible for x86 and amd64
    1207          targets, since ppc and arm are load-store architectures.  If
   1208          successful, replace instrs_in->arr[ii] with this new
   1209          instruction, and recompute its reg usage, so that the change
   1210          is invisible to the standard-case handling that follows. */
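      /* Illustrative example (the exact forms accepted depend on the
         backend's directReload implementation): a two-operand ALU
         instruction whose source is a spilled vreg that dies at this
         insn can be rewritten to read that operand straight from the
         spill slot, so no rreg has to be found for it and no separate
         reload instruction is emitted. */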
   1211 
   1212       if (directReload && reg_usage.n_used <= 2) {
   1213          Bool  debug_direct_reload = True && False;
   1214          HReg  cand     = INVALID_HREG;
    1215          Int   nreads   = 0;
   1216          Short spilloff = 0;
   1217 
   1218          for (j = 0; j < reg_usage.n_used; j++) {
   1219 
   1220             vreg = reg_usage.hreg[j];
   1221 
   1222             if (!hregIsVirtual(vreg))
   1223                continue;
   1224 
   1225             if (reg_usage.mode[j] == HRmRead) {
   1226                nreads++;
   1227                m = hregNumber(vreg);
   1228                vassert(IS_VALID_VREGNO(m));
   1229                k = vreg_state[m];
   1230                if (!IS_VALID_RREGNO(k)) {
   1231                   /* ok, it is spilled.  Now, is this its last use? */
   1232                   vassert(vreg_lrs[m].dead_before >= ii+1);
   1233                   if (vreg_lrs[m].dead_before == ii+1
   1234                       && cand == INVALID_HREG) {
   1235                      spilloff = vreg_lrs[m].spill_offset;
   1236                      cand = vreg;
   1237                   }
   1238                }
   1239             }
   1240          }
   1241 
   1242          if (nreads == 1 && cand != INVALID_HREG) {
   1243             HInstr* reloaded;
   1244             if (reg_usage.n_used == 2)
   1245                vassert(reg_usage.hreg[0] != reg_usage.hreg[1]);
   1246 
   1247             reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
    1248             if (debug_direct_reload) {
   1249                vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
   1250                ppInstr(instrs_in->arr[ii], mode64);
   1251             }
   1252             if (reloaded) {
   1253                /* Update info about the insn, so it looks as if it had
   1254                   been in this form all along. */
   1255                instrs_in->arr[ii] = reloaded;
   1256                (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
    1257                if (debug_direct_reload) {
   1258                   vex_printf("  -->  ");
   1259                   ppInstr(reloaded, mode64);
   1260                }
   1261             }
   1262 
    1263             if (debug_direct_reload)
   1264                vex_printf("\n");
   1265          }
   1266 
   1267       }
   1268 
   1269       /* ------------ END directReload optimisation ------------ */
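
      /* Purely illustrative, not part of the allocator: a minimal sketch
         of what a target's directReload hook (e.g. directReload_X86 in
         the x86 backend) might do, disabled with this file's usual #if 0
         idiom.  ToyInstr and toy_mkAluMR are made-up names; only the
         general shape is intended.  The idea is that, say on x86,
            add %vreg, %eax      (%vreg spilled at offset 'off')
         can be rewritten as
            add off(<spill base reg>), %eax
         so that no separate reload instruction is required. */
#     if 0
      typedef struct { Bool isAluRR; Int op; HReg src; HReg dst; } ToyInstr;

      static HInstr* toy_directReload ( HInstr* instr, HReg vreg,
                                        Short spill_off )
      {
         ToyInstr* i = (ToyInstr*)instr;
         /* Fold only when the spilled vreg is the source of a reg-reg
            ALU op; otherwise return NULL, and the caller falls back to
            the normal reload path. */
         if (i->isAluRR && i->src == vreg)
            return toy_mkAluMR(i->op, spill_off, i->dst); /* hypothetical */
         return NULL;
      }
#     endif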
   1270 
   1271       /* for each reg mentioned in the insn ... */
   1272       for (j = 0; j < reg_usage.n_used; j++) {
   1273 
   1274          vreg = reg_usage.hreg[j];
   1275 
   1276          /* only interested in virtual registers right now. */
   1277          if (!hregIsVirtual(vreg))
   1278             continue;
   1279 
   1280 #        if 0
   1281          vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
   1282 #        endif
   1283 
   1284          /* Now we're trying to find a rreg for "vreg".  First of all,
   1285             if it already has an rreg assigned, we don't need to do
   1286             anything more.  Search the current state to find out. */
   1287          m = hregNumber(vreg);
   1288          vassert(IS_VALID_VREGNO(m));
   1289          k = vreg_state[m];
   1290          if (IS_VALID_RREGNO(k)) {
   1291             vassert(rreg_state[k].disp == Bound);
   1292             addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
   1293             /* If this rreg is written or modified, mark it as different
   1294                from any spill slot value. */
   1295             if (reg_usage.mode[j] != HRmRead)
   1296                rreg_state[k].eq_spill_slot = False;
   1297             continue;
   1298          } else {
   1299             vassert(k == INVALID_RREG_NO);
   1300          }
   1301 
   1302          /* No luck.  The next thing to do is see if there is a
   1303             currently free rreg available, of the correct class.  If
   1304             so, bag it.  NOTE, we could improve this by selecting an
   1305             rreg for which the next live-range event is as far ahead
   1306             as possible. */
   1307          k_suboptimal = -1;
   1308          for (k = 0; k < n_rregs; k++) {
   1309             if (rreg_state[k].disp != Free
   1310                 || hregClass(rreg_state[k].rreg) != hregClass(vreg))
   1311                continue;
   1312             if (rreg_state[k].has_hlrs) {
   1313                /* Well, at least we can use k_suboptimal if we really
   1314                   have to.  Keep on looking for a better candidate. */
   1315                k_suboptimal = k;
   1316             } else {
   1317                /* Found a preferable reg.  Use it. */
   1318                k_suboptimal = -1;
   1319                break;
   1320             }
   1321          }
   1322          if (k_suboptimal >= 0)
   1323             k = k_suboptimal;
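
         /* Illustration only: the NOTE above suggests preferring, among
            Free rregs of the right class, the one whose next hard live
            range starts furthest in the future.  A rough sketch of that
            refinement, which would replace the search loop just above,
            follows (disabled as usual); next_hlr_start() is a
            hypothetical helper that would scan rreg_lrs_la for rreg k
            and return a large sentinel if k has no further HLRs. */
#        if 0
         Int best = -1, best_dist = -1;
         for (k = 0; k < n_rregs; k++) {
            Int dist;
            if (rreg_state[k].disp != Free
                || hregClass(rreg_state[k].rreg) != hregClass(vreg))
               continue;
            dist = next_hlr_start(k, ii);  /* hypothetical */
            if (dist > best_dist) { best_dist = dist; best = k; }
         }
         if (best >= 0)
            k = best;
         /* else k == n_rregs and we fall through to the spill code below */
#        endif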
   1324 
   1325          if (k < n_rregs) {
   1326             rreg_state[k].disp = Bound;
   1327             rreg_state[k].vreg = vreg;
   1328             m = hregNumber(vreg);
   1329             vassert(IS_VALID_VREGNO(m));
   1330             vreg_state[m] = toShort(k);
   1331             addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
    1332             /* Generate a reload if needed.  This only creates needed
    1333                reloads: the live range builder for vregs guarantees that
    1334                the first event for a vreg is a write, so if this
    1335                reference is not a write it cannot be the first reference
    1336                for the vreg; its current value must therefore be in its
    1337                spill slot, and so a reload is indeed needed. */
   1338             if (reg_usage.mode[j] != HRmWrite) {
   1339                vassert(vreg_lrs[m].reg_class != HRcINVALID);
   1340                HInstr* reload1 = NULL;
   1341                HInstr* reload2 = NULL;
   1342                (*genReload)( &reload1, &reload2, rreg_state[k].rreg,
   1343                              vreg_lrs[m].spill_offset, mode64 );
   1344                vassert(reload1 || reload2); /* can't both be NULL */
   1345                if (reload1)
   1346                   EMIT_INSTR(reload1);
   1347                if (reload2)
   1348                   EMIT_INSTR(reload2);
   1349                /* This rreg is read or modified by the instruction.
   1350                   If it's merely read we can claim it now equals the
   1351                   spill slot, but not so if it is modified. */
   1352                if (reg_usage.mode[j] == HRmRead) {
   1353                   rreg_state[k].eq_spill_slot = True;
   1354                } else {
   1355                   vassert(reg_usage.mode[j] == HRmModify);
   1356                   rreg_state[k].eq_spill_slot = False;
   1357                }
   1358             } else {
   1359                rreg_state[k].eq_spill_slot = False;
   1360             }
   1361 
   1362             continue;
   1363          }
   1364 
   1365          /* Well, now we have no option but to spill a vreg.  It's
   1366             important to make a good choice of vreg to spill, and of
   1367             course we need to be careful not to spill a vreg which is
   1368             needed by this insn. */
   1369 
    1370          /* First, mark in rreg_state those rregs which are not spill
    1371             candidates, either because they hold a vreg mentioned by this
    1372             instruction or because they are of the wrong class. */
   1373          for (k = 0; k < n_rregs; k++) {
   1374             rreg_state[k].is_spill_cand = False;
   1375             if (rreg_state[k].disp != Bound)
   1376                continue;
   1377             if (hregClass(rreg_state[k].rreg) != hregClass(vreg))
   1378                continue;
   1379             rreg_state[k].is_spill_cand = True;
   1380             for (m = 0; m < reg_usage.n_used; m++) {
   1381                if (rreg_state[k].vreg == reg_usage.hreg[m]) {
   1382                   rreg_state[k].is_spill_cand = False;
   1383                   break;
   1384                }
   1385             }
   1386          }
   1387 
   1388          /* We can choose to spill any rreg satisfying
   1389             rreg_state[r].is_spill_cand (so to speak).  Choose r so that
   1390             the next use of its associated vreg is as far ahead as
   1391             possible, in the hope that this will minimise the number
   1392             of consequent reloads required. */
   1393          spillee
   1394             = findMostDistantlyMentionedVReg (
   1395                  getRegUsage, instrs_in, ii+1, rreg_state, n_rregs, mode64 );
   1396 
   1397          if (spillee == -1) {
   1398             /* Hmmmmm.  There don't appear to be any spill candidates.
   1399                We're hosed. */
   1400             vex_printf("reg_alloc: can't find a register in class: ");
   1401             ppHRegClass(hregClass(vreg));
   1402             vex_printf("\n");
   1403             vpanic("reg_alloc: can't create a free register.");
   1404          }
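
         /* Illustration only (disabled): a simplified restatement of the
            heuristic used by findMostDistantlyMentionedVReg above.  For
            each spill candidate, scan forward for the next instruction
            mentioning its vreg, and prefer the candidate whose next
            mention is furthest away (or which is never mentioned again).
            The real helper, defined earlier in this file, is what
            actually runs. */
#        if 0
         Int furthest = -1, furthest_k = -1;
         for (k = 0; k < n_rregs; k++) {
            Int kk, m2;
            HRegUsage tmp_usage;
            if (!rreg_state[k].is_spill_cand)
               continue;
            for (kk = ii+1; kk < instrs_in->arr_used; kk++) {
               (*getRegUsage)( &tmp_usage, instrs_in->arr[kk], mode64 );
               for (m2 = 0; m2 < tmp_usage.n_used; m2++)
                  if (tmp_usage.hreg[m2] == rreg_state[k].vreg)
                     break;
               if (m2 < tmp_usage.n_used)
                  break; /* candidate's vreg is mentioned at insn kk */
            }
            if (kk > furthest) { furthest = kk; furthest_k = k; }
         }
         /* furthest_k plays the role of 'spillee' above (-1 if none). */
#        endif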
   1405 
   1406          /* Right.  So we're going to spill rreg_state[spillee]. */
   1407          vassert(IS_VALID_RREGNO(spillee));
   1408          vassert(rreg_state[spillee].disp == Bound);
   1409          /* check it's the right class */
   1410          vassert(hregClass(rreg_state[spillee].rreg) == hregClass(vreg));
   1411          /* check we're not ejecting the vreg for which we are trying
   1412             to free up a register. */
   1413          vassert(rreg_state[spillee].vreg != vreg);
   1414 
   1415          m = hregNumber(rreg_state[spillee].vreg);
   1416          vassert(IS_VALID_VREGNO(m));
   1417 
   1418          /* So here's the spill store.  Assert that we're spilling a
   1419             live vreg. */
   1420          vassert(vreg_lrs[m].dead_before > ii);
   1421          vassert(vreg_lrs[m].reg_class != HRcINVALID);
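         /* If the rreg's current value is already known to be identical
            to the contents of the evicted vreg's spill slot (and the
            eq_spill_slot optimisation is enabled), the store below would
            be redundant, so it is skipped. */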
   1422          if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
   1423             HInstr* spill1 = NULL;
   1424             HInstr* spill2 = NULL;
   1425             (*genSpill)( &spill1, &spill2, rreg_state[spillee].rreg,
   1426                          vreg_lrs[m].spill_offset, mode64 );
   1427             vassert(spill1 || spill2); /* can't both be NULL */
   1428             if (spill1)
   1429                EMIT_INSTR(spill1);
   1430             if (spill2)
   1431                EMIT_INSTR(spill2);
   1432          }
   1433 
    1434          /* Update rreg_state and vreg_state: the evicted vreg now lives
    1435             only in its spill slot, and this rreg will hold 'vreg'. */
   1436          rreg_state[spillee].vreg = vreg;
   1437          vreg_state[m] = INVALID_RREG_NO;
   1438 
   1439          rreg_state[spillee].eq_spill_slot = False; /* be safe */
   1440 
   1441          m = hregNumber(vreg);
   1442          vassert(IS_VALID_VREGNO(m));
   1443          vreg_state[m] = toShort(spillee);
   1444 
   1445          /* Now, if this vreg is being read or modified (as opposed to
   1446             written), we have to generate a reload for it. */
   1447          if (reg_usage.mode[j] != HRmWrite) {
   1448             vassert(vreg_lrs[m].reg_class != HRcINVALID);
   1449             HInstr* reload1 = NULL;
   1450             HInstr* reload2 = NULL;
   1451             (*genReload)( &reload1, &reload2, rreg_state[spillee].rreg,
   1452                           vreg_lrs[m].spill_offset, mode64 );
   1453             vassert(reload1 || reload2); /* can't both be NULL */
   1454             if (reload1)
   1455                EMIT_INSTR(reload1);
   1456             if (reload2)
   1457                EMIT_INSTR(reload2);
   1458             /* This rreg is read or modified by the instruction.
   1459                If it's merely read we can claim it now equals the
   1460                spill slot, but not so if it is modified. */
   1461             if (reg_usage.mode[j] == HRmRead) {
   1462                rreg_state[spillee].eq_spill_slot = True;
   1463             } else {
   1464                vassert(reg_usage.mode[j] == HRmModify);
   1465                rreg_state[spillee].eq_spill_slot = False;
   1466             }
   1467          }
   1468 
   1469          /* So after much twisting and turning, we have vreg mapped to
   1470             rreg_state[spillee].rreg.  Note that in the map. */
   1471          addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg);
   1472 
   1473       } /* iterate over registers in this instruction. */
   1474 
   1475       /* We've finished clowning around with registers in this instruction.
   1476          Three results:
   1477          - the running rreg_state[] has been updated
   1478          - a suitable vreg->rreg mapping for this instruction has been
   1479            constructed
   1480          - spill and reload instructions may have been emitted.
   1481 
   1482         The final step is to apply the mapping to the instruction,
   1483         and emit that.
   1484       */
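
      /* Illustration only (disabled): the rough shape of a backend's
         mapRegs callback (cf. mapRegs_X86Instr and friends).  ToyMov and
         its fields are invented for this sketch; the point is simply
         that every register field of the instruction is rewritten via
         the remap table built above, using lookupHRegRemap from
         host_generic_regs.h, under which real registers are expected to
         map to themselves. */
#     if 0
      typedef struct { HReg src; HReg dst; } ToyMov;

      static void mapRegs_ToyMov ( HRegRemap* map, ToyMov* i )
      {
         i->src = lookupHRegRemap(map, i->src);  /* vreg -> chosen rreg */
         i->dst = lookupHRegRemap(map, i->dst);
      }
#     endif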
   1485 
   1486       /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
   1487       (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
   1488       EMIT_INSTR( instrs_in->arr[ii] );
   1489 
   1490 #     if DEBUG_REGALLOC
   1491       vex_printf("After dealing with current insn:\n");
   1492       PRINT_STATE;
   1493       vex_printf("\n");
   1494 #     endif
   1495 
   1496       /* ------ Post-instruction actions for fixed rreg uses ------ */
   1497 
   1498       /* Now we need to check for rregs exiting fixed live ranges
   1499          after this instruction, and if so mark them as free. */
   1500       while (True) {
   1501          vassert(rreg_lrs_db_next >= 0);
   1502          vassert(rreg_lrs_db_next <= rreg_lrs_used);
   1503          if (rreg_lrs_db_next == rreg_lrs_used)
   1504             break; /* no more real reg live ranges to consider */
   1505          if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
    1506             break; /* next live range does not end yet */
   1507          vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
    1508          /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live
   1509             range.  Mark it as such in the main rreg_state array. */
   1510          for (k = 0; k < n_rregs; k++)
   1511             if (rreg_state[k].rreg == rreg_lrs_db[rreg_lrs_db_next].rreg)
   1512                break;
   1513          /* If this vassertion fails, we don't have an entry for
   1514             this rreg.  Which we should. */
   1515          vassert(k < n_rregs);
   1516          vassert(rreg_state[k].disp == Unavail);
   1517          rreg_state[k].disp = Free;
   1518          rreg_state[k].vreg = INVALID_HREG;
   1519          rreg_state[k].eq_spill_slot = False;
   1520 
   1521          /* check for further rregs leaving HLRs at this point */
   1522          rreg_lrs_db_next++;
   1523       }
   1524 
   1525 #     if DEBUG_REGALLOC
   1526       vex_printf("After post-insn actions for fixed regs:\n");
   1527       PRINT_STATE;
   1528       vex_printf("\n");
   1529 #     endif
   1530 
   1531    } /* iterate over insns */
   1532 
   1533    /* ------ END: Process each insn in turn. ------ */
   1534 
   1535    /* free(rreg_state); */
   1536    /* free(rreg_lrs); */
   1537    /* if (vreg_lrs) free(vreg_lrs); */
   1538 
   1539    /* Paranoia */
   1540    for (j = 0; j < n_rregs; j++)
   1541       vassert(rreg_state[j].rreg == available_real_regs[j]);
   1542 
   1543    vassert(rreg_lrs_la_next == rreg_lrs_used);
   1544    vassert(rreg_lrs_db_next == rreg_lrs_used);
   1545 
   1546    return instrs_out;
   1547 
   1548 #  undef INVALID_INSTRNO
   1549 #  undef EMIT_INSTR
   1550 #  undef PRINT_STATE
   1551 }
   1552 
   1553 
   1554 
   1555 /*---------------------------------------------------------------*/
   1556 /*---                                       host_reg_alloc2.c ---*/
   1557 /*---------------------------------------------------------------*/
   1558