      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                 host_reg_alloc2.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2013 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex.h"
     38 
     39 #include "main_util.h"
     40 #include "host_generic_regs.h"
     41 
     42 /* Set to 1 for lots of debugging output. */
     43 #define DEBUG_REGALLOC 0
     44 
     45 
     46 /* TODO 27 Oct 04:
     47 
     48    Better consistency checking from what isMove tells us.
     49 
     50    We can possibly do V-V coalescing even when the src is spilled,
     51    providing we can arrange for the dst to have the same spill slot.
     52 
      53    Note that state[].rreg is the same as the available real regs.
     54 
     55    Generally rationalise data structures.  */
     56 
     57 
     58 /* Records information on virtual register live ranges.  Computed once
     59    and remains unchanged after that. */
     60 typedef
     61    struct {
     62       /* Becomes live for the first time after this insn ... */
     63       Short live_after;
     64       /* Becomes dead for the last time before this insn ... */
     65       Short dead_before;
     66       /* The "home" spill slot, if needed.  Never changes. */
     67       Short spill_offset;
     68       Short spill_size;
     69       /* What kind of register this is. */
     70       HRegClass reg_class;
     71    }
     72    VRegLR;
     73 
     74 
     75 /* Records information on real-register live ranges.  Computed once
     76    and remains unchanged after that. */
     77 typedef
     78    struct {
     79       HReg rreg;
     80       /* Becomes live after this insn ... */
     81       Short live_after;
     82       /* Becomes dead before this insn ... */
     83       Short dead_before;
     84    }
     85    RRegLR;
     86 
     87 
     88 /* An array of the following structs (rreg_state) comprises the
     89    running state of the allocator.  It indicates what the current
     90    disposition of each allocatable real register is.  The array gets
     91    updated as the allocator processes instructions. */
     92 typedef
     93    struct {
     94       /* ------ FIELDS WHICH DO NOT CHANGE ------ */
     95       /* Which rreg is this for? */
     96       HReg rreg;
     97       /* Is this involved in any HLRs?  (only an optimisation hint) */
     98       Bool has_hlrs;
     99       /* ------ FIELDS WHICH DO CHANGE ------ */
    100       /* 6 May 07: rearranged fields below so the whole struct fits
    101          into 16 bytes on both x86 and amd64. */
    102       /* Used when .disp == Bound and we are looking for vregs to
    103          spill. */
    104       Bool is_spill_cand;
    105       /* Optimisation: used when .disp == Bound.  Indicates when the
    106          rreg has the same value as the spill slot for the associated
    107          vreg.  Is safely left at False, and becomes True after a
    108          spill store or reload for this rreg. */
    109       Bool eq_spill_slot;
     110       /* What's its current disposition? */
    111       enum { Free,     /* available for use */
    112              Unavail,  /* in a real-reg live range */
    113              Bound     /* in use (holding value of some vreg) */
    114            }
    115            disp;
    116       /* If .disp == Bound, what vreg is it bound to? */
    117       HReg vreg;
    118    }
    119    RRegState;
    120 
    121 
    122 /* The allocator also maintains a redundant array of indexes
    123    (vreg_state) from vreg numbers back to entries in rreg_state.  It
     124    is redundant because, whenever vreg_state[i] == j, then
    125    hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
    126    point at each other.  The purpose of this is to speed up activities
    127    which involve looking for a particular vreg: there is no need to
    128    scan the rreg_state looking for it, just index directly into
    129    vreg_state.  The FAQ "does this vreg already have an associated
    130    rreg" is the main beneficiary.
    131 
    132    To indicate, in vreg_state[i], that a given vreg is not currently
    133    associated with any rreg, that entry can be set to INVALID_RREG_NO.
    134 
    135    Because the vreg_state entries are signed Shorts, the max number
     136    of vregs that can be handled by regalloc is 32767.
    137 */
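
         /* Purely illustrative sketch (not part of the allocator): the
            mutual-pointing invariant described above amounts to

               for (j = 0; j < n_rregs; j++)
                  if (rreg_state[j].disp == Bound)
                     vassert(vreg_state[hregNumber(rreg_state[j].vreg)] == j);

            plus the converse check that every valid vreg_state[] entry
            names a Bound rreg_state[] entry pointing back at it.  Sanity
            check 4 in doRegisterAllocation below verifies both
            directions. */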
    138 
    139 #define INVALID_RREG_NO ((Short)(-1))
    140 
    141 #define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
    142 #define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
    143 
    144 
    145 /* Does this instruction mention a particular reg? */
    146 static Bool instrMentionsReg (
    147    void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
    148    HInstr* instr,
    149    HReg r,
    150    Bool mode64
    151 )
    152 {
    153    Int       i;
    154    HRegUsage reg_usage;
    155    (*getRegUsage)(&reg_usage, instr, mode64);
    156    for (i = 0; i < reg_usage.n_used; i++)
    157       if (sameHReg(reg_usage.hreg[i], r))
    158          return True;
    159    return False;
    160 }
    161 
    162 
    163 /* Search forward from some given point in the incoming instruction
    164    sequence.  Point is to select a virtual register to spill, by
    165    finding the vreg which is mentioned as far ahead as possible, in
    166    the hope that this will minimise the number of consequent reloads.
    167 
    168    Only do the search for vregs which are Bound in the running state,
    169    and for which the .is_spill_cand field is set.  This allows the
    170    caller to arbitrarily restrict the set of spill candidates to be
    171    considered.
    172 
    173    Returns an index into the state array indicating the (v,r) pair to
    174    spill, or -1 if none was found.  */
    175 static
    176 Int findMostDistantlyMentionedVReg (
    177    void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
    178    HInstrArray* instrs_in,
    179    Int          search_from_instr,
    180    RRegState*   state,
    181    Int          n_state,
    182    Bool         mode64
    183 )
    184 {
    185    Int k, m;
    186    Int furthest_k = -1;
    187    Int furthest   = -1;
    188    vassert(search_from_instr >= 0);
    189    for (k = 0; k < n_state; k++) {
    190       if (!state[k].is_spill_cand)
    191          continue;
    192       vassert(state[k].disp == Bound);
    193       for (m = search_from_instr; m < instrs_in->arr_used; m++) {
    194          if (instrMentionsReg(getRegUsage,
    195                               instrs_in->arr[m], state[k].vreg, mode64))
    196             break;
    197       }
    198       if (m > furthest) {
    199          furthest   = m;
    200          furthest_k = k;
    201       }
    202    }
    203    return furthest_k;
    204 }
    205 
    206 
    207 /* Check that this vreg has been assigned a sane spill offset. */
    208 static inline void sanity_check_spill_offset ( VRegLR* vreg )
    209 {
    210    switch (vreg->reg_class) {
    211       case HRcVec128: case HRcFlt64:
    212          vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
    213       default:
    214          vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
    215    }
    216 }
    217 
    218 
    219 /* Double the size of the real-reg live-range array, if needed. */
    220 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
    221 {
    222    Int     k;
    223    RRegLR* arr2;
    224    if (used < *size) return;
    225    if (0)
     226       vex_printf("ensureRRLRspace: %d -> %d\n", *size, 2 * *size);
    227    vassert(used == *size);
    228    arr2 = LibVEX_Alloc(2 * *size * sizeof(RRegLR));
    229    for (k = 0; k < *size; k++)
    230       arr2[k] = (*info)[k];
    231    *size *= 2;
    232    *info = arr2;
    233 }
    234 
    235 
    236 /* Sort an array of RRegLR entries by either the .live_after or
    237    .dead_before fields.  This is performance-critical. */
    238 static void sortRRLRarray ( RRegLR* arr,
    239                             Int size, Bool by_live_after )
    240 {
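            /* This is a plain shell sort.  incs[] holds the usual 3h+1
               increment sequence (1, 4, 13, 40, ...); the largest
               increment smaller than the array size is used as the
               starting gap. */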
    241    Int    incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
    242                        9841, 29524, 88573, 265720,
    243                        797161, 2391484 };
    244    Int    lo = 0;
    245    Int    hi = size-1;
    246    Int    i, j, h, bigN, hp;
    247    RRegLR v;
    248 
    249    vassert(size >= 0);
    250    if (size == 0)
    251       return;
    252 
    253    bigN = hi - lo + 1; if (bigN < 2) return;
    254    hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
    255 
    256    if (by_live_after) {
    257 
    258       for ( ; hp >= 0; hp--) {
    259          h = incs[hp];
    260          for (i = lo + h; i <= hi; i++) {
    261             v = arr[i];
    262             j = i;
    263             while (arr[j-h].live_after > v.live_after) {
    264                arr[j] = arr[j-h];
    265                j = j - h;
    266                if (j <= (lo + h - 1)) break;
    267             }
    268             arr[j] = v;
    269          }
    270       }
    271 
    272    } else {
    273 
    274       for ( ; hp >= 0; hp--) {
    275          h = incs[hp];
    276          for (i = lo + h; i <= hi; i++) {
    277             v = arr[i];
    278             j = i;
    279             while (arr[j-h].dead_before > v.dead_before) {
    280                arr[j] = arr[j-h];
    281                j = j - h;
    282                if (j <= (lo + h - 1)) break;
    283             }
    284             arr[j] = v;
    285          }
    286       }
    287 
    288    }
    289 }
    290 
    291 
    292 /* A target-independent register allocator.  Requires various
    293    functions which it uses to deal abstractly with instructions and
    294    registers, since it cannot have any target-specific knowledge.
    295 
    296    Returns a new list of instructions, which, as a result of the
    297    behaviour of mapRegs, will be in-place modifications of the
    298    original instructions.
    299 
    300    Requires that the incoming code has been generated using
    301    vreg numbers 0, 1 .. n_vregs-1.  Appearance of a vreg outside
    302    that range is a checked run-time error.
    303 
    304    Takes an expandable array of pointers to unallocated insns.
    305    Returns an expandable array of pointers to allocated insns.
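
   A hypothetical call-site sketch (hook names here are invented for
   illustration only; each backend supplies its own instruction-level
   helpers):

      HInstrArray* rcode
         = doRegisterAllocation(
              vcode, rregs, n_rregs,
              isMove_MyInstr, getRegUsage_MyInstr, mapRegs_MyInstr,
              genSpill_MyInstr, genReload_MyInstr, directReload_MyInstr,
              guest_sizeB, ppMyInstr, ppMyReg, mode64 );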
    306 */
    307 HInstrArray* doRegisterAllocation (
    308 
    309    /* Incoming virtual-registerised code. */
    310    HInstrArray* instrs_in,
    311 
    312    /* An array listing all the real registers the allocator may use,
    313       in no particular order. */
    314    HReg* available_real_regs,
    315    Int   n_available_real_regs,
    316 
    317    /* Return True iff the given insn is a reg-reg move, in which
    318       case also return the src and dst regs. */
    319    Bool (*isMove) ( HInstr*, HReg*, HReg* ),
    320 
    321    /* Get info about register usage in this insn. */
    322    void (*getRegUsage) ( HRegUsage*, HInstr*, Bool ),
    323 
    324    /* Apply a reg-reg mapping to an insn. */
    325    void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
    326 
    327    /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
    328       real reg to a spill slot byte offset.  The two leading HInstr**
    329       args are out parameters, through which the generated insns are
    330       returned.  Also (optionally) a 'directReload' function, which
    331       attempts to replace a given instruction by one which reads
    332       directly from a specified spill slot.  May be NULL, in which
    333       case the optimisation is not attempted. */
    334    void    (*genSpill)  ( HInstr**, HInstr**, HReg, Int, Bool ),
    335    void    (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
    336    HInstr* (*directReload) ( HInstr*, HReg, Short ),
    337    Int     guest_sizeB,
    338 
    339    /* For debug printing only. */
    340    void (*ppInstr) ( HInstr*, Bool ),
    341    void (*ppReg) ( HReg ),
    342 
    343    /* 32/64bit mode */
    344    Bool mode64
    345 )
    346 {
    347 #  define N_SPILL64S  (LibVEX_N_SPILL_BYTES / 8)
    348 
    349    const Bool eq_spill_opt = True;
    350 
    351    /* Iterators and temporaries. */
    352    Int       ii, j, k, m, spillee, k_suboptimal;
    353    HReg      rreg, vreg, vregS, vregD;
    354    HRegUsage reg_usage;
    355 
    356    /* Info on vregs and rregs.  Computed once and remains
    357       unchanged. */
    358    Int     n_vregs;
    359    VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
    360 
    361    /* We keep two copies of the real-reg live range info, one sorted
    362       by .live_after and the other by .dead_before.  First the
     363       unsorted info is created in the _la variant and then copied into the
    364       _db variant.  Once that's done both of them are sorted.
    365       We also need two integer cursors which record the next
    366       location in the two arrays to consider. */
    367    RRegLR* rreg_lrs_la;
    368    RRegLR* rreg_lrs_db;
    369    Int     rreg_lrs_size;
    370    Int     rreg_lrs_used;
    371    Int     rreg_lrs_la_next;
    372    Int     rreg_lrs_db_next;
    373 
    374    /* Used when constructing vreg_lrs (for allocating stack
    375       slots). */
    376    Int ss_busy_until_before[N_SPILL64S];
    377 
    378    /* Used when constructing rreg_lrs. */
    379    Int* rreg_live_after;
    380    Int* rreg_dead_before;
    381 
    382    /* Running state of the core allocation algorithm. */
    383    RRegState* rreg_state;  /* [0 .. n_rregs-1] */
    384    Int        n_rregs;
    385 
    386    /* .. and the redundant backward map */
    387    /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
     388       This implies n_rregs must be <= 32768. */
    389    Short*     vreg_state;  /* [0 .. n_vregs-1] */
    390 
    391    /* The vreg -> rreg map constructed and then applied to each
    392       instr. */
    393    HRegRemap remap;
    394 
    395    /* The output array of instructions. */
    396    HInstrArray* instrs_out;
    397 
    398    /* Sanity checks are expensive.  They are only done periodically,
    399       not at each insn processed. */
    400    Bool do_sanity_check;
    401 
    402    vassert(0 == (guest_sizeB % 16));
    403    vassert(0 == (LibVEX_N_SPILL_BYTES % 16));
    404    vassert(0 == (N_SPILL64S % 2));
    405 
    406    /* The live range numbers are signed shorts, and so limiting the
    407       number of insns to 15000 comfortably guards against them
    408       overflowing 32k. */
    409    vassert(instrs_in->arr_used <= 15000);
    410 
    411 #  define INVALID_INSTRNO (-2)
    412 
    413 #  define EMIT_INSTR(_instr)                  \
    414       do {                                    \
    415         HInstr* _tmp = (_instr);              \
    416         if (DEBUG_REGALLOC) {                 \
    417            vex_printf("**  ");                \
    418            (*ppInstr)(_tmp, mode64);          \
    419            vex_printf("\n\n");                \
    420         }                                     \
    421         addHInstr ( instrs_out, _tmp );       \
    422       } while (0)
    423 
    424 #   define PRINT_STATE						   \
    425       do {							   \
    426          Int z, q;						   \
    427          for (z = 0; z < n_rregs; z++) {			   \
    428             vex_printf("  rreg_state[%2d] = ", z);		   \
    429             (*ppReg)(rreg_state[z].rreg);	       		   \
    430             vex_printf("  \t");					   \
    431             switch (rreg_state[z].disp) {			   \
    432                case Free:    vex_printf("Free\n"); break;	   \
    433                case Unavail: vex_printf("Unavail\n"); break;	   \
    434                case Bound:   vex_printf("BoundTo "); 		   \
    435                              (*ppReg)(rreg_state[z].vreg);	   \
    436                              vex_printf("\n"); break;		   \
    437             }							   \
    438          }							   \
    439          vex_printf("\n  vreg_state[0 .. %d]:\n    ", n_vregs-1);  \
    440          q = 0;                                                    \
    441          for (z = 0; z < n_vregs; z++) {                           \
    442             if (vreg_state[z] == INVALID_RREG_NO)                  \
    443                continue;                                           \
    444             vex_printf("[%d] -> %d   ", z, vreg_state[z]);         \
    445             q++;                                                   \
    446             if (q > 0 && (q % 6) == 0)                             \
    447                vex_printf("\n    ");                               \
    448          }                                                         \
    449          vex_printf("\n");                                         \
    450       } while (0)
    451 
    452 
    453    /* --------- Stage 0: set up output array --------- */
    454    /* --------- and allocate/initialise running state. --------- */
    455 
    456    instrs_out = newHInstrArray();
    457 
    458    /* ... and initialise running state. */
    459    /* n_rregs is no more than a short name for n_available_real_regs. */
    460    n_rregs = n_available_real_regs;
    461    n_vregs = instrs_in->n_vregs;
    462 
    463    /* If this is not so, vreg_state entries will overflow. */
    464    vassert(n_vregs < 32767);
    465 
    466    rreg_state = LibVEX_Alloc(n_rregs * sizeof(RRegState));
    467    vreg_state = LibVEX_Alloc(n_vregs * sizeof(Short));
    468 
    469    for (j = 0; j < n_rregs; j++) {
    470       rreg_state[j].rreg          = available_real_regs[j];
    471       rreg_state[j].has_hlrs      = False;
    472       rreg_state[j].disp          = Free;
    473       rreg_state[j].vreg          = INVALID_HREG;
    474       rreg_state[j].is_spill_cand = False;
    475       rreg_state[j].eq_spill_slot = False;
    476    }
    477 
    478    for (j = 0; j < n_vregs; j++)
    479       vreg_state[j] = INVALID_RREG_NO;
    480 
    481 
    482    /* --------- Stage 1: compute vreg live ranges. --------- */
    483    /* --------- Stage 2: compute rreg live ranges. --------- */
    484 
    485    /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
    486 
    487    /* This is relatively simple, because (1) we only seek the complete
    488       end-to-end live range of each vreg, and are not interested in
    489       any holes in it, and (2) the vregs are conveniently numbered 0
    490       .. n_vregs-1, so we can just dump the results in a
    491       pre-allocated array. */
    492 
    493    vreg_lrs = NULL;
    494    if (n_vregs > 0)
    495       vreg_lrs = LibVEX_Alloc(sizeof(VRegLR) * n_vregs);
    496 
    497    for (j = 0; j < n_vregs; j++) {
    498       vreg_lrs[j].live_after     = INVALID_INSTRNO;
    499       vreg_lrs[j].dead_before    = INVALID_INSTRNO;
    500       vreg_lrs[j].spill_offset   = 0;
    501       vreg_lrs[j].spill_size     = 0;
    502       vreg_lrs[j].reg_class      = HRcINVALID;
    503    }
    504 
    505    /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
    506 
    507    /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
    508 
    509    /* This is more complex than Stage 1, because we need to compute
    510       exactly all the live ranges of all the allocatable real regs,
    511       and we don't know in advance how many there will be. */
    512 
    513    rreg_lrs_used = 0;
    514    rreg_lrs_size = 4;
    515    rreg_lrs_la = LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR));
    516    rreg_lrs_db = NULL; /* we'll create this later */
    517 
     518    /* We'll need to track live range start/end points separately for
    519       each rreg.  Sigh. */
    520    vassert(n_available_real_regs > 0);
    521    rreg_live_after  = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
    522    rreg_dead_before = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
    523 
    524    for (j = 0; j < n_available_real_regs; j++) {
    525       rreg_live_after[j] =
    526       rreg_dead_before[j] = INVALID_INSTRNO;
    527    }
    528 
    529    /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
    530 
    531    /* ------ start of ITERATE OVER INSNS ------ */
    532 
    533    for (ii = 0; ii < instrs_in->arr_used; ii++) {
    534 
    535       (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
    536 
    537 #     if 0
    538       vex_printf("\n%d  stage1: ", ii);
    539       (*ppInstr)(instrs_in->arr[ii], mode64);
    540       vex_printf("\n");
    541       ppHRegUsage(&reg_usage);
    542 #     endif
    543 
    544       /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
    545 
    546       /* for each reg mentioned in the insn ... */
    547       for (j = 0; j < reg_usage.n_used; j++) {
    548 
    549          vreg = reg_usage.hreg[j];
    550          /* only interested in virtual registers right now. */
    551          if (!hregIsVirtual(vreg))
    552             continue;
    553          k = hregNumber(vreg);
    554          if (k < 0 || k >= n_vregs) {
    555             vex_printf("\n");
    556             (*ppInstr)(instrs_in->arr[ii], mode64);
    557             vex_printf("\n");
    558             vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
    559             vpanic("doRegisterAllocation: out-of-range vreg");
    560          }
    561 
    562          /* Take the opportunity to note its regclass.  We'll need
    563             that when allocating spill slots. */
    564          if (vreg_lrs[k].reg_class == HRcINVALID) {
    565             /* First mention of this vreg. */
    566             vreg_lrs[k].reg_class = hregClass(vreg);
    567          } else {
    568             /* Seen it before, so check for consistency. */
    569             vassert(vreg_lrs[k].reg_class == hregClass(vreg));
    570          }
    571 
    572          /* Now consider live ranges. */
    573          switch (reg_usage.mode[j]) {
    574             case HRmRead:
    575                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
    576                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
    577                   vpanic("doRegisterAllocation: "
    578                          "first event for vreg is Read");
    579                }
    580                vreg_lrs[k].dead_before = toShort(ii + 1);
    581                break;
    582             case HRmWrite:
    583                if (vreg_lrs[k].live_after == INVALID_INSTRNO)
    584                   vreg_lrs[k].live_after = toShort(ii);
    585                vreg_lrs[k].dead_before = toShort(ii + 1);
    586                break;
    587             case HRmModify:
    588                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
    589                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
    590                   vpanic("doRegisterAllocation: "
    591                          "first event for vreg is Modify");
    592                }
    593                vreg_lrs[k].dead_before = toShort(ii + 1);
    594                break;
    595             default:
    596                vpanic("doRegisterAllocation(1)");
    597          } /* switch */
    598 
    599       } /* iterate over registers */
    600 
    601       /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
    602 
    603       /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
    604 
    605       /* for each reg mentioned in the insn ... */
    606       for (j = 0; j < reg_usage.n_used; j++) {
    607 
    608          /* Dummy initialisations of flush_la and flush_db to avoid
    609             possible bogus uninit-var warnings from gcc. */
    610          Int  flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
    611          Bool flush;
    612 
    613          rreg = reg_usage.hreg[j];
    614 
    615          /* only interested in real registers right now. */
    616          if (hregIsVirtual(rreg))
    617             continue;
    618 
    619          /* Furthermore, we're not interested in this rreg unless it's
    620             one of the allocatable ones.  For example, it could be a
    621             stack pointer register, or some other register beyond our
    622             control, in which case we should just ignore it. */
    623          for (k = 0; k < n_available_real_regs; k++)
    624             if (sameHReg(available_real_regs[k], rreg))
    625                break;
    626          if (k == n_available_real_regs)
    627             continue; /* not found -- ignore. */
    628          flush = False;
    629          switch (reg_usage.mode[j]) {
    630             case HRmWrite:
    631                flush_la = rreg_live_after[k];
    632                flush_db = rreg_dead_before[k];
    633                if (flush_la != INVALID_INSTRNO
    634                    && flush_db != INVALID_INSTRNO)
    635                   flush = True;
    636                rreg_live_after[k]  = ii;
    637                rreg_dead_before[k] = ii+1;
    638                break;
    639             case HRmRead:
    640                if (rreg_live_after[k] == INVALID_INSTRNO) {
    641                   vex_printf("\nOFFENDING RREG = ");
    642                   (*ppReg)(available_real_regs[k]);
    643                   vex_printf("\n");
    644                   vex_printf("\nOFFENDING instr = ");
    645                   (*ppInstr)(instrs_in->arr[ii], mode64);
    646                   vex_printf("\n");
    647                   vpanic("doRegisterAllocation: "
    648                          "first event for rreg is Read");
    649                }
    650                rreg_dead_before[k] = ii+1;
    651                break;
    652             case HRmModify:
    653                if (rreg_live_after[k] == INVALID_INSTRNO) {
    654                   vex_printf("\nOFFENDING RREG = ");
    655                   (*ppReg)(available_real_regs[k]);
    656                   vex_printf("\n");
    657                   vex_printf("\nOFFENDING instr = ");
    658                   (*ppInstr)(instrs_in->arr[ii], mode64);
    659                   vex_printf("\n");
    660                   vpanic("doRegisterAllocation: "
    661                          "first event for rreg is Modify");
    662                }
    663                rreg_dead_before[k] = ii+1;
    664                break;
    665             default:
    666                vpanic("doRegisterAllocation(2)");
    667          }
    668 
    669          if (flush) {
    670             vassert(flush_la != INVALID_INSTRNO);
    671             vassert(flush_db != INVALID_INSTRNO);
    672             ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
    673             if (0)
    674                vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
    675             rreg_lrs_la[rreg_lrs_used].rreg        = rreg;
    676             rreg_lrs_la[rreg_lrs_used].live_after  = toShort(flush_la);
    677             rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
    678             rreg_lrs_used++;
    679          }
    680 
    681       } /* iterate over regs in the instr */
    682 
    683       /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
    684 
    685    } /* iterate over insns */
    686 
    687    /* ------ end of ITERATE OVER INSNS ------ */
    688 
    689    /* ------ start of FINALISE RREG LIVE RANGES ------ */
    690 
    691    /* Now finish up any live ranges left over. */
    692    for (j = 0; j < n_available_real_regs; j++) {
    693 
    694 #     if 0
    695       vex_printf("residual %d:  %d %d\n", j, rreg_live_after[j],
    696                                              rreg_dead_before[j]);
    697 #     endif
    698       vassert( (rreg_live_after[j] == INVALID_INSTRNO
    699                && rreg_dead_before[j] == INVALID_INSTRNO)
    700               ||
    701               (rreg_live_after[j] != INVALID_INSTRNO
    702                && rreg_dead_before[j] != INVALID_INSTRNO)
    703             );
    704 
    705       if (rreg_live_after[j] == INVALID_INSTRNO)
    706          continue;
    707 
    708       ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
    709       if (0)
    710          vex_printf("FLUSH 2 (%d,%d)\n",
    711                     rreg_live_after[j], rreg_dead_before[j]);
    712       rreg_lrs_la[rreg_lrs_used].rreg        = available_real_regs[j];
    713       rreg_lrs_la[rreg_lrs_used].live_after  = toShort(rreg_live_after[j]);
    714       rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
    715       rreg_lrs_used++;
    716    }
    717 
    718    /* Compute summary hints for choosing real regs.  If a real reg is
    719       involved in a hard live range, record that fact in the fixed
    720       part of the running rreg_state.  Later, when offered a choice between
    721       rregs, it's better to choose one which is not marked as having
    722       any HLRs, since ones with HLRs may need to be spilled around
    723       their HLRs.  Correctness of final assignment is unaffected by
    724       this mechanism -- it is only an optimisation. */
    725 
    726    for (j = 0; j < rreg_lrs_used; j++) {
    727       rreg = rreg_lrs_la[j].rreg;
    728       vassert(!hregIsVirtual(rreg));
    729       /* rreg is involved in a HLR.  Record this info in the array, if
    730          there is space. */
    731       for (k = 0; k < n_rregs; k++)
    732          if (sameHReg(rreg_state[k].rreg, rreg))
    733             break;
    734       vassert(k < n_rregs); /* else rreg was not found in rreg_state?! */
    735       rreg_state[k].has_hlrs = True;
    736    }
    737    if (0) {
    738       for (j = 0; j < n_rregs; j++) {
    739          if (!rreg_state[j].has_hlrs)
    740             continue;
    741          ppReg(rreg_state[j].rreg);
    742          vex_printf(" hinted\n");
    743       }
    744    }
    745 
    746    /* Finally, copy the _la variant into the _db variant and
    747       sort both by their respective fields. */
    748    rreg_lrs_db = LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR));
    749    for (j = 0; j < rreg_lrs_used; j++)
    750       rreg_lrs_db[j] = rreg_lrs_la[j];
    751 
    752    sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/  );
    753    sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
    754 
    755    /* And set up the cursors. */
    756    rreg_lrs_la_next = 0;
    757    rreg_lrs_db_next = 0;
    758 
    759    for (j = 1; j < rreg_lrs_used; j++) {
    760       vassert(rreg_lrs_la[j-1].live_after  <= rreg_lrs_la[j].live_after);
    761       vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
    762    }
    763 
    764    /* ------ end of FINALISE RREG LIVE RANGES ------ */
    765 
    766 #  if DEBUG_REGALLOC
    767    for (j = 0; j < n_vregs; j++) {
    768       vex_printf("vreg %d:  la = %d,  db = %d\n",
    769                  j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
    770    }
    771 #  endif
    772 
    773 #  if DEBUG_REGALLOC
    774    vex_printf("RRegLRs by LA:\n");
    775    for (j = 0; j < rreg_lrs_used; j++) {
    776       vex_printf("  ");
    777       (*ppReg)(rreg_lrs_la[j].rreg);
    778       vex_printf("      la = %d,  db = %d\n",
    779                  rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
    780    }
    781    vex_printf("RRegLRs by DB:\n");
    782    for (j = 0; j < rreg_lrs_used; j++) {
    783       vex_printf("  ");
    784       (*ppReg)(rreg_lrs_db[j].rreg);
    785       vex_printf("      la = %d,  db = %d\n",
    786                  rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
    787    }
    788 #  endif
    789 
    790    /* --------- Stage 3: allocate spill slots. --------- */
    791 
    792    /* Each spill slot is 8 bytes long.  For vregs which take more than
    793       64 bits to spill (classes Flt64 and Vec128), we have to allocate
    794       two consecutive spill slots.  For 256 bit registers (class
    795       Vec256), we have to allocate four consecutive spill slots.
    796 
    797       For Vec128-class on PowerPC, the spill slot's actual address
    798       must be 16-byte aligned.  Since the spill slot's address is
    799       computed as an offset from the guest state pointer, and since
    800       the user of the generated code must set that pointer to a
    801       32-aligned value, we have the residual obligation here of
    802       choosing a 16-aligned spill slot offset for Vec128-class values.
    803       Since each spill slot is 8 bytes long, that means for
     804       Vec128-class values we must allocate a spill slot number which
    805       is zero mod 2.
    806 
     807       Similarly, for Vec256 class on amd64, find a spill slot number
    808       which is zero mod 4.  This guarantees it will be 32 byte
    809       aligned, which isn't actually necessary on amd64 (we use movUpd
    810       etc to spill), but seems like good practice.
    811 
    812       Do a rank-based allocation of vregs to spill slot numbers.  We
    813       put as few values as possible in spill slots, but nevertheless
    814       need to have a spill slot available for all vregs, just in case.
    815    */
    816    /* max_ss_no = -1; */
    817 
    818    for (j = 0; j < N_SPILL64S; j++)
    819       ss_busy_until_before[j] = 0;
    820 
    821    for (j = 0; j < n_vregs; j++) {
    822 
    823       /* True iff this vreg is unused.  In which case we also expect
    824          that the reg_class field for it has not been set.  */
    825       if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
    826          vassert(vreg_lrs[j].reg_class == HRcINVALID);
    827          continue;
    828       }
    829 
    830       /* The spill slots are 64 bits in size.  As per the comment on
    831          definition of HRegClass in host_generic_regs.h, that means,
    832          to spill a vreg of class Flt64 or Vec128, we'll need to find
    833          two adjacent spill slots to use.  For Vec256, we'll need to
     834          find four adjacent slots to use.  Note, this logic needs to
     835          be kept in sync with the size info on the definition of
    836          HRegClass. */
    837       switch (vreg_lrs[j].reg_class) {
    838 
    839          case HRcVec128: case HRcFlt64:
     840             /* Find two adjacent free slots which between them
     841                provide up to 128 bits in which to spill the vreg.
    842                Since we are trying to find an even:odd pair, move
    843                along in steps of 2 (slots). */
    844             for (k = 0; k < N_SPILL64S-1; k += 2)
    845                if (ss_busy_until_before[k+0] <= vreg_lrs[j].live_after
    846                    && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
    847                   break;
    848             if (k >= N_SPILL64S-1) {
    849                vpanic("LibVEX_N_SPILL_BYTES is too low.  "
    850                       "Increase and recompile.");
    851             }
    852             if (0) vex_printf("16-byte spill offset in spill slot %d\n",
    853                               (Int)k);
    854             ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
    855             ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
    856             break;
    857 
    858          default:
    859             /* The ordinary case -- just find a single spill slot. */
    860             /* Find the lowest-numbered spill slot which is available
    861                at the start point of this interval, and assign the
    862                interval to it. */
    863             for (k = 0; k < N_SPILL64S; k++)
    864                if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
    865                   break;
    866             if (k == N_SPILL64S) {
    867                vpanic("LibVEX_N_SPILL_BYTES is too low.  "
    868                       "Increase and recompile.");
    869             }
    870             ss_busy_until_before[k] = vreg_lrs[j].dead_before;
    871             break;
    872 
    873       } /* switch (vreg_lrs[j].reg_class) { */
    874 
    875       /* This reflects LibVEX's hard-wired knowledge of the baseBlock
    876          layout: the guest state, then two equal sized areas following
    877          it for two sets of shadow state, and then the spill area. */
    878       vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8);
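               /* Since guest_sizeB is asserted above to be a multiple of
                  16, guest_sizeB * 3 is 16-aligned; combined with the
                  even slot number chosen for Flt64/Vec128 vregs, this
                  keeps their spill offsets 16-aligned, as
                  sanity_check_spill_offset expects. */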
    879 
    880       /* Independent check that we've made a sane choice of slot */
    881       sanity_check_spill_offset( &vreg_lrs[j] );
    882       /* if (j > max_ss_no) */
    883       /*    max_ss_no = j; */
    884    }
    885 
    886 #  if 0
    887    vex_printf("\n\n");
    888    for (j = 0; j < n_vregs; j++)
    889       vex_printf("vreg %d    --> spill offset %d\n",
    890                  j, vreg_lrs[j].spill_offset);
    891 #  endif
    892 
    893    /* --------- Stage 4: establish rreg preferences --------- */
    894 
     895    /* It may be advantageous to allocate certain vregs to specific
    896       rregs, as a way of avoiding reg-reg moves later.  Here we
    897       establish which, if any, rreg each vreg would prefer to be in.
    898       Note that this constrains the allocator -- ideally we end up
    899       with as few as possible vregs expressing a preference.
    900 
    901       This is an optimisation: if the .preferred_rreg field is never
    902       set to anything different from INVALID_HREG, the allocator still
    903       works. */
    904 
    905    /* 30 Dec 04: removed this mechanism as it does not seem to
    906       help. */
    907 
    908    /* --------- Stage 5: process instructions --------- */
    909 
    910    /* This is the main loop of the allocator.  First, we need to
    911       correctly set up our running state, which tracks the status of
    912       each real register. */
    913 
    914    /* ------ BEGIN: Process each insn in turn. ------ */
    915 
    916    for (ii = 0; ii < instrs_in->arr_used; ii++) {
    917 
    918 #     if DEBUG_REGALLOC
    919       vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
    920       vex_printf("---- ");
    921       (*ppInstr)(instrs_in->arr[ii], mode64);
    922       vex_printf("\n\nInitial state:\n");
    923       PRINT_STATE;
    924       vex_printf("\n");
    925 #     endif
    926 
    927       /* ------------ Sanity checks ------------ */
    928 
    929       /* Sanity checks are expensive.  So they are done only once
    930          every 7 instructions, and just before the last
    931          instruction. */
    932       do_sanity_check
    933          = toBool(
    934               False  /* Set to True for sanity checking of all insns. */
    935               || ii == instrs_in->arr_used-1
    936               || (ii > 0 && (ii % 7) == 0)
    937            );
    938 
    939       if (do_sanity_check) {
    940 
    941          /* Sanity check 1: all rregs with a hard live range crossing
    942             this insn must be marked as unavailable in the running
    943             state. */
    944          for (j = 0; j < rreg_lrs_used; j++) {
    945             if (rreg_lrs_la[j].live_after < ii
    946                 && ii < rreg_lrs_la[j].dead_before) {
    947                /* ii is the middle of a hard live range for some real
    948                   reg.  Check it's marked as such in the running
    949                   state. */
    950 
    951 #              if 0
    952                vex_printf("considering la %d .. db %d   reg = ",
     953                           rreg_lrs_la[j].live_after,
     954                           rreg_lrs_la[j].dead_before);
     955                (*ppReg)(rreg_lrs_la[j].rreg);
    956                vex_printf("\n");
    957 #              endif
    958 
    959                /* find the state entry for this rreg */
    960                for (k = 0; k < n_rregs; k++)
    961                   if (sameHReg(rreg_state[k].rreg, rreg_lrs_la[j].rreg))
    962                      break;
    963 
    964                /* and assert that this rreg is marked as unavailable */
    965                vassert(rreg_state[k].disp == Unavail);
    966             }
    967          }
    968 
    969          /* Sanity check 2: conversely, all rregs marked as
    970             unavailable in the running rreg_state must have a
    971             corresponding hard live range entry in the rreg_lrs
    972             array. */
    973          for (j = 0; j < n_available_real_regs; j++) {
    974             vassert(rreg_state[j].disp == Bound
    975                     || rreg_state[j].disp == Free
    976                     || rreg_state[j].disp == Unavail);
    977             if (rreg_state[j].disp != Unavail)
    978                continue;
    979             for (k = 0; k < rreg_lrs_used; k++)
    980                if (sameHReg(rreg_lrs_la[k].rreg, rreg_state[j].rreg)
    981                    && rreg_lrs_la[k].live_after < ii
    982                    && ii < rreg_lrs_la[k].dead_before)
    983                   break;
    984             /* If this vassertion fails, we couldn't find a
    985                corresponding HLR. */
    986             vassert(k < rreg_lrs_used);
    987          }
    988 
    989          /* Sanity check 3: all vreg-rreg bindings must bind registers
    990             of the same class. */
    991          for (j = 0; j < n_rregs; j++) {
    992             if (rreg_state[j].disp != Bound) {
    993                vassert(rreg_state[j].eq_spill_slot == False);
    994                continue;
    995             }
    996             vassert(hregClass(rreg_state[j].rreg)
    997                     == hregClass(rreg_state[j].vreg));
    998             vassert( hregIsVirtual(rreg_state[j].vreg));
    999             vassert(!hregIsVirtual(rreg_state[j].rreg));
   1000          }
   1001 
   1002          /* Sanity check 4: the vreg_state and rreg_state
   1003             mutually-redundant mappings are consistent.  If
   1004             rreg_state[j].vreg points at some vreg_state entry then
   1005             that vreg_state entry should point back at
   1006             rreg_state[j]. */
   1007          for (j = 0; j < n_rregs; j++) {
   1008             if (rreg_state[j].disp != Bound)
   1009                continue;
   1010             k = hregNumber(rreg_state[j].vreg);
   1011             vassert(IS_VALID_VREGNO(k));
   1012             vassert(vreg_state[k] == j);
   1013          }
   1014          for (j = 0; j < n_vregs; j++) {
   1015             k = vreg_state[j];
   1016             if (k == INVALID_RREG_NO)
   1017                continue;
   1018             vassert(IS_VALID_RREGNO(k));
   1019             vassert(rreg_state[k].disp == Bound);
   1020             vassert(hregNumber(rreg_state[k].vreg) == j);
   1021          }
   1022 
   1023       } /* if (do_sanity_check) */
   1024 
   1025       /* ------------ end of Sanity checks ------------ */
   1026 
   1027       /* Do various optimisations pertaining to register coalescing
   1028          and preferencing:
   1029             MOV  v <-> v   coalescing (done here).
   1030             MOV  v <-> r   coalescing (not yet, if ever)
   1031       */
   1032       /* If doing a reg-reg move between two vregs, and the src's live
   1033          range ends here and the dst's live range starts here, bind
   1034          the dst to the src's rreg, and that's all. */
   1035       if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
   1036          if (!hregIsVirtual(vregS)) goto cannot_coalesce;
   1037          if (!hregIsVirtual(vregD)) goto cannot_coalesce;
   1038          /* Check that *isMove is not telling us a bunch of lies ... */
   1039          vassert(hregClass(vregS) == hregClass(vregD));
   1040          k = hregNumber(vregS);
   1041          m = hregNumber(vregD);
   1042          vassert(IS_VALID_VREGNO(k));
   1043          vassert(IS_VALID_VREGNO(m));
   1044          if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
   1045          if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
   1046 #        if DEBUG_REGALLOC
   1047          vex_printf("COALESCE ");
   1048          (*ppReg)(vregS);
   1049          vex_printf(" -> ");
   1050          (*ppReg)(vregD);
   1051          vex_printf("\n\n");
   1052 #        endif
   1053          /* Find the state entry for vregS. */
   1054          for (m = 0; m < n_rregs; m++)
   1055             if (rreg_state[m].disp == Bound
   1056                 && sameHReg(rreg_state[m].vreg, vregS))
   1057                break;
   1058          if (m == n_rregs)
   1059             /* We failed to find a binding for vregS, which means it's
   1060                currently not in a register.  So we can't do the
   1061                coalescing.  Give up. */
   1062             goto cannot_coalesce;
   1063 
   1064          /* Finally, we can do the coalescing.  It's trivial -- merely
   1065             claim vregS's register for vregD. */
   1066          rreg_state[m].vreg = vregD;
   1067          vassert(IS_VALID_VREGNO(hregNumber(vregD)));
   1068          vassert(IS_VALID_VREGNO(hregNumber(vregS)));
   1069          vreg_state[hregNumber(vregD)] = toShort(m);
   1070          vreg_state[hregNumber(vregS)] = INVALID_RREG_NO;
   1071 
   1072          /* This rreg has become associated with a different vreg and
   1073             hence with a different spill slot.  Play safe. */
   1074          rreg_state[m].eq_spill_slot = False;
   1075 
   1076          /* Move on to the next insn.  We skip the post-insn stuff for
   1077             fixed registers, since this move should not interact with
   1078             them in any way. */
   1079          continue;
   1080       }
   1081      cannot_coalesce:
   1082 
   1083       /* ------ Free up rregs bound to dead vregs ------ */
   1084 
   1085       /* Look for vregs whose live range has just ended, and
    1086          mark the associated rreg as free. */
   1087 
   1088       for (j = 0; j < n_rregs; j++) {
   1089          if (rreg_state[j].disp != Bound)
   1090             continue;
   1091          UInt vregno = hregNumber(rreg_state[j].vreg);
   1092          vassert(IS_VALID_VREGNO(vregno));
   1093          if (vreg_lrs[vregno].dead_before <= ii) {
   1094             rreg_state[j].disp = Free;
   1095             rreg_state[j].eq_spill_slot = False;
   1096             m = hregNumber(rreg_state[j].vreg);
   1097             vassert(IS_VALID_VREGNO(m));
   1098             vreg_state[m] = INVALID_RREG_NO;
   1099             if (DEBUG_REGALLOC) {
   1100                vex_printf("free up ");
   1101                (*ppReg)(rreg_state[j].rreg);
   1102                vex_printf("\n");
   1103             }
   1104          }
   1105       }
   1106 
   1107       /* ------ Pre-instruction actions for fixed rreg uses ------ */
   1108 
   1109       /* Now we have to deal with rregs which are about to be made
   1110          live by this instruction -- in other words, are entering into
   1111          one of their live ranges.  If any such rreg holds a vreg, we
   1112          will have to free up the rreg.  The simplest solution which
   1113          is correct is to spill the rreg.
   1114 
   1115          Note we could do better:
   1116          * Could move it into some other free rreg, if one is available
   1117 
   1118          Do this efficiently, by incrementally stepping along an array
   1119          of rreg HLRs that are known to be sorted by start point
   1120          (their .live_after field).
   1121       */
   1122       while (True) {
   1123          vassert(rreg_lrs_la_next >= 0);
   1124          vassert(rreg_lrs_la_next <= rreg_lrs_used);
   1125          if (rreg_lrs_la_next == rreg_lrs_used)
   1126             break; /* no more real reg live ranges to consider */
   1127          if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
   1128             break; /* next live range does not yet start */
   1129          vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
   1130          /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
   1131             Find the associated rreg_state entry. */
   1132          /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
   1133             Real register live ranges are guaranteed to be well-formed
   1134             in that they start with a write to the register -- Stage 2
   1135             rejects any code not satisfying this.  So the correct
   1136             question to ask is whether
   1137             rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
   1138             whether the reg becomes live after this insn -- rather
   1139             than before it. */
   1140 #        if DEBUG_REGALLOC
   1141          vex_printf("need to free up rreg: ");
   1142          (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
   1143          vex_printf("\n\n");
   1144 #        endif
   1145          for (k = 0; k < n_rregs; k++)
   1146             if (sameHReg(rreg_state[k].rreg, rreg_lrs_la[rreg_lrs_la_next].rreg))
   1147                break;
   1148          /* If this fails, we don't have an entry for this rreg.
    1149             Which we should have. */
   1150          vassert(IS_VALID_RREGNO(k));
   1151          m = hregNumber(rreg_state[k].vreg);
   1152          if (rreg_state[k].disp == Bound) {
   1153             /* Yes, there is an associated vreg.  Spill it if it's
   1154                still live. */
   1155             vassert(IS_VALID_VREGNO(m));
   1156             vreg_state[m] = INVALID_RREG_NO;
   1157             if (vreg_lrs[m].dead_before > ii) {
   1158                vassert(vreg_lrs[m].reg_class != HRcINVALID);
   1159                if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
   1160                   HInstr* spill1 = NULL;
   1161                   HInstr* spill2 = NULL;
   1162                   (*genSpill)( &spill1, &spill2, rreg_state[k].rreg,
   1163                                vreg_lrs[m].spill_offset, mode64 );
   1164                   vassert(spill1 || spill2); /* can't both be NULL */
   1165                   if (spill1)
   1166                      EMIT_INSTR(spill1);
   1167                   if (spill2)
   1168                      EMIT_INSTR(spill2);
   1169                }
   1170                rreg_state[k].eq_spill_slot = True;
   1171             }
   1172          }
   1173          rreg_state[k].disp = Unavail;
   1174          rreg_state[k].vreg = INVALID_HREG;
   1175          rreg_state[k].eq_spill_slot = False;
   1176 
   1177          /* check for further rregs entering HLRs at this point */
   1178          rreg_lrs_la_next++;
   1179       }
   1180 
   1181 
   1182 #     if DEBUG_REGALLOC
   1183       vex_printf("After pre-insn actions for fixed regs:\n");
   1184       PRINT_STATE;
   1185       vex_printf("\n");
   1186 #     endif
   1187 
   1188 
   1189       /* ------ Deal with the current instruction. ------ */
   1190 
   1191       /* Finally we can begin the processing of this instruction
   1192          itself.  The aim is to free up enough rregs for this insn.
   1193          This may generate spill stores since we may have to evict
   1194          some vregs currently in rregs.  Also generates spill loads.
   1195          We also build up the final vreg->rreg mapping to be applied
   1196          to the insn. */
   1197 
   1198       (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
   1199 
   1200       initHRegRemap(&remap);
   1201 
   1202       /* ------------ BEGIN directReload optimisation ----------- */
   1203 
   1204       /* If the instruction reads exactly one vreg which is currently
   1205          in a spill slot, and this is last use of that vreg, see if we
    1206          in a spill slot, and this is the last use of that vreg, see if
    1207          we can convert the instruction into one which reads directly
    1208          from the spill slot.  This is clearly only possible for x86 and
    1209          amd64 targets, since ppc and arm are load-store architectures.  If
   1210          instruction, and recompute its reg usage, so that the change
   1211          is invisible to the standard-case handling that follows. */
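
               /* Roughly, and purely as an illustration: on a target with
                  memory operands, an insn such as "add %vregS, %dst" whose
                  source vreg currently lives only in its spill slot may be
                  rewritten as "add <spill-slot-mem>, %dst", saving the
                  reload into a scratch register. */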
   1212 
   1213       if (directReload && reg_usage.n_used <= 2) {
   1214          Bool  debug_direct_reload = True && False;
   1215          HReg  cand     = INVALID_HREG;
    1216          Int   nreads   = 0;
   1217          Short spilloff = 0;
   1218 
   1219          for (j = 0; j < reg_usage.n_used; j++) {
   1220 
   1221             vreg = reg_usage.hreg[j];
   1222 
   1223             if (!hregIsVirtual(vreg))
   1224                continue;
   1225 
   1226             if (reg_usage.mode[j] == HRmRead) {
   1227                nreads++;
   1228                m = hregNumber(vreg);
   1229                vassert(IS_VALID_VREGNO(m));
   1230                k = vreg_state[m];
   1231                if (!IS_VALID_RREGNO(k)) {
   1232                   /* ok, it is spilled.  Now, is this its last use? */
   1233                   vassert(vreg_lrs[m].dead_before >= ii+1);
   1234                   if (vreg_lrs[m].dead_before == ii+1
   1235                       && hregIsInvalid(cand)) {
   1236                      spilloff = vreg_lrs[m].spill_offset;
   1237                      cand = vreg;
   1238                   }
   1239                }
   1240             }
   1241          }
   1242 
   1243          if (nreads == 1 && ! hregIsInvalid(cand)) {
   1244             HInstr* reloaded;
   1245             if (reg_usage.n_used == 2)
   1246                vassert(! sameHReg(reg_usage.hreg[0], reg_usage.hreg[1]));
   1247 
   1248             reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
   1249             if (debug_direct_reload && !reloaded) {
   1250                vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
   1251                ppInstr(instrs_in->arr[ii], mode64);
   1252             }
   1253             if (reloaded) {
   1254                /* Update info about the insn, so it looks as if it had
   1255                   been in this form all along. */
   1256                instrs_in->arr[ii] = reloaded;
   1257                (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
   1258                if (debug_direct_reload && !reloaded) {
   1259                   vex_printf("  -->  ");
   1260                   ppInstr(reloaded, mode64);
   1261                }
   1262             }
   1263 
   1264             if (debug_direct_reload)
   1265                vex_printf("\n");
   1266          }
   1267 
   1268       }
   1269 
   1270       /* ------------ END directReload optimisation ------------ */
   1271 
   1272       /* for each reg mentioned in the insn ... */
   1273       for (j = 0; j < reg_usage.n_used; j++) {
   1274 
   1275          vreg = reg_usage.hreg[j];
   1276 
   1277          /* only interested in virtual registers right now. */
   1278          if (!hregIsVirtual(vreg))
   1279             continue;
   1280 
   1281 #        if 0
   1282          vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
   1283 #        endif
   1284 
   1285          /* Now we're trying to find a rreg for "vreg".  First of all,
   1286             if it already has an rreg assigned, we don't need to do
   1287             anything more.  Search the current state to find out. */
   1288          m = hregNumber(vreg);
   1289          vassert(IS_VALID_VREGNO(m));
   1290          k = vreg_state[m];
   1291          if (IS_VALID_RREGNO(k)) {
   1292             vassert(rreg_state[k].disp == Bound);
   1293             addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
   1294             /* If this rreg is written or modified, mark it as different
   1295                from any spill slot value. */
   1296             if (reg_usage.mode[j] != HRmRead)
   1297                rreg_state[k].eq_spill_slot = False;
   1298             continue;
   1299          } else {
   1300             vassert(k == INVALID_RREG_NO);
   1301          }
   1302 
   1303          /* No luck.  The next thing to do is see if there is a
   1304             currently free rreg available, of the correct class.  If
   1305             so, bag it.  NOTE, we could improve this by selecting an
   1306             rreg for which the next live-range event is as far ahead
   1307             as possible. */
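                 /* Prefer a free rreg that has no hard live ranges (has_hlrs
                    == False): binding the vreg to an rreg which a later
                    fixed-use (hard) live range will claim would force the
                    vreg to be spilled out again at that point.  Such rregs
                    are therefore kept only as a fallback (k_suboptimal). */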
   1308          k_suboptimal = -1;
   1309          for (k = 0; k < n_rregs; k++) {
   1310             if (rreg_state[k].disp != Free
   1311                 || hregClass(rreg_state[k].rreg) != hregClass(vreg))
   1312                continue;
   1313             if (rreg_state[k].has_hlrs) {
   1314                /* Well, at least we can use k_suboptimal if we really
   1315                   have to.  Keep on looking for a better candidate. */
   1316                k_suboptimal = k;
   1317             } else {
   1318                /* Found a preferable reg.  Use it. */
   1319                k_suboptimal = -1;
   1320                break;
   1321             }
   1322          }
   1323          if (k_suboptimal >= 0)
   1324             k = k_suboptimal;
   1325 
   1326          if (k < n_rregs) {
   1327             rreg_state[k].disp = Bound;
   1328             rreg_state[k].vreg = vreg;
   1329             m = hregNumber(vreg);
   1330             vassert(IS_VALID_VREGNO(m));
   1331             vreg_state[m] = toShort(k);
   1332             addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
   1333             /* Generate a reload if needed.  This only creates needed
   1334                reloads because the live range builder for vregs will
   1335                guarantee that the first event for a vreg is a write.
   1336                Hence, if this reference is not a write, it cannot be
   1337                the first reference for this vreg, and so a reload is
   1338                indeed needed. */
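                    /* Concretely (insn numbers purely illustrative): if vreg
                       v was first written at insn 10 and this insn, say insn
                       15, reads v but v currently has no rreg, then v must
                       have been evicted to its spill slot somewhere after
                       insn 10; its up-to-date value is in the slot and must
                       be reloaded before this insn can use it. */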
   1339             if (reg_usage.mode[j] != HRmWrite) {
   1340                vassert(vreg_lrs[m].reg_class != HRcINVALID);
   1341                HInstr* reload1 = NULL;
   1342                HInstr* reload2 = NULL;
   1343                (*genReload)( &reload1, &reload2, rreg_state[k].rreg,
   1344                              vreg_lrs[m].spill_offset, mode64 );
   1345                vassert(reload1 || reload2); /* can't both be NULL */
   1346                if (reload1)
   1347                   EMIT_INSTR(reload1);
   1348                if (reload2)
   1349                   EMIT_INSTR(reload2);
   1350                /* This rreg is read or modified by the instruction.
   1351                   If it's merely read we can claim it now equals the
   1352                   spill slot, but not so if it is modified. */
   1353                if (reg_usage.mode[j] == HRmRead) {
   1354                   rreg_state[k].eq_spill_slot = True;
   1355                } else {
   1356                   vassert(reg_usage.mode[j] == HRmModify);
   1357                   rreg_state[k].eq_spill_slot = False;
   1358                }
   1359             } else {
   1360                rreg_state[k].eq_spill_slot = False;
   1361             }
   1362 
   1363             continue;
   1364          }
   1365 
   1366          /* Well, now we have no option but to spill a vreg.  It's
   1367             important to make a good choice of vreg to spill, and of
   1368             course we need to be careful not to spill a vreg which is
   1369             needed by this insn. */
   1370 
   1371          /* First, mark in the rreg_state those rregs which are not
   1372             spill candidates, either because they hold a vreg mentioned
   1373             by this instruction or because they are of the wrong class. */
   1374          for (k = 0; k < n_rregs; k++) {
   1375             rreg_state[k].is_spill_cand = False;
   1376             if (rreg_state[k].disp != Bound)
   1377                continue;
   1378             if (hregClass(rreg_state[k].rreg) != hregClass(vreg))
   1379                continue;
   1380             rreg_state[k].is_spill_cand = True;
   1381             for (m = 0; m < reg_usage.n_used; m++) {
   1382                if (sameHReg(rreg_state[k].vreg, reg_usage.hreg[m])) {
   1383                   rreg_state[k].is_spill_cand = False;
   1384                   break;
   1385                }
   1386             }
   1387          }
   1388 
   1389          /* We can choose to spill any rreg satisfying
   1390             rreg_state[r].is_spill_cand (so to speak).  Choose r so that
   1391             the next use of its associated vreg is as far ahead as
   1392             possible, in the hope that this will minimise the number
   1393             of consequent reloads required. */
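                 /* Illustrative example (numbers hypothetical): if one
                    candidate rreg holds a vreg next mentioned at insn ii+2
                    and another holds a vreg not mentioned again until insn
                    ii+40, the latter is picked, since any reload it needs is
                    postponed the furthest. */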
   1394          spillee
   1395             = findMostDistantlyMentionedVReg (
   1396                  getRegUsage, instrs_in, ii+1, rreg_state, n_rregs, mode64 );
   1397 
   1398          if (spillee == -1) {
   1399             /* Hmmmmm.  There don't appear to be any spill candidates.
   1400                We're hosed. */
   1401             vex_printf("reg_alloc: can't find a register in class: ");
   1402             ppHRegClass(hregClass(vreg));
   1403             vex_printf("\n");
   1404             vpanic("reg_alloc: can't create a free register.");
   1405          }
   1406 
   1407          /* Right.  So we're going to spill rreg_state[spillee]. */
   1408          vassert(IS_VALID_RREGNO(spillee));
   1409          vassert(rreg_state[spillee].disp == Bound);
   1410          /* check it's the right class */
   1411          vassert(hregClass(rreg_state[spillee].rreg) == hregClass(vreg));
   1412          /* check we're not ejecting the vreg for which we are trying
   1413             to free up a register. */
   1414          vassert(! sameHReg(rreg_state[spillee].vreg, vreg));
   1415 
   1416          m = hregNumber(rreg_state[spillee].vreg);
   1417          vassert(IS_VALID_VREGNO(m));
   1418 
   1419          /* So here's the spill store.  Assert that we're spilling a
   1420             live vreg. */
   1421          vassert(vreg_lrs[m].dead_before > ii);
   1422          vassert(vreg_lrs[m].reg_class != HRcINVALID);
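                 /* The store is skipped when the optimisation is enabled and
                    this rreg's value is already known to equal the contents
                    of the vreg's spill slot (eq_spill_slot), since the store
                    would then be redundant. */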
   1423          if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
   1424             HInstr* spill1 = NULL;
   1425             HInstr* spill2 = NULL;
   1426             (*genSpill)( &spill1, &spill2, rreg_state[spillee].rreg,
   1427                          vreg_lrs[m].spill_offset, mode64 );
   1428             vassert(spill1 || spill2); /* can't both be NULL */
   1429             if (spill1)
   1430                EMIT_INSTR(spill1);
   1431             if (spill2)
   1432                EMIT_INSTR(spill2);
   1433          }
   1434 
   1435          /* Update the rreg_state to reflect the new assignment for this
   1436             rreg, and mark the evicted vreg (index m) as held in no rreg. */
   1437          rreg_state[spillee].vreg = vreg;
   1438          vreg_state[m] = INVALID_RREG_NO;
   1439 
   1440          rreg_state[spillee].eq_spill_slot = False; /* be safe */
   1441 
   1442          m = hregNumber(vreg);
   1443          vassert(IS_VALID_VREGNO(m));
   1444          vreg_state[m] = toShort(spillee);
   1445 
   1446          /* Now, if this vreg is being read or modified (as opposed to
   1447             written), we have to generate a reload for it. */
   1448          if (reg_usage.mode[j] != HRmWrite) {
   1449             vassert(vreg_lrs[m].reg_class != HRcINVALID);
   1450             HInstr* reload1 = NULL;
   1451             HInstr* reload2 = NULL;
   1452             (*genReload)( &reload1, &reload2, rreg_state[spillee].rreg,
   1453                           vreg_lrs[m].spill_offset, mode64 );
   1454             vassert(reload1 || reload2); /* can't both be NULL */
   1455             if (reload1)
   1456                EMIT_INSTR(reload1);
   1457             if (reload2)
   1458                EMIT_INSTR(reload2);
   1459             /* This rreg is read or modified by the instruction.
   1460                If it's merely read we can claim it now equals the
   1461                spill slot, but not so if it is modified. */
   1462             if (reg_usage.mode[j] == HRmRead) {
   1463                rreg_state[spillee].eq_spill_slot = True;
   1464             } else {
   1465                vassert(reg_usage.mode[j] == HRmModify);
   1466                rreg_state[spillee].eq_spill_slot = False;
   1467             }
   1468          }
   1469 
   1470          /* So after much twisting and turning, we have vreg mapped to
   1471             rreg_state[spillee].rreg.  Note that in the map. */
   1472          addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg);
   1473 
   1474       } /* iterate over registers in this instruction. */
   1475 
   1476       /* We've finished clowning around with registers in this instruction.
   1477          Three results:
   1478          - the running rreg_state[] has been updated
   1479          - a suitable vreg->rreg mapping for this instruction has been
   1480            constructed
   1481          - spill and reload instructions may have been emitted.
   1482 
   1483          The final step is to apply the mapping to the instruction,
   1484          and emit that.
   1485       */
   1486 
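              /* For illustration only (insn and registers hypothetical): if
                 remap sends %v7 -> %rcx and %v42 -> %rbx, mapRegs rewrites
                 "add %v42, %v7" in place as "add %rbx, %rcx", and that
                 rewritten insn is what EMIT_INSTR then emits. */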
   1487       /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
   1488       (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
   1489       EMIT_INSTR( instrs_in->arr[ii] );
   1490 
   1491 #     if DEBUG_REGALLOC
   1492       vex_printf("After dealing with current insn:\n");
   1493       PRINT_STATE;
   1494       vex_printf("\n");
   1495 #     endif
   1496 
   1497       /* ------ Post-instruction actions for fixed rreg uses ------ */
   1498 
   1499       /* Now we need to check for rregs exiting fixed live ranges
   1500          after this instruction, and if so mark them as free. */
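              /* A hard live range typically covers a span where insns name a
                 real register directly (for instance a fixed-register operand
                 or a call sequence).  Once the last such insn has been
                 processed, the rreg returns to the Free pool and ordinary
                 vregs may be bound to it again. */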
   1501       while (True) {
   1502          vassert(rreg_lrs_db_next >= 0);
   1503          vassert(rreg_lrs_db_next <= rreg_lrs_used);
   1504          if (rreg_lrs_db_next == rreg_lrs_used)
   1505             break; /* no more real reg live ranges to consider */
   1506          if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
   1507             break; /* next live range has not yet ended */
   1508          vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
   1509          /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live
   1510             range.  Mark it as free in the main rreg_state array. */
   1511          for (k = 0; k < n_rregs; k++)
   1512            if (sameHReg(rreg_state[k].rreg, rreg_lrs_db[rreg_lrs_db_next].rreg))
   1513                break;
   1514          /* If this vassertion fails, we don't have an entry for
   1515             this rreg, which we should have. */
   1516          vassert(k < n_rregs);
   1517          vassert(rreg_state[k].disp == Unavail);
   1518          rreg_state[k].disp = Free;
   1519          rreg_state[k].vreg = INVALID_HREG;
   1520          rreg_state[k].eq_spill_slot = False;
   1521 
   1522          /* check for further rregs leaving HLRs at this point */
   1523          rreg_lrs_db_next++;
   1524       }
   1525 
   1526 #     if DEBUG_REGALLOC
   1527       vex_printf("After post-insn actions for fixed regs:\n");
   1528       PRINT_STATE;
   1529       vex_printf("\n");
   1530 #     endif
   1531 
   1532    } /* iterate over insns */
   1533 
   1534    /* ------ END: Process each insn in turn. ------ */
   1535 
   1536    /* free(rreg_state); */
   1537    /* free(rreg_lrs); */
   1538    /* if (vreg_lrs) free(vreg_lrs); */
   1539 
   1540    /* Paranoia */
   1541    for (j = 0; j < n_rregs; j++)
   1542       vassert(sameHReg(rreg_state[j].rreg, available_real_regs[j]));
   1543 
   1544    vassert(rreg_lrs_la_next == rreg_lrs_used);
   1545    vassert(rreg_lrs_db_next == rreg_lrs_used);
   1546 
   1547    return instrs_out;
   1548 
   1549 #  undef INVALID_INSTRNO
   1550 #  undef EMIT_INSTR
   1551 #  undef PRINT_STATE
   1552 }
   1553 
   1554 
   1555 
   1556 /*---------------------------------------------------------------*/
   1557 /*---                                       host_reg_alloc2.c ---*/
   1558 /*---------------------------------------------------------------*/
   1559