Home | History | Annotate | Download | only in priv
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                 host_reg_alloc2.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2013 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex.h"
     38 
     39 #include "main_util.h"
     40 #include "host_generic_regs.h"
     41 
     42 /* Set to 1 for lots of debugging output. */
     43 #define DEBUG_REGALLOC 0
     44 
     45 
     46 /* TODO 27 Oct 04:
     47 
     48    Better consistency checking from what isMove tells us.
     49 
     50    We can possibly do V-V coalescing even when the src is spilled,
     51    providing we can arrange for the dst to have the same spill slot.
     52 
     53    Note that state[].hreg is the same as the available real regs.
     54 
     55    Generally rationalise data structures.  */
     56 
     57 
     58 /* Records information on virtual register live ranges.  Computed once
     59    and remains unchanged after that. */
     60 typedef
     61    struct {
     62       /* Becomes live for the first time after this insn ... */
     63       Short live_after;
     64       /* Becomes dead for the last time before this insn ... */
     65       Short dead_before;
     66       /* The "home" spill slot, if needed.  Never changes. */
     67       Short spill_offset;
     68       Short spill_size;
     69       /* What kind of register this is. */
     70       HRegClass reg_class;
     71    }
     72    VRegLR;
     73 
     74 
     75 /* Records information on real-register live ranges.  Computed once
     76    and remains unchanged after that. */
     77 typedef
     78    struct {
     79       HReg rreg;
     80       /* Becomes live after this insn ... */
     81       Short live_after;
     82       /* Becomes dead before this insn ... */
     83       Short dead_before;
     84    }
     85    RRegLR;
     86 
     87 
     88 /* An array of the following structs (rreg_state) comprises the
     89    running state of the allocator.  It indicates what the current
     90    disposition of each allocatable real register is.  The array gets
     91    updated as the allocator processes instructions.  The identity of
     92    the register is not recorded here, because the index of this
     93    structure in doRegisterAllocation()'s |rreg_state| is the index
     94    number of the register, and the register itself can be extracted
     95    from the RRegUniverse supplied to doRegisterAllocation(). */
     96 typedef
     97    struct {
     98       /* ------ FIELDS WHICH DO NOT CHANGE ------ */
     99       /* Is this involved in any HLRs?  (only an optimisation hint) */
    100       Bool has_hlrs;
    101       /* ------ FIELDS WHICH DO CHANGE ------ */
    102       /* 6 May 07: rearranged fields below so the whole struct fits
    103          into 16 bytes on both x86 and amd64. */
    104       /* Used when .disp == Bound and we are looking for vregs to
    105          spill. */
    106       Bool is_spill_cand;
    107       /* Optimisation: used when .disp == Bound.  Indicates when the
    108          rreg has the same value as the spill slot for the associated
    109          vreg.  Is safely left at False, and becomes True after a
    110          spill store or reload for this rreg. */
    111       Bool eq_spill_slot;
    112       /* What's it's current disposition? */
    113       enum { Free,     /* available for use */
    114              Unavail,  /* in a real-reg live range */
    115              Bound     /* in use (holding value of some vreg) */
    116            }
    117            disp;
    118       /* If .disp == Bound, what vreg is it bound to? */
    119       HReg vreg;
    120    }
    121    RRegState;
    122 
    123 
    124 /* The allocator also maintains a redundant array of indexes
    125    (vreg_state) from vreg numbers back to entries in rreg_state.  It
    126    is redundant because iff vreg_state[i] == j then
    127    hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
    128    point at each other.  The purpose of this is to speed up activities
    129    which involve looking for a particular vreg: there is no need to
    130    scan the rreg_state looking for it, just index directly into
    131    vreg_state.  The FAQ "does this vreg already have an associated
    132    rreg" is the main beneficiary.
    133 
    134    To indicate, in vreg_state[i], that a given vreg is not currently
    135    associated with any rreg, that entry can be set to INVALID_RREG_NO.
    136 
    137    Because the vreg_state entries are signed Shorts, the max number
    138    of vregs that can be handled by regalloc is 32767.
    139 */
    140 
/* Sentinel stored in vreg_state[i] to say that vreg i is not
   currently associated with any rreg. */
#define INVALID_RREG_NO ((Short)(-1))

/* Range checks for vreg / rreg index numbers.  Both expand to a
   reference to a variable named n_vregs (resp. n_rregs), so a
   variable of that name must be in scope wherever they are used.
   Note that the argument is evaluated twice. */
#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
    145 
    146 
    147 /* Search forward from some given point in the incoming instruction
    148    sequence.  Point is to select a virtual register to spill, by
    149    finding the vreg which is mentioned as far ahead as possible, in
    150    the hope that this will minimise the number of consequent reloads.
    151 
    152    Only do the search for vregs which are Bound in the running state,
    153    and for which the .is_spill_cand field is set.  This allows the
    154    caller to arbitrarily restrict the set of spill candidates to be
    155    considered.
    156 
    157    To do this we don't actually need to see the incoming instruction
    158    stream.  Rather, what we need us the HRegUsage records for the
    159    incoming instruction stream.  Hence that is passed in.
    160 
    161    Returns an index into the state array indicating the (v,r) pair to
    162    spill, or -1 if none was found.  */
    163 static
    164 Int findMostDistantlyMentionedVReg (
    165    HRegUsage*   reg_usages_in,
    166    Int          search_from_instr,
    167    Int          num_instrs,
    168    RRegState*   state,
    169    Int          n_state
    170 )
    171 {
    172    Int k, m;
    173    Int furthest_k = -1;
    174    Int furthest   = -1;
    175    vassert(search_from_instr >= 0);
    176    for (k = 0; k < n_state; k++) {
    177       if (!state[k].is_spill_cand)
    178          continue;
    179       vassert(state[k].disp == Bound);
    180       for (m = search_from_instr; m < num_instrs; m++) {
    181          if (HRegUsage__contains(&reg_usages_in[m], state[k].vreg))
    182             break;
    183       }
    184       if (m > furthest) {
    185          furthest   = m;
    186          furthest_k = k;
    187       }
    188    }
    189    return furthest_k;
    190 }
    191 
    192 
    193 /* Check that this vreg has been assigned a sane spill offset. */
    194 inline
    195 static void sanity_check_spill_offset ( VRegLR* vreg )
    196 {
    197    switch (vreg->reg_class) {
    198       case HRcVec128: case HRcFlt64:
    199          vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
    200       default:
    201          vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
    202    }
    203 }
    204 
    205 
    206 /* Double the size of the real-reg live-range array, if needed. */
    207 __attribute__((noinline))
    208 static void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used )
    209 {
    210    Int     k;
    211    RRegLR* arr2;
    212    if (0)
    213       vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size);
    214    vassert(used == *size);
    215    arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR));
    216    for (k = 0; k < *size; k++)
    217       arr2[k] = (*info)[k];
    218    *size *= 2;
    219    *info = arr2;
    220 }
    221 inline
    222 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
    223 {
    224    if (LIKELY(used < *size)) return;
    225    ensureRRLRspace_SLOW(info, size, used);
    226 }
    227 
    228 
    229 /* Sort an array of RRegLR entries by either the .live_after or
    230    .dead_before fields.  This is performance-critical. */
    231 static void sortRRLRarray ( RRegLR* arr,
    232                             Int size, Bool by_live_after )
    233 {
    234    Int    incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
    235                        9841, 29524, 88573, 265720,
    236                        797161, 2391484 };
    237    Int    lo = 0;
    238    Int    hi = size-1;
    239    Int    i, j, h, bigN, hp;
    240    RRegLR v;
    241 
    242    vassert(size >= 0);
    243    if (size == 0)
    244       return;
    245 
    246    bigN = hi - lo + 1; if (bigN < 2) return;
    247    hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
    248 
    249    if (by_live_after) {
    250 
    251       for ( ; hp >= 0; hp--) {
    252          h = incs[hp];
    253          for (i = lo + h; i <= hi; i++) {
    254             v = arr[i];
    255             j = i;
    256             while (arr[j-h].live_after > v.live_after) {
    257                arr[j] = arr[j-h];
    258                j = j - h;
    259                if (j <= (lo + h - 1)) break;
    260             }
    261             arr[j] = v;
    262          }
    263       }
    264 
    265    } else {
    266 
    267       for ( ; hp >= 0; hp--) {
    268          h = incs[hp];
    269          for (i = lo + h; i <= hi; i++) {
    270             v = arr[i];
    271             j = i;
    272             while (arr[j-h].dead_before > v.dead_before) {
    273                arr[j] = arr[j-h];
    274                j = j - h;
    275                if (j <= (lo + h - 1)) break;
    276             }
    277             arr[j] = v;
    278          }
    279       }
    280 
    281    }
    282 }
    283 
    284 
    285 /* Compute the index of the highest and lowest 1 in a ULong,
    286    respectively.  Results are undefined if the argument is zero.
    287    Don't pass it zero :) */
    288 static inline UInt ULong__maxIndex ( ULong w64 ) {
    289    return 63 - __builtin_clzll(w64);
    290 }
    291 
    292 static inline UInt ULong__minIndex ( ULong w64 ) {
    293    return __builtin_ctzll(w64);
    294 }
    295 
    296 
    297 /* Vectorised memset, copied from Valgrind's m_libcbase.c. */
    298 static void* local_memset ( void *destV, Int c, SizeT sz )
    299 {
    300 #  define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3)))
    301 
    302    UInt   c4;
    303    UChar* d = destV;
    304    UChar  uc = c;
    305 
    306    while ((!IS_4_ALIGNED(d)) && sz >= 1) {
    307       d[0] = uc;
    308       d++;
    309       sz--;
    310    }
    311    if (sz == 0)
    312       return destV;
    313    c4 = uc;
    314    c4 |= (c4 << 8);
    315    c4 |= (c4 << 16);
    316    while (sz >= 16) {
    317       ((UInt*)d)[0] = c4;
    318       ((UInt*)d)[1] = c4;
    319       ((UInt*)d)[2] = c4;
    320       ((UInt*)d)[3] = c4;
    321       d += 16;
    322       sz -= 16;
    323    }
    324    while (sz >= 4) {
    325       ((UInt*)d)[0] = c4;
    326       d += 4;
    327       sz -= 4;
    328    }
    329    while (sz >= 1) {
    330       d[0] = c;
    331       d++;
    332       sz--;
    333    }
    334    return destV;
    335 
    336 #  undef IS_4_ALIGNED
    337 }
    338 
    339 
    340 /* A target-independent register allocator.  Requires various
    341    functions which it uses to deal abstractly with instructions and
    342    registers, since it cannot have any target-specific knowledge.
    343 
    344    Returns a new list of instructions, which, as a result of the
    345    behaviour of mapRegs, will be in-place modifications of the
    346    original instructions.
    347 
    348    Requires that the incoming code has been generated using
    349    vreg numbers 0, 1 .. n_vregs-1.  Appearance of a vreg outside
    350    that range is a checked run-time error.
    351 
    352    Takes an expandable array of pointers to unallocated insns.
    353    Returns an expandable array of pointers to allocated insns.
    354 */
    355 HInstrArray* doRegisterAllocation (
    356 
    357    /* Incoming virtual-registerised code. */
    358    HInstrArray* instrs_in,
    359 
    360    /* The real-register universe to use.  This contains facts about
    361       real registers, one of which is the set of registers available
    362       for allocation. */
    363    const RRegUniverse* univ,
    364 
    365    /* Return True iff the given insn is a reg-reg move, in which
    366       case also return the src and dst regs. */
    367    Bool (*isMove) ( const HInstr*, HReg*, HReg* ),
    368 
    369    /* Get info about register usage in this insn. */
    370    void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ),
    371 
    372    /* Apply a reg-reg mapping to an insn. */
    373    void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
    374 
    375    /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
    376       real reg to a spill slot byte offset.  The two leading HInstr**
    377       args are out parameters, through which the generated insns are
    378       returned.  Also (optionally) a 'directReload' function, which
    379       attempts to replace a given instruction by one which reads
    380       directly from a specified spill slot.  May be NULL, in which
    381       case the optimisation is not attempted. */
    382    void    (*genSpill)  ( HInstr**, HInstr**, HReg, Int, Bool ),
    383    void    (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
    384    HInstr* (*directReload) ( HInstr*, HReg, Short ),
    385    Int     guest_sizeB,
    386 
    387    /* For debug printing only. */
    388    void (*ppInstr) ( const HInstr*, Bool ),
    389    void (*ppReg) ( HReg ),
    390 
    391    /* 32/64bit mode */
    392    Bool mode64
    393 )
    394 {
    395 #  define N_SPILL64S  (LibVEX_N_SPILL_BYTES / 8)
    396 
    397    const Bool eq_spill_opt = True;
    398 
    399    /* Info on vregs and rregs.  Computed once and remains
    400       unchanged. */
    401    Int     n_vregs;
    402    VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
    403 
    404    /* We keep two copies of the real-reg live range info, one sorted
    405       by .live_after and the other by .dead_before.  First the
    406       unsorted info is created in the _la variant and then copied into the
    407       _db variant.  Once that's done both of them are sorted.
    408       We also need two integer cursors which record the next
    409       location in the two arrays to consider. */
    410    RRegLR* rreg_lrs_la;
    411    RRegLR* rreg_lrs_db;
    412    Int     rreg_lrs_size;
    413    Int     rreg_lrs_used;
    414    Int     rreg_lrs_la_next;
    415    Int     rreg_lrs_db_next;
    416 
    417    /* Info on register usage in the incoming instruction array.
    418       Computed once and remains unchanged, more or less; updated
    419       sometimes by the direct-reload optimisation. */
    420    HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */
    421 
    422    /* Used when constructing vreg_lrs (for allocating stack
    423       slots). */
    424    Short ss_busy_until_before[N_SPILL64S];
    425 
    426    /* Used when constructing rreg_lrs. */
    427    Int* rreg_live_after;
    428    Int* rreg_dead_before;
    429 
    430    /* Running state of the core allocation algorithm. */
    431    RRegState* rreg_state;  /* [0 .. n_rregs-1] */
    432    Int        n_rregs;
    433 
    434    /* .. and the redundant backward map */
    435    /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
    436       This implies n_rregs must be <= 32768. */
    437    Short*     vreg_state;  /* [0 .. n_vregs-1] */
    438 
    439    /* The vreg -> rreg map constructed and then applied to each
    440       instr. */
    441    HRegRemap remap;
    442 
    443    /* The output array of instructions. */
    444    HInstrArray* instrs_out;
    445 
    446    /* Sanity checks are expensive.  They are only done periodically,
    447       not at each insn processed. */
    448    Bool do_sanity_check;
    449 
    450    vassert(0 == (guest_sizeB % LibVEX_GUEST_STATE_ALIGN));
    451    vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN));
    452    vassert(0 == (N_SPILL64S % 2));
    453 
    454    /* The live range numbers are signed shorts, and so limiting the
    455       number of insns to 15000 comfortably guards against them
    456       overflowing 32k. */
    457    vassert(instrs_in->arr_used <= 15000);
    458 
    459 #  define INVALID_INSTRNO (-2)
    460 
    461 #  define EMIT_INSTR(_instr)                  \
    462       do {                                    \
    463         HInstr* _tmp = (_instr);              \
    464         if (DEBUG_REGALLOC) {                 \
    465            vex_printf("**  ");                \
    466            (*ppInstr)(_tmp, mode64);          \
    467            vex_printf("\n\n");                \
    468         }                                     \
    469         addHInstr ( instrs_out, _tmp );       \
    470       } while (0)
    471 
    472 #   define PRINT_STATE						   \
    473       do {							   \
    474          Int z, q;						   \
    475          for (z = 0; z < n_rregs; z++) {			   \
    476             vex_printf("  rreg_state[%2d] = ", z);		   \
    477             (*ppReg)(univ->regs[z]);	       			   \
    478             vex_printf("  \t");					   \
    479             switch (rreg_state[z].disp) {			   \
    480                case Free:    vex_printf("Free\n"); break;	   \
    481                case Unavail: vex_printf("Unavail\n"); break;	   \
    482                case Bound:   vex_printf("BoundTo "); 		   \
    483                              (*ppReg)(rreg_state[z].vreg);	   \
    484                              vex_printf("\n"); break;		   \
    485             }							   \
    486          }							   \
    487          vex_printf("\n  vreg_state[0 .. %d]:\n    ", n_vregs-1);  \
    488          q = 0;                                                    \
    489          for (z = 0; z < n_vregs; z++) {                           \
    490             if (vreg_state[z] == INVALID_RREG_NO)                  \
    491                continue;                                           \
    492             vex_printf("[%d] -> %d   ", z, vreg_state[z]);         \
    493             q++;                                                   \
    494             if (q > 0 && (q % 6) == 0)                             \
    495                vex_printf("\n    ");                               \
    496          }                                                         \
    497          vex_printf("\n");                                         \
    498       } while (0)
    499 
    500 
    501    /* --------- Stage 0: set up output array --------- */
    502    /* --------- and allocate/initialise running state. --------- */
    503 
    504    instrs_out = newHInstrArray();
    505 
    506    /* ... and initialise running state. */
    507    /* n_rregs is no more than a short name for n_available_real_regs. */
    508    n_rregs = univ->allocable;
    509    n_vregs = instrs_in->n_vregs;
    510 
    511    /* If this is not so, vreg_state entries will overflow. */
    512    vassert(n_vregs < 32767);
    513 
    514    /* If this is not so, the universe we have is nonsensical. */
    515    vassert(n_rregs > 0);
    516 
    517    rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));
    518    vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short));
    519 
    520    for (Int j = 0; j < n_rregs; j++) {
    521       rreg_state[j].has_hlrs      = False;
    522       rreg_state[j].disp          = Free;
    523       rreg_state[j].vreg          = INVALID_HREG;
    524       rreg_state[j].is_spill_cand = False;
    525       rreg_state[j].eq_spill_slot = False;
    526    }
    527 
    528    for (Int j = 0; j < n_vregs; j++)
    529       vreg_state[j] = INVALID_RREG_NO;
    530 
    531 
    532    /* --------- Stage 1: compute vreg live ranges. --------- */
    533    /* --------- Stage 2: compute rreg live ranges. --------- */
    534 
    535    /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
    536 
    537    /* This is relatively simple, because (1) we only seek the complete
    538       end-to-end live range of each vreg, and are not interested in
    539       any holes in it, and (2) the vregs are conveniently numbered 0
    540       .. n_vregs-1, so we can just dump the results in a
    541       pre-allocated array. */
    542 
    543    vreg_lrs = NULL;
    544    if (n_vregs > 0)
    545       vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs);
    546 
    547    for (Int j = 0; j < n_vregs; j++) {
    548       vreg_lrs[j].live_after     = INVALID_INSTRNO;
    549       vreg_lrs[j].dead_before    = INVALID_INSTRNO;
    550       vreg_lrs[j].spill_offset   = 0;
    551       vreg_lrs[j].spill_size     = 0;
    552       vreg_lrs[j].reg_class      = HRcINVALID;
    553    }
    554 
    555    /* An array to hold the reg-usage info for the incoming
    556       instructions. */
    557    reg_usage_arr
    558       = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used-1);
    559 
    560    /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
    561 
    562    /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
    563 
    564    /* This is more complex than Stage 1, because we need to compute
    565       exactly all the live ranges of all the allocatable real regs,
    566       and we don't know in advance how many there will be. */
    567 
    568    rreg_lrs_used = 0;
    569    rreg_lrs_size = 4;
    570    rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR));
    571    rreg_lrs_db = NULL; /* we'll create this later */
    572 
    573    /* We'll need to track live range start/end points separately for
    574       each rreg.  Sigh. */
    575    vassert(n_rregs > 0);
    576    rreg_live_after  = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
    577    rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
    578 
    579    for (Int j = 0; j < n_rregs; j++) {
    580       rreg_live_after[j] =
    581       rreg_dead_before[j] = INVALID_INSTRNO;
    582    }
    583 
    584    /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
    585 
    586    /* ------ start of ITERATE OVER INSNS ------ */
    587 
    588    for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
    589 
    590       (*getRegUsage)( &reg_usage_arr[ii], instrs_in->arr[ii], mode64 );
    591 
    592       if (0) {
    593          vex_printf("\n%d  stage1: ", ii);
    594          (*ppInstr)(instrs_in->arr[ii], mode64);
    595          vex_printf("\n");
    596          ppHRegUsage(univ, &reg_usage_arr[ii]);
    597       }
    598 
    599       /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
    600 
    601       /* for each virtual reg mentioned in the insn ... */
    602       for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
    603 
    604          HReg vreg = reg_usage_arr[ii].vRegs[j];
    605          vassert(hregIsVirtual(vreg));
    606 
    607          Int k = hregIndex(vreg);
    608          if (k < 0 || k >= n_vregs) {
    609             vex_printf("\n");
    610             (*ppInstr)(instrs_in->arr[ii], mode64);
    611             vex_printf("\n");
    612             vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
    613             vpanic("doRegisterAllocation: out-of-range vreg");
    614          }
    615 
    616          /* Take the opportunity to note its regclass.  We'll need
    617             that when allocating spill slots. */
    618          if (vreg_lrs[k].reg_class == HRcINVALID) {
    619             /* First mention of this vreg. */
    620             vreg_lrs[k].reg_class = hregClass(vreg);
    621          } else {
    622             /* Seen it before, so check for consistency. */
    623             vassert(vreg_lrs[k].reg_class == hregClass(vreg));
    624          }
    625 
    626          /* Now consider live ranges. */
    627          switch (reg_usage_arr[ii].vMode[j]) {
    628             case HRmRead:
    629                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
    630                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
    631                   vpanic("doRegisterAllocation: "
    632                          "first event for vreg is Read");
    633                }
    634                vreg_lrs[k].dead_before = toShort(ii + 1);
    635                break;
    636             case HRmWrite:
    637                if (vreg_lrs[k].live_after == INVALID_INSTRNO)
    638                   vreg_lrs[k].live_after = toShort(ii);
    639                vreg_lrs[k].dead_before = toShort(ii + 1);
    640                break;
    641             case HRmModify:
    642                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
    643                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
    644                   vpanic("doRegisterAllocation: "
    645                          "first event for vreg is Modify");
    646                }
    647                vreg_lrs[k].dead_before = toShort(ii + 1);
    648                break;
    649             default:
    650                vpanic("doRegisterAllocation(1)");
    651          } /* switch */
    652 
    653       } /* iterate over virtual registers */
    654 
    655       /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
    656 
    657       /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
    658 
    659       /* If this doesn't hold, the following iteration over real registers
    660          will fail miserably. */
    661       vassert(N_RREGUNIVERSE_REGS == 64);
    662 
    663       const ULong rRead      = reg_usage_arr[ii].rRead;
    664       const ULong rWritten   = reg_usage_arr[ii].rWritten;
    665       const ULong rMentioned = rRead | rWritten;
    666 
    667       UInt rReg_minIndex;
    668       UInt rReg_maxIndex;
    669       if (rMentioned == 0) {
    670          /* There are no real register uses in this insn.  Set
    671             rReg_{min,max}Index so that the following loop doesn't iterate
    672             at all, so as to avoid wasting time. */
    673          rReg_minIndex = 1;
    674          rReg_maxIndex = 0;
    675       } else {
    676          rReg_minIndex = ULong__minIndex(rMentioned);
    677          rReg_maxIndex = ULong__maxIndex(rMentioned);
    678          /* Don't bother to look at registers which are not available
    679             to the allocator.  We asserted above that n_rregs > 0, so
    680             n_rregs-1 is safe. */
    681          if (rReg_maxIndex >= n_rregs)
    682             rReg_maxIndex = n_rregs-1;
    683       }
    684 
    685       /* for each allocator-available real reg mentioned in the insn ... */
    686       /* Note.  We are allocating only over the real regs available to
    687          the allocator.  Others, eg the stack or baseblock pointers,
    688          are unavailable to allocation and so we never visit them.
    689          Hence the iteration is cut off at n_rregs-1, since n_rregs ==
    690          univ->allocable. */
    691       for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) {
    692 
    693          const ULong jMask = 1ULL << j;
    694          if (LIKELY((rMentioned & jMask) == 0))
    695             continue;
    696 
    697          const Bool isR = (rRead    & jMask) != 0;
    698          const Bool isW = (rWritten & jMask) != 0;
    699 
    700          /* Dummy initialisations of flush_la and flush_db to avoid
    701             possible bogus uninit-var warnings from gcc. */
    702          Int  flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
    703          Bool flush = False;
    704 
    705          if (isW && !isR) {
    706             flush_la = rreg_live_after[j];
    707             flush_db = rreg_dead_before[j];
    708             if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO)
    709                flush = True;
    710             rreg_live_after[j]  = ii;
    711             rreg_dead_before[j] = ii+1;
    712          } else if (!isW && isR) {
    713             if (rreg_live_after[j] == INVALID_INSTRNO) {
    714                vex_printf("\nOFFENDING RREG = ");
    715                (*ppReg)(univ->regs[j]);
    716                vex_printf("\n");
    717                vex_printf("\nOFFENDING instr = ");
    718                (*ppInstr)(instrs_in->arr[ii], mode64);
    719                vex_printf("\n");
    720                vpanic("doRegisterAllocation: "
    721                       "first event for rreg is Read");
    722             }
    723             rreg_dead_before[j] = ii+1;
    724          } else {
    725             vassert(isR && isW);
    726             if (rreg_live_after[j] == INVALID_INSTRNO) {
    727                vex_printf("\nOFFENDING RREG = ");
    728                (*ppReg)(univ->regs[j]);
    729                vex_printf("\n");
    730                vex_printf("\nOFFENDING instr = ");
    731                (*ppInstr)(instrs_in->arr[ii], mode64);
    732                vex_printf("\n");
    733                vpanic("doRegisterAllocation: "
    734                       "first event for rreg is Modify");
    735             }
    736             rreg_dead_before[j] = ii+1;
    737          }
    738 
    739          if (flush) {
    740             vassert(flush_la != INVALID_INSTRNO);
    741             vassert(flush_db != INVALID_INSTRNO);
    742             ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
    743             if (0)
    744                vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
    745             rreg_lrs_la[rreg_lrs_used].rreg        = univ->regs[j];
    746             rreg_lrs_la[rreg_lrs_used].live_after  = toShort(flush_la);
    747             rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
    748             rreg_lrs_used++;
    749          }
    750 
    751       } /* iterate over rregs in the instr */
    752 
    753       /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
    754 
    755    } /* iterate over insns */
    756 
    757    /* ------ end of ITERATE OVER INSNS ------ */
    758 
    759    /* ------ start of FINALISE RREG LIVE RANGES ------ */
    760 
    761    /* Now finish up any live ranges left over. */
    762    for (Int j = 0; j < n_rregs; j++) {
    763 
    764       if (0) {
    765          vex_printf("residual %d:  %d %d\n", j, rreg_live_after[j],
    766                                                 rreg_dead_before[j]);
    767       }
    768       vassert( (rreg_live_after[j] == INVALID_INSTRNO
    769                 && rreg_dead_before[j] == INVALID_INSTRNO)
    770               ||
    771                (rreg_live_after[j] != INVALID_INSTRNO
    772                 && rreg_dead_before[j] != INVALID_INSTRNO)
    773             );
    774 
    775       if (rreg_live_after[j] == INVALID_INSTRNO)
    776          continue;
    777 
    778       ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
    779       if (0)
    780          vex_printf("FLUSH 2 (%d,%d)\n",
    781                     rreg_live_after[j], rreg_dead_before[j]);
    782       rreg_lrs_la[rreg_lrs_used].rreg        = univ->regs[j];
    783       rreg_lrs_la[rreg_lrs_used].live_after  = toShort(rreg_live_after[j]);
    784       rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
    785       rreg_lrs_used++;
    786    }
    787 
    788    /* Compute summary hints for choosing real regs.  If a real reg is
    789       involved in a hard live range, record that fact in the fixed
    790       part of the running rreg_state.  Later, when offered a choice between
    791       rregs, it's better to choose one which is not marked as having
    792       any HLRs, since ones with HLRs may need to be spilled around
    793       their HLRs.  Correctness of final assignment is unaffected by
    794       this mechanism -- it is only an optimisation. */
    795 
    796    for (Int j = 0; j < rreg_lrs_used; j++) {
    797       HReg rreg = rreg_lrs_la[j].rreg;
    798       vassert(!hregIsVirtual(rreg));
    799       /* rreg is involved in a HLR.  Record this info in the array, if
    800          there is space. */
    801       UInt ix = hregIndex(rreg);
    802       vassert(ix < n_rregs);
    803       rreg_state[ix].has_hlrs = True;
    804    }
    805    if (0) {
    806       for (Int j = 0; j < n_rregs; j++) {
    807          if (!rreg_state[j].has_hlrs)
    808             continue;
    809          ppReg(univ->regs[j]);
    810          vex_printf(" hinted\n");
    811       }
    812    }
    813 
    814    /* Finally, copy the _la variant into the _db variant and
    815       sort both by their respective fields. */
    816    rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR));
    817    for (Int j = 0; j < rreg_lrs_used; j++)
    818       rreg_lrs_db[j] = rreg_lrs_la[j];
    819 
    820    sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/  );
    821    sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
    822 
    823    /* And set up the cursors. */
    824    rreg_lrs_la_next = 0;
    825    rreg_lrs_db_next = 0;
    826 
    827    for (Int j = 1; j < rreg_lrs_used; j++) {
    828       vassert(rreg_lrs_la[j-1].live_after  <= rreg_lrs_la[j].live_after);
    829       vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
    830    }
    831 
    832    /* ------ end of FINALISE RREG LIVE RANGES ------ */
    833 
    834    if (DEBUG_REGALLOC) {
    835       for (Int j = 0; j < n_vregs; j++) {
    836          vex_printf("vreg %d:  la = %d,  db = %d\n",
    837                     j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
    838       }
    839    }
    840 
    841    if (DEBUG_REGALLOC) {
    842       vex_printf("RRegLRs by LA:\n");
    843       for (Int j = 0; j < rreg_lrs_used; j++) {
    844          vex_printf("  ");
    845          (*ppReg)(rreg_lrs_la[j].rreg);
    846          vex_printf("      la = %d,  db = %d\n",
    847                     rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
    848       }
    849       vex_printf("RRegLRs by DB:\n");
    850       for (Int j = 0; j < rreg_lrs_used; j++) {
    851          vex_printf("  ");
    852          (*ppReg)(rreg_lrs_db[j].rreg);
    853          vex_printf("      la = %d,  db = %d\n",
    854                     rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
    855       }
    856    }
    857 
    858    /* --------- Stage 3: allocate spill slots. --------- */
    859 
    860    /* Each spill slot is 8 bytes long.  For vregs which take more than
    861       64 bits to spill (classes Flt64 and Vec128), we have to allocate
    862       two consecutive spill slots.  For 256 bit registers (class
    863       Vec256), we have to allocate four consecutive spill slots.
    864 
    865       For Vec128-class on PowerPC, the spill slot's actual address
    866       must be 16-byte aligned.  Since the spill slot's address is
    867       computed as an offset from the guest state pointer, and since
    868       the user of the generated code must set that pointer to a
    869       32-aligned value, we have the residual obligation here of
    870       choosing a 16-aligned spill slot offset for Vec128-class values.
    871       Since each spill slot is 8 bytes long, that means for
    872       Vec128-class values we must allocate a spill slot number which
    873       is zero mod 2.
    874 
    875       Similarly, for Vec256 class on amd64, find a spill slot number
    876       which is zero mod 4.  This guarantees it will be 32 byte
    877       aligned, which isn't actually necessary on amd64 (we use movUpd
    878       etc to spill), but seems like good practice.
    879 
    880       Do a rank-based allocation of vregs to spill slot numbers.  We
    881       put as few values as possible in spill slots, but nevertheless
    882       need to have a spill slot available for all vregs, just in case.
    883    */
    884    /* Int max_ss_no = -1; */
    885 
    886    local_memset(ss_busy_until_before, 0, sizeof(ss_busy_until_before));
    887 
    888    for (Int j = 0; j < n_vregs; j++) {
    889 
    890       /* True iff this vreg is unused.  In which case we also expect
    891          that the reg_class field for it has not been set.  */
    892       if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
    893          vassert(vreg_lrs[j].reg_class == HRcINVALID);
    894          continue;
    895       }
    896 
    897       /* The spill slots are 64 bits in size.  As per the comment on
    898          definition of HRegClass in host_generic_regs.h, that means,
    899          to spill a vreg of class Flt64 or Vec128, we'll need to find
    900          two adjacent spill slots to use.  For Vec256, we'll need to
    901          find four adjacent slots to use.  Note, this logic needs to
    902          be kept in sync with the size info on the definition of
    903          HRegClass. */
    904       Int ss_no = -1;
    905       switch (vreg_lrs[j].reg_class) {
    906 
    907          case HRcVec128: case HRcFlt64:
    908             /* Find two adjacent free slots which between them
    909                provide up to 128 bits in which to spill the vreg.
    910                Since we are trying to find an even:odd pair, move
    911                along in steps of 2 (slots). */
    912             for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2)
    913                if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after
    914                    && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after)
    915                   break;
    916             if (ss_no >= N_SPILL64S-1) {
    917                vpanic("LibVEX_N_SPILL_BYTES is too low.  "
    918                       "Increase and recompile.");
    919             }
    920             ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before;
    921             ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before;
    922             break;
    923 
    924          default:
    925             /* The ordinary case -- just find a single spill slot. */
    926             /* Find the lowest-numbered spill slot which is available
    927                at the start point of this interval, and assign the
    928                interval to it. */
    929             for (ss_no = 0; ss_no < N_SPILL64S; ss_no++)
    930                if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after)
    931                   break;
    932             if (ss_no == N_SPILL64S) {
    933                vpanic("LibVEX_N_SPILL_BYTES is too low.  "
    934                       "Increase and recompile.");
    935             }
    936             ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before;
    937             break;
    938 
    939       } /* switch (vreg_lrs[j].reg_class) */
    940 
    941       /* This reflects LibVEX's hard-wired knowledge of the baseBlock
    942          layout: the guest state, then two equal sized areas following
    943          it for two sets of shadow state, and then the spill area. */
    944       vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + ss_no * 8);
    945 
    946       /* Independent check that we've made a sane choice of slot */
    947       sanity_check_spill_offset( &vreg_lrs[j] );
    948       /* if (j > max_ss_no) */
    949       /*    max_ss_no = j; */
    950    }
    951 
    952    if (0) {
    953       vex_printf("\n\n");
    954       for (Int j = 0; j < n_vregs; j++)
    955          vex_printf("vreg %d    --> spill offset %d\n",
    956                     j, vreg_lrs[j].spill_offset);
    957    }
    958 
    959    /* --------- Stage 4: establish rreg preferences --------- */
    960 
    961    /* It may be advantageous to allocate certain vregs to specific
    962       rregs, as a way of avoiding reg-reg moves later.  Here we
    963       establish which, if any, rreg each vreg would prefer to be in.
    964       Note that this constrains the allocator -- ideally we end up
    965       with as few as possible vregs expressing a preference.
    966 
    967       This is an optimisation: if the .preferred_rreg field is never
    968       set to anything different from INVALID_HREG, the allocator still
    969       works. */
    970 
    971    /* 30 Dec 04: removed this mechanism as it does not seem to
    972       help. */
    973 
    974    /* --------- Stage 5: process instructions --------- */
    975 
    976    /* This is the main loop of the allocator.  First, we need to
    977       correctly set up our running state, which tracks the status of
    978       each real register. */
    979 
    980    /* ------ BEGIN: Process each insn in turn. ------ */
    981 
    982    for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
    983 
    984       if (DEBUG_REGALLOC) {
    985          vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
    986          vex_printf("---- ");
    987          (*ppInstr)(instrs_in->arr[ii], mode64);
    988          vex_printf("\n\nInitial state:\n");
    989          PRINT_STATE;
    990          vex_printf("\n");
    991       }
    992 
    993       /* ------------ Sanity checks ------------ */
    994 
    995       /* Sanity checks are expensive.  So they are done only once
    996          every 13 instructions, and just before the last
    997          instruction. */
    998       do_sanity_check
    999          = toBool(
   1000               False /* Set to True for sanity checking of all insns. */
   1001               || ii == instrs_in->arr_used-1
   1002               || (ii > 0 && (ii % 13) == 0)
   1003            );
   1004 
   1005       if (do_sanity_check) {
   1006 
   1007          /* Sanity check 1: all rregs with a hard live range crossing
   1008             this insn must be marked as unavailable in the running
   1009             state. */
   1010          for (Int j = 0; j < rreg_lrs_used; j++) {
   1011             if (rreg_lrs_la[j].live_after < ii
   1012                 && ii < rreg_lrs_la[j].dead_before) {
   1013                /* ii is the middle of a hard live range for some real
   1014                   reg.  Check it's marked as such in the running
   1015                   state. */
   1016                HReg reg = rreg_lrs_la[j].rreg;
   1017 
   1018                if (0) {
   1019                   vex_printf("considering la %d .. db %d   reg = ",
   1020                              rreg_lrs_la[j].live_after,
   1021                              rreg_lrs_la[j].dead_before);
   1022                   (*ppReg)(reg);
   1023                   vex_printf("\n");
   1024                }
   1025 
   1026                /* assert that this rreg is marked as unavailable */
   1027                vassert(!hregIsVirtual(reg));
   1028                vassert(rreg_state[hregIndex(reg)].disp == Unavail);
   1029             }
   1030          }
   1031 
   1032          /* Sanity check 2: conversely, all rregs marked as
   1033             unavailable in the running rreg_state must have a
   1034             corresponding hard live range entry in the rreg_lrs
   1035             array. */
   1036          for (Int j = 0; j < n_rregs; j++) {
   1037             vassert(rreg_state[j].disp == Bound
   1038                     || rreg_state[j].disp == Free
   1039                     || rreg_state[j].disp == Unavail);
   1040             if (rreg_state[j].disp != Unavail)
   1041                continue;
   1042             Int k;
   1043             for (k = 0; k < rreg_lrs_used; k++) {
   1044                HReg reg = rreg_lrs_la[k].rreg;
   1045                vassert(!hregIsVirtual(reg));
   1046                if (hregIndex(reg) == j
   1047                    && rreg_lrs_la[k].live_after < ii
   1048                    && ii < rreg_lrs_la[k].dead_before)
   1049                   break;
   1050             }
   1051             /* If this vassertion fails, we couldn't find a
   1052                corresponding HLR. */
   1053             vassert(k < rreg_lrs_used);
   1054          }
   1055 
   1056          /* Sanity check 3: all vreg-rreg bindings must bind registers
   1057             of the same class. */
   1058          for (Int j = 0; j < n_rregs; j++) {
   1059             if (rreg_state[j].disp != Bound) {
   1060                vassert(rreg_state[j].eq_spill_slot == False);
   1061                continue;
   1062             }
   1063             vassert(hregClass(univ->regs[j])
   1064                     == hregClass(rreg_state[j].vreg));
   1065             vassert( hregIsVirtual(rreg_state[j].vreg));
   1066          }
   1067 
   1068          /* Sanity check 4: the vreg_state and rreg_state
   1069             mutually-redundant mappings are consistent.  If
   1070             rreg_state[j].vreg points at some vreg_state entry then
   1071             that vreg_state entry should point back at
   1072             rreg_state[j]. */
   1073          for (Int j = 0; j < n_rregs; j++) {
   1074             if (rreg_state[j].disp != Bound)
   1075                continue;
   1076             Int k = hregIndex(rreg_state[j].vreg);
   1077             vassert(IS_VALID_VREGNO(k));
   1078             vassert(vreg_state[k] == j);
   1079          }
   1080          for (Int j = 0; j < n_vregs; j++) {
   1081             Int k = vreg_state[j];
   1082             if (k == INVALID_RREG_NO)
   1083                continue;
   1084             vassert(IS_VALID_RREGNO(k));
   1085             vassert(rreg_state[k].disp == Bound);
   1086             vassert(hregIndex(rreg_state[k].vreg) == j);
   1087          }
   1088 
   1089       } /* if (do_sanity_check) */
   1090 
   1091       /* ------------ end of Sanity checks ------------ */
   1092 
   1093       /* Do various optimisations pertaining to register coalescing
   1094          and preferencing:
   1095             MOV  v <-> v   coalescing (done here).
   1096             MOV  v <-> r   coalescing (not yet, if ever)
   1097       */
   1098       /* If doing a reg-reg move between two vregs, and the src's live
   1099          range ends here and the dst's live range starts here, bind
   1100          the dst to the src's rreg, and that's all. */
   1101       HReg vregS = INVALID_HREG;
   1102       HReg vregD = INVALID_HREG;
   1103       if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
   1104          if (!hregIsVirtual(vregS)) goto cannot_coalesce;
   1105          if (!hregIsVirtual(vregD)) goto cannot_coalesce;
   1106          /* Check that *isMove is not telling us a bunch of lies ... */
   1107          vassert(hregClass(vregS) == hregClass(vregD));
   1108          Int k = hregIndex(vregS);
   1109          Int m = hregIndex(vregD);
   1110          vassert(IS_VALID_VREGNO(k));
   1111          vassert(IS_VALID_VREGNO(m));
   1112          if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
   1113          if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
   1114          if (DEBUG_REGALLOC) {
   1115          vex_printf("COALESCE ");
   1116             (*ppReg)(vregS);
   1117             vex_printf(" -> ");
   1118             (*ppReg)(vregD);
   1119             vex_printf("\n\n");
   1120          }
   1121          /* Find the state entry for vregS. */
   1122          Int n = vreg_state[k]; /* k is the index of vregS */
   1123          if (n == INVALID_RREG_NO) {
   1124             /* vregS is not currently in a real register.  So we can't
   1125                do the coalescing.  Give up. */
   1126             goto cannot_coalesce;
   1127          }
   1128          vassert(IS_VALID_RREGNO(n));
   1129 
   1130          /* Finally, we can do the coalescing.  It's trivial -- merely
   1131             claim vregS's register for vregD. */
   1132          rreg_state[n].vreg = vregD;
   1133          vassert(IS_VALID_VREGNO(hregIndex(vregD)));
   1134          vassert(IS_VALID_VREGNO(hregIndex(vregS)));
   1135          vreg_state[hregIndex(vregD)] = toShort(n);
   1136          vreg_state[hregIndex(vregS)] = INVALID_RREG_NO;
   1137 
   1138          /* This rreg has become associated with a different vreg and
   1139             hence with a different spill slot.  Play safe. */
   1140          rreg_state[n].eq_spill_slot = False;
   1141 
   1142          /* Move on to the next insn.  We skip the post-insn stuff for
   1143             fixed registers, since this move should not interact with
   1144             them in any way. */
   1145          continue;
   1146       }
   1147      cannot_coalesce:
   1148 
   1149       /* ------ Free up rregs bound to dead vregs ------ */
   1150 
   1151       /* Look for vregs whose live range has just ended, and
   1152 	 mark the associated rreg as free. */
   1153 
   1154       for (Int j = 0; j < n_rregs; j++) {
   1155          if (rreg_state[j].disp != Bound)
   1156             continue;
   1157          UInt vregno = hregIndex(rreg_state[j].vreg);
   1158          vassert(IS_VALID_VREGNO(vregno));
   1159          if (vreg_lrs[vregno].dead_before <= ii) {
   1160             rreg_state[j].disp = Free;
   1161             rreg_state[j].eq_spill_slot = False;
   1162             Int m = hregIndex(rreg_state[j].vreg);
   1163             vassert(IS_VALID_VREGNO(m));
   1164             vreg_state[m] = INVALID_RREG_NO;
   1165             if (DEBUG_REGALLOC) {
   1166                vex_printf("free up ");
   1167                (*ppReg)(univ->regs[j]);
   1168                vex_printf("\n");
   1169             }
   1170          }
   1171       }
   1172 
   1173       /* ------ Pre-instruction actions for fixed rreg uses ------ */
   1174 
   1175       /* Now we have to deal with rregs which are about to be made
   1176          live by this instruction -- in other words, are entering into
   1177          one of their live ranges.  If any such rreg holds a vreg, we
   1178          will have to free up the rreg.  The simplest solution which
   1179          is correct is to spill the rreg.
   1180 
   1181          Note we could do better:
   1182          * Could move it into some other free rreg, if one is available
   1183 
   1184          Do this efficiently, by incrementally stepping along an array
   1185          of rreg HLRs that are known to be sorted by start point
   1186          (their .live_after field).
   1187       */
   1188       while (True) {
   1189          vassert(rreg_lrs_la_next >= 0);
   1190          vassert(rreg_lrs_la_next <= rreg_lrs_used);
   1191          if (rreg_lrs_la_next == rreg_lrs_used)
   1192             break; /* no more real reg live ranges to consider */
   1193          if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
   1194             break; /* next live range does not yet start */
   1195          vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
   1196          /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
   1197             Find the associated rreg_state entry. */
   1198          /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
   1199             Real register live ranges are guaranteed to be well-formed
   1200             in that they start with a write to the register -- Stage 2
   1201             rejects any code not satisfying this.  So the correct
   1202             question to ask is whether
   1203             rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
   1204             whether the reg becomes live after this insn -- rather
   1205             than before it. */
   1206          if (DEBUG_REGALLOC) {
   1207             vex_printf("need to free up rreg: ");
   1208             (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
   1209             vex_printf("\n\n");
   1210          }
   1211          Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg);
   1212 
   1213          /* If this fails, we don't have an entry for this rreg.
   1214             Which we should. */
   1215          vassert(IS_VALID_RREGNO(k));
   1216          Int m = hregIndex(rreg_state[k].vreg);
   1217          if (rreg_state[k].disp == Bound) {
   1218             /* Yes, there is an associated vreg.  Spill it if it's
   1219                still live. */
   1220             vassert(IS_VALID_VREGNO(m));
   1221             vreg_state[m] = INVALID_RREG_NO;
   1222             if (vreg_lrs[m].dead_before > ii) {
   1223                vassert(vreg_lrs[m].reg_class != HRcINVALID);
   1224                if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
   1225                   HInstr* spill1 = NULL;
   1226                   HInstr* spill2 = NULL;
   1227                   (*genSpill)( &spill1, &spill2, univ->regs[k],
   1228                                vreg_lrs[m].spill_offset, mode64 );
   1229                   vassert(spill1 || spill2); /* can't both be NULL */
   1230                   if (spill1)
   1231                      EMIT_INSTR(spill1);
   1232                   if (spill2)
   1233                      EMIT_INSTR(spill2);
   1234                }
   1235                rreg_state[k].eq_spill_slot = True;
   1236             }
   1237          }
   1238          rreg_state[k].disp = Unavail;
   1239          rreg_state[k].vreg = INVALID_HREG;
   1240          rreg_state[k].eq_spill_slot = False;
   1241 
   1242          /* check for further rregs entering HLRs at this point */
   1243          rreg_lrs_la_next++;
   1244       }
   1245 
   1246       if (DEBUG_REGALLOC) {
   1247          vex_printf("After pre-insn actions for fixed regs:\n");
   1248          PRINT_STATE;
   1249          vex_printf("\n");
   1250       }
   1251 
   1252       /* ------ Deal with the current instruction. ------ */
   1253 
   1254       /* Finally we can begin the processing of this instruction
   1255          itself.  The aim is to free up enough rregs for this insn.
   1256          This may generate spill stores since we may have to evict
   1257          some vregs currently in rregs.  Also generates spill loads.
   1258          We also build up the final vreg->rreg mapping to be applied
   1259          to the insn. */
   1260 
   1261       initHRegRemap(&remap);
   1262 
   1263       /* ------------ BEGIN directReload optimisation ----------- */
   1264 
   1265       /* If the instruction reads exactly one vreg which is currently
   1266          in a spill slot, and this is the last use of that vreg, see if we
   1267          can convert the instruction into one that reads directly from
   1268          the spill slot.  This is clearly only possible for x86 and
   1269          amd64 targets, since ppc and arm are load-store
   1270          architectures.  If successful, replace instrs_in->arr[ii]
   1271          with this new instruction, and recompute its reg usage, so
   1272          that the change is invisible to the standard-case handling
   1273          that follows. */
   1274 
   1275       if (directReload && reg_usage_arr[ii].n_vRegs <= 2) {
   1276          Bool  debug_direct_reload = False;
   1277          HReg  cand     = INVALID_HREG;
   1278          Bool  nreads   = 0;
   1279          Short spilloff = 0;
   1280 
   1281          for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
   1282 
   1283             HReg vreg = reg_usage_arr[ii].vRegs[j];
   1284             vassert(hregIsVirtual(vreg));
   1285 
   1286             if (reg_usage_arr[ii].vMode[j] == HRmRead) {
   1287                nreads++;
   1288                Int m = hregIndex(vreg);
   1289                vassert(IS_VALID_VREGNO(m));
   1290                Int k = vreg_state[m];
   1291                if (!IS_VALID_RREGNO(k)) {
   1292                   /* ok, it is spilled.  Now, is this its last use? */
   1293                   vassert(vreg_lrs[m].dead_before >= ii+1);
   1294                   if (vreg_lrs[m].dead_before == ii+1
   1295                       && hregIsInvalid(cand)) {
   1296                      spilloff = vreg_lrs[m].spill_offset;
   1297                      cand = vreg;
   1298                   }
   1299                }
   1300             }
   1301          }
   1302 
   1303          if (nreads == 1 && ! hregIsInvalid(cand)) {
   1304             HInstr* reloaded;
   1305             if (reg_usage_arr[ii].n_vRegs == 2)
   1306                vassert(! sameHReg(reg_usage_arr[ii].vRegs[0],
   1307                                   reg_usage_arr[ii].vRegs[1]));
   1308 
   1309             reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
   1310             if (debug_direct_reload && !reloaded) {
   1311                vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
   1312                ppInstr(instrs_in->arr[ii], mode64);
   1313             }
   1314             if (reloaded) {
   1315                /* Update info about the insn, so it looks as if it had
   1316                   been in this form all along. */
   1317                instrs_in->arr[ii] = reloaded;
   1318                (*getRegUsage)( &reg_usage_arr[ii], instrs_in->arr[ii], mode64 );
   1319                if (debug_direct_reload && !reloaded) {
   1320                   vex_printf("  -->  ");
   1321                   ppInstr(reloaded, mode64);
   1322                }
   1323             }
   1324 
   1325             if (debug_direct_reload && !reloaded)
   1326                vex_printf("\n");
   1327          }
   1328 
   1329       }
   1330 
   1331       /* ------------ END directReload optimisation ------------ */
   1332 
   1333       /* for each virtual reg mentioned in the insn ... */
   1334       for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
   1335 
   1336          HReg vreg = reg_usage_arr[ii].vRegs[j];
   1337          vassert(hregIsVirtual(vreg));
   1338 
   1339          if (0) {
   1340             vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
   1341          }
   1342 
   1343          /* Now we're trying to find a rreg for "vreg".  First of all,
   1344             if it already has an rreg assigned, we don't need to do
   1345             anything more.  Inspect the current state to find out. */
   1346          Int m = hregIndex(vreg);
   1347          vassert(IS_VALID_VREGNO(m));
   1348          Int n = vreg_state[m];
   1349          if (IS_VALID_RREGNO(n)) {
   1350             vassert(rreg_state[n].disp == Bound);
   1351             addToHRegRemap(&remap, vreg, univ->regs[n]);
   1352             /* If this rreg is written or modified, mark it as different
   1353                from any spill slot value. */
   1354             if (reg_usage_arr[ii].vMode[j] != HRmRead)
   1355                rreg_state[n].eq_spill_slot = False;
   1356             continue;
   1357          } else {
   1358             vassert(n == INVALID_RREG_NO);
   1359          }
   1360 
   1361          /* No luck.  The next thing to do is see if there is a
   1362             currently free rreg available, of the correct class.  If
   1363             so, bag it.  NOTE, we could improve this by selecting an
   1364             rreg for which the next live-range event is as far ahead
   1365             as possible. */
   1366          Int k_suboptimal = -1;
   1367          Int k;
   1368          for (k = 0; k < n_rregs; k++) {
   1369             if (rreg_state[k].disp != Free
   1370                 || hregClass(univ->regs[k]) != hregClass(vreg))
   1371                continue;
   1372             if (rreg_state[k].has_hlrs) {
   1373                /* Well, at least we can use k_suboptimal if we really
   1374                   have to.  Keep on looking for a better candidate. */
   1375                k_suboptimal = k;
   1376             } else {
   1377                /* Found a preferable reg.  Use it. */
   1378                k_suboptimal = -1;
   1379                break;
   1380             }
   1381          }
   1382          if (k_suboptimal >= 0)
   1383             k = k_suboptimal;
   1384 
   1385          if (k < n_rregs) {
   1386             rreg_state[k].disp = Bound;
   1387             rreg_state[k].vreg = vreg;
   1388             Int p = hregIndex(vreg);
   1389             vassert(IS_VALID_VREGNO(p));
   1390             vreg_state[p] = toShort(k);
   1391             addToHRegRemap(&remap, vreg, univ->regs[k]);
   1392             /* Generate a reload if needed.  This only creates needed
   1393                reloads because the live range builder for vregs will
   1394                guarantee that the first event for a vreg is a write.
   1395                Hence, if this reference is not a write, it cannot be
   1396                the first reference for this vreg, and so a reload is
   1397                indeed needed. */
   1398             if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
   1399                vassert(vreg_lrs[p].reg_class != HRcINVALID);
   1400                HInstr* reload1 = NULL;
   1401                HInstr* reload2 = NULL;
   1402                (*genReload)( &reload1, &reload2, univ->regs[k],
   1403                              vreg_lrs[p].spill_offset, mode64 );
   1404                vassert(reload1 || reload2); /* can't both be NULL */
   1405                if (reload1)
   1406                   EMIT_INSTR(reload1);
   1407                if (reload2)
   1408                   EMIT_INSTR(reload2);
   1409                /* This rreg is read or modified by the instruction.
   1410                   If it's merely read we can claim it now equals the
   1411                   spill slot, but not so if it is modified. */
   1412                if (reg_usage_arr[ii].vMode[j] == HRmRead) {
   1413                   rreg_state[k].eq_spill_slot = True;
   1414                } else {
   1415                   vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
   1416                   rreg_state[k].eq_spill_slot = False;
   1417                }
   1418             } else {
   1419                rreg_state[k].eq_spill_slot = False;
   1420             }
   1421 
   1422             continue;
   1423          }
   1424 
   1425          /* Well, now we have no option but to spill a vreg.  It's
   1426             important to make a good choice of vreg to spill, and of
   1427             course we need to be careful not to spill a vreg which is
   1428             needed by this insn. */
   1429 
   1430          /* First, mark in the rreg_state, those rregs which are not spill
   1431             candidates, due to holding a vreg mentioned by this
   1432             instruction.  Or being of the wrong class. */
   1433          for (k = 0; k < n_rregs; k++) {
   1434             rreg_state[k].is_spill_cand = False;
   1435             if (rreg_state[k].disp != Bound)
   1436                continue;
   1437             if (hregClass(univ->regs[k]) != hregClass(vreg))
   1438                continue;
   1439             rreg_state[k].is_spill_cand = True;
   1440             /* Note, the following loop visits only the virtual regs
   1441                mentioned by the instruction. */
   1442             for (m = 0; m < reg_usage_arr[ii].n_vRegs; m++) {
   1443                if (sameHReg(rreg_state[k].vreg, reg_usage_arr[ii].vRegs[m])) {
   1444                   rreg_state[k].is_spill_cand = False;
   1445                   break;
   1446                }
   1447             }
   1448          }
   1449 
   1450          /* We can choose to spill any rreg satisfying
   1451             rreg_state[r].is_spill_cand (so to speak).  Choose r so that
   1452             the next use of its associated vreg is as far ahead as
   1453             possible, in the hope that this will minimise the number
   1454             of consequent reloads required. */
   1455          Int spillee
   1456             = findMostDistantlyMentionedVReg (
   1457                  reg_usage_arr, ii+1, instrs_in->arr_used, rreg_state, n_rregs );
   1458 
   1459          if (spillee == -1) {
   1460             /* Hmmmmm.  There don't appear to be any spill candidates.
   1461                We're hosed. */
   1462             vex_printf("reg_alloc: can't find a register in class: ");
   1463             ppHRegClass(hregClass(vreg));
   1464             vex_printf("\n");
   1465             vpanic("reg_alloc: can't create a free register.");
   1466          }
   1467 
   1468          /* Right.  So we're going to spill rreg_state[spillee]. */
   1469          vassert(IS_VALID_RREGNO(spillee));
   1470          vassert(rreg_state[spillee].disp == Bound);
   1471          /* check it's the right class */
   1472          vassert(hregClass(univ->regs[spillee]) == hregClass(vreg));
   1473          /* check we're not ejecting the vreg for which we are trying
   1474             to free up a register. */
   1475          vassert(! sameHReg(rreg_state[spillee].vreg, vreg));
   1476 
   1477          m = hregIndex(rreg_state[spillee].vreg);
   1478          vassert(IS_VALID_VREGNO(m));
   1479 
   1480          /* So here's the spill store.  Assert that we're spilling a
   1481             live vreg. */
   1482          vassert(vreg_lrs[m].dead_before > ii);
   1483          vassert(vreg_lrs[m].reg_class != HRcINVALID);
   1484          if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
   1485             HInstr* spill1 = NULL;
   1486             HInstr* spill2 = NULL;
   1487             (*genSpill)( &spill1, &spill2, univ->regs[spillee],
   1488                          vreg_lrs[m].spill_offset, mode64 );
   1489             vassert(spill1 || spill2); /* can't both be NULL */
   1490             if (spill1)
   1491                EMIT_INSTR(spill1);
   1492             if (spill2)
   1493                EMIT_INSTR(spill2);
   1494          }
   1495 
   1496          /* Update the rreg_state to reflect the new assignment for this
   1497             rreg. */
   1498          rreg_state[spillee].vreg = vreg;
   1499          vreg_state[m] = INVALID_RREG_NO;
   1500 
   1501          rreg_state[spillee].eq_spill_slot = False; /* be safe */
   1502 
   1503          m = hregIndex(vreg);
   1504          vassert(IS_VALID_VREGNO(m));
   1505          vreg_state[m] = toShort(spillee);
   1506 
   1507          /* Now, if this vreg is being read or modified (as opposed to
   1508             written), we have to generate a reload for it. */
   1509          if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
   1510             vassert(vreg_lrs[m].reg_class != HRcINVALID);
   1511             HInstr* reload1 = NULL;
   1512             HInstr* reload2 = NULL;
   1513             (*genReload)( &reload1, &reload2, univ->regs[spillee],
   1514                           vreg_lrs[m].spill_offset, mode64 );
   1515             vassert(reload1 || reload2); /* can't both be NULL */
   1516             if (reload1)
   1517                EMIT_INSTR(reload1);
   1518             if (reload2)
   1519                EMIT_INSTR(reload2);
   1520             /* This rreg is read or modified by the instruction.
   1521                If it's merely read we can claim it now equals the
   1522                spill slot, but not so if it is modified. */
   1523             if (reg_usage_arr[ii].vMode[j] == HRmRead) {
   1524                rreg_state[spillee].eq_spill_slot = True;
   1525             } else {
   1526                vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
   1527                rreg_state[spillee].eq_spill_slot = False;
   1528             }
   1529          }
   1530 
   1531          /* So after much twisting and turning, we have vreg mapped to
   1532             rreg_state[spillee].rreg.  Note that in the map. */
   1533          addToHRegRemap(&remap, vreg, univ->regs[spillee]);
   1534 
   1535       } /* iterate over virtual registers in this instruction. */
   1536 
   1537       /* We've finished clowning around with registers in this instruction.
   1538          Three results:
   1539          - the running rreg_state[] has been updated
   1540          - a suitable vreg->rreg mapping for this instruction has been
   1541            constructed
   1542          - spill and reload instructions may have been emitted.
   1543 
   1544         The final step is to apply the mapping to the instruction,
   1545         and emit that.
   1546       */
   1547 
   1548       /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
   1549       (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
   1550       EMIT_INSTR( instrs_in->arr[ii] );
   1551 
   1552       if (DEBUG_REGALLOC) {
   1553          vex_printf("After dealing with current insn:\n");
   1554          PRINT_STATE;
   1555          vex_printf("\n");
   1556       }
   1557 
   1558       /* ------ Post-instruction actions for fixed rreg uses ------ */
   1559 
   1560       /* Now we need to check for rregs exiting fixed live ranges
   1561          after this instruction, and if so mark them as free. */
   1562       while (True) {
   1563          vassert(rreg_lrs_db_next >= 0);
   1564          vassert(rreg_lrs_db_next <= rreg_lrs_used);
   1565          if (rreg_lrs_db_next == rreg_lrs_used)
   1566             break; /* no more real reg live ranges to consider */
   1567          if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
   1568             break; /* next live range does not yet end */
   1569          vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
   1570          /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live
   1571             range.  Mark it as such in the main rreg_state array. */
   1572          HReg reg = rreg_lrs_db[rreg_lrs_db_next].rreg;
   1573          vassert(!hregIsVirtual(reg));
   1574          Int k = hregIndex(reg);
   1575          vassert(IS_VALID_RREGNO(k));
   1576          vassert(rreg_state[k].disp == Unavail);
   1577          rreg_state[k].disp = Free;
   1578          rreg_state[k].vreg = INVALID_HREG;
   1579          rreg_state[k].eq_spill_slot = False;
   1580 
   1581          /* check for further rregs leaving HLRs at this point */
   1582          rreg_lrs_db_next++;
   1583       }
   1584 
   1585       if (DEBUG_REGALLOC) {
   1586          vex_printf("After post-insn actions for fixed regs:\n");
   1587          PRINT_STATE;
   1588          vex_printf("\n");
   1589       }
   1590 
   1591    } /* iterate over insns */
   1592 
   1593    /* ------ END: Process each insn in turn. ------ */
   1594 
   1595    /* free(rreg_state); */
   1596    /* free(rreg_lrs); */
   1597    /* if (vreg_lrs) free(vreg_lrs); */
   1598 
   1599    /* Paranoia */
   1600    vassert(rreg_lrs_la_next == rreg_lrs_used);
   1601    vassert(rreg_lrs_db_next == rreg_lrs_used);
   1602 
   1603    return instrs_out;
   1604 
   1605 #  undef INVALID_INSTRNO
   1606 #  undef EMIT_INSTR
   1607 #  undef PRINT_STATE
   1608 }
   1609 
   1610 
   1611 
   1612 /*---------------------------------------------------------------*/
   1613 /*---                                       host_reg_alloc2.c ---*/
   1614 /*---------------------------------------------------------------*/
   1615