Home | History | Annotate | Download | only in priv
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- begin                               guest_generic_bb_to_IR.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2010 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex_ir.h"
     38 #include "libvex.h"
     39 #include "main_util.h"
     40 #include "main_globals.h"
     41 #include "guest_generic_bb_to_IR.h"
     42 
     43 
/* Forward declarations of the self-check checksum helpers, which are
   defined further down in this file.  bb_to_IR needs them declared
   early because it both calls them directly (to compute the expected
   checksum at translation time) and embeds their addresses in the
   generated IR.

   The regparm attribute on each declaration must agree with the one
   on the corresponding definition, and with the regparms count that
   bb_to_IR passes to mkIRExprCCall (2 for the generic routine, 1 for
   each fixed-length specialisation). */
__attribute__((regparm(2)))
static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s );
__attribute__((regparm(1)))
static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 );
__attribute__((regparm(1)))
static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 );
__attribute__((regparm(1)))
static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 );
__attribute__((regparm(1)))
static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 );
__attribute__((regparm(1)))
static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 );
__attribute__((regparm(1)))
static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 );
__attribute__((regparm(1)))
static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 );
__attribute__((regparm(1)))
static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 );
__attribute__((regparm(1)))
static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 );
__attribute__((regparm(1)))
static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 );
__attribute__((regparm(1)))
static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 );
__attribute__((regparm(1)))
static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 );
     71 
     72 /* Small helpers */
     73 static Bool const_False ( void* callback_opaque, Addr64 a ) {
     74    return False;
     75 }
     76 
     77 /* Disassemble a complete basic block, starting at guest_IP_start,
     78    returning a new IRSB.  The disassembler may chase across basic
     79    block boundaries if it wishes and if chase_into_ok allows it.
     80    The precise guest address ranges from which code has been taken
     81    are written into vge.  guest_IP_bbstart is taken to be the IP in
     82    the guest's address space corresponding to the instruction at
     83    &guest_code[0].
     84 
     85    dis_instr_fn is the arch-specific fn to disassemble on function; it
     86    is this that does the real work.
     87 
     88    do_self_check indicates that the caller needs a self-checking
     89    translation.
     90 
     91    preamble_function is a callback which allows the caller to add
     92    its own IR preamble (following the self-check, if any).  May be
     93    NULL.  If non-NULL, the IRSB under construction is handed to
     94    this function, which presumably adds IR statements to it.  The
     95    callback may optionally complete the block and direct bb_to_IR
     96    not to disassemble any instructions into it; this is indicated
     97    by the callback returning True.
     98 
     99    offB_TIADDR and offB_TILEN are the offsets of guest_TIADDR and
    100    guest_TILEN.  Since this routine has to work for any guest state,
    101    without knowing what it is, those offsets have to passed in.
    102 
    103    callback_opaque is a caller-supplied pointer to data which the
    104    callbacks may want to see.  Vex has no idea what it is.
    105    (In fact it's a VgInstrumentClosure.)
    106 */
    107 
    108 IRSB* bb_to_IR ( /*OUT*/VexGuestExtents* vge,
    109                  /*IN*/ void*            callback_opaque,
    110                  /*IN*/ DisOneInstrFn    dis_instr_fn,
    111                  /*IN*/ UChar*           guest_code,
    112                  /*IN*/ Addr64           guest_IP_bbstart,
    113                  /*IN*/ Bool             (*chase_into_ok)(void*,Addr64),
    114                  /*IN*/ Bool             host_bigendian,
    115                  /*IN*/ VexArch          arch_guest,
    116                  /*IN*/ VexArchInfo*     archinfo_guest,
    117                  /*IN*/ VexAbiInfo*      abiinfo_both,
    118                  /*IN*/ IRType           guest_word_type,
    119                  /*IN*/ Bool             do_self_check,
    120                  /*IN*/ Bool             (*preamble_function)(void*,IRSB*),
    121                  /*IN*/ Int              offB_TISTART,
    122                  /*IN*/ Int              offB_TILEN )
    123 {
    124    Long       delta;
    125    Int        i, n_instrs, first_stmt_idx;
    126    Bool       resteerOK, need_to_put_IP, debug_print;
    127    DisResult  dres;
    128    IRStmt*    imark;
    129    static Int n_resteers = 0;
    130    Int        d_resteers = 0;
    131    Int        selfcheck_idx = 0;
    132    IRSB*      irsb;
    133    Addr64     guest_IP_curr_instr;
    134    IRConst*   guest_IP_bbstart_IRConst = NULL;
    135    Int        n_cond_resteers_allowed = 2;
    136 
    137    Bool (*resteerOKfn)(void*,Addr64) = NULL;
    138 
    139    debug_print = toBool(vex_traceflags & VEX_TRACE_FE);
    140 
    141    /* Note: for adler32 to work without % operation for the self
    142       check, need to limit length of stuff it scans to 5552 bytes.
    143       Therefore limiting the max bb len to 100 insns seems generously
    144       conservative. */
    145 
    146    /* check sanity .. */
    147    vassert(sizeof(HWord) == sizeof(void*));
    148    vassert(vex_control.guest_max_insns >= 1);
    149    vassert(vex_control.guest_max_insns < 100);
    150    vassert(vex_control.guest_chase_thresh >= 0);
    151    vassert(vex_control.guest_chase_thresh < vex_control.guest_max_insns);
    152    vassert(guest_word_type == Ity_I32 || guest_word_type == Ity_I64);
    153 
    154    /* Start a new, empty extent. */
    155    vge->n_used  = 1;
    156    vge->base[0] = guest_IP_bbstart;
    157    vge->len[0]  = 0;
    158 
    159    /* And a new IR superblock to dump the result into. */
    160    irsb = emptyIRSB();
    161 
    162    /* Delta keeps track of how far along the guest_code array we have
    163       so far gone. */
    164    delta    = 0;
    165    n_instrs = 0;
    166 
    167    /* Guest addresses as IRConsts.  Used in the two self-checks
    168       generated. */
    169    if (do_self_check) {
    170       guest_IP_bbstart_IRConst
    171          = guest_word_type==Ity_I32
    172               ? IRConst_U32(toUInt(guest_IP_bbstart))
    173               : IRConst_U64(guest_IP_bbstart);
    174    }
    175 
    176    /* If asked to make a self-checking translation, leave 5 spaces
    177       in which to put the check statements.  We'll fill them in later
    178       when we know the length and adler32 of the area to check. */
    179    if (do_self_check) {
    180       selfcheck_idx = irsb->stmts_used;
    181       addStmtToIRSB( irsb, IRStmt_NoOp() );
    182       addStmtToIRSB( irsb, IRStmt_NoOp() );
    183       addStmtToIRSB( irsb, IRStmt_NoOp() );
    184       addStmtToIRSB( irsb, IRStmt_NoOp() );
    185       addStmtToIRSB( irsb, IRStmt_NoOp() );
    186    }
    187 
    188    /* If the caller supplied a function to add its own preamble, use
    189       it now. */
    190    if (preamble_function) {
    191       Bool stopNow = preamble_function( callback_opaque, irsb );
    192       if (stopNow) {
    193          /* The callback has completed the IR block without any guest
    194             insns being disassembled into it, so just return it at
    195             this point, even if a self-check was requested - as there
    196             is nothing to self-check.  The five self-check no-ops will
    197             still be in place, but they are harmless. */
    198          return irsb;
    199       }
    200    }
    201 
    202    /* Process instructions. */
    203    while (True) {
    204       vassert(n_instrs < vex_control.guest_max_insns);
    205 
    206       /* Regardless of what chase_into_ok says, is chasing permissible
    207          at all right now?  Set resteerOKfn accordingly. */
    208       resteerOK
    209          = toBool(
    210               n_instrs < vex_control.guest_chase_thresh
    211               /* If making self-checking translations, don't chase
    212                  .. it makes the checks too complicated.  We only want
    213                  to scan just one sequence of bytes in the check, not
    214                  a whole bunch. */
    215               && !do_self_check
    216               /* we can't afford to have a resteer once we're on the
    217                  last extent slot. */
    218               && vge->n_used < 3
    219            );
    220 
    221       resteerOKfn
    222          = resteerOK ? chase_into_ok : const_False;
    223 
    224       /* n_cond_resteers_allowed keeps track of whether we're still
    225          allowing dis_instr_fn to chase conditional branches.  It
    226          starts (at 2) and gets decremented each time dis_instr_fn
    227          tells us it has chased a conditional branch.  We then
    228          decrement it, and use it to tell later calls to dis_instr_fn
    229          whether or not it is allowed to chase conditional
    230          branches. */
    231       vassert(n_cond_resteers_allowed >= 0 && n_cond_resteers_allowed <= 2);
    232 
    233       /* This is the IP of the instruction we're just about to deal
    234          with. */
    235       guest_IP_curr_instr = guest_IP_bbstart + delta;
    236 
    237       /* This is the irsb statement array index of the first stmt in
    238          this insn.  That will always be the instruction-mark
    239          descriptor. */
    240       first_stmt_idx = irsb->stmts_used;
    241 
    242       /* Add an instruction-mark statement.  We won't know until after
    243          disassembling the instruction how long it instruction is, so
    244          just put in a zero length and we'll fix it up later. */
    245       addStmtToIRSB( irsb, IRStmt_IMark( guest_IP_curr_instr, 0 ));
    246 
    247       /* for the first insn, the dispatch loop will have set
    248          %IP, but for all the others we have to do it ourselves. */
    249       need_to_put_IP = toBool(n_instrs > 0);
    250 
    251       /* Finally, actually disassemble an instruction. */
    252       dres = dis_instr_fn ( irsb,
    253                             need_to_put_IP,
    254                             resteerOKfn,
    255                             toBool(n_cond_resteers_allowed > 0),
    256                             callback_opaque,
    257                             guest_code,
    258                             delta,
    259                             guest_IP_curr_instr,
    260                             arch_guest,
    261                             archinfo_guest,
    262                             abiinfo_both,
    263                             host_bigendian );
    264 
    265       /* stay sane ... */
    266       vassert(dres.whatNext == Dis_StopHere
    267               || dres.whatNext == Dis_Continue
    268               || dres.whatNext == Dis_ResteerU
    269               || dres.whatNext == Dis_ResteerC);
    270       /* ... disassembled insn length is sane ... */
    271       vassert(dres.len >= 0 && dres.len <= 20);
    272       /* ... continueAt is zero if no resteer requested ... */
    273       if (dres.whatNext != Dis_ResteerU && dres.whatNext != Dis_ResteerC)
    274          vassert(dres.continueAt == 0);
    275       /* ... if we disallowed conditional resteers, check that one
    276              didn't actually happen anyway ... */
    277       if (n_cond_resteers_allowed == 0)
    278          vassert(dres.whatNext != Dis_ResteerC);
    279 
    280       /* Fill in the insn-mark length field. */
    281       vassert(first_stmt_idx >= 0 && first_stmt_idx < irsb->stmts_used);
    282       imark = irsb->stmts[first_stmt_idx];
    283       vassert(imark);
    284       vassert(imark->tag == Ist_IMark);
    285       vassert(imark->Ist.IMark.len == 0);
    286       imark->Ist.IMark.len = toUInt(dres.len);
    287 
    288       /* Print the resulting IR, if needed. */
    289       if (vex_traceflags & VEX_TRACE_FE) {
    290          for (i = first_stmt_idx; i < irsb->stmts_used; i++) {
    291             vex_printf("              ");
    292             ppIRStmt(irsb->stmts[i]);
    293             vex_printf("\n");
    294          }
    295       }
    296 
    297       /* If dis_instr_fn terminated the BB at this point, check it
    298          also filled in the irsb->next field. */
    299       if (dres.whatNext == Dis_StopHere) {
    300          vassert(irsb->next != NULL);
    301          if (debug_print) {
    302             vex_printf("              ");
    303             vex_printf( "goto {");
    304             ppIRJumpKind(irsb->jumpkind);
    305             vex_printf( "} ");
    306             ppIRExpr( irsb->next );
    307             vex_printf( "\n");
    308          }
    309       }
    310 
    311       /* Update the VexGuestExtents we are constructing. */
    312       /* If vex_control.guest_max_insns is required to be < 100 and
    313          each insn is at max 20 bytes long, this limit of 5000 then
    314          seems reasonable since the max possible extent length will be
    315          100 * 20 == 2000. */
    316       vassert(vge->len[vge->n_used-1] < 5000);
    317       vge->len[vge->n_used-1]
    318          = toUShort(toUInt( vge->len[vge->n_used-1] + dres.len ));
    319       n_instrs++;
    320       if (debug_print)
    321          vex_printf("\n");
    322 
    323       /* Advance delta (inconspicuous but very important :-) */
    324       delta += (Long)dres.len;
    325 
    326       switch (dres.whatNext) {
    327          case Dis_Continue:
    328             vassert(irsb->next == NULL);
    329             if (n_instrs < vex_control.guest_max_insns) {
    330                /* keep going */
    331             } else {
    332                /* We have to stop. */
    333                irsb->next
    334                   = IRExpr_Const(
    335                        guest_word_type == Ity_I32
    336                           ? IRConst_U32(toUInt(guest_IP_bbstart+delta))
    337                           : IRConst_U64(guest_IP_bbstart+delta)
    338                     );
    339                goto done;
    340             }
    341             break;
    342          case Dis_StopHere:
    343             vassert(irsb->next != NULL);
    344             goto done;
    345          case Dis_ResteerU:
    346          case Dis_ResteerC:
    347             /* Check that we actually allowed a resteer .. */
    348             vassert(resteerOK);
    349             vassert(irsb->next == NULL);
    350             if (dres.whatNext == Dis_ResteerC) {
    351                vassert(n_cond_resteers_allowed > 0);
    352                n_cond_resteers_allowed--;
    353             }
    354             /* figure out a new delta to continue at. */
    355             vassert(resteerOKfn(callback_opaque,dres.continueAt));
    356             delta = dres.continueAt - guest_IP_bbstart;
    357             /* we now have to start a new extent slot. */
    358             vge->n_used++;
    359             vassert(vge->n_used <= 3);
    360             vge->base[vge->n_used-1] = dres.continueAt;
    361             vge->len[vge->n_used-1] = 0;
    362             n_resteers++;
    363             d_resteers++;
    364             if (0 && (n_resteers & 0xFF) == 0)
    365             vex_printf("resteer[%d,%d] to 0x%llx (delta = %lld)\n",
    366                        n_resteers, d_resteers,
    367                        dres.continueAt, delta);
    368             break;
    369          default:
    370             vpanic("bb_to_IR");
    371       }
    372    }
    373    /*NOTREACHED*/
    374    vassert(0);
    375 
    376   done:
    377    /* We're done.  The only thing that might need attending to is that
    378       a self-checking preamble may need to be created.
    379 
    380       The scheme is to compute a rather crude checksum of the code
    381       we're making a translation of, and add to the IR a call to a
    382       helper routine which recomputes the checksum every time the
    383       translation is run, and requests a retranslation if it doesn't
    384       match.  This is obviously very expensive and considerable
    385       efforts are made to speed it up:
    386 
    387       * the checksum is computed from all the 32-bit words that
    388         overlap the translated code.  That means it could depend on up
    389         to 3 bytes before and 3 bytes after which aren't part of the
    390         translated area, and so if those change then we'll
    391         unnecessarily have to discard and retranslate.  This seems
    392         like a pretty remote possibility and it seems as if the
    393         benefit of not having to deal with the ends of the range at
    394         byte precision far outweigh any possible extra translations
    395         needed.
    396 
    397       * there's a generic routine and 12 specialised cases, which
    398         handle the cases of 1 through 12-word lengths respectively.
    399         They seem to cover about 90% of the cases that occur in
    400         practice.
    401    */
    402    if (do_self_check) {
    403 
    404       UInt     len2check, expected32;
    405       IRTemp   tistart_tmp, tilen_tmp;
    406       UInt     (*fn_generic)(HWord, HWord) __attribute__((regparm(2)));
    407       UInt     (*fn_spec)(HWord) __attribute__((regparm(1)));
    408       HChar*   nm_generic;
    409       HChar*   nm_spec;
    410       HWord    fn_generic_entry = 0;
    411       HWord    fn_spec_entry = 0;
    412 
    413       vassert(vge->n_used == 1);
    414       len2check = vge->len[0];
    415 
    416       /* stay sane */
    417       vassert(len2check >= 0 && len2check < 1000/*arbitrary*/);
    418 
    419       /* Skip the check if the translation involved zero bytes */
    420       if (len2check > 0) {
    421          HWord first_w32 = ((HWord)guest_code) & ~(HWord)3;
    422          HWord last_w32  = (((HWord)guest_code) + len2check - 1) & ~(HWord)3;
    423          vassert(first_w32 <= last_w32);
    424          HWord w32_diff = last_w32 - first_w32;
    425          vassert(0 == (w32_diff & 3));
    426          HWord w32s_to_check = (w32_diff + 4) / 4;
    427          vassert(w32s_to_check > 0 && w32s_to_check < 1004/*arbitrary*//4);
    428 
    429          /* vex_printf("%lx %lx  %ld\n", first_w32, last_w32, w32s_to_check); */
    430 
    431          fn_generic =  genericg_compute_checksum_4al;
    432          nm_generic = "genericg_compute_checksum_4al";
    433          fn_spec = NULL;
    434          nm_spec = NULL;
    435 
    436          switch (w32s_to_check) {
    437              case 1:  fn_spec =  genericg_compute_checksum_4al_1;
    438                       nm_spec = "genericg_compute_checksum_4al_1"; break;
    439              case 2:  fn_spec =  genericg_compute_checksum_4al_2;
    440                       nm_spec = "genericg_compute_checksum_4al_2"; break;
    441              case 3:  fn_spec =  genericg_compute_checksum_4al_3;
    442                       nm_spec = "genericg_compute_checksum_4al_3"; break;
    443              case 4:  fn_spec =  genericg_compute_checksum_4al_4;
    444                       nm_spec = "genericg_compute_checksum_4al_4"; break;
    445              case 5:  fn_spec =  genericg_compute_checksum_4al_5;
    446                       nm_spec = "genericg_compute_checksum_4al_5"; break;
    447              case 6:  fn_spec =  genericg_compute_checksum_4al_6;
    448                       nm_spec = "genericg_compute_checksum_4al_6"; break;
    449              case 7:  fn_spec =  genericg_compute_checksum_4al_7;
    450                       nm_spec = "genericg_compute_checksum_4al_7"; break;
    451              case 8:  fn_spec =  genericg_compute_checksum_4al_8;
    452                       nm_spec = "genericg_compute_checksum_4al_8"; break;
    453              case 9:  fn_spec =  genericg_compute_checksum_4al_9;
    454                       nm_spec = "genericg_compute_checksum_4al_9"; break;
    455              case 10: fn_spec =  genericg_compute_checksum_4al_10;
    456                       nm_spec = "genericg_compute_checksum_4al_10"; break;
    457              case 11: fn_spec =  genericg_compute_checksum_4al_11;
    458                       nm_spec = "genericg_compute_checksum_4al_11"; break;
    459              case 12: fn_spec =  genericg_compute_checksum_4al_12;
    460                       nm_spec = "genericg_compute_checksum_4al_12"; break;
    461              default: break;
    462          }
    463 
    464          expected32 = fn_generic( first_w32, w32s_to_check );
    465          /* If we got a specialised version, check it produces the same
    466             result as the generic version! */
    467          if (fn_spec) {
    468             vassert(nm_spec);
    469             vassert(expected32 == fn_spec( first_w32 ));
    470          } else {
    471             vassert(!nm_spec);
    472          }
    473 
    474          /* Set TISTART and TILEN.  These will describe to the despatcher
    475             the area of guest code to invalidate should we exit with a
    476             self-check failure. */
    477 
    478          tistart_tmp = newIRTemp(irsb->tyenv, guest_word_type);
    479          tilen_tmp   = newIRTemp(irsb->tyenv, guest_word_type);
    480 
    481          irsb->stmts[selfcheck_idx+0]
    482             = IRStmt_WrTmp(tistart_tmp, IRExpr_Const(guest_IP_bbstart_IRConst) );
    483 
    484          irsb->stmts[selfcheck_idx+1]
    485             = IRStmt_WrTmp(tilen_tmp,
    486                            guest_word_type==Ity_I32
    487                               ? IRExpr_Const(IRConst_U32(len2check))
    488                               : IRExpr_Const(IRConst_U64(len2check))
    489               );
    490 
    491          irsb->stmts[selfcheck_idx+2]
    492             = IRStmt_Put( offB_TISTART, IRExpr_RdTmp(tistart_tmp) );
    493 
    494          irsb->stmts[selfcheck_idx+3]
    495             = IRStmt_Put( offB_TILEN, IRExpr_RdTmp(tilen_tmp) );
    496 
    497          /* Generate the entry point descriptors */
    498          if (abiinfo_both->host_ppc_calls_use_fndescrs) {
    499             HWord* descr = (HWord*)fn_generic;
    500             fn_generic_entry = descr[0];
    501             if (fn_spec) {
    502                descr = (HWord*)fn_spec;
    503                fn_spec_entry = descr[0];
    504             } else {
    505                fn_spec_entry = (HWord)NULL;
    506             }
    507          } else {
    508             fn_generic_entry = (HWord)fn_generic;
    509             if (fn_spec) {
    510                fn_spec_entry = (HWord)fn_spec;
    511             } else {
    512                fn_spec_entry = (HWord)NULL;
    513             }
    514          }
    515 
    516          IRExpr* callexpr = NULL;
    517          if (fn_spec) {
    518             callexpr = mkIRExprCCall(
    519                           Ity_I32, 1/*regparms*/,
    520                           nm_spec, (void*)fn_spec_entry,
    521                           mkIRExprVec_1(
    522                              mkIRExpr_HWord( (HWord)first_w32 )
    523                           )
    524                        );
    525          } else {
    526             callexpr = mkIRExprCCall(
    527                           Ity_I32, 2/*regparms*/,
    528                           nm_generic, (void*)fn_generic_entry,
    529                           mkIRExprVec_2(
    530                              mkIRExpr_HWord( (HWord)first_w32 ),
    531                              mkIRExpr_HWord( (HWord)w32s_to_check )
    532                           )
    533                        );
    534          }
    535 
    536          irsb->stmts[selfcheck_idx+4]
    537             = IRStmt_Exit(
    538                  IRExpr_Binop(
    539                     Iop_CmpNE32,
    540                     callexpr,
    541                     IRExpr_Const(IRConst_U32(expected32))
    542                  ),
    543                  Ijk_TInval,
    544                  guest_IP_bbstart_IRConst
    545               );
    546       }
    547    }
    548 
    549    return irsb;
    550 }
    551 
    552 
    553 /*-------------------------------------------------------------
    554   A support routine for doing self-checking translations.
    555   -------------------------------------------------------------*/
    556 
    557 /* CLEAN HELPER */
    558 /* CALLED FROM GENERATED CODE */
    559 
/* Compute a checksum of host memory, as fast as possible.  All the
   routines below take first_w32, the 4-aligned address of the first
   32-bit word to scan.  genericg_compute_checksum_4al additionally
   takes the number of words to scan and copes with any count,
   including zero; the _4al_N variants are unrolled specialisations
   for exactly N words and must return the same value as the generic
   routine.  These functions are called once for every use of a
   self-checking translation, so they need to be as fast as
   possible. */
    567 
    568 static inline UInt ROL32 ( UInt w, Int n ) {
    569    w = (w << n) | (w >> (32-n));
    570    return w;
    571 }
    572 
    573 __attribute((regparm(2)))
    574 static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s )
    575 {
    576    UInt  sum1 = 0, sum2 = 0;
    577    UInt* p = (UInt*)first_w32;
    578    /* unrolled */
    579    while (n_w32s >= 4) {
    580       UInt  w;
    581       w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    582       w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    583       w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    584       w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    585       p += 4;
    586       n_w32s -= 4;
    587       sum1 ^= sum2;
    588    }
    589    while (n_w32s >= 1) {
    590       UInt  w;
    591       w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    592       p += 1;
    593       n_w32s -= 1;
    594       sum1 ^= sum2;
    595    }
    596    return sum1 + sum2;
    597 }
    598 
    599 /* Specialised versions of the above function */
    600 
    601 __attribute__((regparm(1)))
    602 static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 )
    603 {
    604    UInt  sum1 = 0, sum2 = 0;
    605    UInt* p = (UInt*)first_w32;
    606    UInt  w;
    607    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    608    sum1 ^= sum2;
    609    return sum1 + sum2;
    610 }
    611 
    612 __attribute__((regparm(1)))
    613 static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 )
    614 {
    615    UInt  sum1 = 0, sum2 = 0;
    616    UInt* p = (UInt*)first_w32;
    617    UInt  w;
    618    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    619    sum1 ^= sum2;
    620    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    621    sum1 ^= sum2;
    622    return sum1 + sum2;
    623 }
    624 
    625 __attribute__((regparm(1)))
    626 static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 )
    627 {
    628    UInt  sum1 = 0, sum2 = 0;
    629    UInt* p = (UInt*)first_w32;
    630    UInt  w;
    631    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    632    sum1 ^= sum2;
    633    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    634    sum1 ^= sum2;
    635    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    636    sum1 ^= sum2;
    637    return sum1 + sum2;
    638 }
    639 
    640 __attribute__((regparm(1)))
    641 static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 )
    642 {
    643    UInt  sum1 = 0, sum2 = 0;
    644    UInt* p = (UInt*)first_w32;
    645    UInt  w;
    646    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    647    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    648    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    649    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    650    sum1 ^= sum2;
    651    return sum1 + sum2;
    652 }
    653 
    654 __attribute__((regparm(1)))
    655 static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 )
    656 {
    657    UInt  sum1 = 0, sum2 = 0;
    658    UInt* p = (UInt*)first_w32;
    659    UInt  w;
    660    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    661    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    662    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    663    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    664    sum1 ^= sum2;
    665    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    666    sum1 ^= sum2;
    667    return sum1 + sum2;
    668 }
    669 
    670 __attribute__((regparm(1)))
    671 static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 )
    672 {
    673    UInt  sum1 = 0, sum2 = 0;
    674    UInt* p = (UInt*)first_w32;
    675    UInt  w;
    676    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    677    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    678    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    679    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    680    sum1 ^= sum2;
    681    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    682    sum1 ^= sum2;
    683    w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    684    sum1 ^= sum2;
    685    return sum1 + sum2;
    686 }
    687 
    688 __attribute__((regparm(1)))
    689 static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 )
    690 {
    691    UInt  sum1 = 0, sum2 = 0;
    692    UInt* p = (UInt*)first_w32;
    693    UInt  w;
    694    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    695    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    696    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    697    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    698    sum1 ^= sum2;
    699    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    700    sum1 ^= sum2;
    701    w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    702    sum1 ^= sum2;
    703    w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    704    sum1 ^= sum2;
    705    return sum1 + sum2;
    706 }
    707 
    708 __attribute__((regparm(1)))
    709 static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 )
    710 {
    711    UInt  sum1 = 0, sum2 = 0;
    712    UInt* p = (UInt*)first_w32;
    713    UInt  w;
    714    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    715    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    716    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    717    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    718    sum1 ^= sum2;
    719    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    720    w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    721    w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    722    w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    723    sum1 ^= sum2;
    724    return sum1 + sum2;
    725 }
    726 
    727 __attribute__((regparm(1)))
    728 static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 )
    729 {
    730    UInt  sum1 = 0, sum2 = 0;
    731    UInt* p = (UInt*)first_w32;
    732    UInt  w;
    733    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    734    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    735    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    736    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    737    sum1 ^= sum2;
    738    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    739    w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    740    w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    741    w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    742    sum1 ^= sum2;
    743    w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    744    sum1 ^= sum2;
    745    return sum1 + sum2;
    746 }
    747 
    748 __attribute__((regparm(1)))
    749 static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 )
    750 {
    751    UInt  sum1 = 0, sum2 = 0;
    752    UInt* p = (UInt*)first_w32;
    753    UInt  w;
    754    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    755    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    756    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    757    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    758    sum1 ^= sum2;
    759    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    760    w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    761    w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    762    w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    763    sum1 ^= sum2;
    764    w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    765    sum1 ^= sum2;
    766    w = p[9];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    767    sum1 ^= sum2;
    768    return sum1 + sum2;
    769 }
    770 
    771 __attribute__((regparm(1)))
    772 static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 )
    773 {
    774    UInt  sum1 = 0, sum2 = 0;
    775    UInt* p = (UInt*)first_w32;
    776    UInt  w;
    777    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    778    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    779    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    780    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    781    sum1 ^= sum2;
    782    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    783    w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    784    w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    785    w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    786    sum1 ^= sum2;
    787    w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    788    sum1 ^= sum2;
    789    w = p[9];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    790    sum1 ^= sum2;
    791    w = p[10]; sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    792    sum1 ^= sum2;
    793    return sum1 + sum2;
    794 }
    795 
    796 __attribute__((regparm(1)))
    797 static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 )
    798 {
    799    UInt  sum1 = 0, sum2 = 0;
    800    UInt* p = (UInt*)first_w32;
    801    UInt  w;
    802    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    803    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    804    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    805    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    806    sum1 ^= sum2;
    807    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    808    w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    809    w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    810    w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    811    sum1 ^= sum2;
    812    w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    813    w = p[9];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    814    w = p[10]; sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    815    w = p[11]; sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
    816    sum1 ^= sum2;
    817    return sum1 + sum2;
    818 }
    819 
    820 /*--------------------------------------------------------------------*/
    821 /*--- end                                 guest_generic_bb_to_IR.c ---*/
    822 /*--------------------------------------------------------------------*/
    823