Home | History | Annotate | Download | only in priv
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                               guest_x86_helpers.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2011 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex_emwarn.h"
     38 #include "libvex_guest_x86.h"
     39 #include "libvex_ir.h"
     40 #include "libvex.h"
     41 
     42 #include "main_util.h"
     43 #include "guest_generic_bb_to_IR.h"
     44 #include "guest_x86_defs.h"
     45 #include "guest_generic_x87.h"
     46 
     47 
     48 /* This file contains helper functions for x86 guest code.
     49    Calls to these functions are generated by the back end.
     50    These calls are of course in the host machine code and
     51    this file will be compiled to host machine code, so that
     52    all makes sense.
     53 
     54    Only change the signatures of these helper functions very
     55    carefully.  If you change the signature here, you'll have to change
     56    the parameters passed to it in the IR calls constructed by
     57    guest-x86/toIR.c.
     58 
     59    The convention used is that all functions called from generated
     60    code are named x86g_<something>, and any function whose name lacks
     61    that prefix is not called from generated code.  Note that some
     62    LibVEX_* functions can however be called by VEX's client, but that
     63    is not the same as calling them from VEX-generated code.
     64 */
     65 
     66 
/* Set to 1 to get detailed profiling info about use of the flag
   machinery.  When non-zero, the call counters and the
   showCounts/initCounts routines guarded by "#if PROFILE_EFLAGS" in
   this file are compiled in, and the x86g_calculate_* helpers update
   those counters on every call. */
#define PROFILE_EFLAGS 0
     70 
     71 
     72 /*---------------------------------------------------------------*/
     73 /*--- %eflags run-time helpers.                               ---*/
     74 /*---------------------------------------------------------------*/
     75 
/* Parity-flag lookup, indexed by the low 8 bits of a result.
   Entry i is X86G_CC_MASK_P when i contains an even number of 1 bits
   (e.g. 0 and 3) and 0 otherwise (e.g. 1 and 2), so the value can be
   OR-ed straight into a flags word. */
static const UChar parity_table[256] = {
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
    110 
    111 /* generalised left-shifter */
    112 inline static Int lshift ( Int x, Int n )
    113 {
    114    if (n >= 0)
    115       return x << n;
    116    else
    117       return x >> (-n);
    118 }
    119 
    120 /* identity on ULong */
    121 static inline ULong idULong ( ULong x )
    122 {
    123    return x;
    124 }
    125 
    126 
    127 #define PREAMBLE(__data_bits)					\
    128    /* const */ UInt DATA_MASK 					\
    129       = __data_bits==8 ? 0xFF 					\
    130                        : (__data_bits==16 ? 0xFFFF 		\
    131                                           : 0xFFFFFFFF); 	\
    132    /* const */ UInt SIGN_MASK = 1 << (__data_bits - 1);		\
    133    /* const */ UInt CC_DEP1 = cc_dep1_formal;			\
    134    /* const */ UInt CC_DEP2 = cc_dep2_formal;			\
    135    /* const */ UInt CC_NDEP = cc_ndep_formal;			\
    136    /* Four bogus assignments, which hopefully gcc can     */	\
    137    /* optimise away, and which stop it complaining about  */	\
    138    /* unused variables.                                   */	\
    139    SIGN_MASK = SIGN_MASK;					\
    140    DATA_MASK = DATA_MASK;					\
    141    CC_DEP2 = CC_DEP2;						\
    142    CC_NDEP = CC_NDEP;
    143 
    144 
    145 /*-------------------------------------------------------------*/
    146 
    147 #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)			\
    148 {								\
    149    PREAMBLE(DATA_BITS);						\
    150    { Int cf, pf, af, zf, sf, of;				\
    151      Int argL, argR, res;					\
    152      argL = CC_DEP1;						\
    153      argR = CC_DEP2;						\
    154      res  = argL + argR;					\
    155      cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;			\
    156      pf = parity_table[(UChar)res];				\
    157      af = (res ^ argL ^ argR) & 0x10;				\
    158      zf = ((DATA_UTYPE)res == 0) << 6;				\
    159      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
    160      of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
    161                  12 - DATA_BITS) & X86G_CC_MASK_O;		\
    162      return cf | pf | af | zf | sf | of;			\
    163    }								\
    164 }
    165 
    166 /*-------------------------------------------------------------*/
    167 
    168 #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)			\
    169 {								\
    170    PREAMBLE(DATA_BITS);						\
    171    { Int cf, pf, af, zf, sf, of;				\
    172      Int argL, argR, res;					\
    173      argL = CC_DEP1;						\
    174      argR = CC_DEP2;						\
    175      res  = argL - argR;					\
    176      cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;			\
    177      pf = parity_table[(UChar)res];				\
    178      af = (res ^ argL ^ argR) & 0x10;				\
    179      zf = ((DATA_UTYPE)res == 0) << 6;				\
    180      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
    181      of = lshift((argL ^ argR) & (argL ^ res),	 		\
    182                  12 - DATA_BITS) & X86G_CC_MASK_O; 		\
    183      return cf | pf | af | zf | sf | of;			\
    184    }								\
    185 }
    186 
    187 /*-------------------------------------------------------------*/
    188 
    189 #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)			\
    190 {								\
    191    PREAMBLE(DATA_BITS);						\
    192    { Int cf, pf, af, zf, sf, of;				\
    193      Int argL, argR, oldC, res;		       			\
    194      oldC = CC_NDEP & X86G_CC_MASK_C;				\
    195      argL = CC_DEP1;						\
    196      argR = CC_DEP2 ^ oldC;	       				\
    197      res  = (argL + argR) + oldC;				\
    198      if (oldC)							\
    199         cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;		\
    200      else							\
    201         cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;		\
    202      pf = parity_table[(UChar)res];				\
    203      af = (res ^ argL ^ argR) & 0x10;				\
    204      zf = ((DATA_UTYPE)res == 0) << 6;				\
    205      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
    206      of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
    207                   12 - DATA_BITS) & X86G_CC_MASK_O;		\
    208      return cf | pf | af | zf | sf | of;			\
    209    }								\
    210 }
    211 
    212 /*-------------------------------------------------------------*/
    213 
    214 #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)			\
    215 {								\
    216    PREAMBLE(DATA_BITS);						\
    217    { Int cf, pf, af, zf, sf, of;				\
    218      Int argL, argR, oldC, res;		       			\
    219      oldC = CC_NDEP & X86G_CC_MASK_C;				\
    220      argL = CC_DEP1;						\
    221      argR = CC_DEP2 ^ oldC;	       				\
    222      res  = (argL - argR) - oldC;				\
    223      if (oldC)							\
    224         cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;		\
    225      else							\
    226         cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;		\
    227      pf = parity_table[(UChar)res];				\
    228      af = (res ^ argL ^ argR) & 0x10;				\
    229      zf = ((DATA_UTYPE)res == 0) << 6;				\
    230      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
    231      of = lshift((argL ^ argR) & (argL ^ res), 			\
    232                  12 - DATA_BITS) & X86G_CC_MASK_O;		\
    233      return cf | pf | af | zf | sf | of;			\
    234    }								\
    235 }
    236 
    237 /*-------------------------------------------------------------*/
    238 
    239 #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)			\
    240 {								\
    241    PREAMBLE(DATA_BITS);						\
    242    { Int cf, pf, af, zf, sf, of;				\
    243      cf = 0;							\
    244      pf = parity_table[(UChar)CC_DEP1];				\
    245      af = 0;							\
    246      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
    247      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
    248      of = 0;							\
    249      return cf | pf | af | zf | sf | of;			\
    250    }								\
    251 }
    252 
    253 /*-------------------------------------------------------------*/
    254 
    255 #define ACTIONS_INC(DATA_BITS,DATA_UTYPE)			\
    256 {								\
    257    PREAMBLE(DATA_BITS);						\
    258    { Int cf, pf, af, zf, sf, of;				\
    259      Int argL, argR, res;					\
    260      res  = CC_DEP1;						\
    261      argL = res - 1;						\
    262      argR = 1;							\
    263      cf = CC_NDEP & X86G_CC_MASK_C;				\
    264      pf = parity_table[(UChar)res];				\
    265      af = (res ^ argL ^ argR) & 0x10;				\
    266      zf = ((DATA_UTYPE)res == 0) << 6;				\
    267      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
    268      of = ((res & DATA_MASK) == SIGN_MASK) << 11;		\
    269      return cf | pf | af | zf | sf | of;			\
    270    }								\
    271 }
    272 
    273 /*-------------------------------------------------------------*/
    274 
    275 #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)			\
    276 {								\
    277    PREAMBLE(DATA_BITS);						\
    278    { Int cf, pf, af, zf, sf, of;				\
    279      Int argL, argR, res;					\
    280      res  = CC_DEP1;						\
    281      argL = res + 1;						\
    282      argR = 1;							\
    283      cf = CC_NDEP & X86G_CC_MASK_C;				\
    284      pf = parity_table[(UChar)res];				\
    285      af = (res ^ argL ^ argR) & 0x10;				\
    286      zf = ((DATA_UTYPE)res == 0) << 6;				\
    287      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
    288      of = ((res & DATA_MASK) 					\
    289           == ((UInt)SIGN_MASK - 1)) << 11;			\
    290      return cf | pf | af | zf | sf | of;			\
    291    }								\
    292 }
    293 
    294 /*-------------------------------------------------------------*/
    295 
    296 #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)			\
    297 {								\
    298    PREAMBLE(DATA_BITS);						\
    299    { Int cf, pf, af, zf, sf, of;				\
    300      cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;	\
    301      pf = parity_table[(UChar)CC_DEP1];				\
    302      af = 0; /* undefined */					\
    303      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
    304      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
    305      /* of is defined if shift count == 1 */			\
    306      of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) 		\
    307           & X86G_CC_MASK_O;					\
    308      return cf | pf | af | zf | sf | of;			\
    309    }								\
    310 }
    311 
    312 /*-------------------------------------------------------------*/
    313 
    314 #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)			\
    315 {								\
    316    PREAMBLE(DATA_BITS);  					\
    317    { Int cf, pf, af, zf, sf, of;				\
    318      cf = CC_DEP2 & 1;						\
    319      pf = parity_table[(UChar)CC_DEP1];				\
    320      af = 0; /* undefined */					\
    321      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
    322      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
    323      /* of is defined if shift count == 1 */			\
    324      of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)		\
    325           & X86G_CC_MASK_O;					\
    326      return cf | pf | af | zf | sf | of;			\
    327    }								\
    328 }
    329 
    330 /*-------------------------------------------------------------*/
    331 
    332 /* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
    333 /* DEP1 = result, NDEP = old flags */
    334 #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)			\
    335 {								\
    336    PREAMBLE(DATA_BITS);						\
    337    { Int fl 							\
    338         = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
    339           | (X86G_CC_MASK_C & CC_DEP1)				\
    340           | (X86G_CC_MASK_O & (lshift(CC_DEP1,  		\
    341                                       11-(DATA_BITS-1)) 	\
    342                      ^ lshift(CC_DEP1, 11)));			\
    343      return fl;							\
    344    }								\
    345 }
    346 
    347 /*-------------------------------------------------------------*/
    348 
    349 /* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
    350 /* DEP1 = result, NDEP = old flags */
    351 #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)			\
    352 {								\
    353    PREAMBLE(DATA_BITS);						\
    354    { Int fl 							\
    355         = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
    356           | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))	\
    357           | (X86G_CC_MASK_O & (lshift(CC_DEP1, 			\
    358                                       11-(DATA_BITS-1)) 	\
    359                      ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));	\
    360      return fl;							\
    361    }								\
    362 }
    363 
    364 /*-------------------------------------------------------------*/
    365 
    366 #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
    367                                 DATA_U2TYPE, NARROWto2U)        \
    368 {                                                               \
    369    PREAMBLE(DATA_BITS);                                         \
    370    { Int cf, pf, af, zf, sf, of;                                \
    371      DATA_UTYPE  hi;                                            \
    372      DATA_UTYPE  lo                                             \
    373         = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
    374                      * ((DATA_UTYPE)CC_DEP2) );                 \
    375      DATA_U2TYPE rr                                             \
    376         = NARROWto2U(                                           \
    377              ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
    378              * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
    379      hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
    380      cf = (hi != 0);                                            \
    381      pf = parity_table[(UChar)lo];                              \
    382      af = 0; /* undefined */                                    \
    383      zf = (lo == 0) << 6;                                       \
    384      sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
    385      of = cf << 11;                                             \
    386      return cf | pf | af | zf | sf | of;                        \
    387    }								\
    388 }
    389 
    390 /*-------------------------------------------------------------*/
    391 
    392 #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
    393                                 DATA_S2TYPE, NARROWto2S)        \
    394 {                                                               \
    395    PREAMBLE(DATA_BITS);                                         \
    396    { Int cf, pf, af, zf, sf, of;                                \
    397      DATA_STYPE  hi;                                            \
    398      DATA_STYPE  lo                                             \
    399         = NARROWtoS( ((DATA_STYPE)CC_DEP1)                      \
    400                      * ((DATA_STYPE)CC_DEP2) );                 \
    401      DATA_S2TYPE rr                                             \
    402         = NARROWto2S(                                           \
    403              ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
    404              * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
    405      hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
    406      cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
    407      pf = parity_table[(UChar)lo];                              \
    408      af = 0; /* undefined */                                    \
    409      zf = (lo == 0) << 6;                                       \
    410      sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
    411      of = cf << 11;                                             \
    412      return cf | pf | af | zf | sf | of;                        \
    413    }								\
    414 }
    415 
    416 
    417 #if PROFILE_EFLAGS
    418 
/* Becomes True once initCounts() has zeroed the tables below; the
   helpers check it lazily on each call. */
static Bool initted     = False;

/* C flag, fast route: per-cc_op count of x86g_calculate_eflags_c
   calls answered by its special-cased switch. */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route: per-cc_op count of x86g_calculate_eflags_c
   calls that fell through to the general worker. */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond: counts indexed by [cc_op][condition] */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

/* True whenever the combined call count has its low 22 bits clear,
   i.e. once every 0x400000 calls; used to trigger showCounts(). */
#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
    433 
    434 
    435 static void showCounts ( void )
    436 {
    437    Int op, co;
    438    Char ch;
    439    vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
    440               n_calc_all, n_calc_cond, n_calc_c);
    441 
    442    vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
    443               "    S   NS    P   NP    L   NL   LE  NLE\n");
    444    vex_printf("     -----------------------------------------------------"
    445               "----------------------------------------\n");
    446    for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
    447 
    448       ch = ' ';
    449       if (op > 0 && (op-1) % 3 == 0)
    450          ch = 'B';
    451       if (op > 0 && (op-1) % 3 == 1)
    452          ch = 'W';
    453       if (op > 0 && (op-1) % 3 == 2)
    454          ch = 'L';
    455 
    456       vex_printf("%2d%c: ", op, ch);
    457       vex_printf("%6u ", tabc_slow[op]);
    458       vex_printf("%6u ", tabc_fast[op]);
    459       for (co = 0; co < 16; co++) {
    460          Int n = tab_cond[op][co];
    461          if (n >= 1000) {
    462             vex_printf(" %3dK", n / 1000);
    463          } else
    464          if (n >= 0) {
    465             vex_printf(" %3d ", n );
    466          } else {
    467             vex_printf("     ");
    468          }
    469       }
    470       vex_printf("\n");
    471    }
    472    vex_printf("\n");
    473 }
    474 
    475 static void initCounts ( void )
    476 {
    477    Int op, co;
    478    initted = True;
    479    for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
    480       tabc_fast[op] = tabc_slow[op] = 0;
    481       for (co = 0; co < 16; co++)
    482          tab_cond[op][co] = 0;
    483    }
    484 }
    485 
    486 #endif /* PROFILE_EFLAGS */
    487 
    488 
/* Worker function, not directly called from generated code (it is
   static; the helpers in this file call it).
   Calculate all the 6 flags (O, S, Z, A, C, P) from the supplied
   thunk parameters.

   cc_op selects the operation and operand width; the three *_formal
   arguments are the thunk's DEP1, DEP2 and NDEP values, whose meaning
   depends on cc_op.  Each ACTIONS_* macro expands to a block that
   reads the *_formal names directly and ends in a 'return', so no
   case falls through into the next one.  An unrecognised cc_op
   prints the arguments and calls vpanic. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      /* COPY: DEP1 already holds the flags; just mask to the 6 bits. */
      case X86G_CC_OP_COPY:
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      /* Multiplies also need the double-width type and the matching
         narrowing functions for the full product. */
      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt   );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}
    570 
    571 
    572 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
    573 /* Calculate all the 6 flags from the supplied thunk parameters. */
    574 UInt x86g_calculate_eflags_all ( UInt cc_op,
    575                                  UInt cc_dep1,
    576                                  UInt cc_dep2,
    577                                  UInt cc_ndep )
    578 {
    579 #  if PROFILE_EFLAGS
    580    if (!initted) initCounts();
    581    n_calc_all++;
    582    if (SHOW_COUNTS_NOW) showCounts();
    583 #  endif
    584    return
    585       x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
    586 }
    587 
    588 
    589 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
    590 /* Calculate just the carry flag from the supplied thunk parameters. */
    591 VEX_REGPARM(3)
    592 UInt x86g_calculate_eflags_c ( UInt cc_op,
    593                                UInt cc_dep1,
    594                                UInt cc_dep2,
    595                                UInt cc_ndep )
    596 {
    597 #  if PROFILE_EFLAGS
    598    if (!initted) initCounts();
    599    n_calc_c++;
    600    tabc_fast[cc_op]++;
    601    if (SHOW_COUNTS_NOW) showCounts();
    602 #  endif
    603 
    604    /* Fast-case some common ones. */
    605    switch (cc_op) {
    606       case X86G_CC_OP_LOGICL:
    607       case X86G_CC_OP_LOGICW:
    608       case X86G_CC_OP_LOGICB:
    609          return 0;
    610       case X86G_CC_OP_SUBL:
    611          return ((UInt)cc_dep1) < ((UInt)cc_dep2)
    612                    ? X86G_CC_MASK_C : 0;
    613       case X86G_CC_OP_SUBW:
    614          return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
    615                    ? X86G_CC_MASK_C : 0;
    616       case X86G_CC_OP_SUBB:
    617          return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
    618                    ? X86G_CC_MASK_C : 0;
    619       case X86G_CC_OP_INCL:
    620       case X86G_CC_OP_DECL:
    621          return cc_ndep & X86G_CC_MASK_C;
    622       default:
    623          break;
    624    }
    625 
    626 #  if PROFILE_EFLAGS
    627    tabc_fast[cc_op]--;
    628    tabc_slow[cc_op]++;
    629 #  endif
    630 
    631    return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
    632           & X86G_CC_MASK_C;
    633 }
    634 
    635 
    636 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
    637 /* returns 1 or 0 */
    638 UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
    639                                 UInt cc_op,
    640                                 UInt cc_dep1,
    641                                 UInt cc_dep2,
    642                                 UInt cc_ndep )
    643 {
    644    UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
    645                                                cc_dep2, cc_ndep);
    646    UInt of,sf,zf,cf,pf;
    647    UInt inv = cond & 1;
    648 
    649 #  if PROFILE_EFLAGS
    650    if (!initted) initCounts();
    651    tab_cond[cc_op][cond]++;
    652    n_calc_cond++;
    653    if (SHOW_COUNTS_NOW) showCounts();
    654 #  endif
    655 
    656    switch (cond) {
    657       case X86CondNO:
    658       case X86CondO: /* OF == 1 */
    659          of = eflags >> X86G_CC_SHIFT_O;
    660          return 1 & (inv ^ of);
    661 
    662       case X86CondNZ:
    663       case X86CondZ: /* ZF == 1 */
    664          zf = eflags >> X86G_CC_SHIFT_Z;
    665          return 1 & (inv ^ zf);
    666 
    667       case X86CondNB:
    668       case X86CondB: /* CF == 1 */
    669          cf = eflags >> X86G_CC_SHIFT_C;
    670          return 1 & (inv ^ cf);
    671          break;
    672 
    673       case X86CondNBE:
    674       case X86CondBE: /* (CF or ZF) == 1 */
    675          cf = eflags >> X86G_CC_SHIFT_C;
    676          zf = eflags >> X86G_CC_SHIFT_Z;
    677          return 1 & (inv ^ (cf | zf));
    678          break;
    679 
    680       case X86CondNS:
    681       case X86CondS: /* SF == 1 */
    682          sf = eflags >> X86G_CC_SHIFT_S;
    683          return 1 & (inv ^ sf);
    684 
    685       case X86CondNP:
    686       case X86CondP: /* PF == 1 */
    687          pf = eflags >> X86G_CC_SHIFT_P;
    688          return 1 & (inv ^ pf);
    689 
    690       case X86CondNL:
    691       case X86CondL: /* (SF xor OF) == 1 */
    692          sf = eflags >> X86G_CC_SHIFT_S;
    693          of = eflags >> X86G_CC_SHIFT_O;
    694          return 1 & (inv ^ (sf ^ of));
    695          break;
    696 
    697       case X86CondNLE:
    698       case X86CondLE: /* ((SF xor OF) or ZF)  == 1 */
    699          sf = eflags >> X86G_CC_SHIFT_S;
    700          of = eflags >> X86G_CC_SHIFT_O;
    701          zf = eflags >> X86G_CC_SHIFT_Z;
    702          return 1 & (inv ^ ((sf ^ of) | zf));
    703          break;
    704 
    705       default:
    706          /* shouldn't really make these calls from generated code */
    707          vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
    708                     cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
    709          vpanic("x86g_calculate_condition");
    710    }
    711 }
    712 
    713 
    714 /* VISIBLE TO LIBVEX CLIENT */
    715 UInt LibVEX_GuestX86_get_eflags ( /*IN*/VexGuestX86State* vex_state )
    716 {
    717    UInt eflags = x86g_calculate_eflags_all_WRK(
    718                     vex_state->guest_CC_OP,
    719                     vex_state->guest_CC_DEP1,
    720                     vex_state->guest_CC_DEP2,
    721                     vex_state->guest_CC_NDEP
    722                  );
    723    UInt dflag = vex_state->guest_DFLAG;
    724    vassert(dflag == 1 || dflag == 0xFFFFFFFF);
    725    if (dflag == 0xFFFFFFFF)
    726       eflags |= (1<<10);
    727    if (vex_state->guest_IDFLAG == 1)
    728       eflags |= (1<<21);
    729    if (vex_state->guest_ACFLAG == 1)
    730       eflags |= (1<<18);
    731 
    732    return eflags;
    733 }
    734 
    735 /* VISIBLE TO LIBVEX CLIENT */
    736 void
    737 LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
    738                               /*MOD*/VexGuestX86State* vex_state )
    739 {
    740    UInt oszacp = x86g_calculate_eflags_all_WRK(
    741                     vex_state->guest_CC_OP,
    742                     vex_state->guest_CC_DEP1,
    743                     vex_state->guest_CC_DEP2,
    744                     vex_state->guest_CC_NDEP
    745                  );
    746    if (new_carry_flag & 1) {
    747       oszacp |= X86G_CC_MASK_C;
    748    } else {
    749       oszacp &= ~X86G_CC_MASK_C;
    750    }
    751    vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
    752    vex_state->guest_CC_DEP1 = oszacp;
    753    vex_state->guest_CC_DEP2 = 0;
    754    vex_state->guest_CC_NDEP = 0;
    755 }
    756 
    757 
    758 /*---------------------------------------------------------------*/
    759 /*--- %eflags translation-time function specialisers.         ---*/
    760 /*--- These help iropt specialise calls to the above run-time ---*/
    761 /*--- %eflags functions.                                      ---*/
    762 /*---------------------------------------------------------------*/
    763 
    764 /* Used by the optimiser to try specialisations.  Returns an
    765    equivalent expression, or NULL if none. */
    766 
    767 static inline Bool isU32 ( IRExpr* e, UInt n )
    768 {
    769    return
    770       toBool( e->tag == Iex_Const
    771               && e->Iex.Const.con->tag == Ico_U32
    772               && e->Iex.Const.con->Ico.U32 == n );
    773 }
    774 
    775 IRExpr* guest_x86_spechelper ( HChar*   function_name,
    776                                IRExpr** args,
    777                                IRStmt** precedingStmts,
    778                                Int      n_precedingStmts )
    779 {
    780 #  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
    781 #  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
    782 #  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
    783 #  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))
    784 
    785    Int i, arity = 0;
    786    for (i = 0; args[i]; i++)
    787       arity++;
    788 #  if 0
    789    vex_printf("spec request:\n");
    790    vex_printf("   %s  ", function_name);
    791    for (i = 0; i < arity; i++) {
    792       vex_printf("  ");
    793       ppIRExpr(args[i]);
    794    }
    795    vex_printf("\n");
    796 #  endif
    797 
    798    /* --------- specialising "x86g_calculate_condition" --------- */
    799 
    800    if (vex_streq(function_name, "x86g_calculate_condition")) {
    801       /* specialise calls to above "calculate condition" function */
    802       IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
    803       vassert(arity == 5);
    804       cond    = args[0];
    805       cc_op   = args[1];
    806       cc_dep1 = args[2];
    807       cc_dep2 = args[3];
    808 
    809       /*---------------- ADDL ----------------*/
    810 
    811       if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
    812          /* long add, then Z --> test (dst+src == 0) */
    813          return unop(Iop_1Uto32,
    814                      binop(Iop_CmpEQ32,
    815                            binop(Iop_Add32, cc_dep1, cc_dep2),
    816                            mkU32(0)));
    817       }
    818 
    819       /*---------------- SUBL ----------------*/
    820 
    821       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
    822          /* long sub/cmp, then Z --> test dst==src */
    823          return unop(Iop_1Uto32,
    824                      binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
    825       }
    826       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
    827          /* long sub/cmp, then NZ --> test dst!=src */
    828          return unop(Iop_1Uto32,
    829                      binop(Iop_CmpNE32, cc_dep1, cc_dep2));
    830       }
    831 
    832       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
    833          /* long sub/cmp, then L (signed less than)
    834             --> test dst <s src */
    835          return unop(Iop_1Uto32,
    836                      binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
    837       }
    838       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
    839          /* long sub/cmp, then NL (signed greater than or equal)
    840             --> test !(dst <s src) */
    841          return binop(Iop_Xor32,
    842                       unop(Iop_1Uto32,
    843                            binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
    844                       mkU32(1));
    845       }
    846 
    847       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
    848          /* long sub/cmp, then LE (signed less than or equal)
    849             --> test dst <=s src */
    850          return unop(Iop_1Uto32,
    851                      binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
    852       }
    853       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
    854          /* long sub/cmp, then NLE (signed not less than or equal)
    855             --> test dst >s src
    856             --> test !(dst <=s src) */
    857          return binop(Iop_Xor32,
    858                       unop(Iop_1Uto32,
    859                            binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
    860                       mkU32(1));
    861       }
    862 
    863       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
    864          /* long sub/cmp, then BE (unsigned less than or equal)
    865             --> test dst <=u src */
    866          return unop(Iop_1Uto32,
    867                      binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
    868       }
    869       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
    870          /* long sub/cmp, then BE (unsigned greater than)
    871             --> test !(dst <=u src) */
    872          return binop(Iop_Xor32,
    873                       unop(Iop_1Uto32,
    874                            binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
    875                       mkU32(1));
    876       }
    877 
    878       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
    879          /* long sub/cmp, then B (unsigned less than)
    880             --> test dst <u src */
    881          return unop(Iop_1Uto32,
    882                      binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
    883       }
    884       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
    885          /* long sub/cmp, then NB (unsigned greater than or equal)
    886             --> test !(dst <u src) */
    887          return binop(Iop_Xor32,
    888                       unop(Iop_1Uto32,
    889                            binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
    890                       mkU32(1));
    891       }
    892 
    893       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
    894          /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
    895          return unop(Iop_1Uto32,
    896                      binop(Iop_CmpLT32S,
    897                            binop(Iop_Sub32, cc_dep1, cc_dep2),
    898                            mkU32(0)));
    899       }
    900       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
    901          /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
    902          return binop(Iop_Xor32,
    903                       unop(Iop_1Uto32,
    904                            binop(Iop_CmpLT32S,
    905                                  binop(Iop_Sub32, cc_dep1, cc_dep2),
    906                                  mkU32(0))),
    907                       mkU32(1));
    908       }
    909 
    910       /*---------------- SUBW ----------------*/
    911 
    912       if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
    913          /* word sub/cmp, then Z --> test dst==src */
    914          return unop(Iop_1Uto32,
    915                      binop(Iop_CmpEQ16,
    916                            unop(Iop_32to16,cc_dep1),
    917                            unop(Iop_32to16,cc_dep2)));
    918       }
    919       if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
    920          /* word sub/cmp, then NZ --> test dst!=src */
    921          return unop(Iop_1Uto32,
    922                      binop(Iop_CmpNE16,
    923                            unop(Iop_32to16,cc_dep1),
    924                            unop(Iop_32to16,cc_dep2)));
    925       }
    926 
    927       /*---------------- SUBB ----------------*/
    928 
    929       if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
    930          /* byte sub/cmp, then Z --> test dst==src */
    931          return unop(Iop_1Uto32,
    932                      binop(Iop_CmpEQ8,
    933                            unop(Iop_32to8,cc_dep1),
    934                            unop(Iop_32to8,cc_dep2)));
    935       }
    936       if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
    937          /* byte sub/cmp, then NZ --> test dst!=src */
    938          return unop(Iop_1Uto32,
    939                      binop(Iop_CmpNE8,
    940                            unop(Iop_32to8,cc_dep1),
    941                            unop(Iop_32to8,cc_dep2)));
    942       }
    943 
    944       if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
    945          /* byte sub/cmp, then NBE (unsigned greater than)
    946             --> test src <u dst */
    947          /* Note, args are opposite way round from the usual */
    948          return unop(Iop_1Uto32,
    949                      binop(Iop_CmpLT32U,
    950                            binop(Iop_And32,cc_dep2,mkU32(0xFF)),
    951 			   binop(Iop_And32,cc_dep1,mkU32(0xFF))));
    952       }
    953 
    954       if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
    955                                         && isU32(cc_dep2, 0)) {
    956          /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
    957                                          --> test dst <s 0
    958                                          --> (UInt)dst[7]
    959             This is yet another scheme by which gcc figures out if the
    960             top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
    961          /* Note: isU32(cc_dep2, 0) is correct, even though this is
    962             for an 8-bit comparison, since the args to the helper
    963             function are always U32s. */
    964          return binop(Iop_And32,
    965                       binop(Iop_Shr32,cc_dep1,mkU8(7)),
    966                       mkU32(1));
    967       }
    968       if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
    969                                         && isU32(cc_dep2, 0)) {
    970          /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
    971                                           --> test !(dst <s 0)
    972                                           --> (UInt) !dst[7]
    973          */
    974          return binop(Iop_Xor32,
    975                       binop(Iop_And32,
    976                             binop(Iop_Shr32,cc_dep1,mkU8(7)),
    977                             mkU32(1)),
    978                 mkU32(1));
    979       }
    980 
    981       /*---------------- LOGICL ----------------*/
    982 
    983       if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
    984          /* long and/or/xor, then Z --> test dst==0 */
    985          return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
    986       }
    987       if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
    988          /* long and/or/xor, then NZ --> test dst!=0 */
    989          return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
    990       }
    991 
    992       if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
    993          /* long and/or/xor, then LE
    994             This is pretty subtle.  LOGIC sets SF and ZF according to the
    995             result and makes OF be zero.  LE computes (SZ ^ OF) | ZF, but
    996             OF is zero, so this reduces to SZ | ZF -- which will be 1 iff
    997             the result is <=signed 0.  Hence ...
    998          */
    999          return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
   1000       }
   1001 
   1002       if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
   1003          /* long and/or/xor, then BE
   1004             LOGIC sets ZF according to the result and makes CF be zero.
   1005             BE computes (CF | ZF), but CF is zero, so this reduces ZF
   1006             -- which will be 1 iff the result is zero.  Hence ...
   1007          */
   1008          return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
   1009       }
   1010 
   1011       if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
   1012          /* see comment below for (LOGICB, CondS) */
   1013          /* long and/or/xor, then S --> (UInt)result[31] */
   1014          return binop(Iop_And32,
   1015                       binop(Iop_Shr32,cc_dep1,mkU8(31)),
   1016                       mkU32(1));
   1017       }
   1018       if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
   1019          /* see comment below for (LOGICB, CondNS) */
   1020          /* long and/or/xor, then S --> (UInt) ~ result[31] */
   1021          return binop(Iop_Xor32,
   1022                 binop(Iop_And32,
   1023                       binop(Iop_Shr32,cc_dep1,mkU8(31)),
   1024                       mkU32(1)),
   1025                 mkU32(1));
   1026       }
   1027 
   1028       /*---------------- LOGICW ----------------*/
   1029 
   1030       if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
   1031          /* word and/or/xor, then Z --> test dst==0 */
   1032          return unop(Iop_1Uto32,
   1033                      binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
   1034                                         mkU32(0)));
   1035       }
   1036 
   1037       if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
   1038          /* see comment below for (LOGICB, CondS) */
   1039          /* word and/or/xor, then S --> (UInt)result[15] */
   1040          return binop(Iop_And32,
   1041                       binop(Iop_Shr32,cc_dep1,mkU8(15)),
   1042                       mkU32(1));
   1043       }
   1044 
   1045       /*---------------- LOGICB ----------------*/
   1046 
   1047       if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
   1048          /* byte and/or/xor, then Z --> test dst==0 */
   1049          return unop(Iop_1Uto32,
   1050                      binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
   1051                                         mkU32(0)));
   1052       }
   1053       if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
   1054          /* byte and/or/xor, then Z --> test dst!=0 */
   1055          /* b9ac9:       84 c0                   test   %al,%al
   1056             b9acb:       75 0d                   jne    b9ada */
   1057          return unop(Iop_1Uto32,
   1058                      binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
   1059                                         mkU32(0)));
   1060       }
   1061 
   1062       if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
   1063          /* this is an idiom gcc sometimes uses to find out if the top
   1064             bit of a byte register is set: eg testb %al,%al; js ..
   1065             Since it just depends on the top bit of the byte, extract
   1066             that bit and explicitly get rid of all the rest.  This
   1067             helps memcheck avoid false positives in the case where any
   1068             of the other bits in the byte are undefined. */
   1069          /* byte and/or/xor, then S --> (UInt)result[7] */
   1070          return binop(Iop_And32,
   1071                       binop(Iop_Shr32,cc_dep1,mkU8(7)),
   1072                       mkU32(1));
   1073       }
   1074       if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
   1075          /* ditto, for negation-of-S. */
   1076          /* byte and/or/xor, then S --> (UInt) ~ result[7] */
   1077          return binop(Iop_Xor32,
   1078                 binop(Iop_And32,
   1079                       binop(Iop_Shr32,cc_dep1,mkU8(7)),
   1080                       mkU32(1)),
   1081                 mkU32(1));
   1082       }
   1083 
   1084       /*---------------- DECL ----------------*/
   1085 
   1086       if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
   1087          /* dec L, then Z --> test dst == 0 */
   1088          return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
   1089       }
   1090 
   1091       if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
   1092          /* dec L, then S --> compare DST <s 0 */
   1093          return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
   1094       }
   1095 
   1096       /*---------------- DECW ----------------*/
   1097 
   1098       if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
   1099          /* dec W, then Z --> test dst == 0 */
   1100          return unop(Iop_1Uto32,
   1101                      binop(Iop_CmpEQ32,
   1102                            binop(Iop_Shl32,cc_dep1,mkU8(16)),
   1103                            mkU32(0)));
   1104       }
   1105 
   1106       /*---------------- INCW ----------------*/
   1107 
   1108       if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
   1109          /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
   1110          /* inc W, then Z --> test dst == 0 */
   1111          return unop(Iop_1Uto32,
   1112                      binop(Iop_CmpEQ32,
   1113                            binop(Iop_Shl32,cc_dep1,mkU8(16)),
   1114                            mkU32(0)));
   1115       }
   1116 
   1117       /*---------------- SHRL ----------------*/
   1118 
   1119       if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
   1120          /* SHRL, then Z --> test dep1 == 0 */
   1121          return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
   1122       }
   1123 
   1124       /*---------------- COPY ----------------*/
   1125       /* This can happen, as a result of x87 FP compares: "fcom ... ;
   1126          fnstsw %ax ; sahf ; jbe" for example. */
   1127 
   1128       if (isU32(cc_op, X86G_CC_OP_COPY) &&
   1129           (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
   1130          /* COPY, then BE --> extract C and Z from dep1, and test
   1131             (C or Z) == 1. */
   1132          /* COPY, then NBE --> extract C and Z from dep1, and test
   1133             (C or Z) == 0. */
   1134          UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
   1135          return
   1136             unop(
   1137                Iop_1Uto32,
   1138                binop(
   1139                   Iop_CmpEQ32,
   1140                   binop(
   1141                      Iop_And32,
   1142                      binop(
   1143                         Iop_Or32,
   1144                         binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
   1145                         binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
   1146                      ),
   1147                      mkU32(1)
   1148                   ),
   1149                   mkU32(nnn)
   1150                )
   1151             );
   1152       }
   1153 
   1154       if (isU32(cc_op, X86G_CC_OP_COPY)
   1155           && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
   1156          /* COPY, then B --> extract C from dep1, and test (C == 1). */
   1157          /* COPY, then NB --> extract C from dep1, and test (C == 0). */
   1158          UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
   1159          return
   1160             unop(
   1161                Iop_1Uto32,
   1162                binop(
   1163                   Iop_CmpEQ32,
   1164                   binop(
   1165                      Iop_And32,
   1166                      binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
   1167                      mkU32(1)
   1168                   ),
   1169                   mkU32(nnn)
   1170                )
   1171             );
   1172       }
   1173 
   1174       if (isU32(cc_op, X86G_CC_OP_COPY)
   1175           && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
   1176          /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
   1177          /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
   1178          UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
   1179          return
   1180             unop(
   1181                Iop_1Uto32,
   1182                binop(
   1183                   Iop_CmpEQ32,
   1184                   binop(
   1185                      Iop_And32,
   1186                      binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
   1187                      mkU32(1)
   1188                   ),
   1189                   mkU32(nnn)
   1190                )
   1191             );
   1192       }
   1193 
   1194       if (isU32(cc_op, X86G_CC_OP_COPY)
   1195           && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
   1196          /* COPY, then P --> extract P from dep1, and test (P == 1). */
   1197          /* COPY, then NP --> extract P from dep1, and test (P == 0). */
   1198          UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
   1199          return
   1200             unop(
   1201                Iop_1Uto32,
   1202                binop(
   1203                   Iop_CmpEQ32,
   1204                   binop(
   1205                      Iop_And32,
   1206                      binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
   1207                      mkU32(1)
   1208                   ),
   1209                   mkU32(nnn)
   1210                )
   1211             );
   1212       }
   1213 
   1214       return NULL;
   1215    }
   1216 
   1217    /* --------- specialising "x86g_calculate_eflags_c" --------- */
   1218 
   1219    if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
   1220       /* specialise calls to above "calculate_eflags_c" function */
   1221       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
   1222       vassert(arity == 4);
   1223       cc_op   = args[0];
   1224       cc_dep1 = args[1];
   1225       cc_dep2 = args[2];
   1226       cc_ndep = args[3];
   1227 
   1228       if (isU32(cc_op, X86G_CC_OP_SUBL)) {
   1229          /* C after sub denotes unsigned less than */
   1230          return unop(Iop_1Uto32,
   1231                      binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
   1232       }
   1233       if (isU32(cc_op, X86G_CC_OP_SUBB)) {
   1234          /* C after sub denotes unsigned less than */
   1235          return unop(Iop_1Uto32,
   1236                      binop(Iop_CmpLT32U,
   1237                            binop(Iop_And32,cc_dep1,mkU32(0xFF)),
   1238                            binop(Iop_And32,cc_dep2,mkU32(0xFF))));
   1239       }
   1240       if (isU32(cc_op, X86G_CC_OP_LOGICL)
   1241           || isU32(cc_op, X86G_CC_OP_LOGICW)
   1242           || isU32(cc_op, X86G_CC_OP_LOGICB)) {
   1243          /* cflag after logic is zero */
   1244          return mkU32(0);
   1245       }
   1246       if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
   1247          /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
   1248          return cc_ndep;
   1249       }
   1250       if (isU32(cc_op, X86G_CC_OP_COPY)) {
   1251          /* cflag after COPY is stored in DEP1. */
   1252          return
   1253             binop(
   1254                Iop_And32,
   1255                binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
   1256                mkU32(1)
   1257             );
   1258       }
   1259       if (isU32(cc_op, X86G_CC_OP_ADDL)) {
   1260          /* C after add denotes sum <u either arg */
   1261          return unop(Iop_1Uto32,
   1262                      binop(Iop_CmpLT32U,
   1263                            binop(Iop_Add32, cc_dep1, cc_dep2),
   1264                            cc_dep1));
   1265       }
   1266       // ATC, requires verification, no test case known
   1267       //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
   1268       //   /* C after signed widening multiply denotes the case where
   1269       //      the top half of the result isn't simply the sign extension
   1270       //      of the bottom half (iow the result doesn't fit completely
   1271       //      in the bottom half).  Hence:
   1272       //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
   1273       //      where 'x' denotes signed widening multiply.*/
   1274       //   return
   1275       //      unop(Iop_1Uto32,
   1276       //           binop(Iop_CmpNE32,
   1277       //                 unop(Iop_64HIto32,
   1278       //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
   1279       //                 binop(Iop_Sar32,
   1280       //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
   1281       //}
   1282 #     if 0
   1283       if (cc_op->tag == Iex_Const) {
   1284          vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
   1285       }
   1286 #     endif
   1287 
   1288       return NULL;
   1289    }
   1290 
   1291    /* --------- specialising "x86g_calculate_eflags_all" --------- */
   1292 
   1293    if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
   1294       /* specialise calls to above "calculate_eflags_all" function */
   1295       IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
   1296       vassert(arity == 4);
   1297       cc_op   = args[0];
   1298       cc_dep1 = args[1];
   1299       /* cc_dep2 = args[2]; */
   1300       /* cc_ndep = args[3]; */
   1301 
   1302       if (isU32(cc_op, X86G_CC_OP_COPY)) {
   1303          /* eflags after COPY are stored in DEP1. */
   1304          return
   1305             binop(
   1306                Iop_And32,
   1307                cc_dep1,
   1308                mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
   1309                      | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
   1310             );
   1311       }
   1312       return NULL;
   1313    }
   1314 
   1315 #  undef unop
   1316 #  undef binop
   1317 #  undef mkU32
   1318 #  undef mkU8
   1319 
   1320    return NULL;
   1321 }
   1322 
   1323 
   1324 /*---------------------------------------------------------------*/
   1325 /*--- Supporting functions for x87 FPU activities.            ---*/
   1326 /*---------------------------------------------------------------*/
   1327 
   1328 static inline Bool host_is_little_endian ( void )
   1329 {
   1330    UInt x = 0x76543210;
   1331    UChar* p = (UChar*)(&x);
   1332    return toBool(*p == 0x10);
   1333 }
   1334 
   1335 /* 80 and 64-bit floating point formats:
   1336 
   1337    80-bit:
   1338 
   1339     S  0       0-------0      zero
   1340     S  0       0X------X      denormals
   1341     S  1-7FFE  1X------X      normals (all normals have leading 1)
   1342     S  7FFF    10------0      infinity
   1343     S  7FFF    10X-----X      snan
   1344     S  7FFF    11X-----X      qnan
   1345 
   1346    S is the sign bit.  For runs X----X, at least one of the Xs must be
   1347    nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
   1348    there is an explicitly represented leading 1, and a sign bit,
   1349    giving 80 in total.
   1350 
   1351    64-bit avoids the confusion of an explicitly represented leading 1
   1352    and so is simpler:
   1353 
   1354     S  0      0------0   zero
   1355     S  0      X------X   denormals
   1356     S  1-7FE  any        normals
   1357     S  7FF    0------0   infinity
   1358     S  7FF    0X-----X   snan
   1359     S  7FF    1X-----X   qnan
   1360 
   1361    Exponent is 11 bits, fractional part is 52 bits, and there is a
   1362    sign bit, giving 64 in total.
   1363 */
   1364 
   1365 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
   1366 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   1367 UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
   1368 {
   1369    Bool   mantissaIsZero;
   1370    Int    bexp;
   1371    UChar  sign;
   1372    UChar* f64;
   1373 
   1374    vassert(host_is_little_endian());
   1375 
   1376    /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
   1377 
   1378    f64  = (UChar*)(&dbl);
   1379    sign = toUChar( (f64[7] >> 7) & 1 );
   1380 
   1381    /* First off, if the tag indicates the register was empty,
   1382       return 1,0,sign,1 */
   1383    if (tag == 0) {
   1384       /* vex_printf("Empty\n"); */
   1385       return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
   1386                                  | X86G_FC_MASK_C0;
   1387    }
   1388 
   1389    bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   1390    bexp &= 0x7FF;
   1391 
   1392    mantissaIsZero
   1393       = toBool(
   1394            (f64[6] & 0x0F) == 0
   1395            && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
   1396         );
   1397 
   1398    /* If both exponent and mantissa are zero, the value is zero.
   1399       Return 1,0,sign,0. */
   1400    if (bexp == 0 && mantissaIsZero) {
   1401       /* vex_printf("Zero\n"); */
   1402       return X86G_FC_MASK_C3 | 0
   1403                              | (sign << X86G_FC_SHIFT_C1) | 0;
   1404    }
   1405 
   1406    /* If exponent is zero but mantissa isn't, it's a denormal.
   1407       Return 1,1,sign,0. */
   1408    if (bexp == 0 && !mantissaIsZero) {
   1409       /* vex_printf("Denormal\n"); */
   1410       return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
   1411                              | (sign << X86G_FC_SHIFT_C1) | 0;
   1412    }
   1413 
   1414    /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
   1415       Return 0,1,sign,1. */
   1416    if (bexp == 0x7FF && mantissaIsZero) {
   1417       /* vex_printf("Inf\n"); */
   1418       return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
   1419                                  | X86G_FC_MASK_C0;
   1420    }
   1421 
   1422    /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
   1423       Return 0,0,sign,1. */
   1424    if (bexp == 0x7FF && !mantissaIsZero) {
   1425       /* vex_printf("NaN\n"); */
   1426       return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
   1427    }
   1428 
   1429    /* Uh, ok, we give up.  It must be a normal finite number.
   1430       Return 0,1,sign,0.
   1431    */
   1432    /* vex_printf("normal\n"); */
   1433    return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
   1434 }
   1435 
   1436 
   1437 /* CALLED FROM GENERATED CODE */
   1438 /* DIRTY HELPER (reads guest memory) */
   1439 ULong x86g_dirtyhelper_loadF80le ( UInt addrU )
   1440 {
   1441    ULong f64;
   1442    convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 );
   1443    return f64;
   1444 }
   1445 
   1446 /* CALLED FROM GENERATED CODE */
   1447 /* DIRTY HELPER (writes guest memory) */
   1448 void x86g_dirtyhelper_storeF80le ( UInt addrU, ULong f64 )
   1449 {
   1450    convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) );
   1451 }
   1452 
   1453 
   1454 /*----------------------------------------------*/
   1455 /*--- The exported fns ..                    ---*/
   1456 /*----------------------------------------------*/
   1457 
   1458 /* Layout of the real x87 state. */
   1459 /* 13 June 05: Fpu_State and auxiliary constants was moved to
   1460    g_generic_x87.h */
   1461 
   1462 
   1463 /* CLEAN HELPER */
   1464 /* fpucw[15:0] contains a x87 native format FPU control word.
   1465    Extract from it the required FPROUND value and any resulting
   1466    emulation warning, and return (warn << 32) | fpround value.
   1467 */
   1468 ULong x86g_check_fldcw ( UInt fpucw )
   1469 {
   1470    /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   1471    /* NOTE, encoded exactly as per enum IRRoundingMode. */
   1472    UInt rmode = (fpucw >> 10) & 3;
   1473 
   1474    /* Detect any required emulation warnings. */
   1475    VexEmWarn ew = EmWarn_NONE;
   1476 
   1477    if ((fpucw & 0x3F) != 0x3F) {
   1478       /* unmasked exceptions! */
   1479       ew = EmWarn_X86_x87exns;
   1480    }
   1481    else
   1482    if (((fpucw >> 8) & 3) != 3) {
   1483       /* unsupported precision */
   1484       ew = EmWarn_X86_x87precision;
   1485    }
   1486 
   1487    return (((ULong)ew) << 32) | ((ULong)rmode);
   1488 }
   1489 
   1490 /* CLEAN HELPER */
   1491 /* Given fpround as an IRRoundingMode value, create a suitable x87
   1492    native format FPU control word. */
   1493 UInt x86g_create_fpucw ( UInt fpround )
   1494 {
   1495    fpround &= 3;
   1496    return 0x037F | (fpround << 10);
   1497 }
   1498 
   1499 
   1500 /* CLEAN HELPER */
   1501 /* mxcsr[15:0] contains a SSE native format MXCSR value.
   1502    Extract from it the required SSEROUND value and any resulting
   1503    emulation warning, and return (warn << 32) | sseround value.
   1504 */
   1505 ULong x86g_check_ldmxcsr ( UInt mxcsr )
   1506 {
   1507    /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   1508    /* NOTE, encoded exactly as per enum IRRoundingMode. */
   1509    UInt rmode = (mxcsr >> 13) & 3;
   1510 
   1511    /* Detect any required emulation warnings. */
   1512    VexEmWarn ew = EmWarn_NONE;
   1513 
   1514    if ((mxcsr & 0x1F80) != 0x1F80) {
   1515       /* unmasked exceptions! */
   1516       ew = EmWarn_X86_sseExns;
   1517    }
   1518    else
   1519    if (mxcsr & (1<<15)) {
   1520       /* FZ is set */
   1521       ew = EmWarn_X86_fz;
   1522    }
   1523    else
   1524    if (mxcsr & (1<<6)) {
   1525       /* DAZ is set */
   1526       ew = EmWarn_X86_daz;
   1527    }
   1528 
   1529    return (((ULong)ew) << 32) | ((ULong)rmode);
   1530 }
   1531 
   1532 
   1533 /* CLEAN HELPER */
   1534 /* Given sseround as an IRRoundingMode value, create a suitable SSE
   1535    native format MXCSR value. */
   1536 UInt x86g_create_mxcsr ( UInt sseround )
   1537 {
   1538    sseround &= 3;
   1539    return 0x1F80 | (sseround << 13);
   1540 }
   1541 
   1542 
   1543 /* CALLED FROM GENERATED CODE */
   1544 /* DIRTY HELPER (writes guest state) */
   1545 /* Initialise the x87 FPU state as per 'finit'. */
   1546 void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
   1547 {
   1548    Int i;
   1549    gst->guest_FTOP = 0;
   1550    for (i = 0; i < 8; i++) {
   1551       gst->guest_FPTAG[i] = 0; /* empty */
   1552       gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   1553    }
   1554    gst->guest_FPROUND = (UInt)Irrm_NEAREST;
   1555    gst->guest_FC3210  = 0;
   1556 }
   1557 
   1558 
   1559 /* This is used to implement both 'frstor' and 'fldenv'.  The latter
   1560    appears to differ from the former only in that the 8 FP registers
   1561    themselves are not transferred into the guest state. */
   1562 static
   1563 VexEmWarn do_put_x87 ( Bool moveRegs,
   1564                        /*IN*/UChar* x87_state,
   1565                        /*OUT*/VexGuestX86State* vex_state )
   1566 {
   1567    Int        stno, preg;
   1568    UInt       tag;
   1569    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   1570    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   1571    Fpu_State* x87     = (Fpu_State*)x87_state;
   1572    UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
   1573    UInt       tagw    = x87->env[FP_ENV_TAG];
   1574    UInt       fpucw   = x87->env[FP_ENV_CTRL];
   1575    UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
   1576    VexEmWarn  ew;
   1577    UInt       fpround;
   1578    ULong      pair;
   1579 
   1580    /* Copy registers and tags */
   1581    for (stno = 0; stno < 8; stno++) {
   1582       preg = (stno + ftop) & 7;
   1583       tag = (tagw >> (2*preg)) & 3;
   1584       if (tag == 3) {
   1585          /* register is empty */
   1586          /* hmm, if it's empty, does it still get written?  Probably
   1587             safer to say it does.  If we don't, memcheck could get out
   1588             of sync, in that it thinks all FP registers are defined by
   1589             this helper, but in reality some have not been updated. */
   1590          if (moveRegs)
   1591             vexRegs[preg] = 0; /* IEEE754 64-bit zero */
   1592          vexTags[preg] = 0;
   1593       } else {
   1594          /* register is non-empty */
   1595          if (moveRegs)
   1596             convert_f80le_to_f64le( &x87->reg[10*stno],
   1597                                     (UChar*)&vexRegs[preg] );
   1598          vexTags[preg] = 1;
   1599       }
   1600    }
   1601 
   1602    /* stack pointer */
   1603    vex_state->guest_FTOP = ftop;
   1604 
   1605    /* status word */
   1606    vex_state->guest_FC3210 = c3210;
   1607 
   1608    /* handle the control word, setting FPROUND and detecting any
   1609       emulation warnings. */
   1610    pair    = x86g_check_fldcw ( (UInt)fpucw );
   1611    fpround = (UInt)pair;
   1612    ew      = (VexEmWarn)(pair >> 32);
   1613 
   1614    vex_state->guest_FPROUND = fpround & 3;
   1615 
   1616    /* emulation warnings --> caller */
   1617    return ew;
   1618 }
   1619 
   1620 
   1621 /* Create an x87 FPU state from the guest state, as close as
   1622    we can approximate it. */
   1623 static
   1624 void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
   1625                   /*OUT*/UChar* x87_state )
   1626 {
   1627    Int        i, stno, preg;
   1628    UInt       tagw;
   1629    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   1630    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   1631    Fpu_State* x87     = (Fpu_State*)x87_state;
   1632    UInt       ftop    = vex_state->guest_FTOP;
   1633    UInt       c3210   = vex_state->guest_FC3210;
   1634 
   1635    for (i = 0; i < 14; i++)
   1636       x87->env[i] = 0;
   1637 
   1638    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   1639    x87->env[FP_ENV_STAT]
   1640       = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   1641    x87->env[FP_ENV_CTRL]
   1642       = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
   1643 
   1644    /* Dump the register stack in ST order. */
   1645    tagw = 0;
   1646    for (stno = 0; stno < 8; stno++) {
   1647       preg = (stno + ftop) & 7;
   1648       if (vexTags[preg] == 0) {
   1649          /* register is empty */
   1650          tagw |= (3 << (2*preg));
   1651          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
   1652                                  &x87->reg[10*stno] );
   1653       } else {
   1654          /* register is full. */
   1655          tagw |= (0 << (2*preg));
   1656          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
   1657                                  &x87->reg[10*stno] );
   1658       }
   1659    }
   1660    x87->env[FP_ENV_TAG] = toUShort(tagw);
   1661 }
   1662 
   1663 
   1664 /* CALLED FROM GENERATED CODE */
   1665 /* DIRTY HELPER (reads guest state, writes guest mem) */
   1666 void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
   1667 {
   1668    /* Somewhat roundabout, but at least it's simple. */
   1669    Fpu_State tmp;
   1670    UShort*   addrS = (UShort*)addr;
   1671    UChar*    addrC = (UChar*)addr;
   1672    U128*     xmm   = (U128*)(addr + 160);
   1673    UInt      mxcsr;
   1674    UShort    fp_tags;
   1675    UInt      summary_tags;
   1676    Int       r, stno;
   1677    UShort    *srcS, *dstS;
   1678 
   1679    do_get_x87( gst, (UChar*)&tmp );
   1680    mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );
   1681 
   1682    /* Now build the proper fxsave image from the x87 image we just
   1683       made. */
   1684 
   1685    addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   1686    addrS[1]  = tmp.env[FP_ENV_STAT]; /* FCW: fpu status word */
   1687 
   1688    /* set addrS[2] in an endian-independent way */
   1689    summary_tags = 0;
   1690    fp_tags = tmp.env[FP_ENV_TAG];
   1691    for (r = 0; r < 8; r++) {
   1692       if ( ((fp_tags >> (2*r)) & 3) != 3 )
   1693          summary_tags |= (1 << r);
   1694    }
   1695    addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   1696    addrC[5]  = 0; /* pad */
   1697 
   1698    addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
   1699    addrS[4]  = 0;
   1700    addrS[5]  = 0; /* FPU IP (bogus) */
   1701    addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
   1702                      could conceivably dump %CS here) */
   1703 
   1704    addrS[7]  = 0; /* Intel reserved */
   1705 
   1706    addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
   1707    addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
   1708    addrS[10] = 0; /* segment selector for above operand pointer; %DS
   1709                      perhaps? */
   1710    addrS[11] = 0; /* Intel reserved */
   1711 
   1712    addrS[12] = toUShort(mxcsr);  /* MXCSR */
   1713    addrS[13] = toUShort(mxcsr >> 16);
   1714 
   1715    addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
   1716    addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */
   1717 
   1718    /* Copy in the FP registers, in ST order. */
   1719    for (stno = 0; stno < 8; stno++) {
   1720       srcS = (UShort*)(&tmp.reg[10*stno]);
   1721       dstS = (UShort*)(&addrS[16 + 8*stno]);
   1722       dstS[0] = srcS[0];
   1723       dstS[1] = srcS[1];
   1724       dstS[2] = srcS[2];
   1725       dstS[3] = srcS[3];
   1726       dstS[4] = srcS[4];
   1727       dstS[5] = 0;
   1728       dstS[6] = 0;
   1729       dstS[7] = 0;
   1730    }
   1731 
   1732    /* That's the first 160 bytes of the image done.  Now only %xmm0
   1733       .. %xmm7 remain to be copied.  If the host is big-endian, these
   1734       need to be byte-swapped. */
   1735    vassert(host_is_little_endian());
   1736 
   1737 #  define COPY_U128(_dst,_src)                       \
   1738       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
   1739            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
   1740       while (0)
   1741 
   1742    COPY_U128( xmm[0], gst->guest_XMM0 );
   1743    COPY_U128( xmm[1], gst->guest_XMM1 );
   1744    COPY_U128( xmm[2], gst->guest_XMM2 );
   1745    COPY_U128( xmm[3], gst->guest_XMM3 );
   1746    COPY_U128( xmm[4], gst->guest_XMM4 );
   1747    COPY_U128( xmm[5], gst->guest_XMM5 );
   1748    COPY_U128( xmm[6], gst->guest_XMM6 );
   1749    COPY_U128( xmm[7], gst->guest_XMM7 );
   1750 
   1751 #  undef COPY_U128
   1752 }
   1753 
   1754 
   1755 /* CALLED FROM GENERATED CODE */
   1756 /* DIRTY HELPER (writes guest state, reads guest mem) */
   1757 VexEmWarn x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
   1758 {
   1759    Fpu_State tmp;
   1760    VexEmWarn warnX87 = EmWarn_NONE;
   1761    VexEmWarn warnXMM = EmWarn_NONE;
   1762    UShort*   addrS   = (UShort*)addr;
   1763    UChar*    addrC   = (UChar*)addr;
   1764    U128*     xmm     = (U128*)(addr + 160);
   1765    UShort    fp_tags;
   1766    Int       r, stno, i;
   1767 
   1768    /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
   1769       to be byte-swapped. */
   1770    vassert(host_is_little_endian());
   1771 
   1772 #  define COPY_U128(_dst,_src)                       \
   1773       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
   1774            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
   1775       while (0)
   1776 
   1777    COPY_U128( gst->guest_XMM0, xmm[0] );
   1778    COPY_U128( gst->guest_XMM1, xmm[1] );
   1779    COPY_U128( gst->guest_XMM2, xmm[2] );
   1780    COPY_U128( gst->guest_XMM3, xmm[3] );
   1781    COPY_U128( gst->guest_XMM4, xmm[4] );
   1782    COPY_U128( gst->guest_XMM5, xmm[5] );
   1783    COPY_U128( gst->guest_XMM6, xmm[6] );
   1784    COPY_U128( gst->guest_XMM7, xmm[7] );
   1785 
   1786 #  undef COPY_U128
   1787 
   1788    /* Copy the x87 registers out of the image, into a temporary
   1789       Fpu_State struct. */
   1790    for (i = 0; i < 14; i++) tmp.env[i] = 0;
   1791    for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   1792    /* fill in tmp.reg[0..7] */
   1793    for (stno = 0; stno < 8; stno++) {
   1794       UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
   1795       UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
   1796       dstS[0] = srcS[0];
   1797       dstS[1] = srcS[1];
   1798       dstS[2] = srcS[2];
   1799       dstS[3] = srcS[3];
   1800       dstS[4] = srcS[4];
   1801    }
   1802    /* fill in tmp.env[0..13] */
   1803    tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   1804    tmp.env[FP_ENV_STAT] = addrS[1]; /* FCW: fpu status word */
   1805 
   1806    fp_tags = 0;
   1807    for (r = 0; r < 8; r++) {
   1808       if (addrC[4] & (1<<r))
   1809          fp_tags |= (0 << (2*r)); /* EMPTY */
   1810       else
   1811          fp_tags |= (3 << (2*r)); /* VALID -- not really precise enough. */
   1812    }
   1813    tmp.env[FP_ENV_TAG] = fp_tags;
   1814 
   1815    /* Now write 'tmp' into the guest state. */
   1816    warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
   1817 
   1818    { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
   1819                 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
   1820      ULong w64 = x86g_check_ldmxcsr( w32 );
   1821 
   1822      warnXMM = (VexEmWarn)(w64 >> 32);
   1823 
   1824      gst->guest_SSEROUND = (UInt)w64;
   1825    }
   1826 
   1827    /* Prefer an X87 emwarn over an XMM one, if both exist. */
   1828    if (warnX87 != EmWarn_NONE)
   1829       return warnX87;
   1830    else
   1831       return warnXMM;
   1832 }
   1833 
   1834 
   1835 /* CALLED FROM GENERATED CODE */
   1836 /* DIRTY HELPER (reads guest state, writes guest mem) */
   1837 void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
   1838 {
   1839    do_get_x87( gst, (UChar*)addr );
   1840 }
   1841 
   1842 /* CALLED FROM GENERATED CODE */
   1843 /* DIRTY HELPER (writes guest state, reads guest mem) */
   1844 VexEmWarn x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
   1845 {
   1846    return do_put_x87( True/*regs too*/, (UChar*)addr, gst );
   1847 }
   1848 
   1849 /* CALLED FROM GENERATED CODE */
   1850 /* DIRTY HELPER (reads guest state, writes guest mem) */
   1851 void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
   1852 {
   1853    /* Somewhat roundabout, but at least it's simple. */
   1854    Int       i;
   1855    UShort*   addrP = (UShort*)addr;
   1856    Fpu_State tmp;
   1857    do_get_x87( gst, (UChar*)&tmp );
   1858    for (i = 0; i < 14; i++)
   1859       addrP[i] = tmp.env[i];
   1860 }
   1861 
   1862 /* CALLED FROM GENERATED CODE */
   1863 /* DIRTY HELPER (writes guest state, reads guest mem) */
   1864 VexEmWarn x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
   1865 {
   1866    return do_put_x87( False/*don't move regs*/, (UChar*)addr, gst);
   1867 }
   1868 
   1869 
   1870 /*---------------------------------------------------------------*/
   1871 /*--- Misc integer helpers, including rotates and CPUID.      ---*/
   1872 /*---------------------------------------------------------------*/
   1873 
   1874 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   1875 /* Calculate both flags and value result for rotate right
   1876    through the carry bit.  Result in low 32 bits,
   1877    new flags (OSZACP) in high 32 bits.
   1878 */
   1879 ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
   1880 {
   1881    UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
   1882 
   1883    switch (sz) {
   1884       case 4:
   1885          cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
   1886          of        = ((arg >> 31) ^ cf) & 1;
   1887          while (tempCOUNT > 0) {
   1888             tempcf = arg & 1;
   1889             arg    = (arg >> 1) | (cf << 31);
   1890             cf     = tempcf;
   1891             tempCOUNT--;
   1892          }
   1893          break;
   1894       case 2:
   1895          while (tempCOUNT >= 17) tempCOUNT -= 17;
   1896          cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
   1897          of        = ((arg >> 15) ^ cf) & 1;
   1898          while (tempCOUNT > 0) {
   1899             tempcf = arg & 1;
   1900             arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
   1901             cf     = tempcf;
   1902             tempCOUNT--;
   1903          }
   1904          break;
   1905       case 1:
   1906          while (tempCOUNT >= 9) tempCOUNT -= 9;
   1907          cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
   1908          of        = ((arg >> 7) ^ cf) & 1;
   1909          while (tempCOUNT > 0) {
   1910             tempcf = arg & 1;
   1911             arg    = ((arg >> 1) & 0x7F) | (cf << 7);
   1912             cf     = tempcf;
   1913             tempCOUNT--;
   1914          }
   1915          break;
   1916       default:
   1917          vpanic("calculate_RCR: invalid size");
   1918    }
   1919 
   1920    cf &= 1;
   1921    of &= 1;
   1922    eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   1923    eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
   1924 
   1925    return (((ULong)eflags_in) << 32) | ((ULong)arg);
   1926 }
   1927 
   1928 
   1929 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   1930 /* Calculate both flags and value result for rotate left
   1931    through the carry bit.  Result in low 32 bits,
   1932    new flags (OSZACP) in high 32 bits.
   1933 */
   1934 ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
   1935 {
   1936    UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
   1937 
   1938    switch (sz) {
   1939       case 4:
   1940          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
   1941          while (tempCOUNT > 0) {
   1942             tempcf = (arg >> 31) & 1;
   1943             arg    = (arg << 1) | (cf & 1);
   1944             cf     = tempcf;
   1945             tempCOUNT--;
   1946          }
   1947          of = ((arg >> 31) ^ cf) & 1;
   1948          break;
   1949       case 2:
   1950          while (tempCOUNT >= 17) tempCOUNT -= 17;
   1951          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
   1952          while (tempCOUNT > 0) {
   1953             tempcf = (arg >> 15) & 1;
   1954             arg    = 0xFFFF & ((arg << 1) | (cf & 1));
   1955             cf     = tempcf;
   1956             tempCOUNT--;
   1957          }
   1958          of = ((arg >> 15) ^ cf) & 1;
   1959          break;
   1960       case 1:
   1961          while (tempCOUNT >= 9) tempCOUNT -= 9;
   1962          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
   1963          while (tempCOUNT > 0) {
   1964             tempcf = (arg >> 7) & 1;
   1965             arg    = 0xFF & ((arg << 1) | (cf & 1));
   1966             cf     = tempcf;
   1967             tempCOUNT--;
   1968          }
   1969          of = ((arg >> 7) ^ cf) & 1;
   1970          break;
   1971       default:
   1972          vpanic("calculate_RCL: invalid size");
   1973    }
   1974 
   1975    cf &= 1;
   1976    of &= 1;
   1977    eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   1978    eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
   1979 
   1980    return (((ULong)eflags_in) << 32) | ((ULong)arg);
   1981 }
   1982 
   1983 
   1984 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   1985 /* Calculate both flags and value result for DAA/DAS/AAA/AAS.
   1986    AX value in low half of arg, OSZACP in upper half.
   1987    See guest-x86/toIR.c usage point for details.
   1988 */
   1989 static UInt calc_parity_8bit ( UInt w32 ) {
   1990    UInt i;
   1991    UInt p = 1;
   1992    for (i = 0; i < 8; i++)
   1993       p ^= (1 & (w32 >> i));
   1994    return p;
   1995 }
   1996 UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
   1997 {
   1998    UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   1999    UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   2000    UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   2001    UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   2002    UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   2003    UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   2004    UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   2005    UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   2006    UInt result = 0;
   2007 
   2008    switch (opcode) {
   2009       case 0x27: { /* DAA */
   2010          UInt old_AL = r_AL;
   2011          UInt old_C  = r_C;
   2012          r_C = 0;
   2013          if ((r_AL & 0xF) > 9 || r_A == 1) {
   2014             r_AL = r_AL + 6;
   2015             r_C  = old_C;
   2016             if (r_AL >= 0x100) r_C = 1;
   2017             r_A = 1;
   2018          } else {
   2019             r_A = 0;
   2020          }
   2021          if (old_AL > 0x99 || old_C == 1) {
   2022             r_AL = r_AL + 0x60;
   2023             r_C  = 1;
   2024          } else {
   2025             r_C = 0;
   2026          }
   2027          /* O is undefined.  S Z and P are set according to the
   2028 	    result. */
   2029          r_AL &= 0xFF;
   2030          r_O = 0; /* let's say */
   2031          r_S = (r_AL & 0x80) ? 1 : 0;
   2032          r_Z = (r_AL == 0) ? 1 : 0;
   2033          r_P = calc_parity_8bit( r_AL );
   2034          break;
   2035       }
   2036       case 0x2F: { /* DAS */
   2037          UInt old_AL = r_AL;
   2038          UInt old_C  = r_C;
   2039          r_C = 0;
   2040          if ((r_AL & 0xF) > 9 || r_A == 1) {
   2041             Bool borrow = r_AL < 6;
   2042             r_AL = r_AL - 6;
   2043             r_C  = old_C;
   2044             if (borrow) r_C = 1;
   2045             r_A = 1;
   2046          } else {
   2047             r_A = 0;
   2048          }
   2049          if (old_AL > 0x99 || old_C == 1) {
   2050             r_AL = r_AL - 0x60;
   2051             r_C  = 1;
   2052          } else {
   2053             /* Intel docs are wrong: r_C = 0; */
   2054          }
   2055          /* O is undefined.  S Z and P are set according to the
   2056 	    result. */
   2057          r_AL &= 0xFF;
   2058          r_O = 0; /* let's say */
   2059          r_S = (r_AL & 0x80) ? 1 : 0;
   2060          r_Z = (r_AL == 0) ? 1 : 0;
   2061          r_P = calc_parity_8bit( r_AL );
   2062          break;
   2063       }
   2064       case 0x37: { /* AAA */
   2065          Bool nudge = r_AL > 0xF9;
   2066          if ((r_AL & 0xF) > 9 || r_A == 1) {
   2067             r_AL = r_AL + 6;
   2068             r_AH = r_AH + 1 + (nudge ? 1 : 0);
   2069             r_A  = 1;
   2070             r_C  = 1;
   2071             r_AL = r_AL & 0xF;
   2072          } else {
   2073             r_A  = 0;
   2074             r_C  = 0;
   2075             r_AL = r_AL & 0xF;
   2076          }
   2077          /* O S Z and P are undefined. */
   2078          r_O = r_S = r_Z = r_P = 0; /* let's say */
   2079          break;
   2080       }
   2081       case 0x3F: { /* AAS */
   2082          Bool nudge = r_AL < 0x06;
   2083          if ((r_AL & 0xF) > 9 || r_A == 1) {
   2084             r_AL = r_AL - 6;
   2085             r_AH = r_AH - 1 - (nudge ? 1 : 0);
   2086             r_A  = 1;
   2087             r_C  = 1;
   2088             r_AL = r_AL & 0xF;
   2089          } else {
   2090             r_A  = 0;
   2091             r_C  = 0;
   2092             r_AL = r_AL & 0xF;
   2093          }
   2094          /* O S Z and P are undefined. */
   2095          r_O = r_S = r_Z = r_P = 0; /* let's say */
   2096          break;
   2097       }
   2098       default:
   2099          vassert(0);
   2100    }
   2101    result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
   2102             | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
   2103             | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
   2104             | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
   2105             | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
   2106             | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
   2107             | ( (r_AH & 0xFF) << 8 )
   2108             | ( (r_AL & 0xFF) << 0 );
   2109    return result;
   2110 }
   2111 
   2112 UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
   2113 {
   2114    UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   2115    UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   2116    UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   2117    UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   2118    UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   2119    UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   2120    UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   2121    UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   2122    UInt result = 0;
   2123 
   2124    switch (opcode) {
   2125       case 0xD4: { /* AAM */
   2126          r_AH = r_AL / 10;
   2127          r_AL = r_AL % 10;
   2128          break;
   2129       }
   2130       case 0xD5: { /* AAD */
   2131          r_AL = ((r_AH * 10) + r_AL) & 0xff;
   2132          r_AH = 0;
   2133          break;
   2134       }
   2135       default:
   2136          vassert(0);
   2137    }
   2138 
   2139    r_O = 0; /* let's say (undefined) */
   2140    r_C = 0; /* let's say (undefined) */
   2141    r_A = 0; /* let's say (undefined) */
   2142    r_S = (r_AL & 0x80) ? 1 : 0;
   2143    r_Z = (r_AL == 0) ? 1 : 0;
   2144    r_P = calc_parity_8bit( r_AL );
   2145 
   2146    result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
   2147             | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
   2148             | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
   2149             | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
   2150             | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
   2151             | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
   2152             | ( (r_AH & 0xFF) << 8 )
   2153             | ( (r_AL & 0xFF) << 0 );
   2154    return result;
   2155 }
   2156 
   2157 
   2158 /* CALLED FROM GENERATED CODE */
   2159 /* DIRTY HELPER (non-referentially-transparent) */
   2160 /* Horrible hack.  On non-x86 platforms, return 1. */
   2161 ULong x86g_dirtyhelper_RDTSC ( void )
   2162 {
   2163 #  if defined(__i386__)
   2164    ULong res;
   2165    __asm__ __volatile__("rdtsc" : "=A" (res));
   2166    return res;
   2167 #  else
   2168    return 1ULL;
   2169 #  endif
   2170 }
   2171 
   2172 
   2173 /* CALLED FROM GENERATED CODE */
   2174 /* DIRTY HELPER (modifies guest state) */
   2175 /* Claim to be a P55C (Intel Pentium/MMX) */
   2176 void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
   2177 {
   2178    switch (st->guest_EAX) {
   2179       case 0:
   2180          st->guest_EAX = 0x1;
   2181          st->guest_EBX = 0x756e6547;
   2182          st->guest_ECX = 0x6c65746e;
   2183          st->guest_EDX = 0x49656e69;
   2184          break;
   2185       default:
   2186          st->guest_EAX = 0x543;
   2187          st->guest_EBX = 0x0;
   2188          st->guest_ECX = 0x0;
   2189          st->guest_EDX = 0x8001bf;
   2190          break;
   2191    }
   2192 }
   2193 
   2194 /* CALLED FROM GENERATED CODE */
   2195 /* DIRTY HELPER (modifies guest state) */
   2196 /* Claim to be the following SSE1-capable CPU:
   2197    vendor_id       : GenuineIntel
   2198    cpu family      : 6
   2199    model           : 11
   2200    model name      : Intel(R) Pentium(R) III CPU family      1133MHz
   2201    stepping        : 1
   2202    cpu MHz         : 1131.013
   2203    cache size      : 512 KB
   2204 */
   2205 void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
   2206 {
   2207    switch (st->guest_EAX) {
   2208       case 0:
   2209          st->guest_EAX = 0x00000002;
   2210          st->guest_EBX = 0x756e6547;
   2211          st->guest_ECX = 0x6c65746e;
   2212          st->guest_EDX = 0x49656e69;
   2213          break;
   2214       case 1:
   2215          st->guest_EAX = 0x000006b1;
   2216          st->guest_EBX = 0x00000004;
   2217          st->guest_ECX = 0x00000000;
   2218          st->guest_EDX = 0x0383fbff;
   2219          break;
   2220       default:
   2221          st->guest_EAX = 0x03020101;
   2222          st->guest_EBX = 0x00000000;
   2223          st->guest_ECX = 0x00000000;
   2224          st->guest_EDX = 0x0c040883;
   2225          break;
   2226    }
   2227 }
   2228 
   2229 /* Claim to be the following SSSE3-capable CPU (2 x ...):
   2230    vendor_id       : GenuineIntel
   2231    cpu family      : 6
   2232    model           : 15
   2233    model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
   2234    stepping        : 6
   2235    cpu MHz         : 2394.000
   2236    cache size      : 4096 KB
   2237    physical id     : 0
   2238    siblings        : 2
   2239    core id         : 0
   2240    cpu cores       : 2
   2241    fpu             : yes
   2242    fpu_exception   : yes
   2243    cpuid level     : 10
   2244    wp              : yes
   2245    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
   2246                      mtrr pge mca cmov pat pse36 clflush dts acpi
   2247                      mmx fxsr sse sse2 ss ht tm syscall nx lm
   2248                      constant_tsc pni monitor ds_cpl vmx est tm2
   2249                      cx16 xtpr lahf_lm
   2250    bogomips        : 4798.78
   2251    clflush size    : 64
   2252    cache_alignment : 64
   2253    address sizes   : 36 bits physical, 48 bits virtual
   2254    power management:
   2255 */
   2256 void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
   2257 {
   2258 #  define SET_ABCD(_a,_b,_c,_d)               \
   2259       do { st->guest_EAX = (UInt)(_a);        \
   2260            st->guest_EBX = (UInt)(_b);        \
   2261            st->guest_ECX = (UInt)(_c);        \
   2262            st->guest_EDX = (UInt)(_d);        \
   2263       } while (0)
   2264 
   2265    switch (st->guest_EAX) {
   2266       case 0x00000000:
   2267          SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
   2268          break;
   2269       case 0x00000001:
   2270          SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
   2271          break;
   2272       case 0x00000002:
   2273          SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
   2274          break;
   2275       case 0x00000003:
   2276          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2277          break;
   2278       case 0x00000004: {
   2279          switch (st->guest_ECX) {
   2280             case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
   2281                                       0x0000003f, 0x00000001); break;
   2282             case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
   2283                                       0x0000003f, 0x00000001); break;
   2284             case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
   2285                                       0x00000fff, 0x00000001); break;
   2286             default:         SET_ABCD(0x00000000, 0x00000000,
   2287                                       0x00000000, 0x00000000); break;
   2288          }
   2289          break;
   2290       }
   2291       case 0x00000005:
   2292          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
   2293          break;
   2294       case 0x00000006:
   2295          SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
   2296          break;
   2297       case 0x00000007:
   2298          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2299          break;
   2300       case 0x00000008:
   2301          SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
   2302          break;
   2303       case 0x00000009:
   2304          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2305          break;
   2306       case 0x0000000a:
   2307       unhandled_eax_value:
   2308          SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
   2309          break;
   2310       case 0x80000000:
   2311          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
   2312          break;
   2313       case 0x80000001:
   2314          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
   2315          break;
   2316       case 0x80000002:
   2317          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
   2318          break;
   2319       case 0x80000003:
   2320          SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
   2321          break;
   2322       case 0x80000004:
   2323          SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
   2324          break;
   2325       case 0x80000005:
   2326          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2327          break;
   2328       case 0x80000006:
   2329          SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
   2330          break;
   2331       case 0x80000007:
   2332          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2333          break;
   2334       case 0x80000008:
   2335          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
   2336          break;
   2337       default:
   2338          goto unhandled_eax_value;
   2339    }
   2340 #  undef SET_ABCD
   2341 }
   2342 
   2343 
   2344 /* CALLED FROM GENERATED CODE */
   2345 /* DIRTY HELPER (non-referentially-transparent) */
   2346 /* Horrible hack.  On non-x86 platforms, return 0. */
   2347 UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
   2348 {
   2349 #  if defined(__i386__)
   2350    UInt r = 0;
   2351    portno &= 0xFFFF;
   2352    switch (sz) {
   2353       case 4:
   2354          __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
   2355                               : "=a" (r) : "Nd" (portno));
   2356 	 break;
   2357       case 2:
   2358          __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
   2359                               : "=a" (r) : "Nd" (portno));
   2360 	 break;
   2361       case 1:
   2362          __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
   2363                               : "=a" (r) : "Nd" (portno));
   2364 	 break;
   2365       default:
   2366          break;
   2367    }
   2368    return r;
   2369 #  else
   2370    return 0;
   2371 #  endif
   2372 }
   2373 
   2374 
   2375 /* CALLED FROM GENERATED CODE */
   2376 /* DIRTY HELPER (non-referentially-transparent) */
   2377 /* Horrible hack.  On non-x86 platforms, do nothing. */
   2378 void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
   2379 {
   2380 #  if defined(__i386__)
   2381    portno &= 0xFFFF;
   2382    switch (sz) {
   2383       case 4:
   2384          __asm__ __volatile__("outl %0, %w1"
   2385                               : : "a" (data), "Nd" (portno));
   2386 	 break;
   2387       case 2:
   2388          __asm__ __volatile__("outw %w0, %w1"
   2389                               : : "a" (data), "Nd" (portno));
   2390 	 break;
   2391       case 1:
   2392          __asm__ __volatile__("outb %b0, %w1"
   2393                               : : "a" (data), "Nd" (portno));
   2394 	 break;
   2395       default:
   2396          break;
   2397    }
   2398 #  else
   2399    /* do nothing */
   2400 #  endif
   2401 }
   2402 
   2403 /* CALLED FROM GENERATED CODE */
   2404 /* DIRTY HELPER (non-referentially-transparent) */
   2405 /* Horrible hack.  On non-x86 platforms, do nothing. */
   2406 /* op = 0: call the native SGDT instruction.
   2407    op = 1: call the native SIDT instruction.
   2408 */
   2409 void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
   2410 #  if defined(__i386__)
   2411    switch (op) {
   2412       case 0:
   2413          __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
   2414          break;
   2415       case 1:
   2416          __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
   2417          break;
   2418       default:
   2419          vpanic("x86g_dirtyhelper_SxDT");
   2420    }
   2421 #  else
   2422    /* do nothing */
   2423    UChar* p = (UChar*)address;
   2424    p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
   2425 #  endif
   2426 }
   2427 
   2428 /*---------------------------------------------------------------*/
   2429 /*--- Helpers for MMX/SSE/SSE2.                               ---*/
   2430 /*---------------------------------------------------------------*/
   2431 
   2432 static inline UChar abdU8 ( UChar xx, UChar yy ) {
   2433    return toUChar(xx>yy ? xx-yy : yy-xx);
   2434 }
   2435 
   2436 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   2437    return (((ULong)w1) << 32) | ((ULong)w0);
   2438 }
   2439 
   2440 static inline UShort sel16x4_3 ( ULong w64 ) {
   2441    UInt hi32 = toUInt(w64 >> 32);
   2442    return toUShort(hi32 >> 16);
   2443 }
   2444 static inline UShort sel16x4_2 ( ULong w64 ) {
   2445    UInt hi32 = toUInt(w64 >> 32);
   2446    return toUShort(hi32);
   2447 }
   2448 static inline UShort sel16x4_1 ( ULong w64 ) {
   2449    UInt lo32 = toUInt(w64);
   2450    return toUShort(lo32 >> 16);
   2451 }
   2452 static inline UShort sel16x4_0 ( ULong w64 ) {
   2453    UInt lo32 = toUInt(w64);
   2454    return toUShort(lo32);
   2455 }
   2456 
   2457 static inline UChar sel8x8_7 ( ULong w64 ) {
   2458    UInt hi32 = toUInt(w64 >> 32);
   2459    return toUChar(hi32 >> 24);
   2460 }
   2461 static inline UChar sel8x8_6 ( ULong w64 ) {
   2462    UInt hi32 = toUInt(w64 >> 32);
   2463    return toUChar(hi32 >> 16);
   2464 }
   2465 static inline UChar sel8x8_5 ( ULong w64 ) {
   2466    UInt hi32 = toUInt(w64 >> 32);
   2467    return toUChar(hi32 >> 8);
   2468 }
   2469 static inline UChar sel8x8_4 ( ULong w64 ) {
   2470    UInt hi32 = toUInt(w64 >> 32);
   2471    return toUChar(hi32 >> 0);
   2472 }
   2473 static inline UChar sel8x8_3 ( ULong w64 ) {
   2474    UInt lo32 = toUInt(w64);
   2475    return toUChar(lo32 >> 24);
   2476 }
   2477 static inline UChar sel8x8_2 ( ULong w64 ) {
   2478    UInt lo32 = toUInt(w64);
   2479    return toUChar(lo32 >> 16);
   2480 }
   2481 static inline UChar sel8x8_1 ( ULong w64 ) {
   2482    UInt lo32 = toUInt(w64);
   2483    return toUChar(lo32 >> 8);
   2484 }
   2485 static inline UChar sel8x8_0 ( ULong w64 ) {
   2486    UInt lo32 = toUInt(w64);
   2487    return toUChar(lo32 >> 0);
   2488 }
   2489 
   2490 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2491 ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
   2492 {
   2493    return
   2494       mk32x2(
   2495          (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
   2496             + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
   2497          (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
   2498             + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
   2499       );
   2500 }
   2501 
   2502 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2503 UInt x86g_calculate_mmx_pmovmskb ( ULong xx )
   2504 {
   2505    UInt r = 0;
   2506    if (xx & (1ULL << (64-1))) r |= (1<<7);
   2507    if (xx & (1ULL << (56-1))) r |= (1<<6);
   2508    if (xx & (1ULL << (48-1))) r |= (1<<5);
   2509    if (xx & (1ULL << (40-1))) r |= (1<<4);
   2510    if (xx & (1ULL << (32-1))) r |= (1<<3);
   2511    if (xx & (1ULL << (24-1))) r |= (1<<2);
   2512    if (xx & (1ULL << (16-1))) r |= (1<<1);
   2513    if (xx & (1ULL << ( 8-1))) r |= (1<<0);
   2514    return r;
   2515 }
   2516 
   2517 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2518 ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
   2519 {
   2520    UInt t = 0;
   2521    t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
   2522    t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
   2523    t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
   2524    t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
   2525    t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
   2526    t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
   2527    t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
   2528    t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
   2529    t &= 0xFFFF;
   2530    return (ULong)t;
   2531 }
   2532 
   2533 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2534 UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )
   2535 {
   2536    UInt rHi8 = x86g_calculate_mmx_pmovmskb ( w64hi );
   2537    UInt rLo8 = x86g_calculate_mmx_pmovmskb ( w64lo );
   2538    return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF);
   2539 }
   2540 
   2541 
   2542 /*---------------------------------------------------------------*/
   2543 /*--- Helpers for dealing with segment overrides.             ---*/
   2544 /*---------------------------------------------------------------*/
   2545 
   2546 static inline
   2547 UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
   2548 {
   2549    UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
   2550    UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
   2551    UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
   2552    return (hi << 24) | (mid << 16) | lo;
   2553 }
   2554 
   2555 static inline
   2556 UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
   2557 {
   2558     UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
   2559     UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
   2560     UInt limit = (hi << 16) | lo;
   2561     if (ent->LdtEnt.Bits.Granularity)
   2562        limit = (limit << 12) | 0xFFF;
   2563     return limit;
   2564 }
   2565 
   2566 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2567 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
   2568                               UInt seg_selector, UInt virtual_addr )
   2569 {
   2570    UInt tiBit, base, limit;
   2571    VexGuestX86SegDescr* the_descrs;
   2572 
   2573    Bool verboze = False;
   2574 
   2575    /* If this isn't true, we're in Big Trouble. */
   2576    vassert(8 == sizeof(VexGuestX86SegDescr));
   2577 
   2578    if (verboze)
   2579       vex_printf("x86h_use_seg_selector: "
   2580                  "seg_selector = 0x%x, vaddr = 0x%x\n",
   2581                  seg_selector, virtual_addr);
   2582 
   2583    /* Check for wildly invalid selector. */
   2584    if (seg_selector & ~0xFFFF)
   2585       goto bad;
   2586 
   2587    seg_selector &= 0x0000FFFF;
   2588 
   2589    /* Sanity check the segment selector.  Ensure that RPL=11b (least
   2590       privilege).  This forms the bottom 2 bits of the selector. */
   2591    if ((seg_selector & 3) != 3)
   2592       goto bad;
   2593 
   2594    /* Extract the TI bit (0 means GDT, 1 means LDT) */
   2595    tiBit = (seg_selector >> 2) & 1;
   2596 
   2597    /* Convert the segment selector onto a table index */
   2598    seg_selector >>= 3;
   2599    vassert(seg_selector >= 0 && seg_selector < 8192);
   2600 
   2601    if (tiBit == 0) {
   2602 
   2603       /* GDT access. */
   2604       /* Do we actually have a GDT to look at? */
   2605       if (gdt == 0)
   2606          goto bad;
   2607 
   2608       /* Check for access to non-existent entry. */
   2609       if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
   2610          goto bad;
   2611 
   2612       the_descrs = (VexGuestX86SegDescr*)gdt;
   2613       base  = get_segdescr_base (&the_descrs[seg_selector]);
   2614       limit = get_segdescr_limit(&the_descrs[seg_selector]);
   2615 
   2616    } else {
   2617 
   2618       /* All the same stuff, except for the LDT. */
   2619       if (ldt == 0)
   2620          goto bad;
   2621 
   2622       if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
   2623          goto bad;
   2624 
   2625       the_descrs = (VexGuestX86SegDescr*)ldt;
   2626       base  = get_segdescr_base (&the_descrs[seg_selector]);
   2627       limit = get_segdescr_limit(&the_descrs[seg_selector]);
   2628 
   2629    }
   2630 
   2631    /* Do the limit check.  Note, this check is just slightly too
   2632       slack.  Really it should be "if (virtual_addr + size - 1 >=
   2633       limit)," but we don't have the size info to hand.  Getting it
   2634       could be significantly complex.  */
   2635    if (virtual_addr >= limit)
   2636       goto bad;
   2637 
   2638    if (verboze)
   2639       vex_printf("x86h_use_seg_selector: "
   2640                  "base = 0x%x, addr = 0x%x\n",
   2641                  base, base + virtual_addr);
   2642 
   2643    /* High 32 bits are zero, indicating success. */
   2644    return (ULong)( ((UInt)virtual_addr) + base );
   2645 
   2646  bad:
   2647    return 1ULL << 32;
   2648 }
   2649 
   2650 
   2651 /*---------------------------------------------------------------*/
   2652 /*--- Helpers for dealing with, and describing,               ---*/
   2653 /*--- guest state as a whole.                                 ---*/
   2654 /*---------------------------------------------------------------*/
   2655 
   2656 /* Initialise the entire x86 guest state. */
   2657 /* VISIBLE TO LIBVEX CLIENT */
   2658 void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
   2659 {
   2660    vex_state->guest_EAX = 0;
   2661    vex_state->guest_ECX = 0;
   2662    vex_state->guest_EDX = 0;
   2663    vex_state->guest_EBX = 0;
   2664    vex_state->guest_ESP = 0;
   2665    vex_state->guest_EBP = 0;
   2666    vex_state->guest_ESI = 0;
   2667    vex_state->guest_EDI = 0;
   2668 
   2669    vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   2670    vex_state->guest_CC_DEP1 = 0;
   2671    vex_state->guest_CC_DEP2 = 0;
   2672    vex_state->guest_CC_NDEP = 0;
   2673    vex_state->guest_DFLAG   = 1; /* forwards */
   2674    vex_state->guest_IDFLAG  = 0;
   2675    vex_state->guest_ACFLAG  = 0;
   2676 
   2677    vex_state->guest_EIP = 0;
   2678 
   2679    /* Initialise the simulated FPU */
   2680    x86g_dirtyhelper_FINIT( vex_state );
   2681 
   2682    /* Initialse the SSE state. */
   2683 #  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;
   2684 
   2685    vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
   2686    SSEZERO(vex_state->guest_XMM0);
   2687    SSEZERO(vex_state->guest_XMM1);
   2688    SSEZERO(vex_state->guest_XMM2);
   2689    SSEZERO(vex_state->guest_XMM3);
   2690    SSEZERO(vex_state->guest_XMM4);
   2691    SSEZERO(vex_state->guest_XMM5);
   2692    SSEZERO(vex_state->guest_XMM6);
   2693    SSEZERO(vex_state->guest_XMM7);
   2694 
   2695 #  undef SSEZERO
   2696 
   2697    vex_state->guest_CS  = 0;
   2698    vex_state->guest_DS  = 0;
   2699    vex_state->guest_ES  = 0;
   2700    vex_state->guest_FS  = 0;
   2701    vex_state->guest_GS  = 0;
   2702    vex_state->guest_SS  = 0;
   2703    vex_state->guest_LDT = 0;
   2704    vex_state->guest_GDT = 0;
   2705 
   2706    vex_state->guest_EMWARN = EmWarn_NONE;
   2707 
   2708    /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
   2709    vex_state->guest_TISTART = 0;
   2710    vex_state->guest_TILEN   = 0;
   2711 
   2712    vex_state->guest_NRADDR   = 0;
   2713    vex_state->guest_SC_CLASS = 0;
   2714    vex_state->guest_IP_AT_SYSCALL = 0;
   2715 
   2716    vex_state->padding1 = 0;
   2717    vex_state->padding2 = 0;
   2718    vex_state->padding3 = 0;
   2719 }
   2720 
   2721 
   2722 /* Figure out if any part of the guest state contained in minoff
   2723    .. maxoff requires precise memory exceptions.  If in doubt return
   2724    True (but this is generates significantly slower code).
   2725 
   2726    By default we enforce precise exns for guest %ESP, %EBP and %EIP
   2727    only.  These are the minimum needed to extract correct stack
   2728    backtraces from x86 code.
   2729 */
   2730 Bool guest_x86_state_requires_precise_mem_exns ( Int minoff,
   2731                                                  Int maxoff)
   2732 {
   2733    Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
   2734    Int ebp_max = ebp_min + 4 - 1;
   2735    Int esp_min = offsetof(VexGuestX86State, guest_ESP);
   2736    Int esp_max = esp_min + 4 - 1;
   2737    Int eip_min = offsetof(VexGuestX86State, guest_EIP);
   2738    Int eip_max = eip_min + 4 - 1;
   2739 
   2740    if (maxoff < ebp_min || minoff > ebp_max) {
   2741       /* no overlap with ebp */
   2742    } else {
   2743       return True;
   2744    }
   2745 
   2746    if (maxoff < esp_min || minoff > esp_max) {
   2747       /* no overlap with esp */
   2748    } else {
   2749       return True;
   2750    }
   2751 
   2752    if (maxoff < eip_min || minoff > eip_max) {
   2753       /* no overlap with eip */
   2754    } else {
   2755       return True;
   2756    }
   2757 
   2758    return False;
   2759 }
   2760 
   2761 
   2762 #define ALWAYSDEFD(field)                           \
   2763     { offsetof(VexGuestX86State, field),            \
   2764       (sizeof ((VexGuestX86State*)0)->field) }
   2765 
   2766 VexGuestLayout
   2767    x86guest_layout
   2768       = {
   2769           /* Total size of the guest state, in bytes. */
   2770           .total_sizeB = sizeof(VexGuestX86State),
   2771 
   2772           /* Describe the stack pointer. */
   2773           .offset_SP = offsetof(VexGuestX86State,guest_ESP),
   2774           .sizeof_SP = 4,
   2775 
   2776           /* Describe the frame pointer. */
   2777           .offset_FP = offsetof(VexGuestX86State,guest_EBP),
   2778           .sizeof_FP = 4,
   2779 
   2780           /* Describe the instruction pointer. */
   2781           .offset_IP = offsetof(VexGuestX86State,guest_EIP),
   2782           .sizeof_IP = 4,
   2783 
   2784           /* Describe any sections to be regarded by Memcheck as
   2785              'always-defined'. */
   2786           .n_alwaysDefd = 24,
   2787 
   2788           /* flags thunk: OP and NDEP are always defd, whereas DEP1
   2789              and DEP2 have to be tracked.  See detailed comment in
   2790              gdefs.h on meaning of thunk fields. */
   2791           .alwaysDefd
   2792              = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
   2793                  /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
   2794                  /*  2 */ ALWAYSDEFD(guest_DFLAG),
   2795                  /*  3 */ ALWAYSDEFD(guest_IDFLAG),
   2796                  /*  4 */ ALWAYSDEFD(guest_ACFLAG),
   2797                  /*  5 */ ALWAYSDEFD(guest_EIP),
   2798                  /*  6 */ ALWAYSDEFD(guest_FTOP),
   2799                  /*  7 */ ALWAYSDEFD(guest_FPTAG),
   2800                  /*  8 */ ALWAYSDEFD(guest_FPROUND),
   2801                  /*  9 */ ALWAYSDEFD(guest_FC3210),
   2802                  /* 10 */ ALWAYSDEFD(guest_CS),
   2803                  /* 11 */ ALWAYSDEFD(guest_DS),
   2804                  /* 12 */ ALWAYSDEFD(guest_ES),
   2805                  /* 13 */ ALWAYSDEFD(guest_FS),
   2806                  /* 14 */ ALWAYSDEFD(guest_GS),
   2807                  /* 15 */ ALWAYSDEFD(guest_SS),
   2808                  /* 16 */ ALWAYSDEFD(guest_LDT),
   2809                  /* 17 */ ALWAYSDEFD(guest_GDT),
   2810                  /* 18 */ ALWAYSDEFD(guest_EMWARN),
   2811                  /* 19 */ ALWAYSDEFD(guest_SSEROUND),
   2812                  /* 20 */ ALWAYSDEFD(guest_TISTART),
   2813                  /* 21 */ ALWAYSDEFD(guest_TILEN),
   2814                  /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
   2815                  /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
   2816                }
   2817         };
   2818 
   2819 
   2820 /*---------------------------------------------------------------*/
   2821 /*--- end                                 guest_x86_helpers.c ---*/
   2822 /*---------------------------------------------------------------*/
   2823