Home | History | Annotate | Download | only in priv
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                               guest_x86_helpers.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2012 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex_emwarn.h"
     38 #include "libvex_guest_x86.h"
     39 #include "libvex_ir.h"
     40 #include "libvex.h"
     41 
     42 #include "main_util.h"
     43 #include "guest_generic_bb_to_IR.h"
     44 #include "guest_x86_defs.h"
     45 #include "guest_generic_x87.h"
     46 
     47 
     48 /* This file contains helper functions for x86 guest code.
     49    Calls to these functions are generated by the back end.
     50    These calls are of course in the host machine code and
     51    this file will be compiled to host machine code, so that
     52    all makes sense.
     53 
     54    Only change the signatures of these helper functions very
     55    carefully.  If you change the signature here, you'll have to change
     56    the parameters passed to it in the IR calls constructed by
     57    guest-x86/toIR.c.
     58 
     59    The convention used is that all functions called from generated
     60    code are named x86g_<something>, and any function whose name lacks
     61    that prefix is not called from generated code.  Note that some
     62    LibVEX_* functions can however be called by VEX's client, but that
     63    is not the same as calling them from VEX-generated code.
     64 */
     65 
     66 
     67 /* Set to 1 to get detailed profiling info about use of the flag
     68    machinery. */
     69 #define PROFILE_EFLAGS 0
     70 
     71 
     72 /*---------------------------------------------------------------*/
     73 /*--- %eflags run-time helpers.                               ---*/
     74 /*---------------------------------------------------------------*/
     75 
/* Parity-flag lookup, indexed by the low byte of a result.  Entry i
   is X86G_CC_MASK_P when byte value i contains an even number of set
   bits (x86 PF semantics), and 0 otherwise. */
static const UChar parity_table[256] = {
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
    110 
    111 /* generalised left-shifter */
    112 inline static Int lshift ( Int x, Int n )
    113 {
    114    if (n >= 0)
    115       return x << n;
    116    else
    117       return x >> (-n);
    118 }
    119 
    120 /* identity on ULong */
    121 static inline ULong idULong ( ULong x )
    122 {
    123    return x;
    124 }
    125 
    126 
/* Common prologue for all the ACTIONS_* macros below: copies the
   thunk formals into local names (CC_DEP1/CC_DEP2/CC_NDEP) and
   builds the width-dependent DATA_MASK and SIGN_MASK for a
   __data_bits-wide operation. */
#define PREAMBLE(__data_bits)					\
   /* const */ UInt DATA_MASK 					\
      = __data_bits==8 ? 0xFF 					\
                       : (__data_bits==16 ? 0xFFFF 		\
                                          : 0xFFFFFFFF); 	\
   /* const */ UInt SIGN_MASK = 1 << (__data_bits - 1);		\
   /* const */ UInt CC_DEP1 = cc_dep1_formal;			\
   /* const */ UInt CC_DEP2 = cc_dep2_formal;			\
   /* const */ UInt CC_NDEP = cc_ndep_formal;			\
   /* Four bogus assignments, which hopefully gcc can     */	\
   /* optimise away, and which stop it complaining about  */	\
   /* unused variables.                                   */	\
   SIGN_MASK = SIGN_MASK;					\
   DATA_MASK = DATA_MASK;					\
   CC_DEP2 = CC_DEP2;						\
   CC_NDEP = CC_NDEP;
    143 
    144 
    145 /*-------------------------------------------------------------*/
    146 
/* ADD: DEP1 = argL, DEP2 = argR, NDEP unused.  res = argL + argR;
   computes CF,PF,AF,ZF,SF,OF at DATA_BITS width. */
#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     Int argL, argR, res;					\
     argL = CC_DEP1;						\
     argR = CC_DEP2;						\
     res  = argL + argR;					\
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;			\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
                 12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    165 
    166 /*-------------------------------------------------------------*/
    167 
/* SUB (also CMP): DEP1 = argL, DEP2 = argR, NDEP unused.
   res = argL - argR; CF is the borrow out. */
#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     Int argL, argR, res;					\
     argL = CC_DEP1;						\
     argR = CC_DEP2;						\
     res  = argL - argR;					\
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;			\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR) & (argL ^ res),	 		\
                 12 - DATA_BITS) & X86G_CC_MASK_O; 		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    186 
    187 /*-------------------------------------------------------------*/
    188 
/* ADC: NDEP = old flags (only the carry bit is used).  DEP2
   apparently holds argR ^ oldC, so the xor below recovers the true
   argR — NOTE(review): confirm this encoding against the thunk
   construction in toIR.c. */
#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     Int argL, argR, oldC, res;		       			\
     oldC = CC_NDEP & X86G_CC_MASK_C;				\
     argL = CC_DEP1;						\
     argR = CC_DEP2 ^ oldC;	       				\
     res  = (argL + argR) + oldC;				\
     if (oldC)							\
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;		\
     else							\
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;		\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
                  12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    211 
    212 /*-------------------------------------------------------------*/
    213 
/* SBB: NDEP = old flags (only the carry bit is used).  As with ADC,
   DEP2 apparently holds argR ^ oldC and the xor recovers argR —
   NOTE(review): confirm against toIR.c. */
#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     Int argL, argR, oldC, res;		       			\
     oldC = CC_NDEP & X86G_CC_MASK_C;				\
     argL = CC_DEP1;						\
     argR = CC_DEP2 ^ oldC;	       				\
     res  = (argL - argR) - oldC;				\
     if (oldC)							\
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;		\
     else							\
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;		\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR) & (argL ^ res), 			\
                 12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    236 
    237 /*-------------------------------------------------------------*/
    238 
/* Logical ops (AND/OR/XOR/TEST): DEP1 = result, DEP2/NDEP unused.
   CF, AF and OF are all forced to zero. */
#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     cf = 0;							\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0;							\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     of = 0;							\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    252 
    253 /*-------------------------------------------------------------*/
    254 
/* INC: DEP1 = result, NDEP = old flags.  INC leaves CF unchanged,
   so CF is taken from NDEP; argL is reconstructed as res - 1.
   OF fires when the result is exactly the sign bit (overflow from
   the most positive value). */
#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     Int argL, argR, res;					\
     res  = CC_DEP1;						\
     argL = res - 1;						\
     argR = 1;							\
     cf = CC_NDEP & X86G_CC_MASK_C;				\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    272 
    273 /*-------------------------------------------------------------*/
    274 
/* DEC: DEP1 = result, NDEP = old flags.  Like INC, CF is preserved
   from NDEP; argL is reconstructed as res + 1.  OF fires when the
   result is SIGN_MASK - 1 (overflow from the most negative value). */
#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     Int argL, argR, res;					\
     res  = CC_DEP1;						\
     argL = res + 1;						\
     argR = 1;							\
     cf = CC_NDEP & X86G_CC_MASK_C;				\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = ((res & DATA_MASK) 					\
          == ((UInt)SIGN_MASK - 1)) << 11;			\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    293 
    294 /*-------------------------------------------------------------*/
    295 
/* SHL: DEP1 = final result; DEP2 presumably holds the value before
   the last 1-bit shift, since CF is taken from its top bit — verify
   against toIR.c.  NDEP unused. */
#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;	\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0; /* undefined */					\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     /* of is defined if shift count == 1 */			\
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) 		\
          & X86G_CC_MASK_O;					\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    311 
    312 /*-------------------------------------------------------------*/
    313 
/* SHR/SAR: DEP1 = final result; DEP2 presumably holds the value
   before the last 1-bit shift, since CF is taken from its bottom
   bit — verify against toIR.c.  NDEP unused. */
#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);  					\
   { Int cf, pf, af, zf, sf, of;				\
     cf = CC_DEP2 & 1;						\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0; /* undefined */					\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     /* of is defined if shift count == 1 */			\
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)		\
          & X86G_CC_MASK_O;					\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    329 
    330 /*-------------------------------------------------------------*/
    331 
/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags.  Only C and O are recomputed;
   the remaining flag bits are passed through from NDEP. */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int fl 							\
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
          | (X86G_CC_MASK_C & CC_DEP1)				\
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,  		\
                                      11-(DATA_BITS-1)) 	\
                     ^ lshift(CC_DEP1, 11)));			\
     return fl;							\
   }								\
}
    346 
    347 /*-------------------------------------------------------------*/
    348 
/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags.  Only C and O are recomputed;
   the remaining flag bits are passed through from NDEP. */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int fl 							\
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
          | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))	\
          | (X86G_CC_MASK_O & (lshift(CC_DEP1, 			\
                                      11-(DATA_BITS-1)) 	\
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));	\
     return fl;							\
   }								\
}
    363 
    364 /*-------------------------------------------------------------*/
    365 
/* Unsigned multiply: DEP1, DEP2 = the two operands, NDEP unused.
   The full 2*DATA_BITS product is formed in DATA_U2TYPE; CF and OF
   are set iff its high half is nonzero (i.e. the product does not
   fit in DATA_BITS). */
#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int cf, pf, af, zf, sf, of;                                \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }								\
}
    389 
    390 /*-------------------------------------------------------------*/
    391 
/* Signed multiply: DEP1, DEP2 = the two operands, NDEP unused.
   CF and OF are set iff the high half of the full product differs
   from the sign-extension of the low half (i.e. the signed product
   does not fit in DATA_BITS). */
#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int cf, pf, af, zf, sf, of;                                \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_STYPE)CC_DEP1)                      \
                     * ((DATA_STYPE)CC_DEP2) );                 \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }								\
}
    415 
    416 
#if PROFILE_EFLAGS

/* One-shot guard: set by initCounts() on first use. */
static Bool initted     = False;

/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

/* True roughly once every 4M profiled calls; rate-limits showCounts(). */
#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
    433 
    434 
/* Dump the profiling counters: one row per CC_OP showing slow/fast
   carry-calculation counts and the per-condition-code counts.  The
   B/W/L suffix reflects the every-3 width grouping of the
   X86G_CC_OP_* enumeration (assumption — verify against
   guest_x86_defs.h). */
static void showCounts ( void )
{
   Int op, co;
   Char ch;
   vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {

      /* Width-suffix letter for this op (blank for op 0 == COPY). */
      ch = ' ';
      if (op > 0 && (op-1) % 3 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 3 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 3 == 2)
         ch = 'L';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         /* Large counts are abbreviated in units of K. */
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("     ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}
    474 
    475 static void initCounts ( void )
    476 {
    477    Int op, co;
    478    initted = True;
    479    for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
    480       tabc_fast[op] = tabc_slow[op] = 0;
    481       for (co = 0; co < 16; co++)
    482          tab_cond[op][co] = 0;
    483    }
    484 }
    485 
    486 #endif /* PROFILE_EFLAGS */
    487 
    488 
/* Calculate all the 6 flags from the supplied thunk parameters.
   Worker function, NOT directly called from generated code; the
   clean-helper entry points below (x86g_calculate_eflags_all,
   x86g_calculate_eflags_c, x86g_calculate_condition) wrap it.
   Dispatches on cc_op to the matching ACTIONS_* macro, each of
   which returns the assembled OSZACP bit vector. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      /* COPY: dep1 already holds the literal flag bits; just mask
         down to the six architecturally-defined status bits. */
      case X86G_CC_OP_COPY:
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt   );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}
    570 
    571 
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters.
   Thin (optionally profiled) wrapper around the worker above. */
UInt x86g_calculate_eflags_all ( UInt cc_op,
                                 UInt cc_dep1,
                                 UInt cc_dep2,
                                 UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}
    587 
    588 
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters.
   Common cc_ops are special-cased so the full flag computation in
   the worker is skipped; anything else falls through to the slow
   path and masks out CF. */
VEX_REGPARM(3)
UInt x86g_calculate_eflags_c ( UInt cc_op,
                               UInt cc_dep1,
                               UInt cc_dep2,
                               UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   /* Optimistically count this as a fast-path hit; the slow path
      below reverses that. */
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case X86G_CC_OP_LOGICL:
      case X86G_CC_OP_LOGICW:
      case X86G_CC_OP_LOGICB:
         /* Logical ops always clear CF. */
         return 0;
      case X86G_CC_OP_SUBL:
         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBW:
         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBB:
         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_INCL:
      case X86G_CC_OP_DECL:
         /* INC/DEC preserve CF; it lives in the NDEP field. */
         return cc_ndep & X86G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_EFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & X86G_CC_MASK_C;
}
    634 
    635 
    636 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
    637 /* returns 1 or 0 */
    638 UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
    639                                 UInt cc_op,
    640                                 UInt cc_dep1,
    641                                 UInt cc_dep2,
    642                                 UInt cc_ndep )
    643 {
    644    UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
    645                                                cc_dep2, cc_ndep);
    646    UInt of,sf,zf,cf,pf;
    647    UInt inv = cond & 1;
    648 
    649 #  if PROFILE_EFLAGS
    650    if (!initted) initCounts();
    651    tab_cond[cc_op][cond]++;
    652    n_calc_cond++;
    653    if (SHOW_COUNTS_NOW) showCounts();
    654 #  endif
    655 
    656    switch (cond) {
    657       case X86CondNO:
    658       case X86CondO: /* OF == 1 */
    659          of = eflags >> X86G_CC_SHIFT_O;
    660          return 1 & (inv ^ of);
    661 
    662       case X86CondNZ:
    663       case X86CondZ: /* ZF == 1 */
    664          zf = eflags >> X86G_CC_SHIFT_Z;
    665          return 1 & (inv ^ zf);
    666 
    667       case X86CondNB:
    668       case X86CondB: /* CF == 1 */
    669          cf = eflags >> X86G_CC_SHIFT_C;
    670          return 1 & (inv ^ cf);
    671          break;
    672 
    673       case X86CondNBE:
    674       case X86CondBE: /* (CF or ZF) == 1 */
    675          cf = eflags >> X86G_CC_SHIFT_C;
    676          zf = eflags >> X86G_CC_SHIFT_Z;
    677          return 1 & (inv ^ (cf | zf));
    678          break;
    679 
    680       case X86CondNS:
    681       case X86CondS: /* SF == 1 */
    682          sf = eflags >> X86G_CC_SHIFT_S;
    683          return 1 & (inv ^ sf);
    684 
    685       case X86CondNP:
    686       case X86CondP: /* PF == 1 */
    687          pf = eflags >> X86G_CC_SHIFT_P;
    688          return 1 & (inv ^ pf);
    689 
    690       case X86CondNL:
    691       case X86CondL: /* (SF xor OF) == 1 */
    692          sf = eflags >> X86G_CC_SHIFT_S;
    693          of = eflags >> X86G_CC_SHIFT_O;
    694          return 1 & (inv ^ (sf ^ of));
    695          break;
    696 
    697       case X86CondNLE:
    698       case X86CondLE: /* ((SF xor OF) or ZF)  == 1 */
    699          sf = eflags >> X86G_CC_SHIFT_S;
    700          of = eflags >> X86G_CC_SHIFT_O;
    701          zf = eflags >> X86G_CC_SHIFT_Z;
    702          return 1 & (inv ^ ((sf ^ of) | zf));
    703          break;
    704 
    705       default:
    706          /* shouldn't really make these calls from generated code */
    707          vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
    708                     cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
    709          vpanic("x86g_calculate_condition");
    710    }
    711 }
    712 
    713 
    714 /* VISIBLE TO LIBVEX CLIENT */
    715 UInt LibVEX_GuestX86_get_eflags ( /*IN*/VexGuestX86State* vex_state )
    716 {
    717    UInt eflags = x86g_calculate_eflags_all_WRK(
    718                     vex_state->guest_CC_OP,
    719                     vex_state->guest_CC_DEP1,
    720                     vex_state->guest_CC_DEP2,
    721                     vex_state->guest_CC_NDEP
    722                  );
    723    UInt dflag = vex_state->guest_DFLAG;
    724    vassert(dflag == 1 || dflag == 0xFFFFFFFF);
    725    if (dflag == 0xFFFFFFFF)
    726       eflags |= (1<<10);
    727    if (vex_state->guest_IDFLAG == 1)
    728       eflags |= (1<<21);
    729    if (vex_state->guest_ACFLAG == 1)
    730       eflags |= (1<<18);
    731 
    732    return eflags;
    733 }
    734 
    735 /* VISIBLE TO LIBVEX CLIENT */
    736 void
    737 LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
    738                               /*MOD*/VexGuestX86State* vex_state )
    739 {
    740    UInt oszacp = x86g_calculate_eflags_all_WRK(
    741                     vex_state->guest_CC_OP,
    742                     vex_state->guest_CC_DEP1,
    743                     vex_state->guest_CC_DEP2,
    744                     vex_state->guest_CC_NDEP
    745                  );
    746    if (new_carry_flag & 1) {
    747       oszacp |= X86G_CC_MASK_C;
    748    } else {
    749       oszacp &= ~X86G_CC_MASK_C;
    750    }
    751    vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
    752    vex_state->guest_CC_DEP1 = oszacp;
    753    vex_state->guest_CC_DEP2 = 0;
    754    vex_state->guest_CC_NDEP = 0;
    755 }
    756 
    757 
    758 /*---------------------------------------------------------------*/
    759 /*--- %eflags translation-time function specialisers.         ---*/
    760 /*--- These help iropt specialise calls the above run-time    ---*/
    761 /*--- %eflags functions.                                      ---*/
    762 /*---------------------------------------------------------------*/
    763 
    764 /* Used by the optimiser to try specialisations.  Returns an
    765    equivalent expression, or NULL if none. */
    766 
    767 static inline Bool isU32 ( IRExpr* e, UInt n )
    768 {
    769    return
    770       toBool( e->tag == Iex_Const
    771               && e->Iex.Const.con->tag == Ico_U32
    772               && e->Iex.Const.con->Ico.U32 == n );
    773 }
    774 
/* Translation-time specialiser, called by iropt.  Given the name of
   one of the three x86 %eflags helper functions and its argument
   expressions, return a cheaper equivalent IR expression, or NULL if
   no specialisation applies.  Only patterns where cc_op (and
   sometimes cond) are known constants are handled.
   precedingStmts/n_precedingStmts are part of the interface but are
   not consulted here. */
IRExpr* guest_x86_spechelper ( HChar*   function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   /* Count the NULL-terminated argument vector. */
   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "x86g_calculate_condition" --------- */

   if (vex_streq(function_name, "x86g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];
      /* args[4] (cc_ndep) is not needed by any pattern below. */

      /*---------------- ADDL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
         /* long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }

      /*---------------- SUBL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test !(dst <s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
         /* long sub/cmp, then NLE (signed not less than or equal)
            --> test dst >s src
            --> test !(dst <=s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test !(dst <u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S,
                           binop(Iop_Sub32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
         /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S,
                                 binop(Iop_Sub32, cc_dep1, cc_dep2),
                                 mkU32(0))),
                      mkU32(1));
      }

      /*---------------- SUBW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }

      /*---------------- SUBB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
         /* byte sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep2,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep1,mkU32(0xFF))));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU32(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U32s. */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[7]
         */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                mkU32(1));
      }

      /*---------------- LOGICL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
         /* long and/or/xor, then BE
            LOGIC sets ZF according to the result and makes CF be zero.
            BE computes (CF | ZF), but CF is zero, so this reduces ZF
            -- which will be 1 iff the result is zero.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* long and/or/xor, then S --> (UInt)result[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
         /* see comment below for (LOGICB, CondNS) */
         /* long and/or/xor, then NS --> (UInt) ~ result[31] */
         return binop(Iop_Xor32,
                binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1)),
                mkU32(1));
      }

      /*---------------- LOGICW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* word and/or/xor, then S --> (UInt)result[15] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(15)),
                      mkU32(1));
      }

      /*---------------- LOGICB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         /* b9ac9:       84 c0                   test   %al,%al
            b9acb:       75 0d                   jne    b9ada */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
         /* ditto, for negation-of-S. */
         /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
         return binop(Iop_Xor32,
                binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1)),
                mkU32(1));
      }

      /*---------------- DECL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
         /* dec L, then S --> compare DST <s 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
         /* dec W, then Z --> test dst == 0 */
         /* Shift the 16-bit result to the top of the word so that the
            32-bit compare-with-zero sees only the defined bits. */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- INCW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
         /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
         /* inc W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- SHRL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
         /* SHRL, then Z --> test dep1 == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of x87 FP compares: "fcom ... ;
         fnstsw %ax ; sahf ; jbe" for example. */

      if (isU32(cc_op, X86G_CC_OP_COPY) &&
          (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test
            (C or Z) == 1. */
         /* COPY, then NBE --> extract C and Z from dep1, and test
            (C or Z) == 0. */
         UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(
                        Iop_Or32,
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
                     ),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
         UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
         UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_c" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
      /* specialise calls to above "calculate_eflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep1,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep2,mkU32(0xFF))));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL)
          || isU32(cc_op, X86G_CC_OP_LOGICW)
          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU32(0);
      }
      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }
      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* cflag after COPY is stored in DEP1. */
         return
            binop(
               Iop_And32,
               binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
               mkU32(1)
            );
      }
      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           cc_dep1));
      }
      // ATC, requires verification, no test case known
      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
      //   /* C after signed widening multiply denotes the case where
      //      the top half of the result isn't simply the sign extension
      //      of the bottom half (iow the result doesn't fit completely
      //      in the bottom half).  Hence:
      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
      //      where 'x' denotes signed widening multiply.*/
      //   return
      //      unop(Iop_1Uto32,
      //           binop(Iop_CmpNE32,
      //                 unop(Iop_64HIto32,
      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
      //                 binop(Iop_Sar32,
      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
      //}
#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_all" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
      /* specialise calls to above "calculate_eflags_all" function */
      IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      /* cc_dep2 = args[2]; */
      /* cc_ndep = args[3]; */

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* eflags after COPY are stored in DEP1. */
         return
            binop(
               Iop_And32,
               cc_dep1,
               mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                     | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
            );
      }
      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   return NULL;
}
   1322 
   1323 
   1324 /*---------------------------------------------------------------*/
   1325 /*--- Supporting functions for x87 FPU activities.            ---*/
   1326 /*---------------------------------------------------------------*/
   1327 
   1328 static inline Bool host_is_little_endian ( void )
   1329 {
   1330    UInt x = 0x76543210;
   1331    UChar* p = (UChar*)(&x);
   1332    return toBool(*p == 0x10);
   1333 }
   1334 
   1335 /* 80 and 64-bit floating point formats:
   1336 
   1337    80-bit:
   1338 
   1339     S  0       0-------0      zero
   1340     S  0       0X------X      denormals
   1341     S  1-7FFE  1X------X      normals (all normals have leading 1)
   1342     S  7FFF    10------0      infinity
   1343     S  7FFF    10X-----X      snan
   1344     S  7FFF    11X-----X      qnan
   1345 
   1346    S is the sign bit.  For runs X----X, at least one of the Xs must be
   1347    nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
   1348    there is an explicitly represented leading 1, and a sign bit,
   1349    giving 80 in total.
   1350 
   1351    64-bit avoids the confusion of an explicitly represented leading 1
   1352    and so is simpler:
   1353 
   1354     S  0      0------0   zero
   1355     S  0      X------X   denormals
   1356     S  1-7FE  any        normals
   1357     S  7FF    0------0   infinity
   1358     S  7FF    0X-----X   snan
   1359     S  7FF    1X-----X   qnan
   1360 
   1361    Exponent is 11 bits, fractional part is 52 bits, and there is a
   1362    sign bit, giving 64 in total.
   1363 */
   1364 
   1365 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
   1366 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   1367 UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
   1368 {
   1369    Bool   mantissaIsZero;
   1370    Int    bexp;
   1371    UChar  sign;
   1372    UChar* f64;
   1373 
   1374    vassert(host_is_little_endian());
   1375 
   1376    /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
   1377 
   1378    f64  = (UChar*)(&dbl);
   1379    sign = toUChar( (f64[7] >> 7) & 1 );
   1380 
   1381    /* First off, if the tag indicates the register was empty,
   1382       return 1,0,sign,1 */
   1383    if (tag == 0) {
   1384       /* vex_printf("Empty\n"); */
   1385       return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
   1386                                  | X86G_FC_MASK_C0;
   1387    }
   1388 
   1389    bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   1390    bexp &= 0x7FF;
   1391 
   1392    mantissaIsZero
   1393       = toBool(
   1394            (f64[6] & 0x0F) == 0
   1395            && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
   1396         );
   1397 
   1398    /* If both exponent and mantissa are zero, the value is zero.
   1399       Return 1,0,sign,0. */
   1400    if (bexp == 0 && mantissaIsZero) {
   1401       /* vex_printf("Zero\n"); */
   1402       return X86G_FC_MASK_C3 | 0
   1403                              | (sign << X86G_FC_SHIFT_C1) | 0;
   1404    }
   1405 
   1406    /* If exponent is zero but mantissa isn't, it's a denormal.
   1407       Return 1,1,sign,0. */
   1408    if (bexp == 0 && !mantissaIsZero) {
   1409       /* vex_printf("Denormal\n"); */
   1410       return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
   1411                              | (sign << X86G_FC_SHIFT_C1) | 0;
   1412    }
   1413 
   1414    /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
   1415       Return 0,1,sign,1. */
   1416    if (bexp == 0x7FF && mantissaIsZero) {
   1417       /* vex_printf("Inf\n"); */
   1418       return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
   1419                                  | X86G_FC_MASK_C0;
   1420    }
   1421 
   1422    /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
   1423       Return 0,0,sign,1. */
   1424    if (bexp == 0x7FF && !mantissaIsZero) {
   1425       /* vex_printf("NaN\n"); */
   1426       return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
   1427    }
   1428 
   1429    /* Uh, ok, we give up.  It must be a normal finite number.
   1430       Return 0,1,sign,0.
   1431    */
   1432    /* vex_printf("normal\n"); */
   1433    return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
   1434 }
   1435 
   1436 
   1437 /* CALLED FROM GENERATED CODE */
   1438 /* DIRTY HELPER (reads guest memory) */
   1439 ULong x86g_dirtyhelper_loadF80le ( UInt addrU )
   1440 {
   1441    ULong f64;
   1442    convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 );
   1443    return f64;
   1444 }
   1445 
   1446 /* CALLED FROM GENERATED CODE */
   1447 /* DIRTY HELPER (writes guest memory) */
   1448 void x86g_dirtyhelper_storeF80le ( UInt addrU, ULong f64 )
   1449 {
   1450    convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) );
   1451 }
   1452 
   1453 
   1454 /*----------------------------------------------*/
   1455 /*--- The exported fns ..                    ---*/
   1456 /*----------------------------------------------*/
   1457 
   1458 /* Layout of the real x87 state. */
   1459 /* 13 June 05: Fpu_State and auxiliary constants was moved to
   1460    g_generic_x87.h */
   1461 
   1462 
   1463 /* CLEAN HELPER */
   1464 /* fpucw[15:0] contains a x87 native format FPU control word.
   1465    Extract from it the required FPROUND value and any resulting
   1466    emulation warning, and return (warn << 32) | fpround value.
   1467 */
   1468 ULong x86g_check_fldcw ( UInt fpucw )
   1469 {
   1470    /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   1471    /* NOTE, encoded exactly as per enum IRRoundingMode. */
   1472    UInt rmode = (fpucw >> 10) & 3;
   1473 
   1474    /* Detect any required emulation warnings. */
   1475    VexEmWarn ew = EmWarn_NONE;
   1476 
   1477    if ((fpucw & 0x3F) != 0x3F) {
   1478       /* unmasked exceptions! */
   1479       ew = EmWarn_X86_x87exns;
   1480    }
   1481    else
   1482    if (((fpucw >> 8) & 3) != 3) {
   1483       /* unsupported precision */
   1484       ew = EmWarn_X86_x87precision;
   1485    }
   1486 
   1487    return (((ULong)ew) << 32) | ((ULong)rmode);
   1488 }
   1489 
   1490 /* CLEAN HELPER */
   1491 /* Given fpround as an IRRoundingMode value, create a suitable x87
   1492    native format FPU control word. */
   1493 UInt x86g_create_fpucw ( UInt fpround )
   1494 {
   1495    fpround &= 3;
   1496    return 0x037F | (fpround << 10);
   1497 }
   1498 
   1499 
   1500 /* CLEAN HELPER */
   1501 /* mxcsr[15:0] contains a SSE native format MXCSR value.
   1502    Extract from it the required SSEROUND value and any resulting
   1503    emulation warning, and return (warn << 32) | sseround value.
   1504 */
   1505 ULong x86g_check_ldmxcsr ( UInt mxcsr )
   1506 {
   1507    /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   1508    /* NOTE, encoded exactly as per enum IRRoundingMode. */
   1509    UInt rmode = (mxcsr >> 13) & 3;
   1510 
   1511    /* Detect any required emulation warnings. */
   1512    VexEmWarn ew = EmWarn_NONE;
   1513 
   1514    if ((mxcsr & 0x1F80) != 0x1F80) {
   1515       /* unmasked exceptions! */
   1516       ew = EmWarn_X86_sseExns;
   1517    }
   1518    else
   1519    if (mxcsr & (1<<15)) {
   1520       /* FZ is set */
   1521       ew = EmWarn_X86_fz;
   1522    }
   1523    else
   1524    if (mxcsr & (1<<6)) {
   1525       /* DAZ is set */
   1526       ew = EmWarn_X86_daz;
   1527    }
   1528 
   1529    return (((ULong)ew) << 32) | ((ULong)rmode);
   1530 }
   1531 
   1532 
   1533 /* CLEAN HELPER */
   1534 /* Given sseround as an IRRoundingMode value, create a suitable SSE
   1535    native format MXCSR value. */
   1536 UInt x86g_create_mxcsr ( UInt sseround )
   1537 {
   1538    sseround &= 3;
   1539    return 0x1F80 | (sseround << 13);
   1540 }
   1541 
   1542 
   1543 /* CALLED FROM GENERATED CODE */
   1544 /* DIRTY HELPER (writes guest state) */
   1545 /* Initialise the x87 FPU state as per 'finit'. */
   1546 void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
   1547 {
   1548    Int i;
   1549    gst->guest_FTOP = 0;
   1550    for (i = 0; i < 8; i++) {
   1551       gst->guest_FPTAG[i] = 0; /* empty */
   1552       gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   1553    }
   1554    gst->guest_FPROUND = (UInt)Irrm_NEAREST;
   1555    gst->guest_FC3210  = 0;
   1556 }
   1557 
   1558 
   1559 /* This is used to implement both 'frstor' and 'fldenv'.  The latter
   1560    appears to differ from the former only in that the 8 FP registers
   1561    themselves are not transferred into the guest state. */
/* Transfer an x87 state image (Fpu_State layout: env[] halfwords
   followed by the 80-bit register bytes) into the guest state.  If
   moveRegs is False only the environment (control/status/tag words)
   is transferred, which is the 'fldenv' behaviour; 'frstor' passes
   True.  Returns any emulation warning arising from the loaded
   control word. */
static
VexEmWarn do_put_x87 ( Bool moveRegs,
                       /*IN*/UChar* x87_state,
                       /*OUT*/VexGuestX86State* vex_state )
{
   Int        stno, preg;
   UInt       tag;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7; /* TOP field */
   UInt       tagw    = x87->env[FP_ENV_TAG];
   UInt       fpucw   = x87->env[FP_ENV_CTRL];
   UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;    /* C3,C2,C1,C0 */
   VexEmWarn  ew;
   UInt       fpround;
   ULong      pair;

   /* Copy registers and tags.  stno indexes in ST order; preg is the
      corresponding physical register given the stack top. */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3; /* 2-bit x87 tag; 3 means empty */
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         if (moveRegs)
            convert_f80le_to_f64le( &x87->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = x86g_check_fldcw ( (UInt)fpucw );
   fpround = (UInt)pair;              /* low 32 bits: rounding mode */
   ew      = (VexEmWarn)(pair >> 32); /* high 32 bits: warning, if any */

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}
   1619 
   1620 
   1621 /* Create an x87 FPU state from the guest state, as close as
   1622    we can approximate it. */
   1623 static
   1624 void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
   1625                   /*OUT*/UChar* x87_state )
   1626 {
   1627    Int        i, stno, preg;
   1628    UInt       tagw;
   1629    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   1630    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   1631    Fpu_State* x87     = (Fpu_State*)x87_state;
   1632    UInt       ftop    = vex_state->guest_FTOP;
   1633    UInt       c3210   = vex_state->guest_FC3210;
   1634 
   1635    for (i = 0; i < 14; i++)
   1636       x87->env[i] = 0;
   1637 
   1638    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   1639    x87->env[FP_ENV_STAT]
   1640       = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   1641    x87->env[FP_ENV_CTRL]
   1642       = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
   1643 
   1644    /* Dump the register stack in ST order. */
   1645    tagw = 0;
   1646    for (stno = 0; stno < 8; stno++) {
   1647       preg = (stno + ftop) & 7;
   1648       if (vexTags[preg] == 0) {
   1649          /* register is empty */
   1650          tagw |= (3 << (2*preg));
   1651          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
   1652                                  &x87->reg[10*stno] );
   1653       } else {
   1654          /* register is full. */
   1655          tagw |= (0 << (2*preg));
   1656          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
   1657                                  &x87->reg[10*stno] );
   1658       }
   1659    }
   1660    x87->env[FP_ENV_TAG] = toUShort(tagw);
   1661 }
   1662 
   1663 
   1664 /* CALLED FROM GENERATED CODE */
   1665 /* DIRTY HELPER (reads guest state, writes guest mem) */
/* Write an 'fxsave'-format image of the guest's x87/SSE state into
   guest memory at 'addr': the x87 portion occupies the first 160
   bytes, followed by %xmm0..%xmm7 (16 bytes each) at offset 160. */
void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   U128*     xmm   = (U128*)(addr + 160);
   UInt      mxcsr;
   UShort    fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   do_get_x87( gst, (UChar*)&tmp );
   mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */

   /* set addrS[2] in an endian-independent way */
   /* Compress the 2-bit-per-register tag word into the 1-bit-per-
      register summary byte: bit set iff the register is not empty
      (full tag != 3). */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5]  = 0; /* pad */

   addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
   addrS[4]  = 0;
   addrS[5]  = 0; /* FPU IP (bogus) */
   addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
                     could conceivably dump %CS here) */

   addrS[7]  = 0; /* Intel reserved */

   addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[10] = 0; /* segment selector for above operand pointer; %DS
                     perhaps? */
   addrS[11] = 0; /* Intel reserved */

   addrS[12] = toUShort(mxcsr);  /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
   addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */

   /* Copy in the FP registers, in ST order.  Each occupies a 16-byte
      slot in the image; only the low 10 bytes are significant. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm7 remain to be copied.  If the host is big-endian, these
      need to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( xmm[0], gst->guest_XMM0 );
   COPY_U128( xmm[1], gst->guest_XMM1 );
   COPY_U128( xmm[2], gst->guest_XMM2 );
   COPY_U128( xmm[3], gst->guest_XMM3 );
   COPY_U128( xmm[4], gst->guest_XMM4 );
   COPY_U128( xmm[5], gst->guest_XMM5 );
   COPY_U128( xmm[6], gst->guest_XMM6 );
   COPY_U128( xmm[7], gst->guest_XMM7 );

#  undef COPY_U128
}
   1753 
   1754 
   1755 /* CALLED FROM GENERATED CODE */
   1756 /* DIRTY HELPER (writes guest state, reads guest mem) */
/* Reload the guest's x87/SSE state from an 'fxsave'-format image at
   guest address 'addr' (the inverse of x86g_dirtyhelper_FXSAVE).
   Returns an emulation warning if the restored control word or MXCSR
   requests behaviour we can't emulate; an x87 warning takes priority
   over an XMM one. */
VexEmWarn x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
{
   Fpu_State tmp;
   VexEmWarn warnX87 = EmWarn_NONE;
   VexEmWarn warnXMM = EmWarn_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   U128*     xmm     = (U128*)(addr + 160);
   UShort    fp_tags;
   Int       r, stno, i;

   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_XMM0, xmm[0] );
   COPY_U128( gst->guest_XMM1, xmm[1] );
   COPY_U128( gst->guest_XMM2, xmm[2] );
   COPY_U128( gst->guest_XMM3, xmm[3] );
   COPY_U128( gst->guest_XMM4, xmm[4] );
   COPY_U128( gst->guest_XMM5, xmm[5] );
   COPY_U128( gst->guest_XMM6, xmm[6] );
   COPY_U128( gst->guest_XMM7, xmm[7] );

#  undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */

   /* LLVM on Darwin turns the following loop into a movaps plus a
      handful of scalar stores.  This would work fine except for the
      fact that VEX doesn't keep the stack correctly (16-) aligned for
      the call, so it segfaults.  Hence, split the loop into two
      pieces (and pray LLVM doesn't merely glue them back together) so
      it's composed only of scalar stores and so is alignment
      insensitive.  Of course this is a kludge of the lamest kind --
      VEX should be fixed properly. */
   /* Code that seems to trigger the problem:
      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;

   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7] -- 10 significant bytes per register,
      read from the image's 16-byte slots */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   /* Expand the 1-bit-per-register summary byte written by FXSAVE
      (bit set iff register in use) back into a full 2-bit-per-
      register tag word. */
   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );

   /* Reassemble MXCSR from its two image halfwords and load it,
      capturing any warning. */
   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = x86g_check_ldmxcsr( w32 );

     warnXMM = (VexEmWarn)(w64 >> 32);

     gst->guest_SSEROUND = (UInt)w64;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmWarn_NONE)
      return warnX87;
   else
      return warnXMM;
}
   1846 
   1847 
   1848 /* CALLED FROM GENERATED CODE */
   1849 /* DIRTY HELPER (reads guest state, writes guest mem) */
   1850 void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
   1851 {
   1852    do_get_x87( gst, (UChar*)addr );
   1853 }
   1854 
   1855 /* CALLED FROM GENERATED CODE */
   1856 /* DIRTY HELPER (writes guest state, reads guest mem) */
   1857 VexEmWarn x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
   1858 {
   1859    return do_put_x87( True/*regs too*/, (UChar*)addr, gst );
   1860 }
   1861 
   1862 /* CALLED FROM GENERATED CODE */
   1863 /* DIRTY HELPER (reads guest state, writes guest mem) */
   1864 void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
   1865 {
   1866    /* Somewhat roundabout, but at least it's simple. */
   1867    Int       i;
   1868    UShort*   addrP = (UShort*)addr;
   1869    Fpu_State tmp;
   1870    do_get_x87( gst, (UChar*)&tmp );
   1871    for (i = 0; i < 14; i++)
   1872       addrP[i] = tmp.env[i];
   1873 }
   1874 
   1875 /* CALLED FROM GENERATED CODE */
   1876 /* DIRTY HELPER (writes guest state, reads guest mem) */
   1877 VexEmWarn x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
   1878 {
   1879    return do_put_x87( False/*don't move regs*/, (UChar*)addr, gst);
   1880 }
   1881 
   1882 
   1883 /*---------------------------------------------------------------*/
   1884 /*--- Misc integer helpers, including rotates and CPUID.      ---*/
   1885 /*---------------------------------------------------------------*/
   1886 
   1887 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   1888 /* Calculate both flags and value result for rotate right
   1889    through the carry bit.  Result in low 32 bits,
   1890    new flags (OSZACP) in high 32 bits.
   1891 */
   1892 ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
   1893 {
   1894    UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
   1895 
   1896    switch (sz) {
   1897       case 4:
   1898          cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
   1899          of        = ((arg >> 31) ^ cf) & 1;
   1900          while (tempCOUNT > 0) {
   1901             tempcf = arg & 1;
   1902             arg    = (arg >> 1) | (cf << 31);
   1903             cf     = tempcf;
   1904             tempCOUNT--;
   1905          }
   1906          break;
   1907       case 2:
   1908          while (tempCOUNT >= 17) tempCOUNT -= 17;
   1909          cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
   1910          of        = ((arg >> 15) ^ cf) & 1;
   1911          while (tempCOUNT > 0) {
   1912             tempcf = arg & 1;
   1913             arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
   1914             cf     = tempcf;
   1915             tempCOUNT--;
   1916          }
   1917          break;
   1918       case 1:
   1919          while (tempCOUNT >= 9) tempCOUNT -= 9;
   1920          cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
   1921          of        = ((arg >> 7) ^ cf) & 1;
   1922          while (tempCOUNT > 0) {
   1923             tempcf = arg & 1;
   1924             arg    = ((arg >> 1) & 0x7F) | (cf << 7);
   1925             cf     = tempcf;
   1926             tempCOUNT--;
   1927          }
   1928          break;
   1929       default:
   1930          vpanic("calculate_RCR: invalid size");
   1931    }
   1932 
   1933    cf &= 1;
   1934    of &= 1;
   1935    eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   1936    eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
   1937 
   1938    return (((ULong)eflags_in) << 32) | ((ULong)arg);
   1939 }
   1940 
   1941 
   1942 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   1943 /* Calculate both flags and value result for rotate left
   1944    through the carry bit.  Result in low 32 bits,
   1945    new flags (OSZACP) in high 32 bits.
   1946 */
   1947 ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
   1948 {
   1949    UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
   1950 
   1951    switch (sz) {
   1952       case 4:
   1953          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
   1954          while (tempCOUNT > 0) {
   1955             tempcf = (arg >> 31) & 1;
   1956             arg    = (arg << 1) | (cf & 1);
   1957             cf     = tempcf;
   1958             tempCOUNT--;
   1959          }
   1960          of = ((arg >> 31) ^ cf) & 1;
   1961          break;
   1962       case 2:
   1963          while (tempCOUNT >= 17) tempCOUNT -= 17;
   1964          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
   1965          while (tempCOUNT > 0) {
   1966             tempcf = (arg >> 15) & 1;
   1967             arg    = 0xFFFF & ((arg << 1) | (cf & 1));
   1968             cf     = tempcf;
   1969             tempCOUNT--;
   1970          }
   1971          of = ((arg >> 15) ^ cf) & 1;
   1972          break;
   1973       case 1:
   1974          while (tempCOUNT >= 9) tempCOUNT -= 9;
   1975          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
   1976          while (tempCOUNT > 0) {
   1977             tempcf = (arg >> 7) & 1;
   1978             arg    = 0xFF & ((arg << 1) | (cf & 1));
   1979             cf     = tempcf;
   1980             tempCOUNT--;
   1981          }
   1982          of = ((arg >> 7) ^ cf) & 1;
   1983          break;
   1984       default:
   1985          vpanic("calculate_RCL: invalid size");
   1986    }
   1987 
   1988    cf &= 1;
   1989    of &= 1;
   1990    eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   1991    eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
   1992 
   1993    return (((ULong)eflags_in) << 32) | ((ULong)arg);
   1994 }
   1995 
   1996 
   1997 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   1998 /* Calculate both flags and value result for DAA/DAS/AAA/AAS.
   1999    AX value in low half of arg, OSZACP in upper half.
   2000    See guest-x86/toIR.c usage point for details.
   2001 */
   2002 static UInt calc_parity_8bit ( UInt w32 ) {
   2003    UInt i;
   2004    UInt p = 1;
   2005    for (i = 0; i < 8; i++)
   2006       p ^= (1 & (w32 >> i));
   2007    return p;
   2008 }
/* Emulate the BCD-adjust instructions DAA (0x27), DAS (0x2F),
   AAA (0x37) and AAS (0x3F).  flags_and_AX carries AX in bits 15:0
   and the OSZACP flags in bits 31:16; the adjusted AX and new flags
   are returned packed the same way. */
UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
{
   /* Unpack AL, AH and the individual flag bits from the argument. */
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0x27: { /* DAA */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         /* Low nibble out of BCD range, or aux-carry pending:
            adjust by 6. */
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_C  = old_C;
            if (r_AL >= 0x100) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         /* High nibble out of range, or carry pending: adjust by
            0x60. */
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL + 0x60;
            r_C  = 1;
         } else {
            r_C = 0;
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x2F: { /* DAS */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         /* Mirror of DAA, subtracting the adjustments. */
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            Bool borrow = r_AL < 6;
            r_AL = r_AL - 6;
            r_C  = old_C;
            if (borrow) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL - 0x60;
            r_C  = 1;
         } else {
            /* Intel docs are wrong: r_C = 0; */
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x37: { /* AAA */
         /* 'nudge': AL + 6 would carry out of the low byte, so AH
            needs an extra increment. */
         Bool nudge = r_AL > 0xF9;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_AH = r_AH + 1 + (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      case 0x3F: { /* AAS */
         /* 'nudge': AL - 6 would borrow out of the low byte, so AH
            needs an extra decrement. */
         Bool nudge = r_AL < 0x06;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL - 6;
            r_AH = r_AH - 1 - (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      default:
         vassert(0);
   }
   /* Repack the flags and AX into the result word. */
   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
   2124 
   2125 UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
   2126 {
   2127    UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   2128    UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   2129    UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   2130    UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   2131    UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   2132    UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   2133    UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   2134    UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   2135    UInt result = 0;
   2136 
   2137    switch (opcode) {
   2138       case 0xD4: { /* AAM */
   2139          r_AH = r_AL / 10;
   2140          r_AL = r_AL % 10;
   2141          break;
   2142       }
   2143       case 0xD5: { /* AAD */
   2144          r_AL = ((r_AH * 10) + r_AL) & 0xff;
   2145          r_AH = 0;
   2146          break;
   2147       }
   2148       default:
   2149          vassert(0);
   2150    }
   2151 
   2152    r_O = 0; /* let's say (undefined) */
   2153    r_C = 0; /* let's say (undefined) */
   2154    r_A = 0; /* let's say (undefined) */
   2155    r_S = (r_AL & 0x80) ? 1 : 0;
   2156    r_Z = (r_AL == 0) ? 1 : 0;
   2157    r_P = calc_parity_8bit( r_AL );
   2158 
   2159    result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
   2160             | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
   2161             | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
   2162             | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
   2163             | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
   2164             | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
   2165             | ( (r_AH & 0xFF) << 8 )
   2166             | ( (r_AL & 0xFF) << 0 );
   2167    return result;
   2168 }
   2169 
   2170 
   2171 /* CALLED FROM GENERATED CODE */
   2172 /* DIRTY HELPER (non-referentially-transparent) */
   2173 /* Horrible hack.  On non-x86 platforms, return 1. */
ULong x86g_dirtyhelper_RDTSC ( void )
{
#  if defined(__i386__)
   ULong res;
   /* The "=A" constraint binds the 64-bit result to the EDX:EAX
      register pair, which is where rdtsc deposits the counter. */
   __asm__ __volatile__("rdtsc" : "=A" (res));
   return res;
#  else
   /* Not running on an x86 host, so there is no TSC to read; hand
      back a harmless constant instead. */
   return 1ULL;
#  endif
}
   2184 
   2185 
   2186 /* CALLED FROM GENERATED CODE */
   2187 /* DIRTY HELPER (modifies guest state) */
   2188 /* Claim to be a P55C (Intel Pentium/MMX) */
   2189 void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
   2190 {
   2191    switch (st->guest_EAX) {
   2192       case 0:
   2193          st->guest_EAX = 0x1;
   2194          st->guest_EBX = 0x756e6547;
   2195          st->guest_ECX = 0x6c65746e;
   2196          st->guest_EDX = 0x49656e69;
   2197          break;
   2198       default:
   2199          st->guest_EAX = 0x543;
   2200          st->guest_EBX = 0x0;
   2201          st->guest_ECX = 0x0;
   2202          st->guest_EDX = 0x8001bf;
   2203          break;
   2204    }
   2205 }
   2206 
   2207 /* CALLED FROM GENERATED CODE */
   2208 /* DIRTY HELPER (modifies guest state) */
   2209 /* Claim to be the following SSE1-capable CPU:
   2210    vendor_id       : GenuineIntel
   2211    cpu family      : 6
   2212    model           : 11
   2213    model name      : Intel(R) Pentium(R) III CPU family      1133MHz
   2214    stepping        : 1
   2215    cpu MHz         : 1131.013
   2216    cache size      : 512 KB
   2217 */
   2218 void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
   2219 {
   2220    switch (st->guest_EAX) {
   2221       case 0:
   2222          st->guest_EAX = 0x00000002;
   2223          st->guest_EBX = 0x756e6547;
   2224          st->guest_ECX = 0x6c65746e;
   2225          st->guest_EDX = 0x49656e69;
   2226          break;
   2227       case 1:
   2228          st->guest_EAX = 0x000006b1;
   2229          st->guest_EBX = 0x00000004;
   2230          st->guest_ECX = 0x00000000;
   2231          st->guest_EDX = 0x0383fbff;
   2232          break;
   2233       default:
   2234          st->guest_EAX = 0x03020101;
   2235          st->guest_EBX = 0x00000000;
   2236          st->guest_ECX = 0x00000000;
   2237          st->guest_EDX = 0x0c040883;
   2238          break;
   2239    }
   2240 }
   2241 
   2242 /* Claim to be the following SSSE3-capable CPU (2 x ...):
   2243    vendor_id       : GenuineIntel
   2244    cpu family      : 6
   2245    model           : 15
   2246    model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
   2247    stepping        : 6
   2248    cpu MHz         : 2394.000
   2249    cache size      : 4096 KB
   2250    physical id     : 0
   2251    siblings        : 2
   2252    core id         : 0
   2253    cpu cores       : 2
   2254    fpu             : yes
   2255    fpu_exception   : yes
   2256    cpuid level     : 10
   2257    wp              : yes
   2258    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
   2259                      mtrr pge mca cmov pat pse36 clflush dts acpi
   2260                      mmx fxsr sse sse2 ss ht tm syscall nx lm
   2261                      constant_tsc pni monitor ds_cpl vmx est tm2
   2262                      cx16 xtpr lahf_lm
   2263    bogomips        : 4798.78
   2264    clflush size    : 64
   2265    cache_alignment : 64
   2266    address sizes   : 36 bits physical, 48 bits virtual
   2267    power management:
   2268 */
   2269 void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
   2270 {
   2271 #  define SET_ABCD(_a,_b,_c,_d)               \
   2272       do { st->guest_EAX = (UInt)(_a);        \
   2273            st->guest_EBX = (UInt)(_b);        \
   2274            st->guest_ECX = (UInt)(_c);        \
   2275            st->guest_EDX = (UInt)(_d);        \
   2276       } while (0)
   2277 
   2278    switch (st->guest_EAX) {
   2279       case 0x00000000:
   2280          SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
   2281          break;
   2282       case 0x00000001:
   2283          SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
   2284          break;
   2285       case 0x00000002:
   2286          SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
   2287          break;
   2288       case 0x00000003:
   2289          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2290          break;
   2291       case 0x00000004: {
   2292          switch (st->guest_ECX) {
   2293             case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
   2294                                       0x0000003f, 0x00000001); break;
   2295             case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
   2296                                       0x0000003f, 0x00000001); break;
   2297             case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
   2298                                       0x00000fff, 0x00000001); break;
   2299             default:         SET_ABCD(0x00000000, 0x00000000,
   2300                                       0x00000000, 0x00000000); break;
   2301          }
   2302          break;
   2303       }
   2304       case 0x00000005:
   2305          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
   2306          break;
   2307       case 0x00000006:
   2308          SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
   2309          break;
   2310       case 0x00000007:
   2311          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2312          break;
   2313       case 0x00000008:
   2314          SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
   2315          break;
   2316       case 0x00000009:
   2317          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2318          break;
   2319       case 0x0000000a:
   2320       unhandled_eax_value:
   2321          SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
   2322          break;
   2323       case 0x80000000:
   2324          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
   2325          break;
   2326       case 0x80000001:
   2327          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
   2328          break;
   2329       case 0x80000002:
   2330          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
   2331          break;
   2332       case 0x80000003:
   2333          SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
   2334          break;
   2335       case 0x80000004:
   2336          SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
   2337          break;
   2338       case 0x80000005:
   2339          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2340          break;
   2341       case 0x80000006:
   2342          SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
   2343          break;
   2344       case 0x80000007:
   2345          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2346          break;
   2347       case 0x80000008:
   2348          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
   2349          break;
   2350       default:
   2351          goto unhandled_eax_value;
   2352    }
   2353 #  undef SET_ABCD
   2354 }
   2355 
   2356 
   2357 /* CALLED FROM GENERATED CODE */
   2358 /* DIRTY HELPER (non-referentially-transparent) */
   2359 /* Horrible hack.  On non-x86 platforms, return 0. */
   2360 UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
   2361 {
   2362 #  if defined(__i386__)
   2363    UInt r = 0;
   2364    portno &= 0xFFFF;
   2365    switch (sz) {
   2366       case 4:
   2367          __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
   2368                               : "=a" (r) : "Nd" (portno));
   2369 	 break;
   2370       case 2:
   2371          __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
   2372                               : "=a" (r) : "Nd" (portno));
   2373 	 break;
   2374       case 1:
   2375          __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
   2376                               : "=a" (r) : "Nd" (portno));
   2377 	 break;
   2378       default:
   2379          break;
   2380    }
   2381    return r;
   2382 #  else
   2383    return 0;
   2384 #  endif
   2385 }
   2386 
   2387 
   2388 /* CALLED FROM GENERATED CODE */
   2389 /* DIRTY HELPER (non-referentially-transparent) */
   2390 /* Horrible hack.  On non-x86 platforms, do nothing. */
   2391 void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
   2392 {
   2393 #  if defined(__i386__)
   2394    portno &= 0xFFFF;
   2395    switch (sz) {
   2396       case 4:
   2397          __asm__ __volatile__("outl %0, %w1"
   2398                               : : "a" (data), "Nd" (portno));
   2399 	 break;
   2400       case 2:
   2401          __asm__ __volatile__("outw %w0, %w1"
   2402                               : : "a" (data), "Nd" (portno));
   2403 	 break;
   2404       case 1:
   2405          __asm__ __volatile__("outb %b0, %w1"
   2406                               : : "a" (data), "Nd" (portno));
   2407 	 break;
   2408       default:
   2409          break;
   2410    }
   2411 #  else
   2412    /* do nothing */
   2413 #  endif
   2414 }
   2415 
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On an x86 host, run the real instruction, storing
   its 6-byte (16-bit limit + 32-bit base) result at *address.  On
   non-x86 hosts, fake it by zeroing those 6 bytes instead. */
/* op = 0: call the native SGDT instruction.
   op = 1: call the native SIDT instruction.
*/
void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
#  if defined(__i386__)
   switch (op) {
      case 0:
         /* "memory" clobber: the asm writes 6 bytes through address. */
         __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
         break;
      case 1:
         __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
         break;
      default:
         /* Generated code should only ever pass 0 or 1. */
         vpanic("x86g_dirtyhelper_SxDT");
   }
#  else
   /* Not an x86 host: return an all-zeroes descriptor-table image. */
   UChar* p = (UChar*)address;
   p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
#  endif
}
   2440 
   2441 /*---------------------------------------------------------------*/
   2442 /*--- Helpers for MMX/SSE/SSE2.                               ---*/
   2443 /*---------------------------------------------------------------*/
   2444 
   2445 static inline UChar abdU8 ( UChar xx, UChar yy ) {
   2446    return toUChar(xx>yy ? xx-yy : yy-xx);
   2447 }
   2448 
   2449 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   2450    return (((ULong)w1) << 32) | ((ULong)w0);
   2451 }
   2452 
   2453 static inline UShort sel16x4_3 ( ULong w64 ) {
   2454    UInt hi32 = toUInt(w64 >> 32);
   2455    return toUShort(hi32 >> 16);
   2456 }
   2457 static inline UShort sel16x4_2 ( ULong w64 ) {
   2458    UInt hi32 = toUInt(w64 >> 32);
   2459    return toUShort(hi32);
   2460 }
   2461 static inline UShort sel16x4_1 ( ULong w64 ) {
   2462    UInt lo32 = toUInt(w64);
   2463    return toUShort(lo32 >> 16);
   2464 }
   2465 static inline UShort sel16x4_0 ( ULong w64 ) {
   2466    UInt lo32 = toUInt(w64);
   2467    return toUShort(lo32);
   2468 }
   2469 
   2470 static inline UChar sel8x8_7 ( ULong w64 ) {
   2471    UInt hi32 = toUInt(w64 >> 32);
   2472    return toUChar(hi32 >> 24);
   2473 }
   2474 static inline UChar sel8x8_6 ( ULong w64 ) {
   2475    UInt hi32 = toUInt(w64 >> 32);
   2476    return toUChar(hi32 >> 16);
   2477 }
   2478 static inline UChar sel8x8_5 ( ULong w64 ) {
   2479    UInt hi32 = toUInt(w64 >> 32);
   2480    return toUChar(hi32 >> 8);
   2481 }
   2482 static inline UChar sel8x8_4 ( ULong w64 ) {
   2483    UInt hi32 = toUInt(w64 >> 32);
   2484    return toUChar(hi32 >> 0);
   2485 }
   2486 static inline UChar sel8x8_3 ( ULong w64 ) {
   2487    UInt lo32 = toUInt(w64);
   2488    return toUChar(lo32 >> 24);
   2489 }
   2490 static inline UChar sel8x8_2 ( ULong w64 ) {
   2491    UInt lo32 = toUInt(w64);
   2492    return toUChar(lo32 >> 16);
   2493 }
   2494 static inline UChar sel8x8_1 ( ULong w64 ) {
   2495    UInt lo32 = toUInt(w64);
   2496    return toUChar(lo32 >> 8);
   2497 }
   2498 static inline UChar sel8x8_0 ( ULong w64 ) {
   2499    UInt lo32 = toUInt(w64);
   2500    return toUChar(lo32 >> 0);
   2501 }
   2502 
   2503 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2504 ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
   2505 {
   2506    return
   2507       mk32x2(
   2508          (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
   2509             + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
   2510          (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
   2511             + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
   2512       );
   2513 }
   2514 
   2515 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2516 UInt x86g_calculate_mmx_pmovmskb ( ULong xx )
   2517 {
   2518    UInt r = 0;
   2519    if (xx & (1ULL << (64-1))) r |= (1<<7);
   2520    if (xx & (1ULL << (56-1))) r |= (1<<6);
   2521    if (xx & (1ULL << (48-1))) r |= (1<<5);
   2522    if (xx & (1ULL << (40-1))) r |= (1<<4);
   2523    if (xx & (1ULL << (32-1))) r |= (1<<3);
   2524    if (xx & (1ULL << (24-1))) r |= (1<<2);
   2525    if (xx & (1ULL << (16-1))) r |= (1<<1);
   2526    if (xx & (1ULL << ( 8-1))) r |= (1<<0);
   2527    return r;
   2528 }
   2529 
   2530 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2531 ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
   2532 {
   2533    UInt t = 0;
   2534    t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
   2535    t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
   2536    t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
   2537    t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
   2538    t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
   2539    t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
   2540    t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
   2541    t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
   2542    t &= 0xFFFF;
   2543    return (ULong)t;
   2544 }
   2545 
   2546 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2547 UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )
   2548 {
   2549    UInt rHi8 = x86g_calculate_mmx_pmovmskb ( w64hi );
   2550    UInt rLo8 = x86g_calculate_mmx_pmovmskb ( w64lo );
   2551    return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF);
   2552 }
   2553 
   2554 
   2555 /*---------------------------------------------------------------*/
   2556 /*--- Helpers for dealing with segment overrides.             ---*/
   2557 /*---------------------------------------------------------------*/
   2558 
   2559 static inline
   2560 UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
   2561 {
   2562    UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
   2563    UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
   2564    UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
   2565    return (hi << 24) | (mid << 16) | lo;
   2566 }
   2567 
   2568 static inline
   2569 UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
   2570 {
   2571     UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
   2572     UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
   2573     UInt limit = (hi << 16) | lo;
   2574     if (ent->LdtEnt.Bits.Granularity)
   2575        limit = (limit << 12) | 0xFFF;
   2576     return limit;
   2577 }
   2578 
   2579 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2580 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
   2581                               UInt seg_selector, UInt virtual_addr )
   2582 {
   2583    UInt tiBit, base, limit;
   2584    VexGuestX86SegDescr* the_descrs;
   2585 
   2586    Bool verboze = False;
   2587 
   2588    /* If this isn't true, we're in Big Trouble. */
   2589    vassert(8 == sizeof(VexGuestX86SegDescr));
   2590 
   2591    if (verboze)
   2592       vex_printf("x86h_use_seg_selector: "
   2593                  "seg_selector = 0x%x, vaddr = 0x%x\n",
   2594                  seg_selector, virtual_addr);
   2595 
   2596    /* Check for wildly invalid selector. */
   2597    if (seg_selector & ~0xFFFF)
   2598       goto bad;
   2599 
   2600    seg_selector &= 0x0000FFFF;
   2601 
   2602    /* Sanity check the segment selector.  Ensure that RPL=11b (least
   2603       privilege).  This forms the bottom 2 bits of the selector. */
   2604    if ((seg_selector & 3) != 3)
   2605       goto bad;
   2606 
   2607    /* Extract the TI bit (0 means GDT, 1 means LDT) */
   2608    tiBit = (seg_selector >> 2) & 1;
   2609 
   2610    /* Convert the segment selector onto a table index */
   2611    seg_selector >>= 3;
   2612    vassert(seg_selector >= 0 && seg_selector < 8192);
   2613 
   2614    if (tiBit == 0) {
   2615 
   2616       /* GDT access. */
   2617       /* Do we actually have a GDT to look at? */
   2618       if (gdt == 0)
   2619          goto bad;
   2620 
   2621       /* Check for access to non-existent entry. */
   2622       if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
   2623          goto bad;
   2624 
   2625       the_descrs = (VexGuestX86SegDescr*)gdt;
   2626       base  = get_segdescr_base (&the_descrs[seg_selector]);
   2627       limit = get_segdescr_limit(&the_descrs[seg_selector]);
   2628 
   2629    } else {
   2630 
   2631       /* All the same stuff, except for the LDT. */
   2632       if (ldt == 0)
   2633          goto bad;
   2634 
   2635       if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
   2636          goto bad;
   2637 
   2638       the_descrs = (VexGuestX86SegDescr*)ldt;
   2639       base  = get_segdescr_base (&the_descrs[seg_selector]);
   2640       limit = get_segdescr_limit(&the_descrs[seg_selector]);
   2641 
   2642    }
   2643 
   2644    /* Do the limit check.  Note, this check is just slightly too
   2645       slack.  Really it should be "if (virtual_addr + size - 1 >=
   2646       limit)," but we don't have the size info to hand.  Getting it
   2647       could be significantly complex.  */
   2648    if (virtual_addr >= limit)
   2649       goto bad;
   2650 
   2651    if (verboze)
   2652       vex_printf("x86h_use_seg_selector: "
   2653                  "base = 0x%x, addr = 0x%x\n",
   2654                  base, base + virtual_addr);
   2655 
   2656    /* High 32 bits are zero, indicating success. */
   2657    return (ULong)( ((UInt)virtual_addr) + base );
   2658 
   2659  bad:
   2660    return 1ULL << 32;
   2661 }
   2662 
   2663 
   2664 /*---------------------------------------------------------------*/
   2665 /*--- Helpers for dealing with, and describing,               ---*/
   2666 /*--- guest state as a whole.                                 ---*/
   2667 /*---------------------------------------------------------------*/
   2668 
   2669 /* Initialise the entire x86 guest state. */
   2670 /* VISIBLE TO LIBVEX CLIENT */
   2671 void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
   2672 {
   2673    vex_state->host_EvC_FAILADDR = 0;
   2674    vex_state->host_EvC_COUNTER = 0;
   2675 
   2676    vex_state->guest_EAX = 0;
   2677    vex_state->guest_ECX = 0;
   2678    vex_state->guest_EDX = 0;
   2679    vex_state->guest_EBX = 0;
   2680    vex_state->guest_ESP = 0;
   2681    vex_state->guest_EBP = 0;
   2682    vex_state->guest_ESI = 0;
   2683    vex_state->guest_EDI = 0;
   2684 
   2685    vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   2686    vex_state->guest_CC_DEP1 = 0;
   2687    vex_state->guest_CC_DEP2 = 0;
   2688    vex_state->guest_CC_NDEP = 0;
   2689    vex_state->guest_DFLAG   = 1; /* forwards */
   2690    vex_state->guest_IDFLAG  = 0;
   2691    vex_state->guest_ACFLAG  = 0;
   2692 
   2693    vex_state->guest_EIP = 0;
   2694 
   2695    /* Initialise the simulated FPU */
   2696    x86g_dirtyhelper_FINIT( vex_state );
   2697 
   2698    /* Initialse the SSE state. */
   2699 #  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;
   2700 
   2701    vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
   2702    SSEZERO(vex_state->guest_XMM0);
   2703    SSEZERO(vex_state->guest_XMM1);
   2704    SSEZERO(vex_state->guest_XMM2);
   2705    SSEZERO(vex_state->guest_XMM3);
   2706    SSEZERO(vex_state->guest_XMM4);
   2707    SSEZERO(vex_state->guest_XMM5);
   2708    SSEZERO(vex_state->guest_XMM6);
   2709    SSEZERO(vex_state->guest_XMM7);
   2710 
   2711 #  undef SSEZERO
   2712 
   2713    vex_state->guest_CS  = 0;
   2714    vex_state->guest_DS  = 0;
   2715    vex_state->guest_ES  = 0;
   2716    vex_state->guest_FS  = 0;
   2717    vex_state->guest_GS  = 0;
   2718    vex_state->guest_SS  = 0;
   2719    vex_state->guest_LDT = 0;
   2720    vex_state->guest_GDT = 0;
   2721 
   2722    vex_state->guest_EMWARN = EmWarn_NONE;
   2723 
   2724    /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
   2725    vex_state->guest_TISTART = 0;
   2726    vex_state->guest_TILEN   = 0;
   2727 
   2728    vex_state->guest_NRADDR   = 0;
   2729    vex_state->guest_SC_CLASS = 0;
   2730    vex_state->guest_IP_AT_SYSCALL = 0;
   2731 
   2732    Int i;
   2733    for (i = 0; i < sizeof(vex_state->padding)
   2734                    / sizeof(vex_state->padding[0]); i++) {
   2735       vex_state->padding[i] = 0;
   2736    }
   2737 }
   2738 
   2739 
   2740 /* Figure out if any part of the guest state contained in minoff
   2741    .. maxoff requires precise memory exceptions.  If in doubt return
   2742    True (but this is generates significantly slower code).
   2743 
   2744    By default we enforce precise exns for guest %ESP, %EBP and %EIP
   2745    only.  These are the minimum needed to extract correct stack
   2746    backtraces from x86 code.
   2747 */
   2748 Bool guest_x86_state_requires_precise_mem_exns ( Int minoff,
   2749                                                  Int maxoff)
   2750 {
   2751    Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
   2752    Int ebp_max = ebp_min + 4 - 1;
   2753    Int esp_min = offsetof(VexGuestX86State, guest_ESP);
   2754    Int esp_max = esp_min + 4 - 1;
   2755    Int eip_min = offsetof(VexGuestX86State, guest_EIP);
   2756    Int eip_max = eip_min + 4 - 1;
   2757 
   2758    if (maxoff < ebp_min || minoff > ebp_max) {
   2759       /* no overlap with ebp */
   2760    } else {
   2761       return True;
   2762    }
   2763 
   2764    if (maxoff < esp_min || minoff > esp_max) {
   2765       /* no overlap with esp */
   2766    } else {
   2767       return True;
   2768    }
   2769 
   2770    if (maxoff < eip_min || minoff > eip_max) {
   2771       /* no overlap with eip */
   2772    } else {
   2773       return True;
   2774    }
   2775 
   2776    return False;
   2777 }
   2778 
   2779 
/* Yields a { byte-offset, byte-size } pair describing 'field' of the
   guest state, for the always-defined table below. */
#define ALWAYSDEFD(field)                           \
    { offsetof(VexGuestX86State, field),            \
      (sizeof ((VexGuestX86State*)0)->field) }

/* Describes the x86 guest state to the rest of the system: where the
   stack/frame/instruction pointers live, and which chunks Memcheck
   may regard as always defined.  NOTE: n_alwaysDefd must equal the
   number of ALWAYSDEFD entries listed. */
VexGuestLayout
   x86guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestX86State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestX86State,guest_ESP),
          .sizeof_SP = 4,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestX86State,guest_EBP),
          .sizeof_FP = 4,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestX86State,guest_EIP),
          .sizeof_IP = 4,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 24,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_ACFLAG),
                 /*  5 */ ALWAYSDEFD(guest_EIP),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 /* 10 */ ALWAYSDEFD(guest_CS),
                 /* 11 */ ALWAYSDEFD(guest_DS),
                 /* 12 */ ALWAYSDEFD(guest_ES),
                 /* 13 */ ALWAYSDEFD(guest_FS),
                 /* 14 */ ALWAYSDEFD(guest_GS),
                 /* 15 */ ALWAYSDEFD(guest_SS),
                 /* 16 */ ALWAYSDEFD(guest_LDT),
                 /* 17 */ ALWAYSDEFD(guest_GDT),
                 /* 18 */ ALWAYSDEFD(guest_EMWARN),
                 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 20 */ ALWAYSDEFD(guest_TISTART),
                 /* 21 */ ALWAYSDEFD(guest_TILEN),
                 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };
   2836 
   2837 
   2838 /*---------------------------------------------------------------*/
   2839 /*--- end                                 guest_x86_helpers.c ---*/
   2840 /*---------------------------------------------------------------*/
   2841