Home | History | Annotate | Download | only in priv
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                               guest_x86_helpers.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2015 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex_emnote.h"
     38 #include "libvex_guest_x86.h"
     39 #include "libvex_ir.h"
     40 #include "libvex.h"
     41 
     42 #include "main_util.h"
     43 #include "main_globals.h"
     44 #include "guest_generic_bb_to_IR.h"
     45 #include "guest_x86_defs.h"
     46 #include "guest_generic_x87.h"
     47 
     48 
     49 /* This file contains helper functions for x86 guest code.
     50    Calls to these functions are generated by the back end.
     51    These calls are of course in the host machine code and
     52    this file will be compiled to host machine code, so that
     53    all makes sense.
     54 
     55    Only change the signatures of these helper functions very
     56    carefully.  If you change the signature here, you'll have to change
     57    the parameters passed to it in the IR calls constructed by
     58    guest-x86/toIR.c.
     59 
     60    The convention used is that all functions called from generated
     61    code are named x86g_<something>, and any function whose name lacks
     62    that prefix is not called from generated code.  Note that some
     63    LibVEX_* functions can however be called by VEX's client, but that
     64    is not the same as calling them from VEX-generated code.
     65 */
     66 
     67 
     68 /* Set to 1 to get detailed profiling info about use of the flag
     69    machinery. */
     70 #define PROFILE_EFLAGS 0
     71 
     72 
     73 /*---------------------------------------------------------------*/
     74 /*--- %eflags run-time helpers.                               ---*/
     75 /*---------------------------------------------------------------*/
     76 
/* Lookup table for the x86 parity flag: parity_table[b] is
   X86G_CC_MASK_P if byte b contains an even number of 1 bits, else 0.
   Indexed by the least-significant byte of a result, since PF on x86
   reflects only the low 8 bits. */
static const UChar parity_table[256] = {
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
    111 
    112 /* generalised left-shifter */
    113 inline static Int lshift ( Int x, Int n )
    114 {
    115    if (n >= 0)
    116       return (UInt)x << n;
    117    else
    118       return x >> (-n);
    119 }
    120 
    121 /* identity on ULong */
    122 static inline ULong idULong ( ULong x )
    123 {
    124    return x;
    125 }
    126 
    127 
    128 #define PREAMBLE(__data_bits)					\
    129    /* const */ UInt DATA_MASK 					\
    130       = __data_bits==8 ? 0xFF 					\
    131                        : (__data_bits==16 ? 0xFFFF 		\
    132                                           : 0xFFFFFFFF); 	\
    133    /* const */ UInt SIGN_MASK = 1u << (__data_bits - 1);	\
    134    /* const */ UInt CC_DEP1 = cc_dep1_formal;			\
    135    /* const */ UInt CC_DEP2 = cc_dep2_formal;			\
    136    /* const */ UInt CC_NDEP = cc_ndep_formal;			\
    137    /* Four bogus assignments, which hopefully gcc can     */	\
    138    /* optimise away, and which stop it complaining about  */	\
    139    /* unused variables.                                   */	\
    140    SIGN_MASK = SIGN_MASK;					\
    141    DATA_MASK = DATA_MASK;					\
    142    CC_DEP2 = CC_DEP2;						\
    143    CC_NDEP = CC_NDEP;
    144 
    145 
    146 /*-------------------------------------------------------------*/
    147 
/* ADD: DEP1 = argL, DEP2 = argR; NDEP unused.  Recomputes all six
   status flags (C P A Z S O) for argL + argR at the given width.
   CF: result wrapped below argL.  OF: operands have equal signs and
   the result's sign differs from them. */
#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     argL = CC_DEP1;						\
     argR = CC_DEP2;						\
     res  = argL + argR;					\
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;			\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
                 12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    166 
    167 /*-------------------------------------------------------------*/
    168 
/* SUB/CMP: DEP1 = argL, DEP2 = argR; NDEP unused.  Recomputes all
   six status flags for argL - argR at the given width.
   CF: unsigned borrow (argL < argR).  OF: operands have different
   signs and the result's sign differs from argL's. */
#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     argL = CC_DEP1;						\
     argR = CC_DEP2;						\
     res  = argL - argR;					\
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;			\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR) & (argL ^ res),	 		\
                 12 - DATA_BITS) & X86G_CC_MASK_O; 		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    187 
    188 /*-------------------------------------------------------------*/
    189 
/* ADC: DEP1 = argL, DEP2 = argR ^ oldC, NDEP = old flags (source of
   the old carry).  The thunk stores argR xor'd with the old carry, so
   the xor is undone here before computing argL + argR + oldC.
   CF needs <= rather than < when a carry came in, since res == argL
   is then also a wrap-around. */
#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, oldC, res;		       		\
     oldC = CC_NDEP & X86G_CC_MASK_C;				\
     argL = CC_DEP1;						\
     argR = CC_DEP2 ^ oldC;	       				\
     res  = (argL + argR) + oldC;				\
     if (oldC)							\
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;		\
     else							\
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;		\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
                  12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    212 
    213 /*-------------------------------------------------------------*/
    214 
/* SBB: DEP1 = argL, DEP2 = argR ^ oldC, NDEP = old flags (source of
   the old carry).  As with ADC, argR is stored xor'd with the old
   carry and is un-xor'd here; result is argL - argR - oldC.
   CF needs <= rather than < when a borrow came in. */
#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, oldC, res;		       		\
     oldC = CC_NDEP & X86G_CC_MASK_C;				\
     argL = CC_DEP1;						\
     argR = CC_DEP2 ^ oldC;	       				\
     res  = (argL - argR) - oldC;				\
     if (oldC)							\
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;		\
     else							\
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;		\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR) & (argL ^ res), 			\
                 12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    237 
    238 /*-------------------------------------------------------------*/
    239 
/* Logical ops (AND/OR/XOR/TEST): DEP1 = result; DEP2 and NDEP unused.
   CF, AF and OF are always cleared; P, Z and S are derived from the
   result alone. */
#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     cf = 0;							\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0;							\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     of = 0;							\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    253 
    254 /*-------------------------------------------------------------*/
    255 
/* INC: DEP1 = result, NDEP = old flags.  INC leaves CF unchanged, so
   CF is taken from the old flags in NDEP.  The original operand is
   reconstructed as res - 1.  OF is set exactly when the result equals
   the sign-bit value, i.e. the operand was the largest positive
   number at this width. */
#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     res  = CC_DEP1;						\
     argL = res - 1;						\
     argR = 1;							\
     cf = CC_NDEP & X86G_CC_MASK_C;				\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    273 
    274 /*-------------------------------------------------------------*/
    275 
/* DEC: DEP1 = result, NDEP = old flags.  DEC leaves CF unchanged, so
   CF is taken from NDEP.  The original operand is reconstructed as
   res + 1.  OF is set exactly when the result equals SIGN_MASK - 1,
   i.e. the operand was the most negative number at this width. */
#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     res  = CC_DEP1;						\
     argL = res + 1;						\
     argR = 1;							\
     cf = CC_NDEP & X86G_CC_MASK_C;				\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = ((res & DATA_MASK) 					\
          == ((UInt)SIGN_MASK - 1)) << 11;			\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    294 
    295 /*-------------------------------------------------------------*/
    296 
/* SHL: DEP1 = final result; DEP2 appears to hold the value before the
   last shift step (its top bit supplies CF) — the exact DEP2 encoding
   is set up by toIR.c; confirm there.  AF is architecturally
   undefined and is left at 0; OF is only architecturally defined for
   a shift count of 1. */
#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;	\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0; /* undefined */					\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     /* of is defined if shift count == 1 */			\
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) 		\
          & X86G_CC_MASK_O;					\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    312 
    313 /*-------------------------------------------------------------*/
    314 
/* SHR/SAR: DEP1 = final result; DEP2 appears to hold the value before
   the last shift step (its low bit supplies CF) — the exact DEP2
   encoding is set up by toIR.c; confirm there.  AF is architecturally
   undefined and left at 0; OF is only architecturally defined for a
   shift count of 1. */
#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);  					\
   { UInt cf, pf, af, zf, sf, of;				\
     cf = CC_DEP2 & 1;						\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0; /* undefined */					\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     /* of is defined if shift count == 1 */			\
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)		\
          & X86G_CC_MASK_O;					\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    330 
    331 /*-------------------------------------------------------------*/
    332 
/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
/* Rotates do not affect P/A/Z/S: those (and everything except O and
   C) are carried over unchanged from the old flags in NDEP. */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt fl 							\
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
          | (X86G_CC_MASK_C & CC_DEP1)				\
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,  		\
                                      11-(DATA_BITS-1)) 	\
                     ^ lshift(CC_DEP1, 11)));			\
     return fl;							\
   }								\
}
    347 
    348 /*-------------------------------------------------------------*/
    349 
/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
/* Rotates do not affect P/A/Z/S: those (and everything except O and
   C) are carried over unchanged from the old flags in NDEP. */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt fl 							\
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
          | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))	\
          | (X86G_CC_MASK_O & (lshift(CC_DEP1, 			\
                                      11-(DATA_BITS-1)) 	\
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));	\
     return fl;							\
   }								\
}
    364 
    365 /*-------------------------------------------------------------*/
    366 
/* Unsigned widening multiply (MUL): DEP1 = argL, DEP2 = argR.  The
   product is computed both at the operand width (lo) and at double
   width (rr); hi is the upper half of the double-width product.
   CF and OF are both set exactly when the upper half is non-zero,
   i.e. the result did not fit in the operand width.  AF is
   architecturally undefined and left at 0. */
#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }								\
}
    390 
    391 /*-------------------------------------------------------------*/
    392 
/* Signed widening multiply (IMUL): DEP1 = argL, DEP2 = argR.  As with
   ACTIONS_UMUL, the product is computed at both the operand width
   (lo) and double width (rr).  CF and OF are both set exactly when
   the upper half is not the sign-extension of the lower half, i.e.
   the signed result did not fit in the operand width.  AF is
   architecturally undefined and left at 0. */
#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }								\
}
    416 
    417 
#if PROFILE_EFLAGS

/* Lazily set by initCounts() on the first profiled call. */
static Bool initted     = False;

/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

/* True once every 0x400000 profiled calls; used to rate-limit the
   dump below. */
#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))


/* Dump the accumulated profile counters: totals, then one row per
   CC_OP with its slow/fast C-flag counts and per-condition counts
   (in thousands when >= 1000). */
static void showCounts ( void )
{
   Int op, co;
   HChar ch;
   vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {

      /* Tag each row with the operand width: ops past COPY come in
         B/W/L triples. */
      ch = ' ';
      if (op > 0 && (op-1) % 3 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 3 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 3 == 2)
         ch = 'L';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("     ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

/* Zero all profile counters; marks the profiling machinery initted. */
static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_EFLAGS */
    488 
    489 
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code.
   Dispatches on cc_op to the matching ACTIONS_* macro, which expands
   to a block ending in a return of the C|P|A|Z|S|O flag bits.  The
   macros read the formal parameters via PREAMBLE, which is why the
   parameter names must stay exactly as spelled here. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      /* COPY: dep1 already holds the flags; just mask to the six
         status-flag bits. */
      case X86G_CC_OP_COPY:
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt   );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}
    571 
    572 
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters.
   Thin wrapper over the worker above; the wrapper is the entry point
   used by generated code, which also lets the profiling build count
   these entries separately. */
UInt x86g_calculate_eflags_all ( UInt cc_op,
                                 UInt cc_dep1,
                                 UInt cc_dep2,
                                 UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}
    588 
    589 
    590 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
    591 /* Calculate just the carry flag from the supplied thunk parameters. */
    592 VEX_REGPARM(3)
    593 UInt x86g_calculate_eflags_c ( UInt cc_op,
    594                                UInt cc_dep1,
    595                                UInt cc_dep2,
    596                                UInt cc_ndep )
    597 {
    598 #  if PROFILE_EFLAGS
    599    if (!initted) initCounts();
    600    n_calc_c++;
    601    tabc_fast[cc_op]++;
    602    if (SHOW_COUNTS_NOW) showCounts();
    603 #  endif
    604 
    605    /* Fast-case some common ones. */
    606    switch (cc_op) {
    607       case X86G_CC_OP_LOGICL:
    608       case X86G_CC_OP_LOGICW:
    609       case X86G_CC_OP_LOGICB:
    610          return 0;
    611       case X86G_CC_OP_SUBL:
    612          return ((UInt)cc_dep1) < ((UInt)cc_dep2)
    613                    ? X86G_CC_MASK_C : 0;
    614       case X86G_CC_OP_SUBW:
    615          return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
    616                    ? X86G_CC_MASK_C : 0;
    617       case X86G_CC_OP_SUBB:
    618          return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
    619                    ? X86G_CC_MASK_C : 0;
    620       case X86G_CC_OP_INCL:
    621       case X86G_CC_OP_DECL:
    622          return cc_ndep & X86G_CC_MASK_C;
    623       default:
    624          break;
    625    }
    626 
    627 #  if PROFILE_EFLAGS
    628    tabc_fast[cc_op]--;
    629    tabc_slow[cc_op]++;
    630 #  endif
    631 
    632    return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
    633           & X86G_CC_MASK_C;
    634 }
    635 
    636 
    637 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
    638 /* returns 1 or 0 */
    639 UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
    640                                 UInt cc_op,
    641                                 UInt cc_dep1,
    642                                 UInt cc_dep2,
    643                                 UInt cc_ndep )
    644 {
    645    UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
    646                                                cc_dep2, cc_ndep);
    647    UInt of,sf,zf,cf,pf;
    648    UInt inv = cond & 1;
    649 
    650 #  if PROFILE_EFLAGS
    651    if (!initted) initCounts();
    652    tab_cond[cc_op][cond]++;
    653    n_calc_cond++;
    654    if (SHOW_COUNTS_NOW) showCounts();
    655 #  endif
    656 
    657    switch (cond) {
    658       case X86CondNO:
    659       case X86CondO: /* OF == 1 */
    660          of = eflags >> X86G_CC_SHIFT_O;
    661          return 1 & (inv ^ of);
    662 
    663       case X86CondNZ:
    664       case X86CondZ: /* ZF == 1 */
    665          zf = eflags >> X86G_CC_SHIFT_Z;
    666          return 1 & (inv ^ zf);
    667 
    668       case X86CondNB:
    669       case X86CondB: /* CF == 1 */
    670          cf = eflags >> X86G_CC_SHIFT_C;
    671          return 1 & (inv ^ cf);
    672          break;
    673 
    674       case X86CondNBE:
    675       case X86CondBE: /* (CF or ZF) == 1 */
    676          cf = eflags >> X86G_CC_SHIFT_C;
    677          zf = eflags >> X86G_CC_SHIFT_Z;
    678          return 1 & (inv ^ (cf | zf));
    679          break;
    680 
    681       case X86CondNS:
    682       case X86CondS: /* SF == 1 */
    683          sf = eflags >> X86G_CC_SHIFT_S;
    684          return 1 & (inv ^ sf);
    685 
    686       case X86CondNP:
    687       case X86CondP: /* PF == 1 */
    688          pf = eflags >> X86G_CC_SHIFT_P;
    689          return 1 & (inv ^ pf);
    690 
    691       case X86CondNL:
    692       case X86CondL: /* (SF xor OF) == 1 */
    693          sf = eflags >> X86G_CC_SHIFT_S;
    694          of = eflags >> X86G_CC_SHIFT_O;
    695          return 1 & (inv ^ (sf ^ of));
    696          break;
    697 
    698       case X86CondNLE:
    699       case X86CondLE: /* ((SF xor OF) or ZF)  == 1 */
    700          sf = eflags >> X86G_CC_SHIFT_S;
    701          of = eflags >> X86G_CC_SHIFT_O;
    702          zf = eflags >> X86G_CC_SHIFT_Z;
    703          return 1 & (inv ^ ((sf ^ of) | zf));
    704          break;
    705 
    706       default:
    707          /* shouldn't really make these calls from generated code */
    708          vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
    709                     cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
    710          vpanic("x86g_calculate_condition");
    711    }
    712 }
    713 
    714 
    715 /* VISIBLE TO LIBVEX CLIENT */
    716 UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
    717 {
    718    UInt eflags = x86g_calculate_eflags_all_WRK(
    719                     vex_state->guest_CC_OP,
    720                     vex_state->guest_CC_DEP1,
    721                     vex_state->guest_CC_DEP2,
    722                     vex_state->guest_CC_NDEP
    723                  );
    724    UInt dflag = vex_state->guest_DFLAG;
    725    vassert(dflag == 1 || dflag == 0xFFFFFFFF);
    726    if (dflag == 0xFFFFFFFF)
    727       eflags |= X86G_CC_MASK_D;
    728    if (vex_state->guest_IDFLAG == 1)
    729       eflags |= X86G_CC_MASK_ID;
    730    if (vex_state->guest_ACFLAG == 1)
    731       eflags |= X86G_CC_MASK_AC;
    732 
    733    return eflags;
    734 }
    735 
    736 /* VISIBLE TO LIBVEX CLIENT */
    737 void
    738 LibVEX_GuestX86_put_eflags ( UInt eflags,
    739                              /*MOD*/VexGuestX86State* vex_state )
    740 {
    741    /* D flag */
    742    if (eflags & X86G_CC_MASK_D) {
    743       vex_state->guest_DFLAG = 0xFFFFFFFF;
    744       eflags &= ~X86G_CC_MASK_D;
    745    }
    746    else
    747       vex_state->guest_DFLAG = 1;
    748 
    749    /* ID flag */
    750    if (eflags & X86G_CC_MASK_ID) {
    751       vex_state->guest_IDFLAG = 1;
    752       eflags &= ~X86G_CC_MASK_ID;
    753    }
    754    else
    755       vex_state->guest_IDFLAG = 0;
    756 
    757    /* AC flag */
    758    if (eflags & X86G_CC_MASK_AC) {
    759       vex_state->guest_ACFLAG = 1;
    760       eflags &= ~X86G_CC_MASK_AC;
    761    }
    762    else
    763       vex_state->guest_ACFLAG = 0;
    764 
    765    UInt cc_mask = X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z |
    766                   X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P;
    767    vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
    768    vex_state->guest_CC_DEP1 = eflags & cc_mask;
    769    vex_state->guest_CC_DEP2 = 0;
    770    vex_state->guest_CC_NDEP = 0;
    771 }
    772 
    773 /* VISIBLE TO LIBVEX CLIENT */
    774 void
    775 LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
    776                               /*MOD*/VexGuestX86State* vex_state )
    777 {
    778    UInt oszacp = x86g_calculate_eflags_all_WRK(
    779                     vex_state->guest_CC_OP,
    780                     vex_state->guest_CC_DEP1,
    781                     vex_state->guest_CC_DEP2,
    782                     vex_state->guest_CC_NDEP
    783                  );
    784    if (new_carry_flag & 1) {
    785       oszacp |= X86G_CC_MASK_C;
    786    } else {
    787       oszacp &= ~X86G_CC_MASK_C;
    788    }
    789    vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
    790    vex_state->guest_CC_DEP1 = oszacp;
    791    vex_state->guest_CC_DEP2 = 0;
    792    vex_state->guest_CC_NDEP = 0;
    793 }
    794 
    795 
    796 /*---------------------------------------------------------------*/
    797 /*--- %eflags translation-time function specialisers.         ---*/
    798 /*--- These help iropt specialise calls the above run-time    ---*/
    799 /*--- %eflags functions.                                      ---*/
    800 /*---------------------------------------------------------------*/
    801 
    802 /* Used by the optimiser to try specialisations.  Returns an
    803    equivalent expression, or NULL if none. */
    804 
    805 static inline Bool isU32 ( IRExpr* e, UInt n )
    806 {
    807    return
    808       toBool( e->tag == Iex_Const
    809               && e->Iex.Const.con->tag == Ico_U32
    810               && e->Iex.Const.con->Ico.U32 == n );
    811 }
    812 
/* Translation-time specialiser for the %eflags helper calls.  Given a
   call to one of the x86g_calculate_* helpers with (partially) constant
   arguments, return a cheaper equivalent IR expression, or NULL if no
   specialisation applies.  Called by iropt. */
IRExpr* guest_x86_spechelper ( const HChar* function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "x86g_calculate_condition" --------- */

   if (vex_streq(function_name, "x86g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
         /* long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }

      /*---------------- SUBL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test !(dst <s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
         /* long sub/cmp, then NLE (signed not less than or equal)
            --> test dst >s src
            --> test !(dst <=s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test !(dst <u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S,
                           binop(Iop_Sub32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
         /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S,
                                 binop(Iop_Sub32, cc_dep1, cc_dep2),
                                 mkU32(0))),
                      mkU32(1));
      }

      /*---------------- SUBW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }

      /*---------------- SUBB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
         /* byte sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep2,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep1,mkU32(0xFF))));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU32(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U32s. */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[7]
         */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                mkU32(1));
      }

      /*---------------- LOGICL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SZ ^ OF) | ZF, but
            OF is zero, so this reduces to SZ | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
         /* long and/or/xor, then BE
            LOGIC sets ZF according to the result and makes CF be zero.
            BE computes (CF | ZF), but CF is zero, so this reduces ZF
            -- which will be 1 iff the result is zero.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* long and/or/xor, then S --> (UInt)result[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
         /* see comment below for (LOGICB, CondNS) */
         /* long and/or/xor, then NS --> (UInt) ~ result[31] */
         return binop(Iop_Xor32,
                binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1)),
                mkU32(1));
      }

      /*---------------- LOGICW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* word and/or/xor, then S --> (UInt)result[15] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(15)),
                      mkU32(1));
      }

      /*---------------- LOGICB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         /* b9ac9:       84 c0                   test   %al,%al
            b9acb:       75 0d                   jne    b9ada */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
         /* ditto, for negation-of-S. */
         /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
         return binop(Iop_Xor32,
                binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1)),
                mkU32(1));
      }

      /*---------------- DECL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
         /* dec L, then S --> compare DST <s 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
         /* dec W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- INCW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
         /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
         /* inc W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- SHRL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
         /* SHRL, then Z --> test dep1 == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of x87 FP compares: "fcom ... ;
         fnstsw %ax ; sahf ; jbe" for example. */

      if (isU32(cc_op, X86G_CC_OP_COPY) &&
          (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test
            (C or Z) == 1. */
         /* COPY, then NBE --> extract C and Z from dep1, and test
            (C or Z) == 0. */
         UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(
                        Iop_Or32,
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
                     ),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
         UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
         UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_c" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
      /* specialise calls to above "calculate_eflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep1,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep2,mkU32(0xFF))));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL)
          || isU32(cc_op, X86G_CC_OP_LOGICW)
          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU32(0);
      }
      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }
      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* cflag after COPY is stored in DEP1. */
         return
            binop(
               Iop_And32,
               binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
               mkU32(1)
            );
      }
      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           cc_dep1));
      }
      // ATC, requires verification, no test case known
      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
      //   /* C after signed widening multiply denotes the case where
      //      the top half of the result isn't simply the sign extension
      //      of the bottom half (iow the result doesn't fit completely
      //      in the bottom half).  Hence:
      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
      //      where 'x' denotes signed widening multiply.*/
      //   return
      //      unop(Iop_1Uto32,
      //           binop(Iop_CmpNE32,
      //                 unop(Iop_64HIto32,
      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
      //                 binop(Iop_Sar32,
      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
      //}
#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_all" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
      /* specialise calls to above "calculate_eflags_all" function */
      IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      /* cc_dep2 = args[2]; */
      /* cc_ndep = args[3]; */

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* eflags after COPY are stored in DEP1. */
         return
            binop(
               Iop_And32,
               cc_dep1,
               mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                     | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
            );
      }
      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   return NULL;
}
   1360 
   1361 
   1362 /*---------------------------------------------------------------*/
   1363 /*--- Supporting functions for x87 FPU activities.            ---*/
   1364 /*---------------------------------------------------------------*/
   1365 
   1366 static inline Bool host_is_little_endian ( void )
   1367 {
   1368    UInt x = 0x76543210;
   1369    UChar* p = (UChar*)(&x);
   1370    return toBool(*p == 0x10);
   1371 }
   1372 
   1373 /* 80 and 64-bit floating point formats:
   1374 
   1375    80-bit:
   1376 
   1377     S  0       0-------0      zero
   1378     S  0       0X------X      denormals
   1379     S  1-7FFE  1X------X      normals (all normals have leading 1)
   1380     S  7FFF    10------0      infinity
   1381     S  7FFF    10X-----X      snan
   1382     S  7FFF    11X-----X      qnan
   1383 
   1384    S is the sign bit.  For runs X----X, at least one of the Xs must be
   1385    nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
   1386    there is an explicitly represented leading 1, and a sign bit,
   1387    giving 80 in total.
   1388 
   1389    64-bit avoids the confusion of an explicitly represented leading 1
   1390    and so is simpler:
   1391 
   1392     S  0      0------0   zero
   1393     S  0      X------X   denormals
   1394     S  1-7FE  any        normals
   1395     S  7FF    0------0   infinity
   1396     S  7FF    0X-----X   snan
   1397     S  7FF    1X-----X   qnan
   1398 
   1399    Exponent is 11 bits, fractional part is 52 bits, and there is a
   1400    sign bit, giving 64 in total.
   1401 */
   1402 
   1403 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
   1404 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   1405 UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
   1406 {
   1407    Bool   mantissaIsZero;
   1408    Int    bexp;
   1409    UChar  sign;
   1410    UChar* f64;
   1411 
   1412    vassert(host_is_little_endian());
   1413 
   1414    /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
   1415 
   1416    f64  = (UChar*)(&dbl);
   1417    sign = toUChar( (f64[7] >> 7) & 1 );
   1418 
   1419    /* First off, if the tag indicates the register was empty,
   1420       return 1,0,sign,1 */
   1421    if (tag == 0) {
   1422       /* vex_printf("Empty\n"); */
   1423       return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
   1424                                  | X86G_FC_MASK_C0;
   1425    }
   1426 
   1427    bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   1428    bexp &= 0x7FF;
   1429 
   1430    mantissaIsZero
   1431       = toBool(
   1432            (f64[6] & 0x0F) == 0
   1433            && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
   1434         );
   1435 
   1436    /* If both exponent and mantissa are zero, the value is zero.
   1437       Return 1,0,sign,0. */
   1438    if (bexp == 0 && mantissaIsZero) {
   1439       /* vex_printf("Zero\n"); */
   1440       return X86G_FC_MASK_C3 | 0
   1441                              | (sign << X86G_FC_SHIFT_C1) | 0;
   1442    }
   1443 
   1444    /* If exponent is zero but mantissa isn't, it's a denormal.
   1445       Return 1,1,sign,0. */
   1446    if (bexp == 0 && !mantissaIsZero) {
   1447       /* vex_printf("Denormal\n"); */
   1448       return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
   1449                              | (sign << X86G_FC_SHIFT_C1) | 0;
   1450    }
   1451 
   1452    /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
   1453       Return 0,1,sign,1. */
   1454    if (bexp == 0x7FF && mantissaIsZero) {
   1455       /* vex_printf("Inf\n"); */
   1456       return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
   1457                                  | X86G_FC_MASK_C0;
   1458    }
   1459 
   1460    /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
   1461       Return 0,0,sign,1. */
   1462    if (bexp == 0x7FF && !mantissaIsZero) {
   1463       /* vex_printf("NaN\n"); */
   1464       return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
   1465    }
   1466 
   1467    /* Uh, ok, we give up.  It must be a normal finite number.
   1468       Return 0,1,sign,0.
   1469    */
   1470    /* vex_printf("normal\n"); */
   1471    return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
   1472 }
   1473 
   1474 
   1475 /* CALLED FROM GENERATED CODE */
   1476 /* DIRTY HELPER (reads guest memory) */
   1477 ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
   1478 {
   1479    ULong f64;
   1480    convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
   1481    return f64;
   1482 }
   1483 
   1484 /* CALLED FROM GENERATED CODE */
   1485 /* DIRTY HELPER (writes guest memory) */
   1486 void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
   1487 {
   1488    convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
   1489 }
   1490 
   1491 
   1492 /*----------------------------------------------*/
   1493 /*--- The exported fns ..                    ---*/
   1494 /*----------------------------------------------*/
   1495 
   1496 /* Layout of the real x87 state. */
   1497 /* 13 June 05: Fpu_State and auxiliary constants was moved to
   1498    g_generic_x87.h */
   1499 
   1500 
   1501 /* CLEAN HELPER */
   1502 /* fpucw[15:0] contains a x87 native format FPU control word.
   1503    Extract from it the required FPROUND value and any resulting
   1504    emulation warning, and return (warn << 32) | fpround value.
   1505 */
   1506 ULong x86g_check_fldcw ( UInt fpucw )
   1507 {
   1508    /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   1509    /* NOTE, encoded exactly as per enum IRRoundingMode. */
   1510    UInt rmode = (fpucw >> 10) & 3;
   1511 
   1512    /* Detect any required emulation warnings. */
   1513    VexEmNote ew = EmNote_NONE;
   1514 
   1515    if ((fpucw & 0x3F) != 0x3F) {
   1516       /* unmasked exceptions! */
   1517       ew = EmWarn_X86_x87exns;
   1518    }
   1519    else
   1520    if (((fpucw >> 8) & 3) != 3) {
   1521       /* unsupported precision */
   1522       ew = EmWarn_X86_x87precision;
   1523    }
   1524 
   1525    return (((ULong)ew) << 32) | ((ULong)rmode);
   1526 }
   1527 
   1528 /* CLEAN HELPER */
   1529 /* Given fpround as an IRRoundingMode value, create a suitable x87
   1530    native format FPU control word. */
   1531 UInt x86g_create_fpucw ( UInt fpround )
   1532 {
   1533    fpround &= 3;
   1534    return 0x037F | (fpround << 10);
   1535 }
   1536 
   1537 
   1538 /* CLEAN HELPER */
   1539 /* mxcsr[15:0] contains a SSE native format MXCSR value.
   1540    Extract from it the required SSEROUND value and any resulting
   1541    emulation warning, and return (warn << 32) | sseround value.
   1542 */
   1543 ULong x86g_check_ldmxcsr ( UInt mxcsr )
   1544 {
   1545    /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   1546    /* NOTE, encoded exactly as per enum IRRoundingMode. */
   1547    UInt rmode = (mxcsr >> 13) & 3;
   1548 
   1549    /* Detect any required emulation warnings. */
   1550    VexEmNote ew = EmNote_NONE;
   1551 
   1552    if ((mxcsr & 0x1F80) != 0x1F80) {
   1553       /* unmasked exceptions! */
   1554       ew = EmWarn_X86_sseExns;
   1555    }
   1556    else
   1557    if (mxcsr & (1<<15)) {
   1558       /* FZ is set */
   1559       ew = EmWarn_X86_fz;
   1560    }
   1561    else
   1562    if (mxcsr & (1<<6)) {
   1563       /* DAZ is set */
   1564       ew = EmWarn_X86_daz;
   1565    }
   1566 
   1567    return (((ULong)ew) << 32) | ((ULong)rmode);
   1568 }
   1569 
   1570 
   1571 /* CLEAN HELPER */
   1572 /* Given sseround as an IRRoundingMode value, create a suitable SSE
   1573    native format MXCSR value. */
   1574 UInt x86g_create_mxcsr ( UInt sseround )
   1575 {
   1576    sseround &= 3;
   1577    return 0x1F80 | (sseround << 13);
   1578 }
   1579 
   1580 
   1581 /* CALLED FROM GENERATED CODE */
   1582 /* DIRTY HELPER (writes guest state) */
   1583 /* Initialise the x87 FPU state as per 'finit'. */
   1584 void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
   1585 {
   1586    Int i;
   1587    gst->guest_FTOP = 0;
   1588    for (i = 0; i < 8; i++) {
   1589       gst->guest_FPTAG[i] = 0; /* empty */
   1590       gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   1591    }
   1592    gst->guest_FPROUND = (UInt)Irrm_NEAREST;
   1593    gst->guest_FC3210  = 0;
   1594 }
   1595 
   1596 
   1597 /* This is used to implement both 'frstor' and 'fldenv'.  The latter
   1598    appears to differ from the former only in that the 8 FP registers
   1599    themselves are not transferred into the guest state. */
static
VexEmNote do_put_x87 ( Bool moveRegs,
                       /*IN*/UChar* x87_state,
                       /*OUT*/VexGuestX86State* vex_state )
{
   /* Unpack an in-memory x87 image into the guest state.  Register
      values are transferred only when moveRegs is True ('frstor'
      behaviour); with False only the environment is loaded
      ('fldenv').  Returns any emulation warning arising from the
      control word. */
   Int        stno, preg;
   UInt       tag;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;  /* FSW[13:11] = top-of-stack */
   UInt       tagw    = x87->env[FP_ENV_TAG];
   UInt       fpucw   = x87->env[FP_ENV_CTRL];
   UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;     /* C3 (bit 14), C2..C0 (bits 10..8) */
   VexEmNote  ew;
   UInt       fpround;
   ULong      pair;

   /* Copy registers and tags.  Stack slot stno maps to physical
      register (stno + ftop) & 7; a 2-bit tag of 3 marks an empty
      register, anything else non-empty. */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         if (moveRegs)
            convert_f80le_to_f64le( &x87->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings.  x86g_check_fldcw returns
      (warning << 32) | rounding-mode. */
   pair    = x86g_check_fldcw ( (UInt)fpucw );
   fpround = (UInt)pair;
   ew      = (VexEmNote)(pair >> 32);

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}
   1657 
   1658 
   1659 /* Create an x87 FPU state from the guest state, as close as
   1660    we can approximate it. */
   1661 static
   1662 void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
   1663                   /*OUT*/UChar* x87_state )
   1664 {
   1665    Int        i, stno, preg;
   1666    UInt       tagw;
   1667    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   1668    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   1669    Fpu_State* x87     = (Fpu_State*)x87_state;
   1670    UInt       ftop    = vex_state->guest_FTOP;
   1671    UInt       c3210   = vex_state->guest_FC3210;
   1672 
   1673    for (i = 0; i < 14; i++)
   1674       x87->env[i] = 0;
   1675 
   1676    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   1677    x87->env[FP_ENV_STAT]
   1678       = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   1679    x87->env[FP_ENV_CTRL]
   1680       = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
   1681 
   1682    /* Dump the register stack in ST order. */
   1683    tagw = 0;
   1684    for (stno = 0; stno < 8; stno++) {
   1685       preg = (stno + ftop) & 7;
   1686       if (vexTags[preg] == 0) {
   1687          /* register is empty */
   1688          tagw |= (3 << (2*preg));
   1689          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
   1690                                  &x87->reg[10*stno] );
   1691       } else {
   1692          /* register is full. */
   1693          tagw |= (0 << (2*preg));
   1694          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
   1695                                  &x87->reg[10*stno] );
   1696       }
   1697    }
   1698    x87->env[FP_ENV_TAG] = toUShort(tagw);
   1699 }
   1700 
   1701 
   1702 /* CALLED FROM GENERATED CODE */
   1703 /* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
{
   /* Build an FXSAVE image at 'addr' from the guest state: the x87
      environment and registers (obtained via do_get_x87), MXCSR,
      and %xmm0..%xmm7.  Somewhat roundabout, but at least it's
      simple. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   U128*     xmm   = (U128*)(addr + 160);  /* xmm regs live at image offset 160 */
   UInt      mxcsr;
   UShort    fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   do_get_x87( gst, (UChar*)&tmp );
   mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */

   /* set addrS[2] in an endian-independent way.  The fxsave image
      uses an abridged tag: one bit per register, set iff the full
      2-bit tag is not 3 (i.e. the register is non-empty). */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5]  = 0; /* pad */

   addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
   addrS[4]  = 0;
   addrS[5]  = 0; /* FPU IP (bogus) */
   addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
                     could conceivably dump %CS here) */

   addrS[7]  = 0; /* Intel reserved */

   addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[10] = 0; /* segment selector for above operand pointer; %DS
                     perhaps? */
   addrS[11] = 0; /* Intel reserved */

   addrS[12] = toUShort(mxcsr);  /* MXCSR, low 16 bits */
   addrS[13] = toUShort(mxcsr >> 16);  /* MXCSR, high 16 bits */

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
   addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */

   /* Copy in the FP registers, in ST order.  Each register gets a
      16-byte slot, of which only the low 10 bytes (the 80-bit value)
      are meaningful; the rest is zeroed. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm7 remain to be copied.  If the host is big-endian, these
      need to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( xmm[0], gst->guest_XMM0 );
   COPY_U128( xmm[1], gst->guest_XMM1 );
   COPY_U128( xmm[2], gst->guest_XMM2 );
   COPY_U128( xmm[3], gst->guest_XMM3 );
   COPY_U128( xmm[4], gst->guest_XMM4 );
   COPY_U128( xmm[5], gst->guest_XMM5 );
   COPY_U128( xmm[6], gst->guest_XMM6 );
   COPY_U128( xmm[7], gst->guest_XMM7 );

#  undef COPY_U128
}
   1791 
   1792 
   1793 /* CALLED FROM GENERATED CODE */
   1794 /* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
{
   /* Rebuild guest state from an FXSAVE image at 'addr': first
      %xmm0..%xmm7, then the x87 state (via a temporary Fpu_State and
      do_put_x87), then MXCSR.  Returns at most one emulation
      warning, preferring an x87 warning over an XMM one. */
   Fpu_State tmp;
   VexEmNote warnX87 = EmNote_NONE;
   VexEmNote warnXMM = EmNote_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   U128*     xmm     = (U128*)(addr + 160);  /* xmm regs live at image offset 160 */
   UShort    fp_tags;
   Int       r, stno, i;

   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_XMM0, xmm[0] );
   COPY_U128( gst->guest_XMM1, xmm[1] );
   COPY_U128( gst->guest_XMM2, xmm[2] );
   COPY_U128( gst->guest_XMM3, xmm[3] );
   COPY_U128( gst->guest_XMM4, xmm[4] );
   COPY_U128( gst->guest_XMM5, xmm[5] );
   COPY_U128( gst->guest_XMM6, xmm[6] );
   COPY_U128( gst->guest_XMM7, xmm[7] );

#  undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */

   /* LLVM on Darwin turns the following loop into a movaps plus a
      handful of scalar stores.  This would work fine except for the
      fact that VEX doesn't keep the stack correctly (16-) aligned for
      the call, so it segfaults.  Hence, split the loop into two
      pieces (and pray LLVM doesn't merely glue them back together) so
      it's composed only of scalar stores and so is alignment
      insensitive.  Of course this is a kludge of the lamest kind --
      VEX should be fixed properly. */
   /* Code that seems to trigger the problem:
      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
   __asm__ __volatile__("" ::: "memory");
   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;

   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7]: each image slot is 16 bytes, of which
      only the low 10 bytes hold the 80-bit value */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   /* Expand the abridged tag byte (one bit per register, set iff
      non-empty, as written by FXSAVE) back into the full
      2-bits-per-register tag word. */
   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* summary bit set: VALID (00) --
                                     not really precise enough; could
                                     be zero/special, but 00 is safe */
      else
         fp_tags |= (3 << (2*r)); /* summary bit clear: EMPTY (11) */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );

   /* Reassemble MXCSR from its two image halfwords and install the
      rounding mode, noting any emulation warning. */
   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = x86g_check_ldmxcsr( w32 );

     warnXMM = (VexEmNote)(w64 >> 32);

     gst->guest_SSEROUND = w64 & 0xFFFFFFFF;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmNote_NONE)
      return warnX87;
   else
      return warnXMM;
}
   1885 
   1886 
   1887 /* CALLED FROM GENERATED CODE */
   1888 /* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
{
   /* Write the full x87 state (environment plus registers) from the
      guest state to guest memory at 'addr'. */
   do_get_x87( gst, (UChar*)addr );
}
   1893 
   1894 /* CALLED FROM GENERATED CODE */
   1895 /* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
{
   /* Load the full x87 state (environment plus registers) from guest
      memory at 'addr'; returns any emulation warning arising from
      the control word. */
   return do_put_x87( True/*regs too*/, (UChar*)addr, gst );
}
   1900 
   1901 /* CALLED FROM GENERATED CODE */
   1902 /* DIRTY HELPER (reads guest state, writes guest mem) */
   1903 void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
   1904 {
   1905    /* Somewhat roundabout, but at least it's simple. */
   1906    Int       i;
   1907    UShort*   addrP = (UShort*)addr;
   1908    Fpu_State tmp;
   1909    do_get_x87( gst, (UChar*)&tmp );
   1910    for (i = 0; i < 14; i++)
   1911       addrP[i] = tmp.env[i];
   1912 }
   1913 
   1914 /* CALLED FROM GENERATED CODE */
   1915 /* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
{
   /* Load only the x87 environment (not the registers) from guest
      memory at 'addr'; returns any emulation warning arising from
      the control word. */
   return do_put_x87( False/*don't move regs*/, (UChar*)addr, gst);
}
   1920 
   1921 /* VISIBLE TO LIBVEX CLIENT */
   1922 /* Do x87 save from the supplied VexGuestX86State structure and store the
   1923    result at the given address which represents a buffer of at least 108
   1924    bytes. */
void LibVEX_GuestX86_get_x87 ( /*IN*/VexGuestX86State* vex_state,
                               /*OUT*/UChar* x87_state )
{
   /* Public wrapper around do_get_x87: serialise the guest's x87
      state into the caller-supplied buffer (at least 108 bytes). */
   do_get_x87 ( vex_state, x87_state );
}
   1930 
   1931 /* VISIBLE TO LIBVEX CLIENT */
   1932 /* Do x87 restore from the supplied address and store read values to the given
   1933    VexGuestX86State structure. */
VexEmNote LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
                                    /*MOD*/VexGuestX86State* vex_state )
{
   /* Public wrapper around do_put_x87: deserialise an x87 image
      (registers included) into the guest state; returns any
      emulation warning from the control word. */
   return do_put_x87 ( True/*moveRegs*/, x87_state, vex_state );
}
   1939 
   1940 /* VISIBLE TO LIBVEX CLIENT */
   1941 /* Return mxcsr from the supplied VexGuestX86State structure. */
UInt LibVEX_GuestX86_get_mxcsr ( /*IN*/VexGuestX86State* vex_state )
{
   /* Synthesise an MXCSR value from the guest's SSE rounding mode;
      all other MXCSR fields take their default (masked) values. */
   return x86g_create_mxcsr ( vex_state->guest_SSEROUND );
}
   1946 
   1947 /* VISIBLE TO LIBVEX CLIENT */
   1948 /* Modify the given VexGuestX86State structure according to the passed mxcsr
   1949    value. */
   1950 VexEmNote LibVEX_GuestX86_put_mxcsr ( /*IN*/UInt mxcsr,
   1951                                       /*MOD*/VexGuestX86State* vex_state)
   1952 {
   1953    ULong w64 = x86g_check_ldmxcsr( mxcsr );
   1954    vex_state->guest_SSEROUND = w64 & 0xFFFFFFFF;
   1955    return (VexEmNote)(w64 >> 32);
   1956 }
   1957 
   1958 /*---------------------------------------------------------------*/
   1959 /*--- Misc integer helpers, including rotates and CPUID.      ---*/
   1960 /*---------------------------------------------------------------*/
   1961 
   1962 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   1963 /* Calculate both flags and value result for rotate right
   1964    through the carry bit.  Result in low 32 bits,
   1965    new flags (OSZACP) in high 32 bits.
   1966 */
ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   /* Rotate 'arg' (sz = 1, 2 or 4 bytes) right through the carry
      flag by 'rot_amt' positions.  Returns the rotated value in the
      low 32 bits and the updated eflags (only C and O changed) in
      the high 32 bits. */
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         /* For RCR, OF comes from the pre-rotate MSB XOR incoming CF. */
         of        = ((arg >> 31) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;  /* bit rotated out into CF */
            arg    = (arg >> 1) | (cf << 31);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 2:
         /* A 16-bit rotate-through-carry repeats every 17 steps
            (16 data bits + CF). */
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of        = ((arg >> 15) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 1:
         /* An 8-bit rotate-through-carry repeats every 9 steps. */
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of        = ((arg >> 7) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7F) | (cf << 7);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      default:
         vpanic("calculate_RCR: invalid size");
   }

   /* Fold the new C and O bits back into the flags word. */
   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
   2015 
   2016 
   2017 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2018 /* Calculate both flags and value result for rotate left
   2019    through the carry bit.  Result in low 32 bits,
   2020    new flags (OSZACP) in high 32 bits.
   2021 */
ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   /* Rotate 'arg' (sz = 1, 2 or 4 bytes) left through the carry flag
      by 'rot_amt' positions.  Returns the rotated value in the low
      32 bits and the updated eflags (only C and O changed) in the
      high 32 bits.  Note: unlike RCR, OF is computed AFTER the
      rotate, from the post-rotate MSB XOR the final CF. */
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 31) & 1;  /* bit rotated out into CF */
            arg    = (arg << 1) | (cf & 1);
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 31) ^ cf) & 1;
         break;
      case 2:
         /* A 16-bit rotate-through-carry repeats every 17 steps
            (16 data bits + CF). */
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 15) & 1;
            arg    = 0xFFFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 15) ^ cf) & 1;
         break;
      case 1:
         /* An 8-bit rotate-through-carry repeats every 9 steps. */
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 7) & 1;
            arg    = 0xFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 7) ^ cf) & 1;
         break;
      default:
         vpanic("calculate_RCL: invalid size");
   }

   /* Fold the new C and O bits back into the flags word. */
   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
   2070 
   2071 
   2072 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2073 /* Calculate both flags and value result for DAA/DAS/AAA/AAS.
   2074    AX value in low half of arg, OSZACP in upper half.
   2075    See guest-x86/toIR.c usage point for details.
   2076 */
   2077 static UInt calc_parity_8bit ( UInt w32 ) {
   2078    UInt i;
   2079    UInt p = 1;
   2080    for (i = 0; i < 8; i++)
   2081       p ^= (1 & (w32 >> i));
   2082    return p;
   2083 }
UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
{
   /* Emulate the BCD-adjust instructions DAA (0x27), DAS (0x2F),
      AAA (0x37) and AAS (0x3F).  AL/AH arrive in the low 16 bits of
      flags_and_AX and the OSZACP flags in bits 16..31 (at their
      X86G_CC_SHIFT_* positions); the result is packed the same
      way. */
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0x27: { /* DAA: decimal-adjust AL after addition */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            /* low nibble out of BCD range: add 6 to correct it */
            r_AL = r_AL + 6;
            r_C  = old_C;
            if (r_AL >= 0x100) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            /* high nibble out of range: add 0x60 and set carry */
            r_AL = r_AL + 0x60;
            r_C  = 1;
         } else {
            r_C = 0;
         }
         /* O is undefined.  S, Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x2F: { /* DAS: decimal-adjust AL after subtraction */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            Bool borrow = r_AL < 6;  /* subtracting 6 would underflow */
            r_AL = r_AL - 6;
            r_C  = old_C;
            if (borrow) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL - 0x60;
            r_C  = 1;
         } else {
            /* Intel docs are wrong: r_C = 0; */
         }
         /* O is undefined.  S, Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x37: { /* AAA: ASCII-adjust AL after addition */
         Bool nudge = r_AL > 0xF9;  /* adding 6 would carry out of AL */
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_AH = r_AH + 1 + (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      case 0x3F: { /* AAS: ASCII-adjust AL after subtraction */
         Bool nudge = r_AL < 0x06;  /* subtracting 6 would borrow from AH */
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL - 6;
            r_AH = r_AH - 1 - (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      default:
         vassert(0);
   }
   /* Repack the flags and AX into the result word. */
   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
   2199 
   2200 UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
   2201 {
   2202    UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   2203    UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   2204    UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   2205    UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   2206    UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   2207    UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   2208    UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   2209    UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   2210    UInt result = 0;
   2211 
   2212    switch (opcode) {
   2213       case 0xD4: { /* AAM */
   2214          r_AH = r_AL / 10;
   2215          r_AL = r_AL % 10;
   2216          break;
   2217       }
   2218       case 0xD5: { /* AAD */
   2219          r_AL = ((r_AH * 10) + r_AL) & 0xff;
   2220          r_AH = 0;
   2221          break;
   2222       }
   2223       default:
   2224          vassert(0);
   2225    }
   2226 
   2227    r_O = 0; /* let's say (undefined) */
   2228    r_C = 0; /* let's say (undefined) */
   2229    r_A = 0; /* let's say (undefined) */
   2230    r_S = (r_AL & 0x80) ? 1 : 0;
   2231    r_Z = (r_AL == 0) ? 1 : 0;
   2232    r_P = calc_parity_8bit( r_AL );
   2233 
   2234    result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
   2235             | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
   2236             | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
   2237             | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
   2238             | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
   2239             | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
   2240             | ( (r_AH & 0xFF) << 8 )
   2241             | ( (r_AL & 0xFF) << 0 );
   2242    return result;
   2243 }
   2244 
   2245 
   2246 /* CALLED FROM GENERATED CODE */
   2247 /* DIRTY HELPER (non-referentially-transparent) */
   2248 /* Horrible hack.  On non-x86 platforms, return 1. */
ULong x86g_dirtyhelper_RDTSC ( void )
{
   /* On an x86 host, execute a real 'rdtsc'; the "=A" constraint
      collects the EDX:EAX result pair as one 64-bit value.  On any
      other host there is nothing sensible to return, so give 1. */
#  if defined(__i386__)
   ULong res;
   __asm__ __volatile__("rdtsc" : "=A" (res));
   return res;
#  else
   return 1ULL;
#  endif
}
   2259 
   2260 
   2261 /* CALLED FROM GENERATED CODE */
   2262 /* DIRTY HELPER (modifies guest state) */
   2263 /* Claim to be a P55C (Intel Pentium/MMX) */
   2264 void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
   2265 {
   2266    switch (st->guest_EAX) {
   2267       case 0:
   2268          st->guest_EAX = 0x1;
   2269          st->guest_EBX = 0x756e6547;
   2270          st->guest_ECX = 0x6c65746e;
   2271          st->guest_EDX = 0x49656e69;
   2272          break;
   2273       default:
   2274          st->guest_EAX = 0x543;
   2275          st->guest_EBX = 0x0;
   2276          st->guest_ECX = 0x0;
   2277          st->guest_EDX = 0x8001bf;
   2278          break;
   2279    }
   2280 }
   2281 
   2282 /* CALLED FROM GENERATED CODE */
   2283 /* DIRTY HELPER (modifies guest state) */
   2284 /* Claim to be a Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
   2285 /* But without 3DNow support (weird, but we really don't support it). */
   2286 void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
   2287 {
   2288    switch (st->guest_EAX) {
   2289       /* vendor ID */
   2290       case 0:
   2291          st->guest_EAX = 0x1;
   2292          st->guest_EBX = 0x68747541;
   2293          st->guest_ECX = 0x444d4163;
   2294          st->guest_EDX = 0x69746e65;
   2295          break;
   2296       /* feature bits */
   2297       case 1:
   2298          st->guest_EAX = 0x621;
   2299          st->guest_EBX = 0x0;
   2300          st->guest_ECX = 0x0;
   2301          st->guest_EDX = 0x183f9ff;
   2302          break;
   2303       /* Highest Extended Function Supported (0x80000004 brand string) */
   2304       case 0x80000000:
   2305          st->guest_EAX = 0x80000004;
   2306          st->guest_EBX = 0x68747541;
   2307          st->guest_ECX = 0x444d4163;
   2308          st->guest_EDX = 0x69746e65;
   2309          break;
   2310       /* Extended Processor Info and Feature Bits */
   2311       case 0x80000001:
   2312          st->guest_EAX = 0x721;
   2313          st->guest_EBX = 0x0;
   2314          st->guest_ECX = 0x0;
   2315          st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
   2316          break;
   2317       /* Processor Brand String "AMD Athlon(tm) Processor" */
   2318       case 0x80000002:
   2319          st->guest_EAX = 0x20444d41;
   2320          st->guest_EBX = 0x6c687441;
   2321          st->guest_ECX = 0x74286e6f;
   2322          st->guest_EDX = 0x5020296d;
   2323          break;
   2324       case 0x80000003:
   2325          st->guest_EAX = 0x65636f72;
   2326          st->guest_EBX = 0x726f7373;
   2327          st->guest_ECX = 0x0;
   2328          st->guest_EDX = 0x0;
   2329          break;
   2330       default:
   2331          st->guest_EAX = 0x0;
   2332          st->guest_EBX = 0x0;
   2333          st->guest_ECX = 0x0;
   2334          st->guest_EDX = 0x0;
   2335          break;
   2336    }
   2337 }
   2338 
   2339 /* CALLED FROM GENERATED CODE */
   2340 /* DIRTY HELPER (modifies guest state) */
   2341 /* Claim to be the following SSE1-capable CPU:
   2342    vendor_id       : GenuineIntel
   2343    cpu family      : 6
   2344    model           : 11
   2345    model name      : Intel(R) Pentium(R) III CPU family      1133MHz
   2346    stepping        : 1
   2347    cpu MHz         : 1131.013
   2348    cache size      : 512 KB
   2349 */
   2350 void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
   2351 {
   2352    switch (st->guest_EAX) {
   2353       case 0:
   2354          st->guest_EAX = 0x00000002;
   2355          st->guest_EBX = 0x756e6547;
   2356          st->guest_ECX = 0x6c65746e;
   2357          st->guest_EDX = 0x49656e69;
   2358          break;
   2359       case 1:
   2360          st->guest_EAX = 0x000006b1;
   2361          st->guest_EBX = 0x00000004;
   2362          st->guest_ECX = 0x00000000;
   2363          st->guest_EDX = 0x0383fbff;
   2364          break;
   2365       default:
   2366          st->guest_EAX = 0x03020101;
   2367          st->guest_EBX = 0x00000000;
   2368          st->guest_ECX = 0x00000000;
   2369          st->guest_EDX = 0x0c040883;
   2370          break;
   2371    }
   2372 }
   2373 
   2374 /* Claim to be the following SSE2-capable CPU:
   2375    vendor_id    : GenuineIntel
   2376    cpu family   : 15
   2377    model        : 2
   2378    model name   : Intel(R) Pentium(R) 4 CPU 3.00GHz
   2379    stepping     : 9
   2380    microcode    : 0x17
   2381    cpu MHz      : 2992.577
   2382    cache size   : 512 KB
   2383    flags        : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
   2384                   pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe
   2385                    pebs bts cid xtpr
   2386    clflush size : 64
   2387    cache_alignment : 128
   2388    address sizes : 36 bits physical, 32 bits virtual
   2389 */
   2390 void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
   2391 {
   2392    switch (st->guest_EAX) {
   2393       case 0:
   2394          st->guest_EAX = 0x00000002;
   2395          st->guest_EBX = 0x756e6547;
   2396          st->guest_ECX = 0x6c65746e;
   2397          st->guest_EDX = 0x49656e69;
   2398          break;
   2399       case 1:
   2400          st->guest_EAX = 0x00000f29;
   2401          st->guest_EBX = 0x01020809;
   2402          st->guest_ECX = 0x00004400;
   2403          st->guest_EDX = 0xbfebfbff;
   2404          break;
   2405       default:
   2406          st->guest_EAX = 0x03020101;
   2407          st->guest_EBX = 0x00000000;
   2408          st->guest_ECX = 0x00000000;
   2409          st->guest_EDX = 0x0c040883;
   2410          break;
   2411    }
   2412 }
   2413 
   2414 /* Claim to be the following SSSE3-capable CPU (2 x ...):
   2415    vendor_id       : GenuineIntel
   2416    cpu family      : 6
   2417    model           : 15
   2418    model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
   2419    stepping        : 6
   2420    cpu MHz         : 2394.000
   2421    cache size      : 4096 KB
   2422    physical id     : 0
   2423    siblings        : 2
   2424    core id         : 0
   2425    cpu cores       : 2
   2426    fpu             : yes
   2427    fpu_exception   : yes
   2428    cpuid level     : 10
   2429    wp              : yes
   2430    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
   2431                      mtrr pge mca cmov pat pse36 clflush dts acpi
   2432                      mmx fxsr sse sse2 ss ht tm syscall nx lm
   2433                      constant_tsc pni monitor ds_cpl vmx est tm2
   2434                      cx16 xtpr lahf_lm
   2435    bogomips        : 4798.78
   2436    clflush size    : 64
   2437    cache_alignment : 64
   2438    address sizes   : 36 bits physical, 48 bits virtual
   2439    power management:
   2440 */
   2441 void x86g_dirtyhelper_CPUID_sse3 ( VexGuestX86State* st )
   2442 {
   2443 #  define SET_ABCD(_a,_b,_c,_d)               \
   2444       do { st->guest_EAX = (UInt)(_a);        \
   2445            st->guest_EBX = (UInt)(_b);        \
   2446            st->guest_ECX = (UInt)(_c);        \
   2447            st->guest_EDX = (UInt)(_d);        \
   2448       } while (0)
   2449 
   2450    switch (st->guest_EAX) {
   2451       case 0x00000000:
   2452          SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
   2453          break;
   2454       case 0x00000001:
   2455          SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
   2456          break;
   2457       case 0x00000002:
   2458          SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
   2459          break;
   2460       case 0x00000003:
   2461          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2462          break;
   2463       case 0x00000004: {
   2464          switch (st->guest_ECX) {
   2465             case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
   2466                                       0x0000003f, 0x00000001); break;
   2467             case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
   2468                                       0x0000003f, 0x00000001); break;
   2469             case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
   2470                                       0x00000fff, 0x00000001); break;
   2471             default:         SET_ABCD(0x00000000, 0x00000000,
   2472                                       0x00000000, 0x00000000); break;
   2473          }
   2474          break;
   2475       }
   2476       case 0x00000005:
   2477          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
   2478          break;
   2479       case 0x00000006:
   2480          SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
   2481          break;
   2482       case 0x00000007:
   2483          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2484          break;
   2485       case 0x00000008:
   2486          SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
   2487          break;
   2488       case 0x00000009:
   2489          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2490          break;
   2491       case 0x0000000a:
   2492       unhandled_eax_value:
   2493          SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
   2494          break;
   2495       case 0x80000000:
   2496          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
   2497          break;
   2498       case 0x80000001:
   2499          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
   2500          break;
   2501       case 0x80000002:
   2502          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
   2503          break;
   2504       case 0x80000003:
   2505          SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
   2506          break;
   2507       case 0x80000004:
   2508          SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
   2509          break;
   2510       case 0x80000005:
   2511          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2512          break;
   2513       case 0x80000006:
   2514          SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
   2515          break;
   2516       case 0x80000007:
   2517          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2518          break;
   2519       case 0x80000008:
   2520          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
   2521          break;
   2522       default:
   2523          goto unhandled_eax_value;
   2524    }
   2525 #  undef SET_ABCD
   2526 }
   2527 
   2528 
   2529 /* CALLED FROM GENERATED CODE */
   2530 /* DIRTY HELPER (non-referentially-transparent) */
   2531 /* Horrible hack.  On non-x86 platforms, return 0. */
   2532 UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
   2533 {
   2534 #  if defined(__i386__)
   2535    UInt r = 0;
   2536    portno &= 0xFFFF;
   2537    switch (sz) {
   2538       case 4:
   2539          __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
   2540                               : "=a" (r) : "Nd" (portno));
   2541 	 break;
   2542       case 2:
   2543          __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
   2544                               : "=a" (r) : "Nd" (portno));
   2545 	 break;
   2546       case 1:
   2547          __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
   2548                               : "=a" (r) : "Nd" (portno));
   2549 	 break;
   2550       default:
   2551          break;
   2552    }
   2553    return r;
   2554 #  else
   2555    return 0;
   2556 #  endif
   2557 }
   2558 
   2559 
   2560 /* CALLED FROM GENERATED CODE */
   2561 /* DIRTY HELPER (non-referentially-transparent) */
   2562 /* Horrible hack.  On non-x86 platforms, do nothing. */
   2563 void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
   2564 {
   2565 #  if defined(__i386__)
   2566    portno &= 0xFFFF;
   2567    switch (sz) {
   2568       case 4:
   2569          __asm__ __volatile__("outl %0, %w1"
   2570                               : : "a" (data), "Nd" (portno));
   2571 	 break;
   2572       case 2:
   2573          __asm__ __volatile__("outw %w0, %w1"
   2574                               : : "a" (data), "Nd" (portno));
   2575 	 break;
   2576       case 1:
   2577          __asm__ __volatile__("outb %b0, %w1"
   2578                               : : "a" (data), "Nd" (portno));
   2579 	 break;
   2580       default:
   2581          break;
   2582    }
   2583 #  else
   2584    /* do nothing */
   2585 #  endif
   2586 }
   2587 
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, zero the destination. */
/* op = 0: call the native SGDT instruction.
   op = 1: call the native SIDT instruction.
*/
void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
#  if defined(__i386__)
   switch (op) {
      case 0:
         /* Store the host's 6-byte GDT register image at *address. */
         __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
         break;
      case 1:
         /* Store the host's 6-byte IDT register image at *address. */
         __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
         break;
      default:
         vpanic("x86g_dirtyhelper_SxDT");
   }
#  else
   /* Not an x86 host: give the guest defined data by zeroing the
      6-byte destination instead. */
   UChar* p = (UChar*)address;
   p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
#  endif
}
   2612 
   2613 /*---------------------------------------------------------------*/
   2614 /*--- Helpers for MMX/SSE/SSE2.                               ---*/
   2615 /*---------------------------------------------------------------*/
   2616 
   2617 static inline UChar abdU8 ( UChar xx, UChar yy ) {
   2618    return toUChar(xx>yy ? xx-yy : yy-xx);
   2619 }
   2620 
   2621 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   2622    return (((ULong)w1) << 32) | ((ULong)w0);
   2623 }
   2624 
   2625 static inline UShort sel16x4_3 ( ULong w64 ) {
   2626    UInt hi32 = toUInt(w64 >> 32);
   2627    return toUShort(hi32 >> 16);
   2628 }
   2629 static inline UShort sel16x4_2 ( ULong w64 ) {
   2630    UInt hi32 = toUInt(w64 >> 32);
   2631    return toUShort(hi32);
   2632 }
   2633 static inline UShort sel16x4_1 ( ULong w64 ) {
   2634    UInt lo32 = toUInt(w64);
   2635    return toUShort(lo32 >> 16);
   2636 }
   2637 static inline UShort sel16x4_0 ( ULong w64 ) {
   2638    UInt lo32 = toUInt(w64);
   2639    return toUShort(lo32);
   2640 }
   2641 
   2642 static inline UChar sel8x8_7 ( ULong w64 ) {
   2643    UInt hi32 = toUInt(w64 >> 32);
   2644    return toUChar(hi32 >> 24);
   2645 }
   2646 static inline UChar sel8x8_6 ( ULong w64 ) {
   2647    UInt hi32 = toUInt(w64 >> 32);
   2648    return toUChar(hi32 >> 16);
   2649 }
   2650 static inline UChar sel8x8_5 ( ULong w64 ) {
   2651    UInt hi32 = toUInt(w64 >> 32);
   2652    return toUChar(hi32 >> 8);
   2653 }
   2654 static inline UChar sel8x8_4 ( ULong w64 ) {
   2655    UInt hi32 = toUInt(w64 >> 32);
   2656    return toUChar(hi32 >> 0);
   2657 }
   2658 static inline UChar sel8x8_3 ( ULong w64 ) {
   2659    UInt lo32 = toUInt(w64);
   2660    return toUChar(lo32 >> 24);
   2661 }
   2662 static inline UChar sel8x8_2 ( ULong w64 ) {
   2663    UInt lo32 = toUInt(w64);
   2664    return toUChar(lo32 >> 16);
   2665 }
   2666 static inline UChar sel8x8_1 ( ULong w64 ) {
   2667    UInt lo32 = toUInt(w64);
   2668    return toUChar(lo32 >> 8);
   2669 }
   2670 static inline UChar sel8x8_0 ( ULong w64 ) {
   2671    UInt lo32 = toUInt(w64);
   2672    return toUChar(lo32 >> 0);
   2673 }
   2674 
   2675 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2676 ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
   2677 {
   2678    return
   2679       mk32x2(
   2680          (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
   2681             + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
   2682          (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
   2683             + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
   2684       );
   2685 }
   2686 
   2687 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2688 ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
   2689 {
   2690    UInt t = 0;
   2691    t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
   2692    t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
   2693    t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
   2694    t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
   2695    t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
   2696    t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
   2697    t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
   2698    t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
   2699    t &= 0xFFFF;
   2700    return (ULong)t;
   2701 }
   2702 
   2703 
   2704 /*---------------------------------------------------------------*/
   2705 /*--- Helpers for dealing with segment overrides.             ---*/
   2706 /*---------------------------------------------------------------*/
   2707 
   2708 static inline
   2709 UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
   2710 {
   2711    UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
   2712    UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
   2713    UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
   2714    return (hi << 24) | (mid << 16) | lo;
   2715 }
   2716 
   2717 static inline
   2718 UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
   2719 {
   2720     UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
   2721     UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
   2722     UInt limit = (hi << 16) | lo;
   2723     if (ent->LdtEnt.Bits.Granularity)
   2724        limit = (limit << 12) | 0xFFF;
   2725     return limit;
   2726 }
   2727 
   2728 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2729 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
   2730                               UInt seg_selector, UInt virtual_addr )
   2731 {
   2732    UInt tiBit, base, limit;
   2733    VexGuestX86SegDescr* the_descrs;
   2734 
   2735    Bool verboze = False;
   2736 
   2737    /* If this isn't true, we're in Big Trouble. */
   2738    vassert(8 == sizeof(VexGuestX86SegDescr));
   2739 
   2740    if (verboze)
   2741       vex_printf("x86h_use_seg_selector: "
   2742                  "seg_selector = 0x%x, vaddr = 0x%x\n",
   2743                  seg_selector, virtual_addr);
   2744 
   2745    /* Check for wildly invalid selector. */
   2746    if (seg_selector & ~0xFFFF)
   2747       goto bad;
   2748 
   2749    seg_selector &= 0x0000FFFF;
   2750 
   2751    /* Sanity check the segment selector.  Ensure that RPL=11b (least
   2752       privilege).  This forms the bottom 2 bits of the selector. */
   2753    if ((seg_selector & 3) != 3)
   2754       goto bad;
   2755 
   2756    /* Extract the TI bit (0 means GDT, 1 means LDT) */
   2757    tiBit = (seg_selector >> 2) & 1;
   2758 
   2759    /* Convert the segment selector onto a table index */
   2760    seg_selector >>= 3;
   2761    vassert(seg_selector >= 0 && seg_selector < 8192);
   2762 
   2763    if (tiBit == 0) {
   2764 
   2765       /* GDT access. */
   2766       /* Do we actually have a GDT to look at? */
   2767       if (gdt == 0)
   2768          goto bad;
   2769 
   2770       /* Check for access to non-existent entry. */
   2771       if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
   2772          goto bad;
   2773 
   2774       the_descrs = (VexGuestX86SegDescr*)gdt;
   2775       base  = get_segdescr_base (&the_descrs[seg_selector]);
   2776       limit = get_segdescr_limit(&the_descrs[seg_selector]);
   2777 
   2778    } else {
   2779 
   2780       /* All the same stuff, except for the LDT. */
   2781       if (ldt == 0)
   2782          goto bad;
   2783 
   2784       if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
   2785          goto bad;
   2786 
   2787       the_descrs = (VexGuestX86SegDescr*)ldt;
   2788       base  = get_segdescr_base (&the_descrs[seg_selector]);
   2789       limit = get_segdescr_limit(&the_descrs[seg_selector]);
   2790 
   2791    }
   2792 
   2793    /* Do the limit check.  Note, this check is just slightly too
   2794       slack.  Really it should be "if (virtual_addr + size - 1 >=
   2795       limit)," but we don't have the size info to hand.  Getting it
   2796       could be significantly complex.  */
   2797    if (virtual_addr >= limit)
   2798       goto bad;
   2799 
   2800    if (verboze)
   2801       vex_printf("x86h_use_seg_selector: "
   2802                  "base = 0x%x, addr = 0x%x\n",
   2803                  base, base + virtual_addr);
   2804 
   2805    /* High 32 bits are zero, indicating success. */
   2806    return (ULong)( ((UInt)virtual_addr) + base );
   2807 
   2808  bad:
   2809    return 1ULL << 32;
   2810 }
   2811 
   2812 
   2813 /*---------------------------------------------------------------*/
   2814 /*--- Helpers for dealing with, and describing,               ---*/
   2815 /*--- guest state as a whole.                                 ---*/
   2816 /*---------------------------------------------------------------*/
   2817 
/* Initialise the entire x86 guest state. */
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
{
   /* Event-check fields. */
   vex_state->host_EvC_FAILADDR = 0;
   vex_state->host_EvC_COUNTER = 0;

   /* Integer registers. */
   vex_state->guest_EAX = 0;
   vex_state->guest_ECX = 0;
   vex_state->guest_EDX = 0;
   vex_state->guest_EBX = 0;
   vex_state->guest_ESP = 0;
   vex_state->guest_EBP = 0;
   vex_state->guest_ESI = 0;
   vex_state->guest_EDI = 0;

   /* Condition-code thunk: OP_COPY with zero DEP1 means all flags
      clear. */
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
   vex_state->guest_DFLAG   = 1; /* forwards */
   vex_state->guest_IDFLAG  = 0;
   vex_state->guest_ACFLAG  = 0;

   vex_state->guest_EIP = 0;

   /* Initialise the simulated FPU */
   x86g_dirtyhelper_FINIT( vex_state );

   /* Initialise the SSE state. */
#  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;

   vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
   SSEZERO(vex_state->guest_XMM0);
   SSEZERO(vex_state->guest_XMM1);
   SSEZERO(vex_state->guest_XMM2);
   SSEZERO(vex_state->guest_XMM3);
   SSEZERO(vex_state->guest_XMM4);
   SSEZERO(vex_state->guest_XMM5);
   SSEZERO(vex_state->guest_XMM6);
   SSEZERO(vex_state->guest_XMM7);

#  undef SSEZERO

   /* Segment registers and descriptor-table base pointers. */
   vex_state->guest_CS  = 0;
   vex_state->guest_DS  = 0;
   vex_state->guest_ES  = 0;
   vex_state->guest_FS  = 0;
   vex_state->guest_GS  = 0;
   vex_state->guest_SS  = 0;
   vex_state->guest_LDT = 0;
   vex_state->guest_GDT = 0;

   vex_state->guest_EMNOTE = EmNote_NONE;

   /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
   vex_state->guest_CMSTART = 0;
   vex_state->guest_CMLEN   = 0;

   vex_state->guest_NRADDR   = 0;
   vex_state->guest_SC_CLASS = 0;
   vex_state->guest_IP_AT_SYSCALL = 0;

   vex_state->padding1 = 0;
}
   2883 
   2884 
   2885 /* Figure out if any part of the guest state contained in minoff
   2886    .. maxoff requires precise memory exceptions.  If in doubt return
   2887    True (but this generates significantly slower code).
   2888 
   2889    By default we enforce precise exns for guest %ESP, %EBP and %EIP
   2890    only.  These are the minimum needed to extract correct stack
   2891    backtraces from x86 code.
   2892 
   2893    Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
   2894 */
   2895 Bool guest_x86_state_requires_precise_mem_exns (
   2896         Int minoff, Int maxoff, VexRegisterUpdates pxControl
   2897      )
   2898 {
   2899    Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
   2900    Int ebp_max = ebp_min + 4 - 1;
   2901    Int esp_min = offsetof(VexGuestX86State, guest_ESP);
   2902    Int esp_max = esp_min + 4 - 1;
   2903    Int eip_min = offsetof(VexGuestX86State, guest_EIP);
   2904    Int eip_max = eip_min + 4 - 1;
   2905 
   2906    if (maxoff < esp_min || minoff > esp_max) {
   2907       /* no overlap with esp */
   2908       if (pxControl == VexRegUpdSpAtMemAccess)
   2909          return False; // We only need to check stack pointer.
   2910    } else {
   2911       return True;
   2912    }
   2913 
   2914    if (maxoff < ebp_min || minoff > ebp_max) {
   2915       /* no overlap with ebp */
   2916    } else {
   2917       return True;
   2918    }
   2919 
   2920    if (maxoff < eip_min || minoff > eip_max) {
   2921       /* no overlap with eip */
   2922    } else {
   2923       return True;
   2924    }
   2925 
   2926    return False;
   2927 }
   2928 
   2929 
/* Build an { offset, size } pair describing one guest-state field
   that should be regarded as always defined. */
#define ALWAYSDEFD(field)                           \
    { offsetof(VexGuestX86State, field),            \
      (sizeof ((VexGuestX86State*)0)->field) }

/* Static description of the x86 guest state layout.
   NB: n_alwaysDefd must equal the number of entries in .alwaysDefd
   below; keep them in sync when editing. */
VexGuestLayout
   x86guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestX86State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestX86State,guest_ESP),
          .sizeof_SP = 4,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestX86State,guest_EBP),
          .sizeof_FP = 4,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestX86State,guest_EIP),
          .sizeof_IP = 4,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 24,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_ACFLAG),
                 /*  5 */ ALWAYSDEFD(guest_EIP),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 /* 10 */ ALWAYSDEFD(guest_CS),
                 /* 11 */ ALWAYSDEFD(guest_DS),
                 /* 12 */ ALWAYSDEFD(guest_ES),
                 /* 13 */ ALWAYSDEFD(guest_FS),
                 /* 14 */ ALWAYSDEFD(guest_GS),
                 /* 15 */ ALWAYSDEFD(guest_SS),
                 /* 16 */ ALWAYSDEFD(guest_LDT),
                 /* 17 */ ALWAYSDEFD(guest_GDT),
                 /* 18 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 20 */ ALWAYSDEFD(guest_CMSTART),
                 /* 21 */ ALWAYSDEFD(guest_CMLEN),
                 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };
   2986 
   2987 
   2988 /*---------------------------------------------------------------*/
   2989 /*--- end                                 guest_x86_helpers.c ---*/
   2990 /*---------------------------------------------------------------*/
   2991