/* Home | History | Annotate | Download | only in priv */
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                               guest_x86_helpers.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2017 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex_emnote.h"
     38 #include "libvex_guest_x86.h"
     39 #include "libvex_ir.h"
     40 #include "libvex.h"
     41 
     42 #include "main_util.h"
     43 #include "main_globals.h"
     44 #include "guest_generic_bb_to_IR.h"
     45 #include "guest_x86_defs.h"
     46 #include "guest_generic_x87.h"
     47 
     48 
     49 /* This file contains helper functions for x86 guest code.
     50    Calls to these functions are generated by the back end.
     51    These calls are of course in the host machine code and
     52    this file will be compiled to host machine code, so that
     53    all makes sense.
     54 
     55    Only change the signatures of these helper functions very
     56    carefully.  If you change the signature here, you'll have to change
     57    the parameters passed to it in the IR calls constructed by
     58    guest-x86/toIR.c.
     59 
     60    The convention used is that all functions called from generated
     61    code are named x86g_<something>, and any function whose name lacks
     62    that prefix is not called from generated code.  Note that some
     63    LibVEX_* functions can however be called by VEX's client, but that
     64    is not the same as calling them from VEX-generated code.
     65 */
     66 
     67 
     68 /* Set to 1 to get detailed profiling info about use of the flag
     69    machinery. */
     70 #define PROFILE_EFLAGS 0
     71 
     72 
     73 /*---------------------------------------------------------------*/
     74 /*--- %eflags run-time helpers.                               ---*/
     75 /*---------------------------------------------------------------*/
     76 
/* parity_table[b] is X86G_CC_MASK_P when byte b contains an even
   number of set bits, and 0 otherwise — i.e. the x86 parity flag
   (PF), which reflects even parity of the result's low byte. */
static const UChar parity_table[256] = {
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
    111 
    112 /* generalised left-shifter */
    113 inline static Int lshift ( Int x, Int n )
    114 {
    115    if (n >= 0)
    116       return (UInt)x << n;
    117    else
    118       return x >> (-n);
    119 }
    120 
    121 /* identity on ULong */
    122 static inline ULong idULong ( ULong x )
    123 {
    124    return x;
    125 }
    126 
    127 
/* Common prologue for the ACTIONS_* macros below.  Binds the mask of
   the operand's significant bits (DATA_MASK), its sign bit
   (SIGN_MASK), and local copies of the three thunk fields
   (CC_DEP1/CC_DEP2/CC_NDEP) from the enclosing function's formal
   parameters. */
#define PREAMBLE(__data_bits)					\
   /* const */ UInt DATA_MASK 					\
      = __data_bits==8 ? 0xFF 					\
                       : (__data_bits==16 ? 0xFFFF 		\
                                          : 0xFFFFFFFF); 	\
   /* const */ UInt SIGN_MASK = 1u << (__data_bits - 1);	\
   /* const */ UInt CC_DEP1 = cc_dep1_formal;			\
   /* const */ UInt CC_DEP2 = cc_dep2_formal;			\
   /* const */ UInt CC_NDEP = cc_ndep_formal;			\
   /* Four bogus assignments, which hopefully gcc can     */	\
   /* optimise away, and which stop it complaining about  */	\
   /* unused variables.                                   */	\
   SIGN_MASK = SIGN_MASK;					\
   DATA_MASK = DATA_MASK;					\
   CC_DEP2 = CC_DEP2;						\
   CC_NDEP = CC_NDEP;
    144 
    145 
    146 /*-------------------------------------------------------------*/
    147 
/* Flags after ADD: DEP1 = argL, DEP2 = argR; result recomputed here.
   Returns the six OSZACP bits in their architectural positions. */
#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     argL = CC_DEP1;						\
     argR = CC_DEP2;						\
     res  = argL + argR;					\
     /* CF: unsigned overflow iff the sum wrapped below argL */	\
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;			\
     pf = parity_table[(UChar)res];				\
     /* AF: carry out of bit 3 */				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     /* OF: operands same sign but result's sign differs */	\
     of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
                 12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    166 
    167 /*-------------------------------------------------------------*/
    168 
/* Flags after SUB/CMP: DEP1 = argL, DEP2 = argR; res = argL - argR. */
#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     argL = CC_DEP1;						\
     argR = CC_DEP2;						\
     res  = argL - argR;					\
     /* CF: borrow iff argL < argR as unsigned values */	\
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;			\
     pf = parity_table[(UChar)res];				\
     /* AF: borrow out of bit 3 */				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     /* OF: operand signs differ and result's sign differs	\
        from argL's */						\
     of = lshift((argL ^ argR) & (argL ^ res),	 		\
                 12 - DATA_BITS) & X86G_CC_MASK_O; 		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    187 
    188 /*-------------------------------------------------------------*/
    189 
/* Flags after ADC.  NDEP carries the old flags (only C is used).
   DEP2 appears to hold argR XORed with the old carry; the XOR here
   recovers the original argR -- TODO(review): confirm this encoding
   against the thunk builder in guest_x86/toIR.c. */
#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, oldC, res;		       		\
     oldC = CC_NDEP & X86G_CC_MASK_C;				\
     argL = CC_DEP1;						\
     argR = CC_DEP2 ^ oldC;	       				\
     res  = (argL + argR) + oldC;				\
     /* CF: with carry-in, equality also indicates wraparound */ \
     if (oldC)							\
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;		\
     else							\
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;		\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
                  12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    212 
    213 /*-------------------------------------------------------------*/
    214 
/* Flags after SBB.  NDEP carries the old flags (only C is used).
   As with ADC, DEP2 appears to hold argR XORed with the old carry
   and is decoded here -- TODO(review): confirm against toIR.c. */
#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, oldC, res;		       		\
     oldC = CC_NDEP & X86G_CC_MASK_C;				\
     argL = CC_DEP1;						\
     argR = CC_DEP2 ^ oldC;	       				\
     res  = (argL - argR) - oldC;				\
     /* CF: with borrow-in, equality also produces a borrow */	\
     if (oldC)							\
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;		\
     else							\
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;		\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR) & (argL ^ res), 			\
                 12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    237 
    238 /*-------------------------------------------------------------*/
    239 
/* Flags after a logical op (AND/OR/XOR/TEST): DEP1 = result.
   C, A and O are all forced to zero. */
#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     cf = 0;							\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0;							\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     of = 0;							\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    253 
    254 /*-------------------------------------------------------------*/
    255 
/* Flags after INC: DEP1 = result, NDEP = old flags.  INC leaves the
   carry flag unchanged, so C is taken from NDEP. */
#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     res  = CC_DEP1;						\
     argL = res - 1;						\
     argR = 1;							\
     /* CF: preserved from before the INC */			\
     cf = CC_NDEP & X86G_CC_MASK_C;				\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     /* OF: set only when the result is exactly the minimum	\
        signed value, i.e. max-positive was incremented */	\
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    273 
    274 /*-------------------------------------------------------------*/
    275 
/* Flags after DEC: DEP1 = result, NDEP = old flags.  DEC leaves the
   carry flag unchanged, so C is taken from NDEP. */
#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     res  = CC_DEP1;						\
     argL = res + 1;						\
     argR = 1;							\
     /* CF: preserved from before the DEC */			\
     cf = CC_NDEP & X86G_CC_MASK_C;				\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     /* OF: set only when the result is the maximum signed	\
        value, i.e. min-negative was decremented */		\
     of = ((res & DATA_MASK) 					\
          == ((UInt)SIGN_MASK - 1)) << 11;			\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    294 
    295 /*-------------------------------------------------------------*/
    296 
/* Flags after SHL.  DEP1 = final result; DEP2 appears to hold the
   value shifted one place less, so its top bit is the carry-out --
   TODO(review): confirm the DEP2 encoding against toIR.c. */
#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;	\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0; /* undefined */					\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     /* of is defined if shift count == 1 */			\
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) 		\
          & X86G_CC_MASK_O;					\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    312 
    313 /*-------------------------------------------------------------*/
    314 
/* Flags after SHR/SAR.  DEP1 = final result; DEP2 appears to hold
   the value shifted one place less, so its low bit is the carry-out
   -- TODO(review): confirm the DEP2 encoding against toIR.c. */
#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);  					\
   { UInt cf, pf, af, zf, sf, of;				\
     cf = CC_DEP2 & 1;						\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0; /* undefined */					\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     /* of is defined if shift count == 1 */			\
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)		\
          & X86G_CC_MASK_O;					\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
    330 
    331 /*-------------------------------------------------------------*/
    332 
/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
/* Only C and O change; all other flag bits are carried over
   unchanged from the old flags in NDEP. */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt fl 							\
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
          | (X86G_CC_MASK_C & CC_DEP1)				\
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,  		\
                                      11-(DATA_BITS-1)) 	\
                     ^ lshift(CC_DEP1, 11)));			\
     return fl;							\
   }								\
}
    347 
    348 /*-------------------------------------------------------------*/
    349 
/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
/* Only C and O change; all other flag bits are carried over
   unchanged from the old flags in NDEP. */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt fl 							\
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
          | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))	\
          | (X86G_CC_MASK_O & (lshift(CC_DEP1, 			\
                                      11-(DATA_BITS-1)) 	\
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));	\
     return fl;							\
   }								\
}
    364 
    365 /*-------------------------------------------------------------*/
    366 
/* Flags after unsigned multiply.  DEP1/DEP2 = the two operands.
   The full double-width product is recomputed; C and O are set iff
   the high half of the product is non-zero. */
#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     /* CF/OF: high half non-zero => product overflowed */      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }								\
}
    390 
    391 /*-------------------------------------------------------------*/
    392 
/* Flags after signed multiply.  DEP1/DEP2 = the two operands.
   C and O are set iff the high half of the double-width product is
   not simply the sign-extension of the low half. */
#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     /* CF/OF: high half differs from sign-extension of low */  \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }								\
}
    416 
    417 
    418 #if PROFILE_EFLAGS
    419 
/* Lazily set by initCounts() on first use of any helper. */
static Bool initted     = False;

/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

/* True once every 0x400000 (~4.2M) total helper entries, used to
   throttle how often the counters are dumped. */
#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
    434 
    435 
/* Dump all eflags-profiling counters to the log in a fixed-width
   table: one row per CC_OP, with slow/fast C-flag counts followed by
   per-condition-code counts.  Only compiled when PROFILE_EFLAGS. */
static void showCounts ( void )
{
   Int op, co;
   HChar ch;
   vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {

      /* Tag each op with its operand width; ops after the initial
         COPY op come in Byte/Word/Long triples. */
      ch = ' ';
      if (op > 0 && (op-1) % 3 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 3 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 3 == 2)
         ch = 'L';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         /* Counts >= 1000 are shown in units of K to keep the
            columns narrow.  n can only be negative if the UInt
            counter exceeded INT_MAX. */
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("     ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}
    475 
    476 static void initCounts ( void )
    477 {
    478    Int op, co;
    479    initted = True;
    480    for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
    481       tabc_fast[op] = tabc_slow[op] = 0;
    482       for (co = 0; co < 16; co++)
    483          tab_cond[op][co] = 0;
    484    }
    485 }
    486 
    487 #endif /* PROFILE_EFLAGS */
    488 
    489 
    490 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
    491 /* Calculate all the 6 flags from the supplied thunk parameters.
    492    Worker function, not directly called from generated code. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   /* Dispatch on the operation recorded in the flag thunk.  Each
      ACTIONS_* macro expands to a block which computes and returns
      the six OSZACP flag bits for that operation at the given
      operand width; the *_formal parameter names are what PREAMBLE
      binds its CC_DEP1/CC_DEP2/CC_NDEP locals from. */
   switch (cc_op) {
      case X86G_CC_OP_COPY:
         /* DEP1 already holds literal flag bits; just mask out
            everything except the six condition-code positions. */
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      /* The multiply variants additionally take narrowing helpers
         for the single- and double-width product types. */
      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt   );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}
    571 
    572 
    573 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
    574 /* Calculate all the 6 flags from the supplied thunk parameters. */
    575 UInt x86g_calculate_eflags_all ( UInt cc_op,
    576                                  UInt cc_dep1,
    577                                  UInt cc_dep2,
    578                                  UInt cc_ndep )
    579 {
    580 #  if PROFILE_EFLAGS
    581    if (!initted) initCounts();
    582    n_calc_all++;
    583    if (SHOW_COUNTS_NOW) showCounts();
    584 #  endif
    585    return
    586       x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
    587 }
    588 
    589 
    590 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
    591 /* Calculate just the carry flag from the supplied thunk parameters. */
    592 VEX_REGPARM(3)
    593 UInt x86g_calculate_eflags_c ( UInt cc_op,
    594                                UInt cc_dep1,
    595                                UInt cc_dep2,
    596                                UInt cc_ndep )
    597 {
    598 #  if PROFILE_EFLAGS
    599    if (!initted) initCounts();
    600    n_calc_c++;
    601    tabc_fast[cc_op]++;
    602    if (SHOW_COUNTS_NOW) showCounts();
    603 #  endif
    604 
    605    /* Fast-case some common ones. */
    606    switch (cc_op) {
    607       case X86G_CC_OP_LOGICL:
    608       case X86G_CC_OP_LOGICW:
    609       case X86G_CC_OP_LOGICB:
    610          return 0;
    611       case X86G_CC_OP_SUBL:
    612          return ((UInt)cc_dep1) < ((UInt)cc_dep2)
    613                    ? X86G_CC_MASK_C : 0;
    614       case X86G_CC_OP_SUBW:
    615          return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
    616                    ? X86G_CC_MASK_C : 0;
    617       case X86G_CC_OP_SUBB:
    618          return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
    619                    ? X86G_CC_MASK_C : 0;
    620       case X86G_CC_OP_INCL:
    621       case X86G_CC_OP_DECL:
    622          return cc_ndep & X86G_CC_MASK_C;
    623       default:
    624          break;
    625    }
    626 
    627 #  if PROFILE_EFLAGS
    628    tabc_fast[cc_op]--;
    629    tabc_slow[cc_op]++;
    630 #  endif
    631 
    632    return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
    633           & X86G_CC_MASK_C;
    634 }
    635 
    636 
    637 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
    638 /* returns 1 or 0 */
UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
                                UInt cc_op,
                                UInt cc_dep1,
                                UInt cc_dep2,
                                UInt cc_ndep )
{
   /* Materialise all six flags, then test the one(s) the condition
      code asks for.  Condition codes come in complementary pairs
      differing only in the low bit; 'inv' xor-flips the final
      result for the negated member of each pair. */
   UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
                                               cc_dep2, cc_ndep);
   UInt of,sf,zf,cf,pf;
   UInt inv = cond & 1;

#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* In each case below the relevant flag is shifted down to bit 0;
      any higher garbage bits are removed by the final '1 &'. */
   switch (cond) {
      case X86CondNO:
      case X86CondO: /* OF == 1 */
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case X86CondNZ:
      case X86CondZ: /* ZF == 1 */
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case X86CondNB:
      case X86CondB: /* CF == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         return 1 & (inv ^ cf);
         break;

      case X86CondNBE:
      case X86CondBE: /* (CF or ZF) == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));
         break;

      case X86CondNS:
      case X86CondS: /* SF == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case X86CondNP:
      case X86CondP: /* PF == 1 */
         pf = eflags >> X86G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case X86CondNL:
      case X86CondL: /* (SF xor OF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));
         break;

      case X86CondNLE:
      case X86CondLE: /* ((SF xor OF) or ZF)  == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));
         break;

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("x86g_calculate_condition");
   }
}
    713 
    714 
    715 /* VISIBLE TO LIBVEX CLIENT */
    716 UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
    717 {
    718    UInt eflags = x86g_calculate_eflags_all_WRK(
    719                     vex_state->guest_CC_OP,
    720                     vex_state->guest_CC_DEP1,
    721                     vex_state->guest_CC_DEP2,
    722                     vex_state->guest_CC_NDEP
    723                  );
    724    UInt dflag = vex_state->guest_DFLAG;
    725    vassert(dflag == 1 || dflag == 0xFFFFFFFF);
    726    if (dflag == 0xFFFFFFFF)
    727       eflags |= X86G_CC_MASK_D;
    728    if (vex_state->guest_IDFLAG == 1)
    729       eflags |= X86G_CC_MASK_ID;
    730    if (vex_state->guest_ACFLAG == 1)
    731       eflags |= X86G_CC_MASK_AC;
    732 
    733    return eflags;
    734 }
    735 
    736 /* VISIBLE TO LIBVEX CLIENT */
    737 void
    738 LibVEX_GuestX86_put_eflags ( UInt eflags,
    739                              /*MOD*/VexGuestX86State* vex_state )
    740 {
    741    /* D flag */
    742    if (eflags & X86G_CC_MASK_D) {
    743       vex_state->guest_DFLAG = 0xFFFFFFFF;
    744       eflags &= ~X86G_CC_MASK_D;
    745    }
    746    else
    747       vex_state->guest_DFLAG = 1;
    748 
    749    /* ID flag */
    750    if (eflags & X86G_CC_MASK_ID) {
    751       vex_state->guest_IDFLAG = 1;
    752       eflags &= ~X86G_CC_MASK_ID;
    753    }
    754    else
    755       vex_state->guest_IDFLAG = 0;
    756 
    757    /* AC flag */
    758    if (eflags & X86G_CC_MASK_AC) {
    759       vex_state->guest_ACFLAG = 1;
    760       eflags &= ~X86G_CC_MASK_AC;
    761    }
    762    else
    763       vex_state->guest_ACFLAG = 0;
    764 
    765    UInt cc_mask = X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z |
    766                   X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P;
    767    vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
    768    vex_state->guest_CC_DEP1 = eflags & cc_mask;
    769    vex_state->guest_CC_DEP2 = 0;
    770    vex_state->guest_CC_NDEP = 0;
    771 }
    772 
    773 /* VISIBLE TO LIBVEX CLIENT */
    774 void
    775 LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
    776                               /*MOD*/VexGuestX86State* vex_state )
    777 {
    778    UInt oszacp = x86g_calculate_eflags_all_WRK(
    779                     vex_state->guest_CC_OP,
    780                     vex_state->guest_CC_DEP1,
    781                     vex_state->guest_CC_DEP2,
    782                     vex_state->guest_CC_NDEP
    783                  );
    784    if (new_carry_flag & 1) {
    785       oszacp |= X86G_CC_MASK_C;
    786    } else {
    787       oszacp &= ~X86G_CC_MASK_C;
    788    }
    789    vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
    790    vex_state->guest_CC_DEP1 = oszacp;
    791    vex_state->guest_CC_DEP2 = 0;
    792    vex_state->guest_CC_NDEP = 0;
    793 }
    794 
    795 
    796 /*---------------------------------------------------------------*/
    797 /*--- %eflags translation-time function specialisers.         ---*/
    798 /*--- These help iropt specialise calls the above run-time    ---*/
    799 /*--- %eflags functions.                                      ---*/
    800 /*---------------------------------------------------------------*/
    801 
    802 /* Used by the optimiser to try specialisations.  Returns an
    803    equivalent expression, or NULL if none. */
    804 
    805 static inline Bool isU32 ( IRExpr* e, UInt n )
    806 {
    807    return
    808       toBool( e->tag == Iex_Const
    809               && e->Iex.Const.con->tag == Ico_U32
    810               && e->Iex.Const.con->Ico.U32 == n );
    811 }
    812 
/* Optimiser specialisation hook.  Given a call to one of the x86 flag
   helpers (named by |function_name|) with argument expressions |args|
   (NULL-terminated), return a cheaper IR expression computing the same
   value, or NULL if no specialisation applies.  |precedingStmts| and
   |n_precedingStmts| are unused here. */
IRExpr* guest_x86_spechelper ( const HChar* function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "x86g_calculate_condition" --------- */

   if (vex_streq(function_name, "x86g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
         /* long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }

      /*---------------- SUBL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test !(dst <s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
         /* long sub/cmp, then NLE (signed not less than or equal)
            --> test dst >s src
            --> test !(dst <=s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test !(dst <u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S,
                           binop(Iop_Sub32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
         /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S,
                                 binop(Iop_Sub32, cc_dep1, cc_dep2),
                                 mkU32(0))),
                      mkU32(1));
      }

      /*---------------- SUBW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }

      /*---------------- SUBB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
         /* byte sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep2,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep1,mkU32(0xFF))));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU32(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U32s. */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[7]
         */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                mkU32(1));
      }

      /*---------------- LOGICL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SZ ^ OF) | ZF, but
            OF is zero, so this reduces to SZ | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
         /* long and/or/xor, then BE
            LOGIC sets ZF according to the result and makes CF be zero.
            BE computes (CF | ZF), but CF is zero, so this reduces ZF
            -- which will be 1 iff the result is zero.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* long and/or/xor, then S --> (UInt)result[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
         /* see comment below for (LOGICB, CondNS) */
         /* long and/or/xor, then S --> (UInt) ~ result[31] */
         return binop(Iop_Xor32,
                binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1)),
                mkU32(1));
      }

      /*---------------- LOGICW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* word and/or/xor, then S --> (UInt)result[15] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(15)),
                      mkU32(1));
      }

      /*---------------- LOGICB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         /* b9ac9:       84 c0                   test   %al,%al
            b9acb:       75 0d                   jne    b9ada */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
         /* ditto, for negation-of-S. */
         /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
         return binop(Iop_Xor32,
                binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1)),
                mkU32(1));
      }

      /*---------------- DECL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
         /* dec L, then S --> compare DST <s 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
         /* dec W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- INCW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
         /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
         /* inc W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- SHRL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
         /* SHRL, then Z --> test dep1 == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of x87 FP compares: "fcom ... ;
         fnstsw %ax ; sahf ; jbe" for example. */

      if (isU32(cc_op, X86G_CC_OP_COPY) &&
          (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test
            (C or Z) == 1. */
         /* COPY, then NBE --> extract C and Z from dep1, and test
            (C or Z) == 0. */
         UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(
                        Iop_Or32,
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
                     ),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
         UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
         UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_c" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
      /* specialise calls to above "calculate_eflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep1,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep2,mkU32(0xFF))));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL)
          || isU32(cc_op, X86G_CC_OP_LOGICW)
          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU32(0);
      }
      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }
      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* cflag after COPY is stored in DEP1. */
         return
            binop(
               Iop_And32,
               binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
               mkU32(1)
            );
      }
      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           cc_dep1));
      }
      // ATC, requires verification, no test case known
      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
      //   /* C after signed widening multiply denotes the case where
      //      the top half of the result isn't simply the sign extension
      //      of the bottom half (iow the result doesn't fit completely
      //      in the bottom half).  Hence:
      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
      //      where 'x' denotes signed widening multiply.*/
      //   return
      //      unop(Iop_1Uto32,
      //           binop(Iop_CmpNE32,
      //                 unop(Iop_64HIto32,
      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
      //                 binop(Iop_Sar32,
      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
      //}
#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_all" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
      /* specialise calls to above "calculate_eflags_all" function */
      IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      /* cc_dep2 = args[2]; */
      /* cc_ndep = args[3]; */

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* eflags after COPY are stored in DEP1. */
         return
            binop(
               Iop_And32,
               cc_dep1,
               mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                     | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
            );
      }
      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   return NULL;
}
   1360 
   1361 
   1362 /*---------------------------------------------------------------*/
   1363 /*--- Supporting functions for x87 FPU activities.            ---*/
   1364 /*---------------------------------------------------------------*/
   1365 
   1366 static inline Bool host_is_little_endian ( void )
   1367 {
   1368    UInt x = 0x76543210;
   1369    UChar* p = (UChar*)(&x);
   1370    return toBool(*p == 0x10);
   1371 }
   1372 
   1373 /* 80 and 64-bit floating point formats:
   1374 
   1375    80-bit:
   1376 
   1377     S  0       0-------0      zero
   1378     S  0       0X------X      denormals
   1379     S  1-7FFE  1X------X      normals (all normals have leading 1)
   1380     S  7FFF    10------0      infinity
   1381     S  7FFF    10X-----X      snan
   1382     S  7FFF    11X-----X      qnan
   1383 
   1384    S is the sign bit.  For runs X----X, at least one of the Xs must be
   1385    nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
   1386    there is an explicitly represented leading 1, and a sign bit,
   1387    giving 80 in total.
   1388 
   1389    64-bit avoids the confusion of an explicitly represented leading 1
   1390    and so is simpler:
   1391 
   1392     S  0      0------0   zero
   1393     S  0      X------X   denormals
   1394     S  1-7FE  any        normals
   1395     S  7FF    0------0   infinity
   1396     S  7FF    0X-----X   snan
   1397     S  7FF    1X-----X   qnan
   1398 
   1399    Exponent is 11 bits, fractional part is 52 bits, and there is a
   1400    sign bit, giving 64 in total.
   1401 */
   1402 
   1403 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
   1404 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   1405 UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
   1406 {
   1407    Bool   mantissaIsZero;
   1408    Int    bexp;
   1409    UChar  sign;
   1410    UChar* f64;
   1411 
   1412    vassert(host_is_little_endian());
   1413 
   1414    /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
   1415 
   1416    f64  = (UChar*)(&dbl);
   1417    sign = toUChar( (f64[7] >> 7) & 1 );
   1418 
   1419    /* First off, if the tag indicates the register was empty,
   1420       return 1,0,sign,1 */
   1421    if (tag == 0) {
   1422       /* vex_printf("Empty\n"); */
   1423       return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
   1424                                  | X86G_FC_MASK_C0;
   1425    }
   1426 
   1427    bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   1428    bexp &= 0x7FF;
   1429 
   1430    mantissaIsZero
   1431       = toBool(
   1432            (f64[6] & 0x0F) == 0
   1433            && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
   1434         );
   1435 
   1436    /* If both exponent and mantissa are zero, the value is zero.
   1437       Return 1,0,sign,0. */
   1438    if (bexp == 0 && mantissaIsZero) {
   1439       /* vex_printf("Zero\n"); */
   1440       return X86G_FC_MASK_C3 | 0
   1441                              | (sign << X86G_FC_SHIFT_C1) | 0;
   1442    }
   1443 
   1444    /* If exponent is zero but mantissa isn't, it's a denormal.
   1445       Return 1,1,sign,0. */
   1446    if (bexp == 0 && !mantissaIsZero) {
   1447       /* vex_printf("Denormal\n"); */
   1448       return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
   1449                              | (sign << X86G_FC_SHIFT_C1) | 0;
   1450    }
   1451 
   1452    /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
   1453       Return 0,1,sign,1. */
   1454    if (bexp == 0x7FF && mantissaIsZero) {
   1455       /* vex_printf("Inf\n"); */
   1456       return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
   1457                                  | X86G_FC_MASK_C0;
   1458    }
   1459 
   1460    /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
   1461       Return 0,0,sign,1. */
   1462    if (bexp == 0x7FF && !mantissaIsZero) {
   1463       /* vex_printf("NaN\n"); */
   1464       return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
   1465    }
   1466 
   1467    /* Uh, ok, we give up.  It must be a normal finite number.
   1468       Return 0,1,sign,0.
   1469    */
   1470    /* vex_printf("normal\n"); */
   1471    return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
   1472 }
   1473 
   1474 
   1475 /* CALLED FROM GENERATED CODE */
   1476 /* DIRTY HELPER (reads guest memory) */
   1477 ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
   1478 {
   1479    ULong f64;
   1480    convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
   1481    return f64;
   1482 }
   1483 
   1484 /* CALLED FROM GENERATED CODE */
   1485 /* DIRTY HELPER (writes guest memory) */
   1486 void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
   1487 {
   1488    convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
   1489 }
   1490 
   1491 
   1492 /*----------------------------------------------*/
   1493 /*--- The exported fns ..                    ---*/
   1494 /*----------------------------------------------*/
   1495 
   1496 /* Layout of the real x87 state. */
   1497 /* 13 June 05: Fpu_State and auxiliary constants was moved to
   1498    g_generic_x87.h */
   1499 
   1500 
   1501 /* CLEAN HELPER */
   1502 /* fpucw[15:0] contains a x87 native format FPU control word.
   1503    Extract from it the required FPROUND value and any resulting
   1504    emulation warning, and return (warn << 32) | fpround value.
   1505 */
   1506 ULong x86g_check_fldcw ( UInt fpucw )
   1507 {
   1508    /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   1509    /* NOTE, encoded exactly as per enum IRRoundingMode. */
   1510    UInt rmode = (fpucw >> 10) & 3;
   1511 
   1512    /* Detect any required emulation warnings. */
   1513    VexEmNote ew = EmNote_NONE;
   1514 
   1515    if ((fpucw & 0x3F) != 0x3F) {
   1516       /* unmasked exceptions! */
   1517       ew = EmWarn_X86_x87exns;
   1518    }
   1519    else
   1520    if (((fpucw >> 8) & 3) != 3) {
   1521       /* unsupported precision */
   1522       ew = EmWarn_X86_x87precision;
   1523    }
   1524 
   1525    return (((ULong)ew) << 32) | ((ULong)rmode);
   1526 }
   1527 
   1528 /* CLEAN HELPER */
   1529 /* Given fpround as an IRRoundingMode value, create a suitable x87
   1530    native format FPU control word. */
   1531 UInt x86g_create_fpucw ( UInt fpround )
   1532 {
   1533    fpround &= 3;
   1534    return 0x037F | (fpround << 10);
   1535 }
   1536 
   1537 
   1538 /* CLEAN HELPER */
   1539 /* mxcsr[15:0] contains a SSE native format MXCSR value.
   1540    Extract from it the required SSEROUND value and any resulting
   1541    emulation warning, and return (warn << 32) | sseround value.
   1542 */
   1543 ULong x86g_check_ldmxcsr ( UInt mxcsr )
   1544 {
   1545    /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   1546    /* NOTE, encoded exactly as per enum IRRoundingMode. */
   1547    UInt rmode = (mxcsr >> 13) & 3;
   1548 
   1549    /* Detect any required emulation warnings. */
   1550    VexEmNote ew = EmNote_NONE;
   1551 
   1552    if ((mxcsr & 0x1F80) != 0x1F80) {
   1553       /* unmasked exceptions! */
   1554       ew = EmWarn_X86_sseExns;
   1555    }
   1556    else
   1557    if (mxcsr & (1<<15)) {
   1558       /* FZ is set */
   1559       ew = EmWarn_X86_fz;
   1560    }
   1561    else
   1562    if (mxcsr & (1<<6)) {
   1563       /* DAZ is set */
   1564       ew = EmWarn_X86_daz;
   1565    }
   1566 
   1567    return (((ULong)ew) << 32) | ((ULong)rmode);
   1568 }
   1569 
   1570 
   1571 /* CLEAN HELPER */
   1572 /* Given sseround as an IRRoundingMode value, create a suitable SSE
   1573    native format MXCSR value. */
   1574 UInt x86g_create_mxcsr ( UInt sseround )
   1575 {
   1576    sseround &= 3;
   1577    return 0x1F80 | (sseround << 13);
   1578 }
   1579 
   1580 
   1581 /* CALLED FROM GENERATED CODE */
   1582 /* DIRTY HELPER (writes guest state) */
   1583 /* Initialise the x87 FPU state as per 'finit'. */
   1584 void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
   1585 {
   1586    Int i;
   1587    gst->guest_FTOP = 0;
   1588    for (i = 0; i < 8; i++) {
   1589       gst->guest_FPTAG[i] = 0; /* empty */
   1590       gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   1591    }
   1592    gst->guest_FPROUND = (UInt)Irrm_NEAREST;
   1593    gst->guest_FC3210  = 0;
   1594 }
   1595 
   1596 
/* This is used to implement both 'frstor' and 'fldenv'.  The latter
   appears to differ from the former only in that the 8 FP registers
   themselves are not transferred into the guest state.

   Returns any emulation warning arising from the control word
   (unmasked exceptions / unsupported precision). */
static
VexEmNote do_put_x87 ( Bool moveRegs,
                       /*IN*/Fpu_State* x87_state,
                       /*OUT*/VexGuestX86State* vex_state )
{
   Int        stno, preg;
   UInt       tag;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   /* TOP is bits 13:11 of the status word. */
   UInt       ftop    = (x87_state->env[FP_ENV_STAT] >> 11) & 7;
   UInt       tagw    = x87_state->env[FP_ENV_TAG];
   UInt       fpucw   = x87_state->env[FP_ENV_CTRL];
   /* Keep only the condition-code bits C3,C2,C1,C0 of the status word. */
   UInt       c3210   = x87_state->env[FP_ENV_STAT] & 0x4700;
   VexEmNote  ew;
   UInt       fpround;
   ULong      pair;

   /* Copy registers and tags.  x87_state->reg[] holds 80-bit values
      in ST order; the guest state holds 64-bit values indexed by
      physical register number, hence the (stno + ftop) & 7 mapping.
      The 2-bit x87 tag (3 = empty) is collapsed to the guest's 1-bit
      tag (0 = empty, 1 = in use). */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         if (moveRegs)
            convert_f80le_to_f64le( &x87_state->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = x86g_check_fldcw ( (UInt)fpucw );
   fpround = (UInt)pair;              /* low 32 bits: rounding mode */
   ew      = (VexEmNote)(pair >> 32); /* high 32 bits: warning, if any */

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}
   1656 
   1657 
   1658 /* Create an x87 FPU state from the guest state, as close as
   1659    we can approximate it. */
   1660 static
   1661 void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
   1662                   /*OUT*/Fpu_State* x87_state )
   1663 {
   1664    Int        i, stno, preg;
   1665    UInt       tagw;
   1666    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   1667    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   1668    UInt       ftop    = vex_state->guest_FTOP;
   1669    UInt       c3210   = vex_state->guest_FC3210;
   1670 
   1671    for (i = 0; i < 14; i++)
   1672       x87_state->env[i] = 0;
   1673 
   1674    x87_state->env[1] = x87_state->env[3] = x87_state->env[5]
   1675       = x87_state->env[13] = 0xFFFF;
   1676    x87_state->env[FP_ENV_STAT]
   1677       = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   1678    x87_state->env[FP_ENV_CTRL]
   1679       = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
   1680 
   1681    /* Dump the register stack in ST order. */
   1682    tagw = 0;
   1683    for (stno = 0; stno < 8; stno++) {
   1684       preg = (stno + ftop) & 7;
   1685       if (vexTags[preg] == 0) {
   1686          /* register is empty */
   1687          tagw |= (3 << (2*preg));
   1688          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
   1689                                  &x87_state->reg[10*stno] );
   1690       } else {
   1691          /* register is full. */
   1692          tagw |= (0 << (2*preg));
   1693          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
   1694                                  &x87_state->reg[10*stno] );
   1695       }
   1696    }
   1697    x87_state->env[FP_ENV_TAG] = toUShort(tagw);
   1698 }
   1699 
   1700 
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
/* Build an 'fxsave' image at 'addr' from the guest's x87 and SSE
   state.  Fields VEX does not track (FPU IP/DP, opcode, selectors)
   are written as zero. */
void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   U128*     xmm   = (U128*)(addr + 160); /* xmm regs start at offset 160 */
   UInt      mxcsr;
   UShort    fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   do_get_x87( gst, &tmp );
   mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */

   /* set addrS[2] in an endian-independent way */
   /* Abridged tag (FTW): one bit per register, 1 = in use (full
      2-bit tag != 3/empty). */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5]  = 0; /* pad */

   addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
   addrS[4]  = 0;
   addrS[5]  = 0; /* FPU IP (bogus) */
   addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
                     could conceivably dump %CS here) */

   addrS[7]  = 0; /* Intel reserved */

   addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[10] = 0; /* segment selector for above operand pointer; %DS
                     perhaps? */
   addrS[11] = 0; /* Intel reserved */

   addrS[12] = toUShort(mxcsr);  /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
   addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */

   /* Copy in the FP registers, in ST order.  Each 80-bit register
      occupies a 16-byte slot in the image; the top 6 bytes are
      zeroed. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm7 remain to be copied.  If the host is big-endian, these
      need to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( xmm[0], gst->guest_XMM0 );
   COPY_U128( xmm[1], gst->guest_XMM1 );
   COPY_U128( xmm[2], gst->guest_XMM2 );
   COPY_U128( xmm[3], gst->guest_XMM3 );
   COPY_U128( xmm[4], gst->guest_XMM4 );
   COPY_U128( xmm[5], gst->guest_XMM5 );
   COPY_U128( xmm[6], gst->guest_XMM6 );
   COPY_U128( xmm[7], gst->guest_XMM7 );

#  undef COPY_U128
}
   1790 
   1791 
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
/* Restore guest x87 and SSE state from an 'fxsave' image at 'addr'.
   Returns any emulation warning; an x87 warning takes precedence
   over an XMM one. */
VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
{
   Fpu_State tmp;
   VexEmNote warnX87 = EmNote_NONE;
   VexEmNote warnXMM = EmNote_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   U128*     xmm     = (U128*)(addr + 160); /* xmm regs at offset 160 */
   UShort    fp_tags;
   Int       r, stno, i;

   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_XMM0, xmm[0] );
   COPY_U128( gst->guest_XMM1, xmm[1] );
   COPY_U128( gst->guest_XMM2, xmm[2] );
   COPY_U128( gst->guest_XMM3, xmm[3] );
   COPY_U128( gst->guest_XMM4, xmm[4] );
   COPY_U128( gst->guest_XMM5, xmm[5] );
   COPY_U128( gst->guest_XMM6, xmm[6] );
   COPY_U128( gst->guest_XMM7, xmm[7] );

#  undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */

   /* LLVM on Darwin turns the following loop into a movaps plus a
      handful of scalar stores.  This would work fine except for the
      fact that VEX doesn't keep the stack correctly (16-) aligned for
      the call, so it segfaults.  Hence, split the loop into two
      pieces (and pray LLVM doesn't merely glue them back together) so
      it's composed only of scalar stores and so is alignment
      insensitive.  Of course this is a kludge of the lamest kind --
      VEX should be fixed properly. */
   /* Code that seems to trigger the problem:
      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
   __asm__ __volatile__("" ::: "memory");
   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;

   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7] -- each register is a 10-byte value in a
      16-byte image slot; only the low 10 bytes are meaningful */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   /* Expand the abridged FTW byte back into full 2-bit tags: a set
      bit means the register is in use, a clear bit means empty
      (tag 11), matching do_put_x87's interpretation. */
   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   warnX87 = do_put_x87( True/*moveRegs*/, &tmp, gst );

   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = x86g_check_ldmxcsr( w32 );

     warnXMM = (VexEmNote)(w64 >> 32);

     gst->guest_SSEROUND = w64 & 0xFFFFFFFF;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmNote_NONE)
      return warnX87;
   else
      return warnXMM;
}
   1884 
   1885 
   1886 /* CALLED FROM GENERATED CODE */
   1887 /* DIRTY HELPER (reads guest state, writes guest mem) */
   1888 void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
   1889 {
   1890    do_get_x87( gst, (Fpu_State*)addr );
   1891 }
   1892 
   1893 /* CALLED FROM GENERATED CODE */
   1894 /* DIRTY HELPER (writes guest state, reads guest mem) */
   1895 VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
   1896 {
   1897    return do_put_x87( True/*regs too*/, (Fpu_State*)addr, gst );
   1898 }
   1899 
   1900 /* CALLED FROM GENERATED CODE */
   1901 /* DIRTY HELPER (reads guest state, writes guest mem) */
   1902 void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
   1903 {
   1904    /* Somewhat roundabout, but at least it's simple. */
   1905    Int       i;
   1906    UShort*   addrP = (UShort*)addr;
   1907    Fpu_State tmp;
   1908    do_get_x87( gst, &tmp );
   1909    for (i = 0; i < 14; i++)
   1910       addrP[i] = tmp.env[i];
   1911 }
   1912 
   1913 /* CALLED FROM GENERATED CODE */
   1914 /* DIRTY HELPER (writes guest state, reads guest mem) */
   1915 VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
   1916 {
   1917    return do_put_x87( False/*don't move regs*/, (Fpu_State*)addr, gst);
   1918 }
   1919 
   1920 /* VISIBLE TO LIBVEX CLIENT */
   1921 /* Do x87 save from the supplied VexGuestX86State structure and store the
   1922    result at the given address which represents a buffer of at least 108
   1923    bytes. */
   1924 void LibVEX_GuestX86_get_x87 ( /*IN*/VexGuestX86State* vex_state,
   1925                                /*OUT*/UChar* x87_state )
   1926 {
   1927    do_get_x87 ( vex_state, (Fpu_State*)x87_state );
   1928 }
   1929 
   1930 /* VISIBLE TO LIBVEX CLIENT */
   1931 /* Do x87 restore from the supplied address and store read values to the given
   1932    VexGuestX86State structure. */
   1933 VexEmNote LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
   1934                                     /*MOD*/VexGuestX86State* vex_state )
   1935 {
   1936    return do_put_x87 ( True/*moveRegs*/, (Fpu_State*)x87_state, vex_state );
   1937 }
   1938 
   1939 /* VISIBLE TO LIBVEX CLIENT */
   1940 /* Return mxcsr from the supplied VexGuestX86State structure. */
   1941 UInt LibVEX_GuestX86_get_mxcsr ( /*IN*/VexGuestX86State* vex_state )
   1942 {
   1943    return x86g_create_mxcsr ( vex_state->guest_SSEROUND );
   1944 }
   1945 
   1946 /* VISIBLE TO LIBVEX CLIENT */
   1947 /* Modify the given VexGuestX86State structure according to the passed mxcsr
   1948    value. */
   1949 VexEmNote LibVEX_GuestX86_put_mxcsr ( /*IN*/UInt mxcsr,
   1950                                       /*MOD*/VexGuestX86State* vex_state)
   1951 {
   1952    ULong w64 = x86g_check_ldmxcsr( mxcsr );
   1953    vex_state->guest_SSEROUND = w64 & 0xFFFFFFFF;
   1954    return (VexEmNote)(w64 >> 32);
   1955 }
   1956 
   1957 /*---------------------------------------------------------------*/
   1958 /*--- Misc integer helpers, including rotates and CPUID.      ---*/
   1959 /*---------------------------------------------------------------*/
   1960 
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate right
   through the carry bit.  Result in low 32 bits,
   new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   /* As per hardware, the rotate count is first masked to 5 bits. */
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         /* For RCR, OF = MSB of the original value XOR incoming CF,
            computed before rotating. */
         of        = ((arg >> 31) ^ cf) & 1;
         while (tempCOUNT > 0) {
            /* rotate the 33-bit quantity CF:arg right by one */
            tempcf = arg & 1;
            arg    = (arg >> 1) | (cf << 31);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 2:
         /* 16-bit RCR rotates the 17-bit quantity CF:arg, so the
            count is reduced modulo 17. */
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of        = ((arg >> 15) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 1:
         /* 8-bit RCR rotates the 9-bit quantity CF:arg; count is
            reduced modulo 9. */
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of        = ((arg >> 7) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7F) | (cf << 7);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      default:
         vpanic("calculate_RCR: invalid size");
   }

   /* Splice the new C and O bits into the incoming flags; all other
      flag bits pass through unchanged. */
   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
   2014 
   2015 
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate left
   through the carry bit.  Result in low 32 bits,
   new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   /* As per hardware, the rotate count is first masked to 5 bits. */
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            /* rotate the 33-bit quantity CF:arg left by one */
            tempcf = (arg >> 31) & 1;
            arg    = (arg << 1) | (cf & 1);
            cf     = tempcf;
            tempCOUNT--;
         }
         /* For RCL, OF = MSB of the result XOR final CF, computed
            after rotating (contrast RCR, which computes it before). */
         of = ((arg >> 31) ^ cf) & 1;
         break;
      case 2:
         /* 16-bit RCL rotates the 17-bit quantity CF:arg; count is
            reduced modulo 17. */
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 15) & 1;
            arg    = 0xFFFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 15) ^ cf) & 1;
         break;
      case 1:
         /* 8-bit RCL rotates the 9-bit quantity CF:arg; count is
            reduced modulo 9. */
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 7) & 1;
            arg    = 0xFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 7) ^ cf) & 1;
         break;
      default:
         vpanic("calculate_RCL: invalid size");
   }

   /* Splice the new C and O bits into the incoming flags; all other
      flag bits pass through unchanged. */
   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
   2069 
   2070 
   2071 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2072 /* Calculate both flags and value result for DAA/DAS/AAA/AAS.
   2073    AX value in low half of arg, OSZACP in upper half.
   2074    See guest-x86/toIR.c usage point for details.
   2075 */
   2076 static UInt calc_parity_8bit ( UInt w32 ) {
   2077    UInt i;
   2078    UInt p = 1;
   2079    for (i = 0; i < 8; i++)
   2080       p ^= (1 & (w32 >> i));
   2081    return p;
   2082 }
/* Implements DAA (0x27), DAS (0x2F), AAA (0x37) and AAS (0x3F).
   AX value in low half of arg, OSZACP in upper half; the result is
   packed the same way. */
UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
{
   /* Unpack AL, AH and the six individual flag bits. */
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0x27: { /* DAA */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         /* Low BCD digit out of range, or a half-carry pending:
            adjust by 6. */
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_C  = old_C;
            if (r_AL >= 0x100) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         /* High BCD digit out of range, or carry pending: adjust by
            0x60. */
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL + 0x60;
            r_C  = 1;
         } else {
            r_C = 0;
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x2F: { /* DAS */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            /* subtracting 6 from AL borrows iff AL < 6 */
            Bool borrow = r_AL < 6;
            r_AL = r_AL - 6;
            r_C  = old_C;
            if (borrow) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL - 0x60;
            r_C  = 1;
         } else {
            /* Intel docs are wrong: r_C = 0; */
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x37: { /* AAA */
         /* AL + 6 overflows into AH iff AL > 0xF9. */
         Bool nudge = r_AL > 0xF9;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_AH = r_AH + 1 + (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      case 0x3F: { /* AAS */
         /* AL - 6 borrows from AH iff AL < 6. */
         Bool nudge = r_AL < 0x06;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL - 6;
            r_AH = r_AH - 1 - (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      default:
         vassert(0);
   }
   /* Repack flags and AX into the same layout as the input. */
   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
   2198 
   2199 UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
   2200 {
   2201    UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   2202    UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   2203    UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   2204    UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   2205    UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   2206    UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   2207    UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   2208    UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   2209    UInt result = 0;
   2210 
   2211    switch (opcode) {
   2212       case 0xD4: { /* AAM */
   2213          r_AH = r_AL / 10;
   2214          r_AL = r_AL % 10;
   2215          break;
   2216       }
   2217       case 0xD5: { /* AAD */
   2218          r_AL = ((r_AH * 10) + r_AL) & 0xff;
   2219          r_AH = 0;
   2220          break;
   2221       }
   2222       default:
   2223          vassert(0);
   2224    }
   2225 
   2226    r_O = 0; /* let's say (undefined) */
   2227    r_C = 0; /* let's say (undefined) */
   2228    r_A = 0; /* let's say (undefined) */
   2229    r_S = (r_AL & 0x80) ? 1 : 0;
   2230    r_Z = (r_AL == 0) ? 1 : 0;
   2231    r_P = calc_parity_8bit( r_AL );
   2232 
   2233    result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
   2234             | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
   2235             | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
   2236             | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
   2237             | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
   2238             | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
   2239             | ( (r_AH & 0xFF) << 8 )
   2240             | ( (r_AL & 0xFF) << 0 );
   2241    return result;
   2242 }
   2243 
   2244 
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, return 1. */
/* On an x86 host, reads the host's time-stamp counter via 'rdtsc';
   the "=A" constraint collects the 64-bit EDX:EAX result. */
ULong x86g_dirtyhelper_RDTSC ( void )
{
#  if defined(__i386__)
   ULong res;
   __asm__ __volatile__("rdtsc" : "=A" (res));
   return res;
#  else
   return 1ULL;
#  endif
}
   2258 
   2259 
   2260 /* CALLED FROM GENERATED CODE */
   2261 /* DIRTY HELPER (modifies guest state) */
   2262 /* Claim to be a P55C (Intel Pentium/MMX) */
   2263 void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
   2264 {
   2265    switch (st->guest_EAX) {
   2266       case 0:
   2267          st->guest_EAX = 0x1;
   2268          st->guest_EBX = 0x756e6547;
   2269          st->guest_ECX = 0x6c65746e;
   2270          st->guest_EDX = 0x49656e69;
   2271          break;
   2272       default:
   2273          st->guest_EAX = 0x543;
   2274          st->guest_EBX = 0x0;
   2275          st->guest_ECX = 0x0;
   2276          st->guest_EDX = 0x8001bf;
   2277          break;
   2278    }
   2279 }
   2280 
   2281 /* CALLED FROM GENERATED CODE */
   2282 /* DIRTY HELPER (modifies guest state) */
   2283 /* Claim to be a Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
   2284 /* But without 3DNow support (weird, but we really don't support it). */
   2285 void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
   2286 {
   2287    switch (st->guest_EAX) {
   2288       /* vendor ID */
   2289       case 0:
   2290          st->guest_EAX = 0x1;
   2291          st->guest_EBX = 0x68747541;
   2292          st->guest_ECX = 0x444d4163;
   2293          st->guest_EDX = 0x69746e65;
   2294          break;
   2295       /* feature bits */
   2296       case 1:
   2297          st->guest_EAX = 0x621;
   2298          st->guest_EBX = 0x0;
   2299          st->guest_ECX = 0x0;
   2300          st->guest_EDX = 0x183f9ff;
   2301          break;
   2302       /* Highest Extended Function Supported (0x80000004 brand string) */
   2303       case 0x80000000:
   2304          st->guest_EAX = 0x80000004;
   2305          st->guest_EBX = 0x68747541;
   2306          st->guest_ECX = 0x444d4163;
   2307          st->guest_EDX = 0x69746e65;
   2308          break;
   2309       /* Extended Processor Info and Feature Bits */
   2310       case 0x80000001:
   2311          st->guest_EAX = 0x721;
   2312          st->guest_EBX = 0x0;
   2313          st->guest_ECX = 0x0;
   2314          st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
   2315          break;
   2316       /* Processor Brand String "AMD Athlon(tm) Processor" */
   2317       case 0x80000002:
   2318          st->guest_EAX = 0x20444d41;
   2319          st->guest_EBX = 0x6c687441;
   2320          st->guest_ECX = 0x74286e6f;
   2321          st->guest_EDX = 0x5020296d;
   2322          break;
   2323       case 0x80000003:
   2324          st->guest_EAX = 0x65636f72;
   2325          st->guest_EBX = 0x726f7373;
   2326          st->guest_ECX = 0x0;
   2327          st->guest_EDX = 0x0;
   2328          break;
   2329       default:
   2330          st->guest_EAX = 0x0;
   2331          st->guest_EBX = 0x0;
   2332          st->guest_ECX = 0x0;
   2333          st->guest_EDX = 0x0;
   2334          break;
   2335    }
   2336 }
   2337 
   2338 /* CALLED FROM GENERATED CODE */
   2339 /* DIRTY HELPER (modifies guest state) */
   2340 /* Claim to be the following SSE1-capable CPU:
   2341    vendor_id       : GenuineIntel
   2342    cpu family      : 6
   2343    model           : 11
   2344    model name      : Intel(R) Pentium(R) III CPU family      1133MHz
   2345    stepping        : 1
   2346    cpu MHz         : 1131.013
   2347    cache size      : 512 KB
   2348 */
   2349 void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
   2350 {
   2351    switch (st->guest_EAX) {
   2352       case 0:
   2353          st->guest_EAX = 0x00000002;
   2354          st->guest_EBX = 0x756e6547;
   2355          st->guest_ECX = 0x6c65746e;
   2356          st->guest_EDX = 0x49656e69;
   2357          break;
   2358       case 1:
   2359          st->guest_EAX = 0x000006b1;
   2360          st->guest_EBX = 0x00000004;
   2361          st->guest_ECX = 0x00000000;
   2362          st->guest_EDX = 0x0383fbff;
   2363          break;
   2364       default:
   2365          st->guest_EAX = 0x03020101;
   2366          st->guest_EBX = 0x00000000;
   2367          st->guest_ECX = 0x00000000;
   2368          st->guest_EDX = 0x0c040883;
   2369          break;
   2370    }
   2371 }
   2372 
   2373 /* Claim to be the following SSE2-capable CPU:
   2374    vendor_id    : GenuineIntel
   2375    cpu family   : 15
   2376    model        : 2
   2377    model name   : Intel(R) Pentium(R) 4 CPU 3.00GHz
   2378    stepping     : 9
   2379    microcode    : 0x17
   2380    cpu MHz      : 2992.577
   2381    cache size   : 512 KB
   2382    flags        : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
   2383                   pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe
   2384                    pebs bts cid xtpr
   2385    clflush size : 64
   2386    cache_alignment : 128
   2387    address sizes : 36 bits physical, 32 bits virtual
   2388 */
   2389 void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
   2390 {
   2391    switch (st->guest_EAX) {
   2392       case 0:
   2393          st->guest_EAX = 0x00000002;
   2394          st->guest_EBX = 0x756e6547;
   2395          st->guest_ECX = 0x6c65746e;
   2396          st->guest_EDX = 0x49656e69;
   2397          break;
   2398       case 1:
   2399          st->guest_EAX = 0x00000f29;
   2400          st->guest_EBX = 0x01020809;
   2401          st->guest_ECX = 0x00004400;
   2402          st->guest_EDX = 0xbfebfbff;
   2403          break;
   2404       default:
   2405          st->guest_EAX = 0x03020101;
   2406          st->guest_EBX = 0x00000000;
   2407          st->guest_ECX = 0x00000000;
   2408          st->guest_EDX = 0x0c040883;
   2409          break;
   2410    }
   2411 }
   2412 
   2413 /* Claim to be the following SSSE3-capable CPU (2 x ...):
   2414    vendor_id       : GenuineIntel
   2415    cpu family      : 6
   2416    model           : 15
   2417    model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
   2418    stepping        : 6
   2419    cpu MHz         : 2394.000
   2420    cache size      : 4096 KB
   2421    physical id     : 0
   2422    siblings        : 2
   2423    core id         : 0
   2424    cpu cores       : 2
   2425    fpu             : yes
   2426    fpu_exception   : yes
   2427    cpuid level     : 10
   2428    wp              : yes
   2429    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
   2430                      mtrr pge mca cmov pat pse36 clflush dts acpi
   2431                      mmx fxsr sse sse2 ss ht tm syscall nx lm
   2432                      constant_tsc pni monitor ds_cpl vmx est tm2
   2433                      cx16 xtpr lahf_lm
   2434    bogomips        : 4798.78
   2435    clflush size    : 64
   2436    cache_alignment : 64
   2437    address sizes   : 36 bits physical, 48 bits virtual
   2438    power management:
   2439 */
   2440 void x86g_dirtyhelper_CPUID_sse3 ( VexGuestX86State* st )
   2441 {
   2442 #  define SET_ABCD(_a,_b,_c,_d)               \
   2443       do { st->guest_EAX = (UInt)(_a);        \
   2444            st->guest_EBX = (UInt)(_b);        \
   2445            st->guest_ECX = (UInt)(_c);        \
   2446            st->guest_EDX = (UInt)(_d);        \
   2447       } while (0)
   2448 
   2449    switch (st->guest_EAX) {
   2450       case 0x00000000:
   2451          SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
   2452          break;
   2453       case 0x00000001:
   2454          SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
   2455          break;
   2456       case 0x00000002:
   2457          SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
   2458          break;
   2459       case 0x00000003:
   2460          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2461          break;
   2462       case 0x00000004: {
   2463          switch (st->guest_ECX) {
   2464             case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
   2465                                       0x0000003f, 0x00000001); break;
   2466             case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
   2467                                       0x0000003f, 0x00000001); break;
   2468             case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
   2469                                       0x00000fff, 0x00000001); break;
   2470             default:         SET_ABCD(0x00000000, 0x00000000,
   2471                                       0x00000000, 0x00000000); break;
   2472          }
   2473          break;
   2474       }
   2475       case 0x00000005:
   2476          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
   2477          break;
   2478       case 0x00000006:
   2479          SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
   2480          break;
   2481       case 0x00000007:
   2482          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2483          break;
   2484       case 0x00000008:
   2485          SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
   2486          break;
   2487       case 0x00000009:
   2488          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2489          break;
   2490       case 0x0000000a:
   2491       unhandled_eax_value:
   2492          SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
   2493          break;
   2494       case 0x80000000:
   2495          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
   2496          break;
   2497       case 0x80000001:
   2498          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
   2499          break;
   2500       case 0x80000002:
   2501          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
   2502          break;
   2503       case 0x80000003:
   2504          SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
   2505          break;
   2506       case 0x80000004:
   2507          SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
   2508          break;
   2509       case 0x80000005:
   2510          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2511          break;
   2512       case 0x80000006:
   2513          SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
   2514          break;
   2515       case 0x80000007:
   2516          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2517          break;
   2518       case 0x80000008:
   2519          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
   2520          break;
   2521       default:
   2522          goto unhandled_eax_value;
   2523    }
   2524 #  undef SET_ABCD
   2525 }
   2526 
   2527 
   2528 /* CALLED FROM GENERATED CODE */
   2529 /* DIRTY HELPER (non-referentially-transparent) */
   2530 /* Horrible hack.  On non-x86 platforms, return 0. */
   2531 UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
   2532 {
   2533 #  if defined(__i386__)
   2534    UInt r = 0;
   2535    portno &= 0xFFFF;
   2536    switch (sz) {
   2537       case 4:
   2538          __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
   2539                               : "=a" (r) : "Nd" (portno));
   2540 	 break;
   2541       case 2:
   2542          __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
   2543                               : "=a" (r) : "Nd" (portno));
   2544 	 break;
   2545       case 1:
   2546          __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
   2547                               : "=a" (r) : "Nd" (portno));
   2548 	 break;
   2549       default:
   2550          break;
   2551    }
   2552    return r;
   2553 #  else
   2554    return 0;
   2555 #  endif
   2556 }
   2557 
   2558 
   2559 /* CALLED FROM GENERATED CODE */
   2560 /* DIRTY HELPER (non-referentially-transparent) */
   2561 /* Horrible hack.  On non-x86 platforms, do nothing. */
   2562 void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
   2563 {
   2564 #  if defined(__i386__)
   2565    portno &= 0xFFFF;
   2566    switch (sz) {
   2567       case 4:
   2568          __asm__ __volatile__("outl %0, %w1"
   2569                               : : "a" (data), "Nd" (portno));
   2570 	 break;
   2571       case 2:
   2572          __asm__ __volatile__("outw %w0, %w1"
   2573                               : : "a" (data), "Nd" (portno));
   2574 	 break;
   2575       case 1:
   2576          __asm__ __volatile__("outb %b0, %w1"
   2577                               : : "a" (data), "Nd" (portno));
   2578 	 break;
   2579       default:
   2580          break;
   2581    }
   2582 #  else
   2583    /* do nothing */
   2584 #  endif
   2585 }
   2586 
   2587 /* CALLED FROM GENERATED CODE */
   2588 /* DIRTY HELPER (non-referentially-transparent) */
   2589 /* Horrible hack.  On non-x86 platforms, do nothing. */
   2590 /* op = 0: call the native SGDT instruction.
   2591    op = 1: call the native SIDT instruction.
   2592 */
   2593 void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
   2594 #  if defined(__i386__)
   2595    switch (op) {
   2596       case 0:
   2597          __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
   2598          break;
   2599       case 1:
   2600          __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
   2601          break;
   2602       default:
   2603          vpanic("x86g_dirtyhelper_SxDT");
   2604    }
   2605 #  else
   2606    /* do nothing */
   2607    UChar* p = (UChar*)address;
   2608    p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
   2609 #  endif
   2610 }
   2611 
   2612 /*---------------------------------------------------------------*/
   2613 /*--- Helpers for MMX/SSE/SSE2.                               ---*/
   2614 /*---------------------------------------------------------------*/
   2615 
   2616 static inline UChar abdU8 ( UChar xx, UChar yy ) {
   2617    return toUChar(xx>yy ? xx-yy : yy-xx);
   2618 }
   2619 
   2620 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   2621    return (((ULong)w1) << 32) | ((ULong)w0);
   2622 }
   2623 
   2624 static inline UShort sel16x4_3 ( ULong w64 ) {
   2625    UInt hi32 = toUInt(w64 >> 32);
   2626    return toUShort(hi32 >> 16);
   2627 }
   2628 static inline UShort sel16x4_2 ( ULong w64 ) {
   2629    UInt hi32 = toUInt(w64 >> 32);
   2630    return toUShort(hi32);
   2631 }
   2632 static inline UShort sel16x4_1 ( ULong w64 ) {
   2633    UInt lo32 = toUInt(w64);
   2634    return toUShort(lo32 >> 16);
   2635 }
   2636 static inline UShort sel16x4_0 ( ULong w64 ) {
   2637    UInt lo32 = toUInt(w64);
   2638    return toUShort(lo32);
   2639 }
   2640 
   2641 static inline UChar sel8x8_7 ( ULong w64 ) {
   2642    UInt hi32 = toUInt(w64 >> 32);
   2643    return toUChar(hi32 >> 24);
   2644 }
   2645 static inline UChar sel8x8_6 ( ULong w64 ) {
   2646    UInt hi32 = toUInt(w64 >> 32);
   2647    return toUChar(hi32 >> 16);
   2648 }
   2649 static inline UChar sel8x8_5 ( ULong w64 ) {
   2650    UInt hi32 = toUInt(w64 >> 32);
   2651    return toUChar(hi32 >> 8);
   2652 }
   2653 static inline UChar sel8x8_4 ( ULong w64 ) {
   2654    UInt hi32 = toUInt(w64 >> 32);
   2655    return toUChar(hi32 >> 0);
   2656 }
   2657 static inline UChar sel8x8_3 ( ULong w64 ) {
   2658    UInt lo32 = toUInt(w64);
   2659    return toUChar(lo32 >> 24);
   2660 }
   2661 static inline UChar sel8x8_2 ( ULong w64 ) {
   2662    UInt lo32 = toUInt(w64);
   2663    return toUChar(lo32 >> 16);
   2664 }
   2665 static inline UChar sel8x8_1 ( ULong w64 ) {
   2666    UInt lo32 = toUInt(w64);
   2667    return toUChar(lo32 >> 8);
   2668 }
   2669 static inline UChar sel8x8_0 ( ULong w64 ) {
   2670    UInt lo32 = toUInt(w64);
   2671    return toUChar(lo32 >> 0);
   2672 }
   2673 
   2674 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2675 ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
   2676 {
   2677    return
   2678       mk32x2(
   2679          (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
   2680             + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
   2681          (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
   2682             + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
   2683       );
   2684 }
   2685 
   2686 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2687 ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
   2688 {
   2689    UInt t = 0;
   2690    t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
   2691    t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
   2692    t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
   2693    t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
   2694    t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
   2695    t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
   2696    t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
   2697    t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
   2698    t &= 0xFFFF;
   2699    return (ULong)t;
   2700 }
   2701 
   2702 
   2703 /*---------------------------------------------------------------*/
   2704 /*--- Helpers for dealing with segment overrides.             ---*/
   2705 /*---------------------------------------------------------------*/
   2706 
   2707 static inline
   2708 UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
   2709 {
   2710    UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
   2711    UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
   2712    UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
   2713    return (hi << 24) | (mid << 16) | lo;
   2714 }
   2715 
   2716 static inline
   2717 UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
   2718 {
   2719     UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
   2720     UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
   2721     UInt limit = (hi << 16) | lo;
   2722     if (ent->LdtEnt.Bits.Granularity)
   2723        limit = (limit << 12) | 0xFFF;
   2724     return limit;
   2725 }
   2726 
   2727 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2728 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
   2729                               UInt seg_selector, UInt virtual_addr )
   2730 {
   2731    UInt tiBit, base, limit;
   2732    VexGuestX86SegDescr* the_descrs;
   2733 
   2734    Bool verboze = False;
   2735 
   2736    /* If this isn't true, we're in Big Trouble. */
   2737    vassert(8 == sizeof(VexGuestX86SegDescr));
   2738 
   2739    if (verboze)
   2740       vex_printf("x86h_use_seg_selector: "
   2741                  "seg_selector = 0x%x, vaddr = 0x%x\n",
   2742                  seg_selector, virtual_addr);
   2743 
   2744    /* Check for wildly invalid selector. */
   2745    if (seg_selector & ~0xFFFF)
   2746       goto bad;
   2747 
   2748    seg_selector &= 0x0000FFFF;
   2749 
   2750    /* Sanity check the segment selector.  Ensure that RPL=11b (least
   2751       privilege).  This forms the bottom 2 bits of the selector. */
   2752    if ((seg_selector & 3) != 3)
   2753       goto bad;
   2754 
   2755    /* Extract the TI bit (0 means GDT, 1 means LDT) */
   2756    tiBit = (seg_selector >> 2) & 1;
   2757 
   2758    /* Convert the segment selector onto a table index */
   2759    seg_selector >>= 3;
   2760    vassert(seg_selector >= 0 && seg_selector < 8192);
   2761 
   2762    if (tiBit == 0) {
   2763 
   2764       /* GDT access. */
   2765       /* Do we actually have a GDT to look at? */
   2766       if (gdt == 0)
   2767          goto bad;
   2768 
   2769       /* Check for access to non-existent entry. */
   2770       if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
   2771          goto bad;
   2772 
   2773       the_descrs = (VexGuestX86SegDescr*)gdt;
   2774       base  = get_segdescr_base (&the_descrs[seg_selector]);
   2775       limit = get_segdescr_limit(&the_descrs[seg_selector]);
   2776 
   2777    } else {
   2778 
   2779       /* All the same stuff, except for the LDT. */
   2780       if (ldt == 0)
   2781          goto bad;
   2782 
   2783       if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
   2784          goto bad;
   2785 
   2786       the_descrs = (VexGuestX86SegDescr*)ldt;
   2787       base  = get_segdescr_base (&the_descrs[seg_selector]);
   2788       limit = get_segdescr_limit(&the_descrs[seg_selector]);
   2789 
   2790    }
   2791 
   2792    /* Do the limit check.  Note, this check is just slightly too
   2793       slack.  Really it should be "if (virtual_addr + size - 1 >=
   2794       limit)," but we don't have the size info to hand.  Getting it
   2795       could be significantly complex.  */
   2796    if (virtual_addr >= limit)
   2797       goto bad;
   2798 
   2799    if (verboze)
   2800       vex_printf("x86h_use_seg_selector: "
   2801                  "base = 0x%x, addr = 0x%x\n",
   2802                  base, base + virtual_addr);
   2803 
   2804    /* High 32 bits are zero, indicating success. */
   2805    return (ULong)( ((UInt)virtual_addr) + base );
   2806 
   2807  bad:
   2808    return 1ULL << 32;
   2809 }
   2810 
   2811 
   2812 /*---------------------------------------------------------------*/
   2813 /*--- Helpers for dealing with, and describing,               ---*/
   2814 /*--- guest state as a whole.                                 ---*/
   2815 /*---------------------------------------------------------------*/
   2816 
/* Initialise the entire x86 guest state.  Zeroes everything except
   where a nonzero value is architecturally sensible (DFLAG, CC_OP,
   FPU/SSE rounding modes). */
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
{
   /* Event-check (translation-chaining) bookkeeping. */
   vex_state->host_EvC_FAILADDR = 0;
   vex_state->host_EvC_COUNTER = 0;

   /* Integer registers. */
   vex_state->guest_EAX = 0;
   vex_state->guest_ECX = 0;
   vex_state->guest_EDX = 0;
   vex_state->guest_EBX = 0;
   vex_state->guest_ESP = 0;
   vex_state->guest_EBP = 0;
   vex_state->guest_ESI = 0;
   vex_state->guest_EDI = 0;

   /* Condition-code thunk: COPY means DEP1 holds the flags
      literally, so all-zero flags to start with. */
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
   vex_state->guest_DFLAG   = 1; /* forwards */
   vex_state->guest_IDFLAG  = 0;
   vex_state->guest_ACFLAG  = 0;

   vex_state->guest_EIP = 0;

   /* Initialise the simulated FPU */
   x86g_dirtyhelper_FINIT( vex_state );

   /* Initialise the SSE state. */
#  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;

   vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
   SSEZERO(vex_state->guest_XMM0);
   SSEZERO(vex_state->guest_XMM1);
   SSEZERO(vex_state->guest_XMM2);
   SSEZERO(vex_state->guest_XMM3);
   SSEZERO(vex_state->guest_XMM4);
   SSEZERO(vex_state->guest_XMM5);
   SSEZERO(vex_state->guest_XMM6);
   SSEZERO(vex_state->guest_XMM7);

#  undef SSEZERO

   /* Segment registers and descriptor-table pointers. */
   vex_state->guest_CS  = 0;
   vex_state->guest_DS  = 0;
   vex_state->guest_ES  = 0;
   vex_state->guest_FS  = 0;
   vex_state->guest_GS  = 0;
   vex_state->guest_SS  = 0;
   vex_state->guest_LDT = 0;
   vex_state->guest_GDT = 0;

   vex_state->guest_EMNOTE = EmNote_NONE;

   /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
   vex_state->guest_CMSTART = 0;
   vex_state->guest_CMLEN   = 0;

   vex_state->guest_NRADDR   = 0;
   vex_state->guest_SC_CLASS = 0;
   vex_state->guest_IP_AT_SYSCALL = 0;

   vex_state->padding1 = 0;
   vex_state->padding2 = 0;
   vex_state->padding3 = 0;
}
   2884 
   2885 
   2886 /* Figure out if any part of the guest state contained in minoff
   2887    .. maxoff requires precise memory exceptions.  If in doubt return
   2888    True (but this generates significantly slower code).
   2889 
   2890    By default we enforce precise exns for guest %ESP, %EBP and %EIP
   2891    only.  These are the minimum needed to extract correct stack
   2892    backtraces from x86 code.
   2893 
   2894    Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
   2895 */
   2896 Bool guest_x86_state_requires_precise_mem_exns (
   2897         Int minoff, Int maxoff, VexRegisterUpdates pxControl
   2898      )
   2899 {
   2900    Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
   2901    Int ebp_max = ebp_min + 4 - 1;
   2902    Int esp_min = offsetof(VexGuestX86State, guest_ESP);
   2903    Int esp_max = esp_min + 4 - 1;
   2904    Int eip_min = offsetof(VexGuestX86State, guest_EIP);
   2905    Int eip_max = eip_min + 4 - 1;
   2906 
   2907    if (maxoff < esp_min || minoff > esp_max) {
   2908       /* no overlap with esp */
   2909       if (pxControl == VexRegUpdSpAtMemAccess)
   2910          return False; // We only need to check stack pointer.
   2911    } else {
   2912       return True;
   2913    }
   2914 
   2915    if (maxoff < ebp_min || minoff > ebp_max) {
   2916       /* no overlap with ebp */
   2917    } else {
   2918       return True;
   2919    }
   2920 
   2921    if (maxoff < eip_min || minoff > eip_max) {
   2922       /* no overlap with eip */
   2923    } else {
   2924       return True;
   2925    }
   2926 
   2927    return False;
   2928 }
   2929 
   2930 
/* Expands to an (offset, size) pair describing one guest state
   field, for the .alwaysDefd table below. */
#define ALWAYSDEFD(field)                           \
    { offsetof(VexGuestX86State, field),            \
      (sizeof ((VexGuestX86State*)0)->field) }

VexGuestLayout
   x86guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestX86State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestX86State,guest_ESP),
          .sizeof_SP = 4,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestX86State,guest_EBP),
          .sizeof_FP = 4,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestX86State,guest_EIP),
          .sizeof_IP = 4,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'.  NOTE: must equal the number of
             entries in the .alwaysDefd initialiser below. */
          .n_alwaysDefd = 24,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_ACFLAG),
                 /*  5 */ ALWAYSDEFD(guest_EIP),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 /* 10 */ ALWAYSDEFD(guest_CS),
                 /* 11 */ ALWAYSDEFD(guest_DS),
                 /* 12 */ ALWAYSDEFD(guest_ES),
                 /* 13 */ ALWAYSDEFD(guest_FS),
                 /* 14 */ ALWAYSDEFD(guest_GS),
                 /* 15 */ ALWAYSDEFD(guest_SS),
                 /* 16 */ ALWAYSDEFD(guest_LDT),
                 /* 17 */ ALWAYSDEFD(guest_GDT),
                 /* 18 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 20 */ ALWAYSDEFD(guest_CMSTART),
                 /* 21 */ ALWAYSDEFD(guest_CMLEN),
                 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };
   2987 
   2988 
   2989 /*---------------------------------------------------------------*/
   2990 /*--- end                                 guest_x86_helpers.c ---*/
   2991 /*---------------------------------------------------------------*/
   2992