
/*---------------------------------------------------------------*/
/*--- begin                             guest_amd64_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_emwarn.h"
#include "libvex_guest_amd64.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_amd64_defs.h"
#include "guest_generic_x87.h"


/* This file contains helper functions for amd64 guest code.
   Calls to these functions are generated by the back end.
   These calls are of course in the host machine code and
   this file will be compiled to host machine code, so that
   all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-amd64/toIR.c.

   The convention used is that all functions called from generated
   code are named amd64g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/


/* Set to 1 to get detailed profiling info about use of the flag
   machinery. */
#define PROFILE_RFLAGS 0


/*---------------------------------------------------------------*/
/*--- %rflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/

/* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
   after imulq/mulq. */

static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
{
   ULong u0, v0, w0;
   Long  u1, v1, w1, w2, t;
   u0   = u & 0xFFFFFFFFULL;
   u1   = u >> 32;
   v0   = v & 0xFFFFFFFFULL;
   v1   = v >> 32;
   w0   = u0 * v0;
   t    = u1 * v0 + (w0 >> 32);
   w1   = t & 0xFFFFFFFFULL;
   w2   = t >> 32;
   w1   = u0 * v1 + w1;
   *rHi = u1 * v1 + w2 + (w1 >> 32);
   *rLo = u * v;
}

static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
{
   ULong u0, v0, w0;
   ULong u1, v1, w1, w2, t;
   u0   = u & 0xFFFFFFFFULL;
   u1   = u >> 32;
   v0   = v & 0xFFFFFFFFULL;
   v1   = v >> 32;
   w0   = u0 * v0;
   t    = u1 * v0 + (w0 >> 32);
   w1   = t & 0xFFFFFFFFULL;
   w2   = t >> 32;
   w1   = u0 * v1 + w1;
   *rHi = u1 * v1 + w2 + (w1 >> 32);
   *rLo = u * v;
}

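/* A quick cross-check for the multiply helpers above: on compilers
   that provide the (non-standard, GCC/Clang) unsigned __int128 type,
   the widened native product must equal the hi:lo pair that mullU64
   computes by schoolbook decomposition.  Illustrative sketch only,
   not part of the build. */
#if 0
static void selftest_mullU64 ( ULong u, ULong v )
{
   ULong hi, lo;
   unsigned __int128 ref = (unsigned __int128)u * (unsigned __int128)v;
   mullU64(u, v, &hi, &lo);
   vassert(lo == (ULong)ref);          /* low 64 bits agree */
   vassert(hi == (ULong)(ref >> 64));  /* high 64 bits agree */
}
#endif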

static const UChar parity_table[256] = {
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
};
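
/* The table above encodes x86 parity: entry b is AMD64G_CC_MASK_P
   exactly when byte b contains an even number of 1 bits, and 0
   otherwise.  An equivalent computation, assuming the GCC/Clang
   __builtin_popcount intrinsic is available (illustrative only,
   not part of the build):
*/
#if 0
static ULong parity_of ( UChar b )
{
   /* PF is set iff the low 8 bits of the result have even parity. */
   return (__builtin_popcount((UInt)b) & 1) ? 0 : AMD64G_CC_MASK_P;
}
#endif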

/* Generalised left-shifter: shifts left by n when n >= 0, and
   arithmetically right by -n when n < 0 (this relies on ">>" of a
   signed Long being an arithmetic shift). */
static inline Long lshift ( Long x, Int n )
{
   if (n >= 0)
      return x << n;
   else
      return x >> (-n);
}

/* identity on ULong */
static inline ULong idULong ( ULong x )
{
   return x;
}


#define PREAMBLE(__data_bits)                                   \
   /* const */ ULong DATA_MASK                                  \
      = __data_bits==8                                          \
           ? 0xFFULL                                            \
           : (__data_bits==16                                   \
                ? 0xFFFFULL                                     \
                : (__data_bits==32                              \
                     ? 0xFFFFFFFFULL                            \
                     : 0xFFFFFFFFFFFFFFFFULL));                 \
   /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1);     \
   /* const */ ULong CC_DEP1 = cc_dep1_formal;                  \
   /* const */ ULong CC_DEP2 = cc_dep2_formal;                  \
   /* const */ ULong CC_NDEP = cc_ndep_formal;                  \
   /* Four bogus assignments, which hopefully gcc can     */    \
   /* optimise away, and which stop it complaining about  */    \
   /* unused variables.                                   */    \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;


/*-------------------------------------------------------------*/

#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
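
/* Worked example: for an 8-bit add of 0xFF and 0x01 the result byte
   is 0x00, so the computation above yields CF|PF|AF|ZF (0x55), with
   SF and OF clear.  Illustrative sketch only, not part of the build. */
#if 0
static void example_addb_flags ( void )
{
   ULong fl = amd64g_calculate_rflags_all(AMD64G_CC_OP_ADDB,
                                          0xFF, 0x01, 0);
   vassert(fl == (AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
                  | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z));
}
#endif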

/*-------------------------------------------------------------*/

#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, oldC, res;                                \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                  12 - DATA_BITS) & AMD64G_CC_MASK_O;           \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
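
/* Why "CC_DEP2 ^ oldC" above: when building the thunk for an ADC,
   toIR.c appears to store (argR ^ oldC) rather than argR in DEP2, so
   that the thunk's inputs carry an explicit data dependency on the
   old carry bit (useful to definedness-tracking tools such as
   Memcheck).  The helper XORs oldC back out to recover the true
   right-hand argument.  ACTIONS_SBB below uses the same convention. */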

/*-------------------------------------------------------------*/

#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, oldC, res;                                \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
          == ((ULong)SIGN_MASK - 1)) << 11;                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
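
/* Note on INC/DEC above: these instructions leave the carry flag
   unchanged, which is why cf is taken from CC_NDEP (the old flags)
   rather than recomputed from the arithmetic. */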

/*-------------------------------------------------------------*/

#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C;      \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
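
/* Note on SHL/SHR above: CC_DEP1 holds the shifted result, while
   CC_DEP2 appears to hold the value shifted by one place less (as
   set up by toIR.c).  The carry out is therefore the last bit to
   cross the edge: the top bit of DEP2 for SHL and the bottom bit of
   DEP2 for SHR. */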

/*-------------------------------------------------------------*/

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long fl                                                    \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & CC_DEP1)                        \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                      11-(DATA_BITS-1))         \
                     ^ lshift(CC_DEP1, 11)));                   \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long fl                                                    \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))     \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                      11-(DATA_BITS-1))         \
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));   \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_STYPE)CC_DEP1)                      \
                     * ((DATA_STYPE)CC_DEP2) );                 \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
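
/* Worked example for the signed case: an 8-bit imul of 16 by 16
   gives the 16-bit product 0x0100, so lo = 0x00 and hi = 0x01.
   Since hi is not the sign-extension of lo, CF and OF are set, and
   ZF/PF reflect the all-zero low byte.  Illustrative sketch only,
   not part of the build. */
#if 0
static void example_smulb_flags ( void )
{
   ULong fl = amd64g_calculate_rflags_all(AMD64G_CC_OP_SMULB, 16, 16, 0);
   vassert(fl == (AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
                  | AMD64G_CC_MASK_Z | AMD64G_CC_MASK_O));
}
#endif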

/*-------------------------------------------------------------*/

#define ACTIONS_UMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { Long cf, pf, af, zf, sf, of;                               \
     ULong lo, hi;                                              \
     mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo );       \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { Long cf, pf, af, zf, sf, of;                               \
     Long lo, hi;                                               \
     mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo );         \
     cf = (hi != (lo >>/*s*/ (64-1)));                          \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}


#if PROFILE_RFLAGS

static Bool initted     = False;

/* C flag, fast route */
static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))


static void showCounts ( void )
{
   Int op, co;
   Char ch;
   vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {

      ch = ' ';
      if (op > 0 && (op-1) % 4 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 4 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 4 == 2)
         ch = 'L';
      if (op > 0 && (op-1) % 4 == 3)
         ch = 'Q';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("     ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_RFLAGS */


/* Calculate all 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code. */
static
ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
                                        ULong cc_dep1_formal,
                                        ULong cc_dep2_formal,
                                        ULong cc_ndep_formal )
{
   switch (cc_op) {
      case AMD64G_CC_OP_COPY:
         return cc_dep1_formal
                & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
                   | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);

      case AMD64G_CC_OP_ADDB:   ACTIONS_ADD(  8, UChar  );
      case AMD64G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case AMD64G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );
      case AMD64G_CC_OP_ADDQ:   ACTIONS_ADD( 64, ULong  );

      case AMD64G_CC_OP_ADCB:   ACTIONS_ADC(  8, UChar  );
      case AMD64G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case AMD64G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );
      case AMD64G_CC_OP_ADCQ:   ACTIONS_ADC( 64, ULong  );

      case AMD64G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case AMD64G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case AMD64G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );
      case AMD64G_CC_OP_SUBQ:   ACTIONS_SUB( 64, ULong  );

      case AMD64G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case AMD64G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case AMD64G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );
      case AMD64G_CC_OP_SBBQ:   ACTIONS_SBB( 64, ULong  );

      case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );
      case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong  );

      case AMD64G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case AMD64G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case AMD64G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );
      case AMD64G_CC_OP_INCQ:   ACTIONS_INC( 64, ULong  );

      case AMD64G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case AMD64G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case AMD64G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );
      case AMD64G_CC_OP_DECQ:   ACTIONS_DEC( 64, ULong  );

      case AMD64G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case AMD64G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case AMD64G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );
      case AMD64G_CC_OP_SHLQ:   ACTIONS_SHL( 64, ULong  );

      case AMD64G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case AMD64G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case AMD64G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );
      case AMD64G_CC_OP_SHRQ:   ACTIONS_SHR( 64, ULong  );

      case AMD64G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case AMD64G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case AMD64G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );
      case AMD64G_CC_OP_ROLQ:   ACTIONS_ROL( 64, ULong  );

      case AMD64G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case AMD64G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case AMD64G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );
      case AMD64G_CC_OP_RORQ:   ACTIONS_ROR( 64, ULong  );

      case AMD64G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                  UShort, toUShort );
      case AMD64G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                  UInt,   toUInt );
      case AMD64G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                  ULong,  idULong );

      case AMD64G_CC_OP_UMULQ:  ACTIONS_UMULQ;

      case AMD64G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                  Short,  toUShort );
      case AMD64G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                  Int,    toUInt   );
      case AMD64G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                  Long,   idULong );

      case AMD64G_CC_OP_SMULQ:  ACTIONS_SMULQ;

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
                    "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all 6 flags from the supplied thunk parameters. */
ULong amd64g_calculate_rflags_all ( ULong cc_op,
                                    ULong cc_dep1,
                                    ULong cc_dep2,
                                    ULong cc_ndep )
{
#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
ULong amd64g_calculate_rflags_c ( ULong cc_op,
                                  ULong cc_dep1,
                                  ULong cc_dep2,
                                  ULong cc_ndep )
{
#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case AMD64G_CC_OP_COPY:
         return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
      case AMD64G_CC_OP_LOGICQ:
      case AMD64G_CC_OP_LOGICL:
      case AMD64G_CC_OP_LOGICW:
      case AMD64G_CC_OP_LOGICB:
         return 0;
      // case AMD64G_CC_OP_SUBL:
      //    return ((UInt)cc_dep1) < ((UInt)cc_dep2)
      //              ? AMD64G_CC_MASK_C : 0;
      // case AMD64G_CC_OP_SUBW:
      //    return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
      //              ? AMD64G_CC_MASK_C : 0;
      // case AMD64G_CC_OP_SUBB:
      //    return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
      //              ? AMD64G_CC_MASK_C : 0;
      // case AMD64G_CC_OP_INCL:
      // case AMD64G_CC_OP_DECL:
      //    return cc_ndep & AMD64G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_RFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & AMD64G_CC_MASK_C;
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
                                   ULong cc_op,
                                   ULong cc_dep1,
                                   ULong cc_dep2,
                                   ULong cc_ndep )
{
   ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
                                                  cc_dep2, cc_ndep);
   ULong of, sf, zf, cf, pf;
   ULong inv = cond & 1;

#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   switch (cond) {
      case AMD64CondNO:
      case AMD64CondO: /* OF == 1 */
         of = rflags >> AMD64G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case AMD64CondNZ:
      case AMD64CondZ: /* ZF == 1 */
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case AMD64CondNB:
      case AMD64CondB: /* CF == 1 */
         cf = rflags >> AMD64G_CC_SHIFT_C;
         return 1 & (inv ^ cf);

      case AMD64CondNBE:
      case AMD64CondBE: /* (CF or ZF) == 1 */
         cf = rflags >> AMD64G_CC_SHIFT_C;
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));

      case AMD64CondNS:
      case AMD64CondS: /* SF == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case AMD64CondNP:
      case AMD64CondP: /* PF == 1 */
         pf = rflags >> AMD64G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case AMD64CondNL:
      case AMD64CondL: /* (SF xor OF) == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         of = rflags >> AMD64G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));

      case AMD64CondNLE:
      case AMD64CondLE: /* ((SF xor OF) or ZF)  == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         of = rflags >> AMD64G_CC_SHIFT_O;
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("amd64g_calculate_condition"
                    "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("amd64g_calculate_condition");
   }
}
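
/* Usage sketch: after a 64-bit compare of equal values (a SUBQ-style
   thunk), condition Z holds and NZ does not.  Illustrative only, not
   part of the build. */
#if 0
static void example_condition ( void )
{
   vassert(1 == amd64g_calculate_condition(AMD64CondZ,
                                           AMD64G_CC_OP_SUBQ, 5, 5, 0));
   vassert(0 == amd64g_calculate_condition(AMD64CondNZ,
                                           AMD64G_CC_OP_SUBQ, 5, 5, 0));
}
#endif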


/* VISIBLE TO LIBVEX CLIENT */
ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/VexGuestAMD64State* vex_state )
{
   ULong rflags = amd64g_calculate_rflags_all_WRK(
                     vex_state->guest_CC_OP,
                     vex_state->guest_CC_DEP1,
                     vex_state->guest_CC_DEP2,
                     vex_state->guest_CC_NDEP
                  );
   Long dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == -1);
   if (dflag == -1)
      rflags |= (1<<10);   /* DF, the direction flag (bit 10) */
   if (vex_state->guest_IDFLAG == 1)
      rflags |= (1<<21);   /* the ID flag (bit 21) */
   if (vex_state->guest_ACFLAG == 1)
      rflags |= (1<<18);   /* AC, the alignment-check flag (bit 18) */

   return rflags;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
                               /*MOD*/VexGuestAMD64State* vex_state )
{
   ULong oszacp = amd64g_calculate_rflags_all_WRK(
                     vex_state->guest_CC_OP,
                     vex_state->guest_CC_DEP1,
                     vex_state->guest_CC_DEP2,
                     vex_state->guest_CC_NDEP
                  );
   if (new_carry_flag & 1) {
      oszacp |= AMD64G_CC_MASK_C;
   } else {
      oszacp &= ~AMD64G_CC_MASK_C;
   }
   vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = oszacp;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}
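
/* Usage sketch: force CF in a freshly initialised guest state and
   read it back; the round trip goes through the COPY thunk set up
   above.  Assumes only the LibVEX_GuestAMD64_* entry points declared
   in libvex_guest_amd64.h.  Illustrative only, not part of the
   build. */
#if 0
static void example_put_carry ( void )
{
   VexGuestAMD64State st;
   LibVEX_GuestAMD64_initialise(&st);
   LibVEX_GuestAMD64_put_rflag_c(1, &st);
   vassert((LibVEX_GuestAMD64_get_rflags(&st) & AMD64G_CC_MASK_C) != 0);
}
#endif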


/*---------------------------------------------------------------*/
/*--- %rflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
/*--- %rflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static Bool isU64 ( IRExpr* e, ULong n )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U64
                  && e->Iex.Const.con->Ico.U64 == n );
}

IRExpr* guest_amd64_spechelper ( HChar* function_name,
                                 IRExpr** args,
                                 IRStmt** precedingStmts,
                                 Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "amd64g_calculate_condition" --------- */

   if (vex_streq(function_name, "amd64g_calculate_condition")) {
      /* specialise calls to the above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
         /* long long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Add64, cc_dep1, cc_dep2),
                           mkU64(0)));
      }

      /*---------------- SUBQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
         /* long long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
         /* long long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,cc_dep1,cc_dep2));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
         /* long long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
         /* long long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
         /* long long sub/cmp, then NB (unsigned greater than or equal)
            --> test src <=u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
         /* long long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
      }

      /*---------------- SUBL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
         /* long sub/cmp, then NLE (signed greater than)
            --> test !(dst <=s src)
            --> test (dst >s src)
            --> test (src <s dst) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           binop(Iop_Shl64,cc_dep2,mkU8(32)),
                           binop(Iop_Shl64,cc_dep1,mkU8(32))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           binop(Iop_Shl64,cc_dep2,mkU8(32)),
                           binop(Iop_Shl64,cc_dep1,mkU8(32))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           binop(Iop_Sub64,
                                 binop(Iop_Shl64, cc_dep1, mkU8(32)),
                                 binop(Iop_Shl64, cc_dep2, mkU8(32))),
                           mkU64(0)));
      }
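
      /* A note on the "Shl64 by 32" idiom used in the SUBL cases
         above and below: the helper arguments are always 64 bits
         wide, so shifting both operands left by 32 discards their
         upper halves and lets an ordinary 64-bit comparison observe
         exactly the low 32 bits, matching a genuine 32-bit compare
         without needing narrowing operations. */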

      /*---------------- SUBW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ16,
                           unop(Iop_64to16,cc_dep1),
                           unop(Iop_64to16,cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE16,
                           unop(Iop_64to16,cc_dep1),
                           unop(Iop_64to16,cc_dep2)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
         /* word sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           binop(Iop_Shl64,cc_dep2,mkU8(48))));
      }

      /*---------------- SUBB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ8,
                           unop(Iop_64to8,cc_dep1),
                           unop(Iop_64to8,cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE8,
                           unop(Iop_64to8,cc_dep1),
                           unop(Iop_64to8,cc_dep2)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
                                          && isU64(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (ULong)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU64(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U64s. */
         return binop(Iop_And64,
                      binop(Iop_Shr64,cc_dep1,mkU8(7)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
                                          && isU64(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (ULong) !dst[7]
         */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64,cc_dep1,mkU8(7)),
                            mkU64(1)),
                      mkU64(1));
      }

      /*---------------- LOGICQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
         /* long long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
         /* long long and/or/xor, then L
            LOGIC sets SF and ZF according to the
            result and makes OF be zero.  L computes SF ^ OF, but
            OF is zero, so this reduces to SF -- which will be 1 iff
            the result is < signed 0.  Hence ...
         */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           cc_dep1,
                           mkU64(0)));
      }

      /*---------------- LOGICL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           mkU64(0)));
      }

      /*---------------- LOGICB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
                                        mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)),
                                        mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (ULong)result[7] */
         return binop(Iop_And64,
                      binop(Iop_Shr64,cc_dep1,mkU8(7)),
                      mkU64(1));
      }
   1182 
   1183       /*---------------- INCB ----------------*/
   1184 
   1185       if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
   1186          /* 8-bit inc, then LE --> sign bit of the arg */
   1187          return binop(Iop_And64,
   1188                       binop(Iop_Shr64,
   1189                             binop(Iop_Sub64, cc_dep1, mkU64(1)),
   1190                             mkU8(7)),
   1191                       mkU64(1));
   1192       }
   1193 
   1194       /*---------------- INCW ----------------*/
   1195 
   1196       if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
   1197          /* 16-bit inc, then Z --> test dst == 0 */
   1198          return unop(Iop_1Uto64,
   1199                      binop(Iop_CmpEQ64,
   1200                            binop(Iop_Shl64,cc_dep1,mkU8(48)),
   1201                            mkU64(0)));
   1202       }
   1203 
   1204       /*---------------- DECL ----------------*/
   1205 
   1206       if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
   1207          /* dec L, then Z --> test dst == 0 */
   1208          return unop(Iop_1Uto64,
   1209                      binop(Iop_CmpEQ64,
   1210                            binop(Iop_Shl64,cc_dep1,mkU8(32)),
   1211                            mkU64(0)));
   1212       }
   1213 
   1214       /*---------------- DECW ----------------*/
   1215 
   1216       if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
   1217          /* 16-bit dec, then NZ --> test dst != 0 */
   1218          return unop(Iop_1Uto64,
   1219                      binop(Iop_CmpNE64,
   1220                            binop(Iop_Shl64,cc_dep1,mkU8(48)),
   1221                            mkU64(0)));
   1222       }
   1223 
   1224       /*---------------- COPY ----------------*/
   1225       /* This can happen, as a result of amd64 FP compares: "comisd ... ;
   1226          jbe" for example. */
   1227 
   1228       if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
   1229           (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
   1230          /* COPY, then BE --> extract C and Z from dep1, and test (C
   1231             or Z == 1). */
   1232          /* COPY, then NBE --> extract C and Z from dep1, and test (C
   1233             or Z == 0). */
   1234          ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
   1235          return
   1236             unop(
   1237                Iop_1Uto64,
   1238                binop(
   1239                   Iop_CmpEQ64,
   1240                   binop(
   1241                      Iop_And64,
   1242                      binop(
   1243                         Iop_Or64,
   1244                         binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
   1245                         binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
   1246                      ),
   1247                      mkU64(1)
   1248                   ),
   1249                   mkU64(nnn)
   1250                )
   1251             );
   1252       }
   1253 
   1254       if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
   1255          /* COPY, then B --> extract C from dep1, and test (C == 1). */
   1256          return
   1257             unop(
   1258                Iop_1Uto64,
   1259                binop(
   1260                   Iop_CmpNE64,
   1261                   binop(
   1262                      Iop_And64,
   1263                      binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
   1264                      mkU64(1)
   1265                   ),
   1266                   mkU64(0)
   1267                )
   1268             );
   1269       }
   1270 
   1271       if (isU64(cc_op, AMD64G_CC_OP_COPY)
   1272           && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
   1273          /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
   1274          /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
   1275          UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
   1276          return
   1277             unop(
   1278                Iop_1Uto64,
   1279                binop(
   1280                   Iop_CmpEQ64,
   1281                   binop(
   1282                      Iop_And64,
   1283                      binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
   1284                      mkU64(1)
   1285                   ),
   1286                   mkU64(nnn)
   1287                )
   1288             );
   1289       }
   1290 
   1291       if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
   1292          /* COPY, then P --> extract P from dep1, and test (P == 1). */
   1293          return
   1294             unop(
   1295                Iop_1Uto64,
   1296                binop(
   1297                   Iop_CmpNE64,
   1298                   binop(
   1299                      Iop_And64,
   1300                      binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
   1301                      mkU64(1)
   1302                   ),
   1303                   mkU64(0)
   1304                )
   1305             );
   1306       }
   1307 
   1308       return NULL;
   1309    }
   1310 
   1311    /* --------- specialising "amd64g_calculate_rflags_c" --------- */
   1312 
   1313    if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
   1314       /* specialise calls to above "calculate_rflags_c" function */
   1315       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
   1316       vassert(arity == 4);
   1317       cc_op   = args[0];
   1318       cc_dep1 = args[1];
   1319       cc_dep2 = args[2];
   1320       cc_ndep = args[3];
   1321 
   1322       if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
   1323          /* C after sub denotes unsigned less than */
   1324          return unop(Iop_1Uto64,
   1325                      binop(Iop_CmpLT64U,
   1326                            cc_dep1,
   1327                            cc_dep2));
   1328       }
   1329       if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
   1330          /* C after sub denotes unsigned less than */
   1331          return unop(Iop_1Uto64,
   1332                      binop(Iop_CmpLT64U,
   1333                            binop(Iop_Shl64,cc_dep1,mkU8(32)),
   1334                            binop(Iop_Shl64,cc_dep2,mkU8(32))));
   1335       }
   1336       if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
   1337          /* C after sub denotes unsigned less than */
   1338          return unop(Iop_1Uto64,
   1339                      binop(Iop_CmpLT64U,
   1340                            binop(Iop_And64,cc_dep1,mkU64(0xFF)),
   1341                            binop(Iop_And64,cc_dep2,mkU64(0xFF))));
   1342       }
   1343       if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
   1344           || isU64(cc_op, AMD64G_CC_OP_LOGICL)
   1345           || isU64(cc_op, AMD64G_CC_OP_LOGICW)
   1346           || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
   1347          /* cflag after logic is zero */
   1348          return mkU64(0);
   1349       }
   1350       if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
   1351           || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
   1352          /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
   1353          return cc_ndep;
   1354       }
   1355 
   1356 #     if 0
   1357       if (cc_op->tag == Iex_Const) {
   1358          vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
   1359       }
   1360 #     endif
   1361 
   1362       return NULL;
   1363    }
   1364 
   1365 #  undef unop
   1366 #  undef binop
   1367 #  undef mkU64
   1368 #  undef mkU8
   1369 
   1370    return NULL;
   1371 }
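
        /* A quick sanity sketch of the kind of fact the 32-bit rules above
           rely on (illustrative only, hence the #if 0; 'check_subl_le' is a
           hypothetical test harness, not a VEX API): left-shifting both
           32-bit operands into the top half of a 64-bit word preserves
           their signed ordering, so a signed 64-bit compare of the shifted
           values agrees with a signed 32-bit compare of the originals. */
        #if 0
        static Bool check_subl_le ( UInt a, UInt b )
        {
           Bool fast = ((Long)(((ULong)a) << 32)) <= ((Long)(((ULong)b) << 32));
           Bool slow = ((Int)a) <= ((Int)b);
           return toBool(fast == slow);   /* should hold for all a, b */
        }
        #endif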
   1372 
   1373 
   1374 /*---------------------------------------------------------------*/
   1375 /*--- Supporting functions for x87 FPU activities.            ---*/
   1376 /*---------------------------------------------------------------*/
   1377 
   1378 static inline Bool host_is_little_endian ( void )
   1379 {
   1380    UInt x = 0x76543210;
   1381    UChar* p = (UChar*)(&x);
   1382    return toBool(*p == 0x10);
   1383 }
   1384 
   1385 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
   1386 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   1387 ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
   1388 {
   1389    Bool   mantissaIsZero;
   1390    Int    bexp;
   1391    UChar  sign;
   1392    UChar* f64;
   1393 
   1394    vassert(host_is_little_endian());
   1395 
   1396    /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
   1397 
   1398    f64  = (UChar*)(&dbl);
   1399    sign = toUChar( (f64[7] >> 7) & 1 );
   1400 
   1401    /* First off, if the tag indicates the register was empty,
   1402       return 1,0,sign,1 */
   1403    if (tag == 0) {
   1404       /* vex_printf("Empty\n"); */
   1405       return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
   1406                                    | AMD64G_FC_MASK_C0;
   1407    }
   1408 
   1409    bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   1410    bexp &= 0x7FF;
   1411 
   1412    mantissaIsZero
   1413       = toBool(
   1414            (f64[6] & 0x0F) == 0
   1415            && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
   1416         );
   1417 
   1418    /* If both exponent and mantissa are zero, the value is zero.
   1419       Return 1,0,sign,0. */
   1420    if (bexp == 0 && mantissaIsZero) {
   1421       /* vex_printf("Zero\n"); */
   1422       return AMD64G_FC_MASK_C3 | 0
   1423                                | (sign << AMD64G_FC_SHIFT_C1) | 0;
   1424    }
   1425 
   1426    /* If exponent is zero but mantissa isn't, it's a denormal.
   1427       Return 1,1,sign,0. */
   1428    if (bexp == 0 && !mantissaIsZero) {
   1429       /* vex_printf("Denormal\n"); */
   1430       return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
   1431                                | (sign << AMD64G_FC_SHIFT_C1) | 0;
   1432    }
   1433 
   1434    /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
   1435       Return 0,1,sign,1. */
   1436    if (bexp == 0x7FF && mantissaIsZero) {
   1437       /* vex_printf("Inf\n"); */
   1438       return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
   1439                                    | AMD64G_FC_MASK_C0;
   1440    }
   1441 
   1442    /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
   1443       Return 0,0,sign,1. */
   1444    if (bexp == 0x7FF && !mantissaIsZero) {
   1445       /* vex_printf("NaN\n"); */
   1446       return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
   1447    }
   1448 
   1449    /* Uh, ok, we give up.  It must be a normal finite number.
   1450       Return 0,1,sign,0.
   1451    */
   1452    /* vex_printf("normal\n"); */
   1453    return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
   1454 }
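
        /* Illustrative-only usage sketch for the classifier above
           ('show_fxam' is a hypothetical debug aid, not part of the build).
           Assuming the AMD64G_FC_* layout from guest_amd64_defs.h, +0.0 in
           a non-empty register should produce C3=1,C2=0,C1=0,C0=0, and an
           empty register produces C3=1,C2=0,C1=sign,C0=1 whatever it
           holds. */
        #if 0
        static void show_fxam ( void )
        {
           union { double d; ULong u; } v;
           v.d = 0.0;
           vex_printf("zero:  %llx\n", amd64g_calculate_FXAM(1, v.u));
           vex_printf("empty: %llx\n", amd64g_calculate_FXAM(0, v.u));
        }
        #endif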
   1455 
   1456 
   1457 /* This is used to implement both 'frstor' and 'fldenv'.  The latter
   1458    appears to differ from the former only in that the 8 FP registers
   1459    themselves are not transferred into the guest state. */
   1460 static
   1461 VexEmWarn do_put_x87 ( Bool moveRegs,
   1462                        /*IN*/UChar* x87_state,
   1463                        /*OUT*/VexGuestAMD64State* vex_state )
   1464 {
   1465    Int        stno, preg;
   1466    UInt       tag;
   1467    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   1468    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   1469    Fpu_State* x87     = (Fpu_State*)x87_state;
   1470    UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
   1471    UInt       tagw    = x87->env[FP_ENV_TAG];
   1472    UInt       fpucw   = x87->env[FP_ENV_CTRL];
   1473    UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
   1474    VexEmWarn  ew;
   1475    UInt       fpround;
   1476    ULong      pair;
   1477 
   1478    /* Copy registers and tags */
   1479    for (stno = 0; stno < 8; stno++) {
   1480       preg = (stno + ftop) & 7;
   1481       tag = (tagw >> (2*preg)) & 3;
   1482       if (tag == 3) {
   1483          /* register is empty */
   1484          /* hmm, if it's empty, does it still get written?  Probably
   1485             safer to say it does.  If we don't, memcheck could get out
   1486             of sync, in that it thinks all FP registers are defined by
   1487             this helper, but in reality some have not been updated. */
   1488          if (moveRegs)
   1489             vexRegs[preg] = 0; /* IEEE754 64-bit zero */
   1490          vexTags[preg] = 0;
   1491       } else {
   1492          /* register is non-empty */
   1493          if (moveRegs)
   1494             convert_f80le_to_f64le( &x87->reg[10*stno],
   1495                                     (UChar*)&vexRegs[preg] );
   1496          vexTags[preg] = 1;
   1497       }
   1498    }
   1499 
   1500    /* stack pointer */
   1501    vex_state->guest_FTOP = ftop;
   1502 
   1503    /* status word */
   1504    vex_state->guest_FC3210 = c3210;
   1505 
   1506    /* handle the control word, setting FPROUND and detecting any
   1507       emulation warnings. */
   1508    pair    = amd64g_check_fldcw ( (ULong)fpucw );
   1509    fpround = (UInt)pair;
   1510    ew      = (VexEmWarn)(pair >> 32);
   1511 
   1512    vex_state->guest_FPROUND = fpround & 3;
   1513 
   1514    /* emulation warnings --> caller */
   1515    return ew;
   1516 }
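
        /* Worked example of the stno <-> preg mapping used above (a sketch;
           'show_x87_map' is hypothetical).  With FTOP == 5, stack slot
           %st(0) lives in physical register 5, %st(1) in 6, and so on,
           wrapping mod 8 -- which is why the tag word, indexed by physical
           register, is picked apart as (tagw >> (2*preg)) & 3. */
        #if 0
        static void show_x87_map ( void )
        {
           Int stno, ftop = 5;
           for (stno = 0; stno < 8; stno++)
              vex_printf("st(%d) -> preg %d\n", stno, (stno + ftop) & 7);
        }
        #endif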
   1517 
   1518 
   1519 /* Create an x87 FPU state from the guest state, as close as
   1520    we can approximate it. */
   1521 static
   1522 void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
   1523                   /*OUT*/UChar* x87_state )
   1524 {
   1525    Int        i, stno, preg;
   1526    UInt       tagw;
   1527    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   1528    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   1529    Fpu_State* x87     = (Fpu_State*)x87_state;
   1530    UInt       ftop    = vex_state->guest_FTOP;
   1531    UInt       c3210   = vex_state->guest_FC3210;
   1532 
   1533    for (i = 0; i < 14; i++)
   1534       x87->env[i] = 0;
   1535 
   1536    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   1537    x87->env[FP_ENV_STAT]
   1538       = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   1539    x87->env[FP_ENV_CTRL]
   1540       = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
   1541 
   1542    /* Dump the register stack in ST order. */
   1543    tagw = 0;
   1544    for (stno = 0; stno < 8; stno++) {
   1545       preg = (stno + ftop) & 7;
   1546       if (vexTags[preg] == 0) {
   1547          /* register is empty */
   1548          tagw |= (3 << (2*preg));
   1549          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
   1550                                  &x87->reg[10*stno] );
   1551       } else {
   1552          /* register is full. */
   1553          tagw |= (0 << (2*preg));
   1554          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
   1555                                  &x87->reg[10*stno] );
   1556       }
   1557    }
   1558    x87->env[FP_ENV_TAG] = toUShort(tagw);
   1559 }
   1560 
   1561 
   1562 /* CALLED FROM GENERATED CODE */
   1563 /* DIRTY HELPER (reads guest state, writes guest mem) */
   1564 /* NOTE: only handles 32-bit format (no REX.W on the insn) */
   1565 void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
   1566 {
   1567    /* Derived from values obtained from
   1568       vendor_id       : AuthenticAMD
   1569       cpu family      : 15
   1570       model           : 12
   1571       model name      : AMD Athlon(tm) 64 Processor 3200+
   1572       stepping        : 0
   1573       cpu MHz         : 2200.000
   1574       cache size      : 512 KB
   1575    */
   1576    /* Somewhat roundabout, but at least it's simple. */
   1577    Fpu_State tmp;
   1578    UShort*   addrS = (UShort*)addr;
   1579    UChar*    addrC = (UChar*)addr;
   1580    U128*     xmm   = (U128*)(addr + 160);
   1581    UInt      mxcsr;
   1582    UShort    fp_tags;
   1583    UInt      summary_tags;
   1584    Int       r, stno;
   1585    UShort    *srcS, *dstS;
   1586 
   1587    do_get_x87( gst, (UChar*)&tmp );
   1588    mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
   1589 
   1590    /* Now build the proper fxsave image from the x87 image we just
   1591       made. */
   1592 
   1593    addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   1594    addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
   1595 
   1596    /* set addrS[2] in an endian-independent way */
   1597    summary_tags = 0;
   1598    fp_tags = tmp.env[FP_ENV_TAG];
   1599    for (r = 0; r < 8; r++) {
   1600       if ( ((fp_tags >> (2*r)) & 3) != 3 )
   1601          summary_tags |= (1 << r);
   1602    }
   1603    addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   1604    addrC[5]  = 0; /* pad */
   1605 
   1606    /* FOP: faulting fpu opcode.  From experimentation, the real CPU
   1607       does not write this field. (?!) */
   1608    addrS[3]  = 0; /* BOGUS */
   1609 
   1610    /* RIP (Last x87 instruction pointer).  From experimentation, the
   1611       real CPU does not write this field. (?!) */
   1612    addrS[4]  = 0; /* BOGUS */
   1613    addrS[5]  = 0; /* BOGUS */
   1614    addrS[6]  = 0; /* BOGUS */
   1615    addrS[7]  = 0; /* BOGUS */
   1616 
   1617    /* RDP (Last x87 data pointer).  From experimentation, the real CPU
   1618       does not write this field. (?!) */
   1619    addrS[8]  = 0; /* BOGUS */
   1620    addrS[9]  = 0; /* BOGUS */
   1621    addrS[10] = 0; /* BOGUS */
   1622    addrS[11] = 0; /* BOGUS */
   1623 
   1624    addrS[12] = toUShort(mxcsr);  /* MXCSR */
   1625    addrS[13] = toUShort(mxcsr >> 16);
   1626 
   1627    addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
   1628    addrS[15] = 0x0000; /* MXCSR mask (hi16) */
   1629 
   1630    /* Copy in the FP registers, in ST order. */
   1631    for (stno = 0; stno < 8; stno++) {
   1632       srcS = (UShort*)(&tmp.reg[10*stno]);
   1633       dstS = (UShort*)(&addrS[16 + 8*stno]);
   1634       dstS[0] = srcS[0];
   1635       dstS[1] = srcS[1];
   1636       dstS[2] = srcS[2];
   1637       dstS[3] = srcS[3];
   1638       dstS[4] = srcS[4];
   1639       dstS[5] = 0;
   1640       dstS[6] = 0;
   1641       dstS[7] = 0;
   1642    }
   1643 
   1644    /* That's the first 160 bytes of the image done.  Now only %xmm0
   1645       .. %xmm15 remain to be copied.  If the host is big-endian, these
   1646       need to be byte-swapped. */
   1647    vassert(host_is_little_endian());
   1648 
   1649 #  define COPY_U128(_dst,_src)                       \
   1650       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
   1651            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
   1652       while (0)
   1653 
   1654    COPY_U128( xmm[0],  gst->guest_XMM0 );
   1655    COPY_U128( xmm[1],  gst->guest_XMM1 );
   1656    COPY_U128( xmm[2],  gst->guest_XMM2 );
   1657    COPY_U128( xmm[3],  gst->guest_XMM3 );
   1658    COPY_U128( xmm[4],  gst->guest_XMM4 );
   1659    COPY_U128( xmm[5],  gst->guest_XMM5 );
   1660    COPY_U128( xmm[6],  gst->guest_XMM6 );
   1661    COPY_U128( xmm[7],  gst->guest_XMM7 );
   1662    COPY_U128( xmm[8],  gst->guest_XMM8 );
   1663    COPY_U128( xmm[9],  gst->guest_XMM9 );
   1664    COPY_U128( xmm[10], gst->guest_XMM10 );
   1665    COPY_U128( xmm[11], gst->guest_XMM11 );
   1666    COPY_U128( xmm[12], gst->guest_XMM12 );
   1667    COPY_U128( xmm[13], gst->guest_XMM13 );
   1668    COPY_U128( xmm[14], gst->guest_XMM14 );
   1669    COPY_U128( xmm[15], gst->guest_XMM15 );
   1670 
   1671 #  undef COPY_U128
   1672 }
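
        /* For reference, a sketch of the (32-bit-format) fxsave image the
           helper above lays down.  The struct is illustrative only -- the
           code writes through addrS/addrC directly -- but the offsets
           should match the stores above. */
        #if 0
        typedef
           struct {
              UShort fcw;              /* byte 0:   control word             */
              UShort fsw;              /* byte 2:   status word              */
              UChar  ftw;              /* byte 4:   abridged tag summary     */
              UChar  pad5;             /* byte 5                             */
              UShort fop;              /* byte 6:   not written (left 0)     */
              UInt   rip_lo, rip_hi;   /* byte 8:   not written (left 0)     */
              UInt   rdp_lo, rdp_hi;   /* byte 16:  not written (left 0)     */
              UInt   mxcsr;            /* byte 24                            */
              UInt   mxcsr_mask;       /* byte 28                            */
              UChar  st[8][16];        /* byte 32:  80-bit regs, zero padded */
              UChar  xmm[16][16];      /* byte 160: %xmm0 .. %xmm15          */
           }
           FXSaveImage;                /* 416 bytes used of the 512 image    */
        #endif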
   1673 
   1674 
   1675 /* CALLED FROM GENERATED CODE */
   1676 /* DIRTY HELPER (writes guest state, reads guest mem) */
   1677 VexEmWarn amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State* gst, HWord addr )
   1678 {
   1679    Fpu_State tmp;
   1680    VexEmWarn warnX87 = EmWarn_NONE;
   1681    VexEmWarn warnXMM = EmWarn_NONE;
   1682    UShort*   addrS   = (UShort*)addr;
   1683    UChar*    addrC   = (UChar*)addr;
   1684    U128*     xmm     = (U128*)(addr + 160);
   1685    UShort    fp_tags;
   1686    Int       r, stno, i;
   1687 
   1688    /* Restore %xmm0 .. %xmm15.  If the host is big-endian, these need
   1689       to be byte-swapped. */
   1690    vassert(host_is_little_endian());
   1691 
   1692 #  define COPY_U128(_dst,_src)                       \
   1693       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
   1694            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
   1695       while (0)
   1696 
   1697    COPY_U128( gst->guest_XMM0, xmm[0] );
   1698    COPY_U128( gst->guest_XMM1, xmm[1] );
   1699    COPY_U128( gst->guest_XMM2, xmm[2] );
   1700    COPY_U128( gst->guest_XMM3, xmm[3] );
   1701    COPY_U128( gst->guest_XMM4, xmm[4] );
   1702    COPY_U128( gst->guest_XMM5, xmm[5] );
   1703    COPY_U128( gst->guest_XMM6, xmm[6] );
   1704    COPY_U128( gst->guest_XMM7, xmm[7] );
   1705    COPY_U128( gst->guest_XMM8, xmm[8] );
   1706    COPY_U128( gst->guest_XMM9, xmm[9] );
   1707    COPY_U128( gst->guest_XMM10, xmm[10] );
   1708    COPY_U128( gst->guest_XMM11, xmm[11] );
   1709    COPY_U128( gst->guest_XMM12, xmm[12] );
   1710    COPY_U128( gst->guest_XMM13, xmm[13] );
   1711    COPY_U128( gst->guest_XMM14, xmm[14] );
   1712    COPY_U128( gst->guest_XMM15, xmm[15] );
   1713 
   1714 #  undef COPY_U128
   1715 
   1716    /* Copy the x87 registers out of the image, into a temporary
   1717       Fpu_State struct. */
   1718    for (i = 0; i < 14; i++) tmp.env[i] = 0;
   1719    for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   1720    /* fill in tmp.reg[0..7] */
   1721    for (stno = 0; stno < 8; stno++) {
   1722       UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
   1723       UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
   1724       dstS[0] = srcS[0];
   1725       dstS[1] = srcS[1];
   1726       dstS[2] = srcS[2];
   1727       dstS[3] = srcS[3];
   1728       dstS[4] = srcS[4];
   1729    }
   1730    /* fill in tmp.env[0..13] */
   1731    tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   1732    tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
   1733 
   1734    fp_tags = 0;
   1735    for (r = 0; r < 8; r++) {
   1736       if (addrC[4] & (1<<r))
   1737          fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
   1738       else
   1739          fp_tags |= (3 << (2*r)); /* EMPTY */
   1740    }
   1741    tmp.env[FP_ENV_TAG] = fp_tags;
   1742 
   1743    /* Now write 'tmp' into the guest state. */
   1744    warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
   1745 
   1746    { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
   1747                 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
   1748      ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );
   1749 
   1750      warnXMM = (VexEmWarn)(w64 >> 32);
   1751 
   1752      gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
   1753    }
   1754 
   1755    /* Prefer an X87 emwarn over an XMM one, if both exist. */
   1756    if (warnX87 != EmWarn_NONE)
   1757       return warnX87;
   1758    else
   1759       return warnXMM;
   1760 }
   1761 
   1762 
   1763 /* DIRTY HELPER (writes guest state) */
   1764 /* Initialise the x87 FPU state as per 'finit'. */
   1765 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
   1766 {
   1767    Int i;
   1768    gst->guest_FTOP = 0;
   1769    for (i = 0; i < 8; i++) {
   1770       gst->guest_FPTAG[i] = 0; /* empty */
   1771       gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   1772    }
   1773    gst->guest_FPROUND = (ULong)Irrm_NEAREST;
   1774    gst->guest_FC3210  = 0;
   1775 }
   1776 
   1777 
   1778 /* CALLED FROM GENERATED CODE */
   1779 /* DIRTY HELPER (reads guest memory) */
   1780 ULong amd64g_dirtyhelper_loadF80le ( ULong addrU )
   1781 {
   1782    ULong f64;
   1783    convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 );
   1784    return f64;
   1785 }
   1786 
   1787 /* CALLED FROM GENERATED CODE */
   1788 /* DIRTY HELPER (writes guest memory) */
   1789 void amd64g_dirtyhelper_storeF80le ( ULong addrU, ULong f64 )
   1790 {
   1791    convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) );
   1792 }
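
        /* Round-trip sketch for the two helpers above (illustrative only;
           'f80_roundtrip_ok' is hypothetical, and assumes Ptr_to_ULong from
           libvex_basictypes.h).  Every IEEE754 double is exactly
           representable in the 80-bit format, so a store followed by a
           reload should reproduce the original bits for finite values. */
        #if 0
        static Bool f80_roundtrip_ok ( ULong f64bits )
        {
           UChar tenBytes[10];
           amd64g_dirtyhelper_storeF80le( Ptr_to_ULong(&tenBytes[0]), f64bits );
           return toBool(
              amd64g_dirtyhelper_loadF80le( Ptr_to_ULong(&tenBytes[0]) )
              == f64bits );
        }
        #endif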
   1793 
   1794 
   1795 /* CALLED FROM GENERATED CODE */
   1796 /* CLEAN HELPER */
   1797 /* mxcsr[15:0] contains an SSE native format MXCSR value.
   1798    Extract from it the required SSEROUND value and any resulting
   1799    emulation warning, and return (warn << 32) | sseround value.
   1800 */
   1801 ULong amd64g_check_ldmxcsr ( ULong mxcsr )
   1802 {
   1803    /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   1804    /* NOTE, encoded exactly as per enum IRRoundingMode. */
   1805    ULong rmode = (mxcsr >> 13) & 3;
   1806 
   1807    /* Detect any required emulation warnings. */
   1808    VexEmWarn ew = EmWarn_NONE;
   1809 
   1810    if ((mxcsr & 0x1F80) != 0x1F80) {
   1811       /* unmasked exceptions! */
   1812       ew = EmWarn_X86_sseExns;
   1813    }
   1814    else
   1815    if (mxcsr & (1<<15)) {
   1816       /* FZ is set */
   1817       ew = EmWarn_X86_fz;
   1818    }
   1819    else
   1820    if (mxcsr & (1<<6)) {
   1821       /* DAZ is set */
   1822       ew = EmWarn_X86_daz;
   1823    }
   1824 
   1825    return (((ULong)ew) << 32) | ((ULong)rmode);
   1826 }
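
        /* Example decode (a sketch, not part of the build).  The power-on
           MXCSR value 0x1F80 has all exceptions masked, FZ and DAZ clear,
           and round-to-nearest in bits 14:13, so it should decode to
           rmode 0 with no warning, whereas all-zeroes has unmasked
           exceptions: */
        #if 0
        static void check_mxcsr_decode ( void )
        {
           vassert( amd64g_check_ldmxcsr(0x1F80) == 0 );
           vassert( (amd64g_check_ldmxcsr(0) >> 32) == (ULong)EmWarn_X86_sseExns );
        }
        #endif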
   1827 
   1828 
   1829 /* CALLED FROM GENERATED CODE */
   1830 /* CLEAN HELPER */
   1831 /* Given sseround as an IRRoundingMode value, create a suitable SSE
   1832    native format MXCSR value. */
   1833 ULong amd64g_create_mxcsr ( ULong sseround )
   1834 {
   1835    sseround &= 3;
   1836    return 0x1F80 | (sseround << 13);
   1837 }
   1838 
   1839 
   1840 /* CLEAN HELPER */
   1841 /* fpucw[15:0] contains an x87 native format FPU control word.
   1842    Extract from it the required FPROUND value and any resulting
   1843    emulation warning, and return (warn << 32) | fpround value.
   1844 */
   1845 ULong amd64g_check_fldcw ( ULong fpucw )
   1846 {
   1847    /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   1848    /* NOTE, encoded exactly as per enum IRRoundingMode. */
   1849    ULong rmode = (fpucw >> 10) & 3;
   1850 
   1851    /* Detect any required emulation warnings. */
   1852    VexEmWarn ew = EmWarn_NONE;
   1853 
   1854    if ((fpucw & 0x3F) != 0x3F) {
   1855       /* unmasked exceptions! */
   1856       ew = EmWarn_X86_x87exns;
   1857    }
   1858    else
   1859    if (((fpucw >> 8) & 3) != 3) {
   1860       /* unsupported precision */
   1861       ew = EmWarn_X86_x87precision;
   1862    }
   1863 
   1864    return (((ULong)ew) << 32) | ((ULong)rmode);
   1865 }
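
        /* Analogous sketch for the x87 control word (illustrative only).
           The power-on value 0x037F masks all exceptions and selects 64-bit
           precision and round-to-nearest, so it decodes to rmode 0 with no
           warning; amd64g_create_fpucw below is its one-sided inverse: */
        #if 0
        static void check_fpucw_decode ( void )
        {
           vassert( amd64g_check_fldcw(0x037F) == 0 );
           vassert( amd64g_create_fpucw(0) == 0x037F );
        }
        #endif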
   1866 
   1867 
   1868 /* CLEAN HELPER */
   1869 /* Given fpround as an IRRoundingMode value, create a suitable x87
   1870    native format FPU control word. */
   1871 ULong amd64g_create_fpucw ( ULong fpround )
   1872 {
   1873    fpround &= 3;
   1874    return 0x037F | (fpround << 10);
   1875 }
   1876 
   1877 
   1878 /* This is used to implement 'fldenv'.
   1879    Reads 28 bytes at x87_state[0 .. 27]. */
   1880 /* CALLED FROM GENERATED CODE */
   1881 /* DIRTY HELPER */
   1882 VexEmWarn amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
   1883                                       /*IN*/HWord x87_state)
   1884 {
   1885    Int        stno, preg;
   1886    UInt       tag;
   1887    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   1888    Fpu_State* x87     = (Fpu_State*)x87_state;
   1889    UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
   1890    UInt       tagw    = x87->env[FP_ENV_TAG];
   1891    UInt       fpucw   = x87->env[FP_ENV_CTRL];
   1892    ULong      c3210   = x87->env[FP_ENV_STAT] & 0x4700;
   1893    VexEmWarn  ew;
   1894    ULong      fpround;
   1895    ULong      pair;
   1896 
   1897    /* Copy tags */
   1898    for (stno = 0; stno < 8; stno++) {
   1899       preg = (stno + ftop) & 7;
   1900       tag = (tagw >> (2*preg)) & 3;
   1901       if (tag == 3) {
   1902          /* register is empty */
   1903          vexTags[preg] = 0;
   1904       } else {
   1905          /* register is non-empty */
   1906          vexTags[preg] = 1;
   1907       }
   1908    }
   1909 
   1910    /* stack pointer */
   1911    vex_state->guest_FTOP = ftop;
   1912 
   1913    /* status word */
   1914    vex_state->guest_FC3210 = c3210;
   1915 
   1916    /* handle the control word, setting FPROUND and detecting any
   1917       emulation warnings. */
   1918    pair    = amd64g_check_fldcw ( (ULong)fpucw );
   1919    fpround = pair & 0xFFFFFFFFULL;
   1920    ew      = (VexEmWarn)(pair >> 32);
   1921 
   1922    vex_state->guest_FPROUND = fpround & 3;
   1923 
   1924    /* emulation warnings --> caller */
   1925    return ew;
   1926 }
   1927 
   1928 
   1929 /* CALLED FROM GENERATED CODE */
   1930 /* DIRTY HELPER */
   1931 /* Create an x87 FPU env from the guest state, as close as we can
   1932    approximate it.  Writes 28 bytes at x87_state[0..27]. */
   1933 void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
   1934                                  /*OUT*/HWord x87_state )
   1935 {
   1936    Int        i, stno, preg;
   1937    UInt       tagw;
   1938    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   1939    Fpu_State* x87     = (Fpu_State*)x87_state;
   1940    UInt       ftop    = vex_state->guest_FTOP;
   1941    ULong      c3210   = vex_state->guest_FC3210;
   1942 
   1943    for (i = 0; i < 14; i++)
   1944       x87->env[i] = 0;
   1945 
   1946    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   1947    x87->env[FP_ENV_STAT]
   1948       = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
   1949    x87->env[FP_ENV_CTRL]
   1950       = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));
   1951 
   1952    /* Compute the x87 tag word. */
   1953    tagw = 0;
   1954    for (stno = 0; stno < 8; stno++) {
   1955       preg = (stno + ftop) & 7;
   1956       if (vexTags[preg] == 0) {
   1957          /* register is empty */
   1958          tagw |= (3 << (2*preg));
   1959       } else {
   1960          /* register is full. */
   1961          tagw |= (0 << (2*preg));
   1962       }
   1963    }
   1964    x87->env[FP_ENV_TAG] = toUShort(tagw);
   1965 
   1966    /* We don't dump the x87 registers, though. */
   1967 }
   1968 
   1969 
   1970 /*---------------------------------------------------------------*/
   1971 /*--- Misc integer helpers, including rotates and CPUID.      ---*/
   1972 /*---------------------------------------------------------------*/
   1973 
   1974 /* Claim to be the following CPU, which is probably representative of
   1975    the lowliest (earliest) amd64 offerings.  It can do neither sse3
   1976    nor cx16.
   1977 
   1978    vendor_id       : AuthenticAMD
   1979    cpu family      : 15
   1980    model           : 5
   1981    model name      : AMD Opteron (tm) Processor 848
   1982    stepping        : 10
   1983    cpu MHz         : 1797.682
   1984    cache size      : 1024 KB
   1985    fpu             : yes
   1986    fpu_exception   : yes
   1987    cpuid level     : 1
   1988    wp              : yes
   1989    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
   1990                      mtrr pge mca cmov pat pse36 clflush mmx fxsr
   1991                      sse sse2 syscall nx mmxext lm 3dnowext 3dnow
   1992    bogomips        : 3600.62
   1993    TLB size        : 1088 4K pages
   1994    clflush size    : 64
   1995    cache_alignment : 64
   1996    address sizes   : 40 bits physical, 48 bits virtual
   1997    power management: ts fid vid ttp
   1998 */
   1999 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
   2000 {
   2001 #  define SET_ABCD(_a,_b,_c,_d)                \
   2002       do { st->guest_RAX = (ULong)(_a);        \
   2003            st->guest_RBX = (ULong)(_b);        \
   2004            st->guest_RCX = (ULong)(_c);        \
   2005            st->guest_RDX = (ULong)(_d);        \
   2006       } while (0)
   2007 
   2008    switch (0xFFFFFFFF & st->guest_RAX) {
   2009       case 0x00000000:
   2010          SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
   2011          break;
   2012       case 0x00000001:
   2013          SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
   2014          break;
   2015       case 0x80000000:
   2016          SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
   2017          break;
   2018       case 0x80000001:
   2019          SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, 0xe1d3fbff);
   2020          break;
   2021       case 0x80000002:
   2022          SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
   2023          break;
   2024       case 0x80000003:
   2025          SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
   2026          break;
   2027       case 0x80000004:
   2028          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2029          break;
   2030       case 0x80000005:
   2031          SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
   2032          break;
   2033       case 0x80000006:
   2034          SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
   2035          break;
   2036       case 0x80000007:
   2037          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
   2038          break;
   2039       case 0x80000008:
   2040          SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
   2041          break;
   2042       default:
   2043          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2044          break;
   2045    }
   2046 #  undef SET_ABCD
   2047 }
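
        /* Quick decode of the leaf-0 constants above (an illustrative
           sketch; 'show_vendor' is hypothetical).  The CPUID vendor string
           is the 12 bytes of EBX, EDX, ECX taken in that order,
           little-endian -- here "AuthenticAMD": */
        #if 0
        static void show_vendor ( void )
        {
           UInt  words[3] = { 0x68747541, 0x69746e65, 0x444d4163 };
           UChar str[13];
           Int   i;
           for (i = 0; i < 12; i++)
              str[i] = toUChar( words[i/4] >> (8 * (i % 4)) );
           str[12] = 0;
           vex_printf("vendor = %s\n", str);   /* "AuthenticAMD" */
        }
        #endif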
   2048 
   2049 
   2050 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16
   2051    capable.
   2052 
   2053    vendor_id       : GenuineIntel
   2054    cpu family      : 6
   2055    model           : 15
   2056    model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
   2057    stepping        : 6
   2058    cpu MHz         : 2394.000
   2059    cache size      : 4096 KB
   2060    physical id     : 0
   2061    siblings        : 2
   2062    core id         : 0
   2063    cpu cores       : 2
   2064    fpu             : yes
   2065    fpu_exception   : yes
   2066    cpuid level     : 10
   2067    wp              : yes
   2068    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
   2069                      mtrr pge mca cmov pat pse36 clflush dts acpi
   2070                      mmx fxsr sse sse2 ss ht tm syscall nx lm
   2071                      constant_tsc pni monitor ds_cpl vmx est tm2
   2072                      cx16 xtpr lahf_lm
   2073    bogomips        : 4798.78
   2074    clflush size    : 64
   2075    cache_alignment : 64
   2076    address sizes   : 36 bits physical, 48 bits virtual
   2077    power management:
   2078 */
   2079 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
   2080 {
   2081 #  define SET_ABCD(_a,_b,_c,_d)                \
   2082       do { st->guest_RAX = (ULong)(_a);        \
   2083            st->guest_RBX = (ULong)(_b);        \
   2084            st->guest_RCX = (ULong)(_c);        \
   2085            st->guest_RDX = (ULong)(_d);        \
   2086       } while (0)
   2087 
   2088    switch (0xFFFFFFFF & st->guest_RAX) {
   2089       case 0x00000000:
   2090          SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
   2091          break;
   2092       case 0x00000001:
   2093          SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
   2094          break;
   2095       case 0x00000002:
   2096          SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
   2097          break;
   2098       case 0x00000003:
   2099          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2100          break;
   2101       case 0x00000004: {
   2102          switch (0xFFFFFFFF & st->guest_RCX) {
   2103             case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
   2104                                       0x0000003f, 0x00000001); break;
   2105             case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
   2106                                       0x0000003f, 0x00000001); break;
   2107             case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
   2108                                       0x00000fff, 0x00000001); break;
   2109             default:         SET_ABCD(0x00000000, 0x00000000,
   2110                                       0x00000000, 0x00000000); break;
   2111          }
   2112          break;
   2113       }
   2114       case 0x00000005:
   2115          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
   2116          break;
   2117       case 0x00000006:
   2118          SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
   2119          break;
   2120       case 0x00000007:
   2121          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2122          break;
   2123       case 0x00000008:
   2124          SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
   2125          break;
   2126       case 0x00000009:
   2127          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2128          break;
   2129       case 0x0000000a:
   2130       unhandled_eax_value:
   2131          SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
   2132          break;
   2133       case 0x80000000:
   2134          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
   2135          break;
   2136       case 0x80000001:
   2137          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
   2138          break;
   2139       case 0x80000002:
   2140          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
   2141          break;
   2142       case 0x80000003:
   2143          SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
   2144          break;
   2145       case 0x80000004:
   2146          SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
   2147          break;
   2148       case 0x80000005:
   2149          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2150          break;
   2151       case 0x80000006:
   2152          SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
   2153          break;
   2154       case 0x80000007:
   2155          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2156          break;
   2157       case 0x80000008:
   2158          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
   2159          break;
   2160       default:
   2161          goto unhandled_eax_value;
   2162    }
   2163 #  undef SET_ABCD
   2164 }
   2165 
   2166 
   2167 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
   2168    capable.
   2169 
   2170    vendor_id       : GenuineIntel
   2171    cpu family      : 6
   2172    model           : 37
   2173    model name      : Intel(R) Core(TM) i5 CPU         670  @ 3.47GHz
   2174    stepping        : 2
   2175    cpu MHz         : 3334.000
   2176    cache size      : 4096 KB
   2177    physical id     : 0
   2178    siblings        : 4
   2179    core id         : 0
   2180    cpu cores       : 2
   2181    apicid          : 0
   2182    initial apicid  : 0
   2183    fpu             : yes
   2184    fpu_exception   : yes
   2185    cpuid level     : 11
   2186    wp              : yes
   2187    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
   2188                      mtrr pge mca cmov pat pse36 clflush dts acpi
   2189                      mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
   2190                      lm constant_tsc arch_perfmon pebs bts rep_good
   2191                      xtopology nonstop_tsc aperfmperf pni pclmulqdq
   2192                      dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
   2193                      xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
   2194                      arat tpr_shadow vnmi flexpriority ept vpid
   2195                      MINUS aes (see below)
   2196    bogomips        : 6957.57
   2197    clflush size    : 64
   2198    cache_alignment : 64
   2199    address sizes   : 36 bits physical, 48 bits virtual
   2200    power management:
   2201 */
   2202 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st )
   2203 {
   2204 #  define SET_ABCD(_a,_b,_c,_d)                \
   2205       do { st->guest_RAX = (ULong)(_a);        \
   2206            st->guest_RBX = (ULong)(_b);        \
   2207            st->guest_RCX = (ULong)(_c);        \
   2208            st->guest_RDX = (ULong)(_d);        \
   2209       } while (0)
   2210 
   2211    UInt old_eax = (UInt)st->guest_RAX;
   2212    UInt old_ecx = (UInt)st->guest_RCX;
   2213 
   2214    switch (old_eax) {
   2215       case 0x00000000:
   2216          SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
   2217          break;
   2218       case 0x00000001:
   2219          // & ~(1<<25): don't claim to support AES insns.  See
   2220          // bug 249991.
   2221          SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff & ~(1<<25),
   2222                                           0xbfebfbff);
   2223          break;
   2224       case 0x00000002:
   2225          SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
   2226          break;
   2227       case 0x00000003:
   2228          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2229          break;
   2230       case 0x00000004:
   2231          switch (old_ecx) {
   2232             case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
   2233                                       0x0000003f, 0x00000000); break;
   2234             case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f,
   2235                                       0x0000007f, 0x00000000); break;
   2236             case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
   2237                                       0x000001ff, 0x00000000); break;
   2238             case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
   2239                                       0x00000fff, 0x00000002); break;
   2240             default:         SET_ABCD(0x00000000, 0x00000000,
   2241                                       0x00000000, 0x00000000); break;
   2242          }
   2243          break;
   2244       case 0x00000005:
   2245          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
   2246          break;
   2247       case 0x00000006:
   2248          SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000);
   2249          break;
   2250       case 0x00000007:
   2251          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2252          break;
   2253       case 0x00000008:
   2254          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2255          break;
   2256       case 0x00000009:
   2257          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2258          break;
   2259       case 0x0000000a:
   2260          SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603);
   2261          break;
   2262       case 0x0000000b:
   2263          switch (old_ecx) {
   2264             case 0x00000000:
   2265                SET_ABCD(0x00000001, 0x00000002,
   2266                         0x00000100, 0x00000000); break;
   2267             case 0x00000001:
   2268                SET_ABCD(0x00000004, 0x00000004,
   2269                         0x00000201, 0x00000000); break;
   2270             default:
   2271                SET_ABCD(0x00000000, 0x00000000,
   2272                         old_ecx,    0x00000000); break;
   2273          }
   2274          break;
   2275       case 0x0000000c:
   2276          SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
   2277          break;
   2278       case 0x0000000d:
   2279          switch (old_ecx) {
   2280             case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
   2281                                       0x00000100, 0x00000000); break;
   2282             case 0x00000001: SET_ABCD(0x00000004, 0x00000004,
   2283                                       0x00000201, 0x00000000); break;
   2284             default:         SET_ABCD(0x00000000, 0x00000000,
   2285                                       old_ecx,    0x00000000); break;
   2286          }
   2287          break;
   2288       case 0x80000000:
   2289          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
   2290          break;
   2291       case 0x80000001:
   2292          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
   2293          break;
   2294       case 0x80000002:
   2295          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
   2296          break;
   2297       case 0x80000003:
   2298          SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020);
   2299          break;
   2300       case 0x80000004:
   2301          SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847);
   2302          break;
   2303       case 0x80000005:
   2304          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2305          break;
   2306       case 0x80000006:
   2307          SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
   2308          break;
   2309       case 0x80000007:
   2310          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
   2311          break;
   2312       case 0x80000008:
   2313          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
   2314          break;
   2315       default:
   2316          SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
   2317          break;
   2318    }
   2319 #  undef SET_ABCD
   2320 }
   2321 
   2322 
   2323 ULong amd64g_calculate_RCR ( ULong arg,
   2324                              ULong rot_amt,
   2325                              ULong rflags_in,
   2326                              Long  szIN )
   2327 {
   2328    Bool  wantRflags = toBool(szIN < 0);
   2329    ULong sz         = wantRflags ? (-szIN) : szIN;
   2330    ULong tempCOUNT  = rot_amt & (sz == 8 ? 0x3F : 0x1F);
   2331    ULong cf=0, of=0, tempcf;
   2332 
   2333    switch (sz) {
   2334       case 8:
   2335          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2336          of        = ((arg >> 63) ^ cf) & 1;
   2337          while (tempCOUNT > 0) {
   2338             tempcf = arg & 1;
   2339             arg    = (arg >> 1) | (cf << 63);
   2340             cf     = tempcf;
   2341             tempCOUNT--;
   2342          }
   2343          break;
   2344       case 4:
   2345          while (tempCOUNT >= 33) tempCOUNT -= 33;
   2346          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2347          of        = ((arg >> 31) ^ cf) & 1;
   2348          while (tempCOUNT > 0) {
   2349             tempcf = arg & 1;
   2350             arg    = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31);
   2351             cf     = tempcf;
   2352             tempCOUNT--;
   2353          }
   2354          break;
   2355       case 2:
   2356          while (tempCOUNT >= 17) tempCOUNT -= 17;
   2357          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2358          of        = ((arg >> 15) ^ cf) & 1;
   2359          while (tempCOUNT > 0) {
   2360             tempcf = arg & 1;
   2361             arg    = ((arg >> 1) & 0x7FFFULL) | (cf << 15);
   2362             cf     = tempcf;
   2363             tempCOUNT--;
   2364          }
   2365          break;
   2366       case 1:
   2367          while (tempCOUNT >= 9) tempCOUNT -= 9;
   2368          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2369          of        = ((arg >> 7) ^ cf) & 1;
   2370          while (tempCOUNT > 0) {
   2371             tempcf = arg & 1;
   2372             arg    = ((arg >> 1) & 0x7FULL) | (cf << 7);
   2373             cf     = tempcf;
   2374             tempCOUNT--;
   2375          }
   2376          break;
   2377       default:
   2378          vpanic("calculate_RCR(amd64g): invalid size");
   2379    }
   2380 
   2381    cf &= 1;
   2382    of &= 1;
   2383    rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
   2384    rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
   2385 
   2386    /* caller can ask to have back either the resulting flags or
   2387       resulting value, but not both */
   2388    return wantRflags ? rflags_in : arg;
   2389 }
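
        /* Worked example for the helper above (sketch only; 'check_rcr8'
           is hypothetical).  An 8-bit rcr-by-1 of 0x01 with CF=1 on entry
           shifts the old CF into bit 7 and the old bit 0 out into CF, and
           OF is bit 7 of the original arg XORed with the incoming CF: */
        #if 0
        static void check_rcr8 ( void )
        {
           ULong flags_in = AMD64G_CC_MASK_C;   /* only CF set on entry */
           vassert( amd64g_calculate_RCR(0x01, 1, flags_in, 1) == 0x80 );
           vassert( amd64g_calculate_RCR(0x01, 1, flags_in, -1)
                    & AMD64G_CC_MASK_C );       /* CF out = old bit 0 = 1 */
           vassert( amd64g_calculate_RCR(0x01, 1, flags_in, -1)
                    & AMD64G_CC_MASK_O );       /* OF = (0 ^ 1) = 1       */
        }
        #endif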
   2390 
   2391 ULong amd64g_calculate_RCL ( ULong arg,
   2392                              ULong rot_amt,
   2393                              ULong rflags_in,
   2394                              Long  szIN )
   2395 {
   2396    Bool  wantRflags = toBool(szIN < 0);
   2397    ULong sz         = wantRflags ? (-szIN) : szIN;
   2398    ULong tempCOUNT  = rot_amt & (sz == 8 ? 0x3F : 0x1F);
   2399    ULong cf=0, of=0, tempcf;
   2400 
   2401    switch (sz) {
   2402       case 8:
   2403          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2404          while (tempCOUNT > 0) {
   2405             tempcf = (arg >> 63) & 1;
   2406             arg    = (arg << 1) | (cf & 1);
   2407             cf     = tempcf;
   2408             tempCOUNT--;
   2409          }
   2410          of = ((arg >> 63) ^ cf) & 1;
   2411          break;
   2412       case 4:
   2413          while (tempCOUNT >= 33) tempCOUNT -= 33;
   2414          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2415          while (tempCOUNT > 0) {
   2416             tempcf = (arg >> 31) & 1;
   2417             arg    = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1));
   2418             cf     = tempcf;
   2419             tempCOUNT--;
   2420          }
   2421          of = ((arg >> 31) ^ cf) & 1;
   2422          break;
   2423       case 2:
   2424          while (tempCOUNT >= 17) tempCOUNT -= 17;
   2425          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2426          while (tempCOUNT > 0) {
   2427             tempcf = (arg >> 15) & 1;
   2428             arg    = 0xFFFFULL & ((arg << 1) | (cf & 1));
   2429             cf     = tempcf;
   2430             tempCOUNT--;
   2431          }
   2432          of = ((arg >> 15) ^ cf) & 1;
   2433          break;
   2434       case 1:
   2435          while (tempCOUNT >= 9) tempCOUNT -= 9;
   2436          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2437          while (tempCOUNT > 0) {
   2438             tempcf = (arg >> 7) & 1;
   2439             arg    = 0xFFULL & ((arg << 1) | (cf & 1));
   2440             cf     = tempcf;
   2441             tempCOUNT--;
   2442          }
   2443          of = ((arg >> 7) ^ cf) & 1;
   2444          break;
   2445       default:
   2446          vpanic("calculate_RCL(amd64g): invalid size");
   2447    }
   2448 
   2449    cf &= 1;
   2450    of &= 1;
   2451    rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
   2452    rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
   2453 
   2454    return wantRflags ? rflags_in : arg;
   2455 }
   2456 
   2457 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+)
   2458  * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25
   2459  */
   2460 ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which)
   2461 {
   2462    ULong hi, lo, tmp, A[16];
   2463 
   2464    A[0] = 0;            A[1] = a;
   2465    A[2] = A[1] << 1;    A[3] = A[2] ^ a;
   2466    A[4] = A[2] << 1;    A[5] = A[4] ^ a;
   2467    A[6] = A[3] << 1;    A[7] = A[6] ^ a;
   2468    A[8] = A[4] << 1;    A[9] = A[8] ^ a;
   2469    A[10] = A[5] << 1;   A[11] = A[10] ^ a;
   2470    A[12] = A[6] << 1;   A[13] = A[12] ^ a;
   2471    A[14] = A[7] << 1;   A[15] = A[14] ^ a;
   2472 
   2473    lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15];
   2474    hi = lo >> 56;
   2475    lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15];
   2476    hi = (hi << 8) | (lo >> 56);
   2477    lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15];
   2478    hi = (hi << 8) | (lo >> 56);
   2479    lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15];
   2480    hi = (hi << 8) | (lo >> 56);
   2481    lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15];
   2482    hi = (hi << 8) | (lo >> 56);
   2483    lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15];
   2484    hi = (hi << 8) | (lo >> 56);
   2485    lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15];
   2486    hi = (hi << 8) | (lo >> 56);
   2487    lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15];
   2488 
   2489    ULong m0 = -1;
   2490    m0 /= 255;
   2491    tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp;
   2492    tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp;
   2493    tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp;
   2494    tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp;
   2495    tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp;
   2496    tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp;
   2497    tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp;
   2498 
   2499    return which ? hi : lo;
   2500 }
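
        /* Tiny sanity example for the carry-less multiply above (sketch
           only).  In GF(2)[x], (x+1)*(x+1) = x^2 + 1 -- the cross terms
           cancel mod 2 -- so 0x3 clmul 0x3 is 0x5, with a zero high
           half: */
        #if 0
        static void check_pclmul ( void )
        {
           vassert( amd64g_calculate_pclmul(3, 3, 0/*lo*/) == 5 );
           vassert( amd64g_calculate_pclmul(3, 3, 1/*hi*/) == 0 );
        }
        #endif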
   2501 
   2502 
   2503 /* CALLED FROM GENERATED CODE */
   2504 /* DIRTY HELPER (non-referentially-transparent) */
   2505 /* Horrible hack.  On non-amd64 platforms, return 1. */
   2506 ULong amd64g_dirtyhelper_RDTSC ( void )
   2507 {
   2508 #  if defined(__x86_64__)
   2509    UInt  eax, edx;
   2510    __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
   2511    return (((ULong)edx) << 32) | ((ULong)eax);
   2512 #  else
   2513    return 1ULL;
   2514 #  endif
   2515 }
   2516 
   2517 
   2518 /* CALLED FROM GENERATED CODE */
   2519 /* DIRTY HELPER (non-referentially-transparent) */
   2520 /* Horrible hack.  On non-amd64 platforms, return 0. */
   2521 ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ )
   2522 {
   2523 #  if defined(__x86_64__)
   2524    ULong r = 0;
   2525    portno &= 0xFFFF;
   2526    switch (sz) {
   2527       case 4:
   2528          __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
   2529                               : "=a" (r) : "Nd" (portno));
   2530          break;
   2531       case 2:
   2532          __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0"
   2533                               : "=a" (r) : "Nd" (portno));
   2534          break;
   2535       case 1:
   2536          __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0"
   2537                               : "=a" (r) : "Nd" (portno));
   2538          break;
   2539       default:
   2540          break; /* note: no 64-bit version of insn exists */
   2541    }
   2542    return r;
   2543 #  else
   2544    return 0;
   2545 #  endif
   2546 }
   2547 
   2548 
   2549 /* CALLED FROM GENERATED CODE */
   2550 /* DIRTY HELPER (non-referentially-transparent) */
   2551 /* Horrible hack.  On non-amd64 platforms, do nothing. */
   2552 void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ )
   2553 {
   2554 #  if defined(__x86_64__)
   2555    portno &= 0xFFFF;
   2556    switch (sz) {
   2557       case 4:
   2558          __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1"
   2559                               : : "a" (data), "Nd" (portno));
   2560          break;
   2561       case 2:
   2562          __asm__ __volatile__("outw %w0, %w1"
   2563                               : : "a" (data), "Nd" (portno));
   2564          break;
   2565       case 1:
   2566          __asm__ __volatile__("outb %b0, %w1"
   2567                               : : "a" (data), "Nd" (portno));
   2568          break;
   2569       default:
   2570          break; /* note: no 64-bit version of insn exists */
   2571    }
   2572 #  else
   2573    /* do nothing */
   2574 #  endif
   2575 }
   2576 
   2577 /* CALLED FROM GENERATED CODE */
   2578 /* DIRTY HELPER (non-referentially-transparent) */
   2579 /* Horrible hack.  On non-amd64 platforms, do nothing. */
   2580 /* op = 0: call the native SGDT instruction.
   2581    op = 1: call the native SIDT instruction.
   2582 */
   2583 void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) {
   2584 #  if defined(__x86_64__)
   2585    switch (op) {
   2586       case 0:
   2587          __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
   2588          break;
   2589       case 1:
   2590          __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
   2591          break;
   2592       default:
   2593          vpanic("amd64g_dirtyhelper_SxDT");
   2594    }
   2595 #  else
   2596    /* do nothing */
   2597    UChar* p = (UChar*)address;
   2598    p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
   2599    p[6] = p[7] = p[8] = p[9] = 0;
   2600 #  endif
   2601 }
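
        /* A sketch of how the 10 bytes written by sgdt/sidt decode (not
           part of the build; 'show_gdtr' is hypothetical): a 2-byte table
           limit followed by an 8-byte linear base address, both
           little-endian: */
        #if 0
        static void show_gdtr ( void )
        {
           UChar buf[10];
           UInt  limit, i;
           ULong base = 0;
           amd64g_dirtyhelper_SxDT( &buf[0], 0/*SGDT*/ );
           limit = (UInt)buf[0] | ((UInt)buf[1] << 8);
           for (i = 0; i < 8; i++)
              base |= ((ULong)buf[2+i]) << (8*i);
           vex_printf("gdt limit=0x%x base=0x%llx\n", limit, base);
        }
        #endif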

/*---------------------------------------------------------------*/
/*--- Helpers for MMX/SSE/SSE2.                               ---*/
/*---------------------------------------------------------------*/

static inline UChar abdU8 ( UChar xx, UChar yy ) {
   return toUChar(xx>yy ? xx-yy : yy-xx);
}

static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   return (((ULong)w1) << 32) | ((ULong)w0);
}

static inline UShort sel16x4_3 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32 >> 16);
}
static inline UShort sel16x4_2 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32);
}
static inline UShort sel16x4_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32 >> 16);
}
static inline UShort sel16x4_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32);
}

static inline UChar sel8x8_7 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 24);
}
static inline UChar sel8x8_6 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 16);
}
static inline UChar sel8x8_5 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 8);
}
static inline UChar sel8x8_4 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 0);
}
static inline UChar sel8x8_3 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 24);
}
static inline UChar sel8x8_2 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 16);
}
static inline UChar sel8x8_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 8);
}
static inline UChar sel8x8_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 0);
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
{
   return
      mk32x2(
         (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
            + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
         (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
            + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
      );
}
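
/* Worked example (illustration only): with xx = 0x0004000300020001
   (16-bit lanes 4,3,2,1 from top to bottom) and yy =
   0x0008000700060005 (lanes 8,7,6,5), the two dot products are
   4*8 + 3*7 = 53 and 2*6 + 1*5 = 17, so the result is
   0x0000003500000011, as PMADDWD requires. */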

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_mmx_pmovmskb ( ULong xx )
{
   ULong r = 0;
   if (xx & (1ULL << (64-1))) r |= (1<<7);
   if (xx & (1ULL << (56-1))) r |= (1<<6);
   if (xx & (1ULL << (48-1))) r |= (1<<5);
   if (xx & (1ULL << (40-1))) r |= (1<<4);
   if (xx & (1ULL << (32-1))) r |= (1<<3);
   if (xx & (1ULL << (24-1))) r |= (1<<2);
   if (xx & (1ULL << (16-1))) r |= (1<<1);
   if (xx & (1ULL << ( 8-1))) r |= (1<<0);
   return r;
}
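
/* Worked example (illustration only): the helper collects the top bit
   of each of the 8 bytes.  For xx = 0x8000000000008000, only bytes 7
   and 1 have their MSBs set, so the result is (1<<7) | (1<<1) = 0x82. */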

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
{
   UInt t = 0;
   t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
   t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
   t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
   t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
   t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
   t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
   t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
   t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
   t &= 0xFFFF;
   return (ULong)t;
}
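
/* Worked example (illustration only): if every byte of xx is 0x10 and
   every byte of yy is 0x01, each absolute difference is 0x0F and the
   sum of the eight differences is 8 * 0x0F = 0x78. */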

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )
{
   ULong rHi8 = amd64g_calculate_mmx_pmovmskb ( w64hi );
   ULong rLo8 = amd64g_calculate_mmx_pmovmskb ( w64lo );
   return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF);
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calc_crc32b ( ULong crcIn, ULong b )
{
   UInt  i;
   ULong crc = (b & 0xFFULL) ^ crcIn;
   for (i = 0; i < 8; i++)
      crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
   return crc;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calc_crc32w ( ULong crcIn, ULong w )
{
   UInt  i;
   ULong crc = (w & 0xFFFFULL) ^ crcIn;
   for (i = 0; i < 16; i++)
      crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
   return crc;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calc_crc32l ( ULong crcIn, ULong l )
{
   UInt i;
   ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn;
   for (i = 0; i < 32; i++)
      crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
   return crc;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calc_crc32q ( ULong crcIn, ULong q )
{
   ULong crc = amd64g_calc_crc32l(crcIn, q);
   return amd64g_calc_crc32l(crc, q >> 32);
}
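
/* Sanity sketch (illustration only; example_crc32c is not part of
   this file): the four helpers above implement the bit-reflected
   CRC-32C (Castagnoli) update with polynomial 0x82f63b78, exactly as
   the SSE4.2 CRC32 instruction does -- with no initial or final
   inversion, which is left to the caller.  With the usual ~0 pre- and
   post-conditioning, the bytes of "123456789" should produce the
   standard check value 0xE3069283. */
#if 0
static UInt example_crc32c ( const UChar* buf, UInt len )
{
   UInt  i;
   ULong crc = 0xFFFFFFFFULL;           /* initial inversion */
   for (i = 0; i < len; i++)
      crc = amd64g_calc_crc32b(crc, buf[i]);
   return ((UInt)crc) ^ 0xFFFFFFFF;     /* final inversion */
}
#endif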


/*---------------------------------------------------------------*/
/*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M}                    ---*/
/*---------------------------------------------------------------*/

static UInt zmask_from_V128 ( V128* arg )
{
   UInt i, res = 0;
   for (i = 0; i < 16; i++) {
      res |= ((arg->w8[i] == 0) ? 1 : 0) << i;
   }
   return res;
}

/* Helps with PCMP{I,E}STR{I,M}.

   CALLED FROM GENERATED CODE: DIRTY HELPER(s).  (Not really dirty;
   it could be a clean helper, except that we can't pass two V128s by
   value to a clean helper, nor have one returned.)  Reads guest
   state, writes to guest state for the xSTRM cases, makes no memory
   accesses, and is otherwise a pure function.

   opc4_and_imm contains (4th byte of opcode << 8) | the imm8 byte, so
   the callee knows which I/E and I/M variant it is dealing with and
   what the specific operation is.  The 4th byte of the opcode is in
   the range 0x60 to 0x63:
       istri  66 0F 3A 63
       istrm  66 0F 3A 62
       estri  66 0F 3A 61
       estrm  66 0F 3A 60

   gstOffL and gstOffR are the guest state offsets of the two XMM
   register inputs.  We never have to deal with the memory case, since
   that is handled by pre-loading the relevant value into the fake
   XMM16 register.

   For the ESTRx variants, edxIN and eaxIN hold the values of those
   two registers.

   In all cases, the bottom 16 bits of the result contain the new
   OSZACP %rflags values.  For the xSTRI variants, bits[31:16] of the
   result hold the new %ecx value.  For the xSTRM variants, the helper
   writes the result directly to the guest XMM0.

   Declarable side effects: in all cases, reads guest state at
   [gstOffL, +16) and [gstOffR, +16).  For the xSTRM variants, also
   writes guest_XMM0.

   This helper expects to be called only with opc4_and_imm
   combinations which have actually been validated, and asserts
   otherwise.  The front end should ensure we're only called with
   verified values.
*/
ULong amd64g_dirtyhelper_PCMPxSTRx (
          VexGuestAMD64State* gst,
          HWord opc4_and_imm,
          HWord gstOffL, HWord gstOffR,
          HWord edxIN, HWord eaxIN
       )
{
   HWord opc4 = (opc4_and_imm >> 8) & 0xFF;
   HWord imm8 = opc4_and_imm & 0xFF;
   HWord isISTRx = opc4 & 2;
   HWord isxSTRM = (opc4 & 1) ^ 1;
   vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
   vassert((imm8 & 1) == 0); /* we support byte-size cases only */

   // where the args are
   V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
   V128* argR = (V128*)( ((UChar*)gst) + gstOffR );

   /* Create the arg validity masks, either from the vectors
      themselves or from the supplied edx/eax values. */
   // FIXME: this is only right for the 8-bit data cases.
   // At least that is asserted above.
   UInt zmaskL, zmaskR;
   if (isISTRx) {
      zmaskL = zmask_from_V128(argL);
      zmaskR = zmask_from_V128(argR);
   } else {
      Int tmp;
      tmp = edxIN & 0xFFFFFFFF;
      if (tmp < -16) tmp = -16;
      if (tmp > 16)  tmp = 16;
      if (tmp < 0)   tmp = -tmp;
      vassert(tmp >= 0 && tmp <= 16);
      zmaskL = (1 << tmp) & 0xFFFF;
      tmp = eaxIN & 0xFFFFFFFF;
      if (tmp < -16) tmp = -16;
      if (tmp > 16)  tmp = 16;
      if (tmp < 0)   tmp = -tmp;
      vassert(tmp >= 0 && tmp <= 16);
      zmaskR = (1 << tmp) & 0xFFFF;
   }

   // temp spot for the resulting flags and vector.
   V128 resV;
   UInt resOSZACP;

   // do the math
   Bool ok = compute_PCMPxSTRx (
                &resV, &resOSZACP, argL, argR,
                zmaskL, zmaskR, imm8, (Bool)isxSTRM
             );

   // The front end shouldn't pass us any imm8 variants we can't
   // handle.  Hence:
   vassert(ok);

   // So, finally, we need to get the results back to the caller.
   // In all cases, the new OSZACP value is in the lowest 16 bits
   // of the return value.
   if (isxSTRM) {
      /* gst->guest_XMM0 = resV; */ // gcc doesn't like that
      gst->guest_XMM0[0] = resV.w32[0];
      gst->guest_XMM0[1] = resV.w32[1];
      gst->guest_XMM0[2] = resV.w32[2];
      gst->guest_XMM0[3] = resV.w32[3];
      return resOSZACP & 0x8D5;
   } else {
      UInt newECX = resV.w32[0] & 0xFFFF;
      return (newECX << 16) | (resOSZACP & 0x8D5);
   }
}
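
/* Illustration only, following the packing described above: for a
   PCMPISTRI with immediate byte 0x0C, the front end would pass
   opc4_and_imm = (0x63 << 8) | 0x0C.  On return, the new OSZACP flags
   sit in bits [15:0] (within mask 0x8D5) and, for the xSTRI variants,
   the new %ecx value sits in bits [31:16].  Here gst, gstOffL and
   gstOffR stand for the usual guest-state pointer and offsets: */
#if 0
   ULong res    = amd64g_dirtyhelper_PCMPxSTRx(gst, (0x63 << 8) | 0x0C,
                                               gstOffL, gstOffR, 0, 0);
   UInt  oszacp = res & 0x8D5;
   UInt  newECX = (res >> 16) & 0xFFFF;
#endif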


/*---------------------------------------------------------------*/
/*--- Helpers for dealing with, and describing,               ---*/
/*--- guest state as a whole.                                 ---*/
/*---------------------------------------------------------------*/

/* Initialise the entire amd64 guest state. */
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
{
   vex_state->guest_RAX = 0;
   vex_state->guest_RCX = 0;
   vex_state->guest_RDX = 0;
   vex_state->guest_RBX = 0;
   vex_state->guest_RSP = 0;
   vex_state->guest_RBP = 0;
   vex_state->guest_RSI = 0;
   vex_state->guest_RDI = 0;
   vex_state->guest_R8  = 0;
   vex_state->guest_R9  = 0;
   vex_state->guest_R10 = 0;
   vex_state->guest_R11 = 0;
   vex_state->guest_R12 = 0;
   vex_state->guest_R13 = 0;
   vex_state->guest_R14 = 0;
   vex_state->guest_R15 = 0;

   vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;

   vex_state->guest_DFLAG   = 1; /* forwards */
   vex_state->guest_IDFLAG  = 0;

   /* HACK: represent the offset associated with %fs==0.  This
      assumes that %fs is only ever zero. */
   vex_state->guest_FS_ZERO = 0;

   vex_state->guest_RIP = 0;

   /* Initialise the simulated FPU. */
   amd64g_dirtyhelper_FINIT( vex_state );

   /* Initialise the SSE state. */
#  define SSEZERO(_xmm) _xmm[0] = _xmm[1] = _xmm[2] = _xmm[3] = 0;

   vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST;
   SSEZERO(vex_state->guest_XMM0);
   SSEZERO(vex_state->guest_XMM1);
   SSEZERO(vex_state->guest_XMM2);
   SSEZERO(vex_state->guest_XMM3);
   SSEZERO(vex_state->guest_XMM4);
   SSEZERO(vex_state->guest_XMM5);
   SSEZERO(vex_state->guest_XMM6);
   SSEZERO(vex_state->guest_XMM7);
   SSEZERO(vex_state->guest_XMM8);
   SSEZERO(vex_state->guest_XMM9);
   SSEZERO(vex_state->guest_XMM10);
   SSEZERO(vex_state->guest_XMM11);
   SSEZERO(vex_state->guest_XMM12);
   SSEZERO(vex_state->guest_XMM13);
   SSEZERO(vex_state->guest_XMM14);
   SSEZERO(vex_state->guest_XMM15);
   SSEZERO(vex_state->guest_XMM16);

#  undef SSEZERO

   vex_state->guest_EMWARN = EmWarn_NONE;

   /* These should never be read or written, but we initialise
      them anyway. */
   vex_state->guest_TISTART = 0;
   vex_state->guest_TILEN   = 0;

   vex_state->guest_NRADDR   = 0;
   vex_state->guest_SC_CLASS = 0;
   vex_state->guest_GS_0x60  = 0;

   vex_state->guest_IP_AT_SYSCALL = 0;
   /* vex_state->padding = 0; */
}
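
/* Usage sketch (illustration only): a LibVEX client would typically
   initialise the state and then set the few registers it cares
   about; initial_stack_pointer and entry_point are placeholders for
   client-supplied values. */
#if 0
   VexGuestAMD64State gst;
   LibVEX_GuestAMD64_initialise(&gst);
   gst.guest_RSP = initial_stack_pointer;  /* placeholder */
   gst.guest_RIP = entry_point;            /* placeholder */
#endif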


/* Figure out if any part of the guest state contained in minoff
   .. maxoff requires precise memory exceptions.  If in doubt return
   True (but this generates significantly slower code).

   By default we enforce precise exns for guest %RSP, %RBP and %RIP
   only.  These are the minimum needed to extract correct stack
   backtraces from amd64 code.
*/
Bool guest_amd64_state_requires_precise_mem_exns ( Int minoff,
                                                   Int maxoff )
{
   Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
   Int rbp_max = rbp_min + 8 - 1;
   Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP);
   Int rsp_max = rsp_min + 8 - 1;
   Int rip_min = offsetof(VexGuestAMD64State, guest_RIP);
   Int rip_max = rip_min + 8 - 1;

   if (maxoff < rbp_min || minoff > rbp_max) {
      /* no overlap with rbp */
   } else {
      return True;
   }

   if (maxoff < rsp_min || minoff > rsp_max) {
      /* no overlap with rsp */
   } else {
      return True;
   }

   if (maxoff < rip_min || minoff > rip_max) {
      /* no overlap with rip */
   } else {
      return True;
   }

   return False;
}
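
/* Illustration only: a range that touches any byte of %RSP, %RBP or
   %RIP requires precise exceptions, while one confined to, say,
   guest_RAX does not: */
#if 0
   Int rax = offsetof(VexGuestAMD64State, guest_RAX);
   Int rsp = offsetof(VexGuestAMD64State, guest_RSP);
   vassert( !guest_amd64_state_requires_precise_mem_exns(rax, rax+7) );
   vassert(  guest_amd64_state_requires_precise_mem_exns(rsp, rsp+7) );
#endif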


#define ALWAYSDEFD(field)                             \
    { offsetof(VexGuestAMD64State, field),            \
      (sizeof ((VexGuestAMD64State*)0)->field) }

VexGuestLayout
   amd64guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestAMD64State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
          .sizeof_SP = 8,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
          .sizeof_FP = 8,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
          .sizeof_IP = 8,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 16,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_RIP),
                 /*  5 */ ALWAYSDEFD(guest_FS_ZERO),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 // /* */ ALWAYSDEFD(guest_CS),
                 // /* */ ALWAYSDEFD(guest_DS),
                 // /* */ ALWAYSDEFD(guest_ES),
                 // /* */ ALWAYSDEFD(guest_FS),
                 // /* */ ALWAYSDEFD(guest_GS),
                 // /* */ ALWAYSDEFD(guest_SS),
                 // /* */ ALWAYSDEFD(guest_LDT),
                 // /* */ ALWAYSDEFD(guest_GDT),
                 /* 10 */ ALWAYSDEFD(guest_EMWARN),
                 /* 11 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 12 */ ALWAYSDEFD(guest_TISTART),
                 /* 13 */ ALWAYSDEFD(guest_TILEN),
                 /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };


/*---------------------------------------------------------------*/
/*--- end                               guest_amd64_helpers.c ---*/
/*---------------------------------------------------------------*/