
/*---------------------------------------------------------------*/
/*--- begin                             guest_amd64_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2011 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_emwarn.h"
#include "libvex_guest_amd64.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_amd64_defs.h"
#include "guest_generic_x87.h"


/* This file contains helper functions for amd64 guest code.  Calls
   to these functions are generated by the back end.  These calls are
   of course in the host machine code, and this file will be compiled
   to host machine code, so that all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-amd64/toIR.c.

   The convention used is that all functions called from generated
   code are named amd64g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/


/* Set to 1 to get detailed profiling info about use of the flag
   machinery. */
#define PROFILE_RFLAGS 0


/*---------------------------------------------------------------*/
/*--- %rflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/

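/* A note on the scheme used here: VEX does not keep the guest's
   %rflags up to date eagerly.  Instead the guest state holds a
   four-field "thunk" -- CC_OP (which kind of operation last wrote the
   flags), CC_DEP1 and CC_DEP2 (that operation's operands or result),
   and CC_NDEP (any extra state needed, e.g. the old flags) -- and the
   helpers below recompute flag values from the thunk only when they
   are actually needed. */
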
/* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
   after imulq/mulq. */

static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
{
   ULong u0, v0, w0;
    Long u1, v1, w1, w2, t;
   u0   = u & 0xFFFFFFFFULL;
   u1   = u >> 32;
   v0   = v & 0xFFFFFFFFULL;
   v1   = v >> 32;
   w0   = u0 * v0;
   t    = u1 * v0 + (w0 >> 32);
   w1   = t & 0xFFFFFFFFULL;
   w2   = t >> 32;
   w1   = u0 * v1 + w1;
   *rHi = u1 * v1 + w2 + (w1 >> 32);
   *rLo = u * v;
}

static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
{
   ULong u0, v0, w0;
   ULong u1, v1, w1, w2, t;
   u0   = u & 0xFFFFFFFFULL;
   u1   = u >> 32;
   v0   = v & 0xFFFFFFFFULL;
   v1   = v >> 32;
   w0   = u0 * v0;
   t    = u1 * v0 + (w0 >> 32);
   w1   = t & 0xFFFFFFFFULL;
   w2   = t >> 32;
   w1   = u0 * v1 + w1;
   *rHi = u1 * v1 + w2 + (w1 >> 32);
   *rLo = u * v;
}
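
/* Illustrative only, not part of VEX: a minimal sanity check of
   mullU64 on hosts whose compiler provides the (non-standard)
   unsigned __int128 extension, e.g. gcc and clang on 64-bit targets.
   The schoolbook scheme above splits each operand as u = u1*2^32 + u0
   and accumulates the four 32x32 partial products with carries. */
#if 0
static void check_mullU64 ( ULong u, ULong v )
{
   ULong hi, lo;
   unsigned __int128 r = ((unsigned __int128)u) * v;
   mullU64(u, v, &hi, &lo);
   vassert(hi == (ULong)(r >> 64));
   vassert(lo == (ULong)r);
}
#endif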


static const UChar parity_table[256] = {
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
};
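
/* Illustrative only, not part of VEX: a minimal sketch of how a table
   with the above layout can be generated.  PF is set iff the low byte
   of the result contains an even number of 1 bits; AMD64G_CC_MASK_P
   is the PF mask used throughout this file. */
#if 0
static void gen_parity_table ( UChar* table /* 256 entries */ )
{
   Int i, j, nSetBits;
   for (i = 0; i < 256; i++) {
      nSetBits = 0;
      for (j = 0; j < 8; j++)
         nSetBits += (i >> j) & 1;
      table[i] = (nSetBits & 1) ? 0 : AMD64G_CC_MASK_P;
   }
}
#endif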

/* generalised left-shifter */
static inline Long lshift ( Long x, Int n )
{
   if (n >= 0)
      return x << n;
   else
      return x >> (-n);
}
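
/* A note on lshift: allowing a negative shift count lets the flag
   computations below move a bit either up or down to its %rflags
   position with a single expression.  For example, with DATA_BITS ==
   64, lshift(res, 8 - 64) shifts right by 56, bringing the sign bit
   (bit 63) down to bit 7, where the SF mask 0x80 extracts it. */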

/* identity on ULong */
static inline ULong idULong ( ULong x )
{
   return x;
}


#define PREAMBLE(__data_bits)                                   \
   /* const */ ULong DATA_MASK                                  \
      = __data_bits==8                                          \
           ? 0xFFULL                                            \
           : (__data_bits==16                                   \
                ? 0xFFFFULL                                     \
                : (__data_bits==32                              \
                     ? 0xFFFFFFFFULL                            \
                     : 0xFFFFFFFFFFFFFFFFULL));                 \
   /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1);     \
   /* const */ ULong CC_DEP1 = cc_dep1_formal;                  \
   /* const */ ULong CC_DEP2 = cc_dep2_formal;                  \
   /* const */ ULong CC_NDEP = cc_ndep_formal;                  \
   /* Four bogus assignments, which hopefully gcc can     */    \
   /* optimise away, and which stop it complaining about  */    \
   /* unused variables.                                   */    \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;


/*-------------------------------------------------------------*/

#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
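
/* A note on the OF computation in ACTIONS_ADD above: signed overflow
   on an add occurs exactly when the operands have the same sign and
   the result's sign differs.  (argL ^ argR ^ -1) has its top data bit
   set iff argL and argR agree in sign, and (argL ^ res) has it set
   iff res disagrees with argL; lshift(.., 12 - DATA_BITS) then moves
   that bit to position 11, where AMD64G_CC_MASK_O extracts it.  For
   example, with DATA_BITS == 8, 0x7F + 0x01 = 0x80 sets bit 7 of both
   terms, so OF is set. */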

/*-------------------------------------------------------------*/

#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, oldC, res;                                \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                  12 - DATA_BITS) & AMD64G_CC_MASK_O;           \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
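
/* A note on ADC (and SBB below): the translator stores CC_DEP2 as
   (argR ^ oldC) rather than argR itself, which is why these helpers
   xor it with oldC again to recover the original right operand; oldC
   itself comes from CC_NDEP.  The carry-out test also differs from
   plain ADD/SUB: when oldC is 1 the comparison must be <=, since
   with a carry-in the result can equal argL (or, for SBB, argL can
   equal argR) even though a carry or borrow occurred. */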

/*-------------------------------------------------------------*/

#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, oldC, res;                                \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
          == ((ULong)SIGN_MASK - 1)) << 11;                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C;      \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long fl                                                    \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & CC_DEP1)                        \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                      11-(DATA_BITS-1))         \
                     ^ lshift(CC_DEP1, 11)));                   \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long fl                                                    \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))     \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                      11-(DATA_BITS-1))         \
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));   \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_STYPE)CC_DEP1)                      \
                     * ((DATA_STYPE)CC_DEP2) );                 \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
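
/* A note on the signed-multiply CF/OF above: for imul, CF and OF are
   set iff the full-width product does not fit in the narrow result,
   i.e. iff the high half is not merely the sign-extension of the low
   half.  Shifting lo arithmetically right by (DATA_BITS-1) yields
   all-zeroes or all-ones, which is exactly what the high half must
   equal when no overflow occurred. */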

/*-------------------------------------------------------------*/

#define ACTIONS_UMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { Long cf, pf, af, zf, sf, of;                               \
     ULong lo, hi;                                              \
     mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo );       \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { Long cf, pf, af, zf, sf, of;                               \
     Long lo, hi;                                               \
     mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo );         \
     cf = (hi != (lo >>/*s*/ (64-1)));                          \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}


#if PROFILE_RFLAGS

static Bool initted     = False;

/* C flag, fast route */
static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))


static void showCounts ( void )
{
   Int op, co;
   Char ch;
   vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {

      ch = ' ';
      if (op > 0 && (op-1) % 4 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 4 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 4 == 2)
         ch = 'L';
      if (op > 0 && (op-1) % 4 == 3)
         ch = 'Q';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("     ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_RFLAGS */


/* Calculate all 6 flags from the supplied thunk parameters.
   Worker function: despite the naming convention, this one is not
   called directly from generated code; the CLEAN HELPER wrappers
   below are. */
static
ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
                                        ULong cc_dep1_formal,
                                        ULong cc_dep2_formal,
                                        ULong cc_ndep_formal )
{
   switch (cc_op) {
      case AMD64G_CC_OP_COPY:
         return cc_dep1_formal
                & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
                   | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);

      case AMD64G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case AMD64G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case AMD64G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );
      case AMD64G_CC_OP_ADDQ:   ACTIONS_ADD( 64, ULong  );

      case AMD64G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case AMD64G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case AMD64G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );
      case AMD64G_CC_OP_ADCQ:   ACTIONS_ADC( 64, ULong  );

      case AMD64G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case AMD64G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case AMD64G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );
      case AMD64G_CC_OP_SUBQ:   ACTIONS_SUB( 64, ULong  );

      case AMD64G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case AMD64G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case AMD64G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );
      case AMD64G_CC_OP_SBBQ:   ACTIONS_SBB( 64, ULong  );

      case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );
      case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong  );

      case AMD64G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case AMD64G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case AMD64G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );
      case AMD64G_CC_OP_INCQ:   ACTIONS_INC( 64, ULong  );

      case AMD64G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case AMD64G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case AMD64G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );
      case AMD64G_CC_OP_DECQ:   ACTIONS_DEC( 64, ULong  );

      case AMD64G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case AMD64G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case AMD64G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );
      case AMD64G_CC_OP_SHLQ:   ACTIONS_SHL( 64, ULong  );

      case AMD64G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case AMD64G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case AMD64G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );
      case AMD64G_CC_OP_SHRQ:   ACTIONS_SHR( 64, ULong  );

      case AMD64G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case AMD64G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case AMD64G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );
      case AMD64G_CC_OP_ROLQ:   ACTIONS_ROL( 64, ULong  );

      case AMD64G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case AMD64G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case AMD64G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );
      case AMD64G_CC_OP_RORQ:   ACTIONS_ROR( 64, ULong  );

      case AMD64G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                  UShort, toUShort );
      case AMD64G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                  UInt,   toUInt );
      case AMD64G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                  ULong,  idULong );

      case AMD64G_CC_OP_UMULQ:  ACTIONS_UMULQ;

      case AMD64G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                  Short,  toUShort );
      case AMD64G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                  Int,    toUInt   );
      case AMD64G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                  Long,   idULong );

      case AMD64G_CC_OP_SMULQ:  ACTIONS_SMULQ;

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
                    "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all 6 flags from the supplied thunk parameters. */
ULong amd64g_calculate_rflags_all ( ULong cc_op,
                                    ULong cc_dep1,
                                    ULong cc_dep2,
                                    ULong cc_ndep )
{
#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
ULong amd64g_calculate_rflags_c ( ULong cc_op,
                                  ULong cc_dep1,
                                  ULong cc_dep2,
                                  ULong cc_ndep )
{
#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case AMD64G_CC_OP_COPY:
         return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
      case AMD64G_CC_OP_LOGICQ:
      case AMD64G_CC_OP_LOGICL:
      case AMD64G_CC_OP_LOGICW:
      case AMD64G_CC_OP_LOGICB:
         return 0;
      // case AMD64G_CC_OP_SUBL:
      //    return ((UInt)cc_dep1) < ((UInt)cc_dep2)
      //              ? AMD64G_CC_MASK_C : 0;
      // case AMD64G_CC_OP_SUBW:
      //    return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
      //              ? AMD64G_CC_MASK_C : 0;
      // case AMD64G_CC_OP_SUBB:
      //    return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
      //              ? AMD64G_CC_MASK_C : 0;
      // case AMD64G_CC_OP_INCL:
      // case AMD64G_CC_OP_DECL:
      //    return cc_ndep & AMD64G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_RFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & AMD64G_CC_MASK_C;
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
                                   ULong cc_op,
                                   ULong cc_dep1,
                                   ULong cc_dep2,
                                   ULong cc_ndep )
{
   ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
                                                  cc_dep2, cc_ndep);
   ULong of, sf, zf, cf, pf;
   ULong inv = cond & 1;
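   /* Condition codes come in complementary pairs (O/NO, B/NB, Z/NZ,
      ...); the odd-numbered member of each pair is the negation of
      the even one, so bit 0 of the code selects inversion. */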

#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   switch (cond) {
      case AMD64CondNO:
      case AMD64CondO: /* OF == 1 */
         of = rflags >> AMD64G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case AMD64CondNZ:
      case AMD64CondZ: /* ZF == 1 */
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case AMD64CondNB:
      case AMD64CondB: /* CF == 1 */
         cf = rflags >> AMD64G_CC_SHIFT_C;
         return 1 & (inv ^ cf);

      case AMD64CondNBE:
      case AMD64CondBE: /* (CF or ZF) == 1 */
         cf = rflags >> AMD64G_CC_SHIFT_C;
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));

      case AMD64CondNS:
      case AMD64CondS: /* SF == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case AMD64CondNP:
      case AMD64CondP: /* PF == 1 */
         pf = rflags >> AMD64G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case AMD64CondNL:
      case AMD64CondL: /* (SF xor OF) == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         of = rflags >> AMD64G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));

      case AMD64CondNLE:
      case AMD64CondLE: /* ((SF xor OF) or ZF)  == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         of = rflags >> AMD64G_CC_SHIFT_O;
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("amd64g_calculate_condition"
                    "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("amd64g_calculate_condition");
   }
}


/* VISIBLE TO LIBVEX CLIENT */
ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/VexGuestAMD64State* vex_state )
{
   ULong rflags = amd64g_calculate_rflags_all_WRK(
                     vex_state->guest_CC_OP,
                     vex_state->guest_CC_DEP1,
                     vex_state->guest_CC_DEP2,
                     vex_state->guest_CC_NDEP
                  );
   Long dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == -1);
   if (dflag == -1)
      rflags |= (1<<10);   /* DF, bit 10 */
   if (vex_state->guest_IDFLAG == 1)
      rflags |= (1<<21);   /* ID, bit 21 */
   if (vex_state->guest_ACFLAG == 1)
      rflags |= (1<<18);   /* AC, bit 18 */

   return rflags;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
                               /*MOD*/VexGuestAMD64State* vex_state )
{
   ULong oszacp = amd64g_calculate_rflags_all_WRK(
                     vex_state->guest_CC_OP,
                     vex_state->guest_CC_DEP1,
                     vex_state->guest_CC_DEP2,
                     vex_state->guest_CC_NDEP
                  );
   if (new_carry_flag & 1) {
      oszacp |= AMD64G_CC_MASK_C;
   } else {
      oszacp &= ~AMD64G_CC_MASK_C;
   }
   vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = oszacp;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}


/*---------------------------------------------------------------*/
/*--- %rflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
/*--- %rflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static Bool isU64 ( IRExpr* e, ULong n )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U64
                  && e->Iex.Const.con->Ico.U64 == n );
}
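
/* An illustrative example (not an exhaustive description): a guest
   sequence such as "cmpq %rsi,%rdi ; jz ..." reaches this point as a
   call
      amd64g_calculate_condition(AMD64CondZ, AMD64G_CC_OP_SUBQ,
                                 rdi, rsi, ndep)
   whose first two arguments are constants, and the cases below
   rewrite it into the direct test 1Uto64(CmpEQ64(rdi, rsi)). */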

IRExpr* guest_amd64_spechelper ( HChar* function_name,
                                 IRExpr** args,
                                 IRStmt** precedingStmts,
                                 Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "amd64g_calculate_condition" --------- */

   if (vex_streq(function_name, "amd64g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
         /* long long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Add64, cc_dep1, cc_dep2),
                           mkU64(0)));
      }

      /*---------------- SUBQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
         /* long long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
         /* long long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,cc_dep1,cc_dep2));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
         /* long long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
         /* long long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
         /* long long sub/cmp, then NB (unsigned greater than or equal)
            --> test src <=u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
         /* long long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
      }

      /*---------------- SUBL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE32,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32S,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32S,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
         /* long sub/cmp, then NLE (signed greater than)
            --> test !(dst <=s src)
            --> test (dst >s src)
            --> test (src <s dst) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32S,
                           unop(Iop_64to32, cc_dep2),
                           unop(Iop_64to32, cc_dep1)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32U,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32U,
                           unop(Iop_64to32, cc_dep2),
                           unop(Iop_64to32, cc_dep1)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32S,
                           binop(Iop_Sub32,
                                 unop(Iop_64to32, cc_dep1),
                                 unop(Iop_64to32, cc_dep2)),
                           mkU32(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32U,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }

      /*---------------- SUBW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ16,
                           unop(Iop_64to16,cc_dep1),
                           unop(Iop_64to16,cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE16,
                           unop(Iop_64to16,cc_dep1),
                           unop(Iop_64to16,cc_dep2)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
         /* word sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           binop(Iop_Shl64,cc_dep2,mkU8(48))));
      }
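
      /* A note on the Shl64-by-48 trick above (also used for the INCW
         and DECW cases below): shifting a 16-bit value to the top of
         a 64-bit word lets a 64-bit comparison stand in for a 16-bit
         one, since the shift discards the irrelevant upper bits while
         preserving the sign and ordering of the low 16. */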

      /*---------------- SUBB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ8,
                           unop(Iop_64to8,cc_dep1),
                           unop(Iop_64to8,cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE8,
                           unop(Iop_64to8,cc_dep1),
                           unop(Iop_64to8,cc_dep2)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
         /* byte sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U,
                           binop(Iop_And64, cc_dep1, mkU64(0xFF)),
                           binop(Iop_And64, cc_dep2, mkU64(0xFF))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
                                          && isU64(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (ULong)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU64(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U64s. */
         return binop(Iop_And64,
                      binop(Iop_Shr64,cc_dep1,mkU8(7)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
                                          && isU64(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (ULong) !dst[7]
         */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64,cc_dep1,mkU8(7)),
                            mkU64(1)),
                      mkU64(1));
      }

      /*---------------- LOGICQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
         /* long long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) {
         /* long long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
         /* long long and/or/xor, then L
            LOGIC sets SF and ZF according to the
            result and makes OF be zero.  L computes SF ^ OF, but
            OF is zero, so this reduces to SF -- which will be 1 iff
            the result is < signed 0.  Hence ...
         */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           cc_dep1,
                           mkU64(0)));
      }

      /*---------------- LOGICL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32,
                           unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE32,
                           unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32S,
                           unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) {
         /* long and/or/xor, then S --> (ULong)result[31] */
         return binop(Iop_And64,
                      binop(Iop_Shr64, cc_dep1, mkU8(31)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) {
         /* long and/or/xor, then NS --> (ULong) ~ result[31] */
         return binop(Iop_Xor64,
                binop(Iop_And64,
                      binop(Iop_Shr64, cc_dep1, mkU8(31)),
                      mkU64(1)),
                mkU64(1));
      }

      /*---------------- LOGICB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
                                        mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)),
                                        mkU64(0)));
      }
   1208       if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
   1209          /* this is an idiom gcc sometimes uses to find out if the top
   1210             bit of a byte register is set: eg testb %al,%al; js ..
   1211             Since it just depends on the top bit of the byte, extract
   1212             that bit and explicitly get rid of all the rest.  This
   1213             helps memcheck avoid false positives in the case where any
   1214             of the other bits in the byte are undefined. */
   1215          /* byte and/or/xor, then S --> (UInt)result[7] */
   1216          return binop(Iop_And64,
   1217                       binop(Iop_Shr64,cc_dep1,mkU8(7)),
   1218                       mkU64(1));
   1219       }
   1220       if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) {
    1221          /* byte and/or/xor, then NS --> (ULong)!result[7] */
   1222          return binop(Iop_Xor64,
   1223                       binop(Iop_And64,
   1224                             binop(Iop_Shr64,cc_dep1,mkU8(7)),
   1225                             mkU64(1)),
   1226                       mkU64(1));
   1227       }
   1228 
   1229       /*---------------- INCB ----------------*/
   1230 
   1231       if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
   1232          /* 8-bit inc, then LE --> sign bit of the arg */
   1233          return binop(Iop_And64,
   1234                       binop(Iop_Shr64,
   1235                             binop(Iop_Sub64, cc_dep1, mkU64(1)),
   1236                             mkU8(7)),
   1237                       mkU64(1));
   1238       }
   1239 
   1240       /*---------------- INCW ----------------*/
   1241 
   1242       if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
   1243          /* 16-bit inc, then Z --> test dst == 0 */
   1244          return unop(Iop_1Uto64,
   1245                      binop(Iop_CmpEQ64,
   1246                            binop(Iop_Shl64,cc_dep1,mkU8(48)),
   1247                            mkU64(0)));
   1248       }
   1249 
   1250       /*---------------- DECL ----------------*/
   1251 
   1252       if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
   1253          /* dec L, then Z --> test dst == 0 */
   1254          return unop(Iop_1Uto64,
   1255                      binop(Iop_CmpEQ32,
   1256                            unop(Iop_64to32, cc_dep1),
   1257                            mkU32(0)));
   1258       }
   1259 
   1260       /*---------------- DECW ----------------*/
   1261 
   1262       if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
   1263          /* 16-bit dec, then NZ --> test dst != 0 */
   1264          return unop(Iop_1Uto64,
   1265                      binop(Iop_CmpNE64,
   1266                            binop(Iop_Shl64,cc_dep1,mkU8(48)),
   1267                            mkU64(0)));
   1268       }
   1269 
   1270       /*---------------- COPY ----------------*/
   1271       /* This can happen, as a result of amd64 FP compares: "comisd ... ;
   1272          jbe" for example. */
   1273 
   1274       if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
   1275           (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
   1276          /* COPY, then BE --> extract C and Z from dep1, and test (C
   1277             or Z == 1). */
   1278          /* COPY, then NBE --> extract C and Z from dep1, and test (C
   1279             or Z == 0). */
   1280          ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
   1281          return
   1282             unop(
   1283                Iop_1Uto64,
   1284                binop(
   1285                   Iop_CmpEQ64,
   1286                   binop(
   1287                      Iop_And64,
   1288                      binop(
   1289                         Iop_Or64,
   1290                         binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
   1291                         binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
   1292                      ),
   1293                      mkU64(1)
   1294                   ),
   1295                   mkU64(nnn)
   1296                )
   1297             );
   1298       }
   1299 
   1300       if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
    1301          /* COPY, then B --> extract C from dep1, and test (C == 1). */
   1302          return
   1303             unop(
   1304                Iop_1Uto64,
   1305                binop(
   1306                   Iop_CmpNE64,
   1307                   binop(
   1308                      Iop_And64,
   1309                      binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
   1310                      mkU64(1)
   1311                   ),
   1312                   mkU64(0)
   1313                )
   1314             );
   1315       }
   1316 
   1317       if (isU64(cc_op, AMD64G_CC_OP_COPY)
   1318           && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
   1319          /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
   1320          /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
   1321          UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
   1322          return
   1323             unop(
   1324                Iop_1Uto64,
   1325                binop(
   1326                   Iop_CmpEQ64,
   1327                   binop(
   1328                      Iop_And64,
   1329                      binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
   1330                      mkU64(1)
   1331                   ),
   1332                   mkU64(nnn)
   1333                )
   1334             );
   1335       }
   1336 
   1337       if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
   1338          /* COPY, then P --> extract P from dep1, and test (P == 1). */
   1339          return
   1340             unop(
   1341                Iop_1Uto64,
   1342                binop(
   1343                   Iop_CmpNE64,
   1344                   binop(
   1345                      Iop_And64,
   1346                      binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
   1347                      mkU64(1)
   1348                   ),
   1349                   mkU64(0)
   1350                )
   1351             );
   1352       }
   1353 
   1354       return NULL;
   1355    }
   1356 
   1357    /* --------- specialising "amd64g_calculate_rflags_c" --------- */
   1358 
   1359    if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
   1360       /* specialise calls to above "calculate_rflags_c" function */
   1361       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
   1362       vassert(arity == 4);
   1363       cc_op   = args[0];
   1364       cc_dep1 = args[1];
   1365       cc_dep2 = args[2];
   1366       cc_ndep = args[3];
   1367 
   1368       if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
   1369          /* C after sub denotes unsigned less than */
   1370          return unop(Iop_1Uto64,
   1371                      binop(Iop_CmpLT64U,
   1372                            cc_dep1,
   1373                            cc_dep2));
   1374       }
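               /* e.g. (illustrative): after "cmpq %rsi,%rdi", an instruction
                  that consumes only the carry -- adc or sbb, say -- sees
                  cc_dep1 = %rdi and cc_dep2 = %rsi, so the whole flags
                  computation collapses to 1Uto64(CmpLT64U(%rdi, %rsi)). */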
   1375       if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
   1376          /* C after sub denotes unsigned less than */
   1377          return unop(Iop_1Uto64,
   1378                      binop(Iop_CmpLT32U,
   1379                            unop(Iop_64to32, cc_dep1),
   1380                            unop(Iop_64to32, cc_dep2)));
   1381       }
   1382       if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
   1383          /* C after sub denotes unsigned less than */
   1384          return unop(Iop_1Uto64,
   1385                      binop(Iop_CmpLT64U,
   1386                            binop(Iop_And64,cc_dep1,mkU64(0xFF)),
   1387                            binop(Iop_And64,cc_dep2,mkU64(0xFF))));
   1388       }
   1389       if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
   1390           || isU64(cc_op, AMD64G_CC_OP_LOGICL)
   1391           || isU64(cc_op, AMD64G_CC_OP_LOGICW)
   1392           || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
   1393          /* cflag after logic is zero */
   1394          return mkU64(0);
   1395       }
   1396       if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
   1397           || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
   1398          /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
   1399          return cc_ndep;
   1400       }
   1401 
   1402 #     if 0
   1403       if (cc_op->tag == Iex_Const) {
   1404          vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
   1405       }
   1406 #     endif
   1407 
   1408       return NULL;
   1409    }
   1410 
   1411 #  undef unop
   1412 #  undef binop
   1413 #  undef mkU64
   1414 #  undef mkU32
   1415 #  undef mkU8
   1416 
   1417    return NULL;
   1418 }
   1419 
   1420 
   1421 /*---------------------------------------------------------------*/
   1422 /*--- Supporting functions for x87 FPU activities.            ---*/
   1423 /*---------------------------------------------------------------*/
   1424 
   1425 static inline Bool host_is_little_endian ( void )
   1426 {
   1427    UInt x = 0x76543210;
   1428    UChar* p = (UChar*)(&x);
   1429    return toBool(*p == 0x10);
   1430 }
   1431 
   1432 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
   1433 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   1434 ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
   1435 {
   1436    Bool   mantissaIsZero;
   1437    Int    bexp;
   1438    UChar  sign;
   1439    UChar* f64;
   1440 
   1441    vassert(host_is_little_endian());
   1442 
   1443    /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
   1444 
   1445    f64  = (UChar*)(&dbl);
   1446    sign = toUChar( (f64[7] >> 7) & 1 );
   1447 
   1448    /* First off, if the tag indicates the register was empty,
   1449       return 1,0,sign,1 */
   1450    if (tag == 0) {
   1451       /* vex_printf("Empty\n"); */
   1452       return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
   1453                                    | AMD64G_FC_MASK_C0;
   1454    }
   1455 
   1456    bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   1457    bexp &= 0x7FF;
   1458 
   1459    mantissaIsZero
   1460       = toBool(
   1461            (f64[6] & 0x0F) == 0
   1462            && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
   1463         );
   1464 
   1465    /* If both exponent and mantissa are zero, the value is zero.
   1466       Return 1,0,sign,0. */
   1467    if (bexp == 0 && mantissaIsZero) {
   1468       /* vex_printf("Zero\n"); */
   1469       return AMD64G_FC_MASK_C3 | 0
   1470                                | (sign << AMD64G_FC_SHIFT_C1) | 0;
   1471    }
   1472 
   1473    /* If exponent is zero but mantissa isn't, it's a denormal.
   1474       Return 1,1,sign,0. */
   1475    if (bexp == 0 && !mantissaIsZero) {
   1476       /* vex_printf("Denormal\n"); */
   1477       return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
   1478                                | (sign << AMD64G_FC_SHIFT_C1) | 0;
   1479    }
   1480 
   1481    /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
   1482       Return 0,1,sign,1. */
   1483    if (bexp == 0x7FF && mantissaIsZero) {
   1484       /* vex_printf("Inf\n"); */
   1485       return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
   1486                                    | AMD64G_FC_MASK_C0;
   1487    }
   1488 
   1489    /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
   1490       Return 0,0,sign,1. */
   1491    if (bexp == 0x7FF && !mantissaIsZero) {
   1492       /* vex_printf("NaN\n"); */
   1493       return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
   1494    }
   1495 
   1496    /* Uh, ok, we give up.  It must be a normal finite number.
   1497       Return 0,1,sign,0.
   1498    */
   1499    /* vex_printf("normal\n"); */
   1500    return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
   1501 }
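
         /* Illustrative decoder for the C3..C0 encoding produced above -- a
            sketch for documentation only, hence the #if 0; nothing in VEX
            calls it.  It assumes only the AMD64G_FC_* masks used above. */
         #if 0
         static const HChar* fxam_class ( ULong c3210 )
         {
            Bool c3 = toBool(c3210 & AMD64G_FC_MASK_C3);
            Bool c2 = toBool(c3210 & AMD64G_FC_MASK_C2);
            Bool c0 = toBool(c3210 & AMD64G_FC_MASK_C0);
            if (c3 && !c2 && c0)  return "empty";
            if (c3 && !c2 && !c0) return "zero";
            if (c3 && c2)         return "denormal";
            if (!c3 && c2 && c0)  return "infinity";
            if (!c3 && !c2 && c0) return "NaN";
            return "normal finite";   /* 0,1,sign,0 */
         }
         #endif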
   1502 
   1503 
   1504 /* This is used to implement both 'frstor' and 'fldenv'.  The latter
   1505    appears to differ from the former only in that the 8 FP registers
   1506    themselves are not transferred into the guest state. */
   1507 static
   1508 VexEmWarn do_put_x87 ( Bool moveRegs,
   1509                        /*IN*/UChar* x87_state,
   1510                        /*OUT*/VexGuestAMD64State* vex_state )
   1511 {
   1512    Int        stno, preg;
   1513    UInt       tag;
   1514    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   1515    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   1516    Fpu_State* x87     = (Fpu_State*)x87_state;
   1517    UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
   1518    UInt       tagw    = x87->env[FP_ENV_TAG];
   1519    UInt       fpucw   = x87->env[FP_ENV_CTRL];
   1520    UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
   1521    VexEmWarn  ew;
   1522    UInt       fpround;
   1523    ULong      pair;
   1524 
   1525    /* Copy registers and tags */
   1526    for (stno = 0; stno < 8; stno++) {
   1527       preg = (stno + ftop) & 7;
   1528       tag = (tagw >> (2*preg)) & 3;
   1529       if (tag == 3) {
   1530          /* register is empty */
   1531          /* hmm, if it's empty, does it still get written?  Probably
   1532             safer to say it does.  If we don't, memcheck could get out
   1533             of sync, in that it thinks all FP registers are defined by
   1534             this helper, but in reality some have not been updated. */
   1535          if (moveRegs)
   1536             vexRegs[preg] = 0; /* IEEE754 64-bit zero */
   1537          vexTags[preg] = 0;
   1538       } else {
   1539          /* register is non-empty */
   1540          if (moveRegs)
   1541             convert_f80le_to_f64le( &x87->reg[10*stno],
   1542                                     (UChar*)&vexRegs[preg] );
   1543          vexTags[preg] = 1;
   1544       }
   1545    }
   1546 
   1547    /* stack pointer */
   1548    vex_state->guest_FTOP = ftop;
   1549 
   1550    /* status word */
   1551    vex_state->guest_FC3210 = c3210;
   1552 
   1553    /* handle the control word, setting FPROUND and detecting any
   1554       emulation warnings. */
   1555    pair    = amd64g_check_fldcw ( (ULong)fpucw );
   1556    fpround = (UInt)pair;
   1557    ew      = (VexEmWarn)(pair >> 32);
   1558 
   1559    vex_state->guest_FPROUND = fpround & 3;
   1560 
   1561    /* emulation warnings --> caller */
   1562    return ew;
   1563 }
   1564 
   1565 
   1566 /* Create an x87 FPU state from the guest state, as close as
   1567    we can approximate it. */
   1568 static
   1569 void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
   1570                   /*OUT*/UChar* x87_state )
   1571 {
   1572    Int        i, stno, preg;
   1573    UInt       tagw;
   1574    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   1575    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   1576    Fpu_State* x87     = (Fpu_State*)x87_state;
   1577    UInt       ftop    = vex_state->guest_FTOP;
   1578    UInt       c3210   = vex_state->guest_FC3210;
   1579 
   1580    for (i = 0; i < 14; i++)
   1581       x87->env[i] = 0;
   1582 
   1583    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   1584    x87->env[FP_ENV_STAT]
   1585       = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   1586    x87->env[FP_ENV_CTRL]
   1587       = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
   1588 
   1589    /* Dump the register stack in ST order. */
   1590    tagw = 0;
   1591    for (stno = 0; stno < 8; stno++) {
   1592       preg = (stno + ftop) & 7;
   1593       if (vexTags[preg] == 0) {
   1594          /* register is empty */
   1595          tagw |= (3 << (2*preg));
   1596          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
   1597                                  &x87->reg[10*stno] );
   1598       } else {
   1599          /* register is full. */
   1600          tagw |= (0 << (2*preg));
   1601          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
   1602                                  &x87->reg[10*stno] );
   1603       }
   1604    }
   1605    x87->env[FP_ENV_TAG] = toUShort(tagw);
   1606 }
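
         /* Note on the tag encoding used above: the x87 tag word gives each
            physical register a 2-bit tag -- 0 valid, 1 zero, 2 special,
            3 empty.  Since the guest state records only empty vs non-empty,
            only the values 0 and 3 are ever generated here. */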
   1607 
   1608 
   1609 /* CALLED FROM GENERATED CODE */
   1610 /* DIRTY HELPER (reads guest state, writes guest mem) */
   1611 /* NOTE: only handles 32-bit format (no REX.W on the insn) */
   1612 void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
   1613 {
   1614    /* Derived from values obtained from
   1615       vendor_id       : AuthenticAMD
   1616       cpu family      : 15
   1617       model           : 12
   1618       model name      : AMD Athlon(tm) 64 Processor 3200+
   1619       stepping        : 0
   1620       cpu MHz         : 2200.000
   1621       cache size      : 512 KB
   1622    */
   1623    /* Somewhat roundabout, but at least it's simple. */
   1624    Fpu_State tmp;
   1625    UShort*   addrS = (UShort*)addr;
   1626    UChar*    addrC = (UChar*)addr;
   1627    U128*     xmm   = (U128*)(addr + 160);
   1628    UInt      mxcsr;
   1629    UShort    fp_tags;
   1630    UInt      summary_tags;
   1631    Int       r, stno;
   1632    UShort    *srcS, *dstS;
   1633 
   1634    do_get_x87( gst, (UChar*)&tmp );
   1635    mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
   1636 
   1637    /* Now build the proper fxsave image from the x87 image we just
   1638       made. */
   1639 
   1640    addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
    1641    addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
   1642 
   1643    /* set addrS[2] in an endian-independent way */
   1644    summary_tags = 0;
   1645    fp_tags = tmp.env[FP_ENV_TAG];
   1646    for (r = 0; r < 8; r++) {
   1647       if ( ((fp_tags >> (2*r)) & 3) != 3 )
   1648          summary_tags |= (1 << r);
   1649    }
   1650    addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   1651    addrC[5]  = 0; /* pad */
   1652 
   1653    /* FOP: faulting fpu opcode.  From experimentation, the real CPU
   1654       does not write this field. (?!) */
   1655    addrS[3]  = 0; /* BOGUS */
   1656 
   1657    /* RIP (Last x87 instruction pointer).  From experimentation, the
   1658       real CPU does not write this field. (?!) */
   1659    addrS[4]  = 0; /* BOGUS */
   1660    addrS[5]  = 0; /* BOGUS */
   1661    addrS[6]  = 0; /* BOGUS */
   1662    addrS[7]  = 0; /* BOGUS */
   1663 
   1664    /* RDP (Last x87 data pointer).  From experimentation, the real CPU
   1665       does not write this field. (?!) */
   1666    addrS[8]  = 0; /* BOGUS */
   1667    addrS[9]  = 0; /* BOGUS */
   1668    addrS[10] = 0; /* BOGUS */
   1669    addrS[11] = 0; /* BOGUS */
   1670 
   1671    addrS[12] = toUShort(mxcsr);  /* MXCSR */
   1672    addrS[13] = toUShort(mxcsr >> 16);
   1673 
   1674    addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
   1675    addrS[15] = 0x0000; /* MXCSR mask (hi16) */
   1676 
   1677    /* Copy in the FP registers, in ST order. */
   1678    for (stno = 0; stno < 8; stno++) {
   1679       srcS = (UShort*)(&tmp.reg[10*stno]);
   1680       dstS = (UShort*)(&addrS[16 + 8*stno]);
   1681       dstS[0] = srcS[0];
   1682       dstS[1] = srcS[1];
   1683       dstS[2] = srcS[2];
   1684       dstS[3] = srcS[3];
   1685       dstS[4] = srcS[4];
   1686       dstS[5] = 0;
   1687       dstS[6] = 0;
   1688       dstS[7] = 0;
   1689    }
   1690 
   1691    /* That's the first 160 bytes of the image done.  Now only %xmm0
   1692       .. %xmm15 remain to be copied.  If the host is big-endian, these
   1693       need to be byte-swapped. */
   1694    vassert(host_is_little_endian());
   1695 
   1696 #  define COPY_U128(_dst,_src)                       \
   1697       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
   1698            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
   1699       while (0)
   1700 
   1701    COPY_U128( xmm[0],  gst->guest_XMM0 );
   1702    COPY_U128( xmm[1],  gst->guest_XMM1 );
   1703    COPY_U128( xmm[2],  gst->guest_XMM2 );
   1704    COPY_U128( xmm[3],  gst->guest_XMM3 );
   1705    COPY_U128( xmm[4],  gst->guest_XMM4 );
   1706    COPY_U128( xmm[5],  gst->guest_XMM5 );
   1707    COPY_U128( xmm[6],  gst->guest_XMM6 );
   1708    COPY_U128( xmm[7],  gst->guest_XMM7 );
   1709    COPY_U128( xmm[8],  gst->guest_XMM8 );
   1710    COPY_U128( xmm[9],  gst->guest_XMM9 );
   1711    COPY_U128( xmm[10], gst->guest_XMM10 );
   1712    COPY_U128( xmm[11], gst->guest_XMM11 );
   1713    COPY_U128( xmm[12], gst->guest_XMM12 );
   1714    COPY_U128( xmm[13], gst->guest_XMM13 );
   1715    COPY_U128( xmm[14], gst->guest_XMM14 );
   1716    COPY_U128( xmm[15], gst->guest_XMM15 );
   1717 
   1718 #  undef COPY_U128
   1719 }
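
         /* For reference, the part of the fxsave image written above (just a
            summary of the stores, not a normative layout description):
               bytes   0..1    FCW             bytes   2..3    FSW
               byte    4       FTW (abridged)  byte    5       pad
               bytes   6..7    FOP (zeroed)    bytes   8..15   RIP (zeroed)
               bytes  16..23   RDP (zeroed)    bytes  24..27   MXCSR
               bytes  28..31   MXCSR mask      bytes  32..159  ST0..7, 16 each
               bytes 160..415  XMM0..15, 16 each.
            Bytes 416..511 are left untouched. */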
   1720 
   1721 
   1722 /* CALLED FROM GENERATED CODE */
   1723 /* DIRTY HELPER (writes guest state, reads guest mem) */
   1724 VexEmWarn amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State* gst, HWord addr )
   1725 {
   1726    Fpu_State tmp;
   1727    VexEmWarn warnX87 = EmWarn_NONE;
   1728    VexEmWarn warnXMM = EmWarn_NONE;
   1729    UShort*   addrS   = (UShort*)addr;
   1730    UChar*    addrC   = (UChar*)addr;
   1731    U128*     xmm     = (U128*)(addr + 160);
   1732    UShort    fp_tags;
   1733    Int       r, stno, i;
   1734 
   1735    /* Restore %xmm0 .. %xmm15.  If the host is big-endian, these need
   1736       to be byte-swapped. */
   1737    vassert(host_is_little_endian());
   1738 
   1739 #  define COPY_U128(_dst,_src)                       \
   1740       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
   1741            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
   1742       while (0)
   1743 
   1744    COPY_U128( gst->guest_XMM0, xmm[0] );
   1745    COPY_U128( gst->guest_XMM1, xmm[1] );
   1746    COPY_U128( gst->guest_XMM2, xmm[2] );
   1747    COPY_U128( gst->guest_XMM3, xmm[3] );
   1748    COPY_U128( gst->guest_XMM4, xmm[4] );
   1749    COPY_U128( gst->guest_XMM5, xmm[5] );
   1750    COPY_U128( gst->guest_XMM6, xmm[6] );
   1751    COPY_U128( gst->guest_XMM7, xmm[7] );
   1752    COPY_U128( gst->guest_XMM8, xmm[8] );
   1753    COPY_U128( gst->guest_XMM9, xmm[9] );
   1754    COPY_U128( gst->guest_XMM10, xmm[10] );
   1755    COPY_U128( gst->guest_XMM11, xmm[11] );
   1756    COPY_U128( gst->guest_XMM12, xmm[12] );
   1757    COPY_U128( gst->guest_XMM13, xmm[13] );
   1758    COPY_U128( gst->guest_XMM14, xmm[14] );
   1759    COPY_U128( gst->guest_XMM15, xmm[15] );
   1760 
   1761 #  undef COPY_U128
   1762 
   1763    /* Copy the x87 registers out of the image, into a temporary
   1764       Fpu_State struct. */
   1765    for (i = 0; i < 14; i++) tmp.env[i] = 0;
   1766    for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   1767    /* fill in tmp.reg[0..7] */
   1768    for (stno = 0; stno < 8; stno++) {
   1769       UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
   1770       UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
   1771       dstS[0] = srcS[0];
   1772       dstS[1] = srcS[1];
   1773       dstS[2] = srcS[2];
   1774       dstS[3] = srcS[3];
   1775       dstS[4] = srcS[4];
   1776    }
   1777    /* fill in tmp.env[0..13] */
   1778    tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
    1779    tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
   1780 
   1781    fp_tags = 0;
   1782    for (r = 0; r < 8; r++) {
   1783       if (addrC[4] & (1<<r))
    1784          fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
    1785       else
    1786          fp_tags |= (3 << (2*r)); /* EMPTY */
   1787    }
   1788    tmp.env[FP_ENV_TAG] = fp_tags;
   1789 
   1790    /* Now write 'tmp' into the guest state. */
   1791    warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
   1792 
   1793    { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
   1794                 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
   1795      ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );
   1796 
   1797      warnXMM = (VexEmWarn)(w64 >> 32);
   1798 
   1799      gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
   1800    }
   1801 
   1802    /* Prefer an X87 emwarn over an XMM one, if both exist. */
   1803    if (warnX87 != EmWarn_NONE)
   1804       return warnX87;
   1805    else
   1806       return warnXMM;
   1807 }
   1808 
   1809 
   1810 /* DIRTY HELPER (writes guest state) */
   1811 /* Initialise the x87 FPU state as per 'finit'. */
   1812 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
   1813 {
   1814    Int i;
   1815    gst->guest_FTOP = 0;
   1816    for (i = 0; i < 8; i++) {
   1817       gst->guest_FPTAG[i] = 0; /* empty */
   1818       gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   1819    }
   1820    gst->guest_FPROUND = (ULong)Irrm_NEAREST;
   1821    gst->guest_FC3210  = 0;
   1822 }
   1823 
   1824 
   1825 /* CALLED FROM GENERATED CODE */
   1826 /* DIRTY HELPER (reads guest memory) */
   1827 ULong amd64g_dirtyhelper_loadF80le ( ULong addrU )
   1828 {
   1829    ULong f64;
   1830    convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 );
   1831    return f64;
   1832 }
   1833 
   1834 /* CALLED FROM GENERATED CODE */
   1835 /* DIRTY HELPER (writes guest memory) */
   1836 void amd64g_dirtyhelper_storeF80le ( ULong addrU, ULong f64 )
   1837 {
   1838    convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) );
   1839 }
   1840 
   1841 
   1842 /* CALLED FROM GENERATED CODE */
   1843 /* CLEAN HELPER */
    1844 /* mxcsr[15:0] contains an SSE native format MXCSR value.
   1845    Extract from it the required SSEROUND value and any resulting
   1846    emulation warning, and return (warn << 32) | sseround value.
   1847 */
   1848 ULong amd64g_check_ldmxcsr ( ULong mxcsr )
   1849 {
   1850    /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   1851    /* NOTE, encoded exactly as per enum IRRoundingMode. */
   1852    ULong rmode = (mxcsr >> 13) & 3;
   1853 
   1854    /* Detect any required emulation warnings. */
   1855    VexEmWarn ew = EmWarn_NONE;
   1856 
   1857    if ((mxcsr & 0x1F80) != 0x1F80) {
   1858       /* unmasked exceptions! */
   1859       ew = EmWarn_X86_sseExns;
   1860    }
   1861    else
   1862    if (mxcsr & (1<<15)) {
   1863       /* FZ is set */
   1864       ew = EmWarn_X86_fz;
   1865    }
   1866    else
   1867    if (mxcsr & (1<<6)) {
   1868       /* DAZ is set */
   1869       ew = EmWarn_X86_daz;
   1870    }
   1871 
   1872    return (((ULong)ew) << 32) | ((ULong)rmode);
   1873 }
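
         /* Sketch of how a caller splits the returned pair; this mirrors the
            use made of it in amd64g_dirtyhelper_FXRSTOR above:

               ULong     pair  = amd64g_check_ldmxcsr( (ULong)mxcsr );
               ULong     rmode = pair & 0xFFFFFFFFULL;
               VexEmWarn ew    = (VexEmWarn)(pair >> 32);
         */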
   1874 
   1875 
   1876 /* CALLED FROM GENERATED CODE */
   1877 /* CLEAN HELPER */
   1878 /* Given sseround as an IRRoundingMode value, create a suitable SSE
   1879    native format MXCSR value. */
   1880 ULong amd64g_create_mxcsr ( ULong sseround )
   1881 {
   1882    sseround &= 3;
   1883    return 0x1F80 | (sseround << 13);
   1884 }
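
         /* Round-trip sanity (illustrative): for r in 0..3,
            amd64g_check_ldmxcsr(amd64g_create_mxcsr(r)) == r with no
            warning, since 0x1F80 masks all six exceptions and leaves FZ
            (bit 15) and DAZ (bit 6) clear, so none of the warning cases
            above fire. */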
   1885 
   1886 
   1887 /* CLEAN HELPER */
    1888 /* fpucw[15:0] contains an x87 native format FPU control word.
   1889    Extract from it the required FPROUND value and any resulting
   1890    emulation warning, and return (warn << 32) | fpround value.
   1891 */
   1892 ULong amd64g_check_fldcw ( ULong fpucw )
   1893 {
   1894    /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   1895    /* NOTE, encoded exactly as per enum IRRoundingMode. */
   1896    ULong rmode = (fpucw >> 10) & 3;
   1897 
   1898    /* Detect any required emulation warnings. */
   1899    VexEmWarn ew = EmWarn_NONE;
   1900 
   1901    if ((fpucw & 0x3F) != 0x3F) {
   1902       /* unmasked exceptions! */
   1903       ew = EmWarn_X86_x87exns;
   1904    }
   1905    else
   1906    if (((fpucw >> 8) & 3) != 3) {
   1907       /* unsupported precision */
   1908       ew = EmWarn_X86_x87precision;
   1909    }
   1910 
   1911    return (((ULong)ew) << 32) | ((ULong)rmode);
   1912 }
   1913 
   1914 
   1915 /* CLEAN HELPER */
   1916 /* Given fpround as an IRRoundingMode value, create a suitable x87
   1917    native format FPU control word. */
   1918 ULong amd64g_create_fpucw ( ULong fpround )
   1919 {
   1920    fpround &= 3;
   1921    return 0x037F | (fpround << 10);
   1922 }
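
         /* Likewise for the x87 pair (illustrative):
            amd64g_check_fldcw(amd64g_create_fpucw(r)) is
            ((ULong)EmWarn_NONE << 32) | r, since 0x037F masks all
            exceptions and selects 64-bit precision (bits 9:8 == 3), so
            neither warning case in amd64g_check_fldcw fires. */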
   1923 
   1924 
   1925 /* This is used to implement 'fldenv'.
   1926    Reads 28 bytes at x87_state[0 .. 27]. */
   1927 /* CALLED FROM GENERATED CODE */
   1928 /* DIRTY HELPER */
   1929 VexEmWarn amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
   1930                                       /*IN*/HWord x87_state)
   1931 {
   1932    Int        stno, preg;
   1933    UInt       tag;
   1934    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   1935    Fpu_State* x87     = (Fpu_State*)x87_state;
   1936    UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
   1937    UInt       tagw    = x87->env[FP_ENV_TAG];
   1938    UInt       fpucw   = x87->env[FP_ENV_CTRL];
   1939    ULong      c3210   = x87->env[FP_ENV_STAT] & 0x4700;
   1940    VexEmWarn  ew;
   1941    ULong      fpround;
   1942    ULong      pair;
   1943 
   1944    /* Copy tags */
   1945    for (stno = 0; stno < 8; stno++) {
   1946       preg = (stno + ftop) & 7;
   1947       tag = (tagw >> (2*preg)) & 3;
   1948       if (tag == 3) {
   1949          /* register is empty */
   1950          vexTags[preg] = 0;
   1951       } else {
   1952          /* register is non-empty */
   1953          vexTags[preg] = 1;
   1954       }
   1955    }
   1956 
   1957    /* stack pointer */
   1958    vex_state->guest_FTOP = ftop;
   1959 
   1960    /* status word */
   1961    vex_state->guest_FC3210 = c3210;
   1962 
   1963    /* handle the control word, setting FPROUND and detecting any
   1964       emulation warnings. */
   1965    pair    = amd64g_check_fldcw ( (ULong)fpucw );
   1966    fpround = pair & 0xFFFFFFFFULL;
   1967    ew      = (VexEmWarn)(pair >> 32);
   1968 
   1969    vex_state->guest_FPROUND = fpround & 3;
   1970 
   1971    /* emulation warnings --> caller */
   1972    return ew;
   1973 }
   1974 
   1975 
   1976 /* CALLED FROM GENERATED CODE */
   1977 /* DIRTY HELPER */
   1978 /* Create an x87 FPU env from the guest state, as close as we can
   1979    approximate it.  Writes 28 bytes at x87_state[0..27]. */
   1980 void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
   1981                                  /*OUT*/HWord x87_state )
   1982 {
   1983    Int        i, stno, preg;
   1984    UInt       tagw;
   1985    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   1986    Fpu_State* x87     = (Fpu_State*)x87_state;
   1987    UInt       ftop    = vex_state->guest_FTOP;
   1988    ULong      c3210   = vex_state->guest_FC3210;
   1989 
   1990    for (i = 0; i < 14; i++)
   1991       x87->env[i] = 0;
   1992 
   1993    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   1994    x87->env[FP_ENV_STAT]
   1995       = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
   1996    x87->env[FP_ENV_CTRL]
   1997       = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));
   1998 
   1999    /* Compute the x87 tag word. */
   2000    tagw = 0;
   2001    for (stno = 0; stno < 8; stno++) {
   2002       preg = (stno + ftop) & 7;
   2003       if (vexTags[preg] == 0) {
   2004          /* register is empty */
   2005          tagw |= (3 << (2*preg));
   2006       } else {
   2007          /* register is full. */
   2008          tagw |= (0 << (2*preg));
   2009       }
   2010    }
   2011    x87->env[FP_ENV_TAG] = toUShort(tagw);
   2012 
    2013    /* We don't dump the x87 registers, though. */
   2014 }
   2015 
   2016 
   2017 /*---------------------------------------------------------------*/
   2018 /*--- Misc integer helpers, including rotates and CPUID.      ---*/
   2019 /*---------------------------------------------------------------*/
   2020 
   2021 /* Claim to be the following CPU, which is probably representative of
   2022    the lowliest (earliest) amd64 offerings.  It can do neither sse3
   2023    nor cx16.
   2024 
   2025    vendor_id       : AuthenticAMD
   2026    cpu family      : 15
   2027    model           : 5
   2028    model name      : AMD Opteron (tm) Processor 848
   2029    stepping        : 10
   2030    cpu MHz         : 1797.682
   2031    cache size      : 1024 KB
   2032    fpu             : yes
   2033    fpu_exception   : yes
   2034    cpuid level     : 1
   2035    wp              : yes
   2036    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
   2037                      mtrr pge mca cmov pat pse36 clflush mmx fxsr
   2038                      sse sse2 syscall nx mmxext lm 3dnowext 3dnow
   2039    bogomips        : 3600.62
   2040    TLB size        : 1088 4K pages
   2041    clflush size    : 64
   2042    cache_alignment : 64
   2043    address sizes   : 40 bits physical, 48 bits virtual
   2044    power management: ts fid vid ttp
   2045 */
   2046 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
   2047 {
   2048 #  define SET_ABCD(_a,_b,_c,_d)                \
   2049       do { st->guest_RAX = (ULong)(_a);        \
   2050            st->guest_RBX = (ULong)(_b);        \
   2051            st->guest_RCX = (ULong)(_c);        \
   2052            st->guest_RDX = (ULong)(_d);        \
   2053       } while (0)
   2054 
   2055    switch (0xFFFFFFFF & st->guest_RAX) {
   2056       case 0x00000000:
   2057          SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
   2058          break;
   2059       case 0x00000001:
   2060          SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
   2061          break;
   2062       case 0x80000000:
   2063          SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
   2064          break;
   2065       case 0x80000001:
   2066          SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, 0xe1d3fbff);
   2067          break;
   2068       case 0x80000002:
   2069          SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
   2070          break;
   2071       case 0x80000003:
   2072          SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
   2073          break;
   2074       case 0x80000004:
   2075          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2076          break;
   2077       case 0x80000005:
   2078          SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
   2079          break;
   2080       case 0x80000006:
   2081          SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
   2082          break;
   2083       case 0x80000007:
   2084          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
   2085          break;
   2086       case 0x80000008:
   2087          SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
   2088          break;
   2089       default:
   2090          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2091          break;
   2092    }
   2093 #  undef SET_ABCD
   2094 }
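
         /* Illustrative only: leaf 0 above returns the vendor string as
            little-endian bytes in EBX:EDX:ECX, so 0x68747541, 0x69746e65,
            0x444d4163 spell "AuthenticAMD".  A hypothetical decoder, not
            part of VEX:
         */
         #if 0
         static void cpuid_vendor_string ( UInt ebx, UInt edx, UInt ecx,
                                           /*OUT*/UChar* buf/*12 bytes*/ )
         {
            UInt i, regs[3];
            regs[0] = ebx; regs[1] = edx; regs[2] = ecx;
            for (i = 0; i < 12; i++)
               buf[i] = toUChar( (regs[i >> 2] >> (8 * (i & 3))) & 0xFF );
         }
         #endif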
   2095 
   2096 
   2097 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16
   2098    capable.
   2099 
   2100    vendor_id       : GenuineIntel
   2101    cpu family      : 6
   2102    model           : 15
   2103    model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
   2104    stepping        : 6
   2105    cpu MHz         : 2394.000
   2106    cache size      : 4096 KB
   2107    physical id     : 0
   2108    siblings        : 2
   2109    core id         : 0
   2110    cpu cores       : 2
   2111    fpu             : yes
   2112    fpu_exception   : yes
   2113    cpuid level     : 10
   2114    wp              : yes
   2115    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
   2116                      mtrr pge mca cmov pat pse36 clflush dts acpi
   2117                      mmx fxsr sse sse2 ss ht tm syscall nx lm
   2118                      constant_tsc pni monitor ds_cpl vmx est tm2
   2119                      cx16 xtpr lahf_lm
   2120    bogomips        : 4798.78
   2121    clflush size    : 64
   2122    cache_alignment : 64
   2123    address sizes   : 36 bits physical, 48 bits virtual
   2124    power management:
   2125 */
   2126 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
   2127 {
   2128 #  define SET_ABCD(_a,_b,_c,_d)                \
   2129       do { st->guest_RAX = (ULong)(_a);        \
   2130            st->guest_RBX = (ULong)(_b);        \
   2131            st->guest_RCX = (ULong)(_c);        \
   2132            st->guest_RDX = (ULong)(_d);        \
   2133       } while (0)
   2134 
   2135    switch (0xFFFFFFFF & st->guest_RAX) {
   2136       case 0x00000000:
   2137          SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
   2138          break;
   2139       case 0x00000001:
   2140          SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
   2141          break;
   2142       case 0x00000002:
   2143          SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
   2144          break;
   2145       case 0x00000003:
   2146          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2147          break;
   2148       case 0x00000004: {
   2149          switch (0xFFFFFFFF & st->guest_RCX) {
   2150             case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
   2151                                       0x0000003f, 0x00000001); break;
   2152             case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
   2153                                       0x0000003f, 0x00000001); break;
   2154             case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
   2155                                       0x00000fff, 0x00000001); break;
   2156             default:         SET_ABCD(0x00000000, 0x00000000,
   2157                                       0x00000000, 0x00000000); break;
   2158          }
   2159          break;
   2160       }
   2161       case 0x00000005:
   2162          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
   2163          break;
   2164       case 0x00000006:
   2165          SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
   2166          break;
   2167       case 0x00000007:
   2168          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2169          break;
   2170       case 0x00000008:
   2171          SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
   2172          break;
   2173       case 0x00000009:
   2174          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2175          break;
   2176       case 0x0000000a:
   2177       unhandled_eax_value:
   2178          SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
   2179          break;
   2180       case 0x80000000:
   2181          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
   2182          break;
   2183       case 0x80000001:
   2184          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
   2185          break;
   2186       case 0x80000002:
   2187          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
   2188          break;
   2189       case 0x80000003:
   2190          SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
   2191          break;
   2192       case 0x80000004:
   2193          SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
   2194          break;
   2195       case 0x80000005:
   2196          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2197          break;
   2198       case 0x80000006:
   2199          SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
   2200          break;
   2201       case 0x80000007:
   2202          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2203          break;
   2204       case 0x80000008:
   2205          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
   2206          break;
   2207       default:
   2208          goto unhandled_eax_value;
   2209    }
   2210 #  undef SET_ABCD
   2211 }
   2212 
   2213 
   2214 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
   2215    capable.
   2216 
   2217    vendor_id       : GenuineIntel
   2218    cpu family      : 6
   2219    model           : 37
   2220    model name      : Intel(R) Core(TM) i5 CPU         670  @ 3.47GHz
   2221    stepping        : 2
   2222    cpu MHz         : 3334.000
   2223    cache size      : 4096 KB
   2224    physical id     : 0
   2225    siblings        : 4
   2226    core id         : 0
   2227    cpu cores       : 2
   2228    apicid          : 0
   2229    initial apicid  : 0
   2230    fpu             : yes
   2231    fpu_exception   : yes
   2232    cpuid level     : 11
   2233    wp              : yes
   2234    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
   2235                      mtrr pge mca cmov pat pse36 clflush dts acpi
   2236                      mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
   2237                      lm constant_tsc arch_perfmon pebs bts rep_good
   2238                      xtopology nonstop_tsc aperfmperf pni pclmulqdq
   2239                      dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
   2240                      xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
   2241                      arat tpr_shadow vnmi flexpriority ept vpid
   2242                      MINUS aes (see below)
   2243    bogomips        : 6957.57
   2244    clflush size    : 64
   2245    cache_alignment : 64
   2246    address sizes   : 36 bits physical, 48 bits virtual
   2247    power management:
   2248 */
   2249 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st )
   2250 {
   2251 #  define SET_ABCD(_a,_b,_c,_d)                \
   2252       do { st->guest_RAX = (ULong)(_a);        \
   2253            st->guest_RBX = (ULong)(_b);        \
   2254            st->guest_RCX = (ULong)(_c);        \
   2255            st->guest_RDX = (ULong)(_d);        \
   2256       } while (0)
   2257 
   2258    UInt old_eax = (UInt)st->guest_RAX;
   2259    UInt old_ecx = (UInt)st->guest_RCX;
   2260 
   2261    switch (old_eax) {
   2262       case 0x00000000:
   2263          SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
   2264          break;
   2265       case 0x00000001:
   2266          // & ~(1<<25): don't claim to support AES insns.  See
   2267          // bug 249991.
   2268          SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff & ~(1<<25),
   2269                                           0xbfebfbff);
   2270          break;
   2271       case 0x00000002:
   2272          SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
   2273          break;
   2274       case 0x00000003:
   2275          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2276          break;
   2277       case 0x00000004:
   2278          switch (old_ecx) {
   2279             case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
   2280                                       0x0000003f, 0x00000000); break;
   2281             case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f,
   2282                                       0x0000007f, 0x00000000); break;
   2283             case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
   2284                                       0x000001ff, 0x00000000); break;
   2285             case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
   2286                                       0x00000fff, 0x00000002); break;
   2287             default:         SET_ABCD(0x00000000, 0x00000000,
   2288                                       0x00000000, 0x00000000); break;
   2289          }
   2290          break;
   2291       case 0x00000005:
   2292          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
   2293          break;
   2294       case 0x00000006:
   2295          SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000);
   2296          break;
   2297       case 0x00000007:
   2298          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2299          break;
   2300       case 0x00000008:
   2301          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2302          break;
   2303       case 0x00000009:
   2304          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2305          break;
   2306       case 0x0000000a:
   2307          SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603);
   2308          break;
   2309       case 0x0000000b:
   2310          switch (old_ecx) {
   2311             case 0x00000000:
   2312                SET_ABCD(0x00000001, 0x00000002,
   2313                         0x00000100, 0x00000000); break;
   2314             case 0x00000001:
   2315                SET_ABCD(0x00000004, 0x00000004,
   2316                         0x00000201, 0x00000000); break;
   2317             default:
   2318                SET_ABCD(0x00000000, 0x00000000,
   2319                         old_ecx,    0x00000000); break;
   2320          }
   2321          break;
   2322       case 0x0000000c:
   2323          SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
   2324          break;
   2325       case 0x0000000d:
   2326          switch (old_ecx) {
   2327             case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
   2328                                       0x00000100, 0x00000000); break;
   2329             case 0x00000001: SET_ABCD(0x00000004, 0x00000004,
   2330                                       0x00000201, 0x00000000); break;
   2331             default:         SET_ABCD(0x00000000, 0x00000000,
   2332                                       old_ecx,    0x00000000); break;
   2333          }
   2334          break;
   2335       case 0x80000000:
   2336          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
   2337          break;
   2338       case 0x80000001:
   2339          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
   2340          break;
   2341       case 0x80000002:
   2342          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
   2343          break;
   2344       case 0x80000003:
   2345          SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020);
   2346          break;
   2347       case 0x80000004:
   2348          SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847);
   2349          break;
   2350       case 0x80000005:
   2351          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
   2352          break;
   2353       case 0x80000006:
   2354          SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
   2355          break;
   2356       case 0x80000007:
   2357          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
   2358          break;
   2359       case 0x80000008:
   2360          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
   2361          break;
   2362       default:
   2363          SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
   2364          break;
   2365    }
   2366 #  undef SET_ABCD
   2367 }
   2368 
   2369 
   2370 ULong amd64g_calculate_RCR ( ULong arg,
   2371                              ULong rot_amt,
   2372                              ULong rflags_in,
   2373                              Long  szIN )
   2374 {
   2375    Bool  wantRflags = toBool(szIN < 0);
   2376    ULong sz         = wantRflags ? (-szIN) : szIN;
   2377    ULong tempCOUNT  = rot_amt & (sz == 8 ? 0x3F : 0x1F);
   2378    ULong cf=0, of=0, tempcf;
   2379 
   2380    switch (sz) {
   2381       case 8:
   2382          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2383          of        = ((arg >> 63) ^ cf) & 1;
   2384          while (tempCOUNT > 0) {
   2385             tempcf = arg & 1;
   2386             arg    = (arg >> 1) | (cf << 63);
   2387             cf     = tempcf;
   2388             tempCOUNT--;
   2389          }
   2390          break;
   2391       case 4:
   2392          while (tempCOUNT >= 33) tempCOUNT -= 33;
   2393          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2394          of        = ((arg >> 31) ^ cf) & 1;
   2395          while (tempCOUNT > 0) {
   2396             tempcf = arg & 1;
   2397             arg    = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31);
   2398             cf     = tempcf;
   2399             tempCOUNT--;
   2400          }
   2401          break;
   2402       case 2:
   2403          while (tempCOUNT >= 17) tempCOUNT -= 17;
   2404          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2405          of        = ((arg >> 15) ^ cf) & 1;
   2406          while (tempCOUNT > 0) {
   2407             tempcf = arg & 1;
   2408             arg    = ((arg >> 1) & 0x7FFFULL) | (cf << 15);
   2409             cf     = tempcf;
   2410             tempCOUNT--;
   2411          }
   2412          break;
   2413       case 1:
   2414          while (tempCOUNT >= 9) tempCOUNT -= 9;
   2415          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2416          of        = ((arg >> 7) ^ cf) & 1;
   2417          while (tempCOUNT > 0) {
   2418             tempcf = arg & 1;
   2419             arg    = ((arg >> 1) & 0x7FULL) | (cf << 7);
   2420             cf     = tempcf;
   2421             tempCOUNT--;
   2422          }
   2423          break;
   2424       default:
   2425          vpanic("calculate_RCR(amd64g): invalid size");
   2426    }
   2427 
   2428    cf &= 1;
   2429    of &= 1;
   2430    rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
   2431    rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
   2432 
   2433    /* caller can ask to have back either the resulting flags or
   2434       resulting value, but not both */
   2435    return wantRflags ? rflags_in : arg;
   2436 }
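
         /* Worked example (illustrative): an 8-bit RCR of arg=0x01 by one
            with CF=1 rotates the 9-bit quantity {CF,arg[7:0]} right once:
            the old low bit becomes the new CF and the old CF lands in
            bit 7, giving 0x80 with CF=1 and
            OF = old_bit7 ^ old_CF = 0 ^ 1 = 1. */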
   2437 
   2438 ULong amd64g_calculate_RCL ( ULong arg,
   2439                              ULong rot_amt,
   2440                              ULong rflags_in,
   2441                              Long  szIN )
   2442 {
   2443    Bool  wantRflags = toBool(szIN < 0);
   2444    ULong sz         = wantRflags ? (-szIN) : szIN;
   2445    ULong tempCOUNT  = rot_amt & (sz == 8 ? 0x3F : 0x1F);
   2446    ULong cf=0, of=0, tempcf;
   2447 
   2448    switch (sz) {
   2449       case 8:
   2450          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2451          while (tempCOUNT > 0) {
   2452             tempcf = (arg >> 63) & 1;
   2453             arg    = (arg << 1) | (cf & 1);
   2454             cf     = tempcf;
   2455             tempCOUNT--;
   2456          }
   2457          of = ((arg >> 63) ^ cf) & 1;
   2458          break;
   2459       case 4:
   2460          while (tempCOUNT >= 33) tempCOUNT -= 33;
   2461          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2462          while (tempCOUNT > 0) {
   2463             tempcf = (arg >> 31) & 1;
   2464             arg    = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1));
   2465             cf     = tempcf;
   2466             tempCOUNT--;
   2467          }
   2468          of = ((arg >> 31) ^ cf) & 1;
   2469          break;
   2470       case 2:
   2471          while (tempCOUNT >= 17) tempCOUNT -= 17;
   2472          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2473          while (tempCOUNT > 0) {
   2474             tempcf = (arg >> 15) & 1;
   2475             arg    = 0xFFFFULL & ((arg << 1) | (cf & 1));
   2476             cf     = tempcf;
   2477             tempCOUNT--;
   2478          }
   2479          of = ((arg >> 15) ^ cf) & 1;
   2480          break;
   2481       case 1:
   2482          while (tempCOUNT >= 9) tempCOUNT -= 9;
   2483          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
   2484          while (tempCOUNT > 0) {
   2485             tempcf = (arg >> 7) & 1;
   2486             arg    = 0xFFULL & ((arg << 1) | (cf & 1));
   2487             cf     = tempcf;
   2488             tempCOUNT--;
   2489          }
   2490          of = ((arg >> 7) ^ cf) & 1;
   2491          break;
   2492       default:
   2493          vpanic("calculate_RCL(amd64g): invalid size");
   2494    }
   2495 
   2496    cf &= 1;
   2497    of &= 1;
   2498    rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
   2499    rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
   2500 
   2501    return wantRflags ? rflags_in : arg;
   2502 }
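
         /* The symmetric RCL case (illustrative): an 8-bit RCL of 0x80 by
            one with CF=0 yields 0x00 with CF=1 (the old top bit) and
            OF = new_bit7 ^ new_CF = 0 ^ 1 = 1, matching the post-loop OF
            computation above. */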
   2503 
   2504 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+)
   2505  * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25
   2506  */
   2507 ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which)
   2508 {
    2509    ULong hi, lo, tmp, A[16];
   2510 
   2511    A[0] = 0;            A[1] = a;
   2512    A[2] = A[1] << 1;    A[3] = A[2] ^ a;
   2513    A[4] = A[2] << 1;    A[5] = A[4] ^ a;
   2514    A[6] = A[3] << 1;    A[7] = A[6] ^ a;
   2515    A[8] = A[4] << 1;    A[9] = A[8] ^ a;
   2516    A[10] = A[5] << 1;   A[11] = A[10] ^ a;
   2517    A[12] = A[6] << 1;   A[13] = A[12] ^ a;
   2518    A[14] = A[7] << 1;   A[15] = A[14] ^ a;
   2519 
   2520    lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15];
   2521    hi = lo >> 56;
   2522    lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15];
   2523    hi = (hi << 8) | (lo >> 56);
   2524    lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15];
   2525    hi = (hi << 8) | (lo >> 56);
   2526    lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15];
   2527    hi = (hi << 8) | (lo >> 56);
   2528    lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15];
   2529    hi = (hi << 8) | (lo >> 56);
   2530    lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15];
   2531    hi = (hi << 8) | (lo >> 56);
   2532    lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15];
   2533    hi = (hi << 8) | (lo >> 56);
   2534    lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15];
   2535 
   2536    ULong m0 = -1;
   2537    m0 /= 255;
   2538    tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp;
   2539    tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp;
   2540    tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp;
   2541    tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp;
   2542    tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp;
   2543    tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp;
   2544    tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp;
   2545 
   2546    return which ? hi : lo;
   2547 }
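
         /* Worked example (illustrative): carry-less multiplication is
            polynomial multiplication over GF(2), so
            amd64g_calculate_pclmul(3, 5, 0) computes (x+1)*(x^2+1)
            = x^3+x^2+x+1 = 0xF in the low half; the high-half query
            (which != 0) is 0 for such tiny inputs. */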
   2548 
   2549 
   2550 /* CALLED FROM GENERATED CODE */
   2551 /* DIRTY HELPER (non-referentially-transparent) */
   2552 /* Horrible hack.  On non-amd64 platforms, return 1. */
   2553 ULong amd64g_dirtyhelper_RDTSC ( void )
   2554 {
   2555 #  if defined(__x86_64__)
   2556    UInt  eax, edx;
   2557    __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
   2558    return (((ULong)edx) << 32) | ((ULong)eax);
   2559 #  else
   2560    return 1ULL;
   2561 #  endif
   2562 }
   2563 
   2564 
   2565 /* CALLED FROM GENERATED CODE */
   2566 /* DIRTY HELPER (non-referentially-transparent) */
   2567 /* Horrible hack.  On non-amd64 platforms, return 0. */
   2568 ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ )
   2569 {
   2570 #  if defined(__x86_64__)
   2571    ULong r = 0;
   2572    portno &= 0xFFFF;
   2573    switch (sz) {
   2574       case 4:
   2575          __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
   2576                               : "=a" (r) : "Nd" (portno));
    2577          break;
   2578       case 2:
   2579          __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0"
   2580                               : "=a" (r) : "Nd" (portno));
    2581          break;
   2582       case 1:
   2583          __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0"
   2584                               : "=a" (r) : "Nd" (portno));
    2585          break;
   2586       default:
   2587          break; /* note: no 64-bit version of insn exists */
   2588    }
   2589    return r;
   2590 #  else
   2591    return 0;
   2592 #  endif
   2593 }
   2594 
   2595 
   2596 /* CALLED FROM GENERATED CODE */
   2597 /* DIRTY HELPER (non-referentially-transparent) */
   2598 /* Horrible hack.  On non-amd64 platforms, do nothing. */
   2599 void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ )
   2600 {
   2601 #  if defined(__x86_64__)
   2602    portno &= 0xFFFF;
   2603    switch (sz) {
   2604       case 4:
   2605          __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1"
   2606                               : : "a" (data), "Nd" (portno));
    2607          break;
   2608       case 2:
   2609          __asm__ __volatile__("outw %w0, %w1"
   2610                               : : "a" (data), "Nd" (portno));
    2611          break;
   2612       case 1:
   2613          __asm__ __volatile__("outb %b0, %w1"
   2614                               : : "a" (data), "Nd" (portno));
    2615          break;
   2616       default:
   2617          break; /* note: no 64-bit version of insn exists */
   2618    }
   2619 #  else
   2620    /* do nothing */
   2621 #  endif
   2622 }
   2623 
   2624 /* CALLED FROM GENERATED CODE */
   2625 /* DIRTY HELPER (non-referentially-transparent) */
    2626 /* Horrible hack.  On non-amd64 platforms, zero out the destination. */
   2627 /* op = 0: call the native SGDT instruction.
   2628    op = 1: call the native SIDT instruction.
   2629 */
   2630 void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) {
   2631 #  if defined(__x86_64__)
   2632    switch (op) {
   2633       case 0:
   2634          __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
   2635          break;
   2636       case 1:
   2637          __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
   2638          break;
   2639       default:
   2640          vpanic("amd64g_dirtyhelper_SxDT");
   2641    }
   2642 #  else
    2643    /* zero the 10 bytes that SGDT/SIDT would have written */
   2644    UChar* p = (UChar*)address;
   2645    p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
   2646    p[6] = p[7] = p[8] = p[9] = 0;
   2647 #  endif
   2648 }
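
            /* The 10 bytes zeroed in the non-amd64 arm above mirror what
               SGDT/SIDT actually store in 64-bit mode: a 2-byte table limit
               followed by an 8-byte linear base address.  As a sketch (the
               struct name is illustrative only):

                  struct PseudoDescriptor64 {
                     UShort limit;   // bytes 0..1
                     ULong  base;    // bytes 2..9, stored unaligned
                  };
            */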
   2649 
   2650 /*---------------------------------------------------------------*/
   2651 /*--- Helpers for MMX/SSE/SSE2.                               ---*/
   2652 /*---------------------------------------------------------------*/
   2653 
   2654 static inline UChar abdU8 ( UChar xx, UChar yy ) {
   2655    return toUChar(xx>yy ? xx-yy : yy-xx);
   2656 }
   2657 
   2658 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   2659    return (((ULong)w1) << 32) | ((ULong)w0);
   2660 }
   2661 
   2662 static inline UShort sel16x4_3 ( ULong w64 ) {
   2663    UInt hi32 = toUInt(w64 >> 32);
   2664    return toUShort(hi32 >> 16);
   2665 }
   2666 static inline UShort sel16x4_2 ( ULong w64 ) {
   2667    UInt hi32 = toUInt(w64 >> 32);
   2668    return toUShort(hi32);
   2669 }
   2670 static inline UShort sel16x4_1 ( ULong w64 ) {
   2671    UInt lo32 = toUInt(w64);
   2672    return toUShort(lo32 >> 16);
   2673 }
   2674 static inline UShort sel16x4_0 ( ULong w64 ) {
   2675    UInt lo32 = toUInt(w64);
   2676    return toUShort(lo32);
   2677 }
   2678 
   2679 static inline UChar sel8x8_7 ( ULong w64 ) {
   2680    UInt hi32 = toUInt(w64 >> 32);
   2681    return toUChar(hi32 >> 24);
   2682 }
   2683 static inline UChar sel8x8_6 ( ULong w64 ) {
   2684    UInt hi32 = toUInt(w64 >> 32);
   2685    return toUChar(hi32 >> 16);
   2686 }
   2687 static inline UChar sel8x8_5 ( ULong w64 ) {
   2688    UInt hi32 = toUInt(w64 >> 32);
   2689    return toUChar(hi32 >> 8);
   2690 }
   2691 static inline UChar sel8x8_4 ( ULong w64 ) {
   2692    UInt hi32 = toUInt(w64 >> 32);
   2693    return toUChar(hi32 >> 0);
   2694 }
   2695 static inline UChar sel8x8_3 ( ULong w64 ) {
   2696    UInt lo32 = toUInt(w64);
   2697    return toUChar(lo32 >> 24);
   2698 }
   2699 static inline UChar sel8x8_2 ( ULong w64 ) {
   2700    UInt lo32 = toUInt(w64);
   2701    return toUChar(lo32 >> 16);
   2702 }
   2703 static inline UChar sel8x8_1 ( ULong w64 ) {
   2704    UInt lo32 = toUInt(w64);
   2705    return toUChar(lo32 >> 8);
   2706 }
   2707 static inline UChar sel8x8_0 ( ULong w64 ) {
   2708    UInt lo32 = toUInt(w64);
   2709    return toUChar(lo32 >> 0);
   2710 }
   2711 
   2712 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2713 ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
   2714 {
   2715    return
   2716       mk32x2(
   2717          (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
   2718             + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
   2719          (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
   2720             + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
   2721       );
   2722 }
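
            /* Worked example: for xx = 0x0002000300040005 and
               yy = 0x0001000100010001 the lane products are 2,3,4,5, so the
               result is mk32x2(2+3, 4+5) = 0x0000000500000009. */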
   2723 
   2724 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2725 ULong amd64g_calculate_mmx_pmovmskb ( ULong xx )
   2726 {
   2727    ULong r = 0;
   2728    if (xx & (1ULL << (64-1))) r |= (1<<7);
   2729    if (xx & (1ULL << (56-1))) r |= (1<<6);
   2730    if (xx & (1ULL << (48-1))) r |= (1<<5);
   2731    if (xx & (1ULL << (40-1))) r |= (1<<4);
   2732    if (xx & (1ULL << (32-1))) r |= (1<<3);
   2733    if (xx & (1ULL << (24-1))) r |= (1<<2);
   2734    if (xx & (1ULL << (16-1))) r |= (1<<1);
   2735    if (xx & (1ULL << ( 8-1))) r |= (1<<0);
   2736    return r;
   2737 }
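
            /* Example: xx = 0x8000000000000080 has the sign bit set in bytes
               7 and 0 only, so the result is 0x81. */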
   2738 
   2739 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2740 ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
   2741 {
   2742    UInt t = 0;
   2743    t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
   2744    t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
   2745    t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
   2746    t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
   2747    t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
   2748    t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
   2749    t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
   2750    t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
   2751    t &= 0xFFFF;
   2752    return (ULong)t;
   2753 }
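
            /* Example: xx = 0x00FF000000000000 and yy = 0 differ by 0xFF in a
               single byte lane, so the result is 0x00FF. */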
   2754 
   2755 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2756 ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )
   2757 {
   2758    ULong rHi8 = amd64g_calculate_mmx_pmovmskb ( w64hi );
   2759    ULong rLo8 = amd64g_calculate_mmx_pmovmskb ( w64lo );
   2760    return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF);
   2761 }
   2762 
   2763 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2764 ULong amd64g_calc_crc32b ( ULong crcIn, ULong b )
   2765 {
   2766    UInt  i;
   2767    ULong crc = (b & 0xFFULL) ^ crcIn;
   2768    for (i = 0; i < 8; i++)
   2769       crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
   2770    return crc;
   2771 }
   2772 
   2773 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2774 ULong amd64g_calc_crc32w ( ULong crcIn, ULong w )
   2775 {
   2776    UInt  i;
   2777    ULong crc = (w & 0xFFFFULL) ^ crcIn;
   2778    for (i = 0; i < 16; i++)
   2779       crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
   2780    return crc;
   2781 }
   2782 
   2783 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2784 ULong amd64g_calc_crc32l ( ULong crcIn, ULong l )
   2785 {
   2786    UInt i;
   2787    ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn;
   2788    for (i = 0; i < 32; i++)
   2789       crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
   2790    return crc;
   2791 }
   2792 
   2793 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
   2794 ULong amd64g_calc_crc32q ( ULong crcIn, ULong q )
   2795 {
   2796    ULong crc = amd64g_calc_crc32l(crcIn, q);
   2797    return amd64g_calc_crc32l(crc, q >> 32);
   2798 }
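
            /* These four helpers implement the CRC32 instruction's CRC-32C
               (Castagnoli) polynomial 0x1EDC6F41, used here in bit-reflected
               form as 0x82F63B78.  The instruction applies no initial or
               final inversion, so a conventional CRC-32C of a buffer is
               obtained by seeding with ~0 and complementing the result.  A
               sketch (illustrative only):

                  static UInt crc32c_of_buffer ( const UChar* p, UInt n )
                  {
                     UInt  i;
                     ULong crc = 0xFFFFFFFFULL;
                     for (i = 0; i < n; i++)
                        crc = amd64g_calc_crc32b(crc, p[i]);
                     return (UInt)(crc ^ 0xFFFFFFFFULL);
                  }
            */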
   2799 
   2800 
   2801 /*---------------------------------------------------------------*/
   2802 /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M}                    ---*/
   2803 /*---------------------------------------------------------------*/
   2804 
   2805 static UInt zmask_from_V128 ( V128* arg )
   2806 {
   2807    UInt i, res = 0;
   2808    for (i = 0; i < 16; i++) {
   2809       res |=  ((arg->w8[i] == 0) ? 1 : 0) << i;
   2810    }
   2811    return res;
   2812 }
   2813 
   2814 /* Helps with PCMP{I,E}STR{I,M}.
   2815 
   2816    CALLED FROM GENERATED CODE: DIRTY HELPER(s).  (But not really,
   2817    actually it could be a clean helper, but for the fact that we can't
   2818    pass by value 2 x V128 to a clean helper, nor have one returned.)
    2819    Reads guest state, writes to guest state for the xSTRM cases,
    2820    makes no memory accesses, and is otherwise a pure function.
   2821 
    2822    opc4_and_imm contains (4th byte of opcode << 8) | the-imm8-byte, so
   2823    the callee knows which I/E and I/M variant it is dealing with and
   2824    what the specific operation is.  4th byte of opcode is in the range
   2825    0x60 to 0x63:
   2826        istri  66 0F 3A 63
   2827        istrm  66 0F 3A 62
   2828        estri  66 0F 3A 61
   2829        estrm  66 0F 3A 60
   2830 
   2831    gstOffL and gstOffR are the guest state offsets for the two XMM
   2832    register inputs.  We never have to deal with the memory case since
   2833    that is handled by pre-loading the relevant value into the fake
   2834    XMM16 register.
   2835 
   2836    For ESTRx variants, edxIN and eaxIN hold the values of those two
   2837    registers.
   2838 
   2839    In all cases, the bottom 16 bits of the result contain the new
   2840    OSZACP %rflags values.  For xSTRI variants, bits[31:16] of the
   2841    result hold the new %ecx value.  For xSTRM variants, the helper
   2842    writes the result directly to the guest XMM0.
   2843 
   2844    Declarable side effects: in all cases, reads guest state at
   2845    [gstOffL, +16) and [gstOffR, +16).  For xSTRM variants, also writes
   2846    guest_XMM0.
   2847 
    2848    Is expected to be called with opc4_and_imm combinations which have
    2849    actually been validated, and will assert otherwise.  The front
   2850    end should ensure we're only called with verified values.
   2851 */
   2852 ULong amd64g_dirtyhelper_PCMPxSTRx (
   2853           VexGuestAMD64State* gst,
   2854           HWord opc4_and_imm,
   2855           HWord gstOffL, HWord gstOffR,
   2856           HWord edxIN, HWord eaxIN
   2857        )
   2858 {
   2859    HWord opc4 = (opc4_and_imm >> 8) & 0xFF;
   2860    HWord imm8 = opc4_and_imm & 0xFF;
   2861    HWord isISTRx = opc4 & 2;
   2862    HWord isxSTRM = (opc4 & 1) ^ 1;
   2863    vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
   2864    vassert((imm8 & 1) == 0); /* we support byte-size cases only */
   2865 
    2866    // locate the two vector args in the guest state
   2867    V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
   2868    V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
   2869 
   2870    /* Create the arg validity masks, either from the vectors
   2871       themselves or from the supplied edx/eax values. */
   2872    // FIXME: this is only right for the 8-bit data cases.
   2873    // At least that is asserted above.
   2874    UInt zmaskL, zmaskR;
   2875    if (isISTRx) {
   2876       zmaskL = zmask_from_V128(argL);
   2877       zmaskR = zmask_from_V128(argR);
   2878    } else {
   2879       Int tmp;
   2880       tmp = edxIN & 0xFFFFFFFF;
   2881       if (tmp < -16) tmp = -16;
   2882       if (tmp > 16)  tmp = 16;
   2883       if (tmp < 0)   tmp = -tmp;
   2884       vassert(tmp >= 0 && tmp <= 16);
   2885       zmaskL = (1 << tmp) & 0xFFFF;
   2886       tmp = eaxIN & 0xFFFFFFFF;
   2887       if (tmp < -16) tmp = -16;
   2888       if (tmp > 16)  tmp = 16;
   2889       if (tmp < 0)   tmp = -tmp;
   2890       vassert(tmp >= 0 && tmp <= 16);
   2891       zmaskR = (1 << tmp) & 0xFFFF;
   2892    }
   2893 
   2894    // temp spot for the resulting flags and vector.
   2895    V128 resV;
   2896    UInt resOSZACP;
   2897 
    2898    // do the actual computation
   2899    Bool ok = compute_PCMPxSTRx (
   2900                 &resV, &resOSZACP, argL, argR,
   2901                 zmaskL, zmaskR, imm8, (Bool)isxSTRM
   2902              );
   2903 
   2904    // front end shouldn't pass us any imm8 variants we can't
   2905    // handle.  Hence:
   2906    vassert(ok);
   2907 
   2908    // So, finally we need to get the results back to the caller.
   2909    // In all cases, the new OSZACP value is the lowest 16 of
   2910    // the return value.
   2911    if (isxSTRM) {
    2912       /* gst->guest_XMM0 = resV; */ // gcc doesn't like that
   2913       gst->guest_XMM0[0] = resV.w32[0];
   2914       gst->guest_XMM0[1] = resV.w32[1];
   2915       gst->guest_XMM0[2] = resV.w32[2];
   2916       gst->guest_XMM0[3] = resV.w32[3];
   2917       return resOSZACP & 0x8D5;
   2918    } else {
   2919       UInt newECX = resV.w32[0] & 0xFFFF;
   2920       return (newECX << 16) | (resOSZACP & 0x8D5);
   2921    }
   2922 }
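
            /* Unpacking the return value at a call site, as a sketch ('res'
               stands for the value returned above):

                  UInt flags  = res & 0xFFFF;          // new OSZACP bits
                  UInt newECX = (res >> 16) & 0xFFFF;  // xSTRI variants only
            */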
   2923 
   2924 
   2925 /*---------------------------------------------------------------*/
   2926 /*--- Helpers for dealing with, and describing,               ---*/
   2927 /*--- guest state as a whole.                                 ---*/
   2928 /*---------------------------------------------------------------*/
   2929 
   2930 /* Initialise the entire amd64 guest state. */
   2931 /* VISIBLE TO LIBVEX CLIENT */
   2932 void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
   2933 {
   2934    vex_state->guest_RAX = 0;
   2935    vex_state->guest_RCX = 0;
   2936    vex_state->guest_RDX = 0;
   2937    vex_state->guest_RBX = 0;
   2938    vex_state->guest_RSP = 0;
   2939    vex_state->guest_RBP = 0;
   2940    vex_state->guest_RSI = 0;
   2941    vex_state->guest_RDI = 0;
   2942    vex_state->guest_R8  = 0;
   2943    vex_state->guest_R9  = 0;
   2944    vex_state->guest_R10 = 0;
   2945    vex_state->guest_R11 = 0;
   2946    vex_state->guest_R12 = 0;
   2947    vex_state->guest_R13 = 0;
   2948    vex_state->guest_R14 = 0;
   2949    vex_state->guest_R15 = 0;
   2950 
   2951    vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
   2952    vex_state->guest_CC_DEP1 = 0;
   2953    vex_state->guest_CC_DEP2 = 0;
   2954    vex_state->guest_CC_NDEP = 0;
   2955 
   2956    vex_state->guest_DFLAG   = 1; /* forwards */
   2957    vex_state->guest_IDFLAG  = 0;
   2958 
   2959    /* HACK: represent the offset associated with %fs==0. This
   2960       assumes that %fs is only ever zero. */
   2961    vex_state->guest_FS_ZERO = 0;
   2962 
   2963    vex_state->guest_RIP = 0;
   2964 
   2965    /* Initialise the simulated FPU */
   2966    amd64g_dirtyhelper_FINIT( vex_state );
   2967 
   2968    /* Initialise the SSE state. */
   2969 #  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;
   2970 
   2971    vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST;
   2972    SSEZERO(vex_state->guest_XMM0);
   2973    SSEZERO(vex_state->guest_XMM1);
   2974    SSEZERO(vex_state->guest_XMM2);
   2975    SSEZERO(vex_state->guest_XMM3);
   2976    SSEZERO(vex_state->guest_XMM4);
   2977    SSEZERO(vex_state->guest_XMM5);
   2978    SSEZERO(vex_state->guest_XMM6);
   2979    SSEZERO(vex_state->guest_XMM7);
   2980    SSEZERO(vex_state->guest_XMM8);
   2981    SSEZERO(vex_state->guest_XMM9);
   2982    SSEZERO(vex_state->guest_XMM10);
   2983    SSEZERO(vex_state->guest_XMM11);
   2984    SSEZERO(vex_state->guest_XMM12);
   2985    SSEZERO(vex_state->guest_XMM13);
   2986    SSEZERO(vex_state->guest_XMM14);
   2987    SSEZERO(vex_state->guest_XMM15);
   2988    SSEZERO(vex_state->guest_XMM16);
   2989 
   2990 #  undef SSEZERO
   2991 
   2992    vex_state->guest_EMWARN = EmWarn_NONE;
   2993 
    2994    /* These should never be read or written, but we
   2995       initialise them anyway. */
   2996    vex_state->guest_TISTART = 0;
   2997    vex_state->guest_TILEN   = 0;
   2998 
   2999    vex_state->guest_NRADDR   = 0;
   3000    vex_state->guest_SC_CLASS = 0;
   3001    vex_state->guest_GS_0x60  = 0;
   3002 
   3003    vex_state->guest_IP_AT_SYSCALL = 0;
   3004    /* vex_state->padding = 0; */
   3005 }
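
            /* Typical client-side use, as a sketch ('entry' and 'sp' are
               illustrative values chosen by the client):

                  VexGuestAMD64State gst;
                  LibVEX_GuestAMD64_initialise(&gst);
                  gst.guest_RIP = entry;   // where simulated execution starts
                  gst.guest_RSP = sp;      // client-supplied stack pointer
            */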
   3006 
   3007 
   3008 /* Figure out if any part of the guest state contained in minoff
   3009    .. maxoff requires precise memory exceptions.  If in doubt return
    3010    True (but this generates significantly slower code).
   3011 
   3012    By default we enforce precise exns for guest %RSP, %RBP and %RIP
   3013    only.  These are the minimum needed to extract correct stack
   3014    backtraces from amd64 code.
   3015 */
   3016 Bool guest_amd64_state_requires_precise_mem_exns ( Int minoff,
   3017                                                    Int maxoff)
   3018 {
   3019    Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
   3020    Int rbp_max = rbp_min + 8 - 1;
   3021    Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP);
   3022    Int rsp_max = rsp_min + 8 - 1;
   3023    Int rip_min = offsetof(VexGuestAMD64State, guest_RIP);
   3024    Int rip_max = rip_min + 8 - 1;
   3025 
   3026    if (maxoff < rbp_min || minoff > rbp_max) {
   3027       /* no overlap with rbp */
   3028    } else {
   3029       return True;
   3030    }
   3031 
   3032    if (maxoff < rsp_min || minoff > rsp_max) {
   3033       /* no overlap with rsp */
   3034    } else {
   3035       return True;
   3036    }
   3037 
   3038    if (maxoff < rip_min || minoff > rip_max) {
    3039       /* no overlap with rip */
   3040    } else {
   3041       return True;
   3042    }
   3043 
   3044    return False;
   3045 }
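
            /* Example: a range covering only guest_RAX yields False, since
               %RAX plays no part in stack unwinding, whereas any range
               overlapping guest_RSP yields True:

                  Int o = offsetof(VexGuestAMD64State, guest_RAX);
                  Bool b = guest_amd64_state_requires_precise_mem_exns(o, o+7);
                  // here b == False
            */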
   3046 
   3047 
   3048 #define ALWAYSDEFD(field)                             \
   3049     { offsetof(VexGuestAMD64State, field),            \
   3050       (sizeof ((VexGuestAMD64State*)0)->field) }
   3051 
   3052 VexGuestLayout
   3053    amd64guest_layout
   3054       = {
   3055           /* Total size of the guest state, in bytes. */
   3056           .total_sizeB = sizeof(VexGuestAMD64State),
   3057 
   3058           /* Describe the stack pointer. */
   3059           .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
   3060           .sizeof_SP = 8,
   3061 
   3062           /* Describe the frame pointer. */
   3063           .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
   3064           .sizeof_FP = 8,
   3065 
   3066           /* Describe the instruction pointer. */
   3067           .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
   3068           .sizeof_IP = 8,
   3069 
   3070           /* Describe any sections to be regarded by Memcheck as
   3071              'always-defined'. */
   3072           .n_alwaysDefd = 16,
   3073 
   3074           /* flags thunk: OP and NDEP are always defd, whereas DEP1
   3075              and DEP2 have to be tracked.  See detailed comment in
   3076              gdefs.h on meaning of thunk fields. */
   3077           .alwaysDefd
   3078              = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
   3079                  /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
    3080                  /*  2 */ ALWAYSDEFD(guest_DFLAG),
   3081                  /*  3 */ ALWAYSDEFD(guest_IDFLAG),
   3082                  /*  4 */ ALWAYSDEFD(guest_RIP),
   3083                  /*  5 */ ALWAYSDEFD(guest_FS_ZERO),
   3084                  /*  6 */ ALWAYSDEFD(guest_FTOP),
   3085                  /*  7 */ ALWAYSDEFD(guest_FPTAG),
   3086                  /*  8 */ ALWAYSDEFD(guest_FPROUND),
   3087                  /*  9 */ ALWAYSDEFD(guest_FC3210),
   3088                  // /* */ ALWAYSDEFD(guest_CS),
   3089                  // /* */ ALWAYSDEFD(guest_DS),
   3090                  // /* */ ALWAYSDEFD(guest_ES),
   3091                  // /* */ ALWAYSDEFD(guest_FS),
   3092                  // /* */ ALWAYSDEFD(guest_GS),
   3093                  // /* */ ALWAYSDEFD(guest_SS),
   3094                  // /* */ ALWAYSDEFD(guest_LDT),
   3095                  // /* */ ALWAYSDEFD(guest_GDT),
   3096                  /* 10 */ ALWAYSDEFD(guest_EMWARN),
   3097                  /* 11 */ ALWAYSDEFD(guest_SSEROUND),
   3098                  /* 12 */ ALWAYSDEFD(guest_TISTART),
   3099                  /* 13 */ ALWAYSDEFD(guest_TILEN),
   3100                  /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
   3101                  /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
   3102                }
   3103         };
   3104 
   3105 
   3106 /*---------------------------------------------------------------*/
   3107 /*--- end                               guest_amd64_helpers.c ---*/
   3108 /*---------------------------------------------------------------*/
   3109