1 2 /*---------------------------------------------------------------*/ 3 /*--- begin guest_x86_helpers.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2017 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 #include "libvex_basictypes.h" 37 #include "libvex_emnote.h" 38 #include "libvex_guest_x86.h" 39 #include "libvex_ir.h" 40 #include "libvex.h" 41 42 #include "main_util.h" 43 #include "main_globals.h" 44 #include "guest_generic_bb_to_IR.h" 45 #include "guest_x86_defs.h" 46 #include "guest_generic_x87.h" 47 48 49 /* This file contains helper functions for x86 guest code. 50 Calls to these functions are generated by the back end. 51 These calls are of course in the host machine code and 52 this file will be compiled to host machine code, so that 53 all makes sense. 54 55 Only change the signatures of these helper functions very 56 carefully. If you change the signature here, you'll have to change 57 the parameters passed to it in the IR calls constructed by 58 guest-x86/toIR.c. 59 60 The convention used is that all functions called from generated 61 code are named x86g_<something>, and any function whose name lacks 62 that prefix is not called from generated code. Note that some 63 LibVEX_* functions can however be called by VEX's client, but that 64 is not the same as calling them from VEX-generated code. 65 */ 66 67 68 /* Set to 1 to get detailed profiling info about use of the flag 69 machinery. */ 70 #define PROFILE_EFLAGS 0 71 72 73 /*---------------------------------------------------------------*/ 74 /*--- %eflags run-time helpers. ---*/ 75 /*---------------------------------------------------------------*/ 76 77 static const UChar parity_table[256] = { 78 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 79 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 80 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 81 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 82 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 83 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 84 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 85 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 86 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 87 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 88 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 89 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 90 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 91 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 92 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 93 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 94 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 95 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 96 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 97 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 98 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 99 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 100 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 101 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 102 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 103 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 104 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 105 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 106 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 107 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 108 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, 109 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 110 }; 111 112 /* generalised left-shifter */ 113 inline static Int lshift ( Int x, Int n ) 114 { 115 if (n >= 0) 116 return (UInt)x << n; 117 else 118 return x >> (-n); 119 } 120 121 /* identity on ULong */ 122 static inline ULong idULong ( ULong x ) 123 { 124 return x; 125 } 126 127 128 #define PREAMBLE(__data_bits) \ 129 /* const */ UInt DATA_MASK \ 130 = __data_bits==8 ? 0xFF \ 131 : (__data_bits==16 ? 0xFFFF \ 132 : 0xFFFFFFFF); \ 133 /* const */ UInt SIGN_MASK = 1u << (__data_bits - 1); \ 134 /* const */ UInt CC_DEP1 = cc_dep1_formal; \ 135 /* const */ UInt CC_DEP2 = cc_dep2_formal; \ 136 /* const */ UInt CC_NDEP = cc_ndep_formal; \ 137 /* Four bogus assignments, which hopefully gcc can */ \ 138 /* optimise away, and which stop it complaining about */ \ 139 /* unused variables. */ \ 140 SIGN_MASK = SIGN_MASK; \ 141 DATA_MASK = DATA_MASK; \ 142 CC_DEP2 = CC_DEP2; \ 143 CC_NDEP = CC_NDEP; 144 145 146 /*-------------------------------------------------------------*/ 147 148 #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \ 149 { \ 150 PREAMBLE(DATA_BITS); \ 151 { UInt cf, pf, af, zf, sf, of; \ 152 UInt argL, argR, res; \ 153 argL = CC_DEP1; \ 154 argR = CC_DEP2; \ 155 res = argL + argR; \ 156 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \ 157 pf = parity_table[(UChar)res]; \ 158 af = (res ^ argL ^ argR) & 0x10; \ 159 zf = ((DATA_UTYPE)res == 0) << 6; \ 160 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 161 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \ 162 12 - DATA_BITS) & X86G_CC_MASK_O; \ 163 return cf | pf | af | zf | sf | of; \ 164 } \ 165 } 166 167 /*-------------------------------------------------------------*/ 168 169 #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \ 170 { \ 171 PREAMBLE(DATA_BITS); \ 172 { UInt cf, pf, af, zf, sf, of; \ 173 UInt argL, argR, res; \ 174 argL = CC_DEP1; \ 175 argR = CC_DEP2; \ 176 res = argL - argR; \ 177 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \ 178 pf = parity_table[(UChar)res]; \ 179 af = (res ^ argL ^ argR) & 0x10; \ 180 zf = ((DATA_UTYPE)res == 0) << 6; \ 181 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 182 of = lshift((argL ^ argR) & (argL ^ res), \ 183 12 - DATA_BITS) & X86G_CC_MASK_O; \ 184 return cf | pf | af | zf | sf | of; \ 185 } \ 186 } 187 188 /*-------------------------------------------------------------*/ 189 190 #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \ 191 { \ 192 PREAMBLE(DATA_BITS); \ 193 { UInt cf, pf, af, zf, sf, of; \ 194 UInt argL, argR, oldC, res; \ 195 oldC = CC_NDEP & X86G_CC_MASK_C; \ 196 argL = CC_DEP1; \ 197 argR = CC_DEP2 ^ oldC; \ 198 res = (argL + argR) + oldC; \ 199 if (oldC) \ 200 cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \ 201 else \ 202 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \ 203 pf = parity_table[(UChar)res]; \ 204 af = (res ^ argL ^ argR) & 0x10; \ 205 zf = ((DATA_UTYPE)res == 0) << 6; \ 206 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 207 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \ 208 12 - DATA_BITS) & X86G_CC_MASK_O; \ 209 return cf | pf | af | zf | sf | of; \ 210 } \ 211 } 212 213 /*-------------------------------------------------------------*/ 214 215 #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \ 216 { \ 217 PREAMBLE(DATA_BITS); \ 218 { UInt cf, pf, af, zf, sf, of; \ 219 UInt argL, argR, oldC, res; \ 220 oldC = CC_NDEP & X86G_CC_MASK_C; \ 221 argL = CC_DEP1; \ 222 argR = CC_DEP2 ^ oldC; \ 223 res = (argL - argR) - oldC; \ 224 if (oldC) \ 225 cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \ 226 else \ 227 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \ 228 pf = parity_table[(UChar)res]; \ 229 af = (res ^ argL ^ argR) & 0x10; \ 230 zf = ((DATA_UTYPE)res == 0) << 6; \ 231 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 232 of = lshift((argL ^ argR) & (argL ^ res), \ 233 12 - DATA_BITS) & X86G_CC_MASK_O; \ 234 return cf | pf | af | zf | sf | of; \ 235 } \ 236 } 237 238 /*-------------------------------------------------------------*/ 239 240 #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \ 241 { \ 242 PREAMBLE(DATA_BITS); \ 243 { UInt cf, pf, af, zf, sf, of; \ 244 cf = 0; \ 245 pf = parity_table[(UChar)CC_DEP1]; \ 246 af = 0; \ 247 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ 248 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ 249 of = 0; \ 250 return cf | pf | af | zf | sf | of; \ 251 } \ 252 } 253 254 /*-------------------------------------------------------------*/ 255 256 #define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \ 257 { \ 258 PREAMBLE(DATA_BITS); \ 259 { UInt cf, pf, af, zf, sf, of; \ 260 UInt argL, argR, res; \ 261 res = CC_DEP1; \ 262 argL = res - 1; \ 263 argR = 1; \ 264 cf = CC_NDEP & X86G_CC_MASK_C; \ 265 pf = parity_table[(UChar)res]; \ 266 af = (res ^ argL ^ argR) & 0x10; \ 267 zf = ((DATA_UTYPE)res == 0) << 6; \ 268 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 269 of = ((res & DATA_MASK) == SIGN_MASK) << 11; \ 270 return cf | pf | af | zf | sf | of; \ 271 } \ 272 } 273 274 /*-------------------------------------------------------------*/ 275 276 #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \ 277 { \ 278 PREAMBLE(DATA_BITS); \ 279 { UInt cf, pf, af, zf, sf, of; \ 280 UInt argL, argR, res; \ 281 res = CC_DEP1; \ 282 argL = res + 1; \ 283 argR = 1; \ 284 cf = CC_NDEP & X86G_CC_MASK_C; \ 285 pf = parity_table[(UChar)res]; \ 286 af = (res ^ argL ^ argR) & 0x10; \ 287 zf = ((DATA_UTYPE)res == 0) << 6; \ 288 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 289 of = ((res & DATA_MASK) \ 290 == ((UInt)SIGN_MASK - 1)) << 11; \ 291 return cf | pf | af | zf | sf | of; \ 292 } \ 293 } 294 295 /*-------------------------------------------------------------*/ 296 297 #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \ 298 { \ 299 PREAMBLE(DATA_BITS); \ 300 { UInt cf, pf, af, zf, sf, of; \ 301 cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C; \ 302 pf = parity_table[(UChar)CC_DEP1]; \ 303 af = 0; /* undefined */ \ 304 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ 305 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ 306 /* of is defined if shift count == 1 */ \ 307 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \ 308 & X86G_CC_MASK_O; \ 309 return cf | pf | af | zf | sf | of; \ 310 } \ 311 } 312 313 /*-------------------------------------------------------------*/ 314 315 #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \ 316 { \ 317 PREAMBLE(DATA_BITS); \ 318 { UInt cf, pf, af, zf, sf, of; \ 319 cf = CC_DEP2 & 1; \ 320 pf = parity_table[(UChar)CC_DEP1]; \ 321 af = 0; /* undefined */ \ 322 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ 323 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ 324 /* of is defined if shift count == 1 */ \ 325 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \ 326 & X86G_CC_MASK_O; \ 327 return cf | pf | af | zf | sf | of; \ 328 } \ 329 } 330 331 /*-------------------------------------------------------------*/ 332 333 /* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */ 334 /* DEP1 = result, NDEP = old flags */ 335 #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \ 336 { \ 337 PREAMBLE(DATA_BITS); \ 338 { UInt fl \ 339 = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C)) \ 340 | (X86G_CC_MASK_C & CC_DEP1) \ 341 | (X86G_CC_MASK_O & (lshift(CC_DEP1, \ 342 11-(DATA_BITS-1)) \ 343 ^ lshift(CC_DEP1, 11))); \ 344 return fl; \ 345 } \ 346 } 347 348 /*-------------------------------------------------------------*/ 349 350 /* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */ 351 /* DEP1 = result, NDEP = old flags */ 352 #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \ 353 { \ 354 PREAMBLE(DATA_BITS); \ 355 { UInt fl \ 356 = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C)) \ 357 | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \ 358 | (X86G_CC_MASK_O & (lshift(CC_DEP1, \ 359 11-(DATA_BITS-1)) \ 360 ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \ 361 return fl; \ 362 } \ 363 } 364 365 /*-------------------------------------------------------------*/ 366 367 #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \ 368 DATA_U2TYPE, NARROWto2U) \ 369 { \ 370 PREAMBLE(DATA_BITS); \ 371 { UInt cf, pf, af, zf, sf, of; \ 372 DATA_UTYPE hi; \ 373 DATA_UTYPE lo \ 374 = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \ 375 * ((DATA_UTYPE)CC_DEP2) ); \ 376 DATA_U2TYPE rr \ 377 = NARROWto2U( \ 378 ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \ 379 * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \ 380 hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \ 381 cf = (hi != 0); \ 382 pf = parity_table[(UChar)lo]; \ 383 af = 0; /* undefined */ \ 384 zf = (lo == 0) << 6; \ 385 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \ 386 of = cf << 11; \ 387 return cf | pf | af | zf | sf | of; \ 388 } \ 389 } 390 391 /*-------------------------------------------------------------*/ 392 393 #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \ 394 DATA_S2TYPE, NARROWto2S) \ 395 { \ 396 PREAMBLE(DATA_BITS); \ 397 { UInt cf, pf, af, zf, sf, of; \ 398 DATA_STYPE hi; \ 399 DATA_STYPE lo \ 400 = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1) \ 401 * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) ); \ 402 DATA_S2TYPE rr \ 403 = NARROWto2S( \ 404 ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \ 405 * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \ 406 hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \ 407 cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \ 408 pf = parity_table[(UChar)lo]; \ 409 af = 0; /* undefined */ \ 410 zf = (lo == 0) << 6; \ 411 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \ 412 of = cf << 11; \ 413 return cf | pf | af | zf | sf | of; \ 414 } \ 415 } 416 417 418 #if PROFILE_EFLAGS 419 420 static Bool initted = False; 421 422 /* C flag, fast route */ 423 static UInt tabc_fast[X86G_CC_OP_NUMBER]; 424 /* C flag, slow route */ 425 static UInt tabc_slow[X86G_CC_OP_NUMBER]; 426 /* table for calculate_cond */ 427 static UInt tab_cond[X86G_CC_OP_NUMBER][16]; 428 /* total entry counts for calc_all, calc_c, calc_cond. */ 429 static UInt n_calc_all = 0; 430 static UInt n_calc_c = 0; 431 static UInt n_calc_cond = 0; 432 433 #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond))) 434 435 436 static void showCounts ( void ) 437 { 438 Int op, co; 439 HChar ch; 440 vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n", 441 n_calc_all, n_calc_cond, n_calc_c); 442 443 vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE" 444 " S NS P NP L NL LE NLE\n"); 445 vex_printf(" -----------------------------------------------------" 446 "----------------------------------------\n"); 447 for (op = 0; op < X86G_CC_OP_NUMBER; op++) { 448 449 ch = ' '; 450 if (op > 0 && (op-1) % 3 == 0) 451 ch = 'B'; 452 if (op > 0 && (op-1) % 3 == 1) 453 ch = 'W'; 454 if (op > 0 && (op-1) % 3 == 2) 455 ch = 'L'; 456 457 vex_printf("%2d%c: ", op, ch); 458 vex_printf("%6u ", tabc_slow[op]); 459 vex_printf("%6u ", tabc_fast[op]); 460 for (co = 0; co < 16; co++) { 461 Int n = tab_cond[op][co]; 462 if (n >= 1000) { 463 vex_printf(" %3dK", n / 1000); 464 } else 465 if (n >= 0) { 466 vex_printf(" %3d ", n ); 467 } else { 468 vex_printf(" "); 469 } 470 } 471 vex_printf("\n"); 472 } 473 vex_printf("\n"); 474 } 475 476 static void initCounts ( void ) 477 { 478 Int op, co; 479 initted = True; 480 for (op = 0; op < X86G_CC_OP_NUMBER; op++) { 481 tabc_fast[op] = tabc_slow[op] = 0; 482 for (co = 0; co < 16; co++) 483 tab_cond[op][co] = 0; 484 } 485 } 486 487 #endif /* PROFILE_EFLAGS */ 488 489 490 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 491 /* Calculate all the 6 flags from the supplied thunk parameters. 492 Worker function, not directly called from generated code. */ 493 static 494 UInt x86g_calculate_eflags_all_WRK ( UInt cc_op, 495 UInt cc_dep1_formal, 496 UInt cc_dep2_formal, 497 UInt cc_ndep_formal ) 498 { 499 switch (cc_op) { 500 case X86G_CC_OP_COPY: 501 return cc_dep1_formal 502 & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z 503 | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P); 504 505 case X86G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar ); 506 case X86G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort ); 507 case X86G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt ); 508 509 case X86G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar ); 510 case X86G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort ); 511 case X86G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt ); 512 513 case X86G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar ); 514 case X86G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort ); 515 case X86G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt ); 516 517 case X86G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar ); 518 case X86G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort ); 519 case X86G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt ); 520 521 case X86G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar ); 522 case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort ); 523 case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt ); 524 525 case X86G_CC_OP_INCB: ACTIONS_INC( 8, UChar ); 526 case X86G_CC_OP_INCW: ACTIONS_INC( 16, UShort ); 527 case X86G_CC_OP_INCL: ACTIONS_INC( 32, UInt ); 528 529 case X86G_CC_OP_DECB: ACTIONS_DEC( 8, UChar ); 530 case X86G_CC_OP_DECW: ACTIONS_DEC( 16, UShort ); 531 case X86G_CC_OP_DECL: ACTIONS_DEC( 32, UInt ); 532 533 case X86G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar ); 534 case X86G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort ); 535 case X86G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt ); 536 537 case X86G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar ); 538 case X86G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort ); 539 case X86G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt ); 540 541 case X86G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar ); 542 case X86G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort ); 543 case X86G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt ); 544 545 case X86G_CC_OP_RORB: ACTIONS_ROR( 8, UChar ); 546 case X86G_CC_OP_RORW: ACTIONS_ROR( 16, UShort ); 547 case X86G_CC_OP_RORL: ACTIONS_ROR( 32, UInt ); 548 549 case X86G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar, 550 UShort, toUShort ); 551 case X86G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort, 552 UInt, toUInt ); 553 case X86G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt, 554 ULong, idULong ); 555 556 case X86G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar, 557 Short, toUShort ); 558 case X86G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort, 559 Int, toUInt ); 560 case X86G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt, 561 Long, idULong ); 562 563 default: 564 /* shouldn't really make these calls from generated code */ 565 vex_printf("x86g_calculate_eflags_all_WRK(X86)" 566 "( %u, 0x%x, 0x%x, 0x%x )\n", 567 cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal ); 568 vpanic("x86g_calculate_eflags_all_WRK(X86)"); 569 } 570 } 571 572 573 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 574 /* Calculate all the 6 flags from the supplied thunk parameters. */ 575 UInt x86g_calculate_eflags_all ( UInt cc_op, 576 UInt cc_dep1, 577 UInt cc_dep2, 578 UInt cc_ndep ) 579 { 580 # if PROFILE_EFLAGS 581 if (!initted) initCounts(); 582 n_calc_all++; 583 if (SHOW_COUNTS_NOW) showCounts(); 584 # endif 585 return 586 x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep ); 587 } 588 589 590 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 591 /* Calculate just the carry flag from the supplied thunk parameters. */ 592 VEX_REGPARM(3) 593 UInt x86g_calculate_eflags_c ( UInt cc_op, 594 UInt cc_dep1, 595 UInt cc_dep2, 596 UInt cc_ndep ) 597 { 598 # if PROFILE_EFLAGS 599 if (!initted) initCounts(); 600 n_calc_c++; 601 tabc_fast[cc_op]++; 602 if (SHOW_COUNTS_NOW) showCounts(); 603 # endif 604 605 /* Fast-case some common ones. */ 606 switch (cc_op) { 607 case X86G_CC_OP_LOGICL: 608 case X86G_CC_OP_LOGICW: 609 case X86G_CC_OP_LOGICB: 610 return 0; 611 case X86G_CC_OP_SUBL: 612 return ((UInt)cc_dep1) < ((UInt)cc_dep2) 613 ? X86G_CC_MASK_C : 0; 614 case X86G_CC_OP_SUBW: 615 return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF)) 616 ? X86G_CC_MASK_C : 0; 617 case X86G_CC_OP_SUBB: 618 return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF)) 619 ? X86G_CC_MASK_C : 0; 620 case X86G_CC_OP_INCL: 621 case X86G_CC_OP_DECL: 622 return cc_ndep & X86G_CC_MASK_C; 623 default: 624 break; 625 } 626 627 # if PROFILE_EFLAGS 628 tabc_fast[cc_op]--; 629 tabc_slow[cc_op]++; 630 # endif 631 632 return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep) 633 & X86G_CC_MASK_C; 634 } 635 636 637 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 638 /* returns 1 or 0 */ 639 UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond, 640 UInt cc_op, 641 UInt cc_dep1, 642 UInt cc_dep2, 643 UInt cc_ndep ) 644 { 645 UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1, 646 cc_dep2, cc_ndep); 647 UInt of,sf,zf,cf,pf; 648 UInt inv = cond & 1; 649 650 # if PROFILE_EFLAGS 651 if (!initted) initCounts(); 652 tab_cond[cc_op][cond]++; 653 n_calc_cond++; 654 if (SHOW_COUNTS_NOW) showCounts(); 655 # endif 656 657 switch (cond) { 658 case X86CondNO: 659 case X86CondO: /* OF == 1 */ 660 of = eflags >> X86G_CC_SHIFT_O; 661 return 1 & (inv ^ of); 662 663 case X86CondNZ: 664 case X86CondZ: /* ZF == 1 */ 665 zf = eflags >> X86G_CC_SHIFT_Z; 666 return 1 & (inv ^ zf); 667 668 case X86CondNB: 669 case X86CondB: /* CF == 1 */ 670 cf = eflags >> X86G_CC_SHIFT_C; 671 return 1 & (inv ^ cf); 672 break; 673 674 case X86CondNBE: 675 case X86CondBE: /* (CF or ZF) == 1 */ 676 cf = eflags >> X86G_CC_SHIFT_C; 677 zf = eflags >> X86G_CC_SHIFT_Z; 678 return 1 & (inv ^ (cf | zf)); 679 break; 680 681 case X86CondNS: 682 case X86CondS: /* SF == 1 */ 683 sf = eflags >> X86G_CC_SHIFT_S; 684 return 1 & (inv ^ sf); 685 686 case X86CondNP: 687 case X86CondP: /* PF == 1 */ 688 pf = eflags >> X86G_CC_SHIFT_P; 689 return 1 & (inv ^ pf); 690 691 case X86CondNL: 692 case X86CondL: /* (SF xor OF) == 1 */ 693 sf = eflags >> X86G_CC_SHIFT_S; 694 of = eflags >> X86G_CC_SHIFT_O; 695 return 1 & (inv ^ (sf ^ of)); 696 break; 697 698 case X86CondNLE: 699 case X86CondLE: /* ((SF xor OF) or ZF) == 1 */ 700 sf = eflags >> X86G_CC_SHIFT_S; 701 of = eflags >> X86G_CC_SHIFT_O; 702 zf = eflags >> X86G_CC_SHIFT_Z; 703 return 1 & (inv ^ ((sf ^ of) | zf)); 704 break; 705 706 default: 707 /* shouldn't really make these calls from generated code */ 708 vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n", 709 cond, cc_op, cc_dep1, cc_dep2, cc_ndep ); 710 vpanic("x86g_calculate_condition"); 711 } 712 } 713 714 715 /* VISIBLE TO LIBVEX CLIENT */ 716 UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state ) 717 { 718 UInt eflags = x86g_calculate_eflags_all_WRK( 719 vex_state->guest_CC_OP, 720 vex_state->guest_CC_DEP1, 721 vex_state->guest_CC_DEP2, 722 vex_state->guest_CC_NDEP 723 ); 724 UInt dflag = vex_state->guest_DFLAG; 725 vassert(dflag == 1 || dflag == 0xFFFFFFFF); 726 if (dflag == 0xFFFFFFFF) 727 eflags |= X86G_CC_MASK_D; 728 if (vex_state->guest_IDFLAG == 1) 729 eflags |= X86G_CC_MASK_ID; 730 if (vex_state->guest_ACFLAG == 1) 731 eflags |= X86G_CC_MASK_AC; 732 733 return eflags; 734 } 735 736 /* VISIBLE TO LIBVEX CLIENT */ 737 void 738 LibVEX_GuestX86_put_eflags ( UInt eflags, 739 /*MOD*/VexGuestX86State* vex_state ) 740 { 741 /* D flag */ 742 if (eflags & X86G_CC_MASK_D) { 743 vex_state->guest_DFLAG = 0xFFFFFFFF; 744 eflags &= ~X86G_CC_MASK_D; 745 } 746 else 747 vex_state->guest_DFLAG = 1; 748 749 /* ID flag */ 750 if (eflags & X86G_CC_MASK_ID) { 751 vex_state->guest_IDFLAG = 1; 752 eflags &= ~X86G_CC_MASK_ID; 753 } 754 else 755 vex_state->guest_IDFLAG = 0; 756 757 /* AC flag */ 758 if (eflags & X86G_CC_MASK_AC) { 759 vex_state->guest_ACFLAG = 1; 760 eflags &= ~X86G_CC_MASK_AC; 761 } 762 else 763 vex_state->guest_ACFLAG = 0; 764 765 UInt cc_mask = X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z | 766 X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P; 767 vex_state->guest_CC_OP = X86G_CC_OP_COPY; 768 vex_state->guest_CC_DEP1 = eflags & cc_mask; 769 vex_state->guest_CC_DEP2 = 0; 770 vex_state->guest_CC_NDEP = 0; 771 } 772 773 /* VISIBLE TO LIBVEX CLIENT */ 774 void 775 LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag, 776 /*MOD*/VexGuestX86State* vex_state ) 777 { 778 UInt oszacp = x86g_calculate_eflags_all_WRK( 779 vex_state->guest_CC_OP, 780 vex_state->guest_CC_DEP1, 781 vex_state->guest_CC_DEP2, 782 vex_state->guest_CC_NDEP 783 ); 784 if (new_carry_flag & 1) { 785 oszacp |= X86G_CC_MASK_C; 786 } else { 787 oszacp &= ~X86G_CC_MASK_C; 788 } 789 vex_state->guest_CC_OP = X86G_CC_OP_COPY; 790 vex_state->guest_CC_DEP1 = oszacp; 791 vex_state->guest_CC_DEP2 = 0; 792 vex_state->guest_CC_NDEP = 0; 793 } 794 795 796 /*---------------------------------------------------------------*/ 797 /*--- %eflags translation-time function specialisers. ---*/ 798 /*--- These help iropt specialise calls the above run-time ---*/ 799 /*--- %eflags functions. ---*/ 800 /*---------------------------------------------------------------*/ 801 802 /* Used by the optimiser to try specialisations. Returns an 803 equivalent expression, or NULL if none. */ 804 805 static inline Bool isU32 ( IRExpr* e, UInt n ) 806 { 807 return 808 toBool( e->tag == Iex_Const 809 && e->Iex.Const.con->tag == Ico_U32 810 && e->Iex.Const.con->Ico.U32 == n ); 811 } 812 813 IRExpr* guest_x86_spechelper ( const HChar* function_name, 814 IRExpr** args, 815 IRStmt** precedingStmts, 816 Int n_precedingStmts ) 817 { 818 # define unop(_op,_a1) IRExpr_Unop((_op),(_a1)) 819 # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2)) 820 # define mkU32(_n) IRExpr_Const(IRConst_U32(_n)) 821 # define mkU8(_n) IRExpr_Const(IRConst_U8(_n)) 822 823 Int i, arity = 0; 824 for (i = 0; args[i]; i++) 825 arity++; 826 # if 0 827 vex_printf("spec request:\n"); 828 vex_printf(" %s ", function_name); 829 for (i = 0; i < arity; i++) { 830 vex_printf(" "); 831 ppIRExpr(args[i]); 832 } 833 vex_printf("\n"); 834 # endif 835 836 /* --------- specialising "x86g_calculate_condition" --------- */ 837 838 if (vex_streq(function_name, "x86g_calculate_condition")) { 839 /* specialise calls to above "calculate condition" function */ 840 IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2; 841 vassert(arity == 5); 842 cond = args[0]; 843 cc_op = args[1]; 844 cc_dep1 = args[2]; 845 cc_dep2 = args[3]; 846 847 /*---------------- ADDL ----------------*/ 848 849 if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) { 850 /* long add, then Z --> test (dst+src == 0) */ 851 return unop(Iop_1Uto32, 852 binop(Iop_CmpEQ32, 853 binop(Iop_Add32, cc_dep1, cc_dep2), 854 mkU32(0))); 855 } 856 857 /*---------------- SUBL ----------------*/ 858 859 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) { 860 /* long sub/cmp, then Z --> test dst==src */ 861 return unop(Iop_1Uto32, 862 binop(Iop_CmpEQ32, cc_dep1, cc_dep2)); 863 } 864 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) { 865 /* long sub/cmp, then NZ --> test dst!=src */ 866 return unop(Iop_1Uto32, 867 binop(Iop_CmpNE32, cc_dep1, cc_dep2)); 868 } 869 870 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) { 871 /* long sub/cmp, then L (signed less than) 872 --> test dst <s src */ 873 return unop(Iop_1Uto32, 874 binop(Iop_CmpLT32S, cc_dep1, cc_dep2)); 875 } 876 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) { 877 /* long sub/cmp, then NL (signed greater than or equal) 878 --> test !(dst <s src) */ 879 return binop(Iop_Xor32, 880 unop(Iop_1Uto32, 881 binop(Iop_CmpLT32S, cc_dep1, cc_dep2)), 882 mkU32(1)); 883 } 884 885 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) { 886 /* long sub/cmp, then LE (signed less than or equal) 887 --> test dst <=s src */ 888 return unop(Iop_1Uto32, 889 binop(Iop_CmpLE32S, cc_dep1, cc_dep2)); 890 } 891 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) { 892 /* long sub/cmp, then NLE (signed not less than or equal) 893 --> test dst >s src 894 --> test !(dst <=s src) */ 895 return binop(Iop_Xor32, 896 unop(Iop_1Uto32, 897 binop(Iop_CmpLE32S, cc_dep1, cc_dep2)), 898 mkU32(1)); 899 } 900 901 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) { 902 /* long sub/cmp, then BE (unsigned less than or equal) 903 --> test dst <=u src */ 904 return unop(Iop_1Uto32, 905 binop(Iop_CmpLE32U, cc_dep1, cc_dep2)); 906 } 907 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) { 908 /* long sub/cmp, then BE (unsigned greater than) 909 --> test !(dst <=u src) */ 910 return binop(Iop_Xor32, 911 unop(Iop_1Uto32, 912 binop(Iop_CmpLE32U, cc_dep1, cc_dep2)), 913 mkU32(1)); 914 } 915 916 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) { 917 /* long sub/cmp, then B (unsigned less than) 918 --> test dst <u src */ 919 return unop(Iop_1Uto32, 920 binop(Iop_CmpLT32U, cc_dep1, cc_dep2)); 921 } 922 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) { 923 /* long sub/cmp, then NB (unsigned greater than or equal) 924 --> test !(dst <u src) */ 925 return binop(Iop_Xor32, 926 unop(Iop_1Uto32, 927 binop(Iop_CmpLT32U, cc_dep1, cc_dep2)), 928 mkU32(1)); 929 } 930 931 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) { 932 /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */ 933 return unop(Iop_1Uto32, 934 binop(Iop_CmpLT32S, 935 binop(Iop_Sub32, cc_dep1, cc_dep2), 936 mkU32(0))); 937 } 938 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) { 939 /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */ 940 return binop(Iop_Xor32, 941 unop(Iop_1Uto32, 942 binop(Iop_CmpLT32S, 943 binop(Iop_Sub32, cc_dep1, cc_dep2), 944 mkU32(0))), 945 mkU32(1)); 946 } 947 948 /*---------------- SUBW ----------------*/ 949 950 if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) { 951 /* word sub/cmp, then Z --> test dst==src */ 952 return unop(Iop_1Uto32, 953 binop(Iop_CmpEQ16, 954 unop(Iop_32to16,cc_dep1), 955 unop(Iop_32to16,cc_dep2))); 956 } 957 if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) { 958 /* word sub/cmp, then NZ --> test dst!=src */ 959 return unop(Iop_1Uto32, 960 binop(Iop_CmpNE16, 961 unop(Iop_32to16,cc_dep1), 962 unop(Iop_32to16,cc_dep2))); 963 } 964 965 /*---------------- SUBB ----------------*/ 966 967 if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) { 968 /* byte sub/cmp, then Z --> test dst==src */ 969 return unop(Iop_1Uto32, 970 binop(Iop_CmpEQ8, 971 unop(Iop_32to8,cc_dep1), 972 unop(Iop_32to8,cc_dep2))); 973 } 974 if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) { 975 /* byte sub/cmp, then NZ --> test dst!=src */ 976 return unop(Iop_1Uto32, 977 binop(Iop_CmpNE8, 978 unop(Iop_32to8,cc_dep1), 979 unop(Iop_32to8,cc_dep2))); 980 } 981 982 if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) { 983 /* byte sub/cmp, then NBE (unsigned greater than) 984 --> test src <u dst */ 985 /* Note, args are opposite way round from the usual */ 986 return unop(Iop_1Uto32, 987 binop(Iop_CmpLT32U, 988 binop(Iop_And32,cc_dep2,mkU32(0xFF)), 989 binop(Iop_And32,cc_dep1,mkU32(0xFF)))); 990 } 991 992 if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS) 993 && isU32(cc_dep2, 0)) { 994 /* byte sub/cmp of zero, then S --> test (dst-0 <s 0) 995 --> test dst <s 0 996 --> (UInt)dst[7] 997 This is yet another scheme by which gcc figures out if the 998 top bit of a byte is 1 or 0. See also LOGICB/CondS below. */ 999 /* Note: isU32(cc_dep2, 0) is correct, even though this is 1000 for an 8-bit comparison, since the args to the helper 1001 function are always U32s. */ 1002 return binop(Iop_And32, 1003 binop(Iop_Shr32,cc_dep1,mkU8(7)), 1004 mkU32(1)); 1005 } 1006 if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS) 1007 && isU32(cc_dep2, 0)) { 1008 /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0) 1009 --> test !(dst <s 0) 1010 --> (UInt) !dst[7] 1011 */ 1012 return binop(Iop_Xor32, 1013 binop(Iop_And32, 1014 binop(Iop_Shr32,cc_dep1,mkU8(7)), 1015 mkU32(1)), 1016 mkU32(1)); 1017 } 1018 1019 /*---------------- LOGICL ----------------*/ 1020 1021 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) { 1022 /* long and/or/xor, then Z --> test dst==0 */ 1023 return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0))); 1024 } 1025 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) { 1026 /* long and/or/xor, then NZ --> test dst!=0 */ 1027 return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0))); 1028 } 1029 1030 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) { 1031 /* long and/or/xor, then LE 1032 This is pretty subtle. LOGIC sets SF and ZF according to the 1033 result and makes OF be zero. LE computes (SZ ^ OF) | ZF, but 1034 OF is zero, so this reduces to SZ | ZF -- which will be 1 iff 1035 the result is <=signed 0. Hence ... 1036 */ 1037 return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0))); 1038 } 1039 1040 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) { 1041 /* long and/or/xor, then BE 1042 LOGIC sets ZF according to the result and makes CF be zero. 1043 BE computes (CF | ZF), but CF is zero, so this reduces ZF 1044 -- which will be 1 iff the result is zero. Hence ... 1045 */ 1046 return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0))); 1047 } 1048 1049 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) { 1050 /* see comment below for (LOGICB, CondS) */ 1051 /* long and/or/xor, then S --> (UInt)result[31] */ 1052 return binop(Iop_And32, 1053 binop(Iop_Shr32,cc_dep1,mkU8(31)), 1054 mkU32(1)); 1055 } 1056 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) { 1057 /* see comment below for (LOGICB, CondNS) */ 1058 /* long and/or/xor, then S --> (UInt) ~ result[31] */ 1059 return binop(Iop_Xor32, 1060 binop(Iop_And32, 1061 binop(Iop_Shr32,cc_dep1,mkU8(31)), 1062 mkU32(1)), 1063 mkU32(1)); 1064 } 1065 1066 /*---------------- LOGICW ----------------*/ 1067 1068 if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) { 1069 /* word and/or/xor, then Z --> test dst==0 */ 1070 return unop(Iop_1Uto32, 1071 binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)), 1072 mkU32(0))); 1073 } 1074 1075 if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) { 1076 /* see comment below for (LOGICB, CondS) */ 1077 /* word and/or/xor, then S --> (UInt)result[15] */ 1078 return binop(Iop_And32, 1079 binop(Iop_Shr32,cc_dep1,mkU8(15)), 1080 mkU32(1)); 1081 } 1082 1083 /*---------------- LOGICB ----------------*/ 1084 1085 if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) { 1086 /* byte and/or/xor, then Z --> test dst==0 */ 1087 return unop(Iop_1Uto32, 1088 binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)), 1089 mkU32(0))); 1090 } 1091 if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) { 1092 /* byte and/or/xor, then Z --> test dst!=0 */ 1093 /* b9ac9: 84 c0 test %al,%al 1094 b9acb: 75 0d jne b9ada */ 1095 return unop(Iop_1Uto32, 1096 binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)), 1097 mkU32(0))); 1098 } 1099 1100 if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) { 1101 /* this is an idiom gcc sometimes uses to find out if the top 1102 bit of a byte register is set: eg testb %al,%al; js .. 1103 Since it just depends on the top bit of the byte, extract 1104 that bit and explicitly get rid of all the rest. This 1105 helps memcheck avoid false positives in the case where any 1106 of the other bits in the byte are undefined. */ 1107 /* byte and/or/xor, then S --> (UInt)result[7] */ 1108 return binop(Iop_And32, 1109 binop(Iop_Shr32,cc_dep1,mkU8(7)), 1110 mkU32(1)); 1111 } 1112 if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) { 1113 /* ditto, for negation-of-S. */ 1114 /* byte and/or/xor, then S --> (UInt) ~ result[7] */ 1115 return binop(Iop_Xor32, 1116 binop(Iop_And32, 1117 binop(Iop_Shr32,cc_dep1,mkU8(7)), 1118 mkU32(1)), 1119 mkU32(1)); 1120 } 1121 1122 /*---------------- DECL ----------------*/ 1123 1124 if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) { 1125 /* dec L, then Z --> test dst == 0 */ 1126 return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0))); 1127 } 1128 1129 if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) { 1130 /* dec L, then S --> compare DST <s 0 */ 1131 return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0))); 1132 } 1133 1134 /*---------------- DECW ----------------*/ 1135 1136 if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) { 1137 /* dec W, then Z --> test dst == 0 */ 1138 return unop(Iop_1Uto32, 1139 binop(Iop_CmpEQ32, 1140 binop(Iop_Shl32,cc_dep1,mkU8(16)), 1141 mkU32(0))); 1142 } 1143 1144 /*---------------- INCW ----------------*/ 1145 1146 if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) { 1147 /* This rewrite helps memcheck on 'incw %ax ; je ...'. */ 1148 /* inc W, then Z --> test dst == 0 */ 1149 return unop(Iop_1Uto32, 1150 binop(Iop_CmpEQ32, 1151 binop(Iop_Shl32,cc_dep1,mkU8(16)), 1152 mkU32(0))); 1153 } 1154 1155 /*---------------- SHRL ----------------*/ 1156 1157 if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) { 1158 /* SHRL, then Z --> test dep1 == 0 */ 1159 return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0))); 1160 } 1161 1162 /*---------------- COPY ----------------*/ 1163 /* This can happen, as a result of x87 FP compares: "fcom ... ; 1164 fnstsw %ax ; sahf ; jbe" for example. */ 1165 1166 if (isU32(cc_op, X86G_CC_OP_COPY) && 1167 (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) { 1168 /* COPY, then BE --> extract C and Z from dep1, and test 1169 (C or Z) == 1. */ 1170 /* COPY, then NBE --> extract C and Z from dep1, and test 1171 (C or Z) == 0. */ 1172 UInt nnn = isU32(cond, X86CondBE) ? 1 : 0; 1173 return 1174 unop( 1175 Iop_1Uto32, 1176 binop( 1177 Iop_CmpEQ32, 1178 binop( 1179 Iop_And32, 1180 binop( 1181 Iop_Or32, 1182 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)), 1183 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)) 1184 ), 1185 mkU32(1) 1186 ), 1187 mkU32(nnn) 1188 ) 1189 ); 1190 } 1191 1192 if (isU32(cc_op, X86G_CC_OP_COPY) 1193 && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) { 1194 /* COPY, then B --> extract C from dep1, and test (C == 1). */ 1195 /* COPY, then NB --> extract C from dep1, and test (C == 0). */ 1196 UInt nnn = isU32(cond, X86CondB) ? 1 : 0; 1197 return 1198 unop( 1199 Iop_1Uto32, 1200 binop( 1201 Iop_CmpEQ32, 1202 binop( 1203 Iop_And32, 1204 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)), 1205 mkU32(1) 1206 ), 1207 mkU32(nnn) 1208 ) 1209 ); 1210 } 1211 1212 if (isU32(cc_op, X86G_CC_OP_COPY) 1213 && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) { 1214 /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */ 1215 /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */ 1216 UInt nnn = isU32(cond, X86CondZ) ? 1 : 0; 1217 return 1218 unop( 1219 Iop_1Uto32, 1220 binop( 1221 Iop_CmpEQ32, 1222 binop( 1223 Iop_And32, 1224 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)), 1225 mkU32(1) 1226 ), 1227 mkU32(nnn) 1228 ) 1229 ); 1230 } 1231 1232 if (isU32(cc_op, X86G_CC_OP_COPY) 1233 && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) { 1234 /* COPY, then P --> extract P from dep1, and test (P == 1). */ 1235 /* COPY, then NP --> extract P from dep1, and test (P == 0). */ 1236 UInt nnn = isU32(cond, X86CondP) ? 1 : 0; 1237 return 1238 unop( 1239 Iop_1Uto32, 1240 binop( 1241 Iop_CmpEQ32, 1242 binop( 1243 Iop_And32, 1244 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)), 1245 mkU32(1) 1246 ), 1247 mkU32(nnn) 1248 ) 1249 ); 1250 } 1251 1252 return NULL; 1253 } 1254 1255 /* --------- specialising "x86g_calculate_eflags_c" --------- */ 1256 1257 if (vex_streq(function_name, "x86g_calculate_eflags_c")) { 1258 /* specialise calls to above "calculate_eflags_c" function */ 1259 IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep; 1260 vassert(arity == 4); 1261 cc_op = args[0]; 1262 cc_dep1 = args[1]; 1263 cc_dep2 = args[2]; 1264 cc_ndep = args[3]; 1265 1266 if (isU32(cc_op, X86G_CC_OP_SUBL)) { 1267 /* C after sub denotes unsigned less than */ 1268 return unop(Iop_1Uto32, 1269 binop(Iop_CmpLT32U, cc_dep1, cc_dep2)); 1270 } 1271 if (isU32(cc_op, X86G_CC_OP_SUBB)) { 1272 /* C after sub denotes unsigned less than */ 1273 return unop(Iop_1Uto32, 1274 binop(Iop_CmpLT32U, 1275 binop(Iop_And32,cc_dep1,mkU32(0xFF)), 1276 binop(Iop_And32,cc_dep2,mkU32(0xFF)))); 1277 } 1278 if (isU32(cc_op, X86G_CC_OP_LOGICL) 1279 || isU32(cc_op, X86G_CC_OP_LOGICW) 1280 || isU32(cc_op, X86G_CC_OP_LOGICB)) { 1281 /* cflag after logic is zero */ 1282 return mkU32(0); 1283 } 1284 if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) { 1285 /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */ 1286 return cc_ndep; 1287 } 1288 if (isU32(cc_op, X86G_CC_OP_COPY)) { 1289 /* cflag after COPY is stored in DEP1. */ 1290 return 1291 binop( 1292 Iop_And32, 1293 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)), 1294 mkU32(1) 1295 ); 1296 } 1297 if (isU32(cc_op, X86G_CC_OP_ADDL)) { 1298 /* C after add denotes sum <u either arg */ 1299 return unop(Iop_1Uto32, 1300 binop(Iop_CmpLT32U, 1301 binop(Iop_Add32, cc_dep1, cc_dep2), 1302 cc_dep1)); 1303 } 1304 // ATC, requires verification, no test case known 1305 //if (isU32(cc_op, X86G_CC_OP_SMULL)) { 1306 // /* C after signed widening multiply denotes the case where 1307 // the top half of the result isn't simply the sign extension 1308 // of the bottom half (iow the result doesn't fit completely 1309 // in the bottom half). Hence: 1310 // C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31 1311 // where 'x' denotes signed widening multiply.*/ 1312 // return 1313 // unop(Iop_1Uto32, 1314 // binop(Iop_CmpNE32, 1315 // unop(Iop_64HIto32, 1316 // binop(Iop_MullS32, cc_dep1, cc_dep2)), 1317 // binop(Iop_Sar32, 1318 // binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) )); 1319 //} 1320 # if 0 1321 if (cc_op->tag == Iex_Const) { 1322 vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n"); 1323 } 1324 # endif 1325 1326 return NULL; 1327 } 1328 1329 /* --------- specialising "x86g_calculate_eflags_all" --------- */ 1330 1331 if (vex_streq(function_name, "x86g_calculate_eflags_all")) { 1332 /* specialise calls to above "calculate_eflags_all" function */ 1333 IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */ 1334 vassert(arity == 4); 1335 cc_op = args[0]; 1336 cc_dep1 = args[1]; 1337 /* cc_dep2 = args[2]; */ 1338 /* cc_ndep = args[3]; */ 1339 1340 if (isU32(cc_op, X86G_CC_OP_COPY)) { 1341 /* eflags after COPY are stored in DEP1. */ 1342 return 1343 binop( 1344 Iop_And32, 1345 cc_dep1, 1346 mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z 1347 | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P) 1348 ); 1349 } 1350 return NULL; 1351 } 1352 1353 # undef unop 1354 # undef binop 1355 # undef mkU32 1356 # undef mkU8 1357 1358 return NULL; 1359 } 1360 1361 1362 /*---------------------------------------------------------------*/ 1363 /*--- Supporting functions for x87 FPU activities. ---*/ 1364 /*---------------------------------------------------------------*/ 1365 1366 static inline Bool host_is_little_endian ( void ) 1367 { 1368 UInt x = 0x76543210; 1369 UChar* p = (UChar*)(&x); 1370 return toBool(*p == 0x10); 1371 } 1372 1373 /* 80 and 64-bit floating point formats: 1374 1375 80-bit: 1376 1377 S 0 0-------0 zero 1378 S 0 0X------X denormals 1379 S 1-7FFE 1X------X normals (all normals have leading 1) 1380 S 7FFF 10------0 infinity 1381 S 7FFF 10X-----X snan 1382 S 7FFF 11X-----X qnan 1383 1384 S is the sign bit. For runs X----X, at least one of the Xs must be 1385 nonzero. Exponent is 15 bits, fractional part is 63 bits, and 1386 there is an explicitly represented leading 1, and a sign bit, 1387 giving 80 in total. 1388 1389 64-bit avoids the confusion of an explicitly represented leading 1 1390 and so is simpler: 1391 1392 S 0 0------0 zero 1393 S 0 X------X denormals 1394 S 1-7FE any normals 1395 S 7FF 0------0 infinity 1396 S 7FF 0X-----X snan 1397 S 7FF 1X-----X qnan 1398 1399 Exponent is 11 bits, fractional part is 52 bits, and there is a 1400 sign bit, giving 64 in total. 1401 */ 1402 1403 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */ 1404 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 1405 UInt x86g_calculate_FXAM ( UInt tag, ULong dbl ) 1406 { 1407 Bool mantissaIsZero; 1408 Int bexp; 1409 UChar sign; 1410 UChar* f64; 1411 1412 vassert(host_is_little_endian()); 1413 1414 /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */ 1415 1416 f64 = (UChar*)(&dbl); 1417 sign = toUChar( (f64[7] >> 7) & 1 ); 1418 1419 /* First off, if the tag indicates the register was empty, 1420 return 1,0,sign,1 */ 1421 if (tag == 0) { 1422 /* vex_printf("Empty\n"); */ 1423 return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1) 1424 | X86G_FC_MASK_C0; 1425 } 1426 1427 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F); 1428 bexp &= 0x7FF; 1429 1430 mantissaIsZero 1431 = toBool( 1432 (f64[6] & 0x0F) == 0 1433 && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0 1434 ); 1435 1436 /* If both exponent and mantissa are zero, the value is zero. 1437 Return 1,0,sign,0. */ 1438 if (bexp == 0 && mantissaIsZero) { 1439 /* vex_printf("Zero\n"); */ 1440 return X86G_FC_MASK_C3 | 0 1441 | (sign << X86G_FC_SHIFT_C1) | 0; 1442 } 1443 1444 /* If exponent is zero but mantissa isn't, it's a denormal. 1445 Return 1,1,sign,0. */ 1446 if (bexp == 0 && !mantissaIsZero) { 1447 /* vex_printf("Denormal\n"); */ 1448 return X86G_FC_MASK_C3 | X86G_FC_MASK_C2 1449 | (sign << X86G_FC_SHIFT_C1) | 0; 1450 } 1451 1452 /* If the exponent is 7FF and the mantissa is zero, this is an infinity. 1453 Return 0,1,sign,1. */ 1454 if (bexp == 0x7FF && mantissaIsZero) { 1455 /* vex_printf("Inf\n"); */ 1456 return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) 1457 | X86G_FC_MASK_C0; 1458 } 1459 1460 /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN. 1461 Return 0,0,sign,1. */ 1462 if (bexp == 0x7FF && !mantissaIsZero) { 1463 /* vex_printf("NaN\n"); */ 1464 return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0; 1465 } 1466 1467 /* Uh, ok, we give up. It must be a normal finite number. 1468 Return 0,1,sign,0. 1469 */ 1470 /* vex_printf("normal\n"); */ 1471 return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0; 1472 } 1473 1474 1475 /* CALLED FROM GENERATED CODE */ 1476 /* DIRTY HELPER (reads guest memory) */ 1477 ULong x86g_dirtyhelper_loadF80le ( Addr addrU ) 1478 { 1479 ULong f64; 1480 convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 ); 1481 return f64; 1482 } 1483 1484 /* CALLED FROM GENERATED CODE */ 1485 /* DIRTY HELPER (writes guest memory) */ 1486 void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 ) 1487 { 1488 convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU ); 1489 } 1490 1491 1492 /*----------------------------------------------*/ 1493 /*--- The exported fns .. ---*/ 1494 /*----------------------------------------------*/ 1495 1496 /* Layout of the real x87 state. */ 1497 /* 13 June 05: Fpu_State and auxiliary constants was moved to 1498 g_generic_x87.h */ 1499 1500 1501 /* CLEAN HELPER */ 1502 /* fpucw[15:0] contains a x87 native format FPU control word. 1503 Extract from it the required FPROUND value and any resulting 1504 emulation warning, and return (warn << 32) | fpround value. 1505 */ 1506 ULong x86g_check_fldcw ( UInt fpucw ) 1507 { 1508 /* Decide on a rounding mode. fpucw[11:10] holds it. */ 1509 /* NOTE, encoded exactly as per enum IRRoundingMode. */ 1510 UInt rmode = (fpucw >> 10) & 3; 1511 1512 /* Detect any required emulation warnings. */ 1513 VexEmNote ew = EmNote_NONE; 1514 1515 if ((fpucw & 0x3F) != 0x3F) { 1516 /* unmasked exceptions! */ 1517 ew = EmWarn_X86_x87exns; 1518 } 1519 else 1520 if (((fpucw >> 8) & 3) != 3) { 1521 /* unsupported precision */ 1522 ew = EmWarn_X86_x87precision; 1523 } 1524 1525 return (((ULong)ew) << 32) | ((ULong)rmode); 1526 } 1527 1528 /* CLEAN HELPER */ 1529 /* Given fpround as an IRRoundingMode value, create a suitable x87 1530 native format FPU control word. */ 1531 UInt x86g_create_fpucw ( UInt fpround ) 1532 { 1533 fpround &= 3; 1534 return 0x037F | (fpround << 10); 1535 } 1536 1537 1538 /* CLEAN HELPER */ 1539 /* mxcsr[15:0] contains a SSE native format MXCSR value. 1540 Extract from it the required SSEROUND value and any resulting 1541 emulation warning, and return (warn << 32) | sseround value. 1542 */ 1543 ULong x86g_check_ldmxcsr ( UInt mxcsr ) 1544 { 1545 /* Decide on a rounding mode. mxcsr[14:13] holds it. */ 1546 /* NOTE, encoded exactly as per enum IRRoundingMode. */ 1547 UInt rmode = (mxcsr >> 13) & 3; 1548 1549 /* Detect any required emulation warnings. */ 1550 VexEmNote ew = EmNote_NONE; 1551 1552 if ((mxcsr & 0x1F80) != 0x1F80) { 1553 /* unmasked exceptions! */ 1554 ew = EmWarn_X86_sseExns; 1555 } 1556 else 1557 if (mxcsr & (1<<15)) { 1558 /* FZ is set */ 1559 ew = EmWarn_X86_fz; 1560 } 1561 else 1562 if (mxcsr & (1<<6)) { 1563 /* DAZ is set */ 1564 ew = EmWarn_X86_daz; 1565 } 1566 1567 return (((ULong)ew) << 32) | ((ULong)rmode); 1568 } 1569 1570 1571 /* CLEAN HELPER */ 1572 /* Given sseround as an IRRoundingMode value, create a suitable SSE 1573 native format MXCSR value. */ 1574 UInt x86g_create_mxcsr ( UInt sseround ) 1575 { 1576 sseround &= 3; 1577 return 0x1F80 | (sseround << 13); 1578 } 1579 1580 1581 /* CALLED FROM GENERATED CODE */ 1582 /* DIRTY HELPER (writes guest state) */ 1583 /* Initialise the x87 FPU state as per 'finit'. */ 1584 void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst ) 1585 { 1586 Int i; 1587 gst->guest_FTOP = 0; 1588 for (i = 0; i < 8; i++) { 1589 gst->guest_FPTAG[i] = 0; /* empty */ 1590 gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */ 1591 } 1592 gst->guest_FPROUND = (UInt)Irrm_NEAREST; 1593 gst->guest_FC3210 = 0; 1594 } 1595 1596 1597 /* This is used to implement both 'frstor' and 'fldenv'. The latter 1598 appears to differ from the former only in that the 8 FP registers 1599 themselves are not transferred into the guest state. */ 1600 static 1601 VexEmNote do_put_x87 ( Bool moveRegs, 1602 /*IN*/Fpu_State* x87_state, 1603 /*OUT*/VexGuestX86State* vex_state ) 1604 { 1605 Int stno, preg; 1606 UInt tag; 1607 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); 1608 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); 1609 UInt ftop = (x87_state->env[FP_ENV_STAT] >> 11) & 7; 1610 UInt tagw = x87_state->env[FP_ENV_TAG]; 1611 UInt fpucw = x87_state->env[FP_ENV_CTRL]; 1612 UInt c3210 = x87_state->env[FP_ENV_STAT] & 0x4700; 1613 VexEmNote ew; 1614 UInt fpround; 1615 ULong pair; 1616 1617 /* Copy registers and tags */ 1618 for (stno = 0; stno < 8; stno++) { 1619 preg = (stno + ftop) & 7; 1620 tag = (tagw >> (2*preg)) & 3; 1621 if (tag == 3) { 1622 /* register is empty */ 1623 /* hmm, if it's empty, does it still get written? Probably 1624 safer to say it does. If we don't, memcheck could get out 1625 of sync, in that it thinks all FP registers are defined by 1626 this helper, but in reality some have not been updated. */ 1627 if (moveRegs) 1628 vexRegs[preg] = 0; /* IEEE754 64-bit zero */ 1629 vexTags[preg] = 0; 1630 } else { 1631 /* register is non-empty */ 1632 if (moveRegs) 1633 convert_f80le_to_f64le( &x87_state->reg[10*stno], 1634 (UChar*)&vexRegs[preg] ); 1635 vexTags[preg] = 1; 1636 } 1637 } 1638 1639 /* stack pointer */ 1640 vex_state->guest_FTOP = ftop; 1641 1642 /* status word */ 1643 vex_state->guest_FC3210 = c3210; 1644 1645 /* handle the control word, setting FPROUND and detecting any 1646 emulation warnings. */ 1647 pair = x86g_check_fldcw ( (UInt)fpucw ); 1648 fpround = (UInt)pair; 1649 ew = (VexEmNote)(pair >> 32); 1650 1651 vex_state->guest_FPROUND = fpround & 3; 1652 1653 /* emulation warnings --> caller */ 1654 return ew; 1655 } 1656 1657 1658 /* Create an x87 FPU state from the guest state, as close as 1659 we can approximate it. */ 1660 static 1661 void do_get_x87 ( /*IN*/VexGuestX86State* vex_state, 1662 /*OUT*/Fpu_State* x87_state ) 1663 { 1664 Int i, stno, preg; 1665 UInt tagw; 1666 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); 1667 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); 1668 UInt ftop = vex_state->guest_FTOP; 1669 UInt c3210 = vex_state->guest_FC3210; 1670 1671 for (i = 0; i < 14; i++) 1672 x87_state->env[i] = 0; 1673 1674 x87_state->env[1] = x87_state->env[3] = x87_state->env[5] 1675 = x87_state->env[13] = 0xFFFF; 1676 x87_state->env[FP_ENV_STAT] 1677 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700)); 1678 x87_state->env[FP_ENV_CTRL] 1679 = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND )); 1680 1681 /* Dump the register stack in ST order. */ 1682 tagw = 0; 1683 for (stno = 0; stno < 8; stno++) { 1684 preg = (stno + ftop) & 7; 1685 if (vexTags[preg] == 0) { 1686 /* register is empty */ 1687 tagw |= (3 << (2*preg)); 1688 convert_f64le_to_f80le( (UChar*)&vexRegs[preg], 1689 &x87_state->reg[10*stno] ); 1690 } else { 1691 /* register is full. */ 1692 tagw |= (0 << (2*preg)); 1693 convert_f64le_to_f80le( (UChar*)&vexRegs[preg], 1694 &x87_state->reg[10*stno] ); 1695 } 1696 } 1697 x87_state->env[FP_ENV_TAG] = toUShort(tagw); 1698 } 1699 1700 1701 /* CALLED FROM GENERATED CODE */ 1702 /* DIRTY HELPER (reads guest state, writes guest mem) */ 1703 void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr ) 1704 { 1705 /* Somewhat roundabout, but at least it's simple. */ 1706 Fpu_State tmp; 1707 UShort* addrS = (UShort*)addr; 1708 UChar* addrC = (UChar*)addr; 1709 U128* xmm = (U128*)(addr + 160); 1710 UInt mxcsr; 1711 UShort fp_tags; 1712 UInt summary_tags; 1713 Int r, stno; 1714 UShort *srcS, *dstS; 1715 1716 do_get_x87( gst, &tmp ); 1717 mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND ); 1718 1719 /* Now build the proper fxsave image from the x87 image we just 1720 made. */ 1721 1722 addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */ 1723 addrS[1] = tmp.env[FP_ENV_STAT]; /* FCW: fpu status word */ 1724 1725 /* set addrS[2] in an endian-independent way */ 1726 summary_tags = 0; 1727 fp_tags = tmp.env[FP_ENV_TAG]; 1728 for (r = 0; r < 8; r++) { 1729 if ( ((fp_tags >> (2*r)) & 3) != 3 ) 1730 summary_tags |= (1 << r); 1731 } 1732 addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */ 1733 addrC[5] = 0; /* pad */ 1734 1735 addrS[3] = 0; /* FOP: fpu opcode (bogus) */ 1736 addrS[4] = 0; 1737 addrS[5] = 0; /* FPU IP (bogus) */ 1738 addrS[6] = 0; /* FPU IP's segment selector (bogus) (although we 1739 could conceivably dump %CS here) */ 1740 1741 addrS[7] = 0; /* Intel reserved */ 1742 1743 addrS[8] = 0; /* FPU DP (operand pointer) (bogus) */ 1744 addrS[9] = 0; /* FPU DP (operand pointer) (bogus) */ 1745 addrS[10] = 0; /* segment selector for above operand pointer; %DS 1746 perhaps? */ 1747 addrS[11] = 0; /* Intel reserved */ 1748 1749 addrS[12] = toUShort(mxcsr); /* MXCSR */ 1750 addrS[13] = toUShort(mxcsr >> 16); 1751 1752 addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */ 1753 addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */ 1754 1755 /* Copy in the FP registers, in ST order. */ 1756 for (stno = 0; stno < 8; stno++) { 1757 srcS = (UShort*)(&tmp.reg[10*stno]); 1758 dstS = (UShort*)(&addrS[16 + 8*stno]); 1759 dstS[0] = srcS[0]; 1760 dstS[1] = srcS[1]; 1761 dstS[2] = srcS[2]; 1762 dstS[3] = srcS[3]; 1763 dstS[4] = srcS[4]; 1764 dstS[5] = 0; 1765 dstS[6] = 0; 1766 dstS[7] = 0; 1767 } 1768 1769 /* That's the first 160 bytes of the image done. Now only %xmm0 1770 .. %xmm7 remain to be copied. If the host is big-endian, these 1771 need to be byte-swapped. */ 1772 vassert(host_is_little_endian()); 1773 1774 # define COPY_U128(_dst,_src) \ 1775 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \ 1776 _dst[2] = _src[2]; _dst[3] = _src[3]; } \ 1777 while (0) 1778 1779 COPY_U128( xmm[0], gst->guest_XMM0 ); 1780 COPY_U128( xmm[1], gst->guest_XMM1 ); 1781 COPY_U128( xmm[2], gst->guest_XMM2 ); 1782 COPY_U128( xmm[3], gst->guest_XMM3 ); 1783 COPY_U128( xmm[4], gst->guest_XMM4 ); 1784 COPY_U128( xmm[5], gst->guest_XMM5 ); 1785 COPY_U128( xmm[6], gst->guest_XMM6 ); 1786 COPY_U128( xmm[7], gst->guest_XMM7 ); 1787 1788 # undef COPY_U128 1789 } 1790 1791 1792 /* CALLED FROM GENERATED CODE */ 1793 /* DIRTY HELPER (writes guest state, reads guest mem) */ 1794 VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr ) 1795 { 1796 Fpu_State tmp; 1797 VexEmNote warnX87 = EmNote_NONE; 1798 VexEmNote warnXMM = EmNote_NONE; 1799 UShort* addrS = (UShort*)addr; 1800 UChar* addrC = (UChar*)addr; 1801 U128* xmm = (U128*)(addr + 160); 1802 UShort fp_tags; 1803 Int r, stno, i; 1804 1805 /* Restore %xmm0 .. %xmm7. If the host is big-endian, these need 1806 to be byte-swapped. */ 1807 vassert(host_is_little_endian()); 1808 1809 # define COPY_U128(_dst,_src) \ 1810 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \ 1811 _dst[2] = _src[2]; _dst[3] = _src[3]; } \ 1812 while (0) 1813 1814 COPY_U128( gst->guest_XMM0, xmm[0] ); 1815 COPY_U128( gst->guest_XMM1, xmm[1] ); 1816 COPY_U128( gst->guest_XMM2, xmm[2] ); 1817 COPY_U128( gst->guest_XMM3, xmm[3] ); 1818 COPY_U128( gst->guest_XMM4, xmm[4] ); 1819 COPY_U128( gst->guest_XMM5, xmm[5] ); 1820 COPY_U128( gst->guest_XMM6, xmm[6] ); 1821 COPY_U128( gst->guest_XMM7, xmm[7] ); 1822 1823 # undef COPY_U128 1824 1825 /* Copy the x87 registers out of the image, into a temporary 1826 Fpu_State struct. */ 1827 1828 /* LLVM on Darwin turns the following loop into a movaps plus a 1829 handful of scalar stores. This would work fine except for the 1830 fact that VEX doesn't keep the stack correctly (16-) aligned for 1831 the call, so it segfaults. Hence, split the loop into two 1832 pieces (and pray LLVM doesn't merely glue them back together) so 1833 it's composed only of scalar stores and so is alignment 1834 insensitive. Of course this is a kludge of the lamest kind -- 1835 VEX should be fixed properly. */ 1836 /* Code that seems to trigger the problem: 1837 for (i = 0; i < 14; i++) tmp.env[i] = 0; */ 1838 for (i = 0; i < 7; i++) tmp.env[i+0] = 0; 1839 __asm__ __volatile__("" ::: "memory"); 1840 for (i = 0; i < 7; i++) tmp.env[i+7] = 0; 1841 1842 for (i = 0; i < 80; i++) tmp.reg[i] = 0; 1843 /* fill in tmp.reg[0..7] */ 1844 for (stno = 0; stno < 8; stno++) { 1845 UShort* dstS = (UShort*)(&tmp.reg[10*stno]); 1846 UShort* srcS = (UShort*)(&addrS[16 + 8*stno]); 1847 dstS[0] = srcS[0]; 1848 dstS[1] = srcS[1]; 1849 dstS[2] = srcS[2]; 1850 dstS[3] = srcS[3]; 1851 dstS[4] = srcS[4]; 1852 } 1853 /* fill in tmp.env[0..13] */ 1854 tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */ 1855 tmp.env[FP_ENV_STAT] = addrS[1]; /* FCW: fpu status word */ 1856 1857 fp_tags = 0; 1858 for (r = 0; r < 8; r++) { 1859 if (addrC[4] & (1<<r)) 1860 fp_tags |= (0 << (2*r)); /* EMPTY */ 1861 else 1862 fp_tags |= (3 << (2*r)); /* VALID -- not really precise enough. */ 1863 } 1864 tmp.env[FP_ENV_TAG] = fp_tags; 1865 1866 /* Now write 'tmp' into the guest state. */ 1867 warnX87 = do_put_x87( True/*moveRegs*/, &tmp, gst ); 1868 1869 { UInt w32 = (((UInt)addrS[12]) & 0xFFFF) 1870 | ((((UInt)addrS[13]) & 0xFFFF) << 16); 1871 ULong w64 = x86g_check_ldmxcsr( w32 ); 1872 1873 warnXMM = (VexEmNote)(w64 >> 32); 1874 1875 gst->guest_SSEROUND = w64 & 0xFFFFFFFF; 1876 } 1877 1878 /* Prefer an X87 emwarn over an XMM one, if both exist. */ 1879 if (warnX87 != EmNote_NONE) 1880 return warnX87; 1881 else 1882 return warnXMM; 1883 } 1884 1885 1886 /* CALLED FROM GENERATED CODE */ 1887 /* DIRTY HELPER (reads guest state, writes guest mem) */ 1888 void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr ) 1889 { 1890 do_get_x87( gst, (Fpu_State*)addr ); 1891 } 1892 1893 /* CALLED FROM GENERATED CODE */ 1894 /* DIRTY HELPER (writes guest state, reads guest mem) */ 1895 VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr ) 1896 { 1897 return do_put_x87( True/*regs too*/, (Fpu_State*)addr, gst ); 1898 } 1899 1900 /* CALLED FROM GENERATED CODE */ 1901 /* DIRTY HELPER (reads guest state, writes guest mem) */ 1902 void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr ) 1903 { 1904 /* Somewhat roundabout, but at least it's simple. */ 1905 Int i; 1906 UShort* addrP = (UShort*)addr; 1907 Fpu_State tmp; 1908 do_get_x87( gst, &tmp ); 1909 for (i = 0; i < 14; i++) 1910 addrP[i] = tmp.env[i]; 1911 } 1912 1913 /* CALLED FROM GENERATED CODE */ 1914 /* DIRTY HELPER (writes guest state, reads guest mem) */ 1915 VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr ) 1916 { 1917 return do_put_x87( False/*don't move regs*/, (Fpu_State*)addr, gst); 1918 } 1919 1920 /* VISIBLE TO LIBVEX CLIENT */ 1921 /* Do x87 save from the supplied VexGuestX86State structure and store the 1922 result at the given address which represents a buffer of at least 108 1923 bytes. */ 1924 void LibVEX_GuestX86_get_x87 ( /*IN*/VexGuestX86State* vex_state, 1925 /*OUT*/UChar* x87_state ) 1926 { 1927 do_get_x87 ( vex_state, (Fpu_State*)x87_state ); 1928 } 1929 1930 /* VISIBLE TO LIBVEX CLIENT */ 1931 /* Do x87 restore from the supplied address and store read values to the given 1932 VexGuestX86State structure. */ 1933 VexEmNote LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state, 1934 /*MOD*/VexGuestX86State* vex_state ) 1935 { 1936 return do_put_x87 ( True/*moveRegs*/, (Fpu_State*)x87_state, vex_state ); 1937 } 1938 1939 /* VISIBLE TO LIBVEX CLIENT */ 1940 /* Return mxcsr from the supplied VexGuestX86State structure. */ 1941 UInt LibVEX_GuestX86_get_mxcsr ( /*IN*/VexGuestX86State* vex_state ) 1942 { 1943 return x86g_create_mxcsr ( vex_state->guest_SSEROUND ); 1944 } 1945 1946 /* VISIBLE TO LIBVEX CLIENT */ 1947 /* Modify the given VexGuestX86State structure according to the passed mxcsr 1948 value. */ 1949 VexEmNote LibVEX_GuestX86_put_mxcsr ( /*IN*/UInt mxcsr, 1950 /*MOD*/VexGuestX86State* vex_state) 1951 { 1952 ULong w64 = x86g_check_ldmxcsr( mxcsr ); 1953 vex_state->guest_SSEROUND = w64 & 0xFFFFFFFF; 1954 return (VexEmNote)(w64 >> 32); 1955 } 1956 1957 /*---------------------------------------------------------------*/ 1958 /*--- Misc integer helpers, including rotates and CPUID. ---*/ 1959 /*---------------------------------------------------------------*/ 1960 1961 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 1962 /* Calculate both flags and value result for rotate right 1963 through the carry bit. Result in low 32 bits, 1964 new flags (OSZACP) in high 32 bits. 1965 */ 1966 ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz ) 1967 { 1968 UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf; 1969 1970 switch (sz) { 1971 case 4: 1972 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1; 1973 of = ((arg >> 31) ^ cf) & 1; 1974 while (tempCOUNT > 0) { 1975 tempcf = arg & 1; 1976 arg = (arg >> 1) | (cf << 31); 1977 cf = tempcf; 1978 tempCOUNT--; 1979 } 1980 break; 1981 case 2: 1982 while (tempCOUNT >= 17) tempCOUNT -= 17; 1983 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1; 1984 of = ((arg >> 15) ^ cf) & 1; 1985 while (tempCOUNT > 0) { 1986 tempcf = arg & 1; 1987 arg = ((arg >> 1) & 0x7FFF) | (cf << 15); 1988 cf = tempcf; 1989 tempCOUNT--; 1990 } 1991 break; 1992 case 1: 1993 while (tempCOUNT >= 9) tempCOUNT -= 9; 1994 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1; 1995 of = ((arg >> 7) ^ cf) & 1; 1996 while (tempCOUNT > 0) { 1997 tempcf = arg & 1; 1998 arg = ((arg >> 1) & 0x7F) | (cf << 7); 1999 cf = tempcf; 2000 tempCOUNT--; 2001 } 2002 break; 2003 default: 2004 vpanic("calculate_RCR: invalid size"); 2005 } 2006 2007 cf &= 1; 2008 of &= 1; 2009 eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O); 2010 eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O); 2011 2012 return (((ULong)eflags_in) << 32) | ((ULong)arg); 2013 } 2014 2015 2016 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2017 /* Calculate both flags and value result for rotate left 2018 through the carry bit. Result in low 32 bits, 2019 new flags (OSZACP) in high 32 bits. 2020 */ 2021 ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz ) 2022 { 2023 UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf; 2024 2025 switch (sz) { 2026 case 4: 2027 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1; 2028 while (tempCOUNT > 0) { 2029 tempcf = (arg >> 31) & 1; 2030 arg = (arg << 1) | (cf & 1); 2031 cf = tempcf; 2032 tempCOUNT--; 2033 } 2034 of = ((arg >> 31) ^ cf) & 1; 2035 break; 2036 case 2: 2037 while (tempCOUNT >= 17) tempCOUNT -= 17; 2038 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1; 2039 while (tempCOUNT > 0) { 2040 tempcf = (arg >> 15) & 1; 2041 arg = 0xFFFF & ((arg << 1) | (cf & 1)); 2042 cf = tempcf; 2043 tempCOUNT--; 2044 } 2045 of = ((arg >> 15) ^ cf) & 1; 2046 break; 2047 case 1: 2048 while (tempCOUNT >= 9) tempCOUNT -= 9; 2049 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1; 2050 while (tempCOUNT > 0) { 2051 tempcf = (arg >> 7) & 1; 2052 arg = 0xFF & ((arg << 1) | (cf & 1)); 2053 cf = tempcf; 2054 tempCOUNT--; 2055 } 2056 of = ((arg >> 7) ^ cf) & 1; 2057 break; 2058 default: 2059 vpanic("calculate_RCL: invalid size"); 2060 } 2061 2062 cf &= 1; 2063 of &= 1; 2064 eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O); 2065 eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O); 2066 2067 return (((ULong)eflags_in) << 32) | ((ULong)arg); 2068 } 2069 2070 2071 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2072 /* Calculate both flags and value result for DAA/DAS/AAA/AAS. 2073 AX value in low half of arg, OSZACP in upper half. 2074 See guest-x86/toIR.c usage point for details. 2075 */ 2076 static UInt calc_parity_8bit ( UInt w32 ) { 2077 UInt i; 2078 UInt p = 1; 2079 for (i = 0; i < 8; i++) 2080 p ^= (1 & (w32 >> i)); 2081 return p; 2082 } 2083 UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode ) 2084 { 2085 UInt r_AL = (flags_and_AX >> 0) & 0xFF; 2086 UInt r_AH = (flags_and_AX >> 8) & 0xFF; 2087 UInt r_O = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1; 2088 UInt r_S = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1; 2089 UInt r_Z = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1; 2090 UInt r_A = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1; 2091 UInt r_C = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1; 2092 UInt r_P = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1; 2093 UInt result = 0; 2094 2095 switch (opcode) { 2096 case 0x27: { /* DAA */ 2097 UInt old_AL = r_AL; 2098 UInt old_C = r_C; 2099 r_C = 0; 2100 if ((r_AL & 0xF) > 9 || r_A == 1) { 2101 r_AL = r_AL + 6; 2102 r_C = old_C; 2103 if (r_AL >= 0x100) r_C = 1; 2104 r_A = 1; 2105 } else { 2106 r_A = 0; 2107 } 2108 if (old_AL > 0x99 || old_C == 1) { 2109 r_AL = r_AL + 0x60; 2110 r_C = 1; 2111 } else { 2112 r_C = 0; 2113 } 2114 /* O is undefined. S Z and P are set according to the 2115 result. */ 2116 r_AL &= 0xFF; 2117 r_O = 0; /* let's say */ 2118 r_S = (r_AL & 0x80) ? 1 : 0; 2119 r_Z = (r_AL == 0) ? 1 : 0; 2120 r_P = calc_parity_8bit( r_AL ); 2121 break; 2122 } 2123 case 0x2F: { /* DAS */ 2124 UInt old_AL = r_AL; 2125 UInt old_C = r_C; 2126 r_C = 0; 2127 if ((r_AL & 0xF) > 9 || r_A == 1) { 2128 Bool borrow = r_AL < 6; 2129 r_AL = r_AL - 6; 2130 r_C = old_C; 2131 if (borrow) r_C = 1; 2132 r_A = 1; 2133 } else { 2134 r_A = 0; 2135 } 2136 if (old_AL > 0x99 || old_C == 1) { 2137 r_AL = r_AL - 0x60; 2138 r_C = 1; 2139 } else { 2140 /* Intel docs are wrong: r_C = 0; */ 2141 } 2142 /* O is undefined. S Z and P are set according to the 2143 result. */ 2144 r_AL &= 0xFF; 2145 r_O = 0; /* let's say */ 2146 r_S = (r_AL & 0x80) ? 1 : 0; 2147 r_Z = (r_AL == 0) ? 1 : 0; 2148 r_P = calc_parity_8bit( r_AL ); 2149 break; 2150 } 2151 case 0x37: { /* AAA */ 2152 Bool nudge = r_AL > 0xF9; 2153 if ((r_AL & 0xF) > 9 || r_A == 1) { 2154 r_AL = r_AL + 6; 2155 r_AH = r_AH + 1 + (nudge ? 1 : 0); 2156 r_A = 1; 2157 r_C = 1; 2158 r_AL = r_AL & 0xF; 2159 } else { 2160 r_A = 0; 2161 r_C = 0; 2162 r_AL = r_AL & 0xF; 2163 } 2164 /* O S Z and P are undefined. */ 2165 r_O = r_S = r_Z = r_P = 0; /* let's say */ 2166 break; 2167 } 2168 case 0x3F: { /* AAS */ 2169 Bool nudge = r_AL < 0x06; 2170 if ((r_AL & 0xF) > 9 || r_A == 1) { 2171 r_AL = r_AL - 6; 2172 r_AH = r_AH - 1 - (nudge ? 1 : 0); 2173 r_A = 1; 2174 r_C = 1; 2175 r_AL = r_AL & 0xF; 2176 } else { 2177 r_A = 0; 2178 r_C = 0; 2179 r_AL = r_AL & 0xF; 2180 } 2181 /* O S Z and P are undefined. */ 2182 r_O = r_S = r_Z = r_P = 0; /* let's say */ 2183 break; 2184 } 2185 default: 2186 vassert(0); 2187 } 2188 result = ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) ) 2189 | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) ) 2190 | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) ) 2191 | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) ) 2192 | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) ) 2193 | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) ) 2194 | ( (r_AH & 0xFF) << 8 ) 2195 | ( (r_AL & 0xFF) << 0 ); 2196 return result; 2197 } 2198 2199 UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode ) 2200 { 2201 UInt r_AL = (flags_and_AX >> 0) & 0xFF; 2202 UInt r_AH = (flags_and_AX >> 8) & 0xFF; 2203 UInt r_O = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1; 2204 UInt r_S = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1; 2205 UInt r_Z = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1; 2206 UInt r_A = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1; 2207 UInt r_C = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1; 2208 UInt r_P = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1; 2209 UInt result = 0; 2210 2211 switch (opcode) { 2212 case 0xD4: { /* AAM */ 2213 r_AH = r_AL / 10; 2214 r_AL = r_AL % 10; 2215 break; 2216 } 2217 case 0xD5: { /* AAD */ 2218 r_AL = ((r_AH * 10) + r_AL) & 0xff; 2219 r_AH = 0; 2220 break; 2221 } 2222 default: 2223 vassert(0); 2224 } 2225 2226 r_O = 0; /* let's say (undefined) */ 2227 r_C = 0; /* let's say (undefined) */ 2228 r_A = 0; /* let's say (undefined) */ 2229 r_S = (r_AL & 0x80) ? 1 : 0; 2230 r_Z = (r_AL == 0) ? 1 : 0; 2231 r_P = calc_parity_8bit( r_AL ); 2232 2233 result = ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) ) 2234 | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) ) 2235 | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) ) 2236 | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) ) 2237 | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) ) 2238 | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) ) 2239 | ( (r_AH & 0xFF) << 8 ) 2240 | ( (r_AL & 0xFF) << 0 ); 2241 return result; 2242 } 2243 2244 2245 /* CALLED FROM GENERATED CODE */ 2246 /* DIRTY HELPER (non-referentially-transparent) */ 2247 /* Horrible hack. On non-x86 platforms, return 1. */ 2248 ULong x86g_dirtyhelper_RDTSC ( void ) 2249 { 2250 # if defined(__i386__) 2251 ULong res; 2252 __asm__ __volatile__("rdtsc" : "=A" (res)); 2253 return res; 2254 # else 2255 return 1ULL; 2256 # endif 2257 } 2258 2259 2260 /* CALLED FROM GENERATED CODE */ 2261 /* DIRTY HELPER (modifies guest state) */ 2262 /* Claim to be a P55C (Intel Pentium/MMX) */ 2263 void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st ) 2264 { 2265 switch (st->guest_EAX) { 2266 case 0: 2267 st->guest_EAX = 0x1; 2268 st->guest_EBX = 0x756e6547; 2269 st->guest_ECX = 0x6c65746e; 2270 st->guest_EDX = 0x49656e69; 2271 break; 2272 default: 2273 st->guest_EAX = 0x543; 2274 st->guest_EBX = 0x0; 2275 st->guest_ECX = 0x0; 2276 st->guest_EDX = 0x8001bf; 2277 break; 2278 } 2279 } 2280 2281 /* CALLED FROM GENERATED CODE */ 2282 /* DIRTY HELPER (modifies guest state) */ 2283 /* Claim to be a Athlon "Classic" (Model 2, K75 "Pluto/Orion") */ 2284 /* But without 3DNow support (weird, but we really don't support it). */ 2285 void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st ) 2286 { 2287 switch (st->guest_EAX) { 2288 /* vendor ID */ 2289 case 0: 2290 st->guest_EAX = 0x1; 2291 st->guest_EBX = 0x68747541; 2292 st->guest_ECX = 0x444d4163; 2293 st->guest_EDX = 0x69746e65; 2294 break; 2295 /* feature bits */ 2296 case 1: 2297 st->guest_EAX = 0x621; 2298 st->guest_EBX = 0x0; 2299 st->guest_ECX = 0x0; 2300 st->guest_EDX = 0x183f9ff; 2301 break; 2302 /* Highest Extended Function Supported (0x80000004 brand string) */ 2303 case 0x80000000: 2304 st->guest_EAX = 0x80000004; 2305 st->guest_EBX = 0x68747541; 2306 st->guest_ECX = 0x444d4163; 2307 st->guest_EDX = 0x69746e65; 2308 break; 2309 /* Extended Processor Info and Feature Bits */ 2310 case 0x80000001: 2311 st->guest_EAX = 0x721; 2312 st->guest_EBX = 0x0; 2313 st->guest_ECX = 0x0; 2314 st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */ 2315 break; 2316 /* Processor Brand String "AMD Athlon(tm) Processor" */ 2317 case 0x80000002: 2318 st->guest_EAX = 0x20444d41; 2319 st->guest_EBX = 0x6c687441; 2320 st->guest_ECX = 0x74286e6f; 2321 st->guest_EDX = 0x5020296d; 2322 break; 2323 case 0x80000003: 2324 st->guest_EAX = 0x65636f72; 2325 st->guest_EBX = 0x726f7373; 2326 st->guest_ECX = 0x0; 2327 st->guest_EDX = 0x0; 2328 break; 2329 default: 2330 st->guest_EAX = 0x0; 2331 st->guest_EBX = 0x0; 2332 st->guest_ECX = 0x0; 2333 st->guest_EDX = 0x0; 2334 break; 2335 } 2336 } 2337 2338 /* CALLED FROM GENERATED CODE */ 2339 /* DIRTY HELPER (modifies guest state) */ 2340 /* Claim to be the following SSE1-capable CPU: 2341 vendor_id : GenuineIntel 2342 cpu family : 6 2343 model : 11 2344 model name : Intel(R) Pentium(R) III CPU family 1133MHz 2345 stepping : 1 2346 cpu MHz : 1131.013 2347 cache size : 512 KB 2348 */ 2349 void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st ) 2350 { 2351 switch (st->guest_EAX) { 2352 case 0: 2353 st->guest_EAX = 0x00000002; 2354 st->guest_EBX = 0x756e6547; 2355 st->guest_ECX = 0x6c65746e; 2356 st->guest_EDX = 0x49656e69; 2357 break; 2358 case 1: 2359 st->guest_EAX = 0x000006b1; 2360 st->guest_EBX = 0x00000004; 2361 st->guest_ECX = 0x00000000; 2362 st->guest_EDX = 0x0383fbff; 2363 break; 2364 default: 2365 st->guest_EAX = 0x03020101; 2366 st->guest_EBX = 0x00000000; 2367 st->guest_ECX = 0x00000000; 2368 st->guest_EDX = 0x0c040883; 2369 break; 2370 } 2371 } 2372 2373 /* Claim to be the following SSE2-capable CPU: 2374 vendor_id : GenuineIntel 2375 cpu family : 15 2376 model : 2 2377 model name : Intel(R) Pentium(R) 4 CPU 3.00GHz 2378 stepping : 9 2379 microcode : 0x17 2380 cpu MHz : 2992.577 2381 cache size : 512 KB 2382 flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov 2383 pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe 2384 pebs bts cid xtpr 2385 clflush size : 64 2386 cache_alignment : 128 2387 address sizes : 36 bits physical, 32 bits virtual 2388 */ 2389 void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st ) 2390 { 2391 switch (st->guest_EAX) { 2392 case 0: 2393 st->guest_EAX = 0x00000002; 2394 st->guest_EBX = 0x756e6547; 2395 st->guest_ECX = 0x6c65746e; 2396 st->guest_EDX = 0x49656e69; 2397 break; 2398 case 1: 2399 st->guest_EAX = 0x00000f29; 2400 st->guest_EBX = 0x01020809; 2401 st->guest_ECX = 0x00004400; 2402 st->guest_EDX = 0xbfebfbff; 2403 break; 2404 default: 2405 st->guest_EAX = 0x03020101; 2406 st->guest_EBX = 0x00000000; 2407 st->guest_ECX = 0x00000000; 2408 st->guest_EDX = 0x0c040883; 2409 break; 2410 } 2411 } 2412 2413 /* Claim to be the following SSSE3-capable CPU (2 x ...): 2414 vendor_id : GenuineIntel 2415 cpu family : 6 2416 model : 15 2417 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz 2418 stepping : 6 2419 cpu MHz : 2394.000 2420 cache size : 4096 KB 2421 physical id : 0 2422 siblings : 2 2423 core id : 0 2424 cpu cores : 2 2425 fpu : yes 2426 fpu_exception : yes 2427 cpuid level : 10 2428 wp : yes 2429 flags : fpu vme de pse tsc msr pae mce cx8 apic sep 2430 mtrr pge mca cmov pat pse36 clflush dts acpi 2431 mmx fxsr sse sse2 ss ht tm syscall nx lm 2432 constant_tsc pni monitor ds_cpl vmx est tm2 2433 cx16 xtpr lahf_lm 2434 bogomips : 4798.78 2435 clflush size : 64 2436 cache_alignment : 64 2437 address sizes : 36 bits physical, 48 bits virtual 2438 power management: 2439 */ 2440 void x86g_dirtyhelper_CPUID_sse3 ( VexGuestX86State* st ) 2441 { 2442 # define SET_ABCD(_a,_b,_c,_d) \ 2443 do { st->guest_EAX = (UInt)(_a); \ 2444 st->guest_EBX = (UInt)(_b); \ 2445 st->guest_ECX = (UInt)(_c); \ 2446 st->guest_EDX = (UInt)(_d); \ 2447 } while (0) 2448 2449 switch (st->guest_EAX) { 2450 case 0x00000000: 2451 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69); 2452 break; 2453 case 0x00000001: 2454 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff); 2455 break; 2456 case 0x00000002: 2457 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049); 2458 break; 2459 case 0x00000003: 2460 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2461 break; 2462 case 0x00000004: { 2463 switch (st->guest_ECX) { 2464 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f, 2465 0x0000003f, 0x00000001); break; 2466 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f, 2467 0x0000003f, 0x00000001); break; 2468 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f, 2469 0x00000fff, 0x00000001); break; 2470 default: SET_ABCD(0x00000000, 0x00000000, 2471 0x00000000, 0x00000000); break; 2472 } 2473 break; 2474 } 2475 case 0x00000005: 2476 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020); 2477 break; 2478 case 0x00000006: 2479 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000); 2480 break; 2481 case 0x00000007: 2482 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2483 break; 2484 case 0x00000008: 2485 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000); 2486 break; 2487 case 0x00000009: 2488 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2489 break; 2490 case 0x0000000a: 2491 unhandled_eax_value: 2492 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000); 2493 break; 2494 case 0x80000000: 2495 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); 2496 break; 2497 case 0x80000001: 2498 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000); 2499 break; 2500 case 0x80000002: 2501 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); 2502 break; 2503 case 0x80000003: 2504 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020); 2505 break; 2506 case 0x80000004: 2507 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847); 2508 break; 2509 case 0x80000005: 2510 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2511 break; 2512 case 0x80000006: 2513 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000); 2514 break; 2515 case 0x80000007: 2516 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2517 break; 2518 case 0x80000008: 2519 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); 2520 break; 2521 default: 2522 goto unhandled_eax_value; 2523 } 2524 # undef SET_ABCD 2525 } 2526 2527 2528 /* CALLED FROM GENERATED CODE */ 2529 /* DIRTY HELPER (non-referentially-transparent) */ 2530 /* Horrible hack. On non-x86 platforms, return 0. */ 2531 UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ ) 2532 { 2533 # if defined(__i386__) 2534 UInt r = 0; 2535 portno &= 0xFFFF; 2536 switch (sz) { 2537 case 4: 2538 __asm__ __volatile__("movl $0,%%eax; inl %w1,%0" 2539 : "=a" (r) : "Nd" (portno)); 2540 break; 2541 case 2: 2542 __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0" 2543 : "=a" (r) : "Nd" (portno)); 2544 break; 2545 case 1: 2546 __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0" 2547 : "=a" (r) : "Nd" (portno)); 2548 break; 2549 default: 2550 break; 2551 } 2552 return r; 2553 # else 2554 return 0; 2555 # endif 2556 } 2557 2558 2559 /* CALLED FROM GENERATED CODE */ 2560 /* DIRTY HELPER (non-referentially-transparent) */ 2561 /* Horrible hack. On non-x86 platforms, do nothing. */ 2562 void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ ) 2563 { 2564 # if defined(__i386__) 2565 portno &= 0xFFFF; 2566 switch (sz) { 2567 case 4: 2568 __asm__ __volatile__("outl %0, %w1" 2569 : : "a" (data), "Nd" (portno)); 2570 break; 2571 case 2: 2572 __asm__ __volatile__("outw %w0, %w1" 2573 : : "a" (data), "Nd" (portno)); 2574 break; 2575 case 1: 2576 __asm__ __volatile__("outb %b0, %w1" 2577 : : "a" (data), "Nd" (portno)); 2578 break; 2579 default: 2580 break; 2581 } 2582 # else 2583 /* do nothing */ 2584 # endif 2585 } 2586 2587 /* CALLED FROM GENERATED CODE */ 2588 /* DIRTY HELPER (non-referentially-transparent) */ 2589 /* Horrible hack. On non-x86 platforms, do nothing. */ 2590 /* op = 0: call the native SGDT instruction. 2591 op = 1: call the native SIDT instruction. 2592 */ 2593 void x86g_dirtyhelper_SxDT ( void *address, UInt op ) { 2594 # if defined(__i386__) 2595 switch (op) { 2596 case 0: 2597 __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory"); 2598 break; 2599 case 1: 2600 __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory"); 2601 break; 2602 default: 2603 vpanic("x86g_dirtyhelper_SxDT"); 2604 } 2605 # else 2606 /* do nothing */ 2607 UChar* p = (UChar*)address; 2608 p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0; 2609 # endif 2610 } 2611 2612 /*---------------------------------------------------------------*/ 2613 /*--- Helpers for MMX/SSE/SSE2. ---*/ 2614 /*---------------------------------------------------------------*/ 2615 2616 static inline UChar abdU8 ( UChar xx, UChar yy ) { 2617 return toUChar(xx>yy ? xx-yy : yy-xx); 2618 } 2619 2620 static inline ULong mk32x2 ( UInt w1, UInt w0 ) { 2621 return (((ULong)w1) << 32) | ((ULong)w0); 2622 } 2623 2624 static inline UShort sel16x4_3 ( ULong w64 ) { 2625 UInt hi32 = toUInt(w64 >> 32); 2626 return toUShort(hi32 >> 16); 2627 } 2628 static inline UShort sel16x4_2 ( ULong w64 ) { 2629 UInt hi32 = toUInt(w64 >> 32); 2630 return toUShort(hi32); 2631 } 2632 static inline UShort sel16x4_1 ( ULong w64 ) { 2633 UInt lo32 = toUInt(w64); 2634 return toUShort(lo32 >> 16); 2635 } 2636 static inline UShort sel16x4_0 ( ULong w64 ) { 2637 UInt lo32 = toUInt(w64); 2638 return toUShort(lo32); 2639 } 2640 2641 static inline UChar sel8x8_7 ( ULong w64 ) { 2642 UInt hi32 = toUInt(w64 >> 32); 2643 return toUChar(hi32 >> 24); 2644 } 2645 static inline UChar sel8x8_6 ( ULong w64 ) { 2646 UInt hi32 = toUInt(w64 >> 32); 2647 return toUChar(hi32 >> 16); 2648 } 2649 static inline UChar sel8x8_5 ( ULong w64 ) { 2650 UInt hi32 = toUInt(w64 >> 32); 2651 return toUChar(hi32 >> 8); 2652 } 2653 static inline UChar sel8x8_4 ( ULong w64 ) { 2654 UInt hi32 = toUInt(w64 >> 32); 2655 return toUChar(hi32 >> 0); 2656 } 2657 static inline UChar sel8x8_3 ( ULong w64 ) { 2658 UInt lo32 = toUInt(w64); 2659 return toUChar(lo32 >> 24); 2660 } 2661 static inline UChar sel8x8_2 ( ULong w64 ) { 2662 UInt lo32 = toUInt(w64); 2663 return toUChar(lo32 >> 16); 2664 } 2665 static inline UChar sel8x8_1 ( ULong w64 ) { 2666 UInt lo32 = toUInt(w64); 2667 return toUChar(lo32 >> 8); 2668 } 2669 static inline UChar sel8x8_0 ( ULong w64 ) { 2670 UInt lo32 = toUInt(w64); 2671 return toUChar(lo32 >> 0); 2672 } 2673 2674 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2675 ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy ) 2676 { 2677 return 2678 mk32x2( 2679 (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy))) 2680 + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))), 2681 (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy))) 2682 + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy))) 2683 ); 2684 } 2685 2686 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2687 ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy ) 2688 { 2689 UInt t = 0; 2690 t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) ); 2691 t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) ); 2692 t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) ); 2693 t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) ); 2694 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); 2695 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); 2696 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); 2697 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); 2698 t &= 0xFFFF; 2699 return (ULong)t; 2700 } 2701 2702 2703 /*---------------------------------------------------------------*/ 2704 /*--- Helpers for dealing with segment overrides. ---*/ 2705 /*---------------------------------------------------------------*/ 2706 2707 static inline 2708 UInt get_segdescr_base ( VexGuestX86SegDescr* ent ) 2709 { 2710 UInt lo = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow; 2711 UInt mid = 0xFF & (UInt)ent->LdtEnt.Bits.BaseMid; 2712 UInt hi = 0xFF & (UInt)ent->LdtEnt.Bits.BaseHi; 2713 return (hi << 24) | (mid << 16) | lo; 2714 } 2715 2716 static inline 2717 UInt get_segdescr_limit ( VexGuestX86SegDescr* ent ) 2718 { 2719 UInt lo = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow; 2720 UInt hi = 0xF & (UInt)ent->LdtEnt.Bits.LimitHi; 2721 UInt limit = (hi << 16) | lo; 2722 if (ent->LdtEnt.Bits.Granularity) 2723 limit = (limit << 12) | 0xFFF; 2724 return limit; 2725 } 2726 2727 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2728 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt, 2729 UInt seg_selector, UInt virtual_addr ) 2730 { 2731 UInt tiBit, base, limit; 2732 VexGuestX86SegDescr* the_descrs; 2733 2734 Bool verboze = False; 2735 2736 /* If this isn't true, we're in Big Trouble. */ 2737 vassert(8 == sizeof(VexGuestX86SegDescr)); 2738 2739 if (verboze) 2740 vex_printf("x86h_use_seg_selector: " 2741 "seg_selector = 0x%x, vaddr = 0x%x\n", 2742 seg_selector, virtual_addr); 2743 2744 /* Check for wildly invalid selector. */ 2745 if (seg_selector & ~0xFFFF) 2746 goto bad; 2747 2748 seg_selector &= 0x0000FFFF; 2749 2750 /* Sanity check the segment selector. Ensure that RPL=11b (least 2751 privilege). This forms the bottom 2 bits of the selector. */ 2752 if ((seg_selector & 3) != 3) 2753 goto bad; 2754 2755 /* Extract the TI bit (0 means GDT, 1 means LDT) */ 2756 tiBit = (seg_selector >> 2) & 1; 2757 2758 /* Convert the segment selector onto a table index */ 2759 seg_selector >>= 3; 2760 vassert(seg_selector >= 0 && seg_selector < 8192); 2761 2762 if (tiBit == 0) { 2763 2764 /* GDT access. */ 2765 /* Do we actually have a GDT to look at? */ 2766 if (gdt == 0) 2767 goto bad; 2768 2769 /* Check for access to non-existent entry. */ 2770 if (seg_selector >= VEX_GUEST_X86_GDT_NENT) 2771 goto bad; 2772 2773 the_descrs = (VexGuestX86SegDescr*)gdt; 2774 base = get_segdescr_base (&the_descrs[seg_selector]); 2775 limit = get_segdescr_limit(&the_descrs[seg_selector]); 2776 2777 } else { 2778 2779 /* All the same stuff, except for the LDT. */ 2780 if (ldt == 0) 2781 goto bad; 2782 2783 if (seg_selector >= VEX_GUEST_X86_LDT_NENT) 2784 goto bad; 2785 2786 the_descrs = (VexGuestX86SegDescr*)ldt; 2787 base = get_segdescr_base (&the_descrs[seg_selector]); 2788 limit = get_segdescr_limit(&the_descrs[seg_selector]); 2789 2790 } 2791 2792 /* Do the limit check. Note, this check is just slightly too 2793 slack. Really it should be "if (virtual_addr + size - 1 >= 2794 limit)," but we don't have the size info to hand. Getting it 2795 could be significantly complex. */ 2796 if (virtual_addr >= limit) 2797 goto bad; 2798 2799 if (verboze) 2800 vex_printf("x86h_use_seg_selector: " 2801 "base = 0x%x, addr = 0x%x\n", 2802 base, base + virtual_addr); 2803 2804 /* High 32 bits are zero, indicating success. */ 2805 return (ULong)( ((UInt)virtual_addr) + base ); 2806 2807 bad: 2808 return 1ULL << 32; 2809 } 2810 2811 2812 /*---------------------------------------------------------------*/ 2813 /*--- Helpers for dealing with, and describing, ---*/ 2814 /*--- guest state as a whole. ---*/ 2815 /*---------------------------------------------------------------*/ 2816 2817 /* Initialise the entire x86 guest state. */ 2818 /* VISIBLE TO LIBVEX CLIENT */ 2819 void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state ) 2820 { 2821 vex_state->host_EvC_FAILADDR = 0; 2822 vex_state->host_EvC_COUNTER = 0; 2823 2824 vex_state->guest_EAX = 0; 2825 vex_state->guest_ECX = 0; 2826 vex_state->guest_EDX = 0; 2827 vex_state->guest_EBX = 0; 2828 vex_state->guest_ESP = 0; 2829 vex_state->guest_EBP = 0; 2830 vex_state->guest_ESI = 0; 2831 vex_state->guest_EDI = 0; 2832 2833 vex_state->guest_CC_OP = X86G_CC_OP_COPY; 2834 vex_state->guest_CC_DEP1 = 0; 2835 vex_state->guest_CC_DEP2 = 0; 2836 vex_state->guest_CC_NDEP = 0; 2837 vex_state->guest_DFLAG = 1; /* forwards */ 2838 vex_state->guest_IDFLAG = 0; 2839 vex_state->guest_ACFLAG = 0; 2840 2841 vex_state->guest_EIP = 0; 2842 2843 /* Initialise the simulated FPU */ 2844 x86g_dirtyhelper_FINIT( vex_state ); 2845 2846 /* Initialse the SSE state. */ 2847 # define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0; 2848 2849 vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST; 2850 SSEZERO(vex_state->guest_XMM0); 2851 SSEZERO(vex_state->guest_XMM1); 2852 SSEZERO(vex_state->guest_XMM2); 2853 SSEZERO(vex_state->guest_XMM3); 2854 SSEZERO(vex_state->guest_XMM4); 2855 SSEZERO(vex_state->guest_XMM5); 2856 SSEZERO(vex_state->guest_XMM6); 2857 SSEZERO(vex_state->guest_XMM7); 2858 2859 # undef SSEZERO 2860 2861 vex_state->guest_CS = 0; 2862 vex_state->guest_DS = 0; 2863 vex_state->guest_ES = 0; 2864 vex_state->guest_FS = 0; 2865 vex_state->guest_GS = 0; 2866 vex_state->guest_SS = 0; 2867 vex_state->guest_LDT = 0; 2868 vex_state->guest_GDT = 0; 2869 2870 vex_state->guest_EMNOTE = EmNote_NONE; 2871 2872 /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */ 2873 vex_state->guest_CMSTART = 0; 2874 vex_state->guest_CMLEN = 0; 2875 2876 vex_state->guest_NRADDR = 0; 2877 vex_state->guest_SC_CLASS = 0; 2878 vex_state->guest_IP_AT_SYSCALL = 0; 2879 2880 vex_state->padding1 = 0; 2881 vex_state->padding2 = 0; 2882 vex_state->padding3 = 0; 2883 } 2884 2885 2886 /* Figure out if any part of the guest state contained in minoff 2887 .. maxoff requires precise memory exceptions. If in doubt return 2888 True (but this generates significantly slower code). 2889 2890 By default we enforce precise exns for guest %ESP, %EBP and %EIP 2891 only. These are the minimum needed to extract correct stack 2892 backtraces from x86 code. 2893 2894 Only %ESP is needed in mode VexRegUpdSpAtMemAccess. 2895 */ 2896 Bool guest_x86_state_requires_precise_mem_exns ( 2897 Int minoff, Int maxoff, VexRegisterUpdates pxControl 2898 ) 2899 { 2900 Int ebp_min = offsetof(VexGuestX86State, guest_EBP); 2901 Int ebp_max = ebp_min + 4 - 1; 2902 Int esp_min = offsetof(VexGuestX86State, guest_ESP); 2903 Int esp_max = esp_min + 4 - 1; 2904 Int eip_min = offsetof(VexGuestX86State, guest_EIP); 2905 Int eip_max = eip_min + 4 - 1; 2906 2907 if (maxoff < esp_min || minoff > esp_max) { 2908 /* no overlap with esp */ 2909 if (pxControl == VexRegUpdSpAtMemAccess) 2910 return False; // We only need to check stack pointer. 2911 } else { 2912 return True; 2913 } 2914 2915 if (maxoff < ebp_min || minoff > ebp_max) { 2916 /* no overlap with ebp */ 2917 } else { 2918 return True; 2919 } 2920 2921 if (maxoff < eip_min || minoff > eip_max) { 2922 /* no overlap with eip */ 2923 } else { 2924 return True; 2925 } 2926 2927 return False; 2928 } 2929 2930 2931 #define ALWAYSDEFD(field) \ 2932 { offsetof(VexGuestX86State, field), \ 2933 (sizeof ((VexGuestX86State*)0)->field) } 2934 2935 VexGuestLayout 2936 x86guest_layout 2937 = { 2938 /* Total size of the guest state, in bytes. */ 2939 .total_sizeB = sizeof(VexGuestX86State), 2940 2941 /* Describe the stack pointer. */ 2942 .offset_SP = offsetof(VexGuestX86State,guest_ESP), 2943 .sizeof_SP = 4, 2944 2945 /* Describe the frame pointer. */ 2946 .offset_FP = offsetof(VexGuestX86State,guest_EBP), 2947 .sizeof_FP = 4, 2948 2949 /* Describe the instruction pointer. */ 2950 .offset_IP = offsetof(VexGuestX86State,guest_EIP), 2951 .sizeof_IP = 4, 2952 2953 /* Describe any sections to be regarded by Memcheck as 2954 'always-defined'. */ 2955 .n_alwaysDefd = 24, 2956 2957 /* flags thunk: OP and NDEP are always defd, whereas DEP1 2958 and DEP2 have to be tracked. See detailed comment in 2959 gdefs.h on meaning of thunk fields. */ 2960 .alwaysDefd 2961 = { /* 0 */ ALWAYSDEFD(guest_CC_OP), 2962 /* 1 */ ALWAYSDEFD(guest_CC_NDEP), 2963 /* 2 */ ALWAYSDEFD(guest_DFLAG), 2964 /* 3 */ ALWAYSDEFD(guest_IDFLAG), 2965 /* 4 */ ALWAYSDEFD(guest_ACFLAG), 2966 /* 5 */ ALWAYSDEFD(guest_EIP), 2967 /* 6 */ ALWAYSDEFD(guest_FTOP), 2968 /* 7 */ ALWAYSDEFD(guest_FPTAG), 2969 /* 8 */ ALWAYSDEFD(guest_FPROUND), 2970 /* 9 */ ALWAYSDEFD(guest_FC3210), 2971 /* 10 */ ALWAYSDEFD(guest_CS), 2972 /* 11 */ ALWAYSDEFD(guest_DS), 2973 /* 12 */ ALWAYSDEFD(guest_ES), 2974 /* 13 */ ALWAYSDEFD(guest_FS), 2975 /* 14 */ ALWAYSDEFD(guest_GS), 2976 /* 15 */ ALWAYSDEFD(guest_SS), 2977 /* 16 */ ALWAYSDEFD(guest_LDT), 2978 /* 17 */ ALWAYSDEFD(guest_GDT), 2979 /* 18 */ ALWAYSDEFD(guest_EMNOTE), 2980 /* 19 */ ALWAYSDEFD(guest_SSEROUND), 2981 /* 20 */ ALWAYSDEFD(guest_CMSTART), 2982 /* 21 */ ALWAYSDEFD(guest_CMLEN), 2983 /* 22 */ ALWAYSDEFD(guest_SC_CLASS), 2984 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL) 2985 } 2986 }; 2987 2988 2989 /*---------------------------------------------------------------*/ 2990 /*--- end guest_x86_helpers.c ---*/ 2991 /*---------------------------------------------------------------*/ 2992