/*---------------------------------------------------------------*/
/*--- begin                               guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2015 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_x86.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_x86_defs.h"
#include "guest_generic_x87.h"


/* This file contains helper functions for x86 guest code.
   Calls to these functions are generated by the back end.
   These calls are of course in the host machine code and
   this file will be compiled to host machine code, so that
   all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-x86/toIR.c.

   The convention used is that all functions called from generated
   code are named x86g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/


/* Set to 1 to get detailed profiling info about use of the flag
   machinery. */
#define PROFILE_EFLAGS 0


/*---------------------------------------------------------------*/
/*--- %eflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/
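/* Lookup table for the x86 parity flag, PF.  Entry n is
   X86G_CC_MASK_P exactly when the byte n contains an even number of
   1 bits; PF only ever inspects the low 8 bits of a result.  As a
   sanity check -- a sketch for this comment only, not part of the
   build -- the table could be regenerated along these lines:

      UInt n, b;
      for (n = 0; n < 256; n++) {
         b = n ^ (n >> 4);  b ^= (b >> 2);  b ^= (b >> 1);
         parity_table[n] = (b & 1) ? 0 : X86G_CC_MASK_P;
      }
*/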
static const UChar parity_table[256] = {
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};

/* generalised left-shifter */
inline static Int lshift ( Int x, Int n )
{
   if (n >= 0)
      return (UInt)x << n;
   else
      return x >> (-n);
}

/* identity on ULong */
static inline ULong idULong ( ULong x )
{
   return x;
}
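/* All the flag helpers below operate on the lazy condition-code
   "thunk" that toIR.c leaves in the guest state: CC_OP says which
   operation last set the flags, and CC_DEP1, CC_DEP2 and CC_NDEP are
   its operands, from which the individual %eflags bits are computed
   only on demand.  An illustrative example (values made up for this
   comment): after a 32-bit add, the thunk holds
   (X86G_CC_OP_ADDL, argL, argR, 0), so

      x86g_calculate_eflags_all(X86G_CC_OP_ADDL, 0xFFFFFFFF, 1, 0)

   evaluates 0xFFFFFFFF + 1, which wraps to zero, and hence returns a
   value with (at least) X86G_CC_MASK_C and X86G_CC_MASK_Z set. */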
#define PREAMBLE(__data_bits)                                   \
   /* const */ UInt DATA_MASK                                   \
      = __data_bits==8 ? 0xFF                                   \
                       : (__data_bits==16 ? 0xFFFF              \
                                          : 0xFFFFFFFF);        \
   /* const */ UInt SIGN_MASK = 1u << (__data_bits - 1);        \
   /* const */ UInt CC_DEP1 = cc_dep1_formal;                   \
   /* const */ UInt CC_DEP2 = cc_dep2_formal;                   \
   /* const */ UInt CC_NDEP = cc_ndep_formal;                   \
   /* Four bogus assignments, which hopefully gcc can     */    \
   /* optimise away, and which stop it complaining about  */    \
   /* unused variables.                                   */    \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;


/*-------------------------------------------------------------*/

#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     /* of: set iff argL and argR have the same sign but  */    \
     /* res has the opposite one (signed overflow).       */    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     /* of: set iff argL and argR differ in sign and res   */   \
     /* differs in sign from argL (signed overflow).       */   \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     /* toIR.c stores argR xor'd with the old C in DEP2,  */    \
     /* so xor again here to recover the original argR.   */    \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     /* as for ADC: recover the original argR */                \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/
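/* INC and DEC leave C unchanged, so the thunk cannot recompute C
   from the operands: DEP1 holds the result and NDEP carries the old
   C flag through, which the two macros below simply copy out with
   (CC_NDEP & X86G_CC_MASK_C). */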
#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
           == ((UInt)SIGN_MASK - 1)) << 11;                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/
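/* For the shifts, DEP1 is the final result and DEP2 is the value
   "undershifted" by one bit, i.e. shifted by count-1.  That is
   enough to recover C (the last bit shifted out: the top bit of DEP2
   for SHL, the bottom bit for SHR) and the O value that is only
   architecturally defined for a shift count of 1. */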
#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;        \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & CC_DEP1)                          \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                               ^ lshift(CC_DEP1, 11)));         \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))       \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                               ^ lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1)+1)));  \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
#if PROFILE_EFLAGS

static Bool initted = False;

/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))


static void showCounts ( void )
{
   Int op, co;
   HChar ch;
   vex_printf("\nTotal calls: calc_all=%u  calc_cond=%u  calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {

      ch = ' ';
      if (op > 0 && (op-1) % 3 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 3 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 3 == 2)
         ch = 'L';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("      ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_EFLAGS */
/* Calculate all 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      case X86G_CC_OP_COPY:
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD(  8, UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC(  8, UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all 6 flags from the supplied thunk parameters. */
UInt x86g_calculate_eflags_all ( UInt cc_op,
                                 UInt cc_dep1,
                                 UInt cc_dep2,
                                 UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
VEX_REGPARM(3)
UInt x86g_calculate_eflags_c ( UInt cc_op,
                               UInt cc_dep1,
                               UInt cc_dep2,
                               UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case X86G_CC_OP_LOGICL:
      case X86G_CC_OP_LOGICW:
      case X86G_CC_OP_LOGICB:
         return 0;
      case X86G_CC_OP_SUBL:
         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBW:
         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBB:
         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_INCL:
      case X86G_CC_OP_DECL:
         return cc_ndep & X86G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_EFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & X86G_CC_MASK_C;
}
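/* The X86Condcode encoding pairs each condition with its negation at
   the adjacent odd value (O/NO, B/NB, Z/NZ, ...), so bit 0 of 'cond'
   selects the inverted sense.  x86g_calculate_condition below relies
   on this via 'inv = cond & 1', letting each pair share a handler. */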
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
                                UInt cc_op,
                                UInt cc_dep1,
                                UInt cc_dep2,
                                UInt cc_ndep )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
                                               cc_dep2, cc_ndep);
   UInt of,sf,zf,cf,pf;
   UInt inv = cond & 1;

#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   switch (cond) {
      case X86CondNO:
      case X86CondO: /* OF == 1 */
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case X86CondNZ:
      case X86CondZ: /* ZF == 1 */
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case X86CondNB:
      case X86CondB: /* CF == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         return 1 & (inv ^ cf);
         break;

      case X86CondNBE:
      case X86CondBE: /* (CF or ZF) == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));
         break;

      case X86CondNS:
      case X86CondS: /* SF == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case X86CondNP:
      case X86CondP: /* PF == 1 */
         pf = eflags >> X86G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case X86CondNL:
      case X86CondL: /* (SF xor OF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));
         break;

      case X86CondNLE:
      case X86CondLE: /* ((SF xor OF) or ZF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));
         break;

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("x86g_calculate_condition");
   }
}


/* VISIBLE TO LIBVEX CLIENT */
UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   UInt dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == 0xFFFFFFFF);
   if (dflag == 0xFFFFFFFF)
      eflags |= X86G_CC_MASK_D;
   if (vex_state->guest_IDFLAG == 1)
      eflags |= X86G_CC_MASK_ID;
   if (vex_state->guest_ACFLAG == 1)
      eflags |= X86G_CC_MASK_AC;

   return eflags;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflags ( UInt eflags,
                             /*MOD*/VexGuestX86State* vex_state )
{
   /* D flag */
   if (eflags & X86G_CC_MASK_D) {
      vex_state->guest_DFLAG = 0xFFFFFFFF;
      eflags &= ~X86G_CC_MASK_D;
   }
   else
      vex_state->guest_DFLAG = 1;

   /* ID flag */
   if (eflags & X86G_CC_MASK_ID) {
      vex_state->guest_IDFLAG = 1;
      eflags &= ~X86G_CC_MASK_ID;
   }
   else
      vex_state->guest_IDFLAG = 0;

   /* AC flag */
   if (eflags & X86G_CC_MASK_AC) {
      vex_state->guest_ACFLAG = 1;
      eflags &= ~X86G_CC_MASK_AC;
   }
   else
      vex_state->guest_ACFLAG = 0;

   UInt cc_mask = X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z |
                  X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P;
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = eflags & cc_mask;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
                              /*MOD*/VexGuestX86State* vex_state )
{
   UInt oszacp = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   if (new_carry_flag & 1) {
      oszacp |= X86G_CC_MASK_C;
   } else {
      oszacp &= ~X86G_CC_MASK_C;
   }
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = oszacp;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}


/*---------------------------------------------------------------*/
/*--- %eflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
/*--- %eflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static inline Bool isU32 ( IRExpr* e, UInt n )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == n );
}

IRExpr* guest_x86_spechelper ( const HChar* function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "x86g_calculate_condition" --------- */

   if (vex_streq(function_name, "x86g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
         /* long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
      /*---------------- SUBL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test !(dst <s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
         /* long sub/cmp, then NLE (signed not less than or equal)
            --> test dst >s src
            --> test !(dst <=s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test !(dst <u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S,
                           binop(Iop_Sub32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
         /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S,
                                 binop(Iop_Sub32, cc_dep1, cc_dep2),
                                 mkU32(0))),
                      mkU32(1));
      }

      /*---------------- SUBW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }

      /*---------------- SUBB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
         /* byte sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep2,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep1,mkU32(0xFF))));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
          && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU32(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U32s. */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
          && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[7]
         */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                      mkU32(1));
      }

      /*---------------- LOGICL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
         /* long and/or/xor, then BE
            LOGIC sets ZF according to the result and makes CF be zero.
            BE computes (CF | ZF), but CF is zero, so this reduces to ZF
            -- which will be 1 iff the result is zero.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* long and/or/xor, then S --> (UInt)result[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
         /* see comment below for (LOGICB, CondNS) */
         /* long and/or/xor, then NS --> (UInt) ~ result[31] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(31)),
                            mkU32(1)),
                      mkU32(1));
      }

      /*---------------- LOGICW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
                           mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* word and/or/xor, then S --> (UInt)result[15] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(15)),
                      mkU32(1));
      }

      /*---------------- LOGICB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         /* b9ac9:       84 c0                   test   %al,%al
            b9acb:       75 0d                   jne    b9ada */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
                           mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
         /* ditto, for negation-of-S. */
         /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                      mkU32(1));
      }

      /*---------------- DECL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
         /* dec L, then S --> compare dst <s 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
         /* dec W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- INCW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
         /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
         /* inc W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- SHRL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
         /* SHRL, then Z --> test dep1 == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of x87 FP compares: "fcom ... ;
         fnstsw %ax ; sahf ; jbe" for example. */

      if (isU32(cc_op, X86G_CC_OP_COPY) &&
          (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test
            (C or Z) == 1. */
         /* COPY, then NBE --> extract C and Z from dep1, and test
            (C or Z) == 0. */
         UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(
                        Iop_Or32,
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
                     ),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
         UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
         UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_c" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
      /* specialise calls to above "calculate_eflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep1,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep2,mkU32(0xFF))));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL)
          || isU32(cc_op, X86G_CC_OP_LOGICW)
          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU32(0);
      }
      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }
      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* cflag after COPY is stored in DEP1. */
         return
            binop(
               Iop_And32,
               binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
               mkU32(1)
            );
      }
      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           cc_dep1));
      }
      // ATC, requires verification, no test case known
      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
      //   /* C after signed widening multiply denotes the case where
      //      the top half of the result isn't simply the sign extension
      //      of the bottom half (iow the result doesn't fit completely
      //      in the bottom half). Hence:
      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
      //      where 'x' denotes signed widening multiply.*/
      //   return
      //      unop(Iop_1Uto32,
      //           binop(Iop_CmpNE32,
      //                 unop(Iop_64HIto32,
      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
      //                 binop(Iop_Sar32,
      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
      //}
#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }
   /* --------- specialising "x86g_calculate_eflags_all" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
      /* specialise calls to above "calculate_eflags_all" function */
      IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      /* cc_dep2 = args[2]; */
      /* cc_ndep = args[3]; */

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* eflags after COPY are stored in DEP1. */
         return
            binop(
               Iop_And32,
               cc_dep1,
               mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                     | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
            );
      }
      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   return NULL;
}


/*---------------------------------------------------------------*/
/*--- Supporting functions for x87 FPU activities.            ---*/
/*---------------------------------------------------------------*/

static inline Bool host_is_little_endian ( void )
{
   UInt x = 0x76543210;
   UChar* p = (UChar*)(&x);
   return toBool(*p == 0x10);
}

/* 80 and 64-bit floating point formats:

   80-bit:

    S  0       0-------0      zero
    S  0       0X------X      denormals
    S  1-7FFE  1X------X      normals (all normals have leading 1)
    S  7FFF    10------0      infinity
    S  7FFF    10X-----X      snan
    S  7FFF    11X-----X      qnan

   S is the sign bit.  For runs X----X, at least one of the Xs must be
   nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
   there is an explicitly represented leading 1, and a sign bit,
   giving 80 in total.

   64-bit avoids the confusion of an explicitly represented leading 1
   and so is simpler:

    S  0      0------0   zero
    S  0      X------X   denormals
    S  1-7FE  any        normals
    S  7FF    0------0   infinity
    S  7FF    0X-----X   snan
    S  7FF    1X-----X   qnan

   Exponent is 11 bits, fractional part is 52 bits, and there is a
   sign bit, giving 64 in total.
*/
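/* A concrete instance of the two formats (illustrative only): 1.0 is

      64-bit:  sign 0, exponent 0x3FF,  fraction all zeroes
               == 0x3FF0000000000000
      80-bit:  sign 0, exponent 0x3FFF, mantissa 0x8000000000000000

   -- note the explicitly represented leading 1 in the 80-bit
   mantissa, which the 64-bit format leaves implicit. */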
/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
{
   Bool   mantissaIsZero;
   Int    bexp;
   UChar  sign;
   UChar* f64;

   vassert(host_is_little_endian());

   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */

   f64  = (UChar*)(&dbl);
   sign = toUChar( (f64[7] >> 7) & 1 );

   /* First off, if the tag indicates the register was empty,
      return 1,0,sign,1 */
   if (tag == 0) {
      /* vex_printf("Empty\n"); */
      return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
                             | X86G_FC_MASK_C0;
   }

   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   bexp &= 0x7FF;

   mantissaIsZero
      = toBool(
           (f64[6] & 0x0F) == 0
           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
        );

   /* If both exponent and mantissa are zero, the value is zero.
      Return 1,0,sign,0. */
   if (bexp == 0 && mantissaIsZero) {
      /* vex_printf("Zero\n"); */
      return X86G_FC_MASK_C3 | 0
             | (sign << X86G_FC_SHIFT_C1) | 0;
   }

   /* If exponent is zero but mantissa isn't, it's a denormal.
      Return 1,1,sign,0. */
   if (bexp == 0 && !mantissaIsZero) {
      /* vex_printf("Denormal\n"); */
      return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
             | (sign << X86G_FC_SHIFT_C1) | 0;
   }

   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
      Return 0,1,sign,1. */
   if (bexp == 0x7FF && mantissaIsZero) {
      /* vex_printf("Inf\n"); */
      return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
               | X86G_FC_MASK_C0;
   }

   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
      Return 0,0,sign,1. */
   if (bexp == 0x7FF && !mantissaIsZero) {
      /* vex_printf("NaN\n"); */
      return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
   }

   /* Uh, ok, we give up.  It must be a normal finite number.
      Return 0,1,sign,0.
   */
   /* vex_printf("normal\n"); */
   return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest memory) */
ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
{
   ULong f64;
   convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
   return f64;
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest memory) */
void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
{
   convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
}


/*----------------------------------------------*/
/*--- The exported fns ..                    ---*/
/*----------------------------------------------*/

/* Layout of the real x87 state. */
/* 13 June 05: Fpu_State and auxiliary constants were moved to
   guest_generic_x87.h */


/* CLEAN HELPER */
/* fpucw[15:0] contains an x87 native format FPU control word.
   Extract from it the required FPROUND value and any resulting
   emulation warning, and return (warn << 32) | fpround value.
*/
ULong x86g_check_fldcw ( UInt fpucw )
{
   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   UInt rmode = (fpucw >> 10) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((fpucw & 0x3F) != 0x3F) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_x87exns;
   }
   else
   if (((fpucw >> 8) & 3) != 3) {
      /* unsupported precision */
      ew = EmWarn_X86_x87precision;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}

/* CLEAN HELPER */
/* Given fpround as an IRRoundingMode value, create a suitable x87
   native format FPU control word. */
UInt x86g_create_fpucw ( UInt fpround )
{
   fpround &= 3;
   return 0x037F | (fpround << 10);
}
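/* Round-trip example for the two functions above (illustrative
   only): the x87 power-up control word 0x037F has all exceptions
   masked, 64-bit precision and round-to-nearest selected, so
   x86g_check_fldcw(0x037F) returns 0 (Irrm_NEAREST, no warning), and
   x86g_create_fpucw(Irrm_NEAREST) gives back exactly 0x037F. */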
/* CLEAN HELPER */
/* mxcsr[15:0] contains an SSE native format MXCSR value.
   Extract from it the required SSEROUND value and any resulting
   emulation warning, and return (warn << 32) | sseround value.
*/
ULong x86g_check_ldmxcsr ( UInt mxcsr )
{
   /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   UInt rmode = (mxcsr >> 13) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((mxcsr & 0x1F80) != 0x1F80) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_sseExns;
   }
   else
   if (mxcsr & (1<<15)) {
      /* FZ is set */
      ew = EmWarn_X86_fz;
   }
   else
   if (mxcsr & (1<<6)) {
      /* DAZ is set */
      ew = EmWarn_X86_daz;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}


/* CLEAN HELPER */
/* Given sseround as an IRRoundingMode value, create a suitable SSE
   native format MXCSR value. */
UInt x86g_create_mxcsr ( UInt sseround )
{
   sseround &= 3;
   return 0x1F80 | (sseround << 13);
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state) */
/* Initialise the x87 FPU state as per 'finit'. */
void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
{
   Int i;
   gst->guest_FTOP = 0;
   for (i = 0; i < 8; i++) {
      gst->guest_FPTAG[i] = 0; /* empty */
      gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   }
   gst->guest_FPROUND = (UInt)Irrm_NEAREST;
   gst->guest_FC3210  = 0;
}
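/* The helpers below, and hence FSAVE/FRSTOR and FSTENV/FLDENV,
   operate on the 108-byte x87 FSAVE image described by Fpu_State in
   guest_generic_x87.h: 14 16-bit environment words (control, status
   and tag words, plus the IP/DP fields) followed by 8 x 10 bytes of
   register contents, held in ST (stack) order. */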
/* This is used to implement both 'frstor' and 'fldenv'.  The latter
   appears to differ from the former only in that the 8 FP registers
   themselves are not transferred into the guest state. */
static
VexEmNote do_put_x87 ( Bool moveRegs,
                       /*IN*/UChar* x87_state,
                       /*OUT*/VexGuestX86State* vex_state )
{
   Int        stno, preg;
   UInt       tag;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
   UInt       tagw    = x87->env[FP_ENV_TAG];
   UInt       fpucw   = x87->env[FP_ENV_CTRL];
   UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
   VexEmNote  ew;
   UInt       fpround;
   ULong      pair;

   /* Copy registers and tags */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         if (moveRegs)
            convert_f80le_to_f64le( &x87->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = x86g_check_fldcw ( (UInt)fpucw );
   fpround = (UInt)pair;
   ew      = (VexEmNote)(pair >> 32);

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}


/* Create an x87 FPU state from the guest state, as close as
   we can approximate it. */
static
void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
                  /*OUT*/UChar* x87_state )
{
   Int        i, stno, preg;
   UInt       tagw;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = vex_state->guest_FTOP;
   UInt       c3210   = vex_state->guest_FC3210;

   for (i = 0; i < 14; i++)
      x87->env[i] = 0;

   x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   x87->env[FP_ENV_STAT]
      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   x87->env[FP_ENV_CTRL]
      = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));

   /* Dump the register stack in ST order. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty */
         tagw |= (3 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87->reg[10*stno] );
      } else {
         /* register is full. */
         tagw |= (0 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87->reg[10*stno] );
      }
   }
   x87->env[FP_ENV_TAG] = toUShort(tagw);
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   U128*     xmm   = (U128*)(addr + 160);
   UInt      mxcsr;
   UShort    fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   do_get_x87( gst, (UChar*)&tmp );
   mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */

   /* set addrS[2] in an endian-independent way */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5]  = 0; /* pad */

   addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
   addrS[4]  = 0;
   addrS[5]  = 0; /* FPU IP (bogus) */
   addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
                     could conceivably dump %CS here) */

   addrS[7]  = 0; /* Intel reserved */

   addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[10] = 0; /* segment selector for above operand pointer; %DS
                     perhaps? */
   addrS[11] = 0; /* Intel reserved */

   addrS[12] = toUShort(mxcsr);  /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
   addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */

   /* Copy in the FP registers, in ST order. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm7 remain to be copied.  If the host is big-endian, these
      need to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( xmm[0], gst->guest_XMM0 );
   COPY_U128( xmm[1], gst->guest_XMM1 );
   COPY_U128( xmm[2], gst->guest_XMM2 );
   COPY_U128( xmm[3], gst->guest_XMM3 );
   COPY_U128( xmm[4], gst->guest_XMM4 );
   COPY_U128( xmm[5], gst->guest_XMM5 );
   COPY_U128( xmm[6], gst->guest_XMM6 );
   COPY_U128( xmm[7], gst->guest_XMM7 );

#  undef COPY_U128
}
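/* For reference, the layout of the fxsave image (512 bytes on real
   hardware) as built above: bytes 0..23 hold FCW, FSW, the abridged
   tag byte, FOP and the (unimplemented here) FPU IP/DP fields; bytes
   24..31 hold MXCSR and its mask; bytes 32..159 hold %st0..%st7 in
   16-byte slots; bytes 160..287 hold %xmm0..%xmm7.  FXRSTOR below
   reads the same layout back. */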
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
{
   Fpu_State tmp;
   VexEmNote warnX87 = EmNote_NONE;
   VexEmNote warnXMM = EmNote_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   U128*     xmm     = (U128*)(addr + 160);
   UShort    fp_tags;
   Int       r, stno, i;

   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_XMM0, xmm[0] );
   COPY_U128( gst->guest_XMM1, xmm[1] );
   COPY_U128( gst->guest_XMM2, xmm[2] );
   COPY_U128( gst->guest_XMM3, xmm[3] );
   COPY_U128( gst->guest_XMM4, xmm[4] );
   COPY_U128( gst->guest_XMM5, xmm[5] );
   COPY_U128( gst->guest_XMM6, xmm[6] );
   COPY_U128( gst->guest_XMM7, xmm[7] );

#  undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */

   /* LLVM on Darwin turns the following loop into a movaps plus a
      handful of scalar stores.  This would work fine except for the
      fact that VEX doesn't keep the stack correctly (16-) aligned for
      the call, so it segfaults.  Hence, split the loop into two
      pieces (and pray LLVM doesn't merely glue them back together) so
      it's composed only of scalar stores and so is alignment
      insensitive.  Of course this is a kludge of the lamest kind --
      VEX should be fixed properly. */
   /* Code that seems to trigger the problem:
      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
   __asm__ __volatile__("" ::: "memory");
   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;

   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7] */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
{
   Fpu_State tmp;
   VexEmNote warnX87 = EmNote_NONE;
   VexEmNote warnXMM = EmNote_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   U128*     xmm     = (U128*)(addr + 160);
   UShort    fp_tags;
   Int       r, stno, i;

   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_XMM0, xmm[0] );
   COPY_U128( gst->guest_XMM1, xmm[1] );
   COPY_U128( gst->guest_XMM2, xmm[2] );
   COPY_U128( gst->guest_XMM3, xmm[3] );
   COPY_U128( gst->guest_XMM4, xmm[4] );
   COPY_U128( gst->guest_XMM5, xmm[5] );
   COPY_U128( gst->guest_XMM6, xmm[6] );
   COPY_U128( gst->guest_XMM7, xmm[7] );

#  undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */

   /* LLVM on Darwin turns the following loop into a movaps plus a
      handful of scalar stores.  This would work fine except for the
      fact that VEX doesn't keep the stack correctly 16-aligned for
      the call, so it segfaults.  Hence, split the loop into two
      pieces (and pray LLVM doesn't merely glue them back together) so
      it's composed only of scalar stores and so is alignment
      insensitive.  Of course this is a kludge of the lamest kind --
      VEX should be fixed properly. */
   /* Code that seems to trigger the problem:
      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
   __asm__ __volatile__("" ::: "memory");
   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;

   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7] */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );

   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = x86g_check_ldmxcsr( w32 );

     warnXMM = (VexEmNote)(w64 >> 32);

     gst->guest_SSEROUND = w64 & 0xFFFFFFFF;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmNote_NONE)
      return warnX87;
   else
      return warnXMM;
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
{
   do_get_x87( gst, (UChar*)addr );
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
{
   return do_put_x87( True/*regs too*/, (UChar*)addr, gst );
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple. */
   Int       i;
   UShort*   addrP = (UShort*)addr;
   Fpu_State tmp;
   do_get_x87( gst, (UChar*)&tmp );
   for (i = 0; i < 14; i++)
      addrP[i] = tmp.env[i];
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
{
   return do_put_x87( False/*don't move regs*/, (UChar*)addr, gst);
}

/* VISIBLE TO LIBVEX CLIENT */
/* Do x87 save from the supplied VexGuestX86State structure and store the
   result at the given address which represents a buffer of at least 108
   bytes. */
void LibVEX_GuestX86_get_x87 ( /*IN*/VexGuestX86State* vex_state,
                               /*OUT*/UChar* x87_state )
{
   do_get_x87 ( vex_state, x87_state );
}

/* VISIBLE TO LIBVEX CLIENT */
/* Do x87 restore from the supplied address and store read values to the given
   VexGuestX86State structure. */
VexEmNote LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
                                    /*MOD*/VexGuestX86State* vex_state )
{
   return do_put_x87 ( True/*moveRegs*/, x87_state, vex_state );
}

/* VISIBLE TO LIBVEX CLIENT */
/* Return mxcsr from the supplied VexGuestX86State structure. */
UInt LibVEX_GuestX86_get_mxcsr ( /*IN*/VexGuestX86State* vex_state )
{
   return x86g_create_mxcsr ( vex_state->guest_SSEROUND );
}

/* VISIBLE TO LIBVEX CLIENT */
/* Modify the given VexGuestX86State structure according to the passed mxcsr
   value. */
VexEmNote LibVEX_GuestX86_put_mxcsr ( /*IN*/UInt mxcsr,
                                      /*MOD*/VexGuestX86State* vex_state)
{
   ULong w64 = x86g_check_ldmxcsr( mxcsr );
   vex_state->guest_SSEROUND = w64 & 0xFFFFFFFF;
   return (VexEmNote)(w64 >> 32);
}
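
/* A small illustrative sketch (hypothetical helper, not part of the
   LibVEX API) of the packing convention used by x86g_check_ldmxcsr
   and relied on above: the 64-bit result carries an emulation note in
   the high 32 bits and the new SSE rounding mode in the low 32 bits. */
static inline VexEmNote example_split_ldmxcsr_result ( ULong w64,
                                                       /*OUT*/UInt* round )
{
   *round = (UInt)(w64 & 0xFFFFFFFF);   /* new guest_SSEROUND value */
   return (VexEmNote)(w64 >> 32);       /* warning, if any */
}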

/*---------------------------------------------------------------*/
/*--- Misc integer helpers, including rotates and CPUID.      ---*/
/*---------------------------------------------------------------*/

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate right
   through the carry bit.  Result in low 32 bits,
   new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of = ((arg >> 31) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = (arg >> 1) | (cf << 31);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of = ((arg >> 15) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of = ((arg >> 7) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7F) | (cf << 7);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      default:
         vpanic("calculate_RCR: invalid size");
   }

   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate left
   through the carry bit.  Result in low 32 bits,
   new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 31) & 1;
            arg    = (arg << 1) | (cf & 1);
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 31) ^ cf) & 1;
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 15) & 1;
            arg    = 0xFFFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 15) ^ cf) & 1;
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 7) & 1;
            arg    = 0xFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 7) ^ cf) & 1;
         break;
      default:
         vpanic("calculate_RCL: invalid size");
   }

   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
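
/* An illustrative sketch (hypothetical, not called by VEX) of how the
   packed result of the two rotate helpers above is consumed: rotated
   value in the low 32 bits, updated OSZACP flags in the high 32 bits.
   For example, RCR of 0x00000001 by one bit with CF clear yields
   value 0x00000000 with CF set. */
static inline UInt example_rcr_carry_out ( void )
{
   ULong packed = x86g_calculate_RCR( 0x00000001, 1, 0/*eflags in*/, 4 );
   UInt  flags  = (UInt)(packed >> 32);
   return (flags >> X86G_CC_SHIFT_C) & 1;   /* 1: the shifted-out bit */
}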
2076 */ 2077 static UInt calc_parity_8bit ( UInt w32 ) { 2078 UInt i; 2079 UInt p = 1; 2080 for (i = 0; i < 8; i++) 2081 p ^= (1 & (w32 >> i)); 2082 return p; 2083 } 2084 UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode ) 2085 { 2086 UInt r_AL = (flags_and_AX >> 0) & 0xFF; 2087 UInt r_AH = (flags_and_AX >> 8) & 0xFF; 2088 UInt r_O = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1; 2089 UInt r_S = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1; 2090 UInt r_Z = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1; 2091 UInt r_A = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1; 2092 UInt r_C = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1; 2093 UInt r_P = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1; 2094 UInt result = 0; 2095 2096 switch (opcode) { 2097 case 0x27: { /* DAA */ 2098 UInt old_AL = r_AL; 2099 UInt old_C = r_C; 2100 r_C = 0; 2101 if ((r_AL & 0xF) > 9 || r_A == 1) { 2102 r_AL = r_AL + 6; 2103 r_C = old_C; 2104 if (r_AL >= 0x100) r_C = 1; 2105 r_A = 1; 2106 } else { 2107 r_A = 0; 2108 } 2109 if (old_AL > 0x99 || old_C == 1) { 2110 r_AL = r_AL + 0x60; 2111 r_C = 1; 2112 } else { 2113 r_C = 0; 2114 } 2115 /* O is undefined. S Z and P are set according to the 2116 result. */ 2117 r_AL &= 0xFF; 2118 r_O = 0; /* let's say */ 2119 r_S = (r_AL & 0x80) ? 1 : 0; 2120 r_Z = (r_AL == 0) ? 1 : 0; 2121 r_P = calc_parity_8bit( r_AL ); 2122 break; 2123 } 2124 case 0x2F: { /* DAS */ 2125 UInt old_AL = r_AL; 2126 UInt old_C = r_C; 2127 r_C = 0; 2128 if ((r_AL & 0xF) > 9 || r_A == 1) { 2129 Bool borrow = r_AL < 6; 2130 r_AL = r_AL - 6; 2131 r_C = old_C; 2132 if (borrow) r_C = 1; 2133 r_A = 1; 2134 } else { 2135 r_A = 0; 2136 } 2137 if (old_AL > 0x99 || old_C == 1) { 2138 r_AL = r_AL - 0x60; 2139 r_C = 1; 2140 } else { 2141 /* Intel docs are wrong: r_C = 0; */ 2142 } 2143 /* O is undefined. S Z and P are set according to the 2144 result. */ 2145 r_AL &= 0xFF; 2146 r_O = 0; /* let's say */ 2147 r_S = (r_AL & 0x80) ? 1 : 0; 2148 r_Z = (r_AL == 0) ? 1 : 0; 2149 r_P = calc_parity_8bit( r_AL ); 2150 break; 2151 } 2152 case 0x37: { /* AAA */ 2153 Bool nudge = r_AL > 0xF9; 2154 if ((r_AL & 0xF) > 9 || r_A == 1) { 2155 r_AL = r_AL + 6; 2156 r_AH = r_AH + 1 + (nudge ? 1 : 0); 2157 r_A = 1; 2158 r_C = 1; 2159 r_AL = r_AL & 0xF; 2160 } else { 2161 r_A = 0; 2162 r_C = 0; 2163 r_AL = r_AL & 0xF; 2164 } 2165 /* O S Z and P are undefined. */ 2166 r_O = r_S = r_Z = r_P = 0; /* let's say */ 2167 break; 2168 } 2169 case 0x3F: { /* AAS */ 2170 Bool nudge = r_AL < 0x06; 2171 if ((r_AL & 0xF) > 9 || r_A == 1) { 2172 r_AL = r_AL - 6; 2173 r_AH = r_AH - 1 - (nudge ? 1 : 0); 2174 r_A = 1; 2175 r_C = 1; 2176 r_AL = r_AL & 0xF; 2177 } else { 2178 r_A = 0; 2179 r_C = 0; 2180 r_AL = r_AL & 0xF; 2181 } 2182 /* O S Z and P are undefined. 

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for DAA/DAS/AAA/AAS.
   AX value in low half of arg, OSZACP in upper half.
   See guest-x86/toIR.c usage point for details.
*/
UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
{
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0x27: { /* DAA */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_C  = old_C;
            if (r_AL >= 0x100) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL + 0x60;
            r_C  = 1;
         } else {
            r_C = 0;
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x2F: { /* DAS */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            Bool borrow = r_AL < 6;
            r_AL = r_AL - 6;
            r_C  = old_C;
            if (borrow) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL - 0x60;
            r_C  = 1;
         } else {
            /* Intel docs are wrong: r_C = 0; */
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x37: { /* AAA */
         Bool nudge = r_AL > 0xF9;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_AH = r_AH + 1 + (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      case 0x3F: { /* AAS */
         Bool nudge = r_AL < 0x06;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL - 6;
            r_AH = r_AH - 1 - (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      default:
         vassert(0);
   }
   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}

UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
{
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0xD4: { /* AAM */
         r_AH = r_AL / 10;
         r_AL = r_AL % 10;
         break;
      }
      case 0xD5: { /* AAD */
         r_AL = ((r_AH * 10) + r_AL) & 0xff;
         r_AH = 0;
         break;
      }
      default:
         vassert(0);
   }

   r_O = 0; /* let's say (undefined) */
   r_C = 0; /* let's say (undefined) */
   r_A = 0; /* let's say (undefined) */
   r_S = (r_AL & 0x80) ? 1 : 0;
   r_Z = (r_AL == 0) ? 1 : 0;
   r_P = calc_parity_8bit( r_AL );

   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
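
/* A minimal sketch (hypothetical, for illustration) of the argument
   packing shared by the two BCD helpers above: AL in bits 7:0, AH in
   bits 15:8, and the OSZACP flags in the upper half, each at its
   X86G_CC_SHIFT_* position plus 16.  AAM with AL = 0x2F (47 decimal)
   therefore returns AH = 4, AL = 7 in the low 16 bits. */
static inline UInt example_pack_flags_and_AX ( UInt eflags, UInt ax )
{
   UInt oszacp = eflags & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                           | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);
   return (oszacp << 16) | (ax & 0xFFFF);
}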

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, return 1. */
ULong x86g_dirtyhelper_RDTSC ( void )
{
#  if defined(__i386__)
   ULong res;
   __asm__ __volatile__("rdtsc" : "=A" (res));
   return res;
#  else
   return 1ULL;
#  endif
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be a P55C (Intel Pentium/MMX) */
void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x1;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      default:
         st->guest_EAX = 0x543;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x8001bf;
         break;
   }
}
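
/* For illustration (hypothetical decoder, not part of VEX): CPUID
   leaf 0 returns the vendor string as three little-endian words in
   EBX, EDX, ECX, in that order.  The constants above spell
   "GenuineIntel": 0x756e6547 is "Genu", 0x49656e69 is "ineI" and
   0x6c65746e is "ntel". */
static inline void example_decode_vendor_string ( const VexGuestX86State* st,
                                                  /*OUT*/UChar* buf12 )
{
   UInt words[3];
   Int  i;
   words[0] = st->guest_EBX;
   words[1] = st->guest_EDX;
   words[2] = st->guest_ECX;
   for (i = 0; i < 12; i++)
      buf12[i] = toUChar( (words[i/4] >> (8*(i%4))) & 0xFF );
}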

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be an Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
/* But without 3DNow support (weird, but we really don't support it). */
void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      /* vendor ID */
      case 0:
         st->guest_EAX = 0x1;
         st->guest_EBX = 0x68747541;
         st->guest_ECX = 0x444d4163;
         st->guest_EDX = 0x69746e65;
         break;
      /* feature bits */
      case 1:
         st->guest_EAX = 0x621;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x183f9ff;
         break;
      /* Highest Extended Function Supported (0x80000004 brand string) */
      case 0x80000000:
         st->guest_EAX = 0x80000004;
         st->guest_EBX = 0x68747541;
         st->guest_ECX = 0x444d4163;
         st->guest_EDX = 0x69746e65;
         break;
      /* Extended Processor Info and Feature Bits */
      case 0x80000001:
         st->guest_EAX = 0x721;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
         break;
      /* Processor Brand String "AMD Athlon(tm) Processor" */
      case 0x80000002:
         st->guest_EAX = 0x20444d41;
         st->guest_EBX = 0x6c687441;
         st->guest_ECX = 0x74286e6f;
         st->guest_EDX = 0x5020296d;
         break;
      case 0x80000003:
         st->guest_EAX = 0x65636f72;
         st->guest_EBX = 0x726f7373;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x0;
         break;
      default:
         st->guest_EAX = 0x0;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x0;
         break;
   }
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be the following SSE1-capable CPU:
   vendor_id  : GenuineIntel
   cpu family : 6
   model      : 11
   model name : Intel(R) Pentium(R) III CPU family 1133MHz
   stepping   : 1
   cpu MHz    : 1131.013
   cache size : 512 KB
*/
void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x00000002;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      case 1:
         st->guest_EAX = 0x000006b1;
         st->guest_EBX = 0x00000004;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0383fbff;
         break;
      default:
         st->guest_EAX = 0x03020101;
         st->guest_EBX = 0x00000000;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0c040883;
         break;
   }
}

/* Claim to be the following SSE2-capable CPU:
   vendor_id       : GenuineIntel
   cpu family      : 15
   model           : 2
   model name      : Intel(R) Pentium(R) 4 CPU 3.00GHz
   stepping        : 9
   microcode       : 0x17
   cpu MHz         : 2992.577
   cache size      : 512 KB
   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca
                     cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
                     tm pbe pebs bts cid xtpr
   clflush size    : 64
   cache_alignment : 128
   address sizes   : 36 bits physical, 32 bits virtual
*/
void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x00000002;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      case 1:
         st->guest_EAX = 0x00000f29;
         st->guest_EBX = 0x01020809;
         st->guest_ECX = 0x00004400;
         st->guest_EDX = 0xbfebfbff;
         break;
      default:
         st->guest_EAX = 0x03020101;
         st->guest_EBX = 0x00000000;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0c040883;
         break;
   }
}

/* Claim to be the following SSSE3-capable CPU (2 x ...):
   vendor_id       : GenuineIntel
   cpu family      : 6
   model           : 15
   model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
   stepping        : 6
   cpu MHz         : 2394.000
   cache size      : 4096 KB
   physical id     : 0
   siblings        : 2
   core id         : 0
   cpu cores       : 2
   fpu             : yes
   fpu_exception   : yes
   cpuid level     : 10
   wp              : yes
   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
                     mtrr pge mca cmov pat pse36 clflush dts acpi
                     mmx fxsr sse sse2 ss ht tm syscall nx lm
                     constant_tsc pni monitor ds_cpl vmx est tm2
                     cx16 xtpr lahf_lm
   bogomips        : 4798.78
   clflush size    : 64
   cache_alignment : 64
   address sizes   : 36 bits physical, 48 bits virtual
   power management:
*/
void x86g_dirtyhelper_CPUID_sse3 ( VexGuestX86State* st )
{
#  define SET_ABCD(_a,_b,_c,_d)                \
      do { st->guest_EAX = (UInt)(_a);         \
           st->guest_EBX = (UInt)(_b);         \
           st->guest_ECX = (UInt)(_c);         \
           st->guest_EDX = (UInt)(_d);         \
      } while (0)

   switch (st->guest_EAX) {
      case 0x00000000:
         SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
         break;
      case 0x00000001:
         SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
         break;
      case 0x00000002:
         SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
         break;
      case 0x00000003:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000004: {
         switch (st->guest_ECX) {
            case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
                                      0x00000fff, 0x00000001); break;
            default:         SET_ABCD(0x00000000, 0x00000000,
                                      0x00000000, 0x00000000); break;
         }
         break;
      }
      case 0x00000005:
         SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
         break;
      case 0x00000006:
         SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
         break;
      case 0x00000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000008:
         SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000009:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x0000000a:
      unhandled_eax_value:
         SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000000:
         SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000001:
         SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
         break;
      case 0x80000002:
         SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
         break;
      case 0x80000003:
         SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
         break;
      case 0x80000004:
         SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
         break;
      case 0x80000005:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000006:
         SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
         break;
      case 0x80000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000008:
         SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         goto unhandled_eax_value;
   }
#  undef SET_ABCD
}
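
/* For illustration (hypothetical, not part of VEX): leaves
   0x80000002..0x80000004 return the 48-byte processor brand string,
   four little-endian words per leaf in EAX, EBX, ECX, EDX.  So the
   first two words above, 0x65746e49 and 0x2952286c, decode to "Inte"
   and "l(R)" respectively. */
static inline UChar example_brand_string_byte ( UInt word, UInt byteno )
{
   /* byteno in 0 .. 3; byte 0 is the lowest-order byte */
   return toUChar( (word >> (8*byteno)) & 0xFF );
}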

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, return 0. */
UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   UInt r = 0;
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
                              : "=a" (r) : "Nd" (portno));
         break;
      default:
         break;
   }
   return r;
#  else
   return 0;
#  endif
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, do nothing. */
void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("outl %0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("outw %w0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("outb %b0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      default:
         break;
   }
#  else
   /* do nothing */
#  endif
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, zero out the 6-byte result
   buffer instead. */
/* op = 0: call the native SGDT instruction.
   op = 1: call the native SIDT instruction.
*/
void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
#  if defined(__i386__)
   switch (op) {
      case 0:
         __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
         break;
      case 1:
         __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
         break;
      default:
         vpanic("x86g_dirtyhelper_SxDT");
   }
#  else
   /* zero out the result buffer */
   UChar* p = (UChar*)address;
   p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
#  endif
}
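
/* A minimal sketch (hypothetical, for illustration) of the 6-byte
   pseudo-descriptor that SGDT/SIDT store through 'address' above on
   32-bit x86: a 16-bit table limit followed by a 32-bit linear base
   address, both little-endian. */
static inline UInt example_sxdt_base ( const UChar* p )
{
   return  ((UInt)p[2])
         | ((UInt)p[3] << 8)
         | ((UInt)p[4] << 16)
         | ((UInt)p[5] << 24);   /* base is in bytes 2..5 */
}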

/*---------------------------------------------------------------*/
/*--- Helpers for MMX/SSE/SSE2.                               ---*/
/*---------------------------------------------------------------*/

static inline UChar abdU8 ( UChar xx, UChar yy ) {
   return toUChar(xx>yy ? xx-yy : yy-xx);
}

static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   return (((ULong)w1) << 32) | ((ULong)w0);
}

static inline UShort sel16x4_3 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32 >> 16);
}
static inline UShort sel16x4_2 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32);
}
static inline UShort sel16x4_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32 >> 16);
}
static inline UShort sel16x4_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32);
}

static inline UChar sel8x8_7 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 24);
}
static inline UChar sel8x8_6 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 16);
}
static inline UChar sel8x8_5 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 8);
}
static inline UChar sel8x8_4 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 0);
}
static inline UChar sel8x8_3 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 24);
}
static inline UChar sel8x8_2 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 16);
}
static inline UChar sel8x8_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 8);
}
static inline UChar sel8x8_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 0);
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
{
   return
      mk32x2(
         (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
            + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
         (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
            + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
      );
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
{
   UInt t = 0;
   t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
   t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
   t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
   t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
   t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
   t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
   t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
   t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
   t &= 0xFFFF;
   return (ULong)t;
}
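
/* For illustration (hypothetical values, not part of VEX): PSADBW
   sums the absolute differences of the eight byte lanes, so comparing
   0x00..01 against 0x00..03 gives |1-3| = 2 in the low 16 bits, with
   the upper 48 bits zeroed. */
static inline ULong example_psadbw ( void )
{
   return x86g_calculate_mmx_psadbw( 0x0000000000000001ULL,
                                     0x0000000000000003ULL );  /* == 2 */
}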

/*---------------------------------------------------------------*/
/*--- Helpers for dealing with segment overrides.             ---*/
/*---------------------------------------------------------------*/

static inline
UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
{
   UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
   UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
   UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
   return (hi << 24) | (mid << 16) | lo;
}

static inline
UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
{
   UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
   UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
   UInt limit = (hi << 16) | lo;
   if (ent->LdtEnt.Bits.Granularity)
      limit = (limit << 12) | 0xFFF;
   return limit;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
                              UInt seg_selector, UInt virtual_addr )
{
   UInt tiBit, base, limit;
   VexGuestX86SegDescr* the_descrs;

   Bool verboze = False;

   /* If this isn't true, we're in Big Trouble. */
   vassert(8 == sizeof(VexGuestX86SegDescr));

   if (verboze)
      vex_printf("x86g_use_seg_selector: "
                 "seg_selector = 0x%x, vaddr = 0x%x\n",
                 seg_selector, virtual_addr);

   /* Check for wildly invalid selector. */
   if (seg_selector & ~0xFFFF)
      goto bad;

   seg_selector &= 0x0000FFFF;

   /* Sanity check the segment selector.  Ensure that RPL=11b (least
      privilege).  This forms the bottom 2 bits of the selector. */
   if ((seg_selector & 3) != 3)
      goto bad;

   /* Extract the TI bit (0 means GDT, 1 means LDT) */
   tiBit = (seg_selector >> 2) & 1;

   /* Convert the segment selector onto a table index */
   seg_selector >>= 3;
   vassert(seg_selector < 8192);

   if (tiBit == 0) {

      /* GDT access. */
      /* Do we actually have a GDT to look at? */
      if (gdt == 0)
         goto bad;

      /* Check for access to non-existent entry. */
      if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
         goto bad;

      the_descrs = (VexGuestX86SegDescr*)gdt;
      base  = get_segdescr_base (&the_descrs[seg_selector]);
      limit = get_segdescr_limit(&the_descrs[seg_selector]);

   } else {

      /* All the same stuff, except for the LDT. */
      if (ldt == 0)
         goto bad;

      if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
         goto bad;

      the_descrs = (VexGuestX86SegDescr*)ldt;
      base  = get_segdescr_base (&the_descrs[seg_selector]);
      limit = get_segdescr_limit(&the_descrs[seg_selector]);

   }

   /* Do the limit check.  Note, this check is just slightly too
      slack.  Really it should be "if (virtual_addr + size - 1 >=
      limit)," but we don't have the size info to hand.  Getting it
      could be significantly complex. */
   if (virtual_addr >= limit)
      goto bad;

   if (verboze)
      vex_printf("x86g_use_seg_selector: "
                 "base = 0x%x, addr = 0x%x\n",
                 base, base + virtual_addr);

   /* High 32 bits are zero, indicating success. */
   return (ULong)( ((UInt)virtual_addr) + base );

  bad:
   return 1ULL << 32;
}
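
/* A minimal sketch (hypothetical, for illustration) of how a caller
   interprets the result of x86g_use_seg_selector: any set bit in the
   upper 32 bits signals a translation failure, otherwise the low 32
   bits hold the linear address.  The selector layout it checks is RPL
   in bits 1:0, the TI (table indicator) bit in bit 2, and the table
   index in bits 15:3. */
static inline UInt example_seg_translate_ok ( ULong r, /*OUT*/UInt* linear )
{
   if ((r >> 32) != 0)
      return 0;          /* bad selector or limit violation */
   *linear = (UInt)r;
   return 1;
}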

/*---------------------------------------------------------------*/
/*--- Helpers for dealing with, and describing,               ---*/
/*--- guest state as a whole.                                 ---*/
/*---------------------------------------------------------------*/

/* Initialise the entire x86 guest state. */
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
{
   vex_state->host_EvC_FAILADDR = 0;
   vex_state->host_EvC_COUNTER = 0;

   vex_state->guest_EAX = 0;
   vex_state->guest_ECX = 0;
   vex_state->guest_EDX = 0;
   vex_state->guest_EBX = 0;
   vex_state->guest_ESP = 0;
   vex_state->guest_EBP = 0;
   vex_state->guest_ESI = 0;
   vex_state->guest_EDI = 0;

   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
   vex_state->guest_DFLAG   = 1; /* forwards */
   vex_state->guest_IDFLAG  = 0;
   vex_state->guest_ACFLAG  = 0;

   vex_state->guest_EIP = 0;

   /* Initialise the simulated FPU */
   x86g_dirtyhelper_FINIT( vex_state );

   /* Initialise the SSE state. */
#  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;

   vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
   SSEZERO(vex_state->guest_XMM0);
   SSEZERO(vex_state->guest_XMM1);
   SSEZERO(vex_state->guest_XMM2);
   SSEZERO(vex_state->guest_XMM3);
   SSEZERO(vex_state->guest_XMM4);
   SSEZERO(vex_state->guest_XMM5);
   SSEZERO(vex_state->guest_XMM6);
   SSEZERO(vex_state->guest_XMM7);

#  undef SSEZERO

   vex_state->guest_CS  = 0;
   vex_state->guest_DS  = 0;
   vex_state->guest_ES  = 0;
   vex_state->guest_FS  = 0;
   vex_state->guest_GS  = 0;
   vex_state->guest_SS  = 0;
   vex_state->guest_LDT = 0;
   vex_state->guest_GDT = 0;

   vex_state->guest_EMNOTE = EmNote_NONE;

   /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
   vex_state->guest_CMSTART = 0;
   vex_state->guest_CMLEN   = 0;

   vex_state->guest_NRADDR   = 0;
   vex_state->guest_SC_CLASS = 0;
   vex_state->guest_IP_AT_SYSCALL = 0;

   vex_state->padding1 = 0;
}


/* Figure out if any part of the guest state contained in minoff
   .. maxoff requires precise memory exceptions.  If in doubt return
   True (but this generates significantly slower code).

   By default we enforce precise exns for guest %ESP, %EBP and %EIP
   only.  These are the minimum needed to extract correct stack
   backtraces from x86 code.

   Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
*/
Bool guest_x86_state_requires_precise_mem_exns (
        Int minoff, Int maxoff, VexRegisterUpdates pxControl
     )
{
   Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
   Int ebp_max = ebp_min + 4 - 1;
   Int esp_min = offsetof(VexGuestX86State, guest_ESP);
   Int esp_max = esp_min + 4 - 1;
   Int eip_min = offsetof(VexGuestX86State, guest_EIP);
   Int eip_max = eip_min + 4 - 1;

   if (maxoff < esp_min || minoff > esp_max) {
      /* no overlap with esp */
      if (pxControl == VexRegUpdSpAtMemAccess)
         return False; // We only need to check stack pointer.
   } else {
      return True;
   }

   if (maxoff < ebp_min || minoff > ebp_max) {
      /* no overlap with ebp */
   } else {
      return True;
   }

   if (maxoff < eip_min || minoff > eip_max) {
      /* no overlap with eip */
   } else {
      return True;
   }

   return False;
}
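
/* For illustration (hypothetical helper, not used by VEX): the
   overlap test applied three times above is the standard closed
   interval check -- ranges [minoff, maxoff] and [lo, hi] intersect
   exactly when neither lies wholly below the other. */
static inline Bool example_ranges_overlap ( Int minoff, Int maxoff,
                                            Int lo, Int hi )
{
   return toBool( !(maxoff < lo || minoff > hi) );
}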

#define ALWAYSDEFD(field)                            \
    { offsetof(VexGuestX86State, field),             \
      (sizeof ((VexGuestX86State*)0)->field) }

VexGuestLayout
   x86guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestX86State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestX86State,guest_ESP),
          .sizeof_SP = 4,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestX86State,guest_EBP),
          .sizeof_FP = 4,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestX86State,guest_EIP),
          .sizeof_IP = 4,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 24,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_ACFLAG),
                 /*  5 */ ALWAYSDEFD(guest_EIP),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 /* 10 */ ALWAYSDEFD(guest_CS),
                 /* 11 */ ALWAYSDEFD(guest_DS),
                 /* 12 */ ALWAYSDEFD(guest_ES),
                 /* 13 */ ALWAYSDEFD(guest_FS),
                 /* 14 */ ALWAYSDEFD(guest_GS),
                 /* 15 */ ALWAYSDEFD(guest_SS),
                 /* 16 */ ALWAYSDEFD(guest_LDT),
                 /* 17 */ ALWAYSDEFD(guest_GDT),
                 /* 18 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 20 */ ALWAYSDEFD(guest_CMSTART),
                 /* 21 */ ALWAYSDEFD(guest_CMLEN),
                 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };


/*---------------------------------------------------------------*/
/*--- end                                 guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/