1 2 /*---------------------------------------------------------------*/ 3 /*--- begin guest_amd64_helpers.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2011 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 #include "libvex_basictypes.h" 37 #include "libvex_emwarn.h" 38 #include "libvex_guest_amd64.h" 39 #include "libvex_ir.h" 40 #include "libvex.h" 41 42 #include "main_util.h" 43 #include "guest_generic_bb_to_IR.h" 44 #include "guest_amd64_defs.h" 45 #include "guest_generic_x87.h" 46 47 48 /* This file contains helper functions for amd64 guest code. 49 Calls to these functions are generated by the back end. 50 These calls are of course in the host machine code and 51 this file will be compiled to host machine code, so that 52 all makes sense. 
53 54 Only change the signatures of these helper functions very 55 carefully. If you change the signature here, you'll have to change 56 the parameters passed to it in the IR calls constructed by 57 guest-amd64/toIR.c. 58 59 The convention used is that all functions called from generated 60 code are named amd64g_<something>, and any function whose name lacks 61 that prefix is not called from generated code. Note that some 62 LibVEX_* functions can however be called by VEX's client, but that 63 is not the same as calling them from VEX-generated code. 64 */ 65 66 67 /* Set to 1 to get detailed profiling info about use of the flag 68 machinery. */ 69 #define PROFILE_RFLAGS 0 70 71 72 /*---------------------------------------------------------------*/ 73 /*--- %rflags run-time helpers. ---*/ 74 /*---------------------------------------------------------------*/ 75 76 /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags 77 after imulq/mulq. */ 78 79 static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo ) 80 { 81 ULong u0, v0, w0; 82 Long u1, v1, w1, w2, t; 83 u0 = u & 0xFFFFFFFFULL; 84 u1 = u >> 32; 85 v0 = v & 0xFFFFFFFFULL; 86 v1 = v >> 32; 87 w0 = u0 * v0; 88 t = u1 * v0 + (w0 >> 32); 89 w1 = t & 0xFFFFFFFFULL; 90 w2 = t >> 32; 91 w1 = u0 * v1 + w1; 92 *rHi = u1 * v1 + w2 + (w1 >> 32); 93 *rLo = u * v; 94 } 95 96 static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo ) 97 { 98 ULong u0, v0, w0; 99 ULong u1, v1, w1,w2,t; 100 u0 = u & 0xFFFFFFFFULL; 101 u1 = u >> 32; 102 v0 = v & 0xFFFFFFFFULL; 103 v1 = v >> 32; 104 w0 = u0 * v0; 105 t = u1 * v0 + (w0 >> 32); 106 w1 = t & 0xFFFFFFFFULL; 107 w2 = t >> 32; 108 w1 = u0 * v1 + w1; 109 *rHi = u1 * v1 + w2 + (w1 >> 32); 110 *rLo = u * v; 111 } 112 113 114 static const UChar parity_table[256] = { 115 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 116 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 117 0, AMD64G_CC_MASK_P, 
AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 118 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 119 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 120 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 121 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 122 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 123 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 124 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 125 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 126 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 127 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 128 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 129 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 130 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 131 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 132 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 133 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 134 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 135 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 136 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 137 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 138 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 139 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 
140 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 141 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 142 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 143 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 144 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 145 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 146 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 147 }; 148 149 /* generalised left-shifter */ 150 static inline Long lshift ( Long x, Int n ) 151 { 152 if (n >= 0) 153 return x << n; 154 else 155 return x >> (-n); 156 } 157 158 /* identity on ULong */ 159 static inline ULong idULong ( ULong x ) 160 { 161 return x; 162 } 163 164 165 #define PREAMBLE(__data_bits) \ 166 /* const */ ULong DATA_MASK \ 167 = __data_bits==8 \ 168 ? 0xFFULL \ 169 : (__data_bits==16 \ 170 ? 0xFFFFULL \ 171 : (__data_bits==32 \ 172 ? 0xFFFFFFFFULL \ 173 : 0xFFFFFFFFFFFFFFFFULL)); \ 174 /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1); \ 175 /* const */ ULong CC_DEP1 = cc_dep1_formal; \ 176 /* const */ ULong CC_DEP2 = cc_dep2_formal; \ 177 /* const */ ULong CC_NDEP = cc_ndep_formal; \ 178 /* Four bogus assignments, which hopefully gcc can */ \ 179 /* optimise away, and which stop it complaining about */ \ 180 /* unused variables. 
*/ \ 181 SIGN_MASK = SIGN_MASK; \ 182 DATA_MASK = DATA_MASK; \ 183 CC_DEP2 = CC_DEP2; \ 184 CC_NDEP = CC_NDEP; 185 186 187 /*-------------------------------------------------------------*/ 188 189 #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \ 190 { \ 191 PREAMBLE(DATA_BITS); \ 192 { Long cf, pf, af, zf, sf, of; \ 193 Long argL, argR, res; \ 194 argL = CC_DEP1; \ 195 argR = CC_DEP2; \ 196 res = argL + argR; \ 197 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \ 198 pf = parity_table[(UChar)res]; \ 199 af = (res ^ argL ^ argR) & 0x10; \ 200 zf = ((DATA_UTYPE)res == 0) << 6; \ 201 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 202 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \ 203 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ 204 return cf | pf | af | zf | sf | of; \ 205 } \ 206 } 207 208 /*-------------------------------------------------------------*/ 209 210 #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \ 211 { \ 212 PREAMBLE(DATA_BITS); \ 213 { Long cf, pf, af, zf, sf, of; \ 214 Long argL, argR, res; \ 215 argL = CC_DEP1; \ 216 argR = CC_DEP2; \ 217 res = argL - argR; \ 218 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \ 219 pf = parity_table[(UChar)res]; \ 220 af = (res ^ argL ^ argR) & 0x10; \ 221 zf = ((DATA_UTYPE)res == 0) << 6; \ 222 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 223 of = lshift((argL ^ argR) & (argL ^ res), \ 224 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ 225 return cf | pf | af | zf | sf | of; \ 226 } \ 227 } 228 229 /*-------------------------------------------------------------*/ 230 231 #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \ 232 { \ 233 PREAMBLE(DATA_BITS); \ 234 { Long cf, pf, af, zf, sf, of; \ 235 Long argL, argR, oldC, res; \ 236 oldC = CC_NDEP & AMD64G_CC_MASK_C; \ 237 argL = CC_DEP1; \ 238 argR = CC_DEP2 ^ oldC; \ 239 res = (argL + argR) + oldC; \ 240 if (oldC) \ 241 cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \ 242 else \ 243 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \ 244 pf = parity_table[(UChar)res]; \ 245 af = (res ^ argL ^ argR) & 0x10; \ 246 zf = 
((DATA_UTYPE)res == 0) << 6; \ 247 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 248 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \ 249 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ 250 return cf | pf | af | zf | sf | of; \ 251 } \ 252 } 253 254 /*-------------------------------------------------------------*/ 255 256 #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \ 257 { \ 258 PREAMBLE(DATA_BITS); \ 259 { Long cf, pf, af, zf, sf, of; \ 260 Long argL, argR, oldC, res; \ 261 oldC = CC_NDEP & AMD64G_CC_MASK_C; \ 262 argL = CC_DEP1; \ 263 argR = CC_DEP2 ^ oldC; \ 264 res = (argL - argR) - oldC; \ 265 if (oldC) \ 266 cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \ 267 else \ 268 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \ 269 pf = parity_table[(UChar)res]; \ 270 af = (res ^ argL ^ argR) & 0x10; \ 271 zf = ((DATA_UTYPE)res == 0) << 6; \ 272 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 273 of = lshift((argL ^ argR) & (argL ^ res), \ 274 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ 275 return cf | pf | af | zf | sf | of; \ 276 } \ 277 } 278 279 /*-------------------------------------------------------------*/ 280 281 #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \ 282 { \ 283 PREAMBLE(DATA_BITS); \ 284 { Long cf, pf, af, zf, sf, of; \ 285 cf = 0; \ 286 pf = parity_table[(UChar)CC_DEP1]; \ 287 af = 0; \ 288 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ 289 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ 290 of = 0; \ 291 return cf | pf | af | zf | sf | of; \ 292 } \ 293 } 294 295 /*-------------------------------------------------------------*/ 296 297 #define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \ 298 { \ 299 PREAMBLE(DATA_BITS); \ 300 { Long cf, pf, af, zf, sf, of; \ 301 Long argL, argR, res; \ 302 res = CC_DEP1; \ 303 argL = res - 1; \ 304 argR = 1; \ 305 cf = CC_NDEP & AMD64G_CC_MASK_C; \ 306 pf = parity_table[(UChar)res]; \ 307 af = (res ^ argL ^ argR) & 0x10; \ 308 zf = ((DATA_UTYPE)res == 0) << 6; \ 309 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 310 of = ((res & DATA_MASK) == SIGN_MASK) << 11; \ 311 return 
cf | pf | af | zf | sf | of; \ 312 } \ 313 } 314 315 /*-------------------------------------------------------------*/ 316 317 #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \ 318 { \ 319 PREAMBLE(DATA_BITS); \ 320 { Long cf, pf, af, zf, sf, of; \ 321 Long argL, argR, res; \ 322 res = CC_DEP1; \ 323 argL = res + 1; \ 324 argR = 1; \ 325 cf = CC_NDEP & AMD64G_CC_MASK_C; \ 326 pf = parity_table[(UChar)res]; \ 327 af = (res ^ argL ^ argR) & 0x10; \ 328 zf = ((DATA_UTYPE)res == 0) << 6; \ 329 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 330 of = ((res & DATA_MASK) \ 331 == ((ULong)SIGN_MASK - 1)) << 11; \ 332 return cf | pf | af | zf | sf | of; \ 333 } \ 334 } 335 336 /*-------------------------------------------------------------*/ 337 338 #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \ 339 { \ 340 PREAMBLE(DATA_BITS); \ 341 { Long cf, pf, af, zf, sf, of; \ 342 cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C; \ 343 pf = parity_table[(UChar)CC_DEP1]; \ 344 af = 0; /* undefined */ \ 345 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ 346 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ 347 /* of is defined if shift count == 1 */ \ 348 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \ 349 & AMD64G_CC_MASK_O; \ 350 return cf | pf | af | zf | sf | of; \ 351 } \ 352 } 353 354 /*-------------------------------------------------------------*/ 355 356 #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \ 357 { \ 358 PREAMBLE(DATA_BITS); \ 359 { Long cf, pf, af, zf, sf, of; \ 360 cf = CC_DEP2 & 1; \ 361 pf = parity_table[(UChar)CC_DEP1]; \ 362 af = 0; /* undefined */ \ 363 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ 364 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ 365 /* of is defined if shift count == 1 */ \ 366 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \ 367 & AMD64G_CC_MASK_O; \ 368 return cf | pf | af | zf | sf | of; \ 369 } \ 370 } 371 372 /*-------------------------------------------------------------*/ 373 374 /* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). 
*/ 375 /* DEP1 = result, NDEP = old flags */ 376 #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \ 377 { \ 378 PREAMBLE(DATA_BITS); \ 379 { Long fl \ 380 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \ 381 | (AMD64G_CC_MASK_C & CC_DEP1) \ 382 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \ 383 11-(DATA_BITS-1)) \ 384 ^ lshift(CC_DEP1, 11))); \ 385 return fl; \ 386 } \ 387 } 388 389 /*-------------------------------------------------------------*/ 390 391 /* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */ 392 /* DEP1 = result, NDEP = old flags */ 393 #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \ 394 { \ 395 PREAMBLE(DATA_BITS); \ 396 { Long fl \ 397 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \ 398 | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \ 399 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \ 400 11-(DATA_BITS-1)) \ 401 ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \ 402 return fl; \ 403 } \ 404 } 405 406 /*-------------------------------------------------------------*/ 407 408 #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \ 409 DATA_U2TYPE, NARROWto2U) \ 410 { \ 411 PREAMBLE(DATA_BITS); \ 412 { Long cf, pf, af, zf, sf, of; \ 413 DATA_UTYPE hi; \ 414 DATA_UTYPE lo \ 415 = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \ 416 * ((DATA_UTYPE)CC_DEP2) ); \ 417 DATA_U2TYPE rr \ 418 = NARROWto2U( \ 419 ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \ 420 * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \ 421 hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \ 422 cf = (hi != 0); \ 423 pf = parity_table[(UChar)lo]; \ 424 af = 0; /* undefined */ \ 425 zf = (lo == 0) << 6; \ 426 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \ 427 of = cf << 11; \ 428 return cf | pf | af | zf | sf | of; \ 429 } \ 430 } 431 432 /*-------------------------------------------------------------*/ 433 434 #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \ 435 DATA_S2TYPE, NARROWto2S) \ 436 { \ 437 PREAMBLE(DATA_BITS); \ 438 { Long cf, pf, af, zf, sf, of; \ 439 DATA_STYPE hi; \ 440 DATA_STYPE lo \ 441 = NARROWtoS( 
((DATA_STYPE)CC_DEP1) \ 442 * ((DATA_STYPE)CC_DEP2) ); \ 443 DATA_S2TYPE rr \ 444 = NARROWto2S( \ 445 ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \ 446 * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \ 447 hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \ 448 cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \ 449 pf = parity_table[(UChar)lo]; \ 450 af = 0; /* undefined */ \ 451 zf = (lo == 0) << 6; \ 452 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \ 453 of = cf << 11; \ 454 return cf | pf | af | zf | sf | of; \ 455 } \ 456 } 457 458 /*-------------------------------------------------------------*/ 459 460 #define ACTIONS_UMULQ \ 461 { \ 462 PREAMBLE(64); \ 463 { Long cf, pf, af, zf, sf, of; \ 464 ULong lo, hi; \ 465 mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo ); \ 466 cf = (hi != 0); \ 467 pf = parity_table[(UChar)lo]; \ 468 af = 0; /* undefined */ \ 469 zf = (lo == 0) << 6; \ 470 sf = lshift(lo, 8 - 64) & 0x80; \ 471 of = cf << 11; \ 472 return cf | pf | af | zf | sf | of; \ 473 } \ 474 } 475 476 /*-------------------------------------------------------------*/ 477 478 #define ACTIONS_SMULQ \ 479 { \ 480 PREAMBLE(64); \ 481 { Long cf, pf, af, zf, sf, of; \ 482 Long lo, hi; \ 483 mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo ); \ 484 cf = (hi != (lo >>/*s*/ (64-1))); \ 485 pf = parity_table[(UChar)lo]; \ 486 af = 0; /* undefined */ \ 487 zf = (lo == 0) << 6; \ 488 sf = lshift(lo, 8 - 64) & 0x80; \ 489 of = cf << 11; \ 490 return cf | pf | af | zf | sf | of; \ 491 } \ 492 } 493 494 495 #if PROFILE_RFLAGS 496 497 static Bool initted = False; 498 499 /* C flag, fast route */ 500 static UInt tabc_fast[AMD64G_CC_OP_NUMBER]; 501 /* C flag, slow route */ 502 static UInt tabc_slow[AMD64G_CC_OP_NUMBER]; 503 /* table for calculate_cond */ 504 static UInt tab_cond[AMD64G_CC_OP_NUMBER][16]; 505 /* total entry counts for calc_all, calc_c, calc_cond. 
*/ 506 static UInt n_calc_all = 0; 507 static UInt n_calc_c = 0; 508 static UInt n_calc_cond = 0; 509 510 #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond))) 511 512 513 static void showCounts ( void ) 514 { 515 Int op, co; 516 Char ch; 517 vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n", 518 n_calc_all, n_calc_cond, n_calc_c); 519 520 vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE" 521 " S NS P NP L NL LE NLE\n"); 522 vex_printf(" -----------------------------------------------------" 523 "----------------------------------------\n"); 524 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) { 525 526 ch = ' '; 527 if (op > 0 && (op-1) % 4 == 0) 528 ch = 'B'; 529 if (op > 0 && (op-1) % 4 == 1) 530 ch = 'W'; 531 if (op > 0 && (op-1) % 4 == 2) 532 ch = 'L'; 533 if (op > 0 && (op-1) % 4 == 3) 534 ch = 'Q'; 535 536 vex_printf("%2d%c: ", op, ch); 537 vex_printf("%6u ", tabc_slow[op]); 538 vex_printf("%6u ", tabc_fast[op]); 539 for (co = 0; co < 16; co++) { 540 Int n = tab_cond[op][co]; 541 if (n >= 1000) { 542 vex_printf(" %3dK", n / 1000); 543 } else 544 if (n >= 0) { 545 vex_printf(" %3d ", n ); 546 } else { 547 vex_printf(" "); 548 } 549 } 550 vex_printf("\n"); 551 } 552 vex_printf("\n"); 553 } 554 555 static void initCounts ( void ) 556 { 557 Int op, co; 558 initted = True; 559 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) { 560 tabc_fast[op] = tabc_slow[op] = 0; 561 for (co = 0; co < 16; co++) 562 tab_cond[op][co] = 0; 563 } 564 } 565 566 #endif /* PROFILE_RFLAGS */ 567 568 569 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 570 /* Calculate all the 6 flags from the supplied thunk parameters. 571 Worker function, not directly called from generated code. 
*/
static
ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
                                        ULong cc_dep1_formal,
                                        ULong cc_dep2_formal,
                                        ULong cc_ndep_formal )
{
   /* Dispatch on the thunk's operation tag.  Each ACTIONS_* macro
      expands to a block that computes and returns the six flags for
      one operation at one operand width; the formal parameters are
      picked up by name inside PREAMBLE. */
   switch (cc_op) {
      case AMD64G_CC_OP_COPY:
         /* The thunk already holds literal flags; just mask them. */
         return cc_dep1_formal
                & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
                   | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);

      case AMD64G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case AMD64G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case AMD64G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );
      case AMD64G_CC_OP_ADDQ:   ACTIONS_ADD( 64, ULong  );

      case AMD64G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case AMD64G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case AMD64G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );
      case AMD64G_CC_OP_ADCQ:   ACTIONS_ADC( 64, ULong  );

      case AMD64G_CC_OP_SUBB:   ACTIONS_SUB( 8,  UChar  );
      case AMD64G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case AMD64G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );
      case AMD64G_CC_OP_SUBQ:   ACTIONS_SUB( 64, ULong  );

      case AMD64G_CC_OP_SBBB:   ACTIONS_SBB( 8,  UChar  );
      case AMD64G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case AMD64G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );
      case AMD64G_CC_OP_SBBQ:   ACTIONS_SBB( 64, ULong  );

      case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC( 8,  UChar  );
      case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );
      case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong  );

      case AMD64G_CC_OP_INCB:   ACTIONS_INC( 8,  UChar  );
      case AMD64G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case AMD64G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );
      case AMD64G_CC_OP_INCQ:   ACTIONS_INC( 64, ULong  );

      case AMD64G_CC_OP_DECB:   ACTIONS_DEC( 8,  UChar  );
      case AMD64G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case AMD64G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );
      case AMD64G_CC_OP_DECQ:   ACTIONS_DEC( 64, ULong  );

      case AMD64G_CC_OP_SHLB:   ACTIONS_SHL( 8,  UChar  );
      case AMD64G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case AMD64G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );
      case AMD64G_CC_OP_SHLQ:   ACTIONS_SHL( 64, ULong  );

      case AMD64G_CC_OP_SHRB:   ACTIONS_SHR( 8,  UChar  );
      case AMD64G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case AMD64G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );
      case AMD64G_CC_OP_SHRQ:   ACTIONS_SHR( 64, ULong  );

      case AMD64G_CC_OP_ROLB:   ACTIONS_ROL( 8,  UChar  );
      case AMD64G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case AMD64G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );
      case AMD64G_CC_OP_ROLQ:   ACTIONS_ROL( 64, ULong  );

      case AMD64G_CC_OP_RORB:   ACTIONS_ROR( 8,  UChar  );
      case AMD64G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case AMD64G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );
      case AMD64G_CC_OP_RORQ:   ACTIONS_ROR( 64, ULong  );

      case AMD64G_CC_OP_UMULB:  ACTIONS_UMUL( 8,  UChar,  toUChar,
                                                  UShort, toUShort );
      case AMD64G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                  UInt,   toUInt );
      case AMD64G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                  ULong,  idULong );

      case AMD64G_CC_OP_UMULQ:  ACTIONS_UMULQ;

      case AMD64G_CC_OP_SMULB:  ACTIONS_SMUL( 8,  Char,  toUChar,
                                                  Short, toUShort );
      case AMD64G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short, toUShort,
                                                  Int,   toUInt );
      case AMD64G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,   toUInt,
                                                  Long,  idULong );

      case AMD64G_CC_OP_SMULQ:  ACTIONS_SMULQ;

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
                    "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters.
*/ 669 ULong amd64g_calculate_rflags_all ( ULong cc_op, 670 ULong cc_dep1, 671 ULong cc_dep2, 672 ULong cc_ndep ) 673 { 674 # if PROFILE_RFLAGS 675 if (!initted) initCounts(); 676 n_calc_all++; 677 if (SHOW_COUNTS_NOW) showCounts(); 678 # endif 679 return 680 amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep ); 681 } 682 683 684 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 685 /* Calculate just the carry flag from the supplied thunk parameters. */ 686 ULong amd64g_calculate_rflags_c ( ULong cc_op, 687 ULong cc_dep1, 688 ULong cc_dep2, 689 ULong cc_ndep ) 690 { 691 # if PROFILE_RFLAGS 692 if (!initted) initCounts(); 693 n_calc_c++; 694 tabc_fast[cc_op]++; 695 if (SHOW_COUNTS_NOW) showCounts(); 696 # endif 697 698 /* Fast-case some common ones. */ 699 switch (cc_op) { 700 case AMD64G_CC_OP_COPY: 701 return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1; 702 case AMD64G_CC_OP_LOGICQ: 703 case AMD64G_CC_OP_LOGICL: 704 case AMD64G_CC_OP_LOGICW: 705 case AMD64G_CC_OP_LOGICB: 706 return 0; 707 // case AMD64G_CC_OP_SUBL: 708 // return ((UInt)cc_dep1) < ((UInt)cc_dep2) 709 // ? AMD64G_CC_MASK_C : 0; 710 // case AMD64G_CC_OP_SUBW: 711 // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF)) 712 // ? AMD64G_CC_MASK_C : 0; 713 // case AMD64G_CC_OP_SUBB: 714 // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF)) 715 // ? 
AMD64G_CC_MASK_C : 0; 716 // case AMD64G_CC_OP_INCL: 717 // case AMD64G_CC_OP_DECL: 718 // return cc_ndep & AMD64G_CC_MASK_C; 719 default: 720 break; 721 } 722 723 # if PROFILE_RFLAGS 724 tabc_fast[cc_op]--; 725 tabc_slow[cc_op]++; 726 # endif 727 728 return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep) 729 & AMD64G_CC_MASK_C; 730 } 731 732 733 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 734 /* returns 1 or 0 */ 735 ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond, 736 ULong cc_op, 737 ULong cc_dep1, 738 ULong cc_dep2, 739 ULong cc_ndep ) 740 { 741 ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1, 742 cc_dep2, cc_ndep); 743 ULong of,sf,zf,cf,pf; 744 ULong inv = cond & 1; 745 746 # if PROFILE_RFLAGS 747 if (!initted) initCounts(); 748 tab_cond[cc_op][cond]++; 749 n_calc_cond++; 750 if (SHOW_COUNTS_NOW) showCounts(); 751 # endif 752 753 switch (cond) { 754 case AMD64CondNO: 755 case AMD64CondO: /* OF == 1 */ 756 of = rflags >> AMD64G_CC_SHIFT_O; 757 return 1 & (inv ^ of); 758 759 case AMD64CondNZ: 760 case AMD64CondZ: /* ZF == 1 */ 761 zf = rflags >> AMD64G_CC_SHIFT_Z; 762 return 1 & (inv ^ zf); 763 764 case AMD64CondNB: 765 case AMD64CondB: /* CF == 1 */ 766 cf = rflags >> AMD64G_CC_SHIFT_C; 767 return 1 & (inv ^ cf); 768 break; 769 770 case AMD64CondNBE: 771 case AMD64CondBE: /* (CF or ZF) == 1 */ 772 cf = rflags >> AMD64G_CC_SHIFT_C; 773 zf = rflags >> AMD64G_CC_SHIFT_Z; 774 return 1 & (inv ^ (cf | zf)); 775 break; 776 777 case AMD64CondNS: 778 case AMD64CondS: /* SF == 1 */ 779 sf = rflags >> AMD64G_CC_SHIFT_S; 780 return 1 & (inv ^ sf); 781 782 case AMD64CondNP: 783 case AMD64CondP: /* PF == 1 */ 784 pf = rflags >> AMD64G_CC_SHIFT_P; 785 return 1 & (inv ^ pf); 786 787 case AMD64CondNL: 788 case AMD64CondL: /* (SF xor OF) == 1 */ 789 sf = rflags >> AMD64G_CC_SHIFT_S; 790 of = rflags >> AMD64G_CC_SHIFT_O; 791 return 1 & (inv ^ (sf ^ of)); 792 break; 793 794 case AMD64CondNLE: 795 case AMD64CondLE: /* ((SF xor OF) or 
ZF) == 1 */ 796 sf = rflags >> AMD64G_CC_SHIFT_S; 797 of = rflags >> AMD64G_CC_SHIFT_O; 798 zf = rflags >> AMD64G_CC_SHIFT_Z; 799 return 1 & (inv ^ ((sf ^ of) | zf)); 800 break; 801 802 default: 803 /* shouldn't really make these calls from generated code */ 804 vex_printf("amd64g_calculate_condition" 805 "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n", 806 cond, cc_op, cc_dep1, cc_dep2, cc_ndep ); 807 vpanic("amd64g_calculate_condition"); 808 } 809 } 810 811 812 /* VISIBLE TO LIBVEX CLIENT */ 813 ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/VexGuestAMD64State* vex_state ) 814 { 815 ULong rflags = amd64g_calculate_rflags_all_WRK( 816 vex_state->guest_CC_OP, 817 vex_state->guest_CC_DEP1, 818 vex_state->guest_CC_DEP2, 819 vex_state->guest_CC_NDEP 820 ); 821 Long dflag = vex_state->guest_DFLAG; 822 vassert(dflag == 1 || dflag == -1); 823 if (dflag == -1) 824 rflags |= (1<<10); 825 if (vex_state->guest_IDFLAG == 1) 826 rflags |= (1<<21); 827 if (vex_state->guest_ACFLAG == 1) 828 rflags |= (1<<18); 829 830 return rflags; 831 } 832 833 /* VISIBLE TO LIBVEX CLIENT */ 834 void 835 LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag, 836 /*MOD*/VexGuestAMD64State* vex_state ) 837 { 838 ULong oszacp = amd64g_calculate_rflags_all_WRK( 839 vex_state->guest_CC_OP, 840 vex_state->guest_CC_DEP1, 841 vex_state->guest_CC_DEP2, 842 vex_state->guest_CC_NDEP 843 ); 844 if (new_carry_flag & 1) { 845 oszacp |= AMD64G_CC_MASK_C; 846 } else { 847 oszacp &= ~AMD64G_CC_MASK_C; 848 } 849 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY; 850 vex_state->guest_CC_DEP1 = oszacp; 851 vex_state->guest_CC_DEP2 = 0; 852 vex_state->guest_CC_NDEP = 0; 853 } 854 855 856 /*---------------------------------------------------------------*/ 857 /*--- %rflags translation-time function specialisers. ---*/ 858 /*--- These help iropt specialise calls the above run-time ---*/ 859 /*--- %rflags functions. 
---*/ 860 /*---------------------------------------------------------------*/ 861 862 /* Used by the optimiser to try specialisations. Returns an 863 equivalent expression, or NULL if none. */ 864 865 static Bool isU64 ( IRExpr* e, ULong n ) 866 { 867 return toBool( e->tag == Iex_Const 868 && e->Iex.Const.con->tag == Ico_U64 869 && e->Iex.Const.con->Ico.U64 == n ); 870 } 871 872 IRExpr* guest_amd64_spechelper ( HChar* function_name, 873 IRExpr** args, 874 IRStmt** precedingStmts, 875 Int n_precedingStmts ) 876 { 877 # define unop(_op,_a1) IRExpr_Unop((_op),(_a1)) 878 # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2)) 879 # define mkU64(_n) IRExpr_Const(IRConst_U64(_n)) 880 # define mkU32(_n) IRExpr_Const(IRConst_U32(_n)) 881 # define mkU8(_n) IRExpr_Const(IRConst_U8(_n)) 882 883 Int i, arity = 0; 884 for (i = 0; args[i]; i++) 885 arity++; 886 # if 0 887 vex_printf("spec request:\n"); 888 vex_printf(" %s ", function_name); 889 for (i = 0; i < arity; i++) { 890 vex_printf(" "); 891 ppIRExpr(args[i]); 892 } 893 vex_printf("\n"); 894 # endif 895 896 /* --------- specialising "amd64g_calculate_condition" --------- */ 897 898 if (vex_streq(function_name, "amd64g_calculate_condition")) { 899 /* specialise calls to above "calculate condition" function */ 900 IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2; 901 vassert(arity == 5); 902 cond = args[0]; 903 cc_op = args[1]; 904 cc_dep1 = args[2]; 905 cc_dep2 = args[3]; 906 907 /*---------------- ADDQ ----------------*/ 908 909 if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) { 910 /* long long add, then Z --> test (dst+src == 0) */ 911 return unop(Iop_1Uto64, 912 binop(Iop_CmpEQ64, 913 binop(Iop_Add64, cc_dep1, cc_dep2), 914 mkU64(0))); 915 } 916 917 /*---------------- SUBQ ----------------*/ 918 919 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) { 920 /* long long sub/cmp, then Z --> test dst==src */ 921 return unop(Iop_1Uto64, 922 binop(Iop_CmpEQ64,cc_dep1,cc_dep2)); 923 } 924 if 
(isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) { 925 /* long long sub/cmp, then NZ --> test dst!=src */ 926 return unop(Iop_1Uto64, 927 binop(Iop_CmpNE64,cc_dep1,cc_dep2)); 928 } 929 930 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) { 931 /* long long sub/cmp, then L (signed less than) 932 --> test dst <s src */ 933 return unop(Iop_1Uto64, 934 binop(Iop_CmpLT64S, cc_dep1, cc_dep2)); 935 } 936 937 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) { 938 /* long long sub/cmp, then B (unsigned less than) 939 --> test dst <u src */ 940 return unop(Iop_1Uto64, 941 binop(Iop_CmpLT64U, cc_dep1, cc_dep2)); 942 } 943 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) { 944 /* long long sub/cmp, then NB (unsigned greater than or equal) 945 --> test src <=u dst */ 946 /* Note, args are opposite way round from the usual */ 947 return unop(Iop_1Uto64, 948 binop(Iop_CmpLE64U, cc_dep2, cc_dep1)); 949 } 950 951 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) { 952 /* long long sub/cmp, then BE (unsigned less than or equal) 953 --> test dst <=u src */ 954 return unop(Iop_1Uto64, 955 binop(Iop_CmpLE64U, cc_dep1, cc_dep2)); 956 } 957 958 /*---------------- SUBL ----------------*/ 959 960 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) { 961 /* long sub/cmp, then Z --> test dst==src */ 962 return unop(Iop_1Uto64, 963 binop(Iop_CmpEQ32, 964 unop(Iop_64to32, cc_dep1), 965 unop(Iop_64to32, cc_dep2))); 966 } 967 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) { 968 /* long sub/cmp, then NZ --> test dst!=src */ 969 return unop(Iop_1Uto64, 970 binop(Iop_CmpNE32, 971 unop(Iop_64to32, cc_dep1), 972 unop(Iop_64to32, cc_dep2))); 973 } 974 975 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) { 976 /* long sub/cmp, then L (signed less than) 977 --> test dst <s src */ 978 return unop(Iop_1Uto64, 979 binop(Iop_CmpLT32S, 980 unop(Iop_64to32, cc_dep1), 981 unop(Iop_64to32, 
cc_dep2))); 982 } 983 984 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) { 985 /* long sub/cmp, then LE (signed less than or equal) 986 --> test dst <=s src */ 987 return unop(Iop_1Uto64, 988 binop(Iop_CmpLE32S, 989 unop(Iop_64to32, cc_dep1), 990 unop(Iop_64to32, cc_dep2))); 991 992 } 993 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) { 994 /* long sub/cmp, then NLE (signed greater than) 995 --> test !(dst <=s src) 996 --> test (dst >s src) 997 --> test (src <s dst) */ 998 return unop(Iop_1Uto64, 999 binop(Iop_CmpLT32S, 1000 unop(Iop_64to32, cc_dep2), 1001 unop(Iop_64to32, cc_dep1))); 1002 1003 } 1004 1005 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) { 1006 /* long sub/cmp, then BE (unsigned less than or equal) 1007 --> test dst <=u src */ 1008 return unop(Iop_1Uto64, 1009 binop(Iop_CmpLE32U, 1010 unop(Iop_64to32, cc_dep1), 1011 unop(Iop_64to32, cc_dep2))); 1012 } 1013 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) { 1014 /* long sub/cmp, then NBE (unsigned greater than) 1015 --> test src <u dst */ 1016 /* Note, args are opposite way round from the usual */ 1017 return unop(Iop_1Uto64, 1018 binop(Iop_CmpLT32U, 1019 unop(Iop_64to32, cc_dep2), 1020 unop(Iop_64to32, cc_dep1))); 1021 } 1022 1023 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) { 1024 /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */ 1025 return unop(Iop_1Uto64, 1026 binop(Iop_CmpLT32S, 1027 binop(Iop_Sub32, 1028 unop(Iop_64to32, cc_dep1), 1029 unop(Iop_64to32, cc_dep2)), 1030 mkU32(0))); 1031 } 1032 1033 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) { 1034 /* long sub/cmp, then B (unsigned less than) 1035 --> test dst <u src */ 1036 return unop(Iop_1Uto64, 1037 binop(Iop_CmpLT32U, 1038 unop(Iop_64to32, cc_dep1), 1039 unop(Iop_64to32, cc_dep2))); 1040 } 1041 1042 /*---------------- SUBW ----------------*/ 1043 1044 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) { 1045 
/* word sub/cmp, then Z --> test dst==src */ 1046 return unop(Iop_1Uto64, 1047 binop(Iop_CmpEQ16, 1048 unop(Iop_64to16,cc_dep1), 1049 unop(Iop_64to16,cc_dep2))); 1050 } 1051 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) { 1052 /* word sub/cmp, then NZ --> test dst!=src */ 1053 return unop(Iop_1Uto64, 1054 binop(Iop_CmpNE16, 1055 unop(Iop_64to16,cc_dep1), 1056 unop(Iop_64to16,cc_dep2))); 1057 } 1058 1059 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) { 1060 /* word sub/cmp, then LE (signed less than or equal) 1061 --> test dst <=s src */ 1062 return unop(Iop_1Uto64, 1063 binop(Iop_CmpLE64S, 1064 binop(Iop_Shl64,cc_dep1,mkU8(48)), 1065 binop(Iop_Shl64,cc_dep2,mkU8(48)))); 1066 1067 } 1068 1069 /*---------------- SUBB ----------------*/ 1070 1071 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) { 1072 /* byte sub/cmp, then Z --> test dst==src */ 1073 return unop(Iop_1Uto64, 1074 binop(Iop_CmpEQ8, 1075 unop(Iop_64to8,cc_dep1), 1076 unop(Iop_64to8,cc_dep2))); 1077 } 1078 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) { 1079 /* byte sub/cmp, then NZ --> test dst!=src */ 1080 return unop(Iop_1Uto64, 1081 binop(Iop_CmpNE8, 1082 unop(Iop_64to8,cc_dep1), 1083 unop(Iop_64to8,cc_dep2))); 1084 } 1085 1086 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) { 1087 /* byte sub/cmp, then BE (unsigned less than or equal) 1088 --> test dst <=u src */ 1089 return unop(Iop_1Uto64, 1090 binop(Iop_CmpLE64U, 1091 binop(Iop_And64, cc_dep1, mkU64(0xFF)), 1092 binop(Iop_And64, cc_dep2, mkU64(0xFF)))); 1093 } 1094 1095 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS) 1096 && isU64(cc_dep2, 0)) { 1097 /* byte sub/cmp of zero, then S --> test (dst-0 <s 0) 1098 --> test dst <s 0 1099 --> (ULong)dst[7] 1100 This is yet another scheme by which gcc figures out if the 1101 top bit of a byte is 1 or 0. See also LOGICB/CondS below. 
*/ 1102 /* Note: isU64(cc_dep2, 0) is correct, even though this is 1103 for an 8-bit comparison, since the args to the helper 1104 function are always U64s. */ 1105 return binop(Iop_And64, 1106 binop(Iop_Shr64,cc_dep1,mkU8(7)), 1107 mkU64(1)); 1108 } 1109 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS) 1110 && isU64(cc_dep2, 0)) { 1111 /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0) 1112 --> test !(dst <s 0) 1113 --> (ULong) !dst[7] 1114 */ 1115 return binop(Iop_Xor64, 1116 binop(Iop_And64, 1117 binop(Iop_Shr64,cc_dep1,mkU8(7)), 1118 mkU64(1)), 1119 mkU64(1)); 1120 } 1121 1122 /*---------------- LOGICQ ----------------*/ 1123 1124 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) { 1125 /* long long and/or/xor, then Z --> test dst==0 */ 1126 return unop(Iop_1Uto64, 1127 binop(Iop_CmpEQ64, cc_dep1, mkU64(0))); 1128 } 1129 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) { 1130 /* long long and/or/xor, then NZ --> test dst!=0 */ 1131 return unop(Iop_1Uto64, 1132 binop(Iop_CmpNE64, cc_dep1, mkU64(0))); 1133 } 1134 1135 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) { 1136 /* long long and/or/xor, then L 1137 LOGIC sets SF and ZF according to the 1138 result and makes OF be zero. L computes SF ^ OF, but 1139 OF is zero, so this reduces to SF -- which will be 1 iff 1140 the result is < signed 0. Hence ... 
1141 */ 1142 return unop(Iop_1Uto64, 1143 binop(Iop_CmpLT64S, 1144 cc_dep1, 1145 mkU64(0))); 1146 } 1147 1148 /*---------------- LOGICL ----------------*/ 1149 1150 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) { 1151 /* long and/or/xor, then Z --> test dst==0 */ 1152 return unop(Iop_1Uto64, 1153 binop(Iop_CmpEQ32, 1154 unop(Iop_64to32, cc_dep1), 1155 mkU32(0))); 1156 } 1157 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) { 1158 /* long and/or/xor, then NZ --> test dst!=0 */ 1159 return unop(Iop_1Uto64, 1160 binop(Iop_CmpNE32, 1161 unop(Iop_64to32, cc_dep1), 1162 mkU32(0))); 1163 } 1164 1165 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) { 1166 /* long and/or/xor, then LE 1167 This is pretty subtle. LOGIC sets SF and ZF according to the 1168 result and makes OF be zero. LE computes (SF ^ OF) | ZF, but 1169 OF is zero, so this reduces to SF | ZF -- which will be 1 iff 1170 the result is <=signed 0. Hence ... 1171 */ 1172 return unop(Iop_1Uto64, 1173 binop(Iop_CmpLE32S, 1174 unop(Iop_64to32, cc_dep1), 1175 mkU32(0))); 1176 } 1177 1178 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) { 1179 /* long and/or/xor, then S --> (ULong)result[31] */ 1180 return binop(Iop_And64, 1181 binop(Iop_Shr64, cc_dep1, mkU8(31)), 1182 mkU64(1)); 1183 } 1184 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) { 1185 /* long and/or/xor, then S --> (ULong) ~ result[31] */ 1186 return binop(Iop_Xor64, 1187 binop(Iop_And64, 1188 binop(Iop_Shr64, cc_dep1, mkU8(31)), 1189 mkU64(1)), 1190 mkU64(1)); 1191 } 1192 1193 /*---------------- LOGICB ----------------*/ 1194 1195 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) { 1196 /* byte and/or/xor, then Z --> test dst==0 */ 1197 return unop(Iop_1Uto64, 1198 binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)), 1199 mkU64(0))); 1200 } 1201 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) { 1202 /* byte and/or/xor, then NZ 
--> test dst!=0 */ 1203 return unop(Iop_1Uto64, 1204 binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)), 1205 mkU64(0))); 1206 } 1207 1208 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) { 1209 /* this is an idiom gcc sometimes uses to find out if the top 1210 bit of a byte register is set: eg testb %al,%al; js .. 1211 Since it just depends on the top bit of the byte, extract 1212 that bit and explicitly get rid of all the rest. This 1213 helps memcheck avoid false positives in the case where any 1214 of the other bits in the byte are undefined. */ 1215 /* byte and/or/xor, then S --> (UInt)result[7] */ 1216 return binop(Iop_And64, 1217 binop(Iop_Shr64,cc_dep1,mkU8(7)), 1218 mkU64(1)); 1219 } 1220 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) { 1221 /* byte and/or/xor, then NS --> (UInt)!result[7] */ 1222 return binop(Iop_Xor64, 1223 binop(Iop_And64, 1224 binop(Iop_Shr64,cc_dep1,mkU8(7)), 1225 mkU64(1)), 1226 mkU64(1)); 1227 } 1228 1229 /*---------------- INCB ----------------*/ 1230 1231 if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) { 1232 /* 8-bit inc, then LE --> sign bit of the arg */ 1233 return binop(Iop_And64, 1234 binop(Iop_Shr64, 1235 binop(Iop_Sub64, cc_dep1, mkU64(1)), 1236 mkU8(7)), 1237 mkU64(1)); 1238 } 1239 1240 /*---------------- INCW ----------------*/ 1241 1242 if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) { 1243 /* 16-bit inc, then Z --> test dst == 0 */ 1244 return unop(Iop_1Uto64, 1245 binop(Iop_CmpEQ64, 1246 binop(Iop_Shl64,cc_dep1,mkU8(48)), 1247 mkU64(0))); 1248 } 1249 1250 /*---------------- DECL ----------------*/ 1251 1252 if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) { 1253 /* dec L, then Z --> test dst == 0 */ 1254 return unop(Iop_1Uto64, 1255 binop(Iop_CmpEQ32, 1256 unop(Iop_64to32, cc_dep1), 1257 mkU32(0))); 1258 } 1259 1260 /*---------------- DECW ----------------*/ 1261 1262 if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, 
AMD64CondNZ)) { 1263 /* 16-bit dec, then NZ --> test dst != 0 */ 1264 return unop(Iop_1Uto64, 1265 binop(Iop_CmpNE64, 1266 binop(Iop_Shl64,cc_dep1,mkU8(48)), 1267 mkU64(0))); 1268 } 1269 1270 /*---------------- COPY ----------------*/ 1271 /* This can happen, as a result of amd64 FP compares: "comisd ... ; 1272 jbe" for example. */ 1273 1274 if (isU64(cc_op, AMD64G_CC_OP_COPY) && 1275 (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) { 1276 /* COPY, then BE --> extract C and Z from dep1, and test (C 1277 or Z == 1). */ 1278 /* COPY, then NBE --> extract C and Z from dep1, and test (C 1279 or Z == 0). */ 1280 ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0; 1281 return 1282 unop( 1283 Iop_1Uto64, 1284 binop( 1285 Iop_CmpEQ64, 1286 binop( 1287 Iop_And64, 1288 binop( 1289 Iop_Or64, 1290 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)), 1291 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)) 1292 ), 1293 mkU64(1) 1294 ), 1295 mkU64(nnn) 1296 ) 1297 ); 1298 } 1299 1300 if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) { 1301 /* COPY, then B --> extract C dep1, and test (C == 1). */ 1302 return 1303 unop( 1304 Iop_1Uto64, 1305 binop( 1306 Iop_CmpNE64, 1307 binop( 1308 Iop_And64, 1309 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)), 1310 mkU64(1) 1311 ), 1312 mkU64(0) 1313 ) 1314 ); 1315 } 1316 1317 if (isU64(cc_op, AMD64G_CC_OP_COPY) 1318 && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) { 1319 /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */ 1320 /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */ 1321 UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0; 1322 return 1323 unop( 1324 Iop_1Uto64, 1325 binop( 1326 Iop_CmpEQ64, 1327 binop( 1328 Iop_And64, 1329 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)), 1330 mkU64(1) 1331 ), 1332 mkU64(nnn) 1333 ) 1334 ); 1335 } 1336 1337 if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) { 1338 /* COPY, then P --> extract P from dep1, and test (P == 1). 
*/ 1339 return 1340 unop( 1341 Iop_1Uto64, 1342 binop( 1343 Iop_CmpNE64, 1344 binop( 1345 Iop_And64, 1346 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)), 1347 mkU64(1) 1348 ), 1349 mkU64(0) 1350 ) 1351 ); 1352 } 1353 1354 return NULL; 1355 } 1356 1357 /* --------- specialising "amd64g_calculate_rflags_c" --------- */ 1358 1359 if (vex_streq(function_name, "amd64g_calculate_rflags_c")) { 1360 /* specialise calls to above "calculate_rflags_c" function */ 1361 IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep; 1362 vassert(arity == 4); 1363 cc_op = args[0]; 1364 cc_dep1 = args[1]; 1365 cc_dep2 = args[2]; 1366 cc_ndep = args[3]; 1367 1368 if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) { 1369 /* C after sub denotes unsigned less than */ 1370 return unop(Iop_1Uto64, 1371 binop(Iop_CmpLT64U, 1372 cc_dep1, 1373 cc_dep2)); 1374 } 1375 if (isU64(cc_op, AMD64G_CC_OP_SUBL)) { 1376 /* C after sub denotes unsigned less than */ 1377 return unop(Iop_1Uto64, 1378 binop(Iop_CmpLT32U, 1379 unop(Iop_64to32, cc_dep1), 1380 unop(Iop_64to32, cc_dep2))); 1381 } 1382 if (isU64(cc_op, AMD64G_CC_OP_SUBB)) { 1383 /* C after sub denotes unsigned less than */ 1384 return unop(Iop_1Uto64, 1385 binop(Iop_CmpLT64U, 1386 binop(Iop_And64,cc_dep1,mkU64(0xFF)), 1387 binop(Iop_And64,cc_dep2,mkU64(0xFF)))); 1388 } 1389 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) 1390 || isU64(cc_op, AMD64G_CC_OP_LOGICL) 1391 || isU64(cc_op, AMD64G_CC_OP_LOGICW) 1392 || isU64(cc_op, AMD64G_CC_OP_LOGICB)) { 1393 /* cflag after logic is zero */ 1394 return mkU64(0); 1395 } 1396 if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL) 1397 || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) { 1398 /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. 
         */
         return cc_ndep;
      }

#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU64
#  undef mkU32
#  undef mkU8

   return NULL;
}


/*---------------------------------------------------------------*/
/*--- Supporting functions for x87 FPU activities.            ---*/
/*---------------------------------------------------------------*/

/* Returns True iff the host stores the least significant byte of a
   32-bit word at the lowest address (used to guard byte-level access
   to the guest FP/SSE state images below). */
static inline Bool host_is_little_endian ( void )
{
   UInt x = 0x76543210;
   UChar* p = (UChar*)(&x);
   return toBool(*p == 0x10);
}

/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* 'tag' is the guest FPTAG entry for the register (0 == empty),
   'dbl' is the register's value as an IEEE754 64-bit image.  Returns
   the condition-code classification packed into the C3/C2/C1/C0 bit
   positions of the FPU status word (C1 carries the sign bit). */
ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl ) 
{
   Bool   mantissaIsZero;
   Int    bexp;
   UChar  sign;
   UChar* f64;

   /* Byte-wise field extraction below assumes a little-endian host. */
   vassert(host_is_little_endian());

   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */

   f64  = (UChar*)(&dbl);
   sign = toUChar( (f64[7] >> 7) & 1 );

   /* First off, if the tag indicates the register was empty,
      return 1,0,sign,1 */
   if (tag == 0) {
      /* vex_printf("Empty\n"); */
      return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1) 
                                   | AMD64G_FC_MASK_C0;
   }

   /* Biased exponent: top 11 bits of the double image. */
   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   bexp &= 0x7FF;

   /* True iff all 52 mantissa bits are zero. */
   mantissaIsZero
      = toBool(
           (f64[6] & 0x0F) == 0 
           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
        );

   /* If both exponent and mantissa are zero, the value is zero.
      Return 1,0,sign,0. */
   if (bexp == 0 && mantissaIsZero) {
      /* vex_printf("Zero\n"); */
      return AMD64G_FC_MASK_C3 | 0 
             | (sign << AMD64G_FC_SHIFT_C1) | 0;
   }
   
   /* If exponent is zero but mantissa isn't, it's a denormal.
      Return 1,1,sign,0. */
   if (bexp == 0 && !mantissaIsZero) {
      /* vex_printf("Denormal\n"); */
      return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2 
             | (sign << AMD64G_FC_SHIFT_C1) | 0;
   }

   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
      Return 0,1,sign,1. */
   if (bexp == 0x7FF && mantissaIsZero) {
      /* vex_printf("Inf\n"); */
      return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) 
                                   | AMD64G_FC_MASK_C0;
   }

   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
      Return 0,0,sign,1. */
   if (bexp == 0x7FF && !mantissaIsZero) {
      /* vex_printf("NaN\n"); */
      return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
   }

   /* Uh, ok, we give up.  It must be a normal finite number.
      Return 0,1,sign,0.
   */
   /* vex_printf("normal\n"); */
   return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
}


/* This is used to implement both 'frstor' and 'fldenv'.  The latter
   appears to differ from the former only in that the 8 FP registers
   themselves are not transferred into the guest state.
   Reads an x87 image from x87_state and writes the corresponding
   VEX representation (FTOP/FC3210/FPROUND, plus registers/tags when
   moveRegs is True) into vex_state; returns any emulation warning
   arising from the control word. */
static
VexEmWarn do_put_x87 ( Bool moveRegs,
                       /*IN*/UChar* x87_state,
                       /*OUT*/VexGuestAMD64State* vex_state )
{
   Int        stno, preg;
   UInt       tag;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
   UInt       tagw    = x87->env[FP_ENV_TAG];
   UInt       fpucw   = x87->env[FP_ENV_CTRL];
   UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
   VexEmWarn  ew;
   UInt       fpround;
   ULong      pair;

   /* Copy registers and tags.  The x87 tag word is indexed by
      physical register; VEX tags are likewise per physical register,
      with 0 == empty and 1 == non-empty. */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?
            Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         if (moveRegs)
            convert_f80le_to_f64le( &x87->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word (only the C3..C0 condition bits are kept) */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = amd64g_check_fldcw ( (ULong)fpucw );
   fpround = (UInt)pair;               /* low 32 bits: rounding mode */
   ew      = (VexEmWarn)(pair >> 32);  /* high 32 bits: warning */
   
   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}


/* Create an x87 FPU state from the guest state, as close as
   we can approximate it.  Writes a 14-word environment plus the 8
   80-bit registers (in ST order) into x87_state. */
static
void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
                  /*OUT*/UChar* x87_state )
{
   Int        i, stno, preg;
   UInt       tagw;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = vex_state->guest_FTOP;
   UInt       c3210   = vex_state->guest_FC3210;

   for (i = 0; i < 14; i++)
      x87->env[i] = 0;

   /* Fill the reserved/selector words with all-ones, as a real
      env image would show for unused fields. */
   x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   x87->env[FP_ENV_STAT] 
      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   x87->env[FP_ENV_CTRL] 
      = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));

   /* Dump the register stack in ST order. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty -- tag pattern 11.  The value is dumped
            anyway, to keep the image fully initialised. */
         tagw |= (3 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg], 
                                 &x87->reg[10*stno] );
      } else {
         /* register is full -- tag pattern 00. */
         tagw |= (0 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg], 
                                 &x87->reg[10*stno] );
      }
   }
   x87->env[FP_ENV_TAG] = toUShort(tagw);
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
/* NOTE: only handles 32-bit format (no REX.W on the insn) */
void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
{
   /* Derived from values obtained from
      vendor_id       : AuthenticAMD
      cpu family      : 15
      model           : 12
      model name      : AMD Athlon(tm) 64 Processor 3200+
      stepping        : 0
      cpu MHz         : 2200.000
      cache size      : 512 KB
   */
   /* Somewhat roundabout, but at least it's simple. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   U128*     xmm   = (U128*)(addr + 160);
   UInt      mxcsr;
   UShort    fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   do_get_x87( gst, (UChar*)&tmp );
   mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */

   /* set addrS[2] in an endian-independent way.  The abridged tag
      byte has one bit per register: 1 == non-empty (full tag != 3),
      0 == empty. */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5]  = 0; /* pad */

   /* FOP: faulting fpu opcode.
      From experimentation, the real CPU
      does not write this field. (?!) */
   addrS[3]  = 0; /* BOGUS */

   /* RIP (Last x87 instruction pointer).  From experimentation, the
      real CPU does not write this field. (?!) */
   addrS[4]  = 0; /* BOGUS */
   addrS[5]  = 0; /* BOGUS */
   addrS[6]  = 0; /* BOGUS */
   addrS[7]  = 0; /* BOGUS */

   /* RDP (Last x87 data pointer).  From experimentation, the real CPU
      does not write this field. (?!) */
   addrS[8]  = 0; /* BOGUS */
   addrS[9]  = 0; /* BOGUS */
   addrS[10] = 0; /* BOGUS */
   addrS[11] = 0; /* BOGUS */

   addrS[12] = toUShort(mxcsr);  /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
   addrS[15] = 0x0000; /* MXCSR mask (hi16) */

   /* Copy in the FP registers, in ST order.  Each 80-bit register
      occupies a 16-byte slot; the top 6 bytes are zeroed. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm15 remain to be copied.  If the host is big-endian, these
      need to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( xmm[0],  gst->guest_XMM0 );
   COPY_U128( xmm[1],  gst->guest_XMM1 );
   COPY_U128( xmm[2],  gst->guest_XMM2 );
   COPY_U128( xmm[3],  gst->guest_XMM3 );
   COPY_U128( xmm[4],  gst->guest_XMM4 );
   COPY_U128( xmm[5],  gst->guest_XMM5 );
   COPY_U128( xmm[6],  gst->guest_XMM6 );
   COPY_U128( xmm[7],  gst->guest_XMM7 );
   COPY_U128( xmm[8],  gst->guest_XMM8 );
   COPY_U128( xmm[9],  gst->guest_XMM9 );
   COPY_U128( xmm[10], gst->guest_XMM10 );
   COPY_U128( xmm[11], gst->guest_XMM11 );
   COPY_U128( xmm[12], gst->guest_XMM12 );
   COPY_U128( xmm[13], gst->guest_XMM13 );
   COPY_U128( xmm[14], gst->guest_XMM14 );
   COPY_U128( xmm[15], gst->guest_XMM15 );

#  undef COPY_U128
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmWarn amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State* gst, HWord addr )
{
   Fpu_State tmp;
   VexEmWarn warnX87 = EmWarn_NONE;
   VexEmWarn warnXMM = EmWarn_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   U128*     xmm     = (U128*)(addr + 160);
   UShort    fp_tags;
   Int       r, stno, i;

   /* Restore %xmm0 .. %xmm15.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_XMM0,  xmm[0] );
   COPY_U128( gst->guest_XMM1,  xmm[1] );
   COPY_U128( gst->guest_XMM2,  xmm[2] );
   COPY_U128( gst->guest_XMM3,  xmm[3] );
   COPY_U128( gst->guest_XMM4,  xmm[4] );
   COPY_U128( gst->guest_XMM5,  xmm[5] );
   COPY_U128( gst->guest_XMM6,  xmm[6] );
   COPY_U128( gst->guest_XMM7,  xmm[7] );
   COPY_U128( gst->guest_XMM8,  xmm[8] );
   COPY_U128( gst->guest_XMM9,  xmm[9] );
   COPY_U128( gst->guest_XMM10, xmm[10] );
   COPY_U128( gst->guest_XMM11, xmm[11] );
   COPY_U128( gst->guest_XMM12, xmm[12] );
   COPY_U128( gst->guest_XMM13, xmm[13] );
   COPY_U128( gst->guest_XMM14, xmm[14] );
   COPY_U128( gst->guest_XMM15, xmm[15] );

#  undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */
   for (i = 0; i < 14; i++) tmp.env[i] = 0;
   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7] */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   /* Rebuild the full 16-bit tag word from the abridged FTW summary
      byte written by FXSAVE: summary bit set == register non-empty
      (see amd64g_dirtyhelper_FXSAVE above). */
   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough;
                                     tag 00 is used for all non-empty regs. */
      else 
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state.
      */
   warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );

   /* Restore SSEROUND from the MXCSR halfwords of the image, noting
      any emulation warning it produces. */
   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );

     warnXMM = (VexEmWarn)(w64 >> 32);

     gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmWarn_NONE)
      return warnX87;
   else
      return warnXMM;
}


/* DIRTY HELPER (writes guest state) */
/* Initialise the x87 FPU state as per 'finit'. */
void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
{
   Int i;
   gst->guest_FTOP = 0;
   for (i = 0; i < 8; i++) {
      gst->guest_FPTAG[i] = 0; /* empty */
      gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   }
   gst->guest_FPROUND = (ULong)Irrm_NEAREST;
   gst->guest_FC3210  = 0;
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest memory) */
/* Load an 80-bit x87 value from addrU and return it converted to an
   IEEE754 64-bit image. */
ULong amd64g_dirtyhelper_loadF80le ( ULong addrU )
{
   ULong f64;
   convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 );
   return f64;
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest memory) */
/* Convert the IEEE754 64-bit image f64 to 80-bit x87 format and store
   it at addrU. */
void amd64g_dirtyhelper_storeF80le ( ULong addrU, ULong f64 )
{
   convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) );
}


/* CALLED FROM GENERATED CODE */
/* CLEAN HELPER */
/* mxcsr[15:0] contains a SSE native format MXCSR value.
   Extract from it the required SSEROUND value and any resulting
   emulation warning, and return (warn << 32) | sseround value.
*/
ULong amd64g_check_ldmxcsr ( ULong mxcsr )
{
   /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   ULong rmode = (mxcsr >> 13) & 3;

   /* Detect any required emulation warnings. */
   VexEmWarn ew = EmWarn_NONE;

   if ((mxcsr & 0x1F80) != 0x1F80) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_sseExns;
   }
   else 
   if (mxcsr & (1<<15)) {
      /* FZ is set */
      ew = EmWarn_X86_fz;
   } 
   else
   if (mxcsr & (1<<6)) {
      /* DAZ is set */
      ew = EmWarn_X86_daz;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}


/* CALLED FROM GENERATED CODE */
/* CLEAN HELPER */
/* Given sseround as an IRRoundingMode value, create a suitable SSE
   native format MXCSR value (all exceptions masked, FZ/DAZ clear). */
ULong amd64g_create_mxcsr ( ULong sseround )
{
   sseround &= 3;
   return 0x1F80 | (sseround << 13);
}


/* CLEAN HELPER */
/* fpucw[15:0] contains a x87 native format FPU control word.
   Extract from it the required FPROUND value and any resulting
   emulation warning, and return (warn << 32) | fpround value.
*/
ULong amd64g_check_fldcw ( ULong fpucw )
{
   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   ULong rmode = (fpucw >> 10) & 3;

   /* Detect any required emulation warnings. */
   VexEmWarn ew = EmWarn_NONE;

   if ((fpucw & 0x3F) != 0x3F) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_x87exns;
   }
   else
   if (((fpucw >> 8) & 3) != 3) {
      /* unsupported precision */
      ew = EmWarn_X86_x87precision;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}


/* CLEAN HELPER */
/* Given fpround as an IRRoundingMode value, create a suitable x87
   native format FPU control word (all exceptions masked, 64-bit
   precision). */
ULong amd64g_create_fpucw ( ULong fpround )
{
   fpround &= 3;
   return 0x037F | (fpround << 10);
}


/* This is used to implement 'fldenv'.  
   Reads 28 bytes at x87_state[0 .. 27].  Like do_put_x87 with
   moveRegs == False: only tags, FTOP, status bits and the control
   word are transferred -- register contents are left alone. */
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER */
VexEmWarn amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
                                      /*IN*/HWord x87_state)
{
   Int        stno, preg;
   UInt       tag;
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
   UInt       tagw    = x87->env[FP_ENV_TAG];
   UInt       fpucw   = x87->env[FP_ENV_CTRL];
   ULong      c3210   = x87->env[FP_ENV_STAT] & 0x4700;
   VexEmWarn  ew;
   ULong      fpround;
   ULong      pair;

   /* Copy tags */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = amd64g_check_fldcw ( (ULong)fpucw );
   fpround = pair & 0xFFFFFFFFULL;
   ew      = (VexEmWarn)(pair >> 32);
   
   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER */
/* Create an x87 FPU env from the guest state, as close as we can
   approximate it.  Writes 28 bytes at x87_state[0..27].
      */
void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
                                 /*OUT*/HWord x87_state )
{
   Int        i, stno, preg;
   UInt       tagw;
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = vex_state->guest_FTOP;
   ULong      c3210   = vex_state->guest_FC3210;

   for (i = 0; i < 14; i++)
      x87->env[i] = 0;

   /* Fill the reserved/selector words with all-ones. */
   x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   x87->env[FP_ENV_STAT] 
      = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
   x87->env[FP_ENV_CTRL] 
      = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));

   /* Compute the x87 tag word: 11 == empty, 00 == full. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty */
         tagw |= (3 << (2*preg));
      } else {
         /* register is full. */
         tagw |= (0 << (2*preg));
      }
   }
   x87->env[FP_ENV_TAG] = toUShort(tagw);

   /* We don't dump the x87 registers, tho. */
}


/*---------------------------------------------------------------*/
/*--- Misc integer helpers, including rotates and CPUID.      ---*/
/*---------------------------------------------------------------*/

/* Claim to be the following CPU, which is probably representative of
   the lowliest (earliest) amd64 offerings.  It can do neither sse3
   nor cx16.

   vendor_id       : AuthenticAMD
   cpu family      : 15
   model           : 5
   model name      : AMD Opteron (tm) Processor 848
   stepping        : 10
   cpu MHz         : 1797.682
   cache size      : 1024 KB
   fpu             : yes
   fpu_exception   : yes
   cpuid level     : 1
   wp              : yes
   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
                     mtrr pge mca cmov pat pse36 clflush mmx fxsr
                     sse sse2 syscall nx mmxext lm 3dnowext 3dnow
   bogomips        : 3600.62
   TLB size        : 1088 4K pages
   clflush size    : 64
   cache_alignment : 64
   address sizes   : 40 bits physical, 48 bits virtual
   power management: ts fid vid ttp
*/
/* Emulate CPUID: dispatch on the leaf number in guest RAX[31:0] and
   write the canned (hardware-derived) leaf values back into
   RAX/RBX/RCX/RDX.  The constants are literal dumps from the CPU
   described above and must not be "tidied". */
void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
{
#  define SET_ABCD(_a,_b,_c,_d)                \
      do { st->guest_RAX = (ULong)(_a);        \
           st->guest_RBX = (ULong)(_b);        \
           st->guest_RCX = (ULong)(_c);        \
           st->guest_RDX = (ULong)(_d);        \
      } while (0)

   switch (0xFFFFFFFF & st->guest_RAX) {
      case 0x00000000:
         SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
         break;
      case 0x00000001:
         SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
         break;
      case 0x80000000:
         SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
         break;
      case 0x80000001:
         SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, 0xe1d3fbff);
         break;
      case 0x80000002:
         SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
         break;
      case 0x80000003:
         SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
         break;
      case 0x80000004:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000005:
         SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
         break;
      case 0x80000006:
         SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
         break;
      case 0x80000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
         break;
      case 0x80000008:
         SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
   }
#  undef SET_ABCD
}


/* Claim to be the following CPU (2 x ...), which is sse3 and cx16
   capable.

   vendor_id       : GenuineIntel
   cpu family      : 6
   model           : 15
   model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
   stepping        : 6
   cpu MHz         : 2394.000
   cache size      : 4096 KB
   physical id     : 0
   siblings        : 2
   core id         : 0
   cpu cores       : 2
   fpu             : yes
   fpu_exception   : yes
   cpuid level     : 10
   wp              : yes
   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
                     mtrr pge mca cmov pat pse36 clflush dts acpi
                     mmx fxsr sse sse2 ss ht tm syscall nx lm
                     constant_tsc pni monitor ds_cpl vmx est tm2
                     cx16 xtpr lahf_lm
   bogomips        : 4798.78
   clflush size    : 64
   cache_alignment : 64
   address sizes   : 36 bits physical, 48 bits virtual
   power management:
*/
/* As above, but for the Core2 description; leaf 4 (cache params) is
   additionally sub-indexed by guest RCX[31:0].  Unknown leaves fall
   through to the highest supported basic leaf (0x0000000a), matching
   real Intel behaviour for out-of-range inputs. */
void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
{
#  define SET_ABCD(_a,_b,_c,_d)                \
      do { st->guest_RAX = (ULong)(_a);        \
           st->guest_RBX = (ULong)(_b);        \
           st->guest_RCX = (ULong)(_c);        \
           st->guest_RDX = (ULong)(_d);        \
      } while (0)

   switch (0xFFFFFFFF & st->guest_RAX) {
      case 0x00000000:
         SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
         break;
      case 0x00000001:
         SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
         break;
      case 0x00000002:
         SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
         break;
      case 0x00000003:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000004: {
         /* Deterministic cache parameters: sub-leaf in RCX. */
         switch (0xFFFFFFFF & st->guest_RCX) {
            case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
                                      0x00000fff, 0x00000001); break;
            default:         SET_ABCD(0x00000000, 0x00000000,
                                      0x00000000, 0x00000000); break;
         }
         break;
      }
      case 0x00000005:
         SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
         break;
      case 0x00000006:
         SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
         break;
      case 0x00000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000008:
         SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000009:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x0000000a:
      unhandled_eax_value:
         SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000000:
         SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000001:
         SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
         break;
      case 0x80000002:
         SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
         break;
      case 0x80000003:
         SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
         break;
      case 0x80000004:
         SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
         break;
      case 0x80000005:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000006:
         SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
         break;
      case 0x80000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000008:
         SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         goto unhandled_eax_value;
   }
#  undef SET_ABCD
}


/* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
   capable.
2216 2217 vendor_id : GenuineIntel 2218 cpu family : 6 2219 model : 37 2220 model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz 2221 stepping : 2 2222 cpu MHz : 3334.000 2223 cache size : 4096 KB 2224 physical id : 0 2225 siblings : 4 2226 core id : 0 2227 cpu cores : 2 2228 apicid : 0 2229 initial apicid : 0 2230 fpu : yes 2231 fpu_exception : yes 2232 cpuid level : 11 2233 wp : yes 2234 flags : fpu vme de pse tsc msr pae mce cx8 apic sep 2235 mtrr pge mca cmov pat pse36 clflush dts acpi 2236 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp 2237 lm constant_tsc arch_perfmon pebs bts rep_good 2238 xtopology nonstop_tsc aperfmperf pni pclmulqdq 2239 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 2240 xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida 2241 arat tpr_shadow vnmi flexpriority ept vpid 2242 MINUS aes (see below) 2243 bogomips : 6957.57 2244 clflush size : 64 2245 cache_alignment : 64 2246 address sizes : 36 bits physical, 48 bits virtual 2247 power management: 2248 */ 2249 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st ) 2250 { 2251 # define SET_ABCD(_a,_b,_c,_d) \ 2252 do { st->guest_RAX = (ULong)(_a); \ 2253 st->guest_RBX = (ULong)(_b); \ 2254 st->guest_RCX = (ULong)(_c); \ 2255 st->guest_RDX = (ULong)(_d); \ 2256 } while (0) 2257 2258 UInt old_eax = (UInt)st->guest_RAX; 2259 UInt old_ecx = (UInt)st->guest_RCX; 2260 2261 switch (old_eax) { 2262 case 0x00000000: 2263 SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69); 2264 break; 2265 case 0x00000001: 2266 // & ~(1<<25): don't claim to support AES insns. See 2267 // bug 249991. 
2268 SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff & ~(1<<25), 2269 0xbfebfbff); 2270 break; 2271 case 0x00000002: 2272 SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c); 2273 break; 2274 case 0x00000003: 2275 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2276 break; 2277 case 0x00000004: 2278 switch (old_ecx) { 2279 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f, 2280 0x0000003f, 0x00000000); break; 2281 case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f, 2282 0x0000007f, 0x00000000); break; 2283 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f, 2284 0x000001ff, 0x00000000); break; 2285 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f, 2286 0x00000fff, 0x00000002); break; 2287 default: SET_ABCD(0x00000000, 0x00000000, 2288 0x00000000, 0x00000000); break; 2289 } 2290 break; 2291 case 0x00000005: 2292 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120); 2293 break; 2294 case 0x00000006: 2295 SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000); 2296 break; 2297 case 0x00000007: 2298 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2299 break; 2300 case 0x00000008: 2301 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2302 break; 2303 case 0x00000009: 2304 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2305 break; 2306 case 0x0000000a: 2307 SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603); 2308 break; 2309 case 0x0000000b: 2310 switch (old_ecx) { 2311 case 0x00000000: 2312 SET_ABCD(0x00000001, 0x00000002, 2313 0x00000100, 0x00000000); break; 2314 case 0x00000001: 2315 SET_ABCD(0x00000004, 0x00000004, 2316 0x00000201, 0x00000000); break; 2317 default: 2318 SET_ABCD(0x00000000, 0x00000000, 2319 old_ecx, 0x00000000); break; 2320 } 2321 break; 2322 case 0x0000000c: 2323 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000); 2324 break; 2325 case 0x0000000d: 2326 switch (old_ecx) { 2327 case 0x00000000: SET_ABCD(0x00000001, 0x00000002, 2328 0x00000100, 0x00000000); break; 2329 case 0x00000001: 
SET_ABCD(0x00000004, 0x00000004, 2330 0x00000201, 0x00000000); break; 2331 default: SET_ABCD(0x00000000, 0x00000000, 2332 old_ecx, 0x00000000); break; 2333 } 2334 break; 2335 case 0x80000000: 2336 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); 2337 break; 2338 case 0x80000001: 2339 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800); 2340 break; 2341 case 0x80000002: 2342 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); 2343 break; 2344 case 0x80000003: 2345 SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020); 2346 break; 2347 case 0x80000004: 2348 SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847); 2349 break; 2350 case 0x80000005: 2351 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2352 break; 2353 case 0x80000006: 2354 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000); 2355 break; 2356 case 0x80000007: 2357 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100); 2358 break; 2359 case 0x80000008: 2360 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); 2361 break; 2362 default: 2363 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000); 2364 break; 2365 } 2366 # undef SET_ABCD 2367 } 2368 2369 2370 ULong amd64g_calculate_RCR ( ULong arg, 2371 ULong rot_amt, 2372 ULong rflags_in, 2373 Long szIN ) 2374 { 2375 Bool wantRflags = toBool(szIN < 0); 2376 ULong sz = wantRflags ? (-szIN) : szIN; 2377 ULong tempCOUNT = rot_amt & (sz == 8 ? 
0x3F : 0x1F); 2378 ULong cf=0, of=0, tempcf; 2379 2380 switch (sz) { 2381 case 8: 2382 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2383 of = ((arg >> 63) ^ cf) & 1; 2384 while (tempCOUNT > 0) { 2385 tempcf = arg & 1; 2386 arg = (arg >> 1) | (cf << 63); 2387 cf = tempcf; 2388 tempCOUNT--; 2389 } 2390 break; 2391 case 4: 2392 while (tempCOUNT >= 33) tempCOUNT -= 33; 2393 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2394 of = ((arg >> 31) ^ cf) & 1; 2395 while (tempCOUNT > 0) { 2396 tempcf = arg & 1; 2397 arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31); 2398 cf = tempcf; 2399 tempCOUNT--; 2400 } 2401 break; 2402 case 2: 2403 while (tempCOUNT >= 17) tempCOUNT -= 17; 2404 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2405 of = ((arg >> 15) ^ cf) & 1; 2406 while (tempCOUNT > 0) { 2407 tempcf = arg & 1; 2408 arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15); 2409 cf = tempcf; 2410 tempCOUNT--; 2411 } 2412 break; 2413 case 1: 2414 while (tempCOUNT >= 9) tempCOUNT -= 9; 2415 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2416 of = ((arg >> 7) ^ cf) & 1; 2417 while (tempCOUNT > 0) { 2418 tempcf = arg & 1; 2419 arg = ((arg >> 1) & 0x7FULL) | (cf << 7); 2420 cf = tempcf; 2421 tempCOUNT--; 2422 } 2423 break; 2424 default: 2425 vpanic("calculate_RCR(amd64g): invalid size"); 2426 } 2427 2428 cf &= 1; 2429 of &= 1; 2430 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); 2431 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); 2432 2433 /* caller can ask to have back either the resulting flags or 2434 resulting value, but not both */ 2435 return wantRflags ? rflags_in : arg; 2436 } 2437 2438 ULong amd64g_calculate_RCL ( ULong arg, 2439 ULong rot_amt, 2440 ULong rflags_in, 2441 Long szIN ) 2442 { 2443 Bool wantRflags = toBool(szIN < 0); 2444 ULong sz = wantRflags ? (-szIN) : szIN; 2445 ULong tempCOUNT = rot_amt & (sz == 8 ? 
0x3F : 0x1F); 2446 ULong cf=0, of=0, tempcf; 2447 2448 switch (sz) { 2449 case 8: 2450 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2451 while (tempCOUNT > 0) { 2452 tempcf = (arg >> 63) & 1; 2453 arg = (arg << 1) | (cf & 1); 2454 cf = tempcf; 2455 tempCOUNT--; 2456 } 2457 of = ((arg >> 63) ^ cf) & 1; 2458 break; 2459 case 4: 2460 while (tempCOUNT >= 33) tempCOUNT -= 33; 2461 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2462 while (tempCOUNT > 0) { 2463 tempcf = (arg >> 31) & 1; 2464 arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1)); 2465 cf = tempcf; 2466 tempCOUNT--; 2467 } 2468 of = ((arg >> 31) ^ cf) & 1; 2469 break; 2470 case 2: 2471 while (tempCOUNT >= 17) tempCOUNT -= 17; 2472 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2473 while (tempCOUNT > 0) { 2474 tempcf = (arg >> 15) & 1; 2475 arg = 0xFFFFULL & ((arg << 1) | (cf & 1)); 2476 cf = tempcf; 2477 tempCOUNT--; 2478 } 2479 of = ((arg >> 15) ^ cf) & 1; 2480 break; 2481 case 1: 2482 while (tempCOUNT >= 9) tempCOUNT -= 9; 2483 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2484 while (tempCOUNT > 0) { 2485 tempcf = (arg >> 7) & 1; 2486 arg = 0xFFULL & ((arg << 1) | (cf & 1)); 2487 cf = tempcf; 2488 tempCOUNT--; 2489 } 2490 of = ((arg >> 7) ^ cf) & 1; 2491 break; 2492 default: 2493 vpanic("calculate_RCL(amd64g): invalid size"); 2494 } 2495 2496 cf &= 1; 2497 of &= 1; 2498 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); 2499 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); 2500 2501 return wantRflags ? 
rflags_in : arg; 2502 } 2503 2504 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+) 2505 * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25 2506 */ 2507 ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which) 2508 { 2509 ULong hi, lo, tmp, A[16]; 2510 2511 A[0] = 0; A[1] = a; 2512 A[2] = A[1] << 1; A[3] = A[2] ^ a; 2513 A[4] = A[2] << 1; A[5] = A[4] ^ a; 2514 A[6] = A[3] << 1; A[7] = A[6] ^ a; 2515 A[8] = A[4] << 1; A[9] = A[8] ^ a; 2516 A[10] = A[5] << 1; A[11] = A[10] ^ a; 2517 A[12] = A[6] << 1; A[13] = A[12] ^ a; 2518 A[14] = A[7] << 1; A[15] = A[14] ^ a; 2519 2520 lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15]; 2521 hi = lo >> 56; 2522 lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15]; 2523 hi = (hi << 8) | (lo >> 56); 2524 lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15]; 2525 hi = (hi << 8) | (lo >> 56); 2526 lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15]; 2527 hi = (hi << 8) | (lo >> 56); 2528 lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15]; 2529 hi = (hi << 8) | (lo >> 56); 2530 lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15]; 2531 hi = (hi << 8) | (lo >> 56); 2532 lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15]; 2533 hi = (hi << 8) | (lo >> 56); 2534 lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15]; 2535 2536 ULong m0 = -1; 2537 m0 /= 255; 2538 tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp; 2539 tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp; 2540 tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp; 2541 tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp; 2542 tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp; 2543 tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp; 2544 tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp; 2545 2546 return which ? 
hi : lo; 2547 } 2548 2549 2550 /* CALLED FROM GENERATED CODE */ 2551 /* DIRTY HELPER (non-referentially-transparent) */ 2552 /* Horrible hack. On non-amd64 platforms, return 1. */ 2553 ULong amd64g_dirtyhelper_RDTSC ( void ) 2554 { 2555 # if defined(__x86_64__) 2556 UInt eax, edx; 2557 __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx)); 2558 return (((ULong)edx) << 32) | ((ULong)eax); 2559 # else 2560 return 1ULL; 2561 # endif 2562 } 2563 2564 2565 /* CALLED FROM GENERATED CODE */ 2566 /* DIRTY HELPER (non-referentially-transparent) */ 2567 /* Horrible hack. On non-amd64 platforms, return 0. */ 2568 ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ ) 2569 { 2570 # if defined(__x86_64__) 2571 ULong r = 0; 2572 portno &= 0xFFFF; 2573 switch (sz) { 2574 case 4: 2575 __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0" 2576 : "=a" (r) : "Nd" (portno)); 2577 break; 2578 case 2: 2579 __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0" 2580 : "=a" (r) : "Nd" (portno)); 2581 break; 2582 case 1: 2583 __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0" 2584 : "=a" (r) : "Nd" (portno)); 2585 break; 2586 default: 2587 break; /* note: no 64-bit version of insn exists */ 2588 } 2589 return r; 2590 # else 2591 return 0; 2592 # endif 2593 } 2594 2595 2596 /* CALLED FROM GENERATED CODE */ 2597 /* DIRTY HELPER (non-referentially-transparent) */ 2598 /* Horrible hack. On non-amd64 platforms, do nothing. 
*/ 2599 void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ ) 2600 { 2601 # if defined(__x86_64__) 2602 portno &= 0xFFFF; 2603 switch (sz) { 2604 case 4: 2605 __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1" 2606 : : "a" (data), "Nd" (portno)); 2607 break; 2608 case 2: 2609 __asm__ __volatile__("outw %w0, %w1" 2610 : : "a" (data), "Nd" (portno)); 2611 break; 2612 case 1: 2613 __asm__ __volatile__("outb %b0, %w1" 2614 : : "a" (data), "Nd" (portno)); 2615 break; 2616 default: 2617 break; /* note: no 64-bit version of insn exists */ 2618 } 2619 # else 2620 /* do nothing */ 2621 # endif 2622 } 2623 2624 /* CALLED FROM GENERATED CODE */ 2625 /* DIRTY HELPER (non-referentially-transparent) */ 2626 /* Horrible hack. On non-amd64 platforms, do nothing. */ 2627 /* op = 0: call the native SGDT instruction. 2628 op = 1: call the native SIDT instruction. 2629 */ 2630 void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) { 2631 # if defined(__x86_64__) 2632 switch (op) { 2633 case 0: 2634 __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory"); 2635 break; 2636 case 1: 2637 __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory"); 2638 break; 2639 default: 2640 vpanic("amd64g_dirtyhelper_SxDT"); 2641 } 2642 # else 2643 /* do nothing */ 2644 UChar* p = (UChar*)address; 2645 p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0; 2646 p[6] = p[7] = p[8] = p[9] = 0; 2647 # endif 2648 } 2649 2650 /*---------------------------------------------------------------*/ 2651 /*--- Helpers for MMX/SSE/SSE2. ---*/ 2652 /*---------------------------------------------------------------*/ 2653 2654 static inline UChar abdU8 ( UChar xx, UChar yy ) { 2655 return toUChar(xx>yy ? 
xx-yy : yy-xx); 2656 } 2657 2658 static inline ULong mk32x2 ( UInt w1, UInt w0 ) { 2659 return (((ULong)w1) << 32) | ((ULong)w0); 2660 } 2661 2662 static inline UShort sel16x4_3 ( ULong w64 ) { 2663 UInt hi32 = toUInt(w64 >> 32); 2664 return toUShort(hi32 >> 16); 2665 } 2666 static inline UShort sel16x4_2 ( ULong w64 ) { 2667 UInt hi32 = toUInt(w64 >> 32); 2668 return toUShort(hi32); 2669 } 2670 static inline UShort sel16x4_1 ( ULong w64 ) { 2671 UInt lo32 = toUInt(w64); 2672 return toUShort(lo32 >> 16); 2673 } 2674 static inline UShort sel16x4_0 ( ULong w64 ) { 2675 UInt lo32 = toUInt(w64); 2676 return toUShort(lo32); 2677 } 2678 2679 static inline UChar sel8x8_7 ( ULong w64 ) { 2680 UInt hi32 = toUInt(w64 >> 32); 2681 return toUChar(hi32 >> 24); 2682 } 2683 static inline UChar sel8x8_6 ( ULong w64 ) { 2684 UInt hi32 = toUInt(w64 >> 32); 2685 return toUChar(hi32 >> 16); 2686 } 2687 static inline UChar sel8x8_5 ( ULong w64 ) { 2688 UInt hi32 = toUInt(w64 >> 32); 2689 return toUChar(hi32 >> 8); 2690 } 2691 static inline UChar sel8x8_4 ( ULong w64 ) { 2692 UInt hi32 = toUInt(w64 >> 32); 2693 return toUChar(hi32 >> 0); 2694 } 2695 static inline UChar sel8x8_3 ( ULong w64 ) { 2696 UInt lo32 = toUInt(w64); 2697 return toUChar(lo32 >> 24); 2698 } 2699 static inline UChar sel8x8_2 ( ULong w64 ) { 2700 UInt lo32 = toUInt(w64); 2701 return toUChar(lo32 >> 16); 2702 } 2703 static inline UChar sel8x8_1 ( ULong w64 ) { 2704 UInt lo32 = toUInt(w64); 2705 return toUChar(lo32 >> 8); 2706 } 2707 static inline UChar sel8x8_0 ( ULong w64 ) { 2708 UInt lo32 = toUInt(w64); 2709 return toUChar(lo32 >> 0); 2710 } 2711 2712 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2713 ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy ) 2714 { 2715 return 2716 mk32x2( 2717 (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy))) 2718 + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))), 2719 (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy))) 2720 + 
(((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy))) 2721 ); 2722 } 2723 2724 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2725 ULong amd64g_calculate_mmx_pmovmskb ( ULong xx ) 2726 { 2727 ULong r = 0; 2728 if (xx & (1ULL << (64-1))) r |= (1<<7); 2729 if (xx & (1ULL << (56-1))) r |= (1<<6); 2730 if (xx & (1ULL << (48-1))) r |= (1<<5); 2731 if (xx & (1ULL << (40-1))) r |= (1<<4); 2732 if (xx & (1ULL << (32-1))) r |= (1<<3); 2733 if (xx & (1ULL << (24-1))) r |= (1<<2); 2734 if (xx & (1ULL << (16-1))) r |= (1<<1); 2735 if (xx & (1ULL << ( 8-1))) r |= (1<<0); 2736 return r; 2737 } 2738 2739 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2740 ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy ) 2741 { 2742 UInt t = 0; 2743 t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) ); 2744 t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) ); 2745 t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) ); 2746 t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) ); 2747 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); 2748 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); 2749 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); 2750 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); 2751 t &= 0xFFFF; 2752 return (ULong)t; 2753 } 2754 2755 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2756 ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ) 2757 { 2758 ULong rHi8 = amd64g_calculate_mmx_pmovmskb ( w64hi ); 2759 ULong rLo8 = amd64g_calculate_mmx_pmovmskb ( w64lo ); 2760 return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF); 2761 } 2762 2763 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2764 ULong amd64g_calc_crc32b ( ULong crcIn, ULong b ) 2765 { 2766 UInt i; 2767 ULong crc = (b & 0xFFULL) ^ crcIn; 2768 for (i = 0; i < 8; i++) 2769 crc = (crc >> 1) ^ ((crc & 1) ? 
0x82f63b78ULL : 0); 2770 return crc; 2771 } 2772 2773 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2774 ULong amd64g_calc_crc32w ( ULong crcIn, ULong w ) 2775 { 2776 UInt i; 2777 ULong crc = (w & 0xFFFFULL) ^ crcIn; 2778 for (i = 0; i < 16; i++) 2779 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); 2780 return crc; 2781 } 2782 2783 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2784 ULong amd64g_calc_crc32l ( ULong crcIn, ULong l ) 2785 { 2786 UInt i; 2787 ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn; 2788 for (i = 0; i < 32; i++) 2789 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); 2790 return crc; 2791 } 2792 2793 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2794 ULong amd64g_calc_crc32q ( ULong crcIn, ULong q ) 2795 { 2796 ULong crc = amd64g_calc_crc32l(crcIn, q); 2797 return amd64g_calc_crc32l(crc, q >> 32); 2798 } 2799 2800 2801 /*---------------------------------------------------------------*/ 2802 /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/ 2803 /*---------------------------------------------------------------*/ 2804 2805 static UInt zmask_from_V128 ( V128* arg ) 2806 { 2807 UInt i, res = 0; 2808 for (i = 0; i < 16; i++) { 2809 res |= ((arg->w8[i] == 0) ? 1 : 0) << i; 2810 } 2811 return res; 2812 } 2813 2814 /* Helps with PCMP{I,E}STR{I,M}. 2815 2816 CALLED FROM GENERATED CODE: DIRTY HELPER(s). (But not really, 2817 actually it could be a clean helper, but for the fact that we can't 2818 pass by value 2 x V128 to a clean helper, nor have one returned.) 2819 Reads guest state, writes to guest state for the xSTRM cases, no 2820 accesses of memory, is a pure function. 2821 2822 opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so 2823 the callee knows which I/E and I/M variant it is dealing with and 2824 what the specific operation is. 
4th byte of opcode is in the range 2825 0x60 to 0x63: 2826 istri 66 0F 3A 63 2827 istrm 66 0F 3A 62 2828 estri 66 0F 3A 61 2829 estrm 66 0F 3A 60 2830 2831 gstOffL and gstOffR are the guest state offsets for the two XMM 2832 register inputs. We never have to deal with the memory case since 2833 that is handled by pre-loading the relevant value into the fake 2834 XMM16 register. 2835 2836 For ESTRx variants, edxIN and eaxIN hold the values of those two 2837 registers. 2838 2839 In all cases, the bottom 16 bits of the result contain the new 2840 OSZACP %rflags values. For xSTRI variants, bits[31:16] of the 2841 result hold the new %ecx value. For xSTRM variants, the helper 2842 writes the result directly to the guest XMM0. 2843 2844 Declarable side effects: in all cases, reads guest state at 2845 [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes 2846 guest_XMM0. 2847 2848 Is expected to be called with opc_and_imm combinations which have 2849 actually been validated, and will assert if otherwise. The front 2850 end should ensure we're only called with verified values. 2851 */ 2852 ULong amd64g_dirtyhelper_PCMPxSTRx ( 2853 VexGuestAMD64State* gst, 2854 HWord opc4_and_imm, 2855 HWord gstOffL, HWord gstOffR, 2856 HWord edxIN, HWord eaxIN 2857 ) 2858 { 2859 HWord opc4 = (opc4_and_imm >> 8) & 0xFF; 2860 HWord imm8 = opc4_and_imm & 0xFF; 2861 HWord isISTRx = opc4 & 2; 2862 HWord isxSTRM = (opc4 & 1) ^ 1; 2863 vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */ 2864 vassert((imm8 & 1) == 0); /* we support byte-size cases only */ 2865 2866 // where the args are 2867 V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); 2868 V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); 2869 2870 /* Create the arg validity masks, either from the vectors 2871 themselves or from the supplied edx/eax values. */ 2872 // FIXME: this is only right for the 8-bit data cases. 2873 // At least that is asserted above. 
2874 UInt zmaskL, zmaskR; 2875 if (isISTRx) { 2876 zmaskL = zmask_from_V128(argL); 2877 zmaskR = zmask_from_V128(argR); 2878 } else { 2879 Int tmp; 2880 tmp = edxIN & 0xFFFFFFFF; 2881 if (tmp < -16) tmp = -16; 2882 if (tmp > 16) tmp = 16; 2883 if (tmp < 0) tmp = -tmp; 2884 vassert(tmp >= 0 && tmp <= 16); 2885 zmaskL = (1 << tmp) & 0xFFFF; 2886 tmp = eaxIN & 0xFFFFFFFF; 2887 if (tmp < -16) tmp = -16; 2888 if (tmp > 16) tmp = 16; 2889 if (tmp < 0) tmp = -tmp; 2890 vassert(tmp >= 0 && tmp <= 16); 2891 zmaskR = (1 << tmp) & 0xFFFF; 2892 } 2893 2894 // temp spot for the resulting flags and vector. 2895 V128 resV; 2896 UInt resOSZACP; 2897 2898 // do the meyaath 2899 Bool ok = compute_PCMPxSTRx ( 2900 &resV, &resOSZACP, argL, argR, 2901 zmaskL, zmaskR, imm8, (Bool)isxSTRM 2902 ); 2903 2904 // front end shouldn't pass us any imm8 variants we can't 2905 // handle. Hence: 2906 vassert(ok); 2907 2908 // So, finally we need to get the results back to the caller. 2909 // In all cases, the new OSZACP value is the lowest 16 of 2910 // the return value. 2911 if (isxSTRM) { 2912 /* gst->guest_XMM0 = resV; */ // gcc don't like that 2913 gst->guest_XMM0[0] = resV.w32[0]; 2914 gst->guest_XMM0[1] = resV.w32[1]; 2915 gst->guest_XMM0[2] = resV.w32[2]; 2916 gst->guest_XMM0[3] = resV.w32[3]; 2917 return resOSZACP & 0x8D5; 2918 } else { 2919 UInt newECX = resV.w32[0] & 0xFFFF; 2920 return (newECX << 16) | (resOSZACP & 0x8D5); 2921 } 2922 } 2923 2924 2925 /*---------------------------------------------------------------*/ 2926 /*--- Helpers for dealing with, and describing, ---*/ 2927 /*--- guest state as a whole. ---*/ 2928 /*---------------------------------------------------------------*/ 2929 2930 /* Initialise the entire amd64 guest state. 
*/ 2931 /* VISIBLE TO LIBVEX CLIENT */ 2932 void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state ) 2933 { 2934 vex_state->guest_RAX = 0; 2935 vex_state->guest_RCX = 0; 2936 vex_state->guest_RDX = 0; 2937 vex_state->guest_RBX = 0; 2938 vex_state->guest_RSP = 0; 2939 vex_state->guest_RBP = 0; 2940 vex_state->guest_RSI = 0; 2941 vex_state->guest_RDI = 0; 2942 vex_state->guest_R8 = 0; 2943 vex_state->guest_R9 = 0; 2944 vex_state->guest_R10 = 0; 2945 vex_state->guest_R11 = 0; 2946 vex_state->guest_R12 = 0; 2947 vex_state->guest_R13 = 0; 2948 vex_state->guest_R14 = 0; 2949 vex_state->guest_R15 = 0; 2950 2951 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY; 2952 vex_state->guest_CC_DEP1 = 0; 2953 vex_state->guest_CC_DEP2 = 0; 2954 vex_state->guest_CC_NDEP = 0; 2955 2956 vex_state->guest_DFLAG = 1; /* forwards */ 2957 vex_state->guest_IDFLAG = 0; 2958 2959 /* HACK: represent the offset associated with %fs==0. This 2960 assumes that %fs is only ever zero. */ 2961 vex_state->guest_FS_ZERO = 0; 2962 2963 vex_state->guest_RIP = 0; 2964 2965 /* Initialise the simulated FPU */ 2966 amd64g_dirtyhelper_FINIT( vex_state ); 2967 2968 /* Initialise the SSE state. 
*/ 2969 # define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0; 2970 2971 vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST; 2972 SSEZERO(vex_state->guest_XMM0); 2973 SSEZERO(vex_state->guest_XMM1); 2974 SSEZERO(vex_state->guest_XMM2); 2975 SSEZERO(vex_state->guest_XMM3); 2976 SSEZERO(vex_state->guest_XMM4); 2977 SSEZERO(vex_state->guest_XMM5); 2978 SSEZERO(vex_state->guest_XMM6); 2979 SSEZERO(vex_state->guest_XMM7); 2980 SSEZERO(vex_state->guest_XMM8); 2981 SSEZERO(vex_state->guest_XMM9); 2982 SSEZERO(vex_state->guest_XMM10); 2983 SSEZERO(vex_state->guest_XMM11); 2984 SSEZERO(vex_state->guest_XMM12); 2985 SSEZERO(vex_state->guest_XMM13); 2986 SSEZERO(vex_state->guest_XMM14); 2987 SSEZERO(vex_state->guest_XMM15); 2988 SSEZERO(vex_state->guest_XMM16); 2989 2990 # undef SSEZERO 2991 2992 vex_state->guest_EMWARN = EmWarn_NONE; 2993 2994 /* These should not ever be either read or written, but we 2995 initialise them anyway. */ 2996 vex_state->guest_TISTART = 0; 2997 vex_state->guest_TILEN = 0; 2998 2999 vex_state->guest_NRADDR = 0; 3000 vex_state->guest_SC_CLASS = 0; 3001 vex_state->guest_GS_0x60 = 0; 3002 3003 vex_state->guest_IP_AT_SYSCALL = 0; 3004 /* vex_state->padding = 0; */ 3005 } 3006 3007 3008 /* Figure out if any part of the guest state contained in minoff 3009 .. maxoff requires precise memory exceptions. If in doubt return 3010 True (but this is generates significantly slower code). 3011 3012 By default we enforce precise exns for guest %RSP, %RBP and %RIP 3013 only. These are the minimum needed to extract correct stack 3014 backtraces from amd64 code. 
3015 */ 3016 Bool guest_amd64_state_requires_precise_mem_exns ( Int minoff, 3017 Int maxoff) 3018 { 3019 Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP); 3020 Int rbp_max = rbp_min + 8 - 1; 3021 Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP); 3022 Int rsp_max = rsp_min + 8 - 1; 3023 Int rip_min = offsetof(VexGuestAMD64State, guest_RIP); 3024 Int rip_max = rip_min + 8 - 1; 3025 3026 if (maxoff < rbp_min || minoff > rbp_max) { 3027 /* no overlap with rbp */ 3028 } else { 3029 return True; 3030 } 3031 3032 if (maxoff < rsp_min || minoff > rsp_max) { 3033 /* no overlap with rsp */ 3034 } else { 3035 return True; 3036 } 3037 3038 if (maxoff < rip_min || minoff > rip_max) { 3039 /* no overlap with eip */ 3040 } else { 3041 return True; 3042 } 3043 3044 return False; 3045 } 3046 3047 3048 #define ALWAYSDEFD(field) \ 3049 { offsetof(VexGuestAMD64State, field), \ 3050 (sizeof ((VexGuestAMD64State*)0)->field) } 3051 3052 VexGuestLayout 3053 amd64guest_layout 3054 = { 3055 /* Total size of the guest state, in bytes. */ 3056 .total_sizeB = sizeof(VexGuestAMD64State), 3057 3058 /* Describe the stack pointer. */ 3059 .offset_SP = offsetof(VexGuestAMD64State,guest_RSP), 3060 .sizeof_SP = 8, 3061 3062 /* Describe the frame pointer. */ 3063 .offset_FP = offsetof(VexGuestAMD64State,guest_RBP), 3064 .sizeof_FP = 8, 3065 3066 /* Describe the instruction pointer. */ 3067 .offset_IP = offsetof(VexGuestAMD64State,guest_RIP), 3068 .sizeof_IP = 8, 3069 3070 /* Describe any sections to be regarded by Memcheck as 3071 'always-defined'. */ 3072 .n_alwaysDefd = 16, 3073 3074 /* flags thunk: OP and NDEP are always defd, whereas DEP1 3075 and DEP2 have to be tracked. See detailed comment in 3076 gdefs.h on meaning of thunk fields. 
*/ 3077 .alwaysDefd 3078 = { /* 0 */ ALWAYSDEFD(guest_CC_OP), 3079 /* 1 */ ALWAYSDEFD(guest_CC_NDEP), 3080 /* 2 */ ALWAYSDEFD(guest_DFLAG), 3081 /* 3 */ ALWAYSDEFD(guest_IDFLAG), 3082 /* 4 */ ALWAYSDEFD(guest_RIP), 3083 /* 5 */ ALWAYSDEFD(guest_FS_ZERO), 3084 /* 6 */ ALWAYSDEFD(guest_FTOP), 3085 /* 7 */ ALWAYSDEFD(guest_FPTAG), 3086 /* 8 */ ALWAYSDEFD(guest_FPROUND), 3087 /* 9 */ ALWAYSDEFD(guest_FC3210), 3088 // /* */ ALWAYSDEFD(guest_CS), 3089 // /* */ ALWAYSDEFD(guest_DS), 3090 // /* */ ALWAYSDEFD(guest_ES), 3091 // /* */ ALWAYSDEFD(guest_FS), 3092 // /* */ ALWAYSDEFD(guest_GS), 3093 // /* */ ALWAYSDEFD(guest_SS), 3094 // /* */ ALWAYSDEFD(guest_LDT), 3095 // /* */ ALWAYSDEFD(guest_GDT), 3096 /* 10 */ ALWAYSDEFD(guest_EMWARN), 3097 /* 11 */ ALWAYSDEFD(guest_SSEROUND), 3098 /* 12 */ ALWAYSDEFD(guest_TISTART), 3099 /* 13 */ ALWAYSDEFD(guest_TILEN), 3100 /* 14 */ ALWAYSDEFD(guest_SC_CLASS), 3101 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL) 3102 } 3103 }; 3104 3105 3106 /*---------------------------------------------------------------*/ 3107 /*--- end guest_amd64_helpers.c ---*/ 3108 /*---------------------------------------------------------------*/ 3109