
/*---------------------------------------------------------------*/
/*--- begin                              guest_amd64_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_emwarn.h"
#include "libvex_guest_amd64.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_amd64_defs.h"
#include "guest_generic_x87.h"


/* This file contains helper functions for amd64 guest code.
   Calls to these functions are generated by the back end.
   These calls are of course in the host machine code and
   this file will be compiled to host machine code, so that
   all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-amd64/toIR.c.

   The convention used is that all functions called from generated
   code are named amd64g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/


/* Set to 1 to get detailed profiling info about use of the flag
   machinery. */
#define PROFILE_RFLAGS 0


/*---------------------------------------------------------------*/
/*--- %rflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/

/* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
   after imulq/mulq. */

static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
{
   ULong u0, v0, w0;
   Long  u1, v1, w1, w2, t;
   u0   = u & 0xFFFFFFFFULL;
   u1   = u >> 32;
   v0   = v & 0xFFFFFFFFULL;
   v1   = v >> 32;
   w0   = u0 * v0;
   t    = u1 * v0 + (w0 >> 32);
   w1   = t & 0xFFFFFFFFULL;
   w2   = t >> 32;
   w1   = u0 * v1 + w1;
   *rHi = u1 * v1 + w2 + (w1 >> 32);
   *rLo = u * v;
}

static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
{
   ULong u0, v0, w0;
   ULong u1, v1, w1, w2, t;
   u0   = u & 0xFFFFFFFFULL;
   u1   = u >> 32;
   v0   = v & 0xFFFFFFFFULL;
   v1   = v >> 32;
   w0   = u0 * v0;
   t    = u1 * v0 + (w0 >> 32);
   w1   = t & 0xFFFFFFFFULL;
   w2   = t >> 32;
   w1   = u0 * v1 + w1;
   *rHi = u1 * v1 + w2 + (w1 >> 32);
   *rLo = u * v;
}

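/* Illustrative sketch only (not part of the original code): the two
   routines above use the schoolbook decomposition.  Writing
   u = u1*2^32 + u0 and v = v1*2^32 + v0, the full 128-bit product is

      u*v = u1*v1*2^64 + (u1*v0 + u0*v1)*2^32 + u0*v0

   and the code simply accumulates the partial products with their
   carries.  For example (assumed values, easy to check by hand):

      ULong hi, lo;
      mullU64(0xFFFFFFFFFFFFFFFFULL, 2ULL, &hi, &lo);
      // hi == 1, lo == 0xFFFFFFFFFFFFFFFEULL, since (2^64-1)*2 = 2^65-2
*/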

static const UChar parity_table[256] = {
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
};

/* generalised left-shifter */
static inline Long lshift ( Long x, Int n )
{
   if (n >= 0)
      return x << n;
   else
      return x >> (-n);
}

/* identity on ULong */
static inline ULong idULong ( ULong x )
{
   return x;
}


#define PREAMBLE(__data_bits)                                   \
   /* const */ ULong DATA_MASK                                  \
      = __data_bits==8                                          \
           ? 0xFFULL                                            \
           : (__data_bits==16                                   \
                ? 0xFFFFULL                                     \
                : (__data_bits==32                              \
                     ? 0xFFFFFFFFULL                            \
                     : 0xFFFFFFFFFFFFFFFFULL));                 \
   /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1);     \
   /* const */ ULong CC_DEP1 = cc_dep1_formal;                  \
   /* const */ ULong CC_DEP2 = cc_dep2_formal;                  \
   /* const */ ULong CC_NDEP = cc_ndep_formal;                  \
   /* Four bogus assignments, which hopefully gcc can     */    \
   /* optimise away, and which stop it complaining about  */    \
   /* unused variables.                                   */    \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;


/*-------------------------------------------------------------*/

#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
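/* Worked example (illustrative only, the operand values are assumed):
   for an 8-bit add the thunk holds CC_OP = AMD64G_CC_OP_ADDB,
   CC_DEP1 = argL and CC_DEP2 = argR, exactly as the macro reads them.
   With argL = 0x7F and argR = 0x01:

      res = 0x80
      cf  = 0                  (0x80 is not < 0x7F unsigned: no carry)
      zf  = 0
      sf  = 0x80               (bit 7 of the result is set)
      af  = 0x10               (carry out of bit 3)
      of  = AMD64G_CC_MASK_O   (signed overflow: pos + pos -> neg)

   so ACTIONS_ADD(8, UChar) returns SF | AF | OF for this input. */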

/*-------------------------------------------------------------*/

#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, oldC, res;                                \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, oldC, res;                                \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
          == ((ULong)SIGN_MASK - 1)) << 11;                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C;      \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
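/* A note on the shift thunks above, with a small assumed example:
   judging from how CF is extracted here, the front end appears to
   leave the final result in CC_DEP1 and the value shifted by one bit
   less in CC_DEP2, so the last bit shifted out can be recovered from
   CC_DEP2 (its top bit for SHL, its bottom bit for SHR).  For instance,
   for "shrb $2, %al" with %al = 0x0F one would expect CC_DEP1 = 0x03
   and CC_DEP2 = 0x07, hence cf = CC_DEP2 & 1 = 1 -- exactly the last
   bit that 0x0F >> 2 discarded. */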

/*-------------------------------------------------------------*/

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long fl                                                    \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & CC_DEP1)                        \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1))       \
                     ^ lshift(CC_DEP1, 11)));                   \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long fl                                                    \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))     \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1))       \
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));   \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_STYPE)CC_DEP1)                      \
                     * ((DATA_STYPE)CC_DEP2) );                 \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_UMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { Long cf, pf, af, zf, sf, of;                               \
     ULong lo, hi;                                              \
     mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo );       \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { Long cf, pf, af, zf, sf, of;                               \
     Long lo, hi;                                               \
     mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo );         \
     cf = (hi != (lo >>/*s*/ (64-1)));                          \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}


#if PROFILE_RFLAGS

static Bool initted = False;

/* C flag, fast route */
static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))


static void showCounts ( void )
{
   Int op, co;
   Char ch;
   vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {

      ch = ' ';
      if (op > 0 && (op-1) % 4 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 4 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 4 == 2)
         ch = 'L';
      if (op > 0 && (op-1) % 4 == 3)
         ch = 'Q';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("     ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_RFLAGS */


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code. */
static
ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
                                        ULong cc_dep1_formal,
                                        ULong cc_dep2_formal,
                                        ULong cc_ndep_formal )
{
   switch (cc_op) {
      case AMD64G_CC_OP_COPY:
         return cc_dep1_formal
                & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
                   | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);

      case AMD64G_CC_OP_ADDB:   ACTIONS_ADD(  8, UChar  );
      case AMD64G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case AMD64G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );
      case AMD64G_CC_OP_ADDQ:   ACTIONS_ADD( 64, ULong  );

      case AMD64G_CC_OP_ADCB:   ACTIONS_ADC(  8, UChar  );
      case AMD64G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case AMD64G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );
      case AMD64G_CC_OP_ADCQ:   ACTIONS_ADC( 64, ULong  );

      case AMD64G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case AMD64G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case AMD64G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );
      case AMD64G_CC_OP_SUBQ:   ACTIONS_SUB( 64, ULong  );

      case AMD64G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case AMD64G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case AMD64G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );
      case AMD64G_CC_OP_SBBQ:   ACTIONS_SBB( 64, ULong  );

      case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );
      case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong  );

      case AMD64G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case AMD64G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case AMD64G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );
      case AMD64G_CC_OP_INCQ:   ACTIONS_INC( 64, ULong  );

      case AMD64G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case AMD64G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case AMD64G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );
      case AMD64G_CC_OP_DECQ:   ACTIONS_DEC( 64, ULong  );

      case AMD64G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case AMD64G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case AMD64G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );
      case AMD64G_CC_OP_SHLQ:   ACTIONS_SHL( 64, ULong  );

      case AMD64G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case AMD64G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case AMD64G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );
      case AMD64G_CC_OP_SHRQ:   ACTIONS_SHR( 64, ULong  );

      case AMD64G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case AMD64G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case AMD64G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );
      case AMD64G_CC_OP_ROLQ:   ACTIONS_ROL( 64, ULong  );

      case AMD64G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case AMD64G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case AMD64G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );
      case AMD64G_CC_OP_RORQ:   ACTIONS_ROR( 64, ULong  );

      case AMD64G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                  UShort, toUShort );
      case AMD64G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                  UInt,   toUInt );
      case AMD64G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                  ULong,  idULong );

      case AMD64G_CC_OP_UMULQ:  ACTIONS_UMULQ;

      case AMD64G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                  Short,  toUShort );
      case AMD64G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                  Int,    toUInt );
      case AMD64G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                  Long,   idULong );

      case AMD64G_CC_OP_SMULQ:  ACTIONS_SMULQ;

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
                    "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters. */
ULong amd64g_calculate_rflags_all ( ULong cc_op,
                                    ULong cc_dep1,
                                    ULong cc_dep2,
                                    ULong cc_ndep )
{
#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
ULong amd64g_calculate_rflags_c ( ULong cc_op,
                                  ULong cc_dep1,
                                  ULong cc_dep2,
                                  ULong cc_ndep )
{
#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case AMD64G_CC_OP_COPY:
         return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
      case AMD64G_CC_OP_LOGICQ:
      case AMD64G_CC_OP_LOGICL:
      case AMD64G_CC_OP_LOGICW:
      case AMD64G_CC_OP_LOGICB:
         return 0;
      //   case AMD64G_CC_OP_SUBL:
      //      return ((UInt)cc_dep1) < ((UInt)cc_dep2)
      //                ? AMD64G_CC_MASK_C : 0;
      //   case AMD64G_CC_OP_SUBW:
      //      return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
      //                ? AMD64G_CC_MASK_C : 0;
      //   case AMD64G_CC_OP_SUBB:
      //      return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
      //                ? AMD64G_CC_MASK_C : 0;
      //   case AMD64G_CC_OP_INCL:
      //   case AMD64G_CC_OP_DECL:
      //      return cc_ndep & AMD64G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_RFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & AMD64G_CC_MASK_C;
}

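/* A minimal usage sketch of the two helpers above (illustrative only,
   the concrete values are assumed): after a 64-bit "cmpq %rsi,%rdi"
   the thunk holds CC_OP = AMD64G_CC_OP_SUBQ, CC_DEP1 = %rdi and
   CC_DEP2 = %rsi, so

      ULong c = amd64g_calculate_rflags_c(AMD64G_CC_OP_SUBQ,
                                          1ULL, 2ULL, 0ULL);
      // c == AMD64G_CC_MASK_C, since 1 <u 2 (carry/borrow set)

   and amd64g_calculate_rflags_all with the same arguments would in
   addition report SF set, because 1 - 2 is negative. */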

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
                                   ULong cc_op,
                                   ULong cc_dep1,
                                   ULong cc_dep2,
                                   ULong cc_ndep )
{
   ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
                                                  cc_dep2, cc_ndep);
   ULong of,sf,zf,cf,pf;
   ULong inv = cond & 1;

#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   switch (cond) {
      case AMD64CondNO:
      case AMD64CondO: /* OF == 1 */
         of = rflags >> AMD64G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case AMD64CondNZ:
      case AMD64CondZ: /* ZF == 1 */
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case AMD64CondNB:
      case AMD64CondB: /* CF == 1 */
         cf = rflags >> AMD64G_CC_SHIFT_C;
         return 1 & (inv ^ cf);
         break;

      case AMD64CondNBE:
      case AMD64CondBE: /* (CF or ZF) == 1 */
         cf = rflags >> AMD64G_CC_SHIFT_C;
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));
         break;

      case AMD64CondNS:
      case AMD64CondS: /* SF == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case AMD64CondNP:
      case AMD64CondP: /* PF == 1 */
         pf = rflags >> AMD64G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case AMD64CondNL:
      case AMD64CondL: /* (SF xor OF) == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         of = rflags >> AMD64G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));
         break;

      case AMD64CondNLE:
      case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         of = rflags >> AMD64G_CC_SHIFT_O;
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));
         break;

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("amd64g_calculate_condition"
                    "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("amd64g_calculate_condition");
   }
}


/* VISIBLE TO LIBVEX CLIENT */
ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/VexGuestAMD64State* vex_state )
{
   ULong rflags = amd64g_calculate_rflags_all_WRK(
                     vex_state->guest_CC_OP,
                     vex_state->guest_CC_DEP1,
                     vex_state->guest_CC_DEP2,
                     vex_state->guest_CC_NDEP
                  );
   Long dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == -1);
   if (dflag == -1)
      rflags |= (1<<10);
   if (vex_state->guest_IDFLAG == 1)
      rflags |= (1<<21);
   if (vex_state->guest_ACFLAG == 1)
      rflags |= (1<<18);

   return rflags;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
                                /*MOD*/VexGuestAMD64State* vex_state )
{
   ULong oszacp = amd64g_calculate_rflags_all_WRK(
                     vex_state->guest_CC_OP,
                     vex_state->guest_CC_DEP1,
                     vex_state->guest_CC_DEP2,
                     vex_state->guest_CC_NDEP
                  );
   if (new_carry_flag & 1) {
      oszacp |= AMD64G_CC_MASK_C;
   } else {
      oszacp &= ~AMD64G_CC_MASK_C;
   }
   vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = oszacp;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}


/*---------------------------------------------------------------*/
/*--- %rflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
/*--- %rflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static Bool isU64 ( IRExpr* e, ULong n )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U64
                  && e->Iex.Const.con->Ico.U64 == n );
}

IRExpr* guest_amd64_spechelper ( HChar* function_name,
                                 IRExpr** args,
                                 IRStmt** precedingStmts,
                                 Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "amd64g_calculate_condition" --------- */

   if (vex_streq(function_name, "amd64g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
         /* long long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Add64, cc_dep1, cc_dep2),
                           mkU64(0)));
      }

      /*---------------- SUBQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
         /* long long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
         /* long long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,cc_dep1,cc_dep2));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
         /* long long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
         /* long long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
         /* long long sub/cmp, then NB (unsigned greater than or equal)
            --> test src <=u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
         /* long long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
      }
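
      /* To make the intent of these rules concrete, a sketch (the guest
         fragment is assumed): for "cmpq %rsi,%rdi ; jz target" the
         front end would otherwise leave a helper call

            amd64g_calculate_condition(AMD64CondZ, AMD64G_CC_OP_SUBQ,
                                       rdi, rsi, 0)

         in the IR; the SUBQ/CondZ rule above lets iropt fold that call
         into the flag-free expression 1Uto64(CmpEQ64(rdi, rsi)). */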

      /*---------------- SUBL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));

      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
         /* long sub/cmp, then NLE (signed greater than)
            --> test !(dst <=s src)
            --> test (dst >s src)
            --> test (src <s dst) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           binop(Iop_Shl64,cc_dep2,mkU8(32)),
                           binop(Iop_Shl64,cc_dep1,mkU8(32))));

      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           binop(Iop_Shl64,cc_dep2,mkU8(32)),
                           binop(Iop_Shl64,cc_dep1,mkU8(32))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           binop(Iop_Sub64,
                                 binop(Iop_Shl64, cc_dep1, mkU8(32)),
                                 binop(Iop_Shl64, cc_dep2, mkU8(32))),
                           mkU64(0)));
      }

      /*---------------- SUBW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ16,
                           unop(Iop_64to16,cc_dep1),
                           unop(Iop_64to16,cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE16,
                           unop(Iop_64to16,cc_dep1),
                           unop(Iop_64to16,cc_dep2)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
         /* word sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           binop(Iop_Shl64,cc_dep2,mkU8(48))));

      }

      /*---------------- SUBB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ8,
                           unop(Iop_64to8,cc_dep1),
                           unop(Iop_64to8,cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE8,
                           unop(Iop_64to8,cc_dep1),
                           unop(Iop_64to8,cc_dep2)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
                                          && isU64(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (ULong)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU64(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U64s. */
         return binop(Iop_And64,
                      binop(Iop_Shr64,cc_dep1,mkU8(7)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
                                          && isU64(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (ULong) !dst[7]
         */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64,cc_dep1,mkU8(7)),
                            mkU64(1)),
                      mkU64(1));
      }

      /*---------------- LOGICQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
         /* long long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
         /* long long and/or/xor, then L
            LOGIC sets SF and ZF according to the
            result and makes OF be zero.  L computes SF ^ OF, but
            OF is zero, so this reduces to SF -- which will be 1 iff
            the result is < signed 0.  Hence ...
         */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           cc_dep1,
                           mkU64(0)));
      }

      /*---------------- LOGICL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           mkU64(0)));
      }

      /*---------------- LOGICB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
                                        mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)),
                                        mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And64,
                      binop(Iop_Shr64,cc_dep1,mkU8(7)),
                      mkU64(1));
      }

      /*---------------- INCB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
         /* 8-bit inc, then LE --> sign bit of the arg */
         return binop(Iop_And64,
                      binop(Iop_Shr64,
                            binop(Iop_Sub64, cc_dep1, mkU64(1)),
                            mkU8(7)),
                      mkU64(1));
      }

      /*---------------- INCW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
         /* 16-bit inc, then Z --> test dst == 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           mkU64(0)));
      }

      /*---------------- DECL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           mkU64(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
         /* 16-bit dec, then NZ --> test dst != 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           mkU64(0)));
      }

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of amd64 FP compares: "comisd ... ;
         jbe" for example. */

      if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
          (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test (C
            or Z == 1). */
         /* COPY, then NBE --> extract C and Z from dep1, and test (C
            or Z == 0). */
         ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpEQ64,
                  binop(
                     Iop_And64,
                     binop(
                        Iop_Or64,
                        binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
                        binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
                     ),
                     mkU64(1)
                  ),
                  mkU64(nnn)
               )
            );
      }

      if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpNE64,
                  binop(
                     Iop_And64,
                     binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
                     mkU64(1)
                  ),
                  mkU64(0)
               )
            );
      }

      if (isU64(cc_op, AMD64G_CC_OP_COPY)
          && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpEQ64,
                  binop(
                     Iop_And64,
                     binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
                     mkU64(1)
                  ),
                  mkU64(nnn)
               )
            );
      }

      if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpNE64,
                  binop(
                     Iop_And64,
                     binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
                     mkU64(1)
                  ),
                  mkU64(0)
               )
            );
      }

      return NULL;
   }

   /* --------- specialising "amd64g_calculate_rflags_c" --------- */

   if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
      /* specialise calls to above "calculate_rflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           cc_dep1,
                           cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           binop(Iop_And64,cc_dep1,mkU64(0xFF)),
                           binop(Iop_And64,cc_dep2,mkU64(0xFF))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
          || isU64(cc_op, AMD64G_CC_OP_LOGICL)
          || isU64(cc_op, AMD64G_CC_OP_LOGICW)
          || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU64(0);
      }
      if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
          || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }

#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU64
#  undef mkU8

   return NULL;
}


/*---------------------------------------------------------------*/
/*--- Supporting functions for x87 FPU activities.            ---*/
/*---------------------------------------------------------------*/

static inline Bool host_is_little_endian ( void )
{
   UInt x = 0x76543210;
   UChar* p = (UChar*)(&x);
   return toBool(*p == 0x10);
}

/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
{
   Bool   mantissaIsZero;
   Int    bexp;
   UChar  sign;
   UChar* f64;

   vassert(host_is_little_endian());

   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */

   f64  = (UChar*)(&dbl);
   sign = toUChar( (f64[7] >> 7) & 1 );

   /* First off, if the tag indicates the register was empty,
      return 1,0,sign,1 */
   if (tag == 0) {
      /* vex_printf("Empty\n"); */
      return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
                               | AMD64G_FC_MASK_C0;
   }

   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   bexp &= 0x7FF;

   mantissaIsZero
      = toBool(
           (f64[6] & 0x0F) == 0
           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
        );

   /* If both exponent and mantissa are zero, the value is zero.
      Return 1,0,sign,0. */
   if (bexp == 0 && mantissaIsZero) {
      /* vex_printf("Zero\n"); */
      return AMD64G_FC_MASK_C3 | 0
                               | (sign << AMD64G_FC_SHIFT_C1) | 0;
   }

   /* If exponent is zero but mantissa isn't, it's a denormal.
      Return 1,1,sign,0. */
   if (bexp == 0 && !mantissaIsZero) {
      /* vex_printf("Denormal\n"); */
      return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
                               | (sign << AMD64G_FC_SHIFT_C1) | 0;
   }

   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
      Return 0,1,sign,1. */
   if (bexp == 0x7FF && mantissaIsZero) {
      /* vex_printf("Inf\n"); */
      return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
               | AMD64G_FC_MASK_C0;
   }

   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
      Return 0,0,sign,1. */
   if (bexp == 0x7FF && !mantissaIsZero) {
      /* vex_printf("NaN\n"); */
      return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
   }

   /* Uh, ok, we give up.  It must be a normal finite number.
      Return 0,1,sign,0.
   */
   /* vex_printf("normal\n"); */
   return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
}
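
/* Worked example for the classifier above (illustrative only, the
   operand is assumed): for a non-empty register holding +1.0, whose
   IEEE754 bit pattern is 0x3FF0000000000000,

      amd64g_calculate_FXAM(1, 0x3FF0000000000000ULL)

   sees sign=0, bexp=0x3FF and a zero mantissa, falls through all the
   special cases, and returns the "normal finite" encoding C3..C0 =
   0,1,0,0, i.e. just AMD64G_FC_MASK_C2. */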


/* This is used to implement both 'frstor' and 'fldenv'.  The latter
   appears to differ from the former only in that the 8 FP registers
   themselves are not transferred into the guest state. */
static
VexEmWarn do_put_x87 ( Bool moveRegs,
                       /*IN*/UChar* x87_state,
                       /*OUT*/VexGuestAMD64State* vex_state )
{
   Int        stno, preg;
   UInt       tag;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
   UInt       tagw    = x87->env[FP_ENV_TAG];
   UInt       fpucw   = x87->env[FP_ENV_CTRL];
   UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
   VexEmWarn  ew;
   UInt       fpround;
   ULong      pair;

   /* Copy registers and tags */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         if (moveRegs)
            convert_f80le_to_f64le( &x87->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = amd64g_check_fldcw ( (ULong)fpucw );
   fpround = (UInt)pair;
   ew      = (VexEmWarn)(pair >> 32);

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}


/* Create an x87 FPU state from the guest state, as close as
   we can approximate it. */
static
void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
                  /*OUT*/UChar* x87_state )
{
   Int        i, stno, preg;
   UInt       tagw;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = vex_state->guest_FTOP;
   UInt       c3210   = vex_state->guest_FC3210;

   for (i = 0; i < 14; i++)
      x87->env[i] = 0;

   x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   x87->env[FP_ENV_STAT]
      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   x87->env[FP_ENV_CTRL]
      = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));

   /* Dump the register stack in ST order. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty */
         tagw |= (3 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87->reg[10*stno] );
      } else {
         /* register is full. */
         tagw |= (0 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87->reg[10*stno] );
      }
   }
   x87->env[FP_ENV_TAG] = toUShort(tagw);
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
/* NOTE: only handles 32-bit format (no REX.W on the insn) */
void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
{
   /* Derived from values obtained from
      vendor_id       : AuthenticAMD
      cpu family      : 15
      model           : 12
      model name      : AMD Athlon(tm) 64 Processor 3200+
      stepping        : 0
      cpu MHz         : 2200.000
      cache size      : 512 KB
   */
   /* Somewhat roundabout, but at least it's simple. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   U128*     xmm   = (U128*)(addr + 160);
   UInt      mxcsr;
   UShort    fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   do_get_x87( gst, (UChar*)&tmp );
   mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */

   /* set addrS[2] in an endian-independent way */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5]  = 0; /* pad */

   /* FOP: faulting fpu opcode.  From experimentation, the real CPU
      does not write this field. (?!) */
   addrS[3]  = 0; /* BOGUS */

   /* RIP (Last x87 instruction pointer).  From experimentation, the
      real CPU does not write this field. (?!) */
   addrS[4]  = 0; /* BOGUS */
   addrS[5]  = 0; /* BOGUS */
   addrS[6]  = 0; /* BOGUS */
   addrS[7]  = 0; /* BOGUS */

   /* RDP (Last x87 data pointer).  From experimentation, the real CPU
      does not write this field. (?!) */
   addrS[8]  = 0; /* BOGUS */
   addrS[9]  = 0; /* BOGUS */
   addrS[10] = 0; /* BOGUS */
   addrS[11] = 0; /* BOGUS */

   addrS[12] = toUShort(mxcsr);  /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
   addrS[15] = 0x0000; /* MXCSR mask (hi16) */
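
   /* For orientation, a summary of what the code above and below lays
      out (byte offsets from 'addr', following the standard fxsave
      image): 0..1 FCW, 2..3 FSW, 4 abridged FTW, 6..7 FOP, 8..15 FPU
      IP, 16..23 FPU DP, 24..27 MXCSR, 28..31 MXCSR mask, 32..159 the
      eight FP registers in 16-byte slots, and 160 onwards the XMM
      registers. */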

   /* Copy in the FP registers, in ST order. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm15 remain to be copied.  If the host is big-endian, these
      need to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( xmm[0],  gst->guest_XMM0 );
   COPY_U128( xmm[1],  gst->guest_XMM1 );
   COPY_U128( xmm[2],  gst->guest_XMM2 );
   COPY_U128( xmm[3],  gst->guest_XMM3 );
   COPY_U128( xmm[4],  gst->guest_XMM4 );
   COPY_U128( xmm[5],  gst->guest_XMM5 );
   COPY_U128( xmm[6],  gst->guest_XMM6 );
   COPY_U128( xmm[7],  gst->guest_XMM7 );
   COPY_U128( xmm[8],  gst->guest_XMM8 );
   COPY_U128( xmm[9],  gst->guest_XMM9 );
   COPY_U128( xmm[10], gst->guest_XMM10 );
   COPY_U128( xmm[11], gst->guest_XMM11 );
   COPY_U128( xmm[12], gst->guest_XMM12 );
   COPY_U128( xmm[13], gst->guest_XMM13 );
   COPY_U128( xmm[14], gst->guest_XMM14 );
   COPY_U128( xmm[15], gst->guest_XMM15 );

#  undef COPY_U128
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmWarn amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State* gst, HWord addr )
{
   Fpu_State tmp;
   VexEmWarn warnX87 = EmWarn_NONE;
   VexEmWarn warnXMM = EmWarn_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   U128*     xmm     = (U128*)(addr + 160);
   UShort    fp_tags;
   Int       r, stno, i;

   /* Restore %xmm0 .. %xmm15.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_XMM0,  xmm[0] );
   COPY_U128( gst->guest_XMM1,  xmm[1] );
   COPY_U128( gst->guest_XMM2,  xmm[2] );
   COPY_U128( gst->guest_XMM3,  xmm[3] );
   COPY_U128( gst->guest_XMM4,  xmm[4] );
   COPY_U128( gst->guest_XMM5,  xmm[5] );
   COPY_U128( gst->guest_XMM6,  xmm[6] );
   COPY_U128( gst->guest_XMM7,  xmm[7] );
   COPY_U128( gst->guest_XMM8,  xmm[8] );
   COPY_U128( gst->guest_XMM9,  xmm[9] );
   COPY_U128( gst->guest_XMM10, xmm[10] );
   COPY_U128( gst->guest_XMM11, xmm[11] );
   COPY_U128( gst->guest_XMM12, xmm[12] );
   COPY_U128( gst->guest_XMM13, xmm[13] );
   COPY_U128( gst->guest_XMM14, xmm[14] );
   COPY_U128( gst->guest_XMM15, xmm[15] );

#  undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */
   for (i = 0; i < 14; i++) tmp.env[i] = 0;
   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7] */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );

   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );

     warnXMM = (VexEmWarn)(w64 >> 32);

     gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmWarn_NONE)
      return warnX87;
   else
      return warnXMM;
}


/* DIRTY HELPER (writes guest state) */
/* Initialise the x87 FPU state as per 'finit'. */
void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
{
   Int i;
   gst->guest_FTOP = 0;
   for (i = 0; i < 8; i++) {
      gst->guest_FPTAG[i] = 0; /* empty */
      gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   }
   gst->guest_FPROUND = (ULong)Irrm_NEAREST;
   gst->guest_FC3210  = 0;
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest memory) */
ULong amd64g_dirtyhelper_loadF80le ( ULong addrU )
{
   ULong f64;
   convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 );
   return f64;
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest memory) */
void amd64g_dirtyhelper_storeF80le ( ULong addrU, ULong f64 )
{
   convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) );
}


/* CALLED FROM GENERATED CODE */
/* CLEAN HELPER */
/* mxcsr[15:0] contains a SSE native format MXCSR value.
   Extract from it the required SSEROUND value and any resulting
   emulation warning, and return (warn << 32) | sseround value.
*/
ULong amd64g_check_ldmxcsr ( ULong mxcsr )
{
   /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   ULong rmode = (mxcsr >> 13) & 3;

   /* Detect any required emulation warnings. */
   VexEmWarn ew = EmWarn_NONE;

   if ((mxcsr & 0x1F80) != 0x1F80) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_sseExns;
   }
   else
   if (mxcsr & (1<<15)) {
      /* FZ is set */
      ew = EmWarn_X86_fz;
   }
   else
   if (mxcsr & (1<<6)) {
      /* DAZ is set */
      ew = EmWarn_X86_daz;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}
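
/* A small worked example for the above (values assumed for the sake of
   illustration):

      amd64g_check_ldmxcsr(0x1F80ULL)   // the power-on MXCSR value
      // == 0: all exceptions masked, FZ/DAZ clear -> no warning,
      //       and bits 14:13 are 00, i.e. round-to-nearest

      amd64g_check_ldmxcsr(0x9F80ULL)   // same, but with FZ (bit 15) set
      // == ((ULong)EmWarn_X86_fz << 32) | 0
*/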
*/ 1833 ULong amd64g_create_mxcsr ( ULong sseround ) 1834 { 1835 sseround &= 3; 1836 return 0x1F80 | (sseround << 13); 1837 } 1838 1839 1840 /* CLEAN HELPER */ 1841 /* fpucw[15:0] contains a x87 native format FPU control word. 1842 Extract from it the required FPROUND value and any resulting 1843 emulation warning, and return (warn << 32) | fpround value. 1844 */ 1845 ULong amd64g_check_fldcw ( ULong fpucw ) 1846 { 1847 /* Decide on a rounding mode. fpucw[11:10] holds it. */ 1848 /* NOTE, encoded exactly as per enum IRRoundingMode. */ 1849 ULong rmode = (fpucw >> 10) & 3; 1850 1851 /* Detect any required emulation warnings. */ 1852 VexEmWarn ew = EmWarn_NONE; 1853 1854 if ((fpucw & 0x3F) != 0x3F) { 1855 /* unmasked exceptions! */ 1856 ew = EmWarn_X86_x87exns; 1857 } 1858 else 1859 if (((fpucw >> 8) & 3) != 3) { 1860 /* unsupported precision */ 1861 ew = EmWarn_X86_x87precision; 1862 } 1863 1864 return (((ULong)ew) << 32) | ((ULong)rmode); 1865 } 1866 1867 1868 /* CLEAN HELPER */ 1869 /* Given fpround as an IRRoundingMode value, create a suitable x87 1870 native format FPU control word. */ 1871 ULong amd64g_create_fpucw ( ULong fpround ) 1872 { 1873 fpround &= 3; 1874 return 0x037F | (fpround << 10); 1875 } 1876 1877 1878 /* This is used to implement 'fldenv'. 1879 Reads 28 bytes at x87_state[0 .. 27]. */ 1880 /* CALLED FROM GENERATED CODE */ 1881 /* DIRTY HELPER */ 1882 VexEmWarn amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state, 1883 /*IN*/HWord x87_state) 1884 { 1885 Int stno, preg; 1886 UInt tag; 1887 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); 1888 Fpu_State* x87 = (Fpu_State*)x87_state; 1889 UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7; 1890 UInt tagw = x87->env[FP_ENV_TAG]; 1891 UInt fpucw = x87->env[FP_ENV_CTRL]; 1892 ULong c3210 = x87->env[FP_ENV_STAT] & 0x4700; 1893 VexEmWarn ew; 1894 ULong fpround; 1895 ULong pair; 1896 1897 /* Copy tags */ 1898 for (stno = 0; stno < 8; stno++) { 1899 preg = (stno + ftop) & 7; 1900 tag = (tagw >> (2*preg)) & 3; 1901 if (tag == 3) { 1902 /* register is empty */ 1903 vexTags[preg] = 0; 1904 } else { 1905 /* register is non-empty */ 1906 vexTags[preg] = 1; 1907 } 1908 } 1909 1910 /* stack pointer */ 1911 vex_state->guest_FTOP = ftop; 1912 1913 /* status word */ 1914 vex_state->guest_FC3210 = c3210; 1915 1916 /* handle the control word, setting FPROUND and detecting any 1917 emulation warnings. */ 1918 pair = amd64g_check_fldcw ( (ULong)fpucw ); 1919 fpround = pair & 0xFFFFFFFFULL; 1920 ew = (VexEmWarn)(pair >> 32); 1921 1922 vex_state->guest_FPROUND = fpround & 3; 1923 1924 /* emulation warnings --> caller */ 1925 return ew; 1926 } 1927 1928 1929 /* CALLED FROM GENERATED CODE */ 1930 /* DIRTY HELPER */ 1931 /* Create an x87 FPU env from the guest state, as close as we can 1932 approximate it. Writes 28 bytes at x87_state[0..27]. 
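   A hedged sketch of the intended round trip with amd64g_dirtyhelper_FLDENV
   above (purely illustrative, not compiled in): dumping the env and then
   reloading it should leave FTOP, FC3210, FPROUND and the tag bits
   unchanged, while the register contents themselves are not part of the
   28-byte image.  */

#if 0
/* Hypothetical example, illustration only. */
static void example_x87_env_roundtrip ( VexGuestAMD64State* st )
{
   UShort env[14];                                    /* 28-byte x87 env image */
   amd64g_dirtyhelper_FSTENV(st, (HWord)env);         /* guest state -> image  */
   (void) amd64g_dirtyhelper_FLDENV(st, (HWord)env);  /* image -> guest state  */
}
#endif

/* (the helper itself follows)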
*/ 1933 void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state, 1934 /*OUT*/HWord x87_state ) 1935 { 1936 Int i, stno, preg; 1937 UInt tagw; 1938 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); 1939 Fpu_State* x87 = (Fpu_State*)x87_state; 1940 UInt ftop = vex_state->guest_FTOP; 1941 ULong c3210 = vex_state->guest_FC3210; 1942 1943 for (i = 0; i < 14; i++) 1944 x87->env[i] = 0; 1945 1946 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF; 1947 x87->env[FP_ENV_STAT] 1948 = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) )); 1949 x87->env[FP_ENV_CTRL] 1950 = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) )); 1951 1952 /* Compute the x87 tag word. */ 1953 tagw = 0; 1954 for (stno = 0; stno < 8; stno++) { 1955 preg = (stno + ftop) & 7; 1956 if (vexTags[preg] == 0) { 1957 /* register is empty */ 1958 tagw |= (3 << (2*preg)); 1959 } else { 1960 /* register is full. */ 1961 tagw |= (0 << (2*preg)); 1962 } 1963 } 1964 x87->env[FP_ENV_TAG] = toUShort(tagw); 1965 1966 /* We don't dump the x87 registers, tho. */ 1967 } 1968 1969 1970 /*---------------------------------------------------------------*/ 1971 /*--- Misc integer helpers, including rotates and CPUID. ---*/ 1972 /*---------------------------------------------------------------*/ 1973 1974 /* Claim to be the following CPU, which is probably representative of 1975 the lowliest (earliest) amd64 offerings. It can do neither sse3 1976 nor cx16. 1977 1978 vendor_id : AuthenticAMD 1979 cpu family : 15 1980 model : 5 1981 model name : AMD Opteron (tm) Processor 848 1982 stepping : 10 1983 cpu MHz : 1797.682 1984 cache size : 1024 KB 1985 fpu : yes 1986 fpu_exception : yes 1987 cpuid level : 1 1988 wp : yes 1989 flags : fpu vme de pse tsc msr pae mce cx8 apic sep 1990 mtrr pge mca cmov pat pse36 clflush mmx fxsr 1991 sse sse2 syscall nx mmxext lm 3dnowext 3dnow 1992 bogomips : 3600.62 1993 TLB size : 1088 4K pages 1994 clflush size : 64 1995 cache_alignment : 64 1996 address sizes : 40 bits physical, 48 bits virtual 1997 power management: ts fid vid ttp 1998 */ 1999 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st ) 2000 { 2001 # define SET_ABCD(_a,_b,_c,_d) \ 2002 do { st->guest_RAX = (ULong)(_a); \ 2003 st->guest_RBX = (ULong)(_b); \ 2004 st->guest_RCX = (ULong)(_c); \ 2005 st->guest_RDX = (ULong)(_d); \ 2006 } while (0) 2007 2008 switch (0xFFFFFFFF & st->guest_RAX) { 2009 case 0x00000000: 2010 SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65); 2011 break; 2012 case 0x00000001: 2013 SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff); 2014 break; 2015 case 0x80000000: 2016 SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65); 2017 break; 2018 case 0x80000001: 2019 SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, 0xe1d3fbff); 2020 break; 2021 case 0x80000002: 2022 SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428); 2023 break; 2024 case 0x80000003: 2025 SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834); 2026 break; 2027 case 0x80000004: 2028 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2029 break; 2030 case 0x80000005: 2031 SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140); 2032 break; 2033 case 0x80000006: 2034 SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000); 2035 break; 2036 case 0x80000007: 2037 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f); 2038 break; 2039 case 0x80000008: 2040 SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000); 2041 break; 2042 default: 2043 SET_ABCD(0x00000000, 0x00000000, 
0x00000000, 0x00000000); 2044 break; 2045 } 2046 # undef SET_ABCD 2047 } 2048 2049 2050 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16 2051 capable. 2052 2053 vendor_id : GenuineIntel 2054 cpu family : 6 2055 model : 15 2056 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz 2057 stepping : 6 2058 cpu MHz : 2394.000 2059 cache size : 4096 KB 2060 physical id : 0 2061 siblings : 2 2062 core id : 0 2063 cpu cores : 2 2064 fpu : yes 2065 fpu_exception : yes 2066 cpuid level : 10 2067 wp : yes 2068 flags : fpu vme de pse tsc msr pae mce cx8 apic sep 2069 mtrr pge mca cmov pat pse36 clflush dts acpi 2070 mmx fxsr sse sse2 ss ht tm syscall nx lm 2071 constant_tsc pni monitor ds_cpl vmx est tm2 2072 cx16 xtpr lahf_lm 2073 bogomips : 4798.78 2074 clflush size : 64 2075 cache_alignment : 64 2076 address sizes : 36 bits physical, 48 bits virtual 2077 power management: 2078 */ 2079 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st ) 2080 { 2081 # define SET_ABCD(_a,_b,_c,_d) \ 2082 do { st->guest_RAX = (ULong)(_a); \ 2083 st->guest_RBX = (ULong)(_b); \ 2084 st->guest_RCX = (ULong)(_c); \ 2085 st->guest_RDX = (ULong)(_d); \ 2086 } while (0) 2087 2088 switch (0xFFFFFFFF & st->guest_RAX) { 2089 case 0x00000000: 2090 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69); 2091 break; 2092 case 0x00000001: 2093 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff); 2094 break; 2095 case 0x00000002: 2096 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049); 2097 break; 2098 case 0x00000003: 2099 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2100 break; 2101 case 0x00000004: { 2102 switch (0xFFFFFFFF & st->guest_RCX) { 2103 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f, 2104 0x0000003f, 0x00000001); break; 2105 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f, 2106 0x0000003f, 0x00000001); break; 2107 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f, 2108 0x00000fff, 0x00000001); break; 2109 default: SET_ABCD(0x00000000, 0x00000000, 2110 0x00000000, 0x00000000); break; 2111 } 2112 break; 2113 } 2114 case 0x00000005: 2115 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020); 2116 break; 2117 case 0x00000006: 2118 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000); 2119 break; 2120 case 0x00000007: 2121 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2122 break; 2123 case 0x00000008: 2124 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000); 2125 break; 2126 case 0x00000009: 2127 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2128 break; 2129 case 0x0000000a: 2130 unhandled_eax_value: 2131 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000); 2132 break; 2133 case 0x80000000: 2134 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); 2135 break; 2136 case 0x80000001: 2137 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800); 2138 break; 2139 case 0x80000002: 2140 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); 2141 break; 2142 case 0x80000003: 2143 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020); 2144 break; 2145 case 0x80000004: 2146 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847); 2147 break; 2148 case 0x80000005: 2149 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2150 break; 2151 case 0x80000006: 2152 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000); 2153 break; 2154 case 0x80000007: 2155 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2156 break; 2157 case 0x80000008: 2158 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); 2159 
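         /* (0x3024 encodes the address sizes quoted in the comment above:
             0x24 = 36 bits physical, 0x30 = 48 bits virtual.) */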
break; 2160 default: 2161 goto unhandled_eax_value; 2162 } 2163 # undef SET_ABCD 2164 } 2165 2166 2167 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16 2168 capable. 2169 2170 vendor_id : GenuineIntel 2171 cpu family : 6 2172 model : 37 2173 model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz 2174 stepping : 2 2175 cpu MHz : 3334.000 2176 cache size : 4096 KB 2177 physical id : 0 2178 siblings : 4 2179 core id : 0 2180 cpu cores : 2 2181 apicid : 0 2182 initial apicid : 0 2183 fpu : yes 2184 fpu_exception : yes 2185 cpuid level : 11 2186 wp : yes 2187 flags : fpu vme de pse tsc msr pae mce cx8 apic sep 2188 mtrr pge mca cmov pat pse36 clflush dts acpi 2189 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp 2190 lm constant_tsc arch_perfmon pebs bts rep_good 2191 xtopology nonstop_tsc aperfmperf pni pclmulqdq 2192 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 2193 xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida 2194 arat tpr_shadow vnmi flexpriority ept vpid 2195 MINUS aes (see below) 2196 bogomips : 6957.57 2197 clflush size : 64 2198 cache_alignment : 64 2199 address sizes : 36 bits physical, 48 bits virtual 2200 power management: 2201 */ 2202 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st ) 2203 { 2204 # define SET_ABCD(_a,_b,_c,_d) \ 2205 do { st->guest_RAX = (ULong)(_a); \ 2206 st->guest_RBX = (ULong)(_b); \ 2207 st->guest_RCX = (ULong)(_c); \ 2208 st->guest_RDX = (ULong)(_d); \ 2209 } while (0) 2210 2211 UInt old_eax = (UInt)st->guest_RAX; 2212 UInt old_ecx = (UInt)st->guest_RCX; 2213 2214 switch (old_eax) { 2215 case 0x00000000: 2216 SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69); 2217 break; 2218 case 0x00000001: 2219 // & ~(1<<25): don't claim to support AES insns. See 2220 // bug 249991. 
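         // (Bit 25 of the ECX feature word returned by leaf 1 is the AES
         // flag; the mask applied to the ECX value below clears just that
         // bit.)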
2221 SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff & ~(1<<25), 2222 0xbfebfbff); 2223 break; 2224 case 0x00000002: 2225 SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c); 2226 break; 2227 case 0x00000003: 2228 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2229 break; 2230 case 0x00000004: 2231 switch (old_ecx) { 2232 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f, 2233 0x0000003f, 0x00000000); break; 2234 case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f, 2235 0x0000007f, 0x00000000); break; 2236 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f, 2237 0x000001ff, 0x00000000); break; 2238 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f, 2239 0x00000fff, 0x00000002); break; 2240 default: SET_ABCD(0x00000000, 0x00000000, 2241 0x00000000, 0x00000000); break; 2242 } 2243 break; 2244 case 0x00000005: 2245 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120); 2246 break; 2247 case 0x00000006: 2248 SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000); 2249 break; 2250 case 0x00000007: 2251 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2252 break; 2253 case 0x00000008: 2254 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2255 break; 2256 case 0x00000009: 2257 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2258 break; 2259 case 0x0000000a: 2260 SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603); 2261 break; 2262 case 0x0000000b: 2263 switch (old_ecx) { 2264 case 0x00000000: 2265 SET_ABCD(0x00000001, 0x00000002, 2266 0x00000100, 0x00000000); break; 2267 case 0x00000001: 2268 SET_ABCD(0x00000004, 0x00000004, 2269 0x00000201, 0x00000000); break; 2270 default: 2271 SET_ABCD(0x00000000, 0x00000000, 2272 old_ecx, 0x00000000); break; 2273 } 2274 break; 2275 case 0x0000000c: 2276 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000); 2277 break; 2278 case 0x0000000d: 2279 switch (old_ecx) { 2280 case 0x00000000: SET_ABCD(0x00000001, 0x00000002, 2281 0x00000100, 0x00000000); break; 2282 case 0x00000001: SET_ABCD(0x00000004, 0x00000004, 2283 0x00000201, 0x00000000); break; 2284 default: SET_ABCD(0x00000000, 0x00000000, 2285 old_ecx, 0x00000000); break; 2286 } 2287 break; 2288 case 0x80000000: 2289 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); 2290 break; 2291 case 0x80000001: 2292 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800); 2293 break; 2294 case 0x80000002: 2295 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); 2296 break; 2297 case 0x80000003: 2298 SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020); 2299 break; 2300 case 0x80000004: 2301 SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847); 2302 break; 2303 case 0x80000005: 2304 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2305 break; 2306 case 0x80000006: 2307 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000); 2308 break; 2309 case 0x80000007: 2310 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100); 2311 break; 2312 case 0x80000008: 2313 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); 2314 break; 2315 default: 2316 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000); 2317 break; 2318 } 2319 # undef SET_ABCD 2320 } 2321 2322 2323 ULong amd64g_calculate_RCR ( ULong arg, 2324 ULong rot_amt, 2325 ULong rflags_in, 2326 Long szIN ) 2327 { 2328 Bool wantRflags = toBool(szIN < 0); 2329 ULong sz = wantRflags ? (-szIN) : szIN; 2330 ULong tempCOUNT = rot_amt & (sz == 8 ? 
0x3F : 0x1F); 2331 ULong cf=0, of=0, tempcf; 2332 2333 switch (sz) { 2334 case 8: 2335 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2336 of = ((arg >> 63) ^ cf) & 1; 2337 while (tempCOUNT > 0) { 2338 tempcf = arg & 1; 2339 arg = (arg >> 1) | (cf << 63); 2340 cf = tempcf; 2341 tempCOUNT--; 2342 } 2343 break; 2344 case 4: 2345 while (tempCOUNT >= 33) tempCOUNT -= 33; 2346 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2347 of = ((arg >> 31) ^ cf) & 1; 2348 while (tempCOUNT > 0) { 2349 tempcf = arg & 1; 2350 arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31); 2351 cf = tempcf; 2352 tempCOUNT--; 2353 } 2354 break; 2355 case 2: 2356 while (tempCOUNT >= 17) tempCOUNT -= 17; 2357 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2358 of = ((arg >> 15) ^ cf) & 1; 2359 while (tempCOUNT > 0) { 2360 tempcf = arg & 1; 2361 arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15); 2362 cf = tempcf; 2363 tempCOUNT--; 2364 } 2365 break; 2366 case 1: 2367 while (tempCOUNT >= 9) tempCOUNT -= 9; 2368 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2369 of = ((arg >> 7) ^ cf) & 1; 2370 while (tempCOUNT > 0) { 2371 tempcf = arg & 1; 2372 arg = ((arg >> 1) & 0x7FULL) | (cf << 7); 2373 cf = tempcf; 2374 tempCOUNT--; 2375 } 2376 break; 2377 default: 2378 vpanic("calculate_RCR(amd64g): invalid size"); 2379 } 2380 2381 cf &= 1; 2382 of &= 1; 2383 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); 2384 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); 2385 2386 /* caller can ask to have back either the resulting flags or 2387 resulting value, but not both */ 2388 return wantRflags ? rflags_in : arg; 2389 } 2390 2391 ULong amd64g_calculate_RCL ( ULong arg, 2392 ULong rot_amt, 2393 ULong rflags_in, 2394 Long szIN ) 2395 { 2396 Bool wantRflags = toBool(szIN < 0); 2397 ULong sz = wantRflags ? (-szIN) : szIN; 2398 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F); 2399 ULong cf=0, of=0, tempcf; 2400 2401 switch (sz) { 2402 case 8: 2403 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2404 while (tempCOUNT > 0) { 2405 tempcf = (arg >> 63) & 1; 2406 arg = (arg << 1) | (cf & 1); 2407 cf = tempcf; 2408 tempCOUNT--; 2409 } 2410 of = ((arg >> 63) ^ cf) & 1; 2411 break; 2412 case 4: 2413 while (tempCOUNT >= 33) tempCOUNT -= 33; 2414 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2415 while (tempCOUNT > 0) { 2416 tempcf = (arg >> 31) & 1; 2417 arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1)); 2418 cf = tempcf; 2419 tempCOUNT--; 2420 } 2421 of = ((arg >> 31) ^ cf) & 1; 2422 break; 2423 case 2: 2424 while (tempCOUNT >= 17) tempCOUNT -= 17; 2425 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2426 while (tempCOUNT > 0) { 2427 tempcf = (arg >> 15) & 1; 2428 arg = 0xFFFFULL & ((arg << 1) | (cf & 1)); 2429 cf = tempcf; 2430 tempCOUNT--; 2431 } 2432 of = ((arg >> 15) ^ cf) & 1; 2433 break; 2434 case 1: 2435 while (tempCOUNT >= 9) tempCOUNT -= 9; 2436 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2437 while (tempCOUNT > 0) { 2438 tempcf = (arg >> 7) & 1; 2439 arg = 0xFFULL & ((arg << 1) | (cf & 1)); 2440 cf = tempcf; 2441 tempCOUNT--; 2442 } 2443 of = ((arg >> 7) ^ cf) & 1; 2444 break; 2445 default: 2446 vpanic("calculate_RCL(amd64g): invalid size"); 2447 } 2448 2449 cf &= 1; 2450 of &= 1; 2451 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); 2452 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); 2453 2454 return wantRflags ? 
rflags_in : arg; 2455 } 2456 2457 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+) 2458 * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25 2459 */ 2460 ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which) 2461 { 2462 ULong hi, lo, tmp, A[16]; 2463 2464 A[0] = 0; A[1] = a; 2465 A[2] = A[1] << 1; A[3] = A[2] ^ a; 2466 A[4] = A[2] << 1; A[5] = A[4] ^ a; 2467 A[6] = A[3] << 1; A[7] = A[6] ^ a; 2468 A[8] = A[4] << 1; A[9] = A[8] ^ a; 2469 A[10] = A[5] << 1; A[11] = A[10] ^ a; 2470 A[12] = A[6] << 1; A[13] = A[12] ^ a; 2471 A[14] = A[7] << 1; A[15] = A[14] ^ a; 2472 2473 lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15]; 2474 hi = lo >> 56; 2475 lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15]; 2476 hi = (hi << 8) | (lo >> 56); 2477 lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15]; 2478 hi = (hi << 8) | (lo >> 56); 2479 lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15]; 2480 hi = (hi << 8) | (lo >> 56); 2481 lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15]; 2482 hi = (hi << 8) | (lo >> 56); 2483 lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15]; 2484 hi = (hi << 8) | (lo >> 56); 2485 lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15]; 2486 hi = (hi << 8) | (lo >> 56); 2487 lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15]; 2488 2489 ULong m0 = -1; 2490 m0 /= 255; 2491 tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp; 2492 tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp; 2493 tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp; 2494 tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp; 2495 tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp; 2496 tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp; 2497 tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp; 2498 2499 return which ? hi : lo; 2500 } 2501 2502 2503 /* CALLED FROM GENERATED CODE */ 2504 /* DIRTY HELPER (non-referentially-transparent) */ 2505 /* Horrible hack. On non-amd64 platforms, return 1. */ 2506 ULong amd64g_dirtyhelper_RDTSC ( void ) 2507 { 2508 # if defined(__x86_64__) 2509 UInt eax, edx; 2510 __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx)); 2511 return (((ULong)edx) << 32) | ((ULong)eax); 2512 # else 2513 return 1ULL; 2514 # endif 2515 } 2516 2517 2518 /* CALLED FROM GENERATED CODE */ 2519 /* DIRTY HELPER (non-referentially-transparent) */ 2520 /* Horrible hack. On non-amd64 platforms, return 0. */ 2521 ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ ) 2522 { 2523 # if defined(__x86_64__) 2524 ULong r = 0; 2525 portno &= 0xFFFF; 2526 switch (sz) { 2527 case 4: 2528 __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0" 2529 : "=a" (r) : "Nd" (portno)); 2530 break; 2531 case 2: 2532 __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0" 2533 : "=a" (r) : "Nd" (portno)); 2534 break; 2535 case 1: 2536 __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0" 2537 : "=a" (r) : "Nd" (portno)); 2538 break; 2539 default: 2540 break; /* note: no 64-bit version of insn exists */ 2541 } 2542 return r; 2543 # else 2544 return 0; 2545 # endif 2546 } 2547 2548 2549 /* CALLED FROM GENERATED CODE */ 2550 /* DIRTY HELPER (non-referentially-transparent) */ 2551 /* Horrible hack. On non-amd64 platforms, do nothing. 
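   As with amd64g_dirtyhelper_IN above, sz selects the 1/2/4-byte form of
   the instruction; there is no 8-byte port I/O instruction, so the default
   case below is a no-op.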
*/ 2552 void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ ) 2553 { 2554 # if defined(__x86_64__) 2555 portno &= 0xFFFF; 2556 switch (sz) { 2557 case 4: 2558 __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1" 2559 : : "a" (data), "Nd" (portno)); 2560 break; 2561 case 2: 2562 __asm__ __volatile__("outw %w0, %w1" 2563 : : "a" (data), "Nd" (portno)); 2564 break; 2565 case 1: 2566 __asm__ __volatile__("outb %b0, %w1" 2567 : : "a" (data), "Nd" (portno)); 2568 break; 2569 default: 2570 break; /* note: no 64-bit version of insn exists */ 2571 } 2572 # else 2573 /* do nothing */ 2574 # endif 2575 } 2576 2577 /* CALLED FROM GENERATED CODE */ 2578 /* DIRTY HELPER (non-referentially-transparent) */ 2579 /* Horrible hack. On non-amd64 platforms, do nothing. */ 2580 /* op = 0: call the native SGDT instruction. 2581 op = 1: call the native SIDT instruction. 2582 */ 2583 void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) { 2584 # if defined(__x86_64__) 2585 switch (op) { 2586 case 0: 2587 __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory"); 2588 break; 2589 case 1: 2590 __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory"); 2591 break; 2592 default: 2593 vpanic("amd64g_dirtyhelper_SxDT"); 2594 } 2595 # else 2596 /* do nothing */ 2597 UChar* p = (UChar*)address; 2598 p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0; 2599 p[6] = p[7] = p[8] = p[9] = 0; 2600 # endif 2601 } 2602 2603 /*---------------------------------------------------------------*/ 2604 /*--- Helpers for MMX/SSE/SSE2. ---*/ 2605 /*---------------------------------------------------------------*/ 2606 2607 static inline UChar abdU8 ( UChar xx, UChar yy ) { 2608 return toUChar(xx>yy ? xx-yy : yy-xx); 2609 } 2610 2611 static inline ULong mk32x2 ( UInt w1, UInt w0 ) { 2612 return (((ULong)w1) << 32) | ((ULong)w0); 2613 } 2614 2615 static inline UShort sel16x4_3 ( ULong w64 ) { 2616 UInt hi32 = toUInt(w64 >> 32); 2617 return toUShort(hi32 >> 16); 2618 } 2619 static inline UShort sel16x4_2 ( ULong w64 ) { 2620 UInt hi32 = toUInt(w64 >> 32); 2621 return toUShort(hi32); 2622 } 2623 static inline UShort sel16x4_1 ( ULong w64 ) { 2624 UInt lo32 = toUInt(w64); 2625 return toUShort(lo32 >> 16); 2626 } 2627 static inline UShort sel16x4_0 ( ULong w64 ) { 2628 UInt lo32 = toUInt(w64); 2629 return toUShort(lo32); 2630 } 2631 2632 static inline UChar sel8x8_7 ( ULong w64 ) { 2633 UInt hi32 = toUInt(w64 >> 32); 2634 return toUChar(hi32 >> 24); 2635 } 2636 static inline UChar sel8x8_6 ( ULong w64 ) { 2637 UInt hi32 = toUInt(w64 >> 32); 2638 return toUChar(hi32 >> 16); 2639 } 2640 static inline UChar sel8x8_5 ( ULong w64 ) { 2641 UInt hi32 = toUInt(w64 >> 32); 2642 return toUChar(hi32 >> 8); 2643 } 2644 static inline UChar sel8x8_4 ( ULong w64 ) { 2645 UInt hi32 = toUInt(w64 >> 32); 2646 return toUChar(hi32 >> 0); 2647 } 2648 static inline UChar sel8x8_3 ( ULong w64 ) { 2649 UInt lo32 = toUInt(w64); 2650 return toUChar(lo32 >> 24); 2651 } 2652 static inline UChar sel8x8_2 ( ULong w64 ) { 2653 UInt lo32 = toUInt(w64); 2654 return toUChar(lo32 >> 16); 2655 } 2656 static inline UChar sel8x8_1 ( ULong w64 ) { 2657 UInt lo32 = toUInt(w64); 2658 return toUChar(lo32 >> 8); 2659 } 2660 static inline UChar sel8x8_0 ( ULong w64 ) { 2661 UInt lo32 = toUInt(w64); 2662 return toUChar(lo32 >> 0); 2663 } 2664 2665 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2666 ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy ) 2667 { 2668 return 2669 mk32x2( 2670 (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy))) 2671 + 
(((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))), 2672 (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy))) 2673 + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy))) 2674 ); 2675 } 2676 2677 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2678 ULong amd64g_calculate_mmx_pmovmskb ( ULong xx ) 2679 { 2680 ULong r = 0; 2681 if (xx & (1ULL << (64-1))) r |= (1<<7); 2682 if (xx & (1ULL << (56-1))) r |= (1<<6); 2683 if (xx & (1ULL << (48-1))) r |= (1<<5); 2684 if (xx & (1ULL << (40-1))) r |= (1<<4); 2685 if (xx & (1ULL << (32-1))) r |= (1<<3); 2686 if (xx & (1ULL << (24-1))) r |= (1<<2); 2687 if (xx & (1ULL << (16-1))) r |= (1<<1); 2688 if (xx & (1ULL << ( 8-1))) r |= (1<<0); 2689 return r; 2690 } 2691 2692 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2693 ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy ) 2694 { 2695 UInt t = 0; 2696 t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) ); 2697 t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) ); 2698 t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) ); 2699 t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) ); 2700 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); 2701 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); 2702 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); 2703 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); 2704 t &= 0xFFFF; 2705 return (ULong)t; 2706 } 2707 2708 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2709 ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ) 2710 { 2711 ULong rHi8 = amd64g_calculate_mmx_pmovmskb ( w64hi ); 2712 ULong rLo8 = amd64g_calculate_mmx_pmovmskb ( w64lo ); 2713 return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF); 2714 } 2715 2716 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2717 ULong amd64g_calc_crc32b ( ULong crcIn, ULong b ) 2718 { 2719 UInt i; 2720 ULong crc = (b & 0xFFULL) ^ crcIn; 2721 for (i = 0; i < 8; i++) 2722 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); 2723 return crc; 2724 } 2725 2726 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2727 ULong amd64g_calc_crc32w ( ULong crcIn, ULong w ) 2728 { 2729 UInt i; 2730 ULong crc = (w & 0xFFFFULL) ^ crcIn; 2731 for (i = 0; i < 16; i++) 2732 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); 2733 return crc; 2734 } 2735 2736 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2737 ULong amd64g_calc_crc32l ( ULong crcIn, ULong l ) 2738 { 2739 UInt i; 2740 ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn; 2741 for (i = 0; i < 32; i++) 2742 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); 2743 return crc; 2744 } 2745 2746 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 2747 ULong amd64g_calc_crc32q ( ULong crcIn, ULong q ) 2748 { 2749 ULong crc = amd64g_calc_crc32l(crcIn, q); 2750 return amd64g_calc_crc32l(crc, q >> 32); 2751 } 2752 2753 2754 /*---------------------------------------------------------------*/ 2755 /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/ 2756 /*---------------------------------------------------------------*/ 2757 2758 static UInt zmask_from_V128 ( V128* arg ) 2759 { 2760 UInt i, res = 0; 2761 for (i = 0; i < 16; i++) { 2762 res |= ((arg->w8[i] == 0) ? 1 : 0) << i; 2763 } 2764 return res; 2765 } 2766 2767 /* Helps with PCMP{I,E}STR{I,M}. 2768 2769 CALLED FROM GENERATED CODE: DIRTY HELPER(s). (But not really, 2770 actually it could be a clean helper, but for the fact that we can't 2771 pass by value 2 x V128 to a clean helper, nor have one returned.) 2772 Reads guest state, writes to guest state for the xSTRM cases, no 2773 accesses of memory, is a pure function. 
2774 2775 opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so 2776 the callee knows which I/E and I/M variant it is dealing with and 2777 what the specific operation is. 4th byte of opcode is in the range 2778 0x60 to 0x63: 2779 istri 66 0F 3A 63 2780 istrm 66 0F 3A 62 2781 estri 66 0F 3A 61 2782 estrm 66 0F 3A 60 2783 2784 gstOffL and gstOffR are the guest state offsets for the two XMM 2785 register inputs. We never have to deal with the memory case since 2786 that is handled by pre-loading the relevant value into the fake 2787 XMM16 register. 2788 2789 For ESTRx variants, edxIN and eaxIN hold the values of those two 2790 registers. 2791 2792 In all cases, the bottom 16 bits of the result contain the new 2793 OSZACP %rflags values. For xSTRI variants, bits[31:16] of the 2794 result hold the new %ecx value. For xSTRM variants, the helper 2795 writes the result directly to the guest XMM0. 2796 2797 Declarable side effects: in all cases, reads guest state at 2798 [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes 2799 guest_XMM0. 2800 2801 Is expected to be called with opc_and_imm combinations which have 2802 actually been validated, and will assert if otherwise. The front 2803 end should ensure we're only called with verified values. 2804 */ 2805 ULong amd64g_dirtyhelper_PCMPxSTRx ( 2806 VexGuestAMD64State* gst, 2807 HWord opc4_and_imm, 2808 HWord gstOffL, HWord gstOffR, 2809 HWord edxIN, HWord eaxIN 2810 ) 2811 { 2812 HWord opc4 = (opc4_and_imm >> 8) & 0xFF; 2813 HWord imm8 = opc4_and_imm & 0xFF; 2814 HWord isISTRx = opc4 & 2; 2815 HWord isxSTRM = (opc4 & 1) ^ 1; 2816 vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */ 2817 vassert((imm8 & 1) == 0); /* we support byte-size cases only */ 2818 2819 // where the args are 2820 V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); 2821 V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); 2822 2823 /* Create the arg validity masks, either from the vectors 2824 themselves or from the supplied edx/eax values. */ 2825 // FIXME: this is only right for the 8-bit data cases. 2826 // At least that is asserted above. 2827 UInt zmaskL, zmaskR; 2828 if (isISTRx) { 2829 zmaskL = zmask_from_V128(argL); 2830 zmaskR = zmask_from_V128(argR); 2831 } else { 2832 Int tmp; 2833 tmp = edxIN & 0xFFFFFFFF; 2834 if (tmp < -16) tmp = -16; 2835 if (tmp > 16) tmp = 16; 2836 if (tmp < 0) tmp = -tmp; 2837 vassert(tmp >= 0 && tmp <= 16); 2838 zmaskL = (1 << tmp) & 0xFFFF; 2839 tmp = eaxIN & 0xFFFFFFFF; 2840 if (tmp < -16) tmp = -16; 2841 if (tmp > 16) tmp = 16; 2842 if (tmp < 0) tmp = -tmp; 2843 vassert(tmp >= 0 && tmp <= 16); 2844 zmaskR = (1 << tmp) & 0xFFFF; 2845 } 2846 2847 // temp spot for the resulting flags and vector. 2848 V128 resV; 2849 UInt resOSZACP; 2850 2851 // do the meyaath 2852 Bool ok = compute_PCMPxSTRx ( 2853 &resV, &resOSZACP, argL, argR, 2854 zmaskL, zmaskR, imm8, (Bool)isxSTRM 2855 ); 2856 2857 // front end shouldn't pass us any imm8 variants we can't 2858 // handle. Hence: 2859 vassert(ok); 2860 2861 // So, finally we need to get the results back to the caller. 2862 // In all cases, the new OSZACP value is the lowest 16 of 2863 // the return value. 
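   // Worked example (illustrative): an xSTRI case that computed
   // %ecx == 5 with only the carry flag set would hand back
   //    (5 << 16) | AMD64G_CC_MASK_C
   // after masking with 0x8D5, which keeps exactly the O,S,Z,A,C,P bits.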
2864 if (isxSTRM) { 2865 /* gst->guest_XMM0 = resV; */ // gcc don't like that 2866 gst->guest_XMM0[0] = resV.w32[0]; 2867 gst->guest_XMM0[1] = resV.w32[1]; 2868 gst->guest_XMM0[2] = resV.w32[2]; 2869 gst->guest_XMM0[3] = resV.w32[3]; 2870 return resOSZACP & 0x8D5; 2871 } else { 2872 UInt newECX = resV.w32[0] & 0xFFFF; 2873 return (newECX << 16) | (resOSZACP & 0x8D5); 2874 } 2875 } 2876 2877 2878 /*---------------------------------------------------------------*/ 2879 /*--- Helpers for dealing with, and describing, ---*/ 2880 /*--- guest state as a whole. ---*/ 2881 /*---------------------------------------------------------------*/ 2882 2883 /* Initialise the entire amd64 guest state. */ 2884 /* VISIBLE TO LIBVEX CLIENT */ 2885 void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state ) 2886 { 2887 vex_state->guest_RAX = 0; 2888 vex_state->guest_RCX = 0; 2889 vex_state->guest_RDX = 0; 2890 vex_state->guest_RBX = 0; 2891 vex_state->guest_RSP = 0; 2892 vex_state->guest_RBP = 0; 2893 vex_state->guest_RSI = 0; 2894 vex_state->guest_RDI = 0; 2895 vex_state->guest_R8 = 0; 2896 vex_state->guest_R9 = 0; 2897 vex_state->guest_R10 = 0; 2898 vex_state->guest_R11 = 0; 2899 vex_state->guest_R12 = 0; 2900 vex_state->guest_R13 = 0; 2901 vex_state->guest_R14 = 0; 2902 vex_state->guest_R15 = 0; 2903 2904 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY; 2905 vex_state->guest_CC_DEP1 = 0; 2906 vex_state->guest_CC_DEP2 = 0; 2907 vex_state->guest_CC_NDEP = 0; 2908 2909 vex_state->guest_DFLAG = 1; /* forwards */ 2910 vex_state->guest_IDFLAG = 0; 2911 2912 /* HACK: represent the offset associated with %fs==0. This 2913 assumes that %fs is only ever zero. */ 2914 vex_state->guest_FS_ZERO = 0; 2915 2916 vex_state->guest_RIP = 0; 2917 2918 /* Initialise the simulated FPU */ 2919 amd64g_dirtyhelper_FINIT( vex_state ); 2920 2921 /* Initialise the SSE state. */ 2922 # define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0; 2923 2924 vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST; 2925 SSEZERO(vex_state->guest_XMM0); 2926 SSEZERO(vex_state->guest_XMM1); 2927 SSEZERO(vex_state->guest_XMM2); 2928 SSEZERO(vex_state->guest_XMM3); 2929 SSEZERO(vex_state->guest_XMM4); 2930 SSEZERO(vex_state->guest_XMM5); 2931 SSEZERO(vex_state->guest_XMM6); 2932 SSEZERO(vex_state->guest_XMM7); 2933 SSEZERO(vex_state->guest_XMM8); 2934 SSEZERO(vex_state->guest_XMM9); 2935 SSEZERO(vex_state->guest_XMM10); 2936 SSEZERO(vex_state->guest_XMM11); 2937 SSEZERO(vex_state->guest_XMM12); 2938 SSEZERO(vex_state->guest_XMM13); 2939 SSEZERO(vex_state->guest_XMM14); 2940 SSEZERO(vex_state->guest_XMM15); 2941 SSEZERO(vex_state->guest_XMM16); 2942 2943 # undef SSEZERO 2944 2945 vex_state->guest_EMWARN = EmWarn_NONE; 2946 2947 /* These should not ever be either read or written, but we 2948 initialise them anyway. */ 2949 vex_state->guest_TISTART = 0; 2950 vex_state->guest_TILEN = 0; 2951 2952 vex_state->guest_NRADDR = 0; 2953 vex_state->guest_SC_CLASS = 0; 2954 vex_state->guest_GS_0x60 = 0; 2955 2956 vex_state->guest_IP_AT_SYSCALL = 0; 2957 /* vex_state->padding = 0; */ 2958 } 2959 2960 2961 /* Figure out if any part of the guest state contained in minoff 2962 .. maxoff requires precise memory exceptions. If in doubt return 2963 True (but this is generates significantly slower code). 2964 2965 By default we enforce precise exns for guest %RSP, %RBP and %RIP 2966 only. These are the minimum needed to extract correct stack 2967 backtraces from amd64 code. 
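   A hedged sketch of the intended behaviour (purely illustrative, not
   compiled in): a store that only touches %RAX does not require precise
   exceptions, whereas one that touches %RSP does.  */

#if 0
/* Hypothetical example, illustration only. */
static void example_precise_exn_queries ( void )
{
   Int rax = offsetof(VexGuestAMD64State, guest_RAX);
   Int rsp = offsetof(VexGuestAMD64State, guest_RSP);
   vassert( !guest_amd64_state_requires_precise_mem_exns(rax, rax+7) );
   vassert(  guest_amd64_state_requires_precise_mem_exns(rsp, rsp+7) );
}
#endif

/* (the function itself follows)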
2968 */ 2969 Bool guest_amd64_state_requires_precise_mem_exns ( Int minoff, 2970 Int maxoff) 2971 { 2972 Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP); 2973 Int rbp_max = rbp_min + 8 - 1; 2974 Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP); 2975 Int rsp_max = rsp_min + 8 - 1; 2976 Int rip_min = offsetof(VexGuestAMD64State, guest_RIP); 2977 Int rip_max = rip_min + 8 - 1; 2978 2979 if (maxoff < rbp_min || minoff > rbp_max) { 2980 /* no overlap with rbp */ 2981 } else { 2982 return True; 2983 } 2984 2985 if (maxoff < rsp_min || minoff > rsp_max) { 2986 /* no overlap with rsp */ 2987 } else { 2988 return True; 2989 } 2990 2991 if (maxoff < rip_min || minoff > rip_max) { 2992 /* no overlap with eip */ 2993 } else { 2994 return True; 2995 } 2996 2997 return False; 2998 } 2999 3000 3001 #define ALWAYSDEFD(field) \ 3002 { offsetof(VexGuestAMD64State, field), \ 3003 (sizeof ((VexGuestAMD64State*)0)->field) } 3004 3005 VexGuestLayout 3006 amd64guest_layout 3007 = { 3008 /* Total size of the guest state, in bytes. */ 3009 .total_sizeB = sizeof(VexGuestAMD64State), 3010 3011 /* Describe the stack pointer. */ 3012 .offset_SP = offsetof(VexGuestAMD64State,guest_RSP), 3013 .sizeof_SP = 8, 3014 3015 /* Describe the frame pointer. */ 3016 .offset_FP = offsetof(VexGuestAMD64State,guest_RBP), 3017 .sizeof_FP = 8, 3018 3019 /* Describe the instruction pointer. */ 3020 .offset_IP = offsetof(VexGuestAMD64State,guest_RIP), 3021 .sizeof_IP = 8, 3022 3023 /* Describe any sections to be regarded by Memcheck as 3024 'always-defined'. */ 3025 .n_alwaysDefd = 16, 3026 3027 /* flags thunk: OP and NDEP are always defd, whereas DEP1 3028 and DEP2 have to be tracked. See detailed comment in 3029 gdefs.h on meaning of thunk fields. */ 3030 .alwaysDefd 3031 = { /* 0 */ ALWAYSDEFD(guest_CC_OP), 3032 /* 1 */ ALWAYSDEFD(guest_CC_NDEP), 3033 /* 2 */ ALWAYSDEFD(guest_DFLAG), 3034 /* 3 */ ALWAYSDEFD(guest_IDFLAG), 3035 /* 4 */ ALWAYSDEFD(guest_RIP), 3036 /* 5 */ ALWAYSDEFD(guest_FS_ZERO), 3037 /* 6 */ ALWAYSDEFD(guest_FTOP), 3038 /* 7 */ ALWAYSDEFD(guest_FPTAG), 3039 /* 8 */ ALWAYSDEFD(guest_FPROUND), 3040 /* 9 */ ALWAYSDEFD(guest_FC3210), 3041 // /* */ ALWAYSDEFD(guest_CS), 3042 // /* */ ALWAYSDEFD(guest_DS), 3043 // /* */ ALWAYSDEFD(guest_ES), 3044 // /* */ ALWAYSDEFD(guest_FS), 3045 // /* */ ALWAYSDEFD(guest_GS), 3046 // /* */ ALWAYSDEFD(guest_SS), 3047 // /* */ ALWAYSDEFD(guest_LDT), 3048 // /* */ ALWAYSDEFD(guest_GDT), 3049 /* 10 */ ALWAYSDEFD(guest_EMWARN), 3050 /* 11 */ ALWAYSDEFD(guest_SSEROUND), 3051 /* 12 */ ALWAYSDEFD(guest_TISTART), 3052 /* 13 */ ALWAYSDEFD(guest_TILEN), 3053 /* 14 */ ALWAYSDEFD(guest_SC_CLASS), 3054 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL) 3055 } 3056 }; 3057 3058 3059 /*---------------------------------------------------------------*/ 3060 /*--- end guest_amd64_helpers.c ---*/ 3061 /*---------------------------------------------------------------*/ 3062