
/*---------------------------------------------------------------*/
/*--- begin                              guest_amd64_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2015 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_amd64.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_amd64_defs.h"
#include "guest_generic_x87.h"


/* This file contains helper functions for amd64 guest code.
   Calls to these functions are generated by the back end.
   These calls are of course in the host machine code and
   this file will be compiled to host machine code, so that
   all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-amd64/toIR.c.

   The convention used is that all functions called from generated
   code are named amd64g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/


/* Set to 1 to get detailed profiling info about use of the flag
   machinery. */
#define PROFILE_RFLAGS 0


/*---------------------------------------------------------------*/
/*--- %rflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/

/* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
   after imulq/mulq. */
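
/* Illustrative note (not part of the helper set): both routines below
   use the schoolbook decomposition into 32-bit halves.  Writing
   u = 2^32*u1 + u0 and v = 2^32*v1 + v0, the 128-bit product is

      u*v = 2^64*(u1*v1) + 2^32*(u1*v0 + u0*v1) + u0*v0

   with the carries out of the partial sums folded into the high word.
   A quick sanity check one could run (hypothetical test snippet, not
   part of VEX itself):

      ULong hi, lo;
      mullU64( 0xFFFFFFFFFFFFFFFFULL, 2, &hi, &lo );
      // expect hi == 1 and lo == 0xFFFFFFFFFFFFFFFEULL
*/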

static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
{
   const Long halfMask = 0xFFFFFFFFLL;
   ULong u0, v0, w0;
   Long  u1, v1, w1, w2, t;
   u0   = u & halfMask;
   u1   = u >> 32;
   v0   = v & halfMask;
   v1   = v >> 32;
   w0   = u0 * v0;
   t    = u1 * v0 + (w0 >> 32);
   w1   = t & halfMask;
   w2   = t >> 32;
   w1   = u0 * v1 + w1;
   *rHi = u1 * v1 + w2 + (w1 >> 32);
   *rLo = (Long)((ULong)u * (ULong)v);
}

static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
{
   const ULong halfMask = 0xFFFFFFFFULL;
   ULong u0, v0, w0;
   ULong u1, v1, w1, w2, t;
   u0   = u & halfMask;
   u1   = u >> 32;
   v0   = v & halfMask;
   v1   = v >> 32;
   w0   = u0 * v0;
   t    = u1 * v0 + (w0 >> 32);
   w1   = t & halfMask;
   w2   = t >> 32;
   w1   = u0 * v1 + w1;
   *rHi = u1 * v1 + w2 + (w1 >> 32);
   *rLo = u * v;
}


static const UChar parity_table[256] = {
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
   0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
};
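
/* The table above gives AMD64G_CC_MASK_P exactly for byte values with
   an even number of set bits (x86 PF is even parity of the result's
   low 8 bits).  For reference, a sketch of how such a table could be
   regenerated (illustrative only, not used anywhere in VEX):

      int i, j, bits;
      for (i = 0; i < 256; i++) {
         for (bits = 0, j = 0; j < 8; j++)
            bits += (i >> j) & 1;
         table[i] = (bits & 1) ? 0 : AMD64G_CC_MASK_P;
      }
*/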

/* generalised left-shifter */
static inline Long lshift ( Long x, Int n )
{
   if (n >= 0)
      return (ULong)x << n;
   else
      return x >> (-n);
}

/* identity on ULong */
static inline ULong idULong ( ULong x )
{
   return x;
}


#define PREAMBLE(__data_bits)                                   \
   /* const */ ULong DATA_MASK                                  \
      = __data_bits==8                                          \
           ? 0xFFULL                                            \
           : (__data_bits==16                                   \
                ? 0xFFFFULL                                     \
                : (__data_bits==32                              \
                     ? 0xFFFFFFFFULL                            \
                     : 0xFFFFFFFFFFFFFFFFULL));                 \
   /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1);     \
   /* const */ ULong CC_DEP1 = cc_dep1_formal;                  \
   /* const */ ULong CC_DEP2 = cc_dep2_formal;                  \
   /* const */ ULong CC_NDEP = cc_ndep_formal;                  \
   /* Four bogus assignments, which hopefully gcc can */        \
   /* optimise away, and which stop it complaining about */     \
   /* unused variables. */                                      \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;


/*-------------------------------------------------------------*/

#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, oldC, res;                               \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/
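
/* Illustrative note on the ADD/ADC and SUB/SBB overflow recipes above
   and below: for an addition, signed overflow happens exactly when the
   two arguments have the same sign but the result has the other sign,
   which is what bit (DATA_BITS-1) of

      (argL ^ argR ^ -1) & (argL ^ res)

   captures; the subtraction variant drops the "^ -1".  lshift(...,
   12 - DATA_BITS) then moves that bit to bit 11, the OF position.
   Worked 8-bit example: argL = 0x7F, argR = 0x01, res = 0x80; both
   terms have bit 7 set, so OF is reported, matching 127 + 1
   overflowing signed 8-bit arithmetic.
*/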

#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, oldC, res;                               \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
           == ((ULong)SIGN_MASK - 1)) << 11;                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C;      \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/
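
/* Illustrative note on the SHL/SHR recipes above: DEP1 holds the final
   shifted result, and the recipes only make sense if DEP2 holds the
   value shifted by one position less than the full count, so that its
   top bit (for SHL) or bottom bit (for SHR) is the last bit shifted
   out, i.e. the new CF.  For example, an 8-bit SHL of 0xC1 by 2 would
   be presented as DEP1 = 0x04 (truncated result) and DEP2 = 0x82,
   whose bit 7 gives CF = 1.  The construction of these thunk values
   lives in guest-amd64/toIR.c, not here.
*/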

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong fl                                                   \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & CC_DEP1)                        \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1))       \
                     ^ lshift(CC_DEP1, 11)));                   \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong fl                                                   \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))     \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1))       \
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));   \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_UMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong lo, hi;                                              \
     mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo );       \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { ULong cf, pf, af, zf, sf, of;                              \
     Long lo, hi;                                               \
     mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo );         \
     cf = (hi != (lo >>/*s*/ (64-1)));                          \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

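/* Illustrative note on the multiply recipes above: CF and OF are set
   together, and only when the full-width product does not fit in the
   low half -- for the unsigned forms that is "hi != 0", for the signed
   forms "hi is not just the sign-extension of lo".  E.g. for an 8-bit
   unsigned multiply 16*16 = 256, lo = 0x00 and hi = 0x01, so CF = OF
   = 1 (and ZF = 1, since only lo is consulted for ZF), whereas 16*8 =
   128 fits in 8 bits and leaves CF = OF = 0.
*/
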
/*-------------------------------------------------------------*/

#define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = 0;                                                    \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = ((DATA_UTYPE)CC_DEP2 != 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE)                    \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     cf = ((DATA_UTYPE)CC_DEP2 == 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = 0;                                                    \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = ((DATA_UTYPE)CC_DEP2 == 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

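/* Illustrative note on the ANDN/BLSI/BLSMSK/BLSR recipes above: as
   with the other ops, DEP1 is the instruction's result; for the BLS*
   forms DEP2 appears to carry the original source operand, which is
   all the recipes need for CF (BLSI sets CF when the source is
   non-zero, BLSR and BLSMSK set it when the source is zero).  E.g.
   for a 32-bit BLSI of src = 0x30 the thunk would be DEP1 = 0x10,
   DEP2 = 0x30, giving CF = 1 and ZF = 0.
*/
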

#if PROFILE_RFLAGS

static Bool initted = False;

/* C flag, fast route */
static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))


static void showCounts ( void )
{
   Int op, co;
   HChar ch;
   vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE"
              " S NS P NP L NL LE NLE\n");
   vex_printf(" -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {

      ch = ' ';
      if (op > 0 && (op-1) % 4 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 4 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 4 == 2)
         ch = 'L';
      if (op > 0 && (op-1) % 4 == 3)
         ch = 'Q';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("     ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_RFLAGS */

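
/* Illustrative note (not part of the helper set): in the thunk scheme,
   CC_OP picks one of the recipes above and tells the helpers how to
   interpret DEP1/DEP2/NDEP.  So, for example, a hypothetical call

      ULong fl = amd64g_calculate_rflags_all_WRK
                    ( AMD64G_CC_OP_SUBQ, 5, 7, 0 );

   asks for the flags after the 64-bit subtraction 5 - 7, and would
   yield CF = 1 (borrow) and SF = 1 (negative result) among others,
   exactly as a cmpq would have left them.
*/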

/* Calculate all 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code. */
static
ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
                                        ULong cc_dep1_formal,
                                        ULong cc_dep2_formal,
                                        ULong cc_ndep_formal )
{
   switch (cc_op) {
      case AMD64G_CC_OP_COPY:
         return cc_dep1_formal
                & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
                   | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);

      case AMD64G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case AMD64G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case AMD64G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );
      case AMD64G_CC_OP_ADDQ:   ACTIONS_ADD( 64, ULong  );

      case AMD64G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case AMD64G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case AMD64G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );
      case AMD64G_CC_OP_ADCQ:   ACTIONS_ADC( 64, ULong  );

      case AMD64G_CC_OP_SUBB:   ACTIONS_SUB( 8,  UChar  );
      case AMD64G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case AMD64G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );
      case AMD64G_CC_OP_SUBQ:   ACTIONS_SUB( 64, ULong  );

      case AMD64G_CC_OP_SBBB:   ACTIONS_SBB( 8,  UChar  );
      case AMD64G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case AMD64G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );
      case AMD64G_CC_OP_SBBQ:   ACTIONS_SBB( 64, ULong  );

      case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC( 8,  UChar  );
      case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );
      case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong  );

      case AMD64G_CC_OP_INCB:   ACTIONS_INC( 8,  UChar  );
      case AMD64G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case AMD64G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );
      case AMD64G_CC_OP_INCQ:   ACTIONS_INC( 64, ULong  );

      case AMD64G_CC_OP_DECB:   ACTIONS_DEC( 8,  UChar  );
      case AMD64G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case AMD64G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );
      case AMD64G_CC_OP_DECQ:   ACTIONS_DEC( 64, ULong  );

      case AMD64G_CC_OP_SHLB:   ACTIONS_SHL( 8,  UChar  );
      case AMD64G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case AMD64G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );
      case AMD64G_CC_OP_SHLQ:   ACTIONS_SHL( 64, ULong  );

      case AMD64G_CC_OP_SHRB:   ACTIONS_SHR( 8,  UChar  );
      case AMD64G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case AMD64G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );
      case AMD64G_CC_OP_SHRQ:   ACTIONS_SHR( 64, ULong  );

      case AMD64G_CC_OP_ROLB:   ACTIONS_ROL( 8,  UChar  );
      case AMD64G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case AMD64G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );
      case AMD64G_CC_OP_ROLQ:   ACTIONS_ROL( 64, ULong  );

      case AMD64G_CC_OP_RORB:   ACTIONS_ROR( 8,  UChar  );
      case AMD64G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case AMD64G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );
      case AMD64G_CC_OP_RORQ:   ACTIONS_ROR( 64, ULong  );

      case AMD64G_CC_OP_UMULB:  ACTIONS_UMUL( 8,  UChar,  toUChar,
                                                  UShort, toUShort );
      case AMD64G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                  UInt,   toUInt );
      case AMD64G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                  ULong,  idULong );

      case AMD64G_CC_OP_UMULQ:  ACTIONS_UMULQ;

      case AMD64G_CC_OP_SMULB:  ACTIONS_SMUL( 8,  Char,   toUChar,
                                                  Short,  toUShort );
      case AMD64G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                  Int,    toUInt );
      case AMD64G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                  Long,   idULong );

      case AMD64G_CC_OP_SMULQ:  ACTIONS_SMULQ;

      case AMD64G_CC_OP_ANDN32: ACTIONS_ANDN( 32, UInt  );
      case AMD64G_CC_OP_ANDN64: ACTIONS_ANDN( 64, ULong );

      case AMD64G_CC_OP_BLSI32: ACTIONS_BLSI( 32, UInt  );
      case AMD64G_CC_OP_BLSI64: ACTIONS_BLSI( 64, ULong );

      case AMD64G_CC_OP_BLSMSK32: ACTIONS_BLSMSK( 32, UInt  );
      case AMD64G_CC_OP_BLSMSK64: ACTIONS_BLSMSK( 64, ULong );

      case AMD64G_CC_OP_BLSR32: ACTIONS_BLSR( 32, UInt  );
      case AMD64G_CC_OP_BLSR64: ACTIONS_BLSR( 64, ULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
                    "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all 6 flags from the supplied thunk parameters. */
ULong amd64g_calculate_rflags_all ( ULong cc_op,
                                    ULong cc_dep1,
                                    ULong cc_dep2,
                                    ULong cc_ndep )
{
#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
ULong amd64g_calculate_rflags_c ( ULong cc_op,
                                  ULong cc_dep1,
                                  ULong cc_dep2,
                                  ULong cc_ndep )
{
#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case AMD64G_CC_OP_COPY:
         return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
      case AMD64G_CC_OP_LOGICQ:
      case AMD64G_CC_OP_LOGICL:
      case AMD64G_CC_OP_LOGICW:
      case AMD64G_CC_OP_LOGICB:
         return 0;
      // case AMD64G_CC_OP_SUBL:
      //    return ((UInt)cc_dep1) < ((UInt)cc_dep2)
      //              ? AMD64G_CC_MASK_C : 0;
      // case AMD64G_CC_OP_SUBW:
      //    return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
      //              ? AMD64G_CC_MASK_C : 0;
      // case AMD64G_CC_OP_SUBB:
      //    return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
      //              ? AMD64G_CC_MASK_C : 0;
      // case AMD64G_CC_OP_INCL:
      // case AMD64G_CC_OP_DECL:
      //    return cc_ndep & AMD64G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_RFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & AMD64G_CC_MASK_C;
}

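
/* Illustrative note (not part of the helper set): the AMD64Condcode
   values follow the hardware encoding, in which each predicate and its
   negation differ only in bit 0 (O/NO = 0/1, B/NB = 2/3, Z/NZ = 4/5,
   and so on up to LE/NLE = 14/15).  That is why
   amd64g_calculate_condition below can evaluate both of a pair with
   one expression: it computes the un-negated predicate and XORs in
   "inv = cond & 1".  For instance, for AMD64CondNZ it computes ZF and
   returns 1 & (1 ^ ZF).
*/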

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
                                   ULong cc_op,
                                   ULong cc_dep1,
                                   ULong cc_dep2,
                                   ULong cc_ndep )
{
   ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
                                                  cc_dep2, cc_ndep);
   ULong of, sf, zf, cf, pf;
   ULong inv = cond & 1;

#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   switch (cond) {
      case AMD64CondNO:
      case AMD64CondO: /* OF == 1 */
         of = rflags >> AMD64G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case AMD64CondNZ:
      case AMD64CondZ: /* ZF == 1 */
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case AMD64CondNB:
      case AMD64CondB: /* CF == 1 */
         cf = rflags >> AMD64G_CC_SHIFT_C;
         return 1 & (inv ^ cf);
         break;

      case AMD64CondNBE:
      case AMD64CondBE: /* (CF or ZF) == 1 */
         cf = rflags >> AMD64G_CC_SHIFT_C;
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));
         break;

      case AMD64CondNS:
      case AMD64CondS: /* SF == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case AMD64CondNP:
      case AMD64CondP: /* PF == 1 */
         pf = rflags >> AMD64G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case AMD64CondNL:
      case AMD64CondL: /* (SF xor OF) == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         of = rflags >> AMD64G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));
         break;

      case AMD64CondNLE:
      case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         of = rflags >> AMD64G_CC_SHIFT_O;
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));
         break;

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("amd64g_calculate_condition"
                    "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("amd64g_calculate_condition");
   }
}


/* VISIBLE TO LIBVEX CLIENT */
ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State* vex_state )
{
   ULong rflags = amd64g_calculate_rflags_all_WRK(
                     vex_state->guest_CC_OP,
                     vex_state->guest_CC_DEP1,
                     vex_state->guest_CC_DEP2,
                     vex_state->guest_CC_NDEP
                  );
   Long dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == -1);
   if (dflag == -1)
      rflags |= (1<<10);
   if (vex_state->guest_IDFLAG == 1)
      rflags |= (1<<21);
   if (vex_state->guest_ACFLAG == 1)
      rflags |= (1<<18);

   return rflags;
}

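/* Illustrative client-side usage of the get/put pair (hypothetical
   snippet, variable names are made up): a LibVEX client holding a
   guest state can read the materialised %rflags and write a modified
   value back, e.g. to force the carry flag:

      VexGuestAMD64State gst;   // assumed initialised elsewhere
      ULong fl = LibVEX_GuestAMD64_get_rflags(&gst);
      LibVEX_GuestAMD64_put_rflags(fl | AMD64G_CC_MASK_C, &gst);

   put_rflags (below) re-encodes the value as a COPY-style thunk plus
   the D/ID/AC bits, so a subsequent get_rflags returns the same OSZACP
   bits.
*/
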
/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestAMD64_put_rflags ( ULong rflags,
                               /*MOD*/VexGuestAMD64State* vex_state )
{
   /* D flag */
   if (rflags & AMD64G_CC_MASK_D) {
      vex_state->guest_DFLAG = -1;
      rflags &= ~AMD64G_CC_MASK_D;
   }
   else
      vex_state->guest_DFLAG = 1;

   /* ID flag */
   if (rflags & AMD64G_CC_MASK_ID) {
      vex_state->guest_IDFLAG = 1;
      rflags &= ~AMD64G_CC_MASK_ID;
   }
   else
      vex_state->guest_IDFLAG = 0;

   /* AC flag */
   if (rflags & AMD64G_CC_MASK_AC) {
      vex_state->guest_ACFLAG = 1;
      rflags &= ~AMD64G_CC_MASK_AC;
   }
   else
      vex_state->guest_ACFLAG = 0;

   UInt cc_mask = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z |
                  AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P;
   vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = rflags & cc_mask;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
                                /*MOD*/VexGuestAMD64State* vex_state )
{
   ULong oszacp = amd64g_calculate_rflags_all_WRK(
                     vex_state->guest_CC_OP,
                     vex_state->guest_CC_DEP1,
                     vex_state->guest_CC_DEP2,
                     vex_state->guest_CC_NDEP
                  );
   if (new_carry_flag & 1) {
      oszacp |= AMD64G_CC_MASK_C;
   } else {
      oszacp &= ~AMD64G_CC_MASK_C;
   }
   vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = oszacp;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}


/*---------------------------------------------------------------*/
/*--- %rflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
/*--- %rflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */
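
/* Illustrative note (not part of the helpers): "specialising" here
   means rewriting a call to one of the flag helpers into plain IR when
   the condition and thunk op are known constants.  For example, a
   request equivalent to

      amd64g_calculate_condition(AMD64CondZ, AMD64G_CC_OP_SUBQ,
                                 dep1, dep2, ndep)

   is turned by the code below into

      Iop_1Uto64( Iop_CmpEQ64(dep1, dep2) )

   i.e. "did the subtraction produce zero", which iropt can fold
   further and which is easier for tools such as Memcheck to reason
   about than an opaque helper call.
*/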

static Bool isU64 ( IRExpr* e, ULong n )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U64
                  && e->Iex.Const.con->Ico.U64 == n );
}

IRExpr* guest_amd64_spechelper ( const HChar* function_name,
                                 IRExpr** args,
                                 IRStmt** precedingStmts,
                                 Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "amd64g_calculate_condition" --------- */

   if (vex_streq(function_name, "amd64g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
         /* long long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Add64, cc_dep1, cc_dep2),
                           mkU64(0)));
      }

      /*---------------- ADDL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondO)) {
         /* This is very commonly generated by Javascript JITs, for
            the idiom "do a 32-bit add and jump to out-of-line code if
            an overflow occurs". */
         /* long add, then O (overflow)
            --> ((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 + dep2)))[31]
            --> (((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
            --> (((not(dep1 ^ dep2)) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
         */
         vassert(isIRAtom(cc_dep1));
         vassert(isIRAtom(cc_dep2));
         return
            binop(Iop_And64,
                  binop(Iop_Shr64,
                        binop(Iop_And64,
                              unop(Iop_Not64,
                                   binop(Iop_Xor64, cc_dep1, cc_dep2)),
                              binop(Iop_Xor64,
                                    cc_dep1,
                                    binop(Iop_Add64, cc_dep1, cc_dep2))),
                        mkU8(31)),
                  mkU64(1));
      }

      /*---------------- SUBQ ----------------*/

      /* 0, */
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondO)) {
         /* long long sub/cmp, then O (overflow)
            --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[63]
            --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2))) >>u 63
         */
         vassert(isIRAtom(cc_dep1));
         vassert(isIRAtom(cc_dep2));
         return binop(Iop_Shr64,
                      binop(Iop_And64,
                            binop(Iop_Xor64, cc_dep1, cc_dep2),
                            binop(Iop_Xor64,
                                  cc_dep1,
                                  binop(Iop_Sub64, cc_dep1, cc_dep2))),
                      mkU8(63));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNO)) {
         /* No action.  Never yet found a test case. */
      }

      /* 2, 3 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
         /* long long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
         /* long long sub/cmp, then NB (unsigned greater than or equal)
            --> test src <=u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
      }

      /* 4, 5 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
         /* long long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
         /* long long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,cc_dep1,cc_dep2));
      }

      /* 6, 7 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
         /* long long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNBE)) {
         /* long long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor64,
                      unop(Iop_1Uto64,
                           binop(Iop_CmpLE64U, cc_dep1, cc_dep2)),
                      mkU64(1));
      }

      /* 8, 9 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondS)) {
         /* long long sub/cmp, then S (negative)
            --> (dst-src)[63]
            --> (dst-src) >>u 63 */
         return binop(Iop_Shr64,
                      binop(Iop_Sub64, cc_dep1, cc_dep2),
                      mkU8(63));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNS)) {
         /* long long sub/cmp, then NS (not negative)
            --> (dst-src)[63] ^ 1
            --> ((dst-src) >>u 63) ^ 1 */
         return binop(Iop_Xor64,
                      binop(Iop_Shr64,
                            binop(Iop_Sub64, cc_dep1, cc_dep2),
                            mkU8(63)),
                      mkU64(1));
      }

      /* 12, 13 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
         /* long long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNL)) {
         /* long long sub/cmp, then NL (signed greater than or equal)
            --> test dst >=s src
            --> test src <=s dst */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
      }

      /* 14, 15 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondLE)) {
         /* long long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNLE)) {
         /* long sub/cmp, then NLE (signed greater than)
            --> test !(dst <=s src)
            --> test (dst >s src)
            --> test (src <s dst) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
      }

      /*---------------- SUBL ----------------*/

      /* 0, */
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondO)) {
         /* This is very commonly generated by Javascript JITs, for
            the idiom "do a 32-bit subtract and jump to out-of-line
            code if an overflow occurs". */
         /* long sub/cmp, then O (overflow)
            --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[31]
            --> (((dep1 ^ dep2) & (dep1 ^ (dep1 -64 dep2))) >>u 31) & 1
         */
         vassert(isIRAtom(cc_dep1));
         vassert(isIRAtom(cc_dep2));
         return
            binop(Iop_And64,
                  binop(Iop_Shr64,
                        binop(Iop_And64,
                              binop(Iop_Xor64, cc_dep1, cc_dep2),
                              binop(Iop_Xor64,
                                    cc_dep1,
                                    binop(Iop_Sub64, cc_dep1, cc_dep2))),
                        mkU8(31)),
                  mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNO)) {
         /* No action.  Never yet found a test case. */
      }

      /* 2, 3 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32U,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test src <=u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32U,
                           unop(Iop_64to32, cc_dep2),
                           unop(Iop_64to32, cc_dep1)));
      }

      /* 4, 5 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE32,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }

      /* 6, 7 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32U,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32U,
                           unop(Iop_64to32, cc_dep2),
                           unop(Iop_64to32, cc_dep1)));
      }

      /* 8, 9 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
         /* long sub/cmp, then S (negative)
            --> (dst-src)[31]
            --> ((dst -64 src) >>u 31) & 1
            Pointless to narrow the args to 32 bit before the subtract. */
         return binop(Iop_And64,
                      binop(Iop_Shr64,
                            binop(Iop_Sub64, cc_dep1, cc_dep2),
                            mkU8(31)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNS)) {
         /* long sub/cmp, then NS (not negative)
            --> (dst-src)[31] ^ 1
            --> (((dst -64 src) >>u 31) & 1) ^ 1
            Pointless to narrow the args to 32 bit before the subtract. */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64,
                                  binop(Iop_Sub64, cc_dep1, cc_dep2),
                                  mkU8(31)),
                            mkU64(1)),
                      mkU64(1));
      }

      /* 12, 13 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32S,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test dst >=s src
            --> test src <=s dst */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32S,
                           unop(Iop_64to32, cc_dep2),
                           unop(Iop_64to32, cc_dep1)));
      }

      /* 14, 15 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32S,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
         /* long sub/cmp, then NLE (signed greater than)
            --> test !(dst <=s src)
            --> test (dst >s src)
            --> test (src <s dst) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32S,
                           unop(Iop_64to32, cc_dep2),
                           unop(Iop_64to32, cc_dep1)));
      }

      /*---------------- SUBW ----------------*/

      /* 4, 5 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ16,
                           unop(Iop_64to16,cc_dep1),
                           unop(Iop_64to16,cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE16,
                           unop(Iop_64to16,cc_dep1),
                           unop(Iop_64to16,cc_dep2)));
      }

      /* 6, */
      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondBE)) {
         /* word sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U,
                           binop(Iop_Shl64, cc_dep1, mkU8(48)),
                           binop(Iop_Shl64, cc_dep2, mkU8(48))));
      }

      /* 14, */
      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
         /* word sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           binop(Iop_Shl64,cc_dep2,mkU8(48))));
      }

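      /* Illustrative note on the two SUBW cases above: rather than
         narrowing to 16 bits, both operands are shifted left by 48 so
         that the interesting 16 bits occupy the top of a 64-bit value;
         a 64-bit unsigned/signed compare then gives the same answer as
         the 16-bit compare would.  E.g. dst = 0x8000, src = 0x0001:
         after the shift the signed compare sees a negative versus a
         positive value, so LE holds, just as it does for the 16-bit
         values interpreted as -32768 and 1. */
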
      /*---------------- SUBB ----------------*/

      /* 2, 3 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondB)) {
         /* byte sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           binop(Iop_And64, cc_dep1, mkU64(0xFF)),
                           binop(Iop_And64, cc_dep2, mkU64(0xFF))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNB)) {
         /* byte sub/cmp, then NB (unsigned greater than or equal)
            --> test src <=u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U,
                           binop(Iop_And64, cc_dep2, mkU64(0xFF)),
                           binop(Iop_And64, cc_dep1, mkU64(0xFF))));
      }

      /* 4, 5 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ8,
                           unop(Iop_64to8,cc_dep1),
                           unop(Iop_64to8,cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE8,
                           unop(Iop_64to8,cc_dep1),
                           unop(Iop_64to8,cc_dep2)));
      }

      /* 6, */
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
         /* byte sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U,
                           binop(Iop_And64, cc_dep1, mkU64(0xFF)),
                           binop(Iop_And64, cc_dep2, mkU64(0xFF))));
      }

      /* 8, 9 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
                                          && isU64(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (ULong)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU64(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U64s. */
         return binop(Iop_And64,
                      binop(Iop_Shr64,cc_dep1,mkU8(7)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
                                          && isU64(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (ULong) !dst[7]
         */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64,cc_dep1,mkU8(7)),
                            mkU64(1)),
                      mkU64(1));
      }

      /*---------------- LOGICQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
         /* long long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) {
         /* long long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
         /* long long and/or/xor, then L
            LOGIC sets SF and ZF according to the
            result and makes OF be zero.  L computes SF ^ OF, but
            OF is zero, so this reduces to SF -- which will be 1 iff
            the result is < signed 0.  Hence ...
         */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           cc_dep1,
                           mkU64(0)));
      }

      /*---------------- LOGICL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32,
                           unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE32,
                           unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32S,
                           unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) {
         /* long and/or/xor, then S --> (ULong)result[31] */
         return binop(Iop_And64,
                      binop(Iop_Shr64, cc_dep1, mkU8(31)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) {
         /* long and/or/xor, then NS --> (ULong) ~ result[31] */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64, cc_dep1, mkU8(31)),
                            mkU64(1)),
                      mkU64(1));
      }

      /*---------------- LOGICW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
                           mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNZ)) {
         /* word and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,
                           binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
                           mkU64(0)));
      }

      /*---------------- LOGICB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
                                        mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)),
                                        mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And64,
                      binop(Iop_Shr64,cc_dep1,mkU8(7)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) {
         /* byte and/or/xor, then NS --> (UInt)!result[7] */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64,cc_dep1,mkU8(7)),
                            mkU64(1)),
                      mkU64(1));
      }

      /*---------------- INCB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
         /* 8-bit inc, then LE --> sign bit of the arg */
         return binop(Iop_And64,
                      binop(Iop_Shr64,
                            binop(Iop_Sub64, cc_dep1, mkU64(1)),
                            mkU8(7)),
                      mkU64(1));
      }

      /*---------------- INCW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
         /* 16-bit inc, then Z --> test dst == 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           mkU64(0)));
      }

      /*---------------- DECL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32,
                           unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
         /* 16-bit dec, then NZ --> test dst != 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           mkU64(0)));
      }

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of amd64 FP compares: "comisd ... ;
         jbe" for example. */

      if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
          (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test (C
            or Z == 1). */
         /* COPY, then NBE --> extract C and Z from dep1, and test (C
            or Z == 0). */
         ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpEQ64,
                  binop(
                     Iop_And64,
                     binop(
                        Iop_Or64,
                        binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
                        binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
                     ),
                     mkU64(1)
                  ),
                  mkU64(nnn)
               )
            );
      }

      if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpNE64,
                  binop(
                     Iop_And64,
                     binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
                     mkU64(1)
                  ),
                  mkU64(0)
               )
            );
      }

      if (isU64(cc_op, AMD64G_CC_OP_COPY)
          && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpEQ64,
                  binop(
                     Iop_And64,
                     binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
                     mkU64(1)
                  ),
                  mkU64(nnn)
               )
            );
      }

      if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpNE64,
                  binop(
                     Iop_And64,
                     binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
                     mkU64(1)
                  ),
                  mkU64(0)
               )
            );
      }

      return NULL;
   }

   /* --------- specialising "amd64g_calculate_rflags_c" --------- */

   if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
      /* specialise calls to above "calculate_rflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           cc_dep1,
                           cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32U,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           binop(Iop_And64,cc_dep1,mkU64(0xFF)),
                           binop(Iop_And64,cc_dep2,mkU64(0xFF))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
          || isU64(cc_op, AMD64G_CC_OP_LOGICL)
          || isU64(cc_op, AMD64G_CC_OP_LOGICW)
          || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU64(0);
      }
      if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
          || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }

#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU64
#  undef mkU32
#  undef mkU8

   return NULL;
}


/*---------------------------------------------------------------*/
/*--- Supporting functions for x87 FPU activities.            ---*/
/*---------------------------------------------------------------*/

static inline Bool host_is_little_endian ( void )
{
   UInt x = 0x76543210;
   UChar* p = (UChar*)(&x);
   return toBool(*p == 0x10);
}

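/* For reference, the C3/C2/C1/C0 encodings produced by the FXAM
   emulation below (C1 always carries the sign bit of the value):

      empty register   ->  1 0 sign 1
      zero             ->  1 0 sign 0
      denormal         ->  1 1 sign 0
      infinity         ->  0 1 sign 1
      NaN              ->  0 0 sign 1
      normal finite    ->  0 1 sign 0

   This simply summarises the cases handled in the function below; it
   is not an independent statement of what real hardware does.
*/
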
*/ 1804 if (bexp == 0 && mantissaIsZero) { 1805 /* vex_printf("Zero\n"); */ 1806 return AMD64G_FC_MASK_C3 | 0 1807 | (sign << AMD64G_FC_SHIFT_C1) | 0; 1808 } 1809 1810 /* If exponent is zero but mantissa isn't, it's a denormal. 1811 Return 1,1,sign,0. */ 1812 if (bexp == 0 && !mantissaIsZero) { 1813 /* vex_printf("Denormal\n"); */ 1814 return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2 1815 | (sign << AMD64G_FC_SHIFT_C1) | 0; 1816 } 1817 1818 /* If the exponent is 7FF and the mantissa is zero, this is an infinity. 1819 Return 0,1,sign,1. */ 1820 if (bexp == 0x7FF && mantissaIsZero) { 1821 /* vex_printf("Inf\n"); */ 1822 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) 1823 | AMD64G_FC_MASK_C0; 1824 } 1825 1826 /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN. 1827 Return 0,0,sign,1. */ 1828 if (bexp == 0x7FF && !mantissaIsZero) { 1829 /* vex_printf("NaN\n"); */ 1830 return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0; 1831 } 1832 1833 /* Uh, ok, we give up. It must be a normal finite number. 1834 Return 0,1,sign,0. 1835 */ 1836 /* vex_printf("normal\n"); */ 1837 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0; 1838 } 1839 1840 1841 /* This is used to implement both 'frstor' and 'fldenv'. The latter 1842 appears to differ from the former only in that the 8 FP registers 1843 themselves are not transferred into the guest state. */ 1844 static 1845 VexEmNote do_put_x87 ( Bool moveRegs, 1846 /*IN*/UChar* x87_state, 1847 /*OUT*/VexGuestAMD64State* vex_state ) 1848 { 1849 Int stno, preg; 1850 UInt tag; 1851 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); 1852 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); 1853 Fpu_State* x87 = (Fpu_State*)x87_state; 1854 UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7; 1855 UInt tagw = x87->env[FP_ENV_TAG]; 1856 UInt fpucw = x87->env[FP_ENV_CTRL]; 1857 UInt c3210 = x87->env[FP_ENV_STAT] & 0x4700; 1858 VexEmNote ew; 1859 UInt fpround; 1860 ULong pair; 1861 1862 /* Copy registers and tags */ 1863 for (stno = 0; stno < 8; stno++) { 1864 preg = (stno + ftop) & 7; 1865 tag = (tagw >> (2*preg)) & 3; 1866 if (tag == 3) { 1867 /* register is empty */ 1868 /* hmm, if it's empty, does it still get written? Probably 1869 safer to say it does. If we don't, memcheck could get out 1870 of sync, in that it thinks all FP registers are defined by 1871 this helper, but in reality some have not been updated. */ 1872 if (moveRegs) 1873 vexRegs[preg] = 0; /* IEEE754 64-bit zero */ 1874 vexTags[preg] = 0; 1875 } else { 1876 /* register is non-empty */ 1877 if (moveRegs) 1878 convert_f80le_to_f64le( &x87->reg[10*stno], 1879 (UChar*)&vexRegs[preg] ); 1880 vexTags[preg] = 1; 1881 } 1882 } 1883 1884 /* stack pointer */ 1885 vex_state->guest_FTOP = ftop; 1886 1887 /* status word */ 1888 vex_state->guest_FC3210 = c3210; 1889 1890 /* handle the control word, setting FPROUND and detecting any 1891 emulation warnings. */ 1892 pair = amd64g_check_fldcw ( (ULong)fpucw ); 1893 fpround = (UInt)pair & 0xFFFFFFFFULL; 1894 ew = (VexEmNote)(pair >> 32); 1895 1896 vex_state->guest_FPROUND = fpround & 3; 1897 1898 /* emulation warnings --> caller */ 1899 return ew; 1900 } 1901 1902 1903 /* Create an x87 FPU state from the guest state, as close as 1904 we can approximate it. 
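      Both do_put_x87 above and do_get_x87 below use the same 14-UShort env
      layout: FTOP lives in bits 13..11 of the status word, the C3..C0
      condition bits sit within the 0x4700 mask, and the tag word carries
      two bits per physical register, with 3 meaning empty.  A decoding
      sketch (illustrative only; 'env' stands for that 14-UShort block and
      'preg' for a physical register number 0..7):

         UInt ftop  = (env[FP_ENV_STAT] >> 11) & 7;       // stack top
         UInt c3210 =  env[FP_ENV_STAT] & 0x4700;         // C3..C0
         UInt tag   = (env[FP_ENV_TAG] >> (2*preg)) & 3;  // 3 == empty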
*/ 1905 static 1906 void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state, 1907 /*OUT*/UChar* x87_state ) 1908 { 1909 Int i, stno, preg; 1910 UInt tagw; 1911 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); 1912 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); 1913 Fpu_State* x87 = (Fpu_State*)x87_state; 1914 UInt ftop = vex_state->guest_FTOP; 1915 UInt c3210 = vex_state->guest_FC3210; 1916 1917 for (i = 0; i < 14; i++) 1918 x87->env[i] = 0; 1919 1920 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF; 1921 x87->env[FP_ENV_STAT] 1922 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700)); 1923 x87->env[FP_ENV_CTRL] 1924 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND )); 1925 1926 /* Dump the register stack in ST order. */ 1927 tagw = 0; 1928 for (stno = 0; stno < 8; stno++) { 1929 preg = (stno + ftop) & 7; 1930 if (vexTags[preg] == 0) { 1931 /* register is empty */ 1932 tagw |= (3 << (2*preg)); 1933 convert_f64le_to_f80le( (UChar*)&vexRegs[preg], 1934 &x87->reg[10*stno] ); 1935 } else { 1936 /* register is full. */ 1937 tagw |= (0 << (2*preg)); 1938 convert_f64le_to_f80le( (UChar*)&vexRegs[preg], 1939 &x87->reg[10*stno] ); 1940 } 1941 } 1942 x87->env[FP_ENV_TAG] = toUShort(tagw); 1943 } 1944 1945 1946 /*---------------------------------------------------------------*/ 1947 /*--- Supporting functions for XSAVE/FXSAVE. ---*/ 1948 /*---------------------------------------------------------------*/ 1949 1950 /* CALLED FROM GENERATED CODE */ 1951 /* DIRTY HELPER (reads guest state, writes guest mem) */ 1952 /* XSAVE component 0 is the x87 FPU state. */ 1953 void amd64g_dirtyhelper_XSAVE_COMPONENT_0 1954 ( VexGuestAMD64State* gst, HWord addr ) 1955 { 1956 /* Derived from values obtained from 1957 vendor_id : AuthenticAMD 1958 cpu family : 15 1959 model : 12 1960 model name : AMD Athlon(tm) 64 Processor 3200+ 1961 stepping : 0 1962 cpu MHz : 2200.000 1963 cache size : 512 KB 1964 */ 1965 /* Somewhat roundabout, but at least it's simple. */ 1966 Fpu_State tmp; 1967 UShort* addrS = (UShort*)addr; 1968 UChar* addrC = (UChar*)addr; 1969 UShort fp_tags; 1970 UInt summary_tags; 1971 Int r, stno; 1972 UShort *srcS, *dstS; 1973 1974 do_get_x87( gst, (UChar*)&tmp ); 1975 1976 /* Now build the proper fxsave x87 image from the fsave x87 image 1977 we just made. */ 1978 1979 addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */ 1980 addrS[1] = tmp.env[FP_ENV_STAT]; /* FCW: fpu status word */ 1981 1982 /* set addrS[2] in an endian-independent way */ 1983 summary_tags = 0; 1984 fp_tags = tmp.env[FP_ENV_TAG]; 1985 for (r = 0; r < 8; r++) { 1986 if ( ((fp_tags >> (2*r)) & 3) != 3 ) 1987 summary_tags |= (1 << r); 1988 } 1989 addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */ 1990 addrC[5] = 0; /* pad */ 1991 1992 /* FOP: faulting fpu opcode. From experimentation, the real CPU 1993 does not write this field. (?!) */ 1994 addrS[3] = 0; /* BOGUS */ 1995 1996 /* RIP (Last x87 instruction pointer). From experimentation, the 1997 real CPU does not write this field. (?!) */ 1998 addrS[4] = 0; /* BOGUS */ 1999 addrS[5] = 0; /* BOGUS */ 2000 addrS[6] = 0; /* BOGUS */ 2001 addrS[7] = 0; /* BOGUS */ 2002 2003 /* RDP (Last x87 data pointer). From experimentation, the real CPU 2004 does not write this field. (?!) 
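      For orientation, the image assembled by this helper corresponds to
      the following layout (a hypothetical struct view, for illustration
      only; the code itself indexes addrS/addrC directly):

         typedef struct {
            UShort fcw;            // addrS[0]
            UShort fsw;            // addrS[1]
            UChar  ftw_abridged;   // addrC[4], one valid-bit per register
            UChar  pad;            // addrC[5]
            UShort fop;            // addrS[3],      left as zero
            ULong  rip;            // addrS[4..7],   left as zero
            ULong  rdp;            // addrS[8..11],  left as zero
            UInt   mxcsr;          // addrS[12..13], written by component 1
            UInt   mxcsr_mask;     // addrS[14..15], written by component 1
            UChar  st[8][16];      // addrS[16 + 8*stno]: 10 valid bytes
                                   // plus 6 bytes of zero padding each
         } FxsaveImageSketch;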
*/ 2005 addrS[8] = 0; /* BOGUS */ 2006 addrS[9] = 0; /* BOGUS */ 2007 addrS[10] = 0; /* BOGUS */ 2008 addrS[11] = 0; /* BOGUS */ 2009 2010 /* addrS[13,12] are MXCSR -- not written */ 2011 /* addrS[15,14] are MXCSR_MASK -- not written */ 2012 2013 /* Copy in the FP registers, in ST order. */ 2014 for (stno = 0; stno < 8; stno++) { 2015 srcS = (UShort*)(&tmp.reg[10*stno]); 2016 dstS = (UShort*)(&addrS[16 + 8*stno]); 2017 dstS[0] = srcS[0]; 2018 dstS[1] = srcS[1]; 2019 dstS[2] = srcS[2]; 2020 dstS[3] = srcS[3]; 2021 dstS[4] = srcS[4]; 2022 dstS[5] = 0; 2023 dstS[6] = 0; 2024 dstS[7] = 0; 2025 } 2026 } 2027 2028 2029 /* CALLED FROM GENERATED CODE */ 2030 /* DIRTY HELPER (reads guest state, writes guest mem) */ 2031 /* XSAVE component 1 is the SSE state. */ 2032 void amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS 2033 ( VexGuestAMD64State* gst, HWord addr ) 2034 { 2035 UShort* addrS = (UShort*)addr; 2036 UInt mxcsr; 2037 2038 /* The only non-register parts of the SSE state are MXCSR and 2039 MXCSR_MASK. */ 2040 mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND ); 2041 2042 addrS[12] = toUShort(mxcsr); /* MXCSR */ 2043 addrS[13] = toUShort(mxcsr >> 16); 2044 2045 addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */ 2046 addrS[15] = 0x0000; /* MXCSR mask (hi16) */ 2047 } 2048 2049 2050 /* VISIBLE TO LIBVEX CLIENT */ 2051 /* Do FXSAVE from the supplied VexGuestAMD64State structure and store 2052 the result at the given address which represents a buffer of at 2053 least 416 bytes. 2054 2055 This function is not called from generated code. FXSAVE is dealt 2056 with by the amd64 front end by calling the XSAVE_COMPONENT_{0,1} 2057 functions above plus some in-line IR. This function is merely a 2058 convenience function for VEX's users. 2059 */ 2060 void LibVEX_GuestAMD64_fxsave ( /*IN*/VexGuestAMD64State* gst, 2061 /*OUT*/HWord fp_state ) 2062 { 2063 /* Do the x87 part */ 2064 amd64g_dirtyhelper_XSAVE_COMPONENT_0(gst, fp_state); 2065 2066 /* And now the SSE part, except for the registers themselves. */ 2067 amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state); 2068 2069 /* That's the first 160 bytes of the image done. */ 2070 /* Now only %xmm0 .. %xmm15 remain to be copied. If the host is 2071 big-endian, these need to be byte-swapped. */ 2072 U128 *xmm = (U128 *)(fp_state + 160); 2073 vassert(host_is_little_endian()); 2074 2075 # define COPY_U128(_dst,_src) \ 2076 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \ 2077 _dst[2] = _src[2]; _dst[3] = _src[3]; } \ 2078 while (0) 2079 2080 COPY_U128( xmm[0], gst->guest_YMM0 ); 2081 COPY_U128( xmm[1], gst->guest_YMM1 ); 2082 COPY_U128( xmm[2], gst->guest_YMM2 ); 2083 COPY_U128( xmm[3], gst->guest_YMM3 ); 2084 COPY_U128( xmm[4], gst->guest_YMM4 ); 2085 COPY_U128( xmm[5], gst->guest_YMM5 ); 2086 COPY_U128( xmm[6], gst->guest_YMM6 ); 2087 COPY_U128( xmm[7], gst->guest_YMM7 ); 2088 COPY_U128( xmm[8], gst->guest_YMM8 ); 2089 COPY_U128( xmm[9], gst->guest_YMM9 ); 2090 COPY_U128( xmm[10], gst->guest_YMM10 ); 2091 COPY_U128( xmm[11], gst->guest_YMM11 ); 2092 COPY_U128( xmm[12], gst->guest_YMM12 ); 2093 COPY_U128( xmm[13], gst->guest_YMM13 ); 2094 COPY_U128( xmm[14], gst->guest_YMM14 ); 2095 COPY_U128( xmm[15], gst->guest_YMM15 ); 2096 # undef COPY_U128 2097 } 2098 2099 2100 /*---------------------------------------------------------------*/ 2101 /*--- Supporting functions for XRSTOR/FXRSTOR. 
                                                              ---*/
/*---------------------------------------------------------------*/

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_0
             ( VexGuestAMD64State* gst, HWord addr )
{
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   UShort    fp_tags;
   Int       r, stno, i;

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */
   for (i = 0; i < 14; i++) tmp.env[i] = 0;
   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7] */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   VexEmNote warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );

   return warnX87;
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
             ( VexGuestAMD64State* gst, HWord addr )
{
   UShort* addrS = (UShort*)addr;
   UInt    w32   = (((UInt)addrS[12]) & 0xFFFF)
                   | ((((UInt)addrS[13]) & 0xFFFF) << 16);
   ULong   w64   = amd64g_check_ldmxcsr( (ULong)w32 );

   VexEmNote warnXMM = (VexEmNote)(w64 >> 32);

   gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
   return warnXMM;
}


/* VISIBLE TO LIBVEX CLIENT */
/* Do FXRSTOR from the supplied address and store read values to the given
   VexGuestAMD64State structure.

   This function is not called from generated code.  FXRSTOR is dealt
   with by the amd64 front end by calling the XRSTOR_COMPONENT_{0,1}
   functions above plus some in-line IR.  This function is merely a
   convenience function for VEX's users.
*/
VexEmNote LibVEX_GuestAMD64_fxrstor ( /*IN*/HWord fp_state,
                                      /*MOD*/VexGuestAMD64State* gst )
{
   /* Restore %xmm0 .. %xmm15.  If the host is big-endian, these need
      to be byte-swapped.
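      Aside: a minimal client-side usage sketch for this function and for
      LibVEX_GuestAMD64_fxsave above; 'gst' stands for a guest state object
      the caller owns:

         UChar buf[416] __attribute__((aligned(16)));   // 416 bytes, per the
                                                        // fxsave comment above
         LibVEX_GuestAMD64_fxsave ( &gst, (HWord)buf ); // guest state -> image
         VexEmNote warn = LibVEX_GuestAMD64_fxrstor ( (HWord)buf, &gst );
         // warn != EmNote_NONE means an emulation note should be reported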
*/ 2180 U128 *xmm = (U128 *)(fp_state + 160); 2181 2182 vassert(host_is_little_endian()); 2183 2184 # define COPY_U128(_dst,_src) \ 2185 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \ 2186 _dst[2] = _src[2]; _dst[3] = _src[3]; } \ 2187 while (0) 2188 2189 COPY_U128( gst->guest_YMM0, xmm[0] ); 2190 COPY_U128( gst->guest_YMM1, xmm[1] ); 2191 COPY_U128( gst->guest_YMM2, xmm[2] ); 2192 COPY_U128( gst->guest_YMM3, xmm[3] ); 2193 COPY_U128( gst->guest_YMM4, xmm[4] ); 2194 COPY_U128( gst->guest_YMM5, xmm[5] ); 2195 COPY_U128( gst->guest_YMM6, xmm[6] ); 2196 COPY_U128( gst->guest_YMM7, xmm[7] ); 2197 COPY_U128( gst->guest_YMM8, xmm[8] ); 2198 COPY_U128( gst->guest_YMM9, xmm[9] ); 2199 COPY_U128( gst->guest_YMM10, xmm[10] ); 2200 COPY_U128( gst->guest_YMM11, xmm[11] ); 2201 COPY_U128( gst->guest_YMM12, xmm[12] ); 2202 COPY_U128( gst->guest_YMM13, xmm[13] ); 2203 COPY_U128( gst->guest_YMM14, xmm[14] ); 2204 COPY_U128( gst->guest_YMM15, xmm[15] ); 2205 2206 # undef COPY_U128 2207 2208 VexEmNote warnXMM 2209 = amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state); 2210 VexEmNote warnX87 2211 = amd64g_dirtyhelper_XRSTOR_COMPONENT_0(gst, fp_state); 2212 2213 /* Prefer an X87 emwarn over an XMM one, if both exist. */ 2214 if (warnX87 != EmNote_NONE) 2215 return warnX87; 2216 else 2217 return warnXMM; 2218 } 2219 2220 2221 /*---------------------------------------------------------------*/ 2222 /*--- Supporting functions for FSAVE/FRSTOR ---*/ 2223 /*---------------------------------------------------------------*/ 2224 2225 /* DIRTY HELPER (writes guest state) */ 2226 /* Initialise the x87 FPU state as per 'finit'. */ 2227 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst ) 2228 { 2229 Int i; 2230 gst->guest_FTOP = 0; 2231 for (i = 0; i < 8; i++) { 2232 gst->guest_FPTAG[i] = 0; /* empty */ 2233 gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */ 2234 } 2235 gst->guest_FPROUND = (ULong)Irrm_NEAREST; 2236 gst->guest_FC3210 = 0; 2237 } 2238 2239 2240 /* CALLED FROM GENERATED CODE */ 2241 /* DIRTY HELPER (reads guest memory) */ 2242 ULong amd64g_dirtyhelper_loadF80le ( Addr addrU ) 2243 { 2244 ULong f64; 2245 convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 ); 2246 return f64; 2247 } 2248 2249 /* CALLED FROM GENERATED CODE */ 2250 /* DIRTY HELPER (writes guest memory) */ 2251 void amd64g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 ) 2252 { 2253 convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU ); 2254 } 2255 2256 2257 /* CALLED FROM GENERATED CODE */ 2258 /* CLEAN HELPER */ 2259 /* mxcsr[15:0] contains a SSE native format MXCSR value. 2260 Extract from it the required SSEROUND value and any resulting 2261 emulation warning, and return (warn << 32) | sseround value. 2262 */ 2263 ULong amd64g_check_ldmxcsr ( ULong mxcsr ) 2264 { 2265 /* Decide on a rounding mode. mxcsr[14:13] holds it. */ 2266 /* NOTE, encoded exactly as per enum IRRoundingMode. */ 2267 ULong rmode = (mxcsr >> 13) & 3; 2268 2269 /* Detect any required emulation warnings. */ 2270 VexEmNote ew = EmNote_NONE; 2271 2272 if ((mxcsr & 0x1F80) != 0x1F80) { 2273 /* unmasked exceptions! 
*/ 2274 ew = EmWarn_X86_sseExns; 2275 } 2276 else 2277 if (mxcsr & (1<<15)) { 2278 /* FZ is set */ 2279 ew = EmWarn_X86_fz; 2280 } 2281 else 2282 if (mxcsr & (1<<6)) { 2283 /* DAZ is set */ 2284 ew = EmWarn_X86_daz; 2285 } 2286 2287 return (((ULong)ew) << 32) | ((ULong)rmode); 2288 } 2289 2290 2291 /* CALLED FROM GENERATED CODE */ 2292 /* CLEAN HELPER */ 2293 /* Given sseround as an IRRoundingMode value, create a suitable SSE 2294 native format MXCSR value. */ 2295 ULong amd64g_create_mxcsr ( ULong sseround ) 2296 { 2297 sseround &= 3; 2298 return 0x1F80 | (sseround << 13); 2299 } 2300 2301 2302 /* CLEAN HELPER */ 2303 /* fpucw[15:0] contains a x87 native format FPU control word. 2304 Extract from it the required FPROUND value and any resulting 2305 emulation warning, and return (warn << 32) | fpround value. 2306 */ 2307 ULong amd64g_check_fldcw ( ULong fpucw ) 2308 { 2309 /* Decide on a rounding mode. fpucw[11:10] holds it. */ 2310 /* NOTE, encoded exactly as per enum IRRoundingMode. */ 2311 ULong rmode = (fpucw >> 10) & 3; 2312 2313 /* Detect any required emulation warnings. */ 2314 VexEmNote ew = EmNote_NONE; 2315 2316 if ((fpucw & 0x3F) != 0x3F) { 2317 /* unmasked exceptions! */ 2318 ew = EmWarn_X86_x87exns; 2319 } 2320 else 2321 if (((fpucw >> 8) & 3) != 3) { 2322 /* unsupported precision */ 2323 ew = EmWarn_X86_x87precision; 2324 } 2325 2326 return (((ULong)ew) << 32) | ((ULong)rmode); 2327 } 2328 2329 2330 /* CLEAN HELPER */ 2331 /* Given fpround as an IRRoundingMode value, create a suitable x87 2332 native format FPU control word. */ 2333 ULong amd64g_create_fpucw ( ULong fpround ) 2334 { 2335 fpround &= 3; 2336 return 0x037F | (fpround << 10); 2337 } 2338 2339 2340 /* This is used to implement 'fldenv'. 2341 Reads 28 bytes at x87_state[0 .. 27]. */ 2342 /* CALLED FROM GENERATED CODE */ 2343 /* DIRTY HELPER */ 2344 VexEmNote amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state, 2345 /*IN*/HWord x87_state) 2346 { 2347 return do_put_x87( False, (UChar*)x87_state, vex_state ); 2348 } 2349 2350 2351 /* CALLED FROM GENERATED CODE */ 2352 /* DIRTY HELPER */ 2353 /* Create an x87 FPU env from the guest state, as close as we can 2354 approximate it. Writes 28 bytes at x87_state[0..27]. */ 2355 void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state, 2356 /*OUT*/HWord x87_state ) 2357 { 2358 Int i, stno, preg; 2359 UInt tagw; 2360 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); 2361 Fpu_State* x87 = (Fpu_State*)x87_state; 2362 UInt ftop = vex_state->guest_FTOP; 2363 ULong c3210 = vex_state->guest_FC3210; 2364 2365 for (i = 0; i < 14; i++) 2366 x87->env[i] = 0; 2367 2368 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF; 2369 x87->env[FP_ENV_STAT] 2370 = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) )); 2371 x87->env[FP_ENV_CTRL] 2372 = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) )); 2373 2374 /* Compute the x87 tag word. */ 2375 tagw = 0; 2376 for (stno = 0; stno < 8; stno++) { 2377 preg = (stno + ftop) & 7; 2378 if (vexTags[preg] == 0) { 2379 /* register is empty */ 2380 tagw |= (3 << (2*preg)); 2381 } else { 2382 /* register is full. */ 2383 tagw |= (0 << (2*preg)); 2384 } 2385 } 2386 x87->env[FP_ENV_TAG] = toUShort(tagw); 2387 2388 /* We don't dump the x87 registers, tho. */ 2389 } 2390 2391 2392 /* This is used to implement 'fnsave'. 2393 Writes 108 bytes at x87_state[0 .. 107]. 
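   (That is 108 = 28 + 80: a 14-UShort env image plus 8 ten-byte registers.
   Likewise 'fnsaves'/'frstors' below move 94 = 14 + 80 bytes, since their
   16-bit-era env has only 7 UShorts, and 'fldenv'/'fstenv' above move just
   the 28-byte env with no registers.)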
*/ 2394 /* CALLED FROM GENERATED CODE */ 2395 /* DIRTY HELPER */ 2396 void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State* vex_state, 2397 /*OUT*/HWord x87_state) 2398 { 2399 do_get_x87( vex_state, (UChar*)x87_state ); 2400 } 2401 2402 2403 /* This is used to implement 'fnsaves'. 2404 Writes 94 bytes at x87_state[0 .. 93]. */ 2405 /* CALLED FROM GENERATED CODE */ 2406 /* DIRTY HELPER */ 2407 void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State* vex_state, 2408 /*OUT*/HWord x87_state) 2409 { 2410 Int i, stno, preg; 2411 UInt tagw; 2412 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); 2413 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); 2414 Fpu_State_16* x87 = (Fpu_State_16*)x87_state; 2415 UInt ftop = vex_state->guest_FTOP; 2416 UInt c3210 = vex_state->guest_FC3210; 2417 2418 for (i = 0; i < 7; i++) 2419 x87->env[i] = 0; 2420 2421 x87->env[FPS_ENV_STAT] 2422 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700)); 2423 x87->env[FPS_ENV_CTRL] 2424 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND )); 2425 2426 /* Dump the register stack in ST order. */ 2427 tagw = 0; 2428 for (stno = 0; stno < 8; stno++) { 2429 preg = (stno + ftop) & 7; 2430 if (vexTags[preg] == 0) { 2431 /* register is empty */ 2432 tagw |= (3 << (2*preg)); 2433 convert_f64le_to_f80le( (UChar*)&vexRegs[preg], 2434 &x87->reg[10*stno] ); 2435 } else { 2436 /* register is full. */ 2437 tagw |= (0 << (2*preg)); 2438 convert_f64le_to_f80le( (UChar*)&vexRegs[preg], 2439 &x87->reg[10*stno] ); 2440 } 2441 } 2442 x87->env[FPS_ENV_TAG] = toUShort(tagw); 2443 } 2444 2445 2446 /* This is used to implement 'frstor'. 2447 Reads 108 bytes at x87_state[0 .. 107]. */ 2448 /* CALLED FROM GENERATED CODE */ 2449 /* DIRTY HELPER */ 2450 VexEmNote amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State* vex_state, 2451 /*IN*/HWord x87_state) 2452 { 2453 return do_put_x87( True, (UChar*)x87_state, vex_state ); 2454 } 2455 2456 2457 /* This is used to implement 'frstors'. 2458 Reads 94 bytes at x87_state[0 .. 93]. */ 2459 /* CALLED FROM GENERATED CODE */ 2460 /* DIRTY HELPER */ 2461 VexEmNote amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State* vex_state, 2462 /*IN*/HWord x87_state) 2463 { 2464 Int stno, preg; 2465 UInt tag; 2466 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); 2467 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); 2468 Fpu_State_16* x87 = (Fpu_State_16*)x87_state; 2469 UInt ftop = (x87->env[FPS_ENV_STAT] >> 11) & 7; 2470 UInt tagw = x87->env[FPS_ENV_TAG]; 2471 UInt fpucw = x87->env[FPS_ENV_CTRL]; 2472 UInt c3210 = x87->env[FPS_ENV_STAT] & 0x4700; 2473 VexEmNote ew; 2474 UInt fpround; 2475 ULong pair; 2476 2477 /* Copy registers and tags */ 2478 for (stno = 0; stno < 8; stno++) { 2479 preg = (stno + ftop) & 7; 2480 tag = (tagw >> (2*preg)) & 3; 2481 if (tag == 3) { 2482 /* register is empty */ 2483 /* hmm, if it's empty, does it still get written? Probably 2484 safer to say it does. If we don't, memcheck could get out 2485 of sync, in that it thinks all FP registers are defined by 2486 this helper, but in reality some have not been updated. 
*/ 2487 vexRegs[preg] = 0; /* IEEE754 64-bit zero */ 2488 vexTags[preg] = 0; 2489 } else { 2490 /* register is non-empty */ 2491 convert_f80le_to_f64le( &x87->reg[10*stno], 2492 (UChar*)&vexRegs[preg] ); 2493 vexTags[preg] = 1; 2494 } 2495 } 2496 2497 /* stack pointer */ 2498 vex_state->guest_FTOP = ftop; 2499 2500 /* status word */ 2501 vex_state->guest_FC3210 = c3210; 2502 2503 /* handle the control word, setting FPROUND and detecting any 2504 emulation warnings. */ 2505 pair = amd64g_check_fldcw ( (ULong)fpucw ); 2506 fpround = (UInt)pair & 0xFFFFFFFFULL; 2507 ew = (VexEmNote)(pair >> 32); 2508 2509 vex_state->guest_FPROUND = fpround & 3; 2510 2511 /* emulation warnings --> caller */ 2512 return ew; 2513 } 2514 2515 2516 /*---------------------------------------------------------------*/ 2517 /*--- CPUID helpers. ---*/ 2518 /*---------------------------------------------------------------*/ 2519 2520 /* Claim to be the following CPU, which is probably representative of 2521 the lowliest (earliest) amd64 offerings. It can do neither sse3 2522 nor cx16. 2523 2524 vendor_id : AuthenticAMD 2525 cpu family : 15 2526 model : 5 2527 model name : AMD Opteron (tm) Processor 848 2528 stepping : 10 2529 cpu MHz : 1797.682 2530 cache size : 1024 KB 2531 fpu : yes 2532 fpu_exception : yes 2533 cpuid level : 1 2534 wp : yes 2535 flags : fpu vme de pse tsc msr pae mce cx8 apic sep 2536 mtrr pge mca cmov pat pse36 clflush mmx fxsr 2537 sse sse2 syscall nx mmxext lm 3dnowext 3dnow 2538 bogomips : 3600.62 2539 TLB size : 1088 4K pages 2540 clflush size : 64 2541 cache_alignment : 64 2542 address sizes : 40 bits physical, 48 bits virtual 2543 power management: ts fid vid ttp 2544 2545 2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact 2546 we don't support them. See #291568. 3dnow is 80000001.EDX.31 2547 and 3dnowext is 80000001.EDX.30. 2548 */ 2549 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st ) 2550 { 2551 # define SET_ABCD(_a,_b,_c,_d) \ 2552 do { st->guest_RAX = (ULong)(_a); \ 2553 st->guest_RBX = (ULong)(_b); \ 2554 st->guest_RCX = (ULong)(_c); \ 2555 st->guest_RDX = (ULong)(_d); \ 2556 } while (0) 2557 2558 switch (0xFFFFFFFF & st->guest_RAX) { 2559 case 0x00000000: 2560 SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65); 2561 break; 2562 case 0x00000001: 2563 SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff); 2564 break; 2565 case 0x80000000: 2566 SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65); 2567 break; 2568 case 0x80000001: 2569 /* Don't claim to support 3dnow or 3dnowext. 0xe1d3fbff is 2570 the original it-is-supported value that the h/w provides. 2571 See #291568. 
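      The masked value below is simply that hardware value with those two
      bits cleared:  0xe1d3fbff & ~((1u << 31) | (1u << 30)) == 0x21d3fbff.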
*/ 2572 SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/ 2573 0x21d3fbff); 2574 break; 2575 case 0x80000002: 2576 SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428); 2577 break; 2578 case 0x80000003: 2579 SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834); 2580 break; 2581 case 0x80000004: 2582 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2583 break; 2584 case 0x80000005: 2585 SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140); 2586 break; 2587 case 0x80000006: 2588 SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000); 2589 break; 2590 case 0x80000007: 2591 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f); 2592 break; 2593 case 0x80000008: 2594 SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000); 2595 break; 2596 default: 2597 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2598 break; 2599 } 2600 # undef SET_ABCD 2601 } 2602 2603 2604 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16 2605 capable. 2606 2607 vendor_id : GenuineIntel 2608 cpu family : 6 2609 model : 15 2610 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz 2611 stepping : 6 2612 cpu MHz : 2394.000 2613 cache size : 4096 KB 2614 physical id : 0 2615 siblings : 2 2616 core id : 0 2617 cpu cores : 2 2618 fpu : yes 2619 fpu_exception : yes 2620 cpuid level : 10 2621 wp : yes 2622 flags : fpu vme de pse tsc msr pae mce cx8 apic sep 2623 mtrr pge mca cmov pat pse36 clflush dts acpi 2624 mmx fxsr sse sse2 ss ht tm syscall nx lm 2625 constant_tsc pni monitor ds_cpl vmx est tm2 2626 cx16 xtpr lahf_lm 2627 bogomips : 4798.78 2628 clflush size : 64 2629 cache_alignment : 64 2630 address sizes : 36 bits physical, 48 bits virtual 2631 power management: 2632 */ 2633 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st ) 2634 { 2635 # define SET_ABCD(_a,_b,_c,_d) \ 2636 do { st->guest_RAX = (ULong)(_a); \ 2637 st->guest_RBX = (ULong)(_b); \ 2638 st->guest_RCX = (ULong)(_c); \ 2639 st->guest_RDX = (ULong)(_d); \ 2640 } while (0) 2641 2642 switch (0xFFFFFFFF & st->guest_RAX) { 2643 case 0x00000000: 2644 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69); 2645 break; 2646 case 0x00000001: 2647 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff); 2648 break; 2649 case 0x00000002: 2650 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049); 2651 break; 2652 case 0x00000003: 2653 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2654 break; 2655 case 0x00000004: { 2656 switch (0xFFFFFFFF & st->guest_RCX) { 2657 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f, 2658 0x0000003f, 0x00000001); break; 2659 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f, 2660 0x0000003f, 0x00000001); break; 2661 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f, 2662 0x00000fff, 0x00000001); break; 2663 default: SET_ABCD(0x00000000, 0x00000000, 2664 0x00000000, 0x00000000); break; 2665 } 2666 break; 2667 } 2668 case 0x00000005: 2669 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020); 2670 break; 2671 case 0x00000006: 2672 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000); 2673 break; 2674 case 0x00000007: 2675 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2676 break; 2677 case 0x00000008: 2678 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000); 2679 break; 2680 case 0x00000009: 2681 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2682 break; 2683 case 0x0000000a: 2684 unhandled_eax_value: 2685 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000); 2686 break; 2687 case 0x80000000: 2688 
SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); 2689 break; 2690 case 0x80000001: 2691 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800); 2692 break; 2693 case 0x80000002: 2694 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); 2695 break; 2696 case 0x80000003: 2697 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020); 2698 break; 2699 case 0x80000004: 2700 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847); 2701 break; 2702 case 0x80000005: 2703 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2704 break; 2705 case 0x80000006: 2706 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000); 2707 break; 2708 case 0x80000007: 2709 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2710 break; 2711 case 0x80000008: 2712 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); 2713 break; 2714 default: 2715 goto unhandled_eax_value; 2716 } 2717 # undef SET_ABCD 2718 } 2719 2720 2721 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16 2722 capable. 2723 2724 vendor_id : GenuineIntel 2725 cpu family : 6 2726 model : 37 2727 model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz 2728 stepping : 2 2729 cpu MHz : 3334.000 2730 cache size : 4096 KB 2731 physical id : 0 2732 siblings : 4 2733 core id : 0 2734 cpu cores : 2 2735 apicid : 0 2736 initial apicid : 0 2737 fpu : yes 2738 fpu_exception : yes 2739 cpuid level : 11 2740 wp : yes 2741 flags : fpu vme de pse tsc msr pae mce cx8 apic sep 2742 mtrr pge mca cmov pat pse36 clflush dts acpi 2743 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp 2744 lm constant_tsc arch_perfmon pebs bts rep_good 2745 xtopology nonstop_tsc aperfmperf pni pclmulqdq 2746 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 2747 xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida 2748 arat tpr_shadow vnmi flexpriority ept vpid 2749 bogomips : 6957.57 2750 clflush size : 64 2751 cache_alignment : 64 2752 address sizes : 36 bits physical, 48 bits virtual 2753 power management: 2754 */ 2755 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st ) 2756 { 2757 # define SET_ABCD(_a,_b,_c,_d) \ 2758 do { st->guest_RAX = (ULong)(_a); \ 2759 st->guest_RBX = (ULong)(_b); \ 2760 st->guest_RCX = (ULong)(_c); \ 2761 st->guest_RDX = (ULong)(_d); \ 2762 } while (0) 2763 2764 UInt old_eax = (UInt)st->guest_RAX; 2765 UInt old_ecx = (UInt)st->guest_RCX; 2766 2767 switch (old_eax) { 2768 case 0x00000000: 2769 SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69); 2770 break; 2771 case 0x00000001: 2772 SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff); 2773 break; 2774 case 0x00000002: 2775 SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c); 2776 break; 2777 case 0x00000003: 2778 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2779 break; 2780 case 0x00000004: 2781 switch (old_ecx) { 2782 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f, 2783 0x0000003f, 0x00000000); break; 2784 case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f, 2785 0x0000007f, 0x00000000); break; 2786 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f, 2787 0x000001ff, 0x00000000); break; 2788 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f, 2789 0x00000fff, 0x00000002); break; 2790 default: SET_ABCD(0x00000000, 0x00000000, 2791 0x00000000, 0x00000000); break; 2792 } 2793 break; 2794 case 0x00000005: 2795 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120); 2796 break; 2797 case 0x00000006: 2798 SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000); 2799 break; 2800 case 0x00000007: 2801 SET_ABCD(0x00000000, 0x00000000, 
0x00000000, 0x00000000); 2802 break; 2803 case 0x00000008: 2804 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2805 break; 2806 case 0x00000009: 2807 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2808 break; 2809 case 0x0000000a: 2810 SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603); 2811 break; 2812 case 0x0000000b: 2813 switch (old_ecx) { 2814 case 0x00000000: 2815 SET_ABCD(0x00000001, 0x00000002, 2816 0x00000100, 0x00000000); break; 2817 case 0x00000001: 2818 SET_ABCD(0x00000004, 0x00000004, 2819 0x00000201, 0x00000000); break; 2820 default: 2821 SET_ABCD(0x00000000, 0x00000000, 2822 old_ecx, 0x00000000); break; 2823 } 2824 break; 2825 case 0x0000000c: 2826 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000); 2827 break; 2828 case 0x0000000d: 2829 switch (old_ecx) { 2830 case 0x00000000: SET_ABCD(0x00000001, 0x00000002, 2831 0x00000100, 0x00000000); break; 2832 case 0x00000001: SET_ABCD(0x00000004, 0x00000004, 2833 0x00000201, 0x00000000); break; 2834 default: SET_ABCD(0x00000000, 0x00000000, 2835 old_ecx, 0x00000000); break; 2836 } 2837 break; 2838 case 0x80000000: 2839 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); 2840 break; 2841 case 0x80000001: 2842 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800); 2843 break; 2844 case 0x80000002: 2845 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); 2846 break; 2847 case 0x80000003: 2848 SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020); 2849 break; 2850 case 0x80000004: 2851 SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847); 2852 break; 2853 case 0x80000005: 2854 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2855 break; 2856 case 0x80000006: 2857 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000); 2858 break; 2859 case 0x80000007: 2860 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100); 2861 break; 2862 case 0x80000008: 2863 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); 2864 break; 2865 default: 2866 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000); 2867 break; 2868 } 2869 # undef SET_ABCD 2870 } 2871 2872 2873 /* Claim to be the following CPU (4 x ...), which is AVX and cx16 2874 capable. Plus (kludge!) it "supports" HTM. 2875 2876 Also with the following change: claim that XSaveOpt is not 2877 available, by cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared to 1 2878 on the real CPU. Consequently, programs that correctly observe 2879 these CPUID values should only try to use 3 of the 8 XSave-family 2880 instructions: XGETBV, XSAVE and XRSTOR. In particular this avoids 2881 having to implement the compacted or optimised save/restore 2882 variants. 
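      A guest probing for XSAVEOPT in the architectural way therefore sees
      it as absent; roughly (a guest-side sketch, not VEX code; bit 0 of
      that subleaf is the architectural XSAVEOPT feature bit):

         UInt a, b, c, d;
         __asm__ __volatile__("cpuid"
                              : "=a"(a), "=b"(b), "=c"(c), "=d"(d)
                              : "a"(0xD), "c"(1));
         Bool has_xsaveopt = toBool(a & 1);  // 0 under this CPUID profile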
2883 2884 vendor_id : GenuineIntel 2885 cpu family : 6 2886 model : 42 2887 model name : Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz 2888 stepping : 7 2889 cpu MHz : 1600.000 2890 cache size : 6144 KB 2891 physical id : 0 2892 siblings : 4 2893 core id : 3 2894 cpu cores : 4 2895 apicid : 6 2896 initial apicid : 6 2897 fpu : yes 2898 fpu_exception : yes 2899 cpuid level : 13 2900 wp : yes 2901 flags : fpu vme de pse tsc msr pae mce cx8 apic sep 2902 mtrr pge mca cmov pat pse36 clflush dts acpi 2903 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp 2904 lm constant_tsc arch_perfmon pebs bts rep_good 2905 nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq 2906 dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 2907 xtpr pdcm sse4_1 sse4_2 popcnt aes xsave avx 2908 lahf_lm ida arat epb xsaveopt pln pts dts 2909 tpr_shadow vnmi flexpriority ept vpid 2910 2911 bogomips : 5768.94 2912 clflush size : 64 2913 cache_alignment : 64 2914 address sizes : 36 bits physical, 48 bits virtual 2915 power management: 2916 */ 2917 void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st ) 2918 { 2919 # define SET_ABCD(_a,_b,_c,_d) \ 2920 do { st->guest_RAX = (ULong)(_a); \ 2921 st->guest_RBX = (ULong)(_b); \ 2922 st->guest_RCX = (ULong)(_c); \ 2923 st->guest_RDX = (ULong)(_d); \ 2924 } while (0) 2925 2926 UInt old_eax = (UInt)st->guest_RAX; 2927 UInt old_ecx = (UInt)st->guest_RCX; 2928 2929 switch (old_eax) { 2930 case 0x00000000: 2931 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69); 2932 break; 2933 case 0x00000001: 2934 SET_ABCD(0x000206a7, 0x00100800, 0x1f9ae3bf, 0xbfebfbff); 2935 break; 2936 case 0x00000002: 2937 SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000); 2938 break; 2939 case 0x00000003: 2940 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2941 break; 2942 case 0x00000004: 2943 switch (old_ecx) { 2944 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f, 2945 0x0000003f, 0x00000000); break; 2946 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f, 2947 0x0000003f, 0x00000000); break; 2948 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f, 2949 0x000001ff, 0x00000000); break; 2950 case 0x00000003: SET_ABCD(0x1c03c163, 0x02c0003f, 2951 0x00001fff, 0x00000006); break; 2952 default: SET_ABCD(0x00000000, 0x00000000, 2953 0x00000000, 0x00000000); break; 2954 } 2955 break; 2956 case 0x00000005: 2957 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120); 2958 break; 2959 case 0x00000006: 2960 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000); 2961 break; 2962 case 0x00000007: 2963 SET_ABCD(0x00000000, 0x00000800, 0x00000000, 0x00000000); 2964 break; 2965 case 0x00000008: 2966 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2967 break; 2968 case 0x00000009: 2969 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2970 break; 2971 case 0x0000000a: 2972 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603); 2973 break; 2974 case 0x0000000b: 2975 switch (old_ecx) { 2976 case 0x00000000: 2977 SET_ABCD(0x00000001, 0x00000001, 2978 0x00000100, 0x00000000); break; 2979 case 0x00000001: 2980 SET_ABCD(0x00000004, 0x00000004, 2981 0x00000201, 0x00000000); break; 2982 default: 2983 SET_ABCD(0x00000000, 0x00000000, 2984 old_ecx, 0x00000000); break; 2985 } 2986 break; 2987 case 0x0000000c: 2988 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2989 break; 2990 case 0x0000000d: 2991 switch (old_ecx) { 2992 case 0x00000000: SET_ABCD(0x00000007, 0x00000340, 2993 0x00000340, 0x00000000); break; 2994 case 0x00000001: SET_ABCD(0x00000000, 0x00000000, 2995 0x00000000, 
0x00000000); break; 2996 case 0x00000002: SET_ABCD(0x00000100, 0x00000240, 2997 0x00000000, 0x00000000); break; 2998 default: SET_ABCD(0x00000000, 0x00000000, 2999 0x00000000, 0x00000000); break; 3000 } 3001 break; 3002 case 0x0000000e: 3003 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); 3004 break; 3005 case 0x0000000f: 3006 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); 3007 break; 3008 case 0x80000000: 3009 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); 3010 break; 3011 case 0x80000001: 3012 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800); 3013 break; 3014 case 0x80000002: 3015 SET_ABCD(0x20202020, 0x20202020, 0x65746e49, 0x2952286c); 3016 break; 3017 case 0x80000003: 3018 SET_ABCD(0x726f4320, 0x4d542865, 0x35692029, 0x3033322d); 3019 break; 3020 case 0x80000004: 3021 SET_ABCD(0x50432030, 0x20402055, 0x30382e32, 0x007a4847); 3022 break; 3023 case 0x80000005: 3024 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 3025 break; 3026 case 0x80000006: 3027 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000); 3028 break; 3029 case 0x80000007: 3030 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100); 3031 break; 3032 case 0x80000008: 3033 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); 3034 break; 3035 default: 3036 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); 3037 break; 3038 } 3039 # undef SET_ABCD 3040 } 3041 3042 3043 /* Claim to be the following CPU (4 x ...), which is AVX2 capable. 3044 3045 With the following change: claim that XSaveOpt is not available, by 3046 cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared to 1 on the real 3047 CPU. Consequently, programs that correctly observe these CPUID 3048 values should only try to use 3 of the 8 XSave-family instructions: 3049 XGETBV, XSAVE and XRSTOR. In particular this avoids having to 3050 implement the compacted or optimised save/restore variants. 
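      Of the three instructions left usable, XGETBV is how a guest asks
      which state components the OS has enabled; a guest-side sketch (not
      VEX code; the XCR0 bit assignments are the architectural ones):

         UInt lo, hi;
         __asm__ __volatile__("xgetbv" : "=a"(lo), "=d"(hi) : "c"(0));
         // XCR0: bit 0 = x87, bit 1 = SSE, bit 2 = AVX state enabled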
3051 3052 vendor_id : GenuineIntel 3053 cpu family : 6 3054 model : 60 3055 model name : Intel(R) Core(TM) i7-4910MQ CPU @ 2.90GHz 3056 stepping : 3 3057 microcode : 0x1c 3058 cpu MHz : 919.957 3059 cache size : 8192 KB 3060 physical id : 0 3061 siblings : 4 3062 core id : 3 3063 cpu cores : 4 3064 apicid : 6 3065 initial apicid : 6 3066 fpu : yes 3067 fpu_exception : yes 3068 cpuid level : 13 3069 wp : yes 3070 flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca 3071 cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht 3072 tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc 3073 arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc 3074 aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl 3075 vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1 3076 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave 3077 avx f16c rdrand lahf_lm abm ida arat epb pln pts dtherm 3078 tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust 3079 bmi1 avx2 smep bmi2 erms invpcid xsaveopt 3080 bugs : 3081 bogomips : 5786.68 3082 clflush size : 64 3083 cache_alignment : 64 3084 address sizes : 39 bits physical, 48 bits virtual 3085 power management: 3086 */ 3087 void amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State* st ) 3088 { 3089 # define SET_ABCD(_a,_b,_c,_d) \ 3090 do { st->guest_RAX = (ULong)(_a); \ 3091 st->guest_RBX = (ULong)(_b); \ 3092 st->guest_RCX = (ULong)(_c); \ 3093 st->guest_RDX = (ULong)(_d); \ 3094 } while (0) 3095 3096 UInt old_eax = (UInt)st->guest_RAX; 3097 UInt old_ecx = (UInt)st->guest_RCX; 3098 3099 switch (old_eax) { 3100 case 0x00000000: 3101 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69); 3102 break; 3103 case 0x00000001: 3104 /* Don't advertise RDRAND support, bit 30 in ECX. */ 3105 SET_ABCD(0x000306c3, 0x02100800, 0x3ffafbff, 0xbfebfbff); 3106 break; 3107 case 0x00000002: 3108 SET_ABCD(0x76036301, 0x00f0b6ff, 0x00000000, 0x00c10000); 3109 break; 3110 case 0x00000003: 3111 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 3112 break; 3113 case 0x00000004: 3114 switch (old_ecx) { 3115 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f, 3116 0x0000003f, 0x00000000); break; 3117 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f, 3118 0x0000003f, 0x00000000); break; 3119 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f, 3120 0x000001ff, 0x00000000); break; 3121 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f, 3122 0x00001fff, 0x00000006); break; 3123 default: SET_ABCD(0x00000000, 0x00000000, 3124 0x00000000, 0x00000000); break; 3125 } 3126 break; 3127 case 0x00000005: 3128 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00042120); 3129 break; 3130 case 0x00000006: 3131 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000); 3132 break; 3133 case 0x00000007: 3134 switch (old_ecx) { 3135 case 0x00000000: SET_ABCD(0x00000000, 0x000027ab, 3136 0x00000000, 0x00000000); break; 3137 default: SET_ABCD(0x00000000, 0x00000000, 3138 0x00000000, 0x00000000); break; 3139 } 3140 break; 3141 case 0x00000008: 3142 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 3143 break; 3144 case 0x00000009: 3145 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 3146 break; 3147 case 0x0000000a: 3148 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603); 3149 break; 3150 case 0x0000000b: 3151 switch (old_ecx) { 3152 case 0x00000000: SET_ABCD(0x00000001, 0x00000002, 3153 0x00000100, 0x00000002); break; 3154 case 0x00000001: SET_ABCD(0x00000004, 0x00000008, 3155 0x00000201, 0x00000002); break; 3156 default: SET_ABCD(0x00000000, 0x00000000, 3157 old_ecx, 
0x00000002); break; 3158 } 3159 break; 3160 case 0x0000000c: 3161 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 3162 break; 3163 case 0x0000000d: 3164 switch (old_ecx) { 3165 case 0x00000000: SET_ABCD(0x00000007, 0x00000340, 3166 0x00000340, 0x00000000); break; 3167 case 0x00000001: SET_ABCD(0x00000000, 0x00000000, 3168 0x00000000, 0x00000000); break; 3169 case 0x00000002: SET_ABCD(0x00000100, 0x00000240, 3170 0x00000000, 0x00000000); break; 3171 default: SET_ABCD(0x00000000, 0x00000000, 3172 0x00000000, 0x00000000); break; 3173 } 3174 break; 3175 case 0x80000000: 3176 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); 3177 break; 3178 case 0x80000001: 3179 SET_ABCD(0x00000000, 0x00000000, 0x00000021, 0x2c100800); 3180 break; 3181 case 0x80000002: 3182 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); 3183 break; 3184 case 0x80000003: 3185 SET_ABCD(0x37692029, 0x3139342d, 0x20514d30, 0x20555043); 3186 break; 3187 case 0x80000004: 3188 SET_ABCD(0x2e322040, 0x48473039, 0x0000007a, 0x00000000); 3189 break; 3190 case 0x80000005: 3191 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 3192 break; 3193 case 0x80000006: 3194 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000); 3195 break; 3196 case 0x80000007: 3197 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100); 3198 break; 3199 case 0x80000008: 3200 SET_ABCD(0x00003027, 0x00000000, 0x00000000, 0x00000000); 3201 break; 3202 default: 3203 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); 3204 break; 3205 } 3206 # undef SET_ABCD 3207 } 3208 3209 3210 /*---------------------------------------------------------------*/ 3211 /*--- Misc integer helpers, including rotates and crypto. ---*/ 3212 /*---------------------------------------------------------------*/ 3213 3214 ULong amd64g_calculate_RCR ( ULong arg, 3215 ULong rot_amt, 3216 ULong rflags_in, 3217 Long szIN ) 3218 { 3219 Bool wantRflags = toBool(szIN < 0); 3220 ULong sz = wantRflags ? (-szIN) : szIN; 3221 ULong tempCOUNT = rot_amt & (sz == 8 ? 
0x3F : 0x1F); 3222 ULong cf=0, of=0, tempcf; 3223 3224 switch (sz) { 3225 case 8: 3226 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 3227 of = ((arg >> 63) ^ cf) & 1; 3228 while (tempCOUNT > 0) { 3229 tempcf = arg & 1; 3230 arg = (arg >> 1) | (cf << 63); 3231 cf = tempcf; 3232 tempCOUNT--; 3233 } 3234 break; 3235 case 4: 3236 while (tempCOUNT >= 33) tempCOUNT -= 33; 3237 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 3238 of = ((arg >> 31) ^ cf) & 1; 3239 while (tempCOUNT > 0) { 3240 tempcf = arg & 1; 3241 arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31); 3242 cf = tempcf; 3243 tempCOUNT--; 3244 } 3245 break; 3246 case 2: 3247 while (tempCOUNT >= 17) tempCOUNT -= 17; 3248 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 3249 of = ((arg >> 15) ^ cf) & 1; 3250 while (tempCOUNT > 0) { 3251 tempcf = arg & 1; 3252 arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15); 3253 cf = tempcf; 3254 tempCOUNT--; 3255 } 3256 break; 3257 case 1: 3258 while (tempCOUNT >= 9) tempCOUNT -= 9; 3259 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 3260 of = ((arg >> 7) ^ cf) & 1; 3261 while (tempCOUNT > 0) { 3262 tempcf = arg & 1; 3263 arg = ((arg >> 1) & 0x7FULL) | (cf << 7); 3264 cf = tempcf; 3265 tempCOUNT--; 3266 } 3267 break; 3268 default: 3269 vpanic("calculate_RCR(amd64g): invalid size"); 3270 } 3271 3272 cf &= 1; 3273 of &= 1; 3274 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); 3275 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); 3276 3277 /* caller can ask to have back either the resulting flags or 3278 resulting value, but not both */ 3279 return wantRflags ? rflags_in : arg; 3280 } 3281 3282 ULong amd64g_calculate_RCL ( ULong arg, 3283 ULong rot_amt, 3284 ULong rflags_in, 3285 Long szIN ) 3286 { 3287 Bool wantRflags = toBool(szIN < 0); 3288 ULong sz = wantRflags ? (-szIN) : szIN; 3289 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F); 3290 ULong cf=0, of=0, tempcf; 3291 3292 switch (sz) { 3293 case 8: 3294 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 3295 while (tempCOUNT > 0) { 3296 tempcf = (arg >> 63) & 1; 3297 arg = (arg << 1) | (cf & 1); 3298 cf = tempcf; 3299 tempCOUNT--; 3300 } 3301 of = ((arg >> 63) ^ cf) & 1; 3302 break; 3303 case 4: 3304 while (tempCOUNT >= 33) tempCOUNT -= 33; 3305 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 3306 while (tempCOUNT > 0) { 3307 tempcf = (arg >> 31) & 1; 3308 arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1)); 3309 cf = tempcf; 3310 tempCOUNT--; 3311 } 3312 of = ((arg >> 31) ^ cf) & 1; 3313 break; 3314 case 2: 3315 while (tempCOUNT >= 17) tempCOUNT -= 17; 3316 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 3317 while (tempCOUNT > 0) { 3318 tempcf = (arg >> 15) & 1; 3319 arg = 0xFFFFULL & ((arg << 1) | (cf & 1)); 3320 cf = tempcf; 3321 tempCOUNT--; 3322 } 3323 of = ((arg >> 15) ^ cf) & 1; 3324 break; 3325 case 1: 3326 while (tempCOUNT >= 9) tempCOUNT -= 9; 3327 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 3328 while (tempCOUNT > 0) { 3329 tempcf = (arg >> 7) & 1; 3330 arg = 0xFFULL & ((arg << 1) | (cf & 1)); 3331 cf = tempcf; 3332 tempCOUNT--; 3333 } 3334 of = ((arg >> 7) ^ cf) & 1; 3335 break; 3336 default: 3337 vpanic("calculate_RCL(amd64g): invalid size"); 3338 } 3339 3340 cf &= 1; 3341 of &= 1; 3342 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); 3343 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); 3344 3345 return wantRflags ? 
rflags_in : arg; 3346 } 3347 3348 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+) 3349 * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25 3350 */ 3351 ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which) 3352 { 3353 ULong hi, lo, tmp, A[16]; 3354 3355 A[0] = 0; A[1] = a; 3356 A[2] = A[1] << 1; A[3] = A[2] ^ a; 3357 A[4] = A[2] << 1; A[5] = A[4] ^ a; 3358 A[6] = A[3] << 1; A[7] = A[6] ^ a; 3359 A[8] = A[4] << 1; A[9] = A[8] ^ a; 3360 A[10] = A[5] << 1; A[11] = A[10] ^ a; 3361 A[12] = A[6] << 1; A[13] = A[12] ^ a; 3362 A[14] = A[7] << 1; A[15] = A[14] ^ a; 3363 3364 lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15]; 3365 hi = lo >> 56; 3366 lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15]; 3367 hi = (hi << 8) | (lo >> 56); 3368 lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15]; 3369 hi = (hi << 8) | (lo >> 56); 3370 lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15]; 3371 hi = (hi << 8) | (lo >> 56); 3372 lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15]; 3373 hi = (hi << 8) | (lo >> 56); 3374 lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15]; 3375 hi = (hi << 8) | (lo >> 56); 3376 lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15]; 3377 hi = (hi << 8) | (lo >> 56); 3378 lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15]; 3379 3380 ULong m0 = -1; 3381 m0 /= 255; 3382 tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp; 3383 tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp; 3384 tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp; 3385 tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp; 3386 tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp; 3387 tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp; 3388 tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp; 3389 3390 return which ? hi : lo; 3391 } 3392 3393 3394 /* CALLED FROM GENERATED CODE */ 3395 /* DIRTY HELPER (non-referentially-transparent) */ 3396 /* Horrible hack. On non-amd64 platforms, return 1. */ 3397 ULong amd64g_dirtyhelper_RDTSC ( void ) 3398 { 3399 # if defined(__x86_64__) 3400 UInt eax, edx; 3401 __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx)); 3402 return (((ULong)edx) << 32) | ((ULong)eax); 3403 # else 3404 return 1ULL; 3405 # endif 3406 } 3407 3408 /* CALLED FROM GENERATED CODE */ 3409 /* DIRTY HELPER (non-referentially-transparent) */ 3410 /* Horrible hack. On non-amd64 platforms, return 1. */ 3411 /* This uses a different calling convention from _RDTSC just above 3412 only because of the difficulty of returning 96 bits from a C 3413 function -- RDTSC returns 64 bits and so is simple by comparison, 3414 on amd64. */ 3415 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st ) 3416 { 3417 # if defined(__x86_64__) 3418 UInt eax, ecx, edx; 3419 __asm__ __volatile__("rdtscp" : "=a" (eax), "=d" (edx), "=c" (ecx)); 3420 st->guest_RAX = (ULong)eax; 3421 st->guest_RCX = (ULong)ecx; 3422 st->guest_RDX = (ULong)edx; 3423 # else 3424 /* Do nothing. */ 3425 # endif 3426 } 3427 3428 /* CALLED FROM GENERATED CODE */ 3429 /* DIRTY HELPER (non-referentially-transparent) */ 3430 /* Horrible hack. On non-amd64 platforms, return 0. 
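      Aside, on amd64g_calculate_pclmul above: it forms the 64x64 -> 128
      carry-less, i.e. GF(2)[x], product of a and b and returns the half
      selected by 'which'.  A slow reference sketch of the same operation,
      for understanding only and not used anywhere:

         static void pclmul_ref_sketch ( ULong a, ULong b,
                                         ULong* hi, ULong* lo )
         {
            ULong h = 0, l = 0;
            UInt  i;
            for (i = 0; i < 64; i++) {
               if ((b >> i) & 1) {
                  l ^= a << i;
                  if (i > 0) h ^= a >> (64 - i);
               }
            }
            *hi = h; *lo = l;
         }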
*/ 3431 ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ ) 3432 { 3433 # if defined(__x86_64__) 3434 ULong r = 0; 3435 portno &= 0xFFFF; 3436 switch (sz) { 3437 case 4: 3438 __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0" 3439 : "=a" (r) : "Nd" (portno)); 3440 break; 3441 case 2: 3442 __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0" 3443 : "=a" (r) : "Nd" (portno)); 3444 break; 3445 case 1: 3446 __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0" 3447 : "=a" (r) : "Nd" (portno)); 3448 break; 3449 default: 3450 break; /* note: no 64-bit version of insn exists */ 3451 } 3452 return r; 3453 # else 3454 return 0; 3455 # endif 3456 } 3457 3458 3459 /* CALLED FROM GENERATED CODE */ 3460 /* DIRTY HELPER (non-referentially-transparent) */ 3461 /* Horrible hack. On non-amd64 platforms, do nothing. */ 3462 void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ ) 3463 { 3464 # if defined(__x86_64__) 3465 portno &= 0xFFFF; 3466 switch (sz) { 3467 case 4: 3468 __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1" 3469 : : "a" (data), "Nd" (portno)); 3470 break; 3471 case 2: 3472 __asm__ __volatile__("outw %w0, %w1" 3473 : : "a" (data), "Nd" (portno)); 3474 break; 3475 case 1: 3476 __asm__ __volatile__("outb %b0, %w1" 3477 : : "a" (data), "Nd" (portno)); 3478 break; 3479 default: 3480 break; /* note: no 64-bit version of insn exists */ 3481 } 3482 # else 3483 /* do nothing */ 3484 # endif 3485 } 3486 3487 /* CALLED FROM GENERATED CODE */ 3488 /* DIRTY HELPER (non-referentially-transparent) */ 3489 /* Horrible hack. On non-amd64 platforms, do nothing. */ 3490 /* op = 0: call the native SGDT instruction. 3491 op = 1: call the native SIDT instruction. 3492 */ 3493 void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) { 3494 # if defined(__x86_64__) 3495 switch (op) { 3496 case 0: 3497 __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory"); 3498 break; 3499 case 1: 3500 __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory"); 3501 break; 3502 default: 3503 vpanic("amd64g_dirtyhelper_SxDT"); 3504 } 3505 # else 3506 /* do nothing */ 3507 UChar* p = (UChar*)address; 3508 p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0; 3509 p[6] = p[7] = p[8] = p[9] = 0; 3510 # endif 3511 } 3512 3513 /*---------------------------------------------------------------*/ 3514 /*--- Helpers for MMX/SSE/SSE2. ---*/ 3515 /*---------------------------------------------------------------*/ 3516 3517 static inline UChar abdU8 ( UChar xx, UChar yy ) { 3518 return toUChar(xx>yy ? 
xx-yy : yy-xx); 3519 } 3520 3521 static inline ULong mk32x2 ( UInt w1, UInt w0 ) { 3522 return (((ULong)w1) << 32) | ((ULong)w0); 3523 } 3524 3525 static inline UShort sel16x4_3 ( ULong w64 ) { 3526 UInt hi32 = toUInt(w64 >> 32); 3527 return toUShort(hi32 >> 16); 3528 } 3529 static inline UShort sel16x4_2 ( ULong w64 ) { 3530 UInt hi32 = toUInt(w64 >> 32); 3531 return toUShort(hi32); 3532 } 3533 static inline UShort sel16x4_1 ( ULong w64 ) { 3534 UInt lo32 = toUInt(w64); 3535 return toUShort(lo32 >> 16); 3536 } 3537 static inline UShort sel16x4_0 ( ULong w64 ) { 3538 UInt lo32 = toUInt(w64); 3539 return toUShort(lo32); 3540 } 3541 3542 static inline UChar sel8x8_7 ( ULong w64 ) { 3543 UInt hi32 = toUInt(w64 >> 32); 3544 return toUChar(hi32 >> 24); 3545 } 3546 static inline UChar sel8x8_6 ( ULong w64 ) { 3547 UInt hi32 = toUInt(w64 >> 32); 3548 return toUChar(hi32 >> 16); 3549 } 3550 static inline UChar sel8x8_5 ( ULong w64 ) { 3551 UInt hi32 = toUInt(w64 >> 32); 3552 return toUChar(hi32 >> 8); 3553 } 3554 static inline UChar sel8x8_4 ( ULong w64 ) { 3555 UInt hi32 = toUInt(w64 >> 32); 3556 return toUChar(hi32 >> 0); 3557 } 3558 static inline UChar sel8x8_3 ( ULong w64 ) { 3559 UInt lo32 = toUInt(w64); 3560 return toUChar(lo32 >> 24); 3561 } 3562 static inline UChar sel8x8_2 ( ULong w64 ) { 3563 UInt lo32 = toUInt(w64); 3564 return toUChar(lo32 >> 16); 3565 } 3566 static inline UChar sel8x8_1 ( ULong w64 ) { 3567 UInt lo32 = toUInt(w64); 3568 return toUChar(lo32 >> 8); 3569 } 3570 static inline UChar sel8x8_0 ( ULong w64 ) { 3571 UInt lo32 = toUInt(w64); 3572 return toUChar(lo32 >> 0); 3573 } 3574 3575 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3576 ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy ) 3577 { 3578 return 3579 mk32x2( 3580 (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy))) 3581 + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))), 3582 (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy))) 3583 + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy))) 3584 ); 3585 } 3586 3587 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3588 ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy ) 3589 { 3590 UInt t = 0; 3591 t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) ); 3592 t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) ); 3593 t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) ); 3594 t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) ); 3595 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); 3596 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); 3597 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); 3598 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); 3599 t &= 0xFFFF; 3600 return (ULong)t; 3601 } 3602 3603 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3604 ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi ) 3605 { 3606 UShort t, min; 3607 UInt idx; 3608 t = sel16x4_0(sLo); if (True) { min = t; idx = 0; } 3609 t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; } 3610 t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; } 3611 t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; } 3612 t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; } 3613 t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; } 3614 t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; } 3615 t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; } 3616 return ((ULong)(idx << 16)) | ((ULong)min); 3617 } 3618 3619 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3620 ULong amd64g_calc_crc32b ( ULong crcIn, ULong b ) 3621 { 3622 UInt i; 3623 ULong crc = (b & 0xFFULL) ^ crcIn; 3624 for (i = 0; 
i < 8; i++) 3625 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); 3626 return crc; 3627 } 3628 3629 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3630 ULong amd64g_calc_crc32w ( ULong crcIn, ULong w ) 3631 { 3632 UInt i; 3633 ULong crc = (w & 0xFFFFULL) ^ crcIn; 3634 for (i = 0; i < 16; i++) 3635 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); 3636 return crc; 3637 } 3638 3639 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3640 ULong amd64g_calc_crc32l ( ULong crcIn, ULong l ) 3641 { 3642 UInt i; 3643 ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn; 3644 for (i = 0; i < 32; i++) 3645 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); 3646 return crc; 3647 } 3648 3649 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3650 ULong amd64g_calc_crc32q ( ULong crcIn, ULong q ) 3651 { 3652 ULong crc = amd64g_calc_crc32l(crcIn, q); 3653 return amd64g_calc_crc32l(crc, q >> 32); 3654 } 3655 3656 3657 /* .. helper for next fn .. */ 3658 static inline ULong sad_8x4 ( ULong xx, ULong yy ) 3659 { 3660 UInt t = 0; 3661 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); 3662 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); 3663 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); 3664 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); 3665 return (ULong)t; 3666 } 3667 3668 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3669 ULong amd64g_calc_mpsadbw ( ULong sHi, ULong sLo, 3670 ULong dHi, ULong dLo, 3671 ULong imm_and_return_control_bit ) 3672 { 3673 UInt imm8 = imm_and_return_control_bit & 7; 3674 Bool calcHi = (imm_and_return_control_bit >> 7) & 1; 3675 UInt srcOffsL = imm8 & 3; /* src offs in 32-bit (L) chunks */ 3676 UInt dstOffsL = (imm8 >> 2) & 1; /* dst offs in ditto chunks */ 3677 /* For src we only need 32 bits, so get them into the 3678 lower half of a 64 bit word. */ 3679 ULong src = ((srcOffsL & 2) ? sHi : sLo) >> (32 * (srcOffsL & 1)); 3680 /* For dst we need to get hold of 56 bits (7 bytes) from a total of 3681 11 bytes. If calculating the low part of the result, need bytes 3682 dstOffsL * 4 + (0 .. 6); if calculating the high part, 3683 dstOffsL * 4 + (4 .. 10). */ 3684 ULong dst; 3685 /* dstOffL = 0, Lo -> 0 .. 6 3686 dstOffL = 1, Lo -> 4 .. 10 3687 dstOffL = 0, Hi -> 4 .. 10 3688 dstOffL = 1, Hi -> 8 .. 14 3689 */ 3690 if (calcHi && dstOffsL) { 3691 /* 8 .. 14 */ 3692 dst = dHi & 0x00FFFFFFFFFFFFFFULL; 3693 } 3694 else if (!calcHi && !dstOffsL) { 3695 /* 0 .. 6 */ 3696 dst = dLo & 0x00FFFFFFFFFFFFFFULL; 3697 } 3698 else { 3699 /* 4 .. 
10 */ 3700 dst = (dLo >> 32) | ((dHi & 0x00FFFFFFULL) << 32); 3701 } 3702 ULong r0 = sad_8x4( dst >> 0, src ); 3703 ULong r1 = sad_8x4( dst >> 8, src ); 3704 ULong r2 = sad_8x4( dst >> 16, src ); 3705 ULong r3 = sad_8x4( dst >> 24, src ); 3706 ULong res = (r3 << 48) | (r2 << 32) | (r1 << 16) | r0; 3707 return res; 3708 } 3709 3710 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3711 ULong amd64g_calculate_pext ( ULong src_masked, ULong mask ) 3712 { 3713 ULong dst = 0; 3714 ULong src_bit; 3715 ULong dst_bit = 1; 3716 for (src_bit = 1; src_bit; src_bit <<= 1) { 3717 if (mask & src_bit) { 3718 if (src_masked & src_bit) dst |= dst_bit; 3719 dst_bit <<= 1; 3720 } 3721 } 3722 return dst; 3723 } 3724 3725 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3726 ULong amd64g_calculate_pdep ( ULong src, ULong mask ) 3727 { 3728 ULong dst = 0; 3729 ULong dst_bit; 3730 ULong src_bit = 1; 3731 for (dst_bit = 1; dst_bit; dst_bit <<= 1) { 3732 if (mask & dst_bit) { 3733 if (src & src_bit) dst |= dst_bit; 3734 src_bit <<= 1; 3735 } 3736 } 3737 return dst; 3738 } 3739 3740 /*---------------------------------------------------------------*/ 3741 /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/ 3742 /*---------------------------------------------------------------*/ 3743 3744 static UInt zmask_from_V128 ( V128* arg ) 3745 { 3746 UInt i, res = 0; 3747 for (i = 0; i < 16; i++) { 3748 res |= ((arg->w8[i] == 0) ? 1 : 0) << i; 3749 } 3750 return res; 3751 } 3752 3753 static UInt zmask_from_V128_wide ( V128* arg ) 3754 { 3755 UInt i, res = 0; 3756 for (i = 0; i < 8; i++) { 3757 res |= ((arg->w16[i] == 0) ? 1 : 0) << i; 3758 } 3759 return res; 3760 } 3761 3762 /* Helps with PCMP{I,E}STR{I,M}. 3763 3764 CALLED FROM GENERATED CODE: DIRTY HELPER(s). (But not really, 3765 actually it could be a clean helper, but for the fact that we can't 3766 pass by value 2 x V128 to a clean helper, nor have one returned.) 3767 Reads guest state, writes to guest state for the xSTRM cases, no 3768 accesses of memory, is a pure function. 3769 3770 opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so 3771 the callee knows which I/E and I/M variant it is dealing with and 3772 what the specific operation is. 4th byte of opcode is in the range 3773 0x60 to 0x63: 3774 istri 66 0F 3A 63 3775 istrm 66 0F 3A 62 3776 estri 66 0F 3A 61 3777 estrm 66 0F 3A 60 3778 3779 gstOffL and gstOffR are the guest state offsets for the two XMM 3780 register inputs. We never have to deal with the memory case since 3781 that is handled by pre-loading the relevant value into the fake 3782 XMM16 register. 3783 3784 For ESTRx variants, edxIN and eaxIN hold the values of those two 3785 registers. 3786 3787 In all cases, the bottom 16 bits of the result contain the new 3788 OSZACP %rflags values. For xSTRI variants, bits[31:16] of the 3789 result hold the new %ecx value. For xSTRM variants, the helper 3790 writes the result directly to the guest XMM0. 3791 3792 Declarable side effects: in all cases, reads guest state at 3793 [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes 3794 guest_XMM0. 3795 3796 Is expected to be called with opc_and_imm combinations which have 3797 actually been validated, and will assert if otherwise. The front 3798 end should ensure we're only called with verified values. 
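   For the ESTRx variants the validity masks are derived here from
   |edxIN| and |eaxIN|: each value is clamped to the element count
   (16 bytes, or 8 words for the wide forms), and a single bit is set
   at the index of the first element past the valid data -- no bit at
   all if the data fills the whole register -- which mimics where the
   terminating zero of the ISTRx forms would sit.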
3799 */ 3800 ULong amd64g_dirtyhelper_PCMPxSTRx ( 3801 VexGuestAMD64State* gst, 3802 HWord opc4_and_imm, 3803 HWord gstOffL, HWord gstOffR, 3804 HWord edxIN, HWord eaxIN 3805 ) 3806 { 3807 HWord opc4 = (opc4_and_imm >> 8) & 0xFF; 3808 HWord imm8 = opc4_and_imm & 0xFF; 3809 HWord isISTRx = opc4 & 2; 3810 HWord isxSTRM = (opc4 & 1) ^ 1; 3811 vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */ 3812 HWord wide = (imm8 & 1); 3813 3814 // where the args are 3815 V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); 3816 V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); 3817 3818 /* Create the arg validity masks, either from the vectors 3819 themselves or from the supplied edx/eax values. */ 3820 // FIXME: this is only right for the 8-bit data cases. 3821 // At least that is asserted above. 3822 UInt zmaskL, zmaskR; 3823 3824 // temp spot for the resulting flags and vector. 3825 V128 resV; 3826 UInt resOSZACP; 3827 3828 // for checking whether case was handled 3829 Bool ok = False; 3830 3831 if (wide) { 3832 if (isISTRx) { 3833 zmaskL = zmask_from_V128_wide(argL); 3834 zmaskR = zmask_from_V128_wide(argR); 3835 } else { 3836 Int tmp; 3837 tmp = edxIN & 0xFFFFFFFF; 3838 if (tmp < -8) tmp = -8; 3839 if (tmp > 8) tmp = 8; 3840 if (tmp < 0) tmp = -tmp; 3841 vassert(tmp >= 0 && tmp <= 8); 3842 zmaskL = (1 << tmp) & 0xFF; 3843 tmp = eaxIN & 0xFFFFFFFF; 3844 if (tmp < -8) tmp = -8; 3845 if (tmp > 8) tmp = 8; 3846 if (tmp < 0) tmp = -tmp; 3847 vassert(tmp >= 0 && tmp <= 8); 3848 zmaskR = (1 << tmp) & 0xFF; 3849 } 3850 // do the meyaath 3851 ok = compute_PCMPxSTRx_wide ( 3852 &resV, &resOSZACP, argL, argR, 3853 zmaskL, zmaskR, imm8, (Bool)isxSTRM 3854 ); 3855 } else { 3856 if (isISTRx) { 3857 zmaskL = zmask_from_V128(argL); 3858 zmaskR = zmask_from_V128(argR); 3859 } else { 3860 Int tmp; 3861 tmp = edxIN & 0xFFFFFFFF; 3862 if (tmp < -16) tmp = -16; 3863 if (tmp > 16) tmp = 16; 3864 if (tmp < 0) tmp = -tmp; 3865 vassert(tmp >= 0 && tmp <= 16); 3866 zmaskL = (1 << tmp) & 0xFFFF; 3867 tmp = eaxIN & 0xFFFFFFFF; 3868 if (tmp < -16) tmp = -16; 3869 if (tmp > 16) tmp = 16; 3870 if (tmp < 0) tmp = -tmp; 3871 vassert(tmp >= 0 && tmp <= 16); 3872 zmaskR = (1 << tmp) & 0xFFFF; 3873 } 3874 // do the meyaath 3875 ok = compute_PCMPxSTRx ( 3876 &resV, &resOSZACP, argL, argR, 3877 zmaskL, zmaskR, imm8, (Bool)isxSTRM 3878 ); 3879 } 3880 3881 // front end shouldn't pass us any imm8 variants we can't 3882 // handle. Hence: 3883 vassert(ok); 3884 3885 // So, finally we need to get the results back to the caller. 3886 // In all cases, the new OSZACP value is the lowest 16 of 3887 // the return value. 
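   // (0x8D5 retains exactly the OSZACP bits in their native rFLAGS
   // positions: O = bit 11, S = bit 7, Z = bit 6, A = bit 4,
   // P = bit 2, C = bit 0; 0x800|0x80|0x40|0x10|0x4|0x1 == 0x8D5.)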
3888 if (isxSTRM) { 3889 gst->guest_YMM0[0] = resV.w32[0]; 3890 gst->guest_YMM0[1] = resV.w32[1]; 3891 gst->guest_YMM0[2] = resV.w32[2]; 3892 gst->guest_YMM0[3] = resV.w32[3]; 3893 return resOSZACP & 0x8D5; 3894 } else { 3895 UInt newECX = resV.w32[0] & 0xFFFF; 3896 return (newECX << 16) | (resOSZACP & 0x8D5); 3897 } 3898 } 3899 3900 /*---------------------------------------------------------------*/ 3901 /*--- AES primitives and helpers ---*/ 3902 /*---------------------------------------------------------------*/ 3903 /* a 16 x 16 matrix */ 3904 static const UChar sbox[256] = { // row nr 3905 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1 3906 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, 3907 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2 3908 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 3909 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3 3910 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, 3911 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4 3912 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, 3913 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5 3914 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, 3915 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6 3916 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, 3917 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7 3918 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 3919 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8 3920 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 3921 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9 3922 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, 3923 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10 3924 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, 3925 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11 3926 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 3927 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12 3928 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, 3929 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13 3930 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, 3931 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14 3932 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 3933 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15 3934 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 3935 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16 3936 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 3937 }; 3938 static void SubBytes (V128* v) 3939 { 3940 V128 r; 3941 UInt i; 3942 for (i = 0; i < 16; i++) 3943 r.w8[i] = sbox[v->w8[i]]; 3944 *v = r; 3945 } 3946 3947 /* a 16 x 16 matrix */ 3948 static const UChar invsbox[256] = { // row nr 3949 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1 3950 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 3951 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2 3952 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, 3953 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3 3954 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, 3955 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4 3956 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, 3957 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5 3958 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, 3959 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6 3960 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, 3961 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7 3962 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, 3963 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8 3964 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, 3965 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 
// 9 3966 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, 3967 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10 3968 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, 3969 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11 3970 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, 3971 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12 3972 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, 3973 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13 3974 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, 3975 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14 3976 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, 3977 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15 3978 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, 3979 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16 3980 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d 3981 }; 3982 static void InvSubBytes (V128* v) 3983 { 3984 V128 r; 3985 UInt i; 3986 for (i = 0; i < 16; i++) 3987 r.w8[i] = invsbox[v->w8[i]]; 3988 *v = r; 3989 } 3990 3991 static const UChar ShiftRows_op[16] = 3992 {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0}; 3993 static void ShiftRows (V128* v) 3994 { 3995 V128 r; 3996 UInt i; 3997 for (i = 0; i < 16; i++) 3998 r.w8[i] = v->w8[ShiftRows_op[15-i]]; 3999 *v = r; 4000 } 4001 4002 static const UChar InvShiftRows_op[16] = 4003 {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0}; 4004 static void InvShiftRows (V128* v) 4005 { 4006 V128 r; 4007 UInt i; 4008 for (i = 0; i < 16; i++) 4009 r.w8[i] = v->w8[InvShiftRows_op[15-i]]; 4010 *v = r; 4011 } 4012 4013 /* Multiplication of the finite fields elements of AES. 4014 See "A Specification for The AES Algorithm Rijndael 4015 (by Joan Daemen & Vincent Rijmen)" 4016 Dr. Brian Gladman, v3.1, 3rd March 2001. */ 4017 /* N values so that (hex) xy = 0x03^N. 4018 0x00 cannot be used. 
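   (Zero has no discrete logarithm, and ff_mul below special-cases
   zero operands, so that entry is never actually consulted.)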
We put 0xff for this value.*/ 4019 /* a 16 x 16 matrix */ 4020 static const UChar Nxy[256] = { // row nr 4021 0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1 4022 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03, 4023 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2 4024 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1, 4025 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3 4026 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78, 4027 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4 4028 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e, 4029 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5 4030 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38, 4031 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6 4032 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10, 4033 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7 4034 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba, 4035 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8 4036 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57, 4037 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9 4038 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8, 4039 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10 4040 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0, 4041 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11 4042 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7, 4043 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12 4044 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d, 4045 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13 4046 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1, 4047 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14 4048 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, 4049 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15 4050 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, 4051 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16 4052 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07 4053 }; 4054 4055 /* E values so that E = 0x03^xy. 
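   Exy is the antilog table inverse to Nxy above, so for nonzero a, b
   ff_mul(a,b) below computes Exy[(Nxy[a] + Nxy[b]) mod 255], the usual
   log/antilog scheme for multiplication in GF(2^8).  For instance
   ff_mul(0x02, 0x87) = Exy[0x19 + 0x74] = Exy[0x8D] = 0x15, which is
   indeed 0x02 * 0x87 reduced by the AES polynomial 0x11B.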
*/ 4056 static const UChar Exy[256] = { // row nr 4057 0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1 4058 0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35, 4059 0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2 4060 0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa, 4061 0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3 4062 0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31, 4063 0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4 4064 0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd, 4065 0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5 4066 0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88, 4067 0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6 4068 0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a, 4069 0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7 4070 0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3, 4071 0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8 4072 0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0, 4073 0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9 4074 0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41, 4075 0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10 4076 0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75, 4077 0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11 4078 0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80, 4079 0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12 4080 0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54, 4081 0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13 4082 0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca, 4083 0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14 4084 0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e, 4085 0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15 4086 0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17, 4087 0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16 4088 0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01}; 4089 4090 static inline UChar ff_mul(UChar u1, UChar u2) 4091 { 4092 if ((u1 > 0) && (u2 > 0)) { 4093 UInt ui = Nxy[u1] + Nxy[u2]; 4094 if (ui >= 255) 4095 ui = ui - 255; 4096 return Exy[ui]; 4097 } else { 4098 return 0; 4099 }; 4100 } 4101 4102 static void MixColumns (V128* v) 4103 { 4104 V128 r; 4105 Int j; 4106 #define P(x,row,col) (x)->w8[((row)*4+(col))] 4107 for (j = 0; j < 4; j++) { 4108 P(&r,j,0) = ff_mul(0x02, P(v,j,0)) ^ ff_mul(0x03, P(v,j,1)) 4109 ^ P(v,j,2) ^ P(v,j,3); 4110 P(&r,j,1) = P(v,j,0) ^ ff_mul( 0x02, P(v,j,1) ) 4111 ^ ff_mul(0x03, P(v,j,2) ) ^ P(v,j,3); 4112 P(&r,j,2) = P(v,j,0) ^ P(v,j,1) ^ ff_mul( 0x02, P(v,j,2) ) 4113 ^ ff_mul(0x03, P(v,j,3) ); 4114 P(&r,j,3) = ff_mul(0x03, P(v,j,0) ) ^ P(v,j,1) ^ P(v,j,2) 4115 ^ ff_mul( 0x02, P(v,j,3) ); 4116 } 4117 *v = r; 4118 #undef P 4119 } 4120 4121 static void InvMixColumns (V128* v) 4122 { 4123 V128 r; 4124 Int j; 4125 #define P(x,row,col) (x)->w8[((row)*4+(col))] 4126 for (j = 0; j < 4; j++) { 4127 P(&r,j,0) = ff_mul(0x0e, P(v,j,0) ) ^ ff_mul(0x0b, P(v,j,1) ) 4128 ^ ff_mul(0x0d,P(v,j,2) ) ^ ff_mul(0x09, P(v,j,3) ); 4129 P(&r,j,1) = ff_mul(0x09, P(v,j,0) ) ^ ff_mul(0x0e, P(v,j,1) ) 4130 ^ ff_mul(0x0b,P(v,j,2) ) ^ ff_mul(0x0d, P(v,j,3) ); 4131 P(&r,j,2) = ff_mul(0x0d, P(v,j,0) ) ^ ff_mul(0x09, P(v,j,1) ) 4132 ^ ff_mul(0x0e,P(v,j,2) ) ^ ff_mul(0x0b, P(v,j,3) ); 4133 P(&r,j,3) = ff_mul(0x0b, P(v,j,0) ) ^ ff_mul(0x0d, P(v,j,1) ) 4134 ^ ff_mul(0x09,P(v,j,2) ) ^ ff_mul(0x0e, P(v,j,3) ); 4135 } 4136 *v = r; 4137 #undef P 4138 4139 } 4140 4141 /* For description, see definition in guest_amd64_defs.h */ 4142 void amd64g_dirtyhelper_AES ( 4143 VexGuestAMD64State* gst, 4144 HWord opc4, HWord gstOffD, 4145 HWord gstOffL, HWord gstOffR 4146 ) 
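/* In outline, the cases below implement (argD/argL/argR being the
   16-byte guest-state values located via gstOffD/gstOffL/gstOffR):
      0xDC  AESENC      argD = MixColumns(SubBytes(ShiftRows(argR))) ^ argL
      0xDD  AESENCLAST  as AESENC, but with MixColumns omitted
      0xDE  AESDEC      argD = InvMixColumns(InvSubBytes(InvShiftRows(argR))) ^ argL
      0xDF  AESDECLAST  as AESDEC, but with InvMixColumns omitted
      0xDB  AESIMC      argD = InvMixColumns(argL), with no XOR step
*/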
4147 { 4148 // where the args are 4149 V128* argD = (V128*)( ((UChar*)gst) + gstOffD ); 4150 V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); 4151 V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); 4152 V128 r; 4153 4154 switch (opc4) { 4155 case 0xDC: /* AESENC */ 4156 case 0xDD: /* AESENCLAST */ 4157 r = *argR; 4158 ShiftRows (&r); 4159 SubBytes (&r); 4160 if (opc4 == 0xDC) 4161 MixColumns (&r); 4162 argD->w64[0] = r.w64[0] ^ argL->w64[0]; 4163 argD->w64[1] = r.w64[1] ^ argL->w64[1]; 4164 break; 4165 4166 case 0xDE: /* AESDEC */ 4167 case 0xDF: /* AESDECLAST */ 4168 r = *argR; 4169 InvShiftRows (&r); 4170 InvSubBytes (&r); 4171 if (opc4 == 0xDE) 4172 InvMixColumns (&r); 4173 argD->w64[0] = r.w64[0] ^ argL->w64[0]; 4174 argD->w64[1] = r.w64[1] ^ argL->w64[1]; 4175 break; 4176 4177 case 0xDB: /* AESIMC */ 4178 *argD = *argL; 4179 InvMixColumns (argD); 4180 break; 4181 default: vassert(0); 4182 } 4183 } 4184 4185 static inline UInt RotWord (UInt w32) 4186 { 4187 return ((w32 >> 8) | (w32 << 24)); 4188 } 4189 4190 static inline UInt SubWord (UInt w32) 4191 { 4192 UChar *w8; 4193 UChar *r8; 4194 UInt res; 4195 w8 = (UChar*) &w32; 4196 r8 = (UChar*) &res; 4197 r8[0] = sbox[w8[0]]; 4198 r8[1] = sbox[w8[1]]; 4199 r8[2] = sbox[w8[2]]; 4200 r8[3] = sbox[w8[3]]; 4201 return res; 4202 } 4203 4204 /* For description, see definition in guest_amd64_defs.h */ 4205 extern void amd64g_dirtyhelper_AESKEYGENASSIST ( 4206 VexGuestAMD64State* gst, 4207 HWord imm8, 4208 HWord gstOffL, HWord gstOffR 4209 ) 4210 { 4211 // where the args are 4212 V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); 4213 V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); 4214 4215 // We have to create the result in a temporary in the 4216 // case where the src and dst regs are the same. See #341698. 4217 V128 tmp; 4218 4219 tmp.w32[3] = RotWord (SubWord (argL->w32[3])) ^ imm8; 4220 tmp.w32[2] = SubWord (argL->w32[3]); 4221 tmp.w32[1] = RotWord (SubWord (argL->w32[1])) ^ imm8; 4222 tmp.w32[0] = SubWord (argL->w32[1]); 4223 4224 argR->w32[3] = tmp.w32[3]; 4225 argR->w32[2] = tmp.w32[2]; 4226 argR->w32[1] = tmp.w32[1]; 4227 argR->w32[0] = tmp.w32[0]; 4228 } 4229 4230 4231 4232 /*---------------------------------------------------------------*/ 4233 /*--- Helpers for dealing with, and describing, ---*/ 4234 /*--- guest state as a whole. ---*/ 4235 /*---------------------------------------------------------------*/ 4236 4237 /* Initialise the entire amd64 guest state. */ 4238 /* VISIBLE TO LIBVEX CLIENT */ 4239 void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state ) 4240 { 4241 vex_state->host_EvC_FAILADDR = 0; 4242 vex_state->host_EvC_COUNTER = 0; 4243 vex_state->pad0 = 0; 4244 4245 vex_state->guest_RAX = 0; 4246 vex_state->guest_RCX = 0; 4247 vex_state->guest_RDX = 0; 4248 vex_state->guest_RBX = 0; 4249 vex_state->guest_RSP = 0; 4250 vex_state->guest_RBP = 0; 4251 vex_state->guest_RSI = 0; 4252 vex_state->guest_RDI = 0; 4253 vex_state->guest_R8 = 0; 4254 vex_state->guest_R9 = 0; 4255 vex_state->guest_R10 = 0; 4256 vex_state->guest_R11 = 0; 4257 vex_state->guest_R12 = 0; 4258 vex_state->guest_R13 = 0; 4259 vex_state->guest_R14 = 0; 4260 vex_state->guest_R15 = 0; 4261 4262 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY; 4263 vex_state->guest_CC_DEP1 = 0; 4264 vex_state->guest_CC_DEP2 = 0; 4265 vex_state->guest_CC_NDEP = 0; 4266 4267 vex_state->guest_DFLAG = 1; /* forwards */ 4268 vex_state->guest_IDFLAG = 0; 4269 vex_state->guest_ACFLAG = 0; 4270 4271 /* HACK: represent the offset associated with a constant %fs. 
4272 Typically, on linux, this assumes that %fs is only ever zero (main 4273 thread) or 0x63. */ 4274 vex_state->guest_FS_CONST = 0; 4275 4276 vex_state->guest_RIP = 0; 4277 4278 /* Initialise the simulated FPU */ 4279 amd64g_dirtyhelper_FINIT( vex_state ); 4280 4281 /* Initialise the AVX state. */ 4282 # define AVXZERO(_ymm) \ 4283 do { _ymm[0]=_ymm[1]=_ymm[2]=_ymm[3] = 0; \ 4284 _ymm[4]=_ymm[5]=_ymm[6]=_ymm[7] = 0; \ 4285 } while (0) 4286 vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST; 4287 AVXZERO(vex_state->guest_YMM0); 4288 AVXZERO(vex_state->guest_YMM1); 4289 AVXZERO(vex_state->guest_YMM2); 4290 AVXZERO(vex_state->guest_YMM3); 4291 AVXZERO(vex_state->guest_YMM4); 4292 AVXZERO(vex_state->guest_YMM5); 4293 AVXZERO(vex_state->guest_YMM6); 4294 AVXZERO(vex_state->guest_YMM7); 4295 AVXZERO(vex_state->guest_YMM8); 4296 AVXZERO(vex_state->guest_YMM9); 4297 AVXZERO(vex_state->guest_YMM10); 4298 AVXZERO(vex_state->guest_YMM11); 4299 AVXZERO(vex_state->guest_YMM12); 4300 AVXZERO(vex_state->guest_YMM13); 4301 AVXZERO(vex_state->guest_YMM14); 4302 AVXZERO(vex_state->guest_YMM15); 4303 AVXZERO(vex_state->guest_YMM16); 4304 4305 # undef AVXZERO 4306 4307 vex_state->guest_EMNOTE = EmNote_NONE; 4308 4309 /* These should not ever be either read or written, but we 4310 initialise them anyway. */ 4311 vex_state->guest_CMSTART = 0; 4312 vex_state->guest_CMLEN = 0; 4313 4314 vex_state->guest_NRADDR = 0; 4315 vex_state->guest_SC_CLASS = 0; 4316 vex_state->guest_GS_CONST = 0; 4317 4318 vex_state->guest_IP_AT_SYSCALL = 0; 4319 vex_state->pad1 = 0; 4320 } 4321 4322 4323 /* Figure out if any part of the guest state contained in minoff 4324 .. maxoff requires precise memory exceptions. If in doubt return 4325 True (but this generates significantly slower code). 4326 4327 By default we enforce precise exns for guest %RSP, %RBP and %RIP 4328 only. These are the minimum needed to extract correct stack 4329 backtraces from amd64 code. 4330 4331 Only %RSP is needed in mode VexRegUpdSpAtMemAccess. 4332 */ 4333 Bool guest_amd64_state_requires_precise_mem_exns ( 4334 Int minoff, Int maxoff, VexRegisterUpdates pxControl 4335 ) 4336 { 4337 Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP); 4338 Int rbp_max = rbp_min + 8 - 1; 4339 Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP); 4340 Int rsp_max = rsp_min + 8 - 1; 4341 Int rip_min = offsetof(VexGuestAMD64State, guest_RIP); 4342 Int rip_max = rip_min + 8 - 1; 4343 4344 if (maxoff < rsp_min || minoff > rsp_max) { 4345 /* no overlap with rsp */ 4346 if (pxControl == VexRegUpdSpAtMemAccess) 4347 return False; // We only need to check stack pointer. 4348 } else { 4349 return True; 4350 } 4351 4352 if (maxoff < rbp_min || minoff > rbp_max) { 4353 /* no overlap with rbp */ 4354 } else { 4355 return True; 4356 } 4357 4358 if (maxoff < rip_min || minoff > rip_max) { 4359 /* no overlap with eip */ 4360 } else { 4361 return True; 4362 } 4363 4364 return False; 4365 } 4366 4367 4368 #define ALWAYSDEFD(field) \ 4369 { offsetof(VexGuestAMD64State, field), \ 4370 (sizeof ((VexGuestAMD64State*)0)->field) } 4371 4372 VexGuestLayout 4373 amd64guest_layout 4374 = { 4375 /* Total size of the guest state, in bytes. */ 4376 .total_sizeB = sizeof(VexGuestAMD64State), 4377 4378 /* Describe the stack pointer. */ 4379 .offset_SP = offsetof(VexGuestAMD64State,guest_RSP), 4380 .sizeof_SP = 8, 4381 4382 /* Describe the frame pointer. */ 4383 .offset_FP = offsetof(VexGuestAMD64State,guest_RBP), 4384 .sizeof_FP = 8, 4385 4386 /* Describe the instruction pointer. 
*/ 4387 .offset_IP = offsetof(VexGuestAMD64State,guest_RIP), 4388 .sizeof_IP = 8, 4389 4390 /* Describe any sections to be regarded by Memcheck as 4391 'always-defined'. */ 4392 .n_alwaysDefd = 16, 4393 4394 /* flags thunk: OP and NDEP are always defd, whereas DEP1 4395 and DEP2 have to be tracked. See detailed comment in 4396 gdefs.h on meaning of thunk fields. */ 4397 .alwaysDefd 4398 = { /* 0 */ ALWAYSDEFD(guest_CC_OP), 4399 /* 1 */ ALWAYSDEFD(guest_CC_NDEP), 4400 /* 2 */ ALWAYSDEFD(guest_DFLAG), 4401 /* 3 */ ALWAYSDEFD(guest_IDFLAG), 4402 /* 4 */ ALWAYSDEFD(guest_RIP), 4403 /* 5 */ ALWAYSDEFD(guest_FS_CONST), 4404 /* 6 */ ALWAYSDEFD(guest_FTOP), 4405 /* 7 */ ALWAYSDEFD(guest_FPTAG), 4406 /* 8 */ ALWAYSDEFD(guest_FPROUND), 4407 /* 9 */ ALWAYSDEFD(guest_FC3210), 4408 // /* */ ALWAYSDEFD(guest_CS), 4409 // /* */ ALWAYSDEFD(guest_DS), 4410 // /* */ ALWAYSDEFD(guest_ES), 4411 // /* */ ALWAYSDEFD(guest_FS), 4412 // /* */ ALWAYSDEFD(guest_GS), 4413 // /* */ ALWAYSDEFD(guest_SS), 4414 // /* */ ALWAYSDEFD(guest_LDT), 4415 // /* */ ALWAYSDEFD(guest_GDT), 4416 /* 10 */ ALWAYSDEFD(guest_EMNOTE), 4417 /* 11 */ ALWAYSDEFD(guest_SSEROUND), 4418 /* 12 */ ALWAYSDEFD(guest_CMSTART), 4419 /* 13 */ ALWAYSDEFD(guest_CMLEN), 4420 /* 14 */ ALWAYSDEFD(guest_SC_CLASS), 4421 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL) 4422 } 4423 }; 4424 4425 4426 /*---------------------------------------------------------------*/ 4427 /*--- end guest_amd64_helpers.c ---*/ 4428 /*---------------------------------------------------------------*/ 4429