1 2 /*---------------------------------------------------------------*/ 3 /*--- begin guest_amd64_helpers.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2013 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 #include "libvex_basictypes.h" 37 #include "libvex_emnote.h" 38 #include "libvex_guest_amd64.h" 39 #include "libvex_ir.h" 40 #include "libvex.h" 41 42 #include "main_util.h" 43 #include "main_globals.h" 44 #include "guest_generic_bb_to_IR.h" 45 #include "guest_amd64_defs.h" 46 #include "guest_generic_x87.h" 47 48 49 /* This file contains helper functions for amd64 guest code. 50 Calls to these functions are generated by the back end. 51 These calls are of course in the host machine code and 52 this file will be compiled to host machine code, so that 53 all makes sense. 54 55 Only change the signatures of these helper functions very 56 carefully. If you change the signature here, you'll have to change 57 the parameters passed to it in the IR calls constructed by 58 guest-amd64/toIR.c. 59 60 The convention used is that all functions called from generated 61 code are named amd64g_<something>, and any function whose name lacks 62 that prefix is not called from generated code. Note that some 63 LibVEX_* functions can however be called by VEX's client, but that 64 is not the same as calling them from VEX-generated code. 65 */ 66 67 68 /* Set to 1 to get detailed profiling info about use of the flag 69 machinery. */ 70 #define PROFILE_RFLAGS 0 71 72 73 /*---------------------------------------------------------------*/ 74 /*--- %rflags run-time helpers. ---*/ 75 /*---------------------------------------------------------------*/ 76 77 /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags 78 after imulq/mulq. */ 79 80 static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo ) 81 { 82 ULong u0, v0, w0; 83 Long u1, v1, w1, w2, t; 84 u0 = u & 0xFFFFFFFFULL; 85 u1 = u >> 32; 86 v0 = v & 0xFFFFFFFFULL; 87 v1 = v >> 32; 88 w0 = u0 * v0; 89 t = u1 * v0 + (w0 >> 32); 90 w1 = t & 0xFFFFFFFFULL; 91 w2 = t >> 32; 92 w1 = u0 * v1 + w1; 93 *rHi = u1 * v1 + w2 + (w1 >> 32); 94 *rLo = u * v; 95 } 96 97 static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo ) 98 { 99 ULong u0, v0, w0; 100 ULong u1, v1, w1,w2,t; 101 u0 = u & 0xFFFFFFFFULL; 102 u1 = u >> 32; 103 v0 = v & 0xFFFFFFFFULL; 104 v1 = v >> 32; 105 w0 = u0 * v0; 106 t = u1 * v0 + (w0 >> 32); 107 w1 = t & 0xFFFFFFFFULL; 108 w2 = t >> 32; 109 w1 = u0 * v1 + w1; 110 *rHi = u1 * v1 + w2 + (w1 >> 32); 111 *rLo = u * v; 112 } 113 114 115 static const UChar parity_table[256] = { 116 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 117 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 118 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 119 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 120 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 121 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 122 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 123 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 124 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 125 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 126 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 127 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 128 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 129 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 130 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 131 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 132 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 133 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 134 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 135 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 136 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 137 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 138 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 139 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 140 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 141 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 142 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 143 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 144 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 145 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 146 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, 147 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 148 }; 149 150 /* generalised left-shifter */ 151 static inline Long lshift ( Long x, Int n ) 152 { 153 if (n >= 0) 154 return (ULong)x << n; 155 else 156 return x >> (-n); 157 } 158 159 /* identity on ULong */ 160 static inline ULong idULong ( ULong x ) 161 { 162 return x; 163 } 164 165 166 #define PREAMBLE(__data_bits) \ 167 /* const */ ULong DATA_MASK \ 168 = __data_bits==8 \ 169 ? 0xFFULL \ 170 : (__data_bits==16 \ 171 ? 0xFFFFULL \ 172 : (__data_bits==32 \ 173 ? 0xFFFFFFFFULL \ 174 : 0xFFFFFFFFFFFFFFFFULL)); \ 175 /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1); \ 176 /* const */ ULong CC_DEP1 = cc_dep1_formal; \ 177 /* const */ ULong CC_DEP2 = cc_dep2_formal; \ 178 /* const */ ULong CC_NDEP = cc_ndep_formal; \ 179 /* Four bogus assignments, which hopefully gcc can */ \ 180 /* optimise away, and which stop it complaining about */ \ 181 /* unused variables. */ \ 182 SIGN_MASK = SIGN_MASK; \ 183 DATA_MASK = DATA_MASK; \ 184 CC_DEP2 = CC_DEP2; \ 185 CC_NDEP = CC_NDEP; 186 187 188 /*-------------------------------------------------------------*/ 189 190 #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \ 191 { \ 192 PREAMBLE(DATA_BITS); \ 193 { ULong cf, pf, af, zf, sf, of; \ 194 ULong argL, argR, res; \ 195 argL = CC_DEP1; \ 196 argR = CC_DEP2; \ 197 res = argL + argR; \ 198 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \ 199 pf = parity_table[(UChar)res]; \ 200 af = (res ^ argL ^ argR) & 0x10; \ 201 zf = ((DATA_UTYPE)res == 0) << 6; \ 202 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 203 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \ 204 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ 205 return cf | pf | af | zf | sf | of; \ 206 } \ 207 } 208 209 /*-------------------------------------------------------------*/ 210 211 #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \ 212 { \ 213 PREAMBLE(DATA_BITS); \ 214 { ULong cf, pf, af, zf, sf, of; \ 215 ULong argL, argR, res; \ 216 argL = CC_DEP1; \ 217 argR = CC_DEP2; \ 218 res = argL - argR; \ 219 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \ 220 pf = parity_table[(UChar)res]; \ 221 af = (res ^ argL ^ argR) & 0x10; \ 222 zf = ((DATA_UTYPE)res == 0) << 6; \ 223 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 224 of = lshift((argL ^ argR) & (argL ^ res), \ 225 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ 226 return cf | pf | af | zf | sf | of; \ 227 } \ 228 } 229 230 /*-------------------------------------------------------------*/ 231 232 #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \ 233 { \ 234 PREAMBLE(DATA_BITS); \ 235 { ULong cf, pf, af, zf, sf, of; \ 236 ULong argL, argR, oldC, res; \ 237 oldC = CC_NDEP & AMD64G_CC_MASK_C; \ 238 argL = CC_DEP1; \ 239 argR = CC_DEP2 ^ oldC; \ 240 res = (argL + argR) + oldC; \ 241 if (oldC) \ 242 cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \ 243 else \ 244 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \ 245 pf = parity_table[(UChar)res]; \ 246 af = (res ^ argL ^ argR) & 0x10; \ 247 zf = ((DATA_UTYPE)res == 0) << 6; \ 248 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 249 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \ 250 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ 251 return cf | pf | af | zf | sf | of; \ 252 } \ 253 } 254 255 /*-------------------------------------------------------------*/ 256 257 #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \ 258 { \ 259 PREAMBLE(DATA_BITS); \ 260 { ULong cf, pf, af, zf, sf, of; \ 261 ULong argL, argR, oldC, res; \ 262 oldC = CC_NDEP & AMD64G_CC_MASK_C; \ 263 argL = CC_DEP1; \ 264 argR = CC_DEP2 ^ oldC; \ 265 res = (argL - argR) - oldC; \ 266 if (oldC) \ 267 cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \ 268 else \ 269 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \ 270 pf = parity_table[(UChar)res]; \ 271 af = (res ^ argL ^ argR) & 0x10; \ 272 zf = ((DATA_UTYPE)res == 0) << 6; \ 273 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 274 of = lshift((argL ^ argR) & (argL ^ res), \ 275 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ 276 return cf | pf | af | zf | sf | of; \ 277 } \ 278 } 279 280 /*-------------------------------------------------------------*/ 281 282 #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \ 283 { \ 284 PREAMBLE(DATA_BITS); \ 285 { ULong cf, pf, af, zf, sf, of; \ 286 cf = 0; \ 287 pf = parity_table[(UChar)CC_DEP1]; \ 288 af = 0; \ 289 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ 290 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ 291 of = 0; \ 292 return cf | pf | af | zf | sf | of; \ 293 } \ 294 } 295 296 /*-------------------------------------------------------------*/ 297 298 #define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \ 299 { \ 300 PREAMBLE(DATA_BITS); \ 301 { ULong cf, pf, af, zf, sf, of; \ 302 ULong argL, argR, res; \ 303 res = CC_DEP1; \ 304 argL = res - 1; \ 305 argR = 1; \ 306 cf = CC_NDEP & AMD64G_CC_MASK_C; \ 307 pf = parity_table[(UChar)res]; \ 308 af = (res ^ argL ^ argR) & 0x10; \ 309 zf = ((DATA_UTYPE)res == 0) << 6; \ 310 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 311 of = ((res & DATA_MASK) == SIGN_MASK) << 11; \ 312 return cf | pf | af | zf | sf | of; \ 313 } \ 314 } 315 316 /*-------------------------------------------------------------*/ 317 318 #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \ 319 { \ 320 PREAMBLE(DATA_BITS); \ 321 { ULong cf, pf, af, zf, sf, of; \ 322 ULong argL, argR, res; \ 323 res = CC_DEP1; \ 324 argL = res + 1; \ 325 argR = 1; \ 326 cf = CC_NDEP & AMD64G_CC_MASK_C; \ 327 pf = parity_table[(UChar)res]; \ 328 af = (res ^ argL ^ argR) & 0x10; \ 329 zf = ((DATA_UTYPE)res == 0) << 6; \ 330 sf = lshift(res, 8 - DATA_BITS) & 0x80; \ 331 of = ((res & DATA_MASK) \ 332 == ((ULong)SIGN_MASK - 1)) << 11; \ 333 return cf | pf | af | zf | sf | of; \ 334 } \ 335 } 336 337 /*-------------------------------------------------------------*/ 338 339 #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \ 340 { \ 341 PREAMBLE(DATA_BITS); \ 342 { ULong cf, pf, af, zf, sf, of; \ 343 cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C; \ 344 pf = parity_table[(UChar)CC_DEP1]; \ 345 af = 0; /* undefined */ \ 346 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ 347 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ 348 /* of is defined if shift count == 1 */ \ 349 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \ 350 & AMD64G_CC_MASK_O; \ 351 return cf | pf | af | zf | sf | of; \ 352 } \ 353 } 354 355 /*-------------------------------------------------------------*/ 356 357 #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \ 358 { \ 359 PREAMBLE(DATA_BITS); \ 360 { ULong cf, pf, af, zf, sf, of; \ 361 cf = CC_DEP2 & 1; \ 362 pf = parity_table[(UChar)CC_DEP1]; \ 363 af = 0; /* undefined */ \ 364 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ 365 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ 366 /* of is defined if shift count == 1 */ \ 367 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \ 368 & AMD64G_CC_MASK_O; \ 369 return cf | pf | af | zf | sf | of; \ 370 } \ 371 } 372 373 /*-------------------------------------------------------------*/ 374 375 /* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */ 376 /* DEP1 = result, NDEP = old flags */ 377 #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \ 378 { \ 379 PREAMBLE(DATA_BITS); \ 380 { ULong fl \ 381 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \ 382 | (AMD64G_CC_MASK_C & CC_DEP1) \ 383 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \ 384 11-(DATA_BITS-1)) \ 385 ^ lshift(CC_DEP1, 11))); \ 386 return fl; \ 387 } \ 388 } 389 390 /*-------------------------------------------------------------*/ 391 392 /* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */ 393 /* DEP1 = result, NDEP = old flags */ 394 #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \ 395 { \ 396 PREAMBLE(DATA_BITS); \ 397 { ULong fl \ 398 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \ 399 | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \ 400 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \ 401 11-(DATA_BITS-1)) \ 402 ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \ 403 return fl; \ 404 } \ 405 } 406 407 /*-------------------------------------------------------------*/ 408 409 #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \ 410 DATA_U2TYPE, NARROWto2U) \ 411 { \ 412 PREAMBLE(DATA_BITS); \ 413 { ULong cf, pf, af, zf, sf, of; \ 414 DATA_UTYPE hi; \ 415 DATA_UTYPE lo \ 416 = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \ 417 * ((DATA_UTYPE)CC_DEP2) ); \ 418 DATA_U2TYPE rr \ 419 = NARROWto2U( \ 420 ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \ 421 * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \ 422 hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \ 423 cf = (hi != 0); \ 424 pf = parity_table[(UChar)lo]; \ 425 af = 0; /* undefined */ \ 426 zf = (lo == 0) << 6; \ 427 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \ 428 of = cf << 11; \ 429 return cf | pf | af | zf | sf | of; \ 430 } \ 431 } 432 433 /*-------------------------------------------------------------*/ 434 435 #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \ 436 DATA_S2TYPE, NARROWto2S) \ 437 { \ 438 PREAMBLE(DATA_BITS); \ 439 { ULong cf, pf, af, zf, sf, of; \ 440 DATA_STYPE hi; \ 441 DATA_STYPE lo \ 442 = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1) \ 443 * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) ); \ 444 DATA_S2TYPE rr \ 445 = NARROWto2S( \ 446 ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \ 447 * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \ 448 hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \ 449 cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \ 450 pf = parity_table[(UChar)lo]; \ 451 af = 0; /* undefined */ \ 452 zf = (lo == 0) << 6; \ 453 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \ 454 of = cf << 11; \ 455 return cf | pf | af | zf | sf | of; \ 456 } \ 457 } 458 459 /*-------------------------------------------------------------*/ 460 461 #define ACTIONS_UMULQ \ 462 { \ 463 PREAMBLE(64); \ 464 { ULong cf, pf, af, zf, sf, of; \ 465 ULong lo, hi; \ 466 mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo ); \ 467 cf = (hi != 0); \ 468 pf = parity_table[(UChar)lo]; \ 469 af = 0; /* undefined */ \ 470 zf = (lo == 0) << 6; \ 471 sf = lshift(lo, 8 - 64) & 0x80; \ 472 of = cf << 11; \ 473 return cf | pf | af | zf | sf | of; \ 474 } \ 475 } 476 477 /*-------------------------------------------------------------*/ 478 479 #define ACTIONS_SMULQ \ 480 { \ 481 PREAMBLE(64); \ 482 { ULong cf, pf, af, zf, sf, of; \ 483 Long lo, hi; \ 484 mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo ); \ 485 cf = (hi != (lo >>/*s*/ (64-1))); \ 486 pf = parity_table[(UChar)lo]; \ 487 af = 0; /* undefined */ \ 488 zf = (lo == 0) << 6; \ 489 sf = lshift(lo, 8 - 64) & 0x80; \ 490 of = cf << 11; \ 491 return cf | pf | af | zf | sf | of; \ 492 } \ 493 } 494 495 /*-------------------------------------------------------------*/ 496 497 #define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE) \ 498 { \ 499 PREAMBLE(DATA_BITS); \ 500 { ULong cf, pf, af, zf, sf, of; \ 501 cf = 0; \ 502 pf = 0; \ 503 af = 0; \ 504 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ 505 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ 506 of = 0; \ 507 return cf | pf | af | zf | sf | of; \ 508 } \ 509 } 510 511 /*-------------------------------------------------------------*/ 512 513 #define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE) \ 514 { \ 515 PREAMBLE(DATA_BITS); \ 516 { ULong cf, pf, af, zf, sf, of; \ 517 cf = ((DATA_UTYPE)CC_DEP2 != 0); \ 518 pf = 0; \ 519 af = 0; \ 520 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ 521 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ 522 of = 0; \ 523 return cf | pf | af | zf | sf | of; \ 524 } \ 525 } 526 527 /*-------------------------------------------------------------*/ 528 529 #define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE) \ 530 { \ 531 PREAMBLE(DATA_BITS); \ 532 { Long cf, pf, af, zf, sf, of; \ 533 cf = ((DATA_UTYPE)CC_DEP2 == 0); \ 534 pf = 0; \ 535 af = 0; \ 536 zf = 0; \ 537 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ 538 of = 0; \ 539 return cf | pf | af | zf | sf | of; \ 540 } \ 541 } 542 543 /*-------------------------------------------------------------*/ 544 545 #define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE) \ 546 { \ 547 PREAMBLE(DATA_BITS); \ 548 { ULong cf, pf, af, zf, sf, of; \ 549 cf = ((DATA_UTYPE)CC_DEP2 == 0); \ 550 pf = 0; \ 551 af = 0; \ 552 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ 553 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ 554 of = 0; \ 555 return cf | pf | af | zf | sf | of; \ 556 } \ 557 } 558 559 /*-------------------------------------------------------------*/ 560 561 562 #if PROFILE_RFLAGS 563 564 static Bool initted = False; 565 566 /* C flag, fast route */ 567 static UInt tabc_fast[AMD64G_CC_OP_NUMBER]; 568 /* C flag, slow route */ 569 static UInt tabc_slow[AMD64G_CC_OP_NUMBER]; 570 /* table for calculate_cond */ 571 static UInt tab_cond[AMD64G_CC_OP_NUMBER][16]; 572 /* total entry counts for calc_all, calc_c, calc_cond. */ 573 static UInt n_calc_all = 0; 574 static UInt n_calc_c = 0; 575 static UInt n_calc_cond = 0; 576 577 #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond))) 578 579 580 static void showCounts ( void ) 581 { 582 Int op, co; 583 HChar ch; 584 vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n", 585 n_calc_all, n_calc_cond, n_calc_c); 586 587 vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE" 588 " S NS P NP L NL LE NLE\n"); 589 vex_printf(" -----------------------------------------------------" 590 "----------------------------------------\n"); 591 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) { 592 593 ch = ' '; 594 if (op > 0 && (op-1) % 4 == 0) 595 ch = 'B'; 596 if (op > 0 && (op-1) % 4 == 1) 597 ch = 'W'; 598 if (op > 0 && (op-1) % 4 == 2) 599 ch = 'L'; 600 if (op > 0 && (op-1) % 4 == 3) 601 ch = 'Q'; 602 603 vex_printf("%2d%c: ", op, ch); 604 vex_printf("%6u ", tabc_slow[op]); 605 vex_printf("%6u ", tabc_fast[op]); 606 for (co = 0; co < 16; co++) { 607 Int n = tab_cond[op][co]; 608 if (n >= 1000) { 609 vex_printf(" %3dK", n / 1000); 610 } else 611 if (n >= 0) { 612 vex_printf(" %3d ", n ); 613 } else { 614 vex_printf(" "); 615 } 616 } 617 vex_printf("\n"); 618 } 619 vex_printf("\n"); 620 } 621 622 static void initCounts ( void ) 623 { 624 Int op, co; 625 initted = True; 626 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) { 627 tabc_fast[op] = tabc_slow[op] = 0; 628 for (co = 0; co < 16; co++) 629 tab_cond[op][co] = 0; 630 } 631 } 632 633 #endif /* PROFILE_RFLAGS */ 634 635 636 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 637 /* Calculate all the 6 flags from the supplied thunk parameters. 638 Worker function, not directly called from generated code. */ 639 static 640 ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op, 641 ULong cc_dep1_formal, 642 ULong cc_dep2_formal, 643 ULong cc_ndep_formal ) 644 { 645 switch (cc_op) { 646 case AMD64G_CC_OP_COPY: 647 return cc_dep1_formal 648 & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z 649 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P); 650 651 case AMD64G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar ); 652 case AMD64G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort ); 653 case AMD64G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt ); 654 case AMD64G_CC_OP_ADDQ: ACTIONS_ADD( 64, ULong ); 655 656 case AMD64G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar ); 657 case AMD64G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort ); 658 case AMD64G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt ); 659 case AMD64G_CC_OP_ADCQ: ACTIONS_ADC( 64, ULong ); 660 661 case AMD64G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar ); 662 case AMD64G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort ); 663 case AMD64G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt ); 664 case AMD64G_CC_OP_SUBQ: ACTIONS_SUB( 64, ULong ); 665 666 case AMD64G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar ); 667 case AMD64G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort ); 668 case AMD64G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt ); 669 case AMD64G_CC_OP_SBBQ: ACTIONS_SBB( 64, ULong ); 670 671 case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar ); 672 case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort ); 673 case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt ); 674 case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong ); 675 676 case AMD64G_CC_OP_INCB: ACTIONS_INC( 8, UChar ); 677 case AMD64G_CC_OP_INCW: ACTIONS_INC( 16, UShort ); 678 case AMD64G_CC_OP_INCL: ACTIONS_INC( 32, UInt ); 679 case AMD64G_CC_OP_INCQ: ACTIONS_INC( 64, ULong ); 680 681 case AMD64G_CC_OP_DECB: ACTIONS_DEC( 8, UChar ); 682 case AMD64G_CC_OP_DECW: ACTIONS_DEC( 16, UShort ); 683 case AMD64G_CC_OP_DECL: ACTIONS_DEC( 32, UInt ); 684 case AMD64G_CC_OP_DECQ: ACTIONS_DEC( 64, ULong ); 685 686 case AMD64G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar ); 687 case AMD64G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort ); 688 case AMD64G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt ); 689 case AMD64G_CC_OP_SHLQ: ACTIONS_SHL( 64, ULong ); 690 691 case AMD64G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar ); 692 case AMD64G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort ); 693 case AMD64G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt ); 694 case AMD64G_CC_OP_SHRQ: ACTIONS_SHR( 64, ULong ); 695 696 case AMD64G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar ); 697 case AMD64G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort ); 698 case AMD64G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt ); 699 case AMD64G_CC_OP_ROLQ: ACTIONS_ROL( 64, ULong ); 700 701 case AMD64G_CC_OP_RORB: ACTIONS_ROR( 8, UChar ); 702 case AMD64G_CC_OP_RORW: ACTIONS_ROR( 16, UShort ); 703 case AMD64G_CC_OP_RORL: ACTIONS_ROR( 32, UInt ); 704 case AMD64G_CC_OP_RORQ: ACTIONS_ROR( 64, ULong ); 705 706 case AMD64G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar, 707 UShort, toUShort ); 708 case AMD64G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort, 709 UInt, toUInt ); 710 case AMD64G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt, 711 ULong, idULong ); 712 713 case AMD64G_CC_OP_UMULQ: ACTIONS_UMULQ; 714 715 case AMD64G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar, 716 Short, toUShort ); 717 case AMD64G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort, 718 Int, toUInt ); 719 case AMD64G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt, 720 Long, idULong ); 721 722 case AMD64G_CC_OP_SMULQ: ACTIONS_SMULQ; 723 724 case AMD64G_CC_OP_ANDN32: ACTIONS_ANDN( 32, UInt ); 725 case AMD64G_CC_OP_ANDN64: ACTIONS_ANDN( 64, ULong ); 726 727 case AMD64G_CC_OP_BLSI32: ACTIONS_BLSI( 32, UInt ); 728 case AMD64G_CC_OP_BLSI64: ACTIONS_BLSI( 64, ULong ); 729 730 case AMD64G_CC_OP_BLSMSK32: ACTIONS_BLSMSK( 32, UInt ); 731 case AMD64G_CC_OP_BLSMSK64: ACTIONS_BLSMSK( 64, ULong ); 732 733 case AMD64G_CC_OP_BLSR32: ACTIONS_BLSR( 32, UInt ); 734 case AMD64G_CC_OP_BLSR64: ACTIONS_BLSR( 64, ULong ); 735 736 default: 737 /* shouldn't really make these calls from generated code */ 738 vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)" 739 "( %llu, 0x%llx, 0x%llx, 0x%llx )\n", 740 cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal ); 741 vpanic("amd64g_calculate_rflags_all_WRK(AMD64)"); 742 } 743 } 744 745 746 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 747 /* Calculate all the 6 flags from the supplied thunk parameters. */ 748 ULong amd64g_calculate_rflags_all ( ULong cc_op, 749 ULong cc_dep1, 750 ULong cc_dep2, 751 ULong cc_ndep ) 752 { 753 # if PROFILE_RFLAGS 754 if (!initted) initCounts(); 755 n_calc_all++; 756 if (SHOW_COUNTS_NOW) showCounts(); 757 # endif 758 return 759 amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep ); 760 } 761 762 763 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 764 /* Calculate just the carry flag from the supplied thunk parameters. */ 765 ULong amd64g_calculate_rflags_c ( ULong cc_op, 766 ULong cc_dep1, 767 ULong cc_dep2, 768 ULong cc_ndep ) 769 { 770 # if PROFILE_RFLAGS 771 if (!initted) initCounts(); 772 n_calc_c++; 773 tabc_fast[cc_op]++; 774 if (SHOW_COUNTS_NOW) showCounts(); 775 # endif 776 777 /* Fast-case some common ones. */ 778 switch (cc_op) { 779 case AMD64G_CC_OP_COPY: 780 return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1; 781 case AMD64G_CC_OP_LOGICQ: 782 case AMD64G_CC_OP_LOGICL: 783 case AMD64G_CC_OP_LOGICW: 784 case AMD64G_CC_OP_LOGICB: 785 return 0; 786 // case AMD64G_CC_OP_SUBL: 787 // return ((UInt)cc_dep1) < ((UInt)cc_dep2) 788 // ? AMD64G_CC_MASK_C : 0; 789 // case AMD64G_CC_OP_SUBW: 790 // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF)) 791 // ? AMD64G_CC_MASK_C : 0; 792 // case AMD64G_CC_OP_SUBB: 793 // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF)) 794 // ? AMD64G_CC_MASK_C : 0; 795 // case AMD64G_CC_OP_INCL: 796 // case AMD64G_CC_OP_DECL: 797 // return cc_ndep & AMD64G_CC_MASK_C; 798 default: 799 break; 800 } 801 802 # if PROFILE_RFLAGS 803 tabc_fast[cc_op]--; 804 tabc_slow[cc_op]++; 805 # endif 806 807 return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep) 808 & AMD64G_CC_MASK_C; 809 } 810 811 812 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 813 /* returns 1 or 0 */ 814 ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond, 815 ULong cc_op, 816 ULong cc_dep1, 817 ULong cc_dep2, 818 ULong cc_ndep ) 819 { 820 ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1, 821 cc_dep2, cc_ndep); 822 ULong of,sf,zf,cf,pf; 823 ULong inv = cond & 1; 824 825 # if PROFILE_RFLAGS 826 if (!initted) initCounts(); 827 tab_cond[cc_op][cond]++; 828 n_calc_cond++; 829 if (SHOW_COUNTS_NOW) showCounts(); 830 # endif 831 832 switch (cond) { 833 case AMD64CondNO: 834 case AMD64CondO: /* OF == 1 */ 835 of = rflags >> AMD64G_CC_SHIFT_O; 836 return 1 & (inv ^ of); 837 838 case AMD64CondNZ: 839 case AMD64CondZ: /* ZF == 1 */ 840 zf = rflags >> AMD64G_CC_SHIFT_Z; 841 return 1 & (inv ^ zf); 842 843 case AMD64CondNB: 844 case AMD64CondB: /* CF == 1 */ 845 cf = rflags >> AMD64G_CC_SHIFT_C; 846 return 1 & (inv ^ cf); 847 break; 848 849 case AMD64CondNBE: 850 case AMD64CondBE: /* (CF or ZF) == 1 */ 851 cf = rflags >> AMD64G_CC_SHIFT_C; 852 zf = rflags >> AMD64G_CC_SHIFT_Z; 853 return 1 & (inv ^ (cf | zf)); 854 break; 855 856 case AMD64CondNS: 857 case AMD64CondS: /* SF == 1 */ 858 sf = rflags >> AMD64G_CC_SHIFT_S; 859 return 1 & (inv ^ sf); 860 861 case AMD64CondNP: 862 case AMD64CondP: /* PF == 1 */ 863 pf = rflags >> AMD64G_CC_SHIFT_P; 864 return 1 & (inv ^ pf); 865 866 case AMD64CondNL: 867 case AMD64CondL: /* (SF xor OF) == 1 */ 868 sf = rflags >> AMD64G_CC_SHIFT_S; 869 of = rflags >> AMD64G_CC_SHIFT_O; 870 return 1 & (inv ^ (sf ^ of)); 871 break; 872 873 case AMD64CondNLE: 874 case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */ 875 sf = rflags >> AMD64G_CC_SHIFT_S; 876 of = rflags >> AMD64G_CC_SHIFT_O; 877 zf = rflags >> AMD64G_CC_SHIFT_Z; 878 return 1 & (inv ^ ((sf ^ of) | zf)); 879 break; 880 881 default: 882 /* shouldn't really make these calls from generated code */ 883 vex_printf("amd64g_calculate_condition" 884 "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n", 885 cond, cc_op, cc_dep1, cc_dep2, cc_ndep ); 886 vpanic("amd64g_calculate_condition"); 887 } 888 } 889 890 891 /* VISIBLE TO LIBVEX CLIENT */ 892 ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State* vex_state ) 893 { 894 ULong rflags = amd64g_calculate_rflags_all_WRK( 895 vex_state->guest_CC_OP, 896 vex_state->guest_CC_DEP1, 897 vex_state->guest_CC_DEP2, 898 vex_state->guest_CC_NDEP 899 ); 900 Long dflag = vex_state->guest_DFLAG; 901 vassert(dflag == 1 || dflag == -1); 902 if (dflag == -1) 903 rflags |= (1<<10); 904 if (vex_state->guest_IDFLAG == 1) 905 rflags |= (1<<21); 906 if (vex_state->guest_ACFLAG == 1) 907 rflags |= (1<<18); 908 909 return rflags; 910 } 911 912 /* VISIBLE TO LIBVEX CLIENT */ 913 void 914 LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag, 915 /*MOD*/VexGuestAMD64State* vex_state ) 916 { 917 ULong oszacp = amd64g_calculate_rflags_all_WRK( 918 vex_state->guest_CC_OP, 919 vex_state->guest_CC_DEP1, 920 vex_state->guest_CC_DEP2, 921 vex_state->guest_CC_NDEP 922 ); 923 if (new_carry_flag & 1) { 924 oszacp |= AMD64G_CC_MASK_C; 925 } else { 926 oszacp &= ~AMD64G_CC_MASK_C; 927 } 928 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY; 929 vex_state->guest_CC_DEP1 = oszacp; 930 vex_state->guest_CC_DEP2 = 0; 931 vex_state->guest_CC_NDEP = 0; 932 } 933 934 935 /*---------------------------------------------------------------*/ 936 /*--- %rflags translation-time function specialisers. ---*/ 937 /*--- These help iropt specialise calls the above run-time ---*/ 938 /*--- %rflags functions. ---*/ 939 /*---------------------------------------------------------------*/ 940 941 /* Used by the optimiser to try specialisations. Returns an 942 equivalent expression, or NULL if none. */ 943 944 static Bool isU64 ( IRExpr* e, ULong n ) 945 { 946 return toBool( e->tag == Iex_Const 947 && e->Iex.Const.con->tag == Ico_U64 948 && e->Iex.Const.con->Ico.U64 == n ); 949 } 950 951 IRExpr* guest_amd64_spechelper ( const HChar* function_name, 952 IRExpr** args, 953 IRStmt** precedingStmts, 954 Int n_precedingStmts ) 955 { 956 # define unop(_op,_a1) IRExpr_Unop((_op),(_a1)) 957 # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2)) 958 # define mkU64(_n) IRExpr_Const(IRConst_U64(_n)) 959 # define mkU32(_n) IRExpr_Const(IRConst_U32(_n)) 960 # define mkU8(_n) IRExpr_Const(IRConst_U8(_n)) 961 962 Int i, arity = 0; 963 for (i = 0; args[i]; i++) 964 arity++; 965 # if 0 966 vex_printf("spec request:\n"); 967 vex_printf(" %s ", function_name); 968 for (i = 0; i < arity; i++) { 969 vex_printf(" "); 970 ppIRExpr(args[i]); 971 } 972 vex_printf("\n"); 973 # endif 974 975 /* --------- specialising "amd64g_calculate_condition" --------- */ 976 977 if (vex_streq(function_name, "amd64g_calculate_condition")) { 978 /* specialise calls to above "calculate condition" function */ 979 IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2; 980 vassert(arity == 5); 981 cond = args[0]; 982 cc_op = args[1]; 983 cc_dep1 = args[2]; 984 cc_dep2 = args[3]; 985 986 /*---------------- ADDQ ----------------*/ 987 988 if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) { 989 /* long long add, then Z --> test (dst+src == 0) */ 990 return unop(Iop_1Uto64, 991 binop(Iop_CmpEQ64, 992 binop(Iop_Add64, cc_dep1, cc_dep2), 993 mkU64(0))); 994 } 995 996 /*---------------- ADDL ----------------*/ 997 998 if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondO)) { 999 /* This is very commonly generated by Javascript JITs, for 1000 the idiom "do a 32-bit add and jump to out-of-line code if 1001 an overflow occurs". */ 1002 /* long add, then O (overflow) 1003 --> ((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 + dep2)))[31] 1004 --> (((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1 1005 --> (((not(dep1 ^ dep2)) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1 1006 */ 1007 vassert(isIRAtom(cc_dep1)); 1008 vassert(isIRAtom(cc_dep2)); 1009 return 1010 binop(Iop_And64, 1011 binop(Iop_Shr64, 1012 binop(Iop_And64, 1013 unop(Iop_Not64, 1014 binop(Iop_Xor64, cc_dep1, cc_dep2)), 1015 binop(Iop_Xor64, 1016 cc_dep1, 1017 binop(Iop_Add64, cc_dep1, cc_dep2))), 1018 mkU8(31)), 1019 mkU64(1)); 1020 1021 } 1022 1023 /*---------------- SUBQ ----------------*/ 1024 1025 /* 0, */ 1026 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondO)) { 1027 /* long long sub/cmp, then O (overflow) 1028 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[63] 1029 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2))) >>u 63 1030 */ 1031 vassert(isIRAtom(cc_dep1)); 1032 vassert(isIRAtom(cc_dep2)); 1033 return binop(Iop_Shr64, 1034 binop(Iop_And64, 1035 binop(Iop_Xor64, cc_dep1, cc_dep2), 1036 binop(Iop_Xor64, 1037 cc_dep1, 1038 binop(Iop_Sub64, cc_dep1, cc_dep2))), 1039 mkU8(64)); 1040 } 1041 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNO)) { 1042 /* No action. Never yet found a test case. */ 1043 } 1044 1045 /* 2, 3 */ 1046 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) { 1047 /* long long sub/cmp, then B (unsigned less than) 1048 --> test dst <u src */ 1049 return unop(Iop_1Uto64, 1050 binop(Iop_CmpLT64U, cc_dep1, cc_dep2)); 1051 } 1052 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) { 1053 /* long long sub/cmp, then NB (unsigned greater than or equal) 1054 --> test src <=u dst */ 1055 /* Note, args are opposite way round from the usual */ 1056 return unop(Iop_1Uto64, 1057 binop(Iop_CmpLE64U, cc_dep2, cc_dep1)); 1058 } 1059 1060 /* 4, 5 */ 1061 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) { 1062 /* long long sub/cmp, then Z --> test dst==src */ 1063 return unop(Iop_1Uto64, 1064 binop(Iop_CmpEQ64,cc_dep1,cc_dep2)); 1065 } 1066 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) { 1067 /* long long sub/cmp, then NZ --> test dst!=src */ 1068 return unop(Iop_1Uto64, 1069 binop(Iop_CmpNE64,cc_dep1,cc_dep2)); 1070 } 1071 1072 /* 6, 7 */ 1073 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) { 1074 /* long long sub/cmp, then BE (unsigned less than or equal) 1075 --> test dst <=u src */ 1076 return unop(Iop_1Uto64, 1077 binop(Iop_CmpLE64U, cc_dep1, cc_dep2)); 1078 } 1079 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNBE)) { 1080 /* long long sub/cmp, then NBE (unsigned greater than) 1081 --> test !(dst <=u src) */ 1082 return binop(Iop_Xor64, 1083 unop(Iop_1Uto64, 1084 binop(Iop_CmpLE64U, cc_dep1, cc_dep2)), 1085 mkU64(1)); 1086 } 1087 1088 /* 8, 9 */ 1089 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondS)) { 1090 /* long long sub/cmp, then S (negative) 1091 --> (dst-src)[63] 1092 --> (dst-src) >>u 63 */ 1093 return binop(Iop_Shr64, 1094 binop(Iop_Sub64, cc_dep1, cc_dep2), 1095 mkU8(63)); 1096 } 1097 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNS)) { 1098 /* long long sub/cmp, then NS (not negative) 1099 --> (dst-src)[63] ^ 1 1100 --> ((dst-src) >>u 63) ^ 1 */ 1101 return binop(Iop_Xor64, 1102 binop(Iop_Shr64, 1103 binop(Iop_Sub64, cc_dep1, cc_dep2), 1104 mkU8(63)), 1105 mkU64(1)); 1106 } 1107 1108 /* 12, 13 */ 1109 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) { 1110 /* long long sub/cmp, then L (signed less than) 1111 --> test dst <s src */ 1112 return unop(Iop_1Uto64, 1113 binop(Iop_CmpLT64S, cc_dep1, cc_dep2)); 1114 } 1115 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNL)) { 1116 /* long long sub/cmp, then NL (signed greater than or equal) 1117 --> test dst >=s src 1118 --> test src <=s dst */ 1119 return unop(Iop_1Uto64, 1120 binop(Iop_CmpLE64S, cc_dep2, cc_dep1)); 1121 } 1122 1123 /* 14, 15 */ 1124 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondLE)) { 1125 /* long long sub/cmp, then LE (signed less than or equal) 1126 --> test dst <=s src */ 1127 return unop(Iop_1Uto64, 1128 binop(Iop_CmpLE64S, cc_dep1, cc_dep2)); 1129 } 1130 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNLE)) { 1131 /* long sub/cmp, then NLE (signed greater than) 1132 --> test !(dst <=s src) 1133 --> test (dst >s src) 1134 --> test (src <s dst) */ 1135 return unop(Iop_1Uto64, 1136 binop(Iop_CmpLT64S, cc_dep2, cc_dep1)); 1137 1138 } 1139 1140 /*---------------- SUBL ----------------*/ 1141 1142 /* 0, */ 1143 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondO)) { 1144 /* This is very commonly generated by Javascript JITs, for 1145 the idiom "do a 32-bit subtract and jump to out-of-line 1146 code if an overflow occurs". */ 1147 /* long sub/cmp, then O (overflow) 1148 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[31] 1149 --> (((dep1 ^ dep2) & (dep1 ^ (dep1 -64 dep2))) >>u 31) & 1 1150 */ 1151 vassert(isIRAtom(cc_dep1)); 1152 vassert(isIRAtom(cc_dep2)); 1153 return 1154 binop(Iop_And64, 1155 binop(Iop_Shr64, 1156 binop(Iop_And64, 1157 binop(Iop_Xor64, cc_dep1, cc_dep2), 1158 binop(Iop_Xor64, 1159 cc_dep1, 1160 binop(Iop_Sub64, cc_dep1, cc_dep2))), 1161 mkU8(31)), 1162 mkU64(1)); 1163 } 1164 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNO)) { 1165 /* No action. Never yet found a test case. */ 1166 } 1167 1168 /* 2, 3 */ 1169 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) { 1170 /* long sub/cmp, then B (unsigned less than) 1171 --> test dst <u src */ 1172 return unop(Iop_1Uto64, 1173 binop(Iop_CmpLT32U, 1174 unop(Iop_64to32, cc_dep1), 1175 unop(Iop_64to32, cc_dep2))); 1176 } 1177 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNB)) { 1178 /* long sub/cmp, then NB (unsigned greater than or equal) 1179 --> test src <=u dst */ 1180 /* Note, args are opposite way round from the usual */ 1181 return unop(Iop_1Uto64, 1182 binop(Iop_CmpLE32U, 1183 unop(Iop_64to32, cc_dep2), 1184 unop(Iop_64to32, cc_dep1))); 1185 } 1186 1187 /* 4, 5 */ 1188 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) { 1189 /* long sub/cmp, then Z --> test dst==src */ 1190 return unop(Iop_1Uto64, 1191 binop(Iop_CmpEQ32, 1192 unop(Iop_64to32, cc_dep1), 1193 unop(Iop_64to32, cc_dep2))); 1194 } 1195 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) { 1196 /* long sub/cmp, then NZ --> test dst!=src */ 1197 return unop(Iop_1Uto64, 1198 binop(Iop_CmpNE32, 1199 unop(Iop_64to32, cc_dep1), 1200 unop(Iop_64to32, cc_dep2))); 1201 } 1202 1203 /* 6, 7 */ 1204 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) { 1205 /* long sub/cmp, then BE (unsigned less than or equal) 1206 --> test dst <=u src */ 1207 return unop(Iop_1Uto64, 1208 binop(Iop_CmpLE32U, 1209 unop(Iop_64to32, cc_dep1), 1210 unop(Iop_64to32, cc_dep2))); 1211 } 1212 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) { 1213 /* long sub/cmp, then NBE (unsigned greater than) 1214 --> test src <u dst */ 1215 /* Note, args are opposite way round from the usual */ 1216 return unop(Iop_1Uto64, 1217 binop(Iop_CmpLT32U, 1218 unop(Iop_64to32, cc_dep2), 1219 unop(Iop_64to32, cc_dep1))); 1220 } 1221 1222 /* 8, 9 */ 1223 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) { 1224 /* long sub/cmp, then S (negative) 1225 --> (dst-src)[31] 1226 --> ((dst -64 src) >>u 31) & 1 1227 Pointless to narrow the args to 32 bit before the subtract. */ 1228 return binop(Iop_And64, 1229 binop(Iop_Shr64, 1230 binop(Iop_Sub64, cc_dep1, cc_dep2), 1231 mkU8(31)), 1232 mkU64(1)); 1233 } 1234 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNS)) { 1235 /* long sub/cmp, then NS (not negative) 1236 --> (dst-src)[31] ^ 1 1237 --> (((dst -64 src) >>u 31) & 1) ^ 1 1238 Pointless to narrow the args to 32 bit before the subtract. */ 1239 return binop(Iop_Xor64, 1240 binop(Iop_And64, 1241 binop(Iop_Shr64, 1242 binop(Iop_Sub64, cc_dep1, cc_dep2), 1243 mkU8(31)), 1244 mkU64(1)), 1245 mkU64(1)); 1246 } 1247 1248 /* 12, 13 */ 1249 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) { 1250 /* long sub/cmp, then L (signed less than) 1251 --> test dst <s src */ 1252 return unop(Iop_1Uto64, 1253 binop(Iop_CmpLT32S, 1254 unop(Iop_64to32, cc_dep1), 1255 unop(Iop_64to32, cc_dep2))); 1256 } 1257 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNL)) { 1258 /* long sub/cmp, then NL (signed greater than or equal) 1259 --> test dst >=s src 1260 --> test src <=s dst */ 1261 return unop(Iop_1Uto64, 1262 binop(Iop_CmpLE32S, 1263 unop(Iop_64to32, cc_dep2), 1264 unop(Iop_64to32, cc_dep1))); 1265 } 1266 1267 /* 14, 15 */ 1268 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) { 1269 /* long sub/cmp, then LE (signed less than or equal) 1270 --> test dst <=s src */ 1271 return unop(Iop_1Uto64, 1272 binop(Iop_CmpLE32S, 1273 unop(Iop_64to32, cc_dep1), 1274 unop(Iop_64to32, cc_dep2))); 1275 1276 } 1277 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) { 1278 /* long sub/cmp, then NLE (signed greater than) 1279 --> test !(dst <=s src) 1280 --> test (dst >s src) 1281 --> test (src <s dst) */ 1282 return unop(Iop_1Uto64, 1283 binop(Iop_CmpLT32S, 1284 unop(Iop_64to32, cc_dep2), 1285 unop(Iop_64to32, cc_dep1))); 1286 1287 } 1288 1289 /*---------------- SUBW ----------------*/ 1290 1291 /* 4, 5 */ 1292 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) { 1293 /* word sub/cmp, then Z --> test dst==src */ 1294 return unop(Iop_1Uto64, 1295 binop(Iop_CmpEQ16, 1296 unop(Iop_64to16,cc_dep1), 1297 unop(Iop_64to16,cc_dep2))); 1298 } 1299 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) { 1300 /* word sub/cmp, then NZ --> test dst!=src */ 1301 return unop(Iop_1Uto64, 1302 binop(Iop_CmpNE16, 1303 unop(Iop_64to16,cc_dep1), 1304 unop(Iop_64to16,cc_dep2))); 1305 } 1306 1307 /* 6, */ 1308 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondBE)) { 1309 /* word sub/cmp, then BE (unsigned less than or equal) 1310 --> test dst <=u src */ 1311 return unop(Iop_1Uto64, 1312 binop(Iop_CmpLE64U, 1313 binop(Iop_Shl64, cc_dep1, mkU8(48)), 1314 binop(Iop_Shl64, cc_dep2, mkU8(48)))); 1315 } 1316 1317 /* 14, */ 1318 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) { 1319 /* word sub/cmp, then LE (signed less than or equal) 1320 --> test dst <=s src */ 1321 return unop(Iop_1Uto64, 1322 binop(Iop_CmpLE64S, 1323 binop(Iop_Shl64,cc_dep1,mkU8(48)), 1324 binop(Iop_Shl64,cc_dep2,mkU8(48)))); 1325 1326 } 1327 1328 /*---------------- SUBB ----------------*/ 1329 1330 /* 2, 3 */ 1331 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondB)) { 1332 /* byte sub/cmp, then B (unsigned less than) 1333 --> test dst <u src */ 1334 return unop(Iop_1Uto64, 1335 binop(Iop_CmpLT64U, 1336 binop(Iop_And64, cc_dep1, mkU64(0xFF)), 1337 binop(Iop_And64, cc_dep2, mkU64(0xFF)))); 1338 } 1339 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNB)) { 1340 /* byte sub/cmp, then NB (unsigned greater than or equal) 1341 --> test src <=u dst */ 1342 /* Note, args are opposite way round from the usual */ 1343 return unop(Iop_1Uto64, 1344 binop(Iop_CmpLE64U, 1345 binop(Iop_And64, cc_dep2, mkU64(0xFF)), 1346 binop(Iop_And64, cc_dep1, mkU64(0xFF)))); 1347 } 1348 1349 /* 4, 5 */ 1350 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) { 1351 /* byte sub/cmp, then Z --> test dst==src */ 1352 return unop(Iop_1Uto64, 1353 binop(Iop_CmpEQ8, 1354 unop(Iop_64to8,cc_dep1), 1355 unop(Iop_64to8,cc_dep2))); 1356 } 1357 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) { 1358 /* byte sub/cmp, then NZ --> test dst!=src */ 1359 return unop(Iop_1Uto64, 1360 binop(Iop_CmpNE8, 1361 unop(Iop_64to8,cc_dep1), 1362 unop(Iop_64to8,cc_dep2))); 1363 } 1364 1365 /* 6, */ 1366 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) { 1367 /* byte sub/cmp, then BE (unsigned less than or equal) 1368 --> test dst <=u src */ 1369 return unop(Iop_1Uto64, 1370 binop(Iop_CmpLE64U, 1371 binop(Iop_And64, cc_dep1, mkU64(0xFF)), 1372 binop(Iop_And64, cc_dep2, mkU64(0xFF)))); 1373 } 1374 1375 /* 8, 9 */ 1376 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS) 1377 && isU64(cc_dep2, 0)) { 1378 /* byte sub/cmp of zero, then S --> test (dst-0 <s 0) 1379 --> test dst <s 0 1380 --> (ULong)dst[7] 1381 This is yet another scheme by which gcc figures out if the 1382 top bit of a byte is 1 or 0. See also LOGICB/CondS below. */ 1383 /* Note: isU64(cc_dep2, 0) is correct, even though this is 1384 for an 8-bit comparison, since the args to the helper 1385 function are always U64s. */ 1386 return binop(Iop_And64, 1387 binop(Iop_Shr64,cc_dep1,mkU8(7)), 1388 mkU64(1)); 1389 } 1390 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS) 1391 && isU64(cc_dep2, 0)) { 1392 /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0) 1393 --> test !(dst <s 0) 1394 --> (ULong) !dst[7] 1395 */ 1396 return binop(Iop_Xor64, 1397 binop(Iop_And64, 1398 binop(Iop_Shr64,cc_dep1,mkU8(7)), 1399 mkU64(1)), 1400 mkU64(1)); 1401 } 1402 1403 /*---------------- LOGICQ ----------------*/ 1404 1405 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) { 1406 /* long long and/or/xor, then Z --> test dst==0 */ 1407 return unop(Iop_1Uto64, 1408 binop(Iop_CmpEQ64, cc_dep1, mkU64(0))); 1409 } 1410 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) { 1411 /* long long and/or/xor, then NZ --> test dst!=0 */ 1412 return unop(Iop_1Uto64, 1413 binop(Iop_CmpNE64, cc_dep1, mkU64(0))); 1414 } 1415 1416 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) { 1417 /* long long and/or/xor, then L 1418 LOGIC sets SF and ZF according to the 1419 result and makes OF be zero. L computes SF ^ OF, but 1420 OF is zero, so this reduces to SF -- which will be 1 iff 1421 the result is < signed 0. Hence ... 1422 */ 1423 return unop(Iop_1Uto64, 1424 binop(Iop_CmpLT64S, 1425 cc_dep1, 1426 mkU64(0))); 1427 } 1428 1429 /*---------------- LOGICL ----------------*/ 1430 1431 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) { 1432 /* long and/or/xor, then Z --> test dst==0 */ 1433 return unop(Iop_1Uto64, 1434 binop(Iop_CmpEQ32, 1435 unop(Iop_64to32, cc_dep1), 1436 mkU32(0))); 1437 } 1438 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) { 1439 /* long and/or/xor, then NZ --> test dst!=0 */ 1440 return unop(Iop_1Uto64, 1441 binop(Iop_CmpNE32, 1442 unop(Iop_64to32, cc_dep1), 1443 mkU32(0))); 1444 } 1445 1446 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) { 1447 /* long and/or/xor, then LE 1448 This is pretty subtle. LOGIC sets SF and ZF according to the 1449 result and makes OF be zero. LE computes (SF ^ OF) | ZF, but 1450 OF is zero, so this reduces to SF | ZF -- which will be 1 iff 1451 the result is <=signed 0. Hence ... 1452 */ 1453 return unop(Iop_1Uto64, 1454 binop(Iop_CmpLE32S, 1455 unop(Iop_64to32, cc_dep1), 1456 mkU32(0))); 1457 } 1458 1459 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) { 1460 /* long and/or/xor, then S --> (ULong)result[31] */ 1461 return binop(Iop_And64, 1462 binop(Iop_Shr64, cc_dep1, mkU8(31)), 1463 mkU64(1)); 1464 } 1465 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) { 1466 /* long and/or/xor, then S --> (ULong) ~ result[31] */ 1467 return binop(Iop_Xor64, 1468 binop(Iop_And64, 1469 binop(Iop_Shr64, cc_dep1, mkU8(31)), 1470 mkU64(1)), 1471 mkU64(1)); 1472 } 1473 1474 /*---------------- LOGICW ----------------*/ 1475 1476 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondZ)) { 1477 /* word and/or/xor, then Z --> test dst==0 */ 1478 return unop(Iop_1Uto64, 1479 binop(Iop_CmpEQ64, 1480 binop(Iop_And64, cc_dep1, mkU64(0xFFFF)), 1481 mkU64(0))); 1482 } 1483 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNZ)) { 1484 /* word and/or/xor, then NZ --> test dst!=0 */ 1485 return unop(Iop_1Uto64, 1486 binop(Iop_CmpNE64, 1487 binop(Iop_And64, cc_dep1, mkU64(0xFFFF)), 1488 mkU64(0))); 1489 } 1490 1491 /*---------------- LOGICB ----------------*/ 1492 1493 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) { 1494 /* byte and/or/xor, then Z --> test dst==0 */ 1495 return unop(Iop_1Uto64, 1496 binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)), 1497 mkU64(0))); 1498 } 1499 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) { 1500 /* byte and/or/xor, then NZ --> test dst!=0 */ 1501 return unop(Iop_1Uto64, 1502 binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)), 1503 mkU64(0))); 1504 } 1505 1506 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) { 1507 /* this is an idiom gcc sometimes uses to find out if the top 1508 bit of a byte register is set: eg testb %al,%al; js .. 1509 Since it just depends on the top bit of the byte, extract 1510 that bit and explicitly get rid of all the rest. This 1511 helps memcheck avoid false positives in the case where any 1512 of the other bits in the byte are undefined. */ 1513 /* byte and/or/xor, then S --> (UInt)result[7] */ 1514 return binop(Iop_And64, 1515 binop(Iop_Shr64,cc_dep1,mkU8(7)), 1516 mkU64(1)); 1517 } 1518 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) { 1519 /* byte and/or/xor, then NS --> (UInt)!result[7] */ 1520 return binop(Iop_Xor64, 1521 binop(Iop_And64, 1522 binop(Iop_Shr64,cc_dep1,mkU8(7)), 1523 mkU64(1)), 1524 mkU64(1)); 1525 } 1526 1527 /*---------------- INCB ----------------*/ 1528 1529 if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) { 1530 /* 8-bit inc, then LE --> sign bit of the arg */ 1531 return binop(Iop_And64, 1532 binop(Iop_Shr64, 1533 binop(Iop_Sub64, cc_dep1, mkU64(1)), 1534 mkU8(7)), 1535 mkU64(1)); 1536 } 1537 1538 /*---------------- INCW ----------------*/ 1539 1540 if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) { 1541 /* 16-bit inc, then Z --> test dst == 0 */ 1542 return unop(Iop_1Uto64, 1543 binop(Iop_CmpEQ64, 1544 binop(Iop_Shl64,cc_dep1,mkU8(48)), 1545 mkU64(0))); 1546 } 1547 1548 /*---------------- DECL ----------------*/ 1549 1550 if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) { 1551 /* dec L, then Z --> test dst == 0 */ 1552 return unop(Iop_1Uto64, 1553 binop(Iop_CmpEQ32, 1554 unop(Iop_64to32, cc_dep1), 1555 mkU32(0))); 1556 } 1557 1558 /*---------------- DECW ----------------*/ 1559 1560 if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) { 1561 /* 16-bit dec, then NZ --> test dst != 0 */ 1562 return unop(Iop_1Uto64, 1563 binop(Iop_CmpNE64, 1564 binop(Iop_Shl64,cc_dep1,mkU8(48)), 1565 mkU64(0))); 1566 } 1567 1568 /*---------------- COPY ----------------*/ 1569 /* This can happen, as a result of amd64 FP compares: "comisd ... ; 1570 jbe" for example. */ 1571 1572 if (isU64(cc_op, AMD64G_CC_OP_COPY) && 1573 (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) { 1574 /* COPY, then BE --> extract C and Z from dep1, and test (C 1575 or Z == 1). */ 1576 /* COPY, then NBE --> extract C and Z from dep1, and test (C 1577 or Z == 0). */ 1578 ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0; 1579 return 1580 unop( 1581 Iop_1Uto64, 1582 binop( 1583 Iop_CmpEQ64, 1584 binop( 1585 Iop_And64, 1586 binop( 1587 Iop_Or64, 1588 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)), 1589 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)) 1590 ), 1591 mkU64(1) 1592 ), 1593 mkU64(nnn) 1594 ) 1595 ); 1596 } 1597 1598 if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) { 1599 /* COPY, then B --> extract C dep1, and test (C == 1). */ 1600 return 1601 unop( 1602 Iop_1Uto64, 1603 binop( 1604 Iop_CmpNE64, 1605 binop( 1606 Iop_And64, 1607 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)), 1608 mkU64(1) 1609 ), 1610 mkU64(0) 1611 ) 1612 ); 1613 } 1614 1615 if (isU64(cc_op, AMD64G_CC_OP_COPY) 1616 && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) { 1617 /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */ 1618 /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */ 1619 UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0; 1620 return 1621 unop( 1622 Iop_1Uto64, 1623 binop( 1624 Iop_CmpEQ64, 1625 binop( 1626 Iop_And64, 1627 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)), 1628 mkU64(1) 1629 ), 1630 mkU64(nnn) 1631 ) 1632 ); 1633 } 1634 1635 if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) { 1636 /* COPY, then P --> extract P from dep1, and test (P == 1). */ 1637 return 1638 unop( 1639 Iop_1Uto64, 1640 binop( 1641 Iop_CmpNE64, 1642 binop( 1643 Iop_And64, 1644 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)), 1645 mkU64(1) 1646 ), 1647 mkU64(0) 1648 ) 1649 ); 1650 } 1651 1652 return NULL; 1653 } 1654 1655 /* --------- specialising "amd64g_calculate_rflags_c" --------- */ 1656 1657 if (vex_streq(function_name, "amd64g_calculate_rflags_c")) { 1658 /* specialise calls to above "calculate_rflags_c" function */ 1659 IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep; 1660 vassert(arity == 4); 1661 cc_op = args[0]; 1662 cc_dep1 = args[1]; 1663 cc_dep2 = args[2]; 1664 cc_ndep = args[3]; 1665 1666 if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) { 1667 /* C after sub denotes unsigned less than */ 1668 return unop(Iop_1Uto64, 1669 binop(Iop_CmpLT64U, 1670 cc_dep1, 1671 cc_dep2)); 1672 } 1673 if (isU64(cc_op, AMD64G_CC_OP_SUBL)) { 1674 /* C after sub denotes unsigned less than */ 1675 return unop(Iop_1Uto64, 1676 binop(Iop_CmpLT32U, 1677 unop(Iop_64to32, cc_dep1), 1678 unop(Iop_64to32, cc_dep2))); 1679 } 1680 if (isU64(cc_op, AMD64G_CC_OP_SUBB)) { 1681 /* C after sub denotes unsigned less than */ 1682 return unop(Iop_1Uto64, 1683 binop(Iop_CmpLT64U, 1684 binop(Iop_And64,cc_dep1,mkU64(0xFF)), 1685 binop(Iop_And64,cc_dep2,mkU64(0xFF)))); 1686 } 1687 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) 1688 || isU64(cc_op, AMD64G_CC_OP_LOGICL) 1689 || isU64(cc_op, AMD64G_CC_OP_LOGICW) 1690 || isU64(cc_op, AMD64G_CC_OP_LOGICB)) { 1691 /* cflag after logic is zero */ 1692 return mkU64(0); 1693 } 1694 if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL) 1695 || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) { 1696 /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */ 1697 return cc_ndep; 1698 } 1699 1700 # if 0 1701 if (cc_op->tag == Iex_Const) { 1702 vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n"); 1703 } 1704 # endif 1705 1706 return NULL; 1707 } 1708 1709 # undef unop 1710 # undef binop 1711 # undef mkU64 1712 # undef mkU32 1713 # undef mkU8 1714 1715 return NULL; 1716 } 1717 1718 1719 /*---------------------------------------------------------------*/ 1720 /*--- Supporting functions for x87 FPU activities. ---*/ 1721 /*---------------------------------------------------------------*/ 1722 1723 static inline Bool host_is_little_endian ( void ) 1724 { 1725 UInt x = 0x76543210; 1726 UChar* p = (UChar*)(&x); 1727 return toBool(*p == 0x10); 1728 } 1729 1730 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */ 1731 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 1732 ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl ) 1733 { 1734 Bool mantissaIsZero; 1735 Int bexp; 1736 UChar sign; 1737 UChar* f64; 1738 1739 vassert(host_is_little_endian()); 1740 1741 /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */ 1742 1743 f64 = (UChar*)(&dbl); 1744 sign = toUChar( (f64[7] >> 7) & 1 ); 1745 1746 /* First off, if the tag indicates the register was empty, 1747 return 1,0,sign,1 */ 1748 if (tag == 0) { 1749 /* vex_printf("Empty\n"); */ 1750 return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1) 1751 | AMD64G_FC_MASK_C0; 1752 } 1753 1754 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F); 1755 bexp &= 0x7FF; 1756 1757 mantissaIsZero 1758 = toBool( 1759 (f64[6] & 0x0F) == 0 1760 && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0 1761 ); 1762 1763 /* If both exponent and mantissa are zero, the value is zero. 1764 Return 1,0,sign,0. */ 1765 if (bexp == 0 && mantissaIsZero) { 1766 /* vex_printf("Zero\n"); */ 1767 return AMD64G_FC_MASK_C3 | 0 1768 | (sign << AMD64G_FC_SHIFT_C1) | 0; 1769 } 1770 1771 /* If exponent is zero but mantissa isn't, it's a denormal. 1772 Return 1,1,sign,0. */ 1773 if (bexp == 0 && !mantissaIsZero) { 1774 /* vex_printf("Denormal\n"); */ 1775 return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2 1776 | (sign << AMD64G_FC_SHIFT_C1) | 0; 1777 } 1778 1779 /* If the exponent is 7FF and the mantissa is zero, this is an infinity. 1780 Return 0,1,sign,1. */ 1781 if (bexp == 0x7FF && mantissaIsZero) { 1782 /* vex_printf("Inf\n"); */ 1783 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) 1784 | AMD64G_FC_MASK_C0; 1785 } 1786 1787 /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN. 1788 Return 0,0,sign,1. */ 1789 if (bexp == 0x7FF && !mantissaIsZero) { 1790 /* vex_printf("NaN\n"); */ 1791 return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0; 1792 } 1793 1794 /* Uh, ok, we give up. It must be a normal finite number. 1795 Return 0,1,sign,0. 1796 */ 1797 /* vex_printf("normal\n"); */ 1798 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0; 1799 } 1800 1801 1802 /* This is used to implement both 'frstor' and 'fldenv'. The latter 1803 appears to differ from the former only in that the 8 FP registers 1804 themselves are not transferred into the guest state. */ 1805 static 1806 VexEmNote do_put_x87 ( Bool moveRegs, 1807 /*IN*/UChar* x87_state, 1808 /*OUT*/VexGuestAMD64State* vex_state ) 1809 { 1810 Int stno, preg; 1811 UInt tag; 1812 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); 1813 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); 1814 Fpu_State* x87 = (Fpu_State*)x87_state; 1815 UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7; 1816 UInt tagw = x87->env[FP_ENV_TAG]; 1817 UInt fpucw = x87->env[FP_ENV_CTRL]; 1818 UInt c3210 = x87->env[FP_ENV_STAT] & 0x4700; 1819 VexEmNote ew; 1820 UInt fpround; 1821 ULong pair; 1822 1823 /* Copy registers and tags */ 1824 for (stno = 0; stno < 8; stno++) { 1825 preg = (stno + ftop) & 7; 1826 tag = (tagw >> (2*preg)) & 3; 1827 if (tag == 3) { 1828 /* register is empty */ 1829 /* hmm, if it's empty, does it still get written? Probably 1830 safer to say it does. If we don't, memcheck could get out 1831 of sync, in that it thinks all FP registers are defined by 1832 this helper, but in reality some have not been updated. */ 1833 if (moveRegs) 1834 vexRegs[preg] = 0; /* IEEE754 64-bit zero */ 1835 vexTags[preg] = 0; 1836 } else { 1837 /* register is non-empty */ 1838 if (moveRegs) 1839 convert_f80le_to_f64le( &x87->reg[10*stno], 1840 (UChar*)&vexRegs[preg] ); 1841 vexTags[preg] = 1; 1842 } 1843 } 1844 1845 /* stack pointer */ 1846 vex_state->guest_FTOP = ftop; 1847 1848 /* status word */ 1849 vex_state->guest_FC3210 = c3210; 1850 1851 /* handle the control word, setting FPROUND and detecting any 1852 emulation warnings. */ 1853 pair = amd64g_check_fldcw ( (ULong)fpucw ); 1854 fpround = (UInt)pair & 0xFFFFFFFFULL; 1855 ew = (VexEmNote)(pair >> 32); 1856 1857 vex_state->guest_FPROUND = fpround & 3; 1858 1859 /* emulation warnings --> caller */ 1860 return ew; 1861 } 1862 1863 1864 /* Create an x87 FPU state from the guest state, as close as 1865 we can approximate it. */ 1866 static 1867 void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state, 1868 /*OUT*/UChar* x87_state ) 1869 { 1870 Int i, stno, preg; 1871 UInt tagw; 1872 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); 1873 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); 1874 Fpu_State* x87 = (Fpu_State*)x87_state; 1875 UInt ftop = vex_state->guest_FTOP; 1876 UInt c3210 = vex_state->guest_FC3210; 1877 1878 for (i = 0; i < 14; i++) 1879 x87->env[i] = 0; 1880 1881 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF; 1882 x87->env[FP_ENV_STAT] 1883 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700)); 1884 x87->env[FP_ENV_CTRL] 1885 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND )); 1886 1887 /* Dump the register stack in ST order. */ 1888 tagw = 0; 1889 for (stno = 0; stno < 8; stno++) { 1890 preg = (stno + ftop) & 7; 1891 if (vexTags[preg] == 0) { 1892 /* register is empty */ 1893 tagw |= (3 << (2*preg)); 1894 convert_f64le_to_f80le( (UChar*)&vexRegs[preg], 1895 &x87->reg[10*stno] ); 1896 } else { 1897 /* register is full. */ 1898 tagw |= (0 << (2*preg)); 1899 convert_f64le_to_f80le( (UChar*)&vexRegs[preg], 1900 &x87->reg[10*stno] ); 1901 } 1902 } 1903 x87->env[FP_ENV_TAG] = toUShort(tagw); 1904 } 1905 1906 1907 /* CALLED FROM GENERATED CODE */ 1908 /* DIRTY HELPER (reads guest state, writes guest mem) */ 1909 /* NOTE: only handles 32-bit format (no REX.W on the insn) */ 1910 void amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst, 1911 HWord addr ) 1912 { 1913 /* Derived from values obtained from 1914 vendor_id : AuthenticAMD 1915 cpu family : 15 1916 model : 12 1917 model name : AMD Athlon(tm) 64 Processor 3200+ 1918 stepping : 0 1919 cpu MHz : 2200.000 1920 cache size : 512 KB 1921 */ 1922 /* Somewhat roundabout, but at least it's simple. */ 1923 Fpu_State tmp; 1924 UShort* addrS = (UShort*)addr; 1925 UChar* addrC = (UChar*)addr; 1926 UInt mxcsr; 1927 UShort fp_tags; 1928 UInt summary_tags; 1929 Int r, stno; 1930 UShort *srcS, *dstS; 1931 1932 do_get_x87( gst, (UChar*)&tmp ); 1933 mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND ); 1934 1935 /* Now build the proper fxsave image from the x87 image we just 1936 made. */ 1937 1938 addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */ 1939 addrS[1] = tmp.env[FP_ENV_STAT]; /* FCW: fpu status word */ 1940 1941 /* set addrS[2] in an endian-independent way */ 1942 summary_tags = 0; 1943 fp_tags = tmp.env[FP_ENV_TAG]; 1944 for (r = 0; r < 8; r++) { 1945 if ( ((fp_tags >> (2*r)) & 3) != 3 ) 1946 summary_tags |= (1 << r); 1947 } 1948 addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */ 1949 addrC[5] = 0; /* pad */ 1950 1951 /* FOP: faulting fpu opcode. From experimentation, the real CPU 1952 does not write this field. (?!) */ 1953 addrS[3] = 0; /* BOGUS */ 1954 1955 /* RIP (Last x87 instruction pointer). From experimentation, the 1956 real CPU does not write this field. (?!) */ 1957 addrS[4] = 0; /* BOGUS */ 1958 addrS[5] = 0; /* BOGUS */ 1959 addrS[6] = 0; /* BOGUS */ 1960 addrS[7] = 0; /* BOGUS */ 1961 1962 /* RDP (Last x87 data pointer). From experimentation, the real CPU 1963 does not write this field. (?!) */ 1964 addrS[8] = 0; /* BOGUS */ 1965 addrS[9] = 0; /* BOGUS */ 1966 addrS[10] = 0; /* BOGUS */ 1967 addrS[11] = 0; /* BOGUS */ 1968 1969 addrS[12] = toUShort(mxcsr); /* MXCSR */ 1970 addrS[13] = toUShort(mxcsr >> 16); 1971 1972 addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */ 1973 addrS[15] = 0x0000; /* MXCSR mask (hi16) */ 1974 1975 /* Copy in the FP registers, in ST order. */ 1976 for (stno = 0; stno < 8; stno++) { 1977 srcS = (UShort*)(&tmp.reg[10*stno]); 1978 dstS = (UShort*)(&addrS[16 + 8*stno]); 1979 dstS[0] = srcS[0]; 1980 dstS[1] = srcS[1]; 1981 dstS[2] = srcS[2]; 1982 dstS[3] = srcS[3]; 1983 dstS[4] = srcS[4]; 1984 dstS[5] = 0; 1985 dstS[6] = 0; 1986 dstS[7] = 0; 1987 } 1988 1989 /* That's the first 160 bytes of the image done. Now only %xmm0 1990 .. %xmm15 remain to be copied, and we let the generated IR do 1991 that, so as to make Memcheck's definedness flow for the non-XMM 1992 parts independant from that of the all the other control and 1993 status words in the structure. This avoids the false positives 1994 shown in #291310. */ 1995 } 1996 1997 1998 /* CALLED FROM GENERATED CODE */ 1999 /* DIRTY HELPER (writes guest state, reads guest mem) */ 2000 VexEmNote amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst, 2001 HWord addr ) 2002 { 2003 Fpu_State tmp; 2004 VexEmNote warnX87 = EmNote_NONE; 2005 VexEmNote warnXMM = EmNote_NONE; 2006 UShort* addrS = (UShort*)addr; 2007 UChar* addrC = (UChar*)addr; 2008 UShort fp_tags; 2009 Int r, stno, i; 2010 2011 /* Don't restore %xmm0 .. %xmm15, for the same reasons that 2012 amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM doesn't save them. See 2013 comment in that function for details. */ 2014 2015 /* Copy the x87 registers out of the image, into a temporary 2016 Fpu_State struct. */ 2017 for (i = 0; i < 14; i++) tmp.env[i] = 0; 2018 for (i = 0; i < 80; i++) tmp.reg[i] = 0; 2019 /* fill in tmp.reg[0..7] */ 2020 for (stno = 0; stno < 8; stno++) { 2021 UShort* dstS = (UShort*)(&tmp.reg[10*stno]); 2022 UShort* srcS = (UShort*)(&addrS[16 + 8*stno]); 2023 dstS[0] = srcS[0]; 2024 dstS[1] = srcS[1]; 2025 dstS[2] = srcS[2]; 2026 dstS[3] = srcS[3]; 2027 dstS[4] = srcS[4]; 2028 } 2029 /* fill in tmp.env[0..13] */ 2030 tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */ 2031 tmp.env[FP_ENV_STAT] = addrS[1]; /* FCW: fpu status word */ 2032 2033 fp_tags = 0; 2034 for (r = 0; r < 8; r++) { 2035 if (addrC[4] & (1<<r)) 2036 fp_tags |= (0 << (2*r)); /* EMPTY */ 2037 else 2038 fp_tags |= (3 << (2*r)); /* VALID -- not really precise enough. */ 2039 } 2040 tmp.env[FP_ENV_TAG] = fp_tags; 2041 2042 /* Now write 'tmp' into the guest state. */ 2043 warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst ); 2044 2045 { UInt w32 = (((UInt)addrS[12]) & 0xFFFF) 2046 | ((((UInt)addrS[13]) & 0xFFFF) << 16); 2047 ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 ); 2048 2049 warnXMM = (VexEmNote)(w64 >> 32); 2050 2051 gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL; 2052 } 2053 2054 /* Prefer an X87 emwarn over an XMM one, if both exist. */ 2055 if (warnX87 != EmNote_NONE) 2056 return warnX87; 2057 else 2058 return warnXMM; 2059 } 2060 2061 2062 /* DIRTY HELPER (writes guest state) */ 2063 /* Initialise the x87 FPU state as per 'finit'. */ 2064 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst ) 2065 { 2066 Int i; 2067 gst->guest_FTOP = 0; 2068 for (i = 0; i < 8; i++) { 2069 gst->guest_FPTAG[i] = 0; /* empty */ 2070 gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */ 2071 } 2072 gst->guest_FPROUND = (ULong)Irrm_NEAREST; 2073 gst->guest_FC3210 = 0; 2074 } 2075 2076 2077 /* CALLED FROM GENERATED CODE */ 2078 /* DIRTY HELPER (reads guest memory) */ 2079 ULong amd64g_dirtyhelper_loadF80le ( Addr addrU ) 2080 { 2081 ULong f64; 2082 convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 ); 2083 return f64; 2084 } 2085 2086 /* CALLED FROM GENERATED CODE */ 2087 /* DIRTY HELPER (writes guest memory) */ 2088 void amd64g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 ) 2089 { 2090 convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU ); 2091 } 2092 2093 2094 /* CALLED FROM GENERATED CODE */ 2095 /* CLEAN HELPER */ 2096 /* mxcsr[15:0] contains a SSE native format MXCSR value. 2097 Extract from it the required SSEROUND value and any resulting 2098 emulation warning, and return (warn << 32) | sseround value. 2099 */ 2100 ULong amd64g_check_ldmxcsr ( ULong mxcsr ) 2101 { 2102 /* Decide on a rounding mode. mxcsr[14:13] holds it. */ 2103 /* NOTE, encoded exactly as per enum IRRoundingMode. */ 2104 ULong rmode = (mxcsr >> 13) & 3; 2105 2106 /* Detect any required emulation warnings. */ 2107 VexEmNote ew = EmNote_NONE; 2108 2109 if ((mxcsr & 0x1F80) != 0x1F80) { 2110 /* unmasked exceptions! */ 2111 ew = EmWarn_X86_sseExns; 2112 } 2113 else 2114 if (mxcsr & (1<<15)) { 2115 /* FZ is set */ 2116 ew = EmWarn_X86_fz; 2117 } 2118 else 2119 if (mxcsr & (1<<6)) { 2120 /* DAZ is set */ 2121 ew = EmWarn_X86_daz; 2122 } 2123 2124 return (((ULong)ew) << 32) | ((ULong)rmode); 2125 } 2126 2127 2128 /* CALLED FROM GENERATED CODE */ 2129 /* CLEAN HELPER */ 2130 /* Given sseround as an IRRoundingMode value, create a suitable SSE 2131 native format MXCSR value. */ 2132 ULong amd64g_create_mxcsr ( ULong sseround ) 2133 { 2134 sseround &= 3; 2135 return 0x1F80 | (sseround << 13); 2136 } 2137 2138 2139 /* CLEAN HELPER */ 2140 /* fpucw[15:0] contains a x87 native format FPU control word. 2141 Extract from it the required FPROUND value and any resulting 2142 emulation warning, and return (warn << 32) | fpround value. 2143 */ 2144 ULong amd64g_check_fldcw ( ULong fpucw ) 2145 { 2146 /* Decide on a rounding mode. fpucw[11:10] holds it. */ 2147 /* NOTE, encoded exactly as per enum IRRoundingMode. */ 2148 ULong rmode = (fpucw >> 10) & 3; 2149 2150 /* Detect any required emulation warnings. */ 2151 VexEmNote ew = EmNote_NONE; 2152 2153 if ((fpucw & 0x3F) != 0x3F) { 2154 /* unmasked exceptions! */ 2155 ew = EmWarn_X86_x87exns; 2156 } 2157 else 2158 if (((fpucw >> 8) & 3) != 3) { 2159 /* unsupported precision */ 2160 ew = EmWarn_X86_x87precision; 2161 } 2162 2163 return (((ULong)ew) << 32) | ((ULong)rmode); 2164 } 2165 2166 2167 /* CLEAN HELPER */ 2168 /* Given fpround as an IRRoundingMode value, create a suitable x87 2169 native format FPU control word. */ 2170 ULong amd64g_create_fpucw ( ULong fpround ) 2171 { 2172 fpround &= 3; 2173 return 0x037F | (fpround << 10); 2174 } 2175 2176 2177 /* This is used to implement 'fldenv'. 2178 Reads 28 bytes at x87_state[0 .. 27]. */ 2179 /* CALLED FROM GENERATED CODE */ 2180 /* DIRTY HELPER */ 2181 VexEmNote amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state, 2182 /*IN*/HWord x87_state) 2183 { 2184 return do_put_x87( False, (UChar*)x87_state, vex_state ); 2185 } 2186 2187 2188 /* CALLED FROM GENERATED CODE */ 2189 /* DIRTY HELPER */ 2190 /* Create an x87 FPU env from the guest state, as close as we can 2191 approximate it. Writes 28 bytes at x87_state[0..27]. */ 2192 void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state, 2193 /*OUT*/HWord x87_state ) 2194 { 2195 Int i, stno, preg; 2196 UInt tagw; 2197 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); 2198 Fpu_State* x87 = (Fpu_State*)x87_state; 2199 UInt ftop = vex_state->guest_FTOP; 2200 ULong c3210 = vex_state->guest_FC3210; 2201 2202 for (i = 0; i < 14; i++) 2203 x87->env[i] = 0; 2204 2205 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF; 2206 x87->env[FP_ENV_STAT] 2207 = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) )); 2208 x87->env[FP_ENV_CTRL] 2209 = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) )); 2210 2211 /* Compute the x87 tag word. */ 2212 tagw = 0; 2213 for (stno = 0; stno < 8; stno++) { 2214 preg = (stno + ftop) & 7; 2215 if (vexTags[preg] == 0) { 2216 /* register is empty */ 2217 tagw |= (3 << (2*preg)); 2218 } else { 2219 /* register is full. */ 2220 tagw |= (0 << (2*preg)); 2221 } 2222 } 2223 x87->env[FP_ENV_TAG] = toUShort(tagw); 2224 2225 /* We don't dump the x87 registers, tho. */ 2226 } 2227 2228 2229 /* This is used to implement 'fnsave'. 2230 Writes 108 bytes at x87_state[0 .. 107]. */ 2231 /* CALLED FROM GENERATED CODE */ 2232 /* DIRTY HELPER */ 2233 void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State* vex_state, 2234 /*OUT*/HWord x87_state) 2235 { 2236 do_get_x87( vex_state, (UChar*)x87_state ); 2237 } 2238 2239 2240 /* This is used to implement 'fnsaves'. 2241 Writes 94 bytes at x87_state[0 .. 93]. */ 2242 /* CALLED FROM GENERATED CODE */ 2243 /* DIRTY HELPER */ 2244 void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State* vex_state, 2245 /*OUT*/HWord x87_state) 2246 { 2247 Int i, stno, preg; 2248 UInt tagw; 2249 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); 2250 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); 2251 Fpu_State_16* x87 = (Fpu_State_16*)x87_state; 2252 UInt ftop = vex_state->guest_FTOP; 2253 UInt c3210 = vex_state->guest_FC3210; 2254 2255 for (i = 0; i < 7; i++) 2256 x87->env[i] = 0; 2257 2258 x87->env[FPS_ENV_STAT] 2259 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700)); 2260 x87->env[FPS_ENV_CTRL] 2261 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND )); 2262 2263 /* Dump the register stack in ST order. */ 2264 tagw = 0; 2265 for (stno = 0; stno < 8; stno++) { 2266 preg = (stno + ftop) & 7; 2267 if (vexTags[preg] == 0) { 2268 /* register is empty */ 2269 tagw |= (3 << (2*preg)); 2270 convert_f64le_to_f80le( (UChar*)&vexRegs[preg], 2271 &x87->reg[10*stno] ); 2272 } else { 2273 /* register is full. */ 2274 tagw |= (0 << (2*preg)); 2275 convert_f64le_to_f80le( (UChar*)&vexRegs[preg], 2276 &x87->reg[10*stno] ); 2277 } 2278 } 2279 x87->env[FPS_ENV_TAG] = toUShort(tagw); 2280 } 2281 2282 2283 /* This is used to implement 'frstor'. 2284 Reads 108 bytes at x87_state[0 .. 107]. */ 2285 /* CALLED FROM GENERATED CODE */ 2286 /* DIRTY HELPER */ 2287 VexEmNote amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State* vex_state, 2288 /*IN*/HWord x87_state) 2289 { 2290 return do_put_x87( True, (UChar*)x87_state, vex_state ); 2291 } 2292 2293 2294 /* This is used to implement 'frstors'. 2295 Reads 94 bytes at x87_state[0 .. 93]. */ 2296 /* CALLED FROM GENERATED CODE */ 2297 /* DIRTY HELPER */ 2298 VexEmNote amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State* vex_state, 2299 /*IN*/HWord x87_state) 2300 { 2301 Int stno, preg; 2302 UInt tag; 2303 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); 2304 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); 2305 Fpu_State_16* x87 = (Fpu_State_16*)x87_state; 2306 UInt ftop = (x87->env[FPS_ENV_STAT] >> 11) & 7; 2307 UInt tagw = x87->env[FPS_ENV_TAG]; 2308 UInt fpucw = x87->env[FPS_ENV_CTRL]; 2309 UInt c3210 = x87->env[FPS_ENV_STAT] & 0x4700; 2310 VexEmNote ew; 2311 UInt fpround; 2312 ULong pair; 2313 2314 /* Copy registers and tags */ 2315 for (stno = 0; stno < 8; stno++) { 2316 preg = (stno + ftop) & 7; 2317 tag = (tagw >> (2*preg)) & 3; 2318 if (tag == 3) { 2319 /* register is empty */ 2320 /* hmm, if it's empty, does it still get written? Probably 2321 safer to say it does. If we don't, memcheck could get out 2322 of sync, in that it thinks all FP registers are defined by 2323 this helper, but in reality some have not been updated. */ 2324 vexRegs[preg] = 0; /* IEEE754 64-bit zero */ 2325 vexTags[preg] = 0; 2326 } else { 2327 /* register is non-empty */ 2328 convert_f80le_to_f64le( &x87->reg[10*stno], 2329 (UChar*)&vexRegs[preg] ); 2330 vexTags[preg] = 1; 2331 } 2332 } 2333 2334 /* stack pointer */ 2335 vex_state->guest_FTOP = ftop; 2336 2337 /* status word */ 2338 vex_state->guest_FC3210 = c3210; 2339 2340 /* handle the control word, setting FPROUND and detecting any 2341 emulation warnings. */ 2342 pair = amd64g_check_fldcw ( (ULong)fpucw ); 2343 fpround = (UInt)pair & 0xFFFFFFFFULL; 2344 ew = (VexEmNote)(pair >> 32); 2345 2346 vex_state->guest_FPROUND = fpround & 3; 2347 2348 /* emulation warnings --> caller */ 2349 return ew; 2350 } 2351 2352 2353 /*---------------------------------------------------------------*/ 2354 /*--- Misc integer helpers, including rotates and CPUID. ---*/ 2355 /*---------------------------------------------------------------*/ 2356 2357 /* Claim to be the following CPU, which is probably representative of 2358 the lowliest (earliest) amd64 offerings. It can do neither sse3 2359 nor cx16. 2360 2361 vendor_id : AuthenticAMD 2362 cpu family : 15 2363 model : 5 2364 model name : AMD Opteron (tm) Processor 848 2365 stepping : 10 2366 cpu MHz : 1797.682 2367 cache size : 1024 KB 2368 fpu : yes 2369 fpu_exception : yes 2370 cpuid level : 1 2371 wp : yes 2372 flags : fpu vme de pse tsc msr pae mce cx8 apic sep 2373 mtrr pge mca cmov pat pse36 clflush mmx fxsr 2374 sse sse2 syscall nx mmxext lm 3dnowext 3dnow 2375 bogomips : 3600.62 2376 TLB size : 1088 4K pages 2377 clflush size : 64 2378 cache_alignment : 64 2379 address sizes : 40 bits physical, 48 bits virtual 2380 power management: ts fid vid ttp 2381 2382 2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact 2383 we don't support them. See #291568. 3dnow is 80000001.EDX.31 2384 and 3dnowext is 80000001.EDX.30. 2385 */ 2386 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st ) 2387 { 2388 # define SET_ABCD(_a,_b,_c,_d) \ 2389 do { st->guest_RAX = (ULong)(_a); \ 2390 st->guest_RBX = (ULong)(_b); \ 2391 st->guest_RCX = (ULong)(_c); \ 2392 st->guest_RDX = (ULong)(_d); \ 2393 } while (0) 2394 2395 switch (0xFFFFFFFF & st->guest_RAX) { 2396 case 0x00000000: 2397 SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65); 2398 break; 2399 case 0x00000001: 2400 SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff); 2401 break; 2402 case 0x80000000: 2403 SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65); 2404 break; 2405 case 0x80000001: 2406 /* Don't claim to support 3dnow or 3dnowext. 0xe1d3fbff is 2407 the original it-is-supported value that the h/w provides. 2408 See #291568. */ 2409 SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/ 2410 0x21d3fbff); 2411 break; 2412 case 0x80000002: 2413 SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428); 2414 break; 2415 case 0x80000003: 2416 SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834); 2417 break; 2418 case 0x80000004: 2419 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2420 break; 2421 case 0x80000005: 2422 SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140); 2423 break; 2424 case 0x80000006: 2425 SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000); 2426 break; 2427 case 0x80000007: 2428 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f); 2429 break; 2430 case 0x80000008: 2431 SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000); 2432 break; 2433 default: 2434 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2435 break; 2436 } 2437 # undef SET_ABCD 2438 } 2439 2440 2441 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16 2442 capable. 2443 2444 vendor_id : GenuineIntel 2445 cpu family : 6 2446 model : 15 2447 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz 2448 stepping : 6 2449 cpu MHz : 2394.000 2450 cache size : 4096 KB 2451 physical id : 0 2452 siblings : 2 2453 core id : 0 2454 cpu cores : 2 2455 fpu : yes 2456 fpu_exception : yes 2457 cpuid level : 10 2458 wp : yes 2459 flags : fpu vme de pse tsc msr pae mce cx8 apic sep 2460 mtrr pge mca cmov pat pse36 clflush dts acpi 2461 mmx fxsr sse sse2 ss ht tm syscall nx lm 2462 constant_tsc pni monitor ds_cpl vmx est tm2 2463 cx16 xtpr lahf_lm 2464 bogomips : 4798.78 2465 clflush size : 64 2466 cache_alignment : 64 2467 address sizes : 36 bits physical, 48 bits virtual 2468 power management: 2469 */ 2470 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st ) 2471 { 2472 # define SET_ABCD(_a,_b,_c,_d) \ 2473 do { st->guest_RAX = (ULong)(_a); \ 2474 st->guest_RBX = (ULong)(_b); \ 2475 st->guest_RCX = (ULong)(_c); \ 2476 st->guest_RDX = (ULong)(_d); \ 2477 } while (0) 2478 2479 switch (0xFFFFFFFF & st->guest_RAX) { 2480 case 0x00000000: 2481 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69); 2482 break; 2483 case 0x00000001: 2484 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff); 2485 break; 2486 case 0x00000002: 2487 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049); 2488 break; 2489 case 0x00000003: 2490 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2491 break; 2492 case 0x00000004: { 2493 switch (0xFFFFFFFF & st->guest_RCX) { 2494 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f, 2495 0x0000003f, 0x00000001); break; 2496 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f, 2497 0x0000003f, 0x00000001); break; 2498 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f, 2499 0x00000fff, 0x00000001); break; 2500 default: SET_ABCD(0x00000000, 0x00000000, 2501 0x00000000, 0x00000000); break; 2502 } 2503 break; 2504 } 2505 case 0x00000005: 2506 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020); 2507 break; 2508 case 0x00000006: 2509 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000); 2510 break; 2511 case 0x00000007: 2512 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2513 break; 2514 case 0x00000008: 2515 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000); 2516 break; 2517 case 0x00000009: 2518 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2519 break; 2520 case 0x0000000a: 2521 unhandled_eax_value: 2522 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000); 2523 break; 2524 case 0x80000000: 2525 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); 2526 break; 2527 case 0x80000001: 2528 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800); 2529 break; 2530 case 0x80000002: 2531 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); 2532 break; 2533 case 0x80000003: 2534 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020); 2535 break; 2536 case 0x80000004: 2537 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847); 2538 break; 2539 case 0x80000005: 2540 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2541 break; 2542 case 0x80000006: 2543 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000); 2544 break; 2545 case 0x80000007: 2546 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2547 break; 2548 case 0x80000008: 2549 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); 2550 break; 2551 default: 2552 goto unhandled_eax_value; 2553 } 2554 # undef SET_ABCD 2555 } 2556 2557 2558 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16 2559 capable. 2560 2561 vendor_id : GenuineIntel 2562 cpu family : 6 2563 model : 37 2564 model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz 2565 stepping : 2 2566 cpu MHz : 3334.000 2567 cache size : 4096 KB 2568 physical id : 0 2569 siblings : 4 2570 core id : 0 2571 cpu cores : 2 2572 apicid : 0 2573 initial apicid : 0 2574 fpu : yes 2575 fpu_exception : yes 2576 cpuid level : 11 2577 wp : yes 2578 flags : fpu vme de pse tsc msr pae mce cx8 apic sep 2579 mtrr pge mca cmov pat pse36 clflush dts acpi 2580 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp 2581 lm constant_tsc arch_perfmon pebs bts rep_good 2582 xtopology nonstop_tsc aperfmperf pni pclmulqdq 2583 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 2584 xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida 2585 arat tpr_shadow vnmi flexpriority ept vpid 2586 bogomips : 6957.57 2587 clflush size : 64 2588 cache_alignment : 64 2589 address sizes : 36 bits physical, 48 bits virtual 2590 power management: 2591 */ 2592 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st ) 2593 { 2594 # define SET_ABCD(_a,_b,_c,_d) \ 2595 do { st->guest_RAX = (ULong)(_a); \ 2596 st->guest_RBX = (ULong)(_b); \ 2597 st->guest_RCX = (ULong)(_c); \ 2598 st->guest_RDX = (ULong)(_d); \ 2599 } while (0) 2600 2601 UInt old_eax = (UInt)st->guest_RAX; 2602 UInt old_ecx = (UInt)st->guest_RCX; 2603 2604 switch (old_eax) { 2605 case 0x00000000: 2606 SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69); 2607 break; 2608 case 0x00000001: 2609 SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff); 2610 break; 2611 case 0x00000002: 2612 SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c); 2613 break; 2614 case 0x00000003: 2615 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2616 break; 2617 case 0x00000004: 2618 switch (old_ecx) { 2619 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f, 2620 0x0000003f, 0x00000000); break; 2621 case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f, 2622 0x0000007f, 0x00000000); break; 2623 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f, 2624 0x000001ff, 0x00000000); break; 2625 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f, 2626 0x00000fff, 0x00000002); break; 2627 default: SET_ABCD(0x00000000, 0x00000000, 2628 0x00000000, 0x00000000); break; 2629 } 2630 break; 2631 case 0x00000005: 2632 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120); 2633 break; 2634 case 0x00000006: 2635 SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000); 2636 break; 2637 case 0x00000007: 2638 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2639 break; 2640 case 0x00000008: 2641 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2642 break; 2643 case 0x00000009: 2644 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2645 break; 2646 case 0x0000000a: 2647 SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603); 2648 break; 2649 case 0x0000000b: 2650 switch (old_ecx) { 2651 case 0x00000000: 2652 SET_ABCD(0x00000001, 0x00000002, 2653 0x00000100, 0x00000000); break; 2654 case 0x00000001: 2655 SET_ABCD(0x00000004, 0x00000004, 2656 0x00000201, 0x00000000); break; 2657 default: 2658 SET_ABCD(0x00000000, 0x00000000, 2659 old_ecx, 0x00000000); break; 2660 } 2661 break; 2662 case 0x0000000c: 2663 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000); 2664 break; 2665 case 0x0000000d: 2666 switch (old_ecx) { 2667 case 0x00000000: SET_ABCD(0x00000001, 0x00000002, 2668 0x00000100, 0x00000000); break; 2669 case 0x00000001: SET_ABCD(0x00000004, 0x00000004, 2670 0x00000201, 0x00000000); break; 2671 default: SET_ABCD(0x00000000, 0x00000000, 2672 old_ecx, 0x00000000); break; 2673 } 2674 break; 2675 case 0x80000000: 2676 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); 2677 break; 2678 case 0x80000001: 2679 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800); 2680 break; 2681 case 0x80000002: 2682 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); 2683 break; 2684 case 0x80000003: 2685 SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020); 2686 break; 2687 case 0x80000004: 2688 SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847); 2689 break; 2690 case 0x80000005: 2691 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2692 break; 2693 case 0x80000006: 2694 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000); 2695 break; 2696 case 0x80000007: 2697 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100); 2698 break; 2699 case 0x80000008: 2700 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); 2701 break; 2702 default: 2703 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000); 2704 break; 2705 } 2706 # undef SET_ABCD 2707 } 2708 2709 2710 /* Claim to be the following CPU (4 x ...), which is AVX and cx16 2711 capable. Plus (kludge!) it "supports" HTM. 2712 2713 vendor_id : GenuineIntel 2714 cpu family : 6 2715 model : 42 2716 model name : Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz 2717 stepping : 7 2718 cpu MHz : 1600.000 2719 cache size : 6144 KB 2720 physical id : 0 2721 siblings : 4 2722 core id : 3 2723 cpu cores : 4 2724 apicid : 6 2725 initial apicid : 6 2726 fpu : yes 2727 fpu_exception : yes 2728 cpuid level : 13 2729 wp : yes 2730 flags : fpu vme de pse tsc msr pae mce cx8 apic sep 2731 mtrr pge mca cmov pat pse36 clflush dts acpi 2732 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp 2733 lm constant_tsc arch_perfmon pebs bts rep_good 2734 nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq 2735 dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 2736 xtpr pdcm sse4_1 sse4_2 popcnt aes xsave avx 2737 lahf_lm ida arat epb xsaveopt pln pts dts 2738 tpr_shadow vnmi flexpriority ept vpid 2739 2740 bogomips : 5768.94 2741 clflush size : 64 2742 cache_alignment : 64 2743 address sizes : 36 bits physical, 48 bits virtual 2744 power management: 2745 */ 2746 void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st ) 2747 { 2748 # define SET_ABCD(_a,_b,_c,_d) \ 2749 do { st->guest_RAX = (ULong)(_a); \ 2750 st->guest_RBX = (ULong)(_b); \ 2751 st->guest_RCX = (ULong)(_c); \ 2752 st->guest_RDX = (ULong)(_d); \ 2753 } while (0) 2754 2755 UInt old_eax = (UInt)st->guest_RAX; 2756 UInt old_ecx = (UInt)st->guest_RCX; 2757 2758 switch (old_eax) { 2759 case 0x00000000: 2760 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69); 2761 break; 2762 case 0x00000001: 2763 SET_ABCD(0x000206a7, 0x00100800, 0x1f9ae3bf, 0xbfebfbff); 2764 break; 2765 case 0x00000002: 2766 SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000); 2767 break; 2768 case 0x00000003: 2769 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2770 break; 2771 case 0x00000004: 2772 switch (old_ecx) { 2773 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f, 2774 0x0000003f, 0x00000000); break; 2775 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f, 2776 0x0000003f, 0x00000000); break; 2777 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f, 2778 0x000001ff, 0x00000000); break; 2779 case 0x00000003: SET_ABCD(0x1c03c163, 0x02c0003f, 2780 0x00001fff, 0x00000006); break; 2781 default: SET_ABCD(0x00000000, 0x00000000, 2782 0x00000000, 0x00000000); break; 2783 } 2784 break; 2785 case 0x00000005: 2786 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120); 2787 break; 2788 case 0x00000006: 2789 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000); 2790 break; 2791 case 0x00000007: 2792 SET_ABCD(0x00000000, 0x00000800, 0x00000000, 0x00000000); 2793 break; 2794 case 0x00000008: 2795 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2796 break; 2797 case 0x00000009: 2798 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2799 break; 2800 case 0x0000000a: 2801 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603); 2802 break; 2803 case 0x0000000b: 2804 switch (old_ecx) { 2805 case 0x00000000: 2806 SET_ABCD(0x00000001, 0x00000001, 2807 0x00000100, 0x00000000); break; 2808 case 0x00000001: 2809 SET_ABCD(0x00000004, 0x00000004, 2810 0x00000201, 0x00000000); break; 2811 default: 2812 SET_ABCD(0x00000000, 0x00000000, 2813 old_ecx, 0x00000000); break; 2814 } 2815 break; 2816 case 0x0000000c: 2817 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2818 break; 2819 case 0x0000000d: 2820 switch (old_ecx) { 2821 case 0x00000000: SET_ABCD(0x00000007, 0x00000340, 2822 0x00000340, 0x00000000); break; 2823 case 0x00000001: SET_ABCD(0x00000001, 0x00000000, 2824 0x00000000, 0x00000000); break; 2825 case 0x00000002: SET_ABCD(0x00000100, 0x00000240, 2826 0x00000000, 0x00000000); break; 2827 default: SET_ABCD(0x00000000, 0x00000000, 2828 0x00000000, 0x00000000); break; 2829 } 2830 break; 2831 case 0x0000000e: 2832 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); 2833 break; 2834 case 0x0000000f: 2835 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); 2836 break; 2837 case 0x80000000: 2838 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); 2839 break; 2840 case 0x80000001: 2841 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800); 2842 break; 2843 case 0x80000002: 2844 SET_ABCD(0x20202020, 0x20202020, 0x65746e49, 0x2952286c); 2845 break; 2846 case 0x80000003: 2847 SET_ABCD(0x726f4320, 0x4d542865, 0x35692029, 0x3033322d); 2848 break; 2849 case 0x80000004: 2850 SET_ABCD(0x50432030, 0x20402055, 0x30382e32, 0x007a4847); 2851 break; 2852 case 0x80000005: 2853 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); 2854 break; 2855 case 0x80000006: 2856 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000); 2857 break; 2858 case 0x80000007: 2859 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100); 2860 break; 2861 case 0x80000008: 2862 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); 2863 break; 2864 default: 2865 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); 2866 break; 2867 } 2868 # undef SET_ABCD 2869 } 2870 2871 2872 ULong amd64g_calculate_RCR ( ULong arg, 2873 ULong rot_amt, 2874 ULong rflags_in, 2875 Long szIN ) 2876 { 2877 Bool wantRflags = toBool(szIN < 0); 2878 ULong sz = wantRflags ? (-szIN) : szIN; 2879 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F); 2880 ULong cf=0, of=0, tempcf; 2881 2882 switch (sz) { 2883 case 8: 2884 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2885 of = ((arg >> 63) ^ cf) & 1; 2886 while (tempCOUNT > 0) { 2887 tempcf = arg & 1; 2888 arg = (arg >> 1) | (cf << 63); 2889 cf = tempcf; 2890 tempCOUNT--; 2891 } 2892 break; 2893 case 4: 2894 while (tempCOUNT >= 33) tempCOUNT -= 33; 2895 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2896 of = ((arg >> 31) ^ cf) & 1; 2897 while (tempCOUNT > 0) { 2898 tempcf = arg & 1; 2899 arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31); 2900 cf = tempcf; 2901 tempCOUNT--; 2902 } 2903 break; 2904 case 2: 2905 while (tempCOUNT >= 17) tempCOUNT -= 17; 2906 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2907 of = ((arg >> 15) ^ cf) & 1; 2908 while (tempCOUNT > 0) { 2909 tempcf = arg & 1; 2910 arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15); 2911 cf = tempcf; 2912 tempCOUNT--; 2913 } 2914 break; 2915 case 1: 2916 while (tempCOUNT >= 9) tempCOUNT -= 9; 2917 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2918 of = ((arg >> 7) ^ cf) & 1; 2919 while (tempCOUNT > 0) { 2920 tempcf = arg & 1; 2921 arg = ((arg >> 1) & 0x7FULL) | (cf << 7); 2922 cf = tempcf; 2923 tempCOUNT--; 2924 } 2925 break; 2926 default: 2927 vpanic("calculate_RCR(amd64g): invalid size"); 2928 } 2929 2930 cf &= 1; 2931 of &= 1; 2932 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); 2933 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); 2934 2935 /* caller can ask to have back either the resulting flags or 2936 resulting value, but not both */ 2937 return wantRflags ? rflags_in : arg; 2938 } 2939 2940 ULong amd64g_calculate_RCL ( ULong arg, 2941 ULong rot_amt, 2942 ULong rflags_in, 2943 Long szIN ) 2944 { 2945 Bool wantRflags = toBool(szIN < 0); 2946 ULong sz = wantRflags ? (-szIN) : szIN; 2947 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F); 2948 ULong cf=0, of=0, tempcf; 2949 2950 switch (sz) { 2951 case 8: 2952 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2953 while (tempCOUNT > 0) { 2954 tempcf = (arg >> 63) & 1; 2955 arg = (arg << 1) | (cf & 1); 2956 cf = tempcf; 2957 tempCOUNT--; 2958 } 2959 of = ((arg >> 63) ^ cf) & 1; 2960 break; 2961 case 4: 2962 while (tempCOUNT >= 33) tempCOUNT -= 33; 2963 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2964 while (tempCOUNT > 0) { 2965 tempcf = (arg >> 31) & 1; 2966 arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1)); 2967 cf = tempcf; 2968 tempCOUNT--; 2969 } 2970 of = ((arg >> 31) ^ cf) & 1; 2971 break; 2972 case 2: 2973 while (tempCOUNT >= 17) tempCOUNT -= 17; 2974 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2975 while (tempCOUNT > 0) { 2976 tempcf = (arg >> 15) & 1; 2977 arg = 0xFFFFULL & ((arg << 1) | (cf & 1)); 2978 cf = tempcf; 2979 tempCOUNT--; 2980 } 2981 of = ((arg >> 15) ^ cf) & 1; 2982 break; 2983 case 1: 2984 while (tempCOUNT >= 9) tempCOUNT -= 9; 2985 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; 2986 while (tempCOUNT > 0) { 2987 tempcf = (arg >> 7) & 1; 2988 arg = 0xFFULL & ((arg << 1) | (cf & 1)); 2989 cf = tempcf; 2990 tempCOUNT--; 2991 } 2992 of = ((arg >> 7) ^ cf) & 1; 2993 break; 2994 default: 2995 vpanic("calculate_RCL(amd64g): invalid size"); 2996 } 2997 2998 cf &= 1; 2999 of &= 1; 3000 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); 3001 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); 3002 3003 return wantRflags ? rflags_in : arg; 3004 } 3005 3006 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+) 3007 * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25 3008 */ 3009 ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which) 3010 { 3011 ULong hi, lo, tmp, A[16]; 3012 3013 A[0] = 0; A[1] = a; 3014 A[2] = A[1] << 1; A[3] = A[2] ^ a; 3015 A[4] = A[2] << 1; A[5] = A[4] ^ a; 3016 A[6] = A[3] << 1; A[7] = A[6] ^ a; 3017 A[8] = A[4] << 1; A[9] = A[8] ^ a; 3018 A[10] = A[5] << 1; A[11] = A[10] ^ a; 3019 A[12] = A[6] << 1; A[13] = A[12] ^ a; 3020 A[14] = A[7] << 1; A[15] = A[14] ^ a; 3021 3022 lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15]; 3023 hi = lo >> 56; 3024 lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15]; 3025 hi = (hi << 8) | (lo >> 56); 3026 lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15]; 3027 hi = (hi << 8) | (lo >> 56); 3028 lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15]; 3029 hi = (hi << 8) | (lo >> 56); 3030 lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15]; 3031 hi = (hi << 8) | (lo >> 56); 3032 lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15]; 3033 hi = (hi << 8) | (lo >> 56); 3034 lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15]; 3035 hi = (hi << 8) | (lo >> 56); 3036 lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15]; 3037 3038 ULong m0 = -1; 3039 m0 /= 255; 3040 tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp; 3041 tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp; 3042 tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp; 3043 tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp; 3044 tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp; 3045 tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp; 3046 tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp; 3047 3048 return which ? hi : lo; 3049 } 3050 3051 3052 /* CALLED FROM GENERATED CODE */ 3053 /* DIRTY HELPER (non-referentially-transparent) */ 3054 /* Horrible hack. On non-amd64 platforms, return 1. */ 3055 ULong amd64g_dirtyhelper_RDTSC ( void ) 3056 { 3057 # if defined(__x86_64__) 3058 UInt eax, edx; 3059 __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx)); 3060 return (((ULong)edx) << 32) | ((ULong)eax); 3061 # else 3062 return 1ULL; 3063 # endif 3064 } 3065 3066 /* CALLED FROM GENERATED CODE */ 3067 /* DIRTY HELPER (non-referentially-transparent) */ 3068 /* Horrible hack. On non-amd64 platforms, return 1. */ 3069 /* This uses a different calling convention from _RDTSC just above 3070 only because of the difficulty of returning 96 bits from a C 3071 function -- RDTSC returns 64 bits and so is simple by comparison, 3072 on amd64. */ 3073 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st ) 3074 { 3075 # if defined(__x86_64__) 3076 UInt eax, ecx, edx; 3077 __asm__ __volatile__("rdtscp" : "=a" (eax), "=d" (edx), "=c" (ecx)); 3078 st->guest_RAX = (ULong)eax; 3079 st->guest_RCX = (ULong)ecx; 3080 st->guest_RDX = (ULong)edx; 3081 # else 3082 /* Do nothing. */ 3083 # endif 3084 } 3085 3086 /* CALLED FROM GENERATED CODE */ 3087 /* DIRTY HELPER (non-referentially-transparent) */ 3088 /* Horrible hack. On non-amd64 platforms, return 0. */ 3089 ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ ) 3090 { 3091 # if defined(__x86_64__) 3092 ULong r = 0; 3093 portno &= 0xFFFF; 3094 switch (sz) { 3095 case 4: 3096 __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0" 3097 : "=a" (r) : "Nd" (portno)); 3098 break; 3099 case 2: 3100 __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0" 3101 : "=a" (r) : "Nd" (portno)); 3102 break; 3103 case 1: 3104 __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0" 3105 : "=a" (r) : "Nd" (portno)); 3106 break; 3107 default: 3108 break; /* note: no 64-bit version of insn exists */ 3109 } 3110 return r; 3111 # else 3112 return 0; 3113 # endif 3114 } 3115 3116 3117 /* CALLED FROM GENERATED CODE */ 3118 /* DIRTY HELPER (non-referentially-transparent) */ 3119 /* Horrible hack. On non-amd64 platforms, do nothing. */ 3120 void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ ) 3121 { 3122 # if defined(__x86_64__) 3123 portno &= 0xFFFF; 3124 switch (sz) { 3125 case 4: 3126 __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1" 3127 : : "a" (data), "Nd" (portno)); 3128 break; 3129 case 2: 3130 __asm__ __volatile__("outw %w0, %w1" 3131 : : "a" (data), "Nd" (portno)); 3132 break; 3133 case 1: 3134 __asm__ __volatile__("outb %b0, %w1" 3135 : : "a" (data), "Nd" (portno)); 3136 break; 3137 default: 3138 break; /* note: no 64-bit version of insn exists */ 3139 } 3140 # else 3141 /* do nothing */ 3142 # endif 3143 } 3144 3145 /* CALLED FROM GENERATED CODE */ 3146 /* DIRTY HELPER (non-referentially-transparent) */ 3147 /* Horrible hack. On non-amd64 platforms, do nothing. */ 3148 /* op = 0: call the native SGDT instruction. 3149 op = 1: call the native SIDT instruction. 3150 */ 3151 void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) { 3152 # if defined(__x86_64__) 3153 switch (op) { 3154 case 0: 3155 __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory"); 3156 break; 3157 case 1: 3158 __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory"); 3159 break; 3160 default: 3161 vpanic("amd64g_dirtyhelper_SxDT"); 3162 } 3163 # else 3164 /* do nothing */ 3165 UChar* p = (UChar*)address; 3166 p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0; 3167 p[6] = p[7] = p[8] = p[9] = 0; 3168 # endif 3169 } 3170 3171 /*---------------------------------------------------------------*/ 3172 /*--- Helpers for MMX/SSE/SSE2. ---*/ 3173 /*---------------------------------------------------------------*/ 3174 3175 static inline UChar abdU8 ( UChar xx, UChar yy ) { 3176 return toUChar(xx>yy ? xx-yy : yy-xx); 3177 } 3178 3179 static inline ULong mk32x2 ( UInt w1, UInt w0 ) { 3180 return (((ULong)w1) << 32) | ((ULong)w0); 3181 } 3182 3183 static inline UShort sel16x4_3 ( ULong w64 ) { 3184 UInt hi32 = toUInt(w64 >> 32); 3185 return toUShort(hi32 >> 16); 3186 } 3187 static inline UShort sel16x4_2 ( ULong w64 ) { 3188 UInt hi32 = toUInt(w64 >> 32); 3189 return toUShort(hi32); 3190 } 3191 static inline UShort sel16x4_1 ( ULong w64 ) { 3192 UInt lo32 = toUInt(w64); 3193 return toUShort(lo32 >> 16); 3194 } 3195 static inline UShort sel16x4_0 ( ULong w64 ) { 3196 UInt lo32 = toUInt(w64); 3197 return toUShort(lo32); 3198 } 3199 3200 static inline UChar sel8x8_7 ( ULong w64 ) { 3201 UInt hi32 = toUInt(w64 >> 32); 3202 return toUChar(hi32 >> 24); 3203 } 3204 static inline UChar sel8x8_6 ( ULong w64 ) { 3205 UInt hi32 = toUInt(w64 >> 32); 3206 return toUChar(hi32 >> 16); 3207 } 3208 static inline UChar sel8x8_5 ( ULong w64 ) { 3209 UInt hi32 = toUInt(w64 >> 32); 3210 return toUChar(hi32 >> 8); 3211 } 3212 static inline UChar sel8x8_4 ( ULong w64 ) { 3213 UInt hi32 = toUInt(w64 >> 32); 3214 return toUChar(hi32 >> 0); 3215 } 3216 static inline UChar sel8x8_3 ( ULong w64 ) { 3217 UInt lo32 = toUInt(w64); 3218 return toUChar(lo32 >> 24); 3219 } 3220 static inline UChar sel8x8_2 ( ULong w64 ) { 3221 UInt lo32 = toUInt(w64); 3222 return toUChar(lo32 >> 16); 3223 } 3224 static inline UChar sel8x8_1 ( ULong w64 ) { 3225 UInt lo32 = toUInt(w64); 3226 return toUChar(lo32 >> 8); 3227 } 3228 static inline UChar sel8x8_0 ( ULong w64 ) { 3229 UInt lo32 = toUInt(w64); 3230 return toUChar(lo32 >> 0); 3231 } 3232 3233 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3234 ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy ) 3235 { 3236 return 3237 mk32x2( 3238 (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy))) 3239 + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))), 3240 (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy))) 3241 + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy))) 3242 ); 3243 } 3244 3245 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3246 ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy ) 3247 { 3248 UInt t = 0; 3249 t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) ); 3250 t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) ); 3251 t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) ); 3252 t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) ); 3253 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); 3254 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); 3255 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); 3256 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); 3257 t &= 0xFFFF; 3258 return (ULong)t; 3259 } 3260 3261 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3262 ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi ) 3263 { 3264 UShort t, min; 3265 UInt idx; 3266 t = sel16x4_0(sLo); if (True) { min = t; idx = 0; } 3267 t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; } 3268 t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; } 3269 t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; } 3270 t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; } 3271 t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; } 3272 t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; } 3273 t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; } 3274 return ((ULong)(idx << 16)) | ((ULong)min); 3275 } 3276 3277 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3278 ULong amd64g_calc_crc32b ( ULong crcIn, ULong b ) 3279 { 3280 UInt i; 3281 ULong crc = (b & 0xFFULL) ^ crcIn; 3282 for (i = 0; i < 8; i++) 3283 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); 3284 return crc; 3285 } 3286 3287 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3288 ULong amd64g_calc_crc32w ( ULong crcIn, ULong w ) 3289 { 3290 UInt i; 3291 ULong crc = (w & 0xFFFFULL) ^ crcIn; 3292 for (i = 0; i < 16; i++) 3293 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); 3294 return crc; 3295 } 3296 3297 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3298 ULong amd64g_calc_crc32l ( ULong crcIn, ULong l ) 3299 { 3300 UInt i; 3301 ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn; 3302 for (i = 0; i < 32; i++) 3303 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); 3304 return crc; 3305 } 3306 3307 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3308 ULong amd64g_calc_crc32q ( ULong crcIn, ULong q ) 3309 { 3310 ULong crc = amd64g_calc_crc32l(crcIn, q); 3311 return amd64g_calc_crc32l(crc, q >> 32); 3312 } 3313 3314 3315 /* .. helper for next fn .. */ 3316 static inline ULong sad_8x4 ( ULong xx, ULong yy ) 3317 { 3318 UInt t = 0; 3319 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); 3320 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); 3321 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); 3322 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); 3323 return (ULong)t; 3324 } 3325 3326 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3327 ULong amd64g_calc_mpsadbw ( ULong sHi, ULong sLo, 3328 ULong dHi, ULong dLo, 3329 ULong imm_and_return_control_bit ) 3330 { 3331 UInt imm8 = imm_and_return_control_bit & 7; 3332 Bool calcHi = (imm_and_return_control_bit >> 7) & 1; 3333 UInt srcOffsL = imm8 & 3; /* src offs in 32-bit (L) chunks */ 3334 UInt dstOffsL = (imm8 >> 2) & 1; /* dst offs in ditto chunks */ 3335 /* For src we only need 32 bits, so get them into the 3336 lower half of a 64 bit word. */ 3337 ULong src = ((srcOffsL & 2) ? sHi : sLo) >> (32 * (srcOffsL & 1)); 3338 /* For dst we need to get hold of 56 bits (7 bytes) from a total of 3339 11 bytes. If calculating the low part of the result, need bytes 3340 dstOffsL * 4 + (0 .. 6); if calculating the high part, 3341 dstOffsL * 4 + (4 .. 10). */ 3342 ULong dst; 3343 /* dstOffL = 0, Lo -> 0 .. 6 3344 dstOffL = 1, Lo -> 4 .. 10 3345 dstOffL = 0, Hi -> 4 .. 10 3346 dstOffL = 1, Hi -> 8 .. 14 3347 */ 3348 if (calcHi && dstOffsL) { 3349 /* 8 .. 14 */ 3350 dst = dHi & 0x00FFFFFFFFFFFFFFULL; 3351 } 3352 else if (!calcHi && !dstOffsL) { 3353 /* 0 .. 6 */ 3354 dst = dLo & 0x00FFFFFFFFFFFFFFULL; 3355 } 3356 else { 3357 /* 4 .. 10 */ 3358 dst = (dLo >> 32) | ((dHi & 0x00FFFFFFULL) << 32); 3359 } 3360 ULong r0 = sad_8x4( dst >> 0, src ); 3361 ULong r1 = sad_8x4( dst >> 8, src ); 3362 ULong r2 = sad_8x4( dst >> 16, src ); 3363 ULong r3 = sad_8x4( dst >> 24, src ); 3364 ULong res = (r3 << 48) | (r2 << 32) | (r1 << 16) | r0; 3365 return res; 3366 } 3367 3368 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3369 ULong amd64g_calculate_pext ( ULong src_masked, ULong mask ) 3370 { 3371 ULong dst = 0; 3372 ULong src_bit; 3373 ULong dst_bit = 1; 3374 for (src_bit = 1; src_bit; src_bit <<= 1) { 3375 if (mask & src_bit) { 3376 if (src_masked & src_bit) dst |= dst_bit; 3377 dst_bit <<= 1; 3378 } 3379 } 3380 return dst; 3381 } 3382 3383 /* CALLED FROM GENERATED CODE: CLEAN HELPER */ 3384 ULong amd64g_calculate_pdep ( ULong src, ULong mask ) 3385 { 3386 ULong dst = 0; 3387 ULong dst_bit; 3388 ULong src_bit = 1; 3389 for (dst_bit = 1; dst_bit; dst_bit <<= 1) { 3390 if (mask & dst_bit) { 3391 if (src & src_bit) dst |= dst_bit; 3392 src_bit <<= 1; 3393 } 3394 } 3395 return dst; 3396 } 3397 3398 /*---------------------------------------------------------------*/ 3399 /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/ 3400 /*---------------------------------------------------------------*/ 3401 3402 static UInt zmask_from_V128 ( V128* arg ) 3403 { 3404 UInt i, res = 0; 3405 for (i = 0; i < 16; i++) { 3406 res |= ((arg->w8[i] == 0) ? 1 : 0) << i; 3407 } 3408 return res; 3409 } 3410 3411 static UInt zmask_from_V128_wide ( V128* arg ) 3412 { 3413 UInt i, res = 0; 3414 for (i = 0; i < 8; i++) { 3415 res |= ((arg->w16[i] == 0) ? 1 : 0) << i; 3416 } 3417 return res; 3418 } 3419 3420 /* Helps with PCMP{I,E}STR{I,M}. 3421 3422 CALLED FROM GENERATED CODE: DIRTY HELPER(s). (But not really, 3423 actually it could be a clean helper, but for the fact that we can't 3424 pass by value 2 x V128 to a clean helper, nor have one returned.) 3425 Reads guest state, writes to guest state for the xSTRM cases, no 3426 accesses of memory, is a pure function. 3427 3428 opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so 3429 the callee knows which I/E and I/M variant it is dealing with and 3430 what the specific operation is. 4th byte of opcode is in the range 3431 0x60 to 0x63: 3432 istri 66 0F 3A 63 3433 istrm 66 0F 3A 62 3434 estri 66 0F 3A 61 3435 estrm 66 0F 3A 60 3436 3437 gstOffL and gstOffR are the guest state offsets for the two XMM 3438 register inputs. We never have to deal with the memory case since 3439 that is handled by pre-loading the relevant value into the fake 3440 XMM16 register. 3441 3442 For ESTRx variants, edxIN and eaxIN hold the values of those two 3443 registers. 3444 3445 In all cases, the bottom 16 bits of the result contain the new 3446 OSZACP %rflags values. For xSTRI variants, bits[31:16] of the 3447 result hold the new %ecx value. For xSTRM variants, the helper 3448 writes the result directly to the guest XMM0. 3449 3450 Declarable side effects: in all cases, reads guest state at 3451 [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes 3452 guest_XMM0. 3453 3454 Is expected to be called with opc_and_imm combinations which have 3455 actually been validated, and will assert if otherwise. The front 3456 end should ensure we're only called with verified values. 3457 */ 3458 ULong amd64g_dirtyhelper_PCMPxSTRx ( 3459 VexGuestAMD64State* gst, 3460 HWord opc4_and_imm, 3461 HWord gstOffL, HWord gstOffR, 3462 HWord edxIN, HWord eaxIN 3463 ) 3464 { 3465 HWord opc4 = (opc4_and_imm >> 8) & 0xFF; 3466 HWord imm8 = opc4_and_imm & 0xFF; 3467 HWord isISTRx = opc4 & 2; 3468 HWord isxSTRM = (opc4 & 1) ^ 1; 3469 vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */ 3470 HWord wide = (imm8 & 1); 3471 3472 // where the args are 3473 V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); 3474 V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); 3475 3476 /* Create the arg validity masks, either from the vectors 3477 themselves or from the supplied edx/eax values. */ 3478 // FIXME: this is only right for the 8-bit data cases. 3479 // At least that is asserted above. 3480 UInt zmaskL, zmaskR; 3481 3482 // temp spot for the resulting flags and vector. 3483 V128 resV; 3484 UInt resOSZACP; 3485 3486 // for checking whether case was handled 3487 Bool ok = False; 3488 3489 if (wide) { 3490 if (isISTRx) { 3491 zmaskL = zmask_from_V128_wide(argL); 3492 zmaskR = zmask_from_V128_wide(argR); 3493 } else { 3494 Int tmp; 3495 tmp = edxIN & 0xFFFFFFFF; 3496 if (tmp < -8) tmp = -8; 3497 if (tmp > 8) tmp = 8; 3498 if (tmp < 0) tmp = -tmp; 3499 vassert(tmp >= 0 && tmp <= 8); 3500 zmaskL = (1 << tmp) & 0xFF; 3501 tmp = eaxIN & 0xFFFFFFFF; 3502 if (tmp < -8) tmp = -8; 3503 if (tmp > 8) tmp = 8; 3504 if (tmp < 0) tmp = -tmp; 3505 vassert(tmp >= 0 && tmp <= 8); 3506 zmaskR = (1 << tmp) & 0xFF; 3507 } 3508 // do the meyaath 3509 ok = compute_PCMPxSTRx_wide ( 3510 &resV, &resOSZACP, argL, argR, 3511 zmaskL, zmaskR, imm8, (Bool)isxSTRM 3512 ); 3513 } else { 3514 if (isISTRx) { 3515 zmaskL = zmask_from_V128(argL); 3516 zmaskR = zmask_from_V128(argR); 3517 } else { 3518 Int tmp; 3519 tmp = edxIN & 0xFFFFFFFF; 3520 if (tmp < -16) tmp = -16; 3521 if (tmp > 16) tmp = 16; 3522 if (tmp < 0) tmp = -tmp; 3523 vassert(tmp >= 0 && tmp <= 16); 3524 zmaskL = (1 << tmp) & 0xFFFF; 3525 tmp = eaxIN & 0xFFFFFFFF; 3526 if (tmp < -16) tmp = -16; 3527 if (tmp > 16) tmp = 16; 3528 if (tmp < 0) tmp = -tmp; 3529 vassert(tmp >= 0 && tmp <= 16); 3530 zmaskR = (1 << tmp) & 0xFFFF; 3531 } 3532 // do the meyaath 3533 ok = compute_PCMPxSTRx ( 3534 &resV, &resOSZACP, argL, argR, 3535 zmaskL, zmaskR, imm8, (Bool)isxSTRM 3536 ); 3537 } 3538 3539 // front end shouldn't pass us any imm8 variants we can't 3540 // handle. Hence: 3541 vassert(ok); 3542 3543 // So, finally we need to get the results back to the caller. 3544 // In all cases, the new OSZACP value is the lowest 16 of 3545 // the return value. 3546 if (isxSTRM) { 3547 gst->guest_YMM0[0] = resV.w32[0]; 3548 gst->guest_YMM0[1] = resV.w32[1]; 3549 gst->guest_YMM0[2] = resV.w32[2]; 3550 gst->guest_YMM0[3] = resV.w32[3]; 3551 return resOSZACP & 0x8D5; 3552 } else { 3553 UInt newECX = resV.w32[0] & 0xFFFF; 3554 return (newECX << 16) | (resOSZACP & 0x8D5); 3555 } 3556 } 3557 3558 /*---------------------------------------------------------------*/ 3559 /*--- AES primitives and helpers ---*/ 3560 /*---------------------------------------------------------------*/ 3561 /* a 16 x 16 matrix */ 3562 static const UChar sbox[256] = { // row nr 3563 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1 3564 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, 3565 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2 3566 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 3567 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3 3568 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, 3569 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4 3570 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, 3571 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5 3572 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, 3573 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6 3574 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, 3575 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7 3576 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 3577 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8 3578 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 3579 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9 3580 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, 3581 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10 3582 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, 3583 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11 3584 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 3585 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12 3586 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, 3587 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13 3588 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, 3589 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14 3590 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 3591 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15 3592 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 3593 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16 3594 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 3595 }; 3596 static void SubBytes (V128* v) 3597 { 3598 V128 r; 3599 UInt i; 3600 for (i = 0; i < 16; i++) 3601 r.w8[i] = sbox[v->w8[i]]; 3602 *v = r; 3603 } 3604 3605 /* a 16 x 16 matrix */ 3606 static const UChar invsbox[256] = { // row nr 3607 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1 3608 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 3609 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2 3610 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, 3611 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3 3612 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, 3613 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4 3614 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, 3615 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5 3616 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, 3617 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6 3618 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, 3619 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7 3620 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, 3621 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8 3622 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, 3623 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, // 9 3624 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, 3625 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10 3626 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, 3627 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11 3628 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, 3629 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12 3630 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, 3631 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13 3632 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, 3633 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14 3634 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, 3635 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15 3636 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, 3637 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16 3638 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d 3639 }; 3640 static void InvSubBytes (V128* v) 3641 { 3642 V128 r; 3643 UInt i; 3644 for (i = 0; i < 16; i++) 3645 r.w8[i] = invsbox[v->w8[i]]; 3646 *v = r; 3647 } 3648 3649 static const UChar ShiftRows_op[16] = 3650 {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0}; 3651 static void ShiftRows (V128* v) 3652 { 3653 V128 r; 3654 UInt i; 3655 for (i = 0; i < 16; i++) 3656 r.w8[i] = v->w8[ShiftRows_op[15-i]]; 3657 *v = r; 3658 } 3659 3660 static const UChar InvShiftRows_op[16] = 3661 {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0}; 3662 static void InvShiftRows (V128* v) 3663 { 3664 V128 r; 3665 UInt i; 3666 for (i = 0; i < 16; i++) 3667 r.w8[i] = v->w8[InvShiftRows_op[15-i]]; 3668 *v = r; 3669 } 3670 3671 /* Multiplication of the finite fields elements of AES. 3672 See "A Specification for The AES Algorithm Rijndael 3673 (by Joan Daemen & Vincent Rijmen)" 3674 Dr. Brian Gladman, v3.1, 3rd March 2001. */ 3675 /* N values so that (hex) xy = 0x03^N. 3676 0x00 cannot be used. We put 0xff for this value.*/ 3677 /* a 16 x 16 matrix */ 3678 static const UChar Nxy[256] = { // row nr 3679 0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1 3680 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03, 3681 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2 3682 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1, 3683 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3 3684 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78, 3685 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4 3686 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e, 3687 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5 3688 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38, 3689 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6 3690 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10, 3691 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7 3692 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba, 3693 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8 3694 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57, 3695 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9 3696 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8, 3697 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10 3698 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0, 3699 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11 3700 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7, 3701 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12 3702 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d, 3703 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13 3704 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1, 3705 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14 3706 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, 3707 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15 3708 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, 3709 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16 3710 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07 3711 }; 3712 3713 /* E values so that E = 0x03^xy. */ 3714 static const UChar Exy[256] = { // row nr 3715 0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1 3716 0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35, 3717 0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2 3718 0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa, 3719 0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3 3720 0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31, 3721 0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4 3722 0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd, 3723 0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5 3724 0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88, 3725 0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6 3726 0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a, 3727 0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7 3728 0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3, 3729 0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8 3730 0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0, 3731 0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9 3732 0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41, 3733 0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10 3734 0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75, 3735 0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11 3736 0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80, 3737 0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12 3738 0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54, 3739 0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13 3740 0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca, 3741 0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14 3742 0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e, 3743 0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15 3744 0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17, 3745 0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16 3746 0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01}; 3747 3748 static inline UChar ff_mul(UChar u1, UChar u2) 3749 { 3750 if ((u1 > 0) && (u2 > 0)) { 3751 UInt ui = Nxy[u1] + Nxy[u2]; 3752 if (ui >= 255) 3753 ui = ui - 255; 3754 return Exy[ui]; 3755 } else { 3756 return 0; 3757 }; 3758 } 3759 3760 static void MixColumns (V128* v) 3761 { 3762 V128 r; 3763 Int j; 3764 #define P(x,row,col) (x)->w8[((row)*4+(col))] 3765 for (j = 0; j < 4; j++) { 3766 P(&r,j,0) = ff_mul(0x02, P(v,j,0)) ^ ff_mul(0x03, P(v,j,1)) 3767 ^ P(v,j,2) ^ P(v,j,3); 3768 P(&r,j,1) = P(v,j,0) ^ ff_mul( 0x02, P(v,j,1) ) 3769 ^ ff_mul(0x03, P(v,j,2) ) ^ P(v,j,3); 3770 P(&r,j,2) = P(v,j,0) ^ P(v,j,1) ^ ff_mul( 0x02, P(v,j,2) ) 3771 ^ ff_mul(0x03, P(v,j,3) ); 3772 P(&r,j,3) = ff_mul(0x03, P(v,j,0) ) ^ P(v,j,1) ^ P(v,j,2) 3773 ^ ff_mul( 0x02, P(v,j,3) ); 3774 } 3775 *v = r; 3776 #undef P 3777 } 3778 3779 static void InvMixColumns (V128* v) 3780 { 3781 V128 r; 3782 Int j; 3783 #define P(x,row,col) (x)->w8[((row)*4+(col))] 3784 for (j = 0; j < 4; j++) { 3785 P(&r,j,0) = ff_mul(0x0e, P(v,j,0) ) ^ ff_mul(0x0b, P(v,j,1) ) 3786 ^ ff_mul(0x0d,P(v,j,2) ) ^ ff_mul(0x09, P(v,j,3) ); 3787 P(&r,j,1) = ff_mul(0x09, P(v,j,0) ) ^ ff_mul(0x0e, P(v,j,1) ) 3788 ^ ff_mul(0x0b,P(v,j,2) ) ^ ff_mul(0x0d, P(v,j,3) ); 3789 P(&r,j,2) = ff_mul(0x0d, P(v,j,0) ) ^ ff_mul(0x09, P(v,j,1) ) 3790 ^ ff_mul(0x0e,P(v,j,2) ) ^ ff_mul(0x0b, P(v,j,3) ); 3791 P(&r,j,3) = ff_mul(0x0b, P(v,j,0) ) ^ ff_mul(0x0d, P(v,j,1) ) 3792 ^ ff_mul(0x09,P(v,j,2) ) ^ ff_mul(0x0e, P(v,j,3) ); 3793 } 3794 *v = r; 3795 #undef P 3796 3797 } 3798 3799 /* For description, see definition in guest_amd64_defs.h */ 3800 void amd64g_dirtyhelper_AES ( 3801 VexGuestAMD64State* gst, 3802 HWord opc4, HWord gstOffD, 3803 HWord gstOffL, HWord gstOffR 3804 ) 3805 { 3806 // where the args are 3807 V128* argD = (V128*)( ((UChar*)gst) + gstOffD ); 3808 V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); 3809 V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); 3810 V128 r; 3811 3812 switch (opc4) { 3813 case 0xDC: /* AESENC */ 3814 case 0xDD: /* AESENCLAST */ 3815 r = *argR; 3816 ShiftRows (&r); 3817 SubBytes (&r); 3818 if (opc4 == 0xDC) 3819 MixColumns (&r); 3820 argD->w64[0] = r.w64[0] ^ argL->w64[0]; 3821 argD->w64[1] = r.w64[1] ^ argL->w64[1]; 3822 break; 3823 3824 case 0xDE: /* AESDEC */ 3825 case 0xDF: /* AESDECLAST */ 3826 r = *argR; 3827 InvShiftRows (&r); 3828 InvSubBytes (&r); 3829 if (opc4 == 0xDE) 3830 InvMixColumns (&r); 3831 argD->w64[0] = r.w64[0] ^ argL->w64[0]; 3832 argD->w64[1] = r.w64[1] ^ argL->w64[1]; 3833 break; 3834 3835 case 0xDB: /* AESIMC */ 3836 *argD = *argL; 3837 InvMixColumns (argD); 3838 break; 3839 default: vassert(0); 3840 } 3841 } 3842 3843 static inline UInt RotWord (UInt w32) 3844 { 3845 return ((w32 >> 8) | (w32 << 24)); 3846 } 3847 3848 static inline UInt SubWord (UInt w32) 3849 { 3850 UChar *w8; 3851 UChar *r8; 3852 UInt res; 3853 w8 = (UChar*) &w32; 3854 r8 = (UChar*) &res; 3855 r8[0] = sbox[w8[0]]; 3856 r8[1] = sbox[w8[1]]; 3857 r8[2] = sbox[w8[2]]; 3858 r8[3] = sbox[w8[3]]; 3859 return res; 3860 } 3861 3862 /* For description, see definition in guest_amd64_defs.h */ 3863 extern void amd64g_dirtyhelper_AESKEYGENASSIST ( 3864 VexGuestAMD64State* gst, 3865 HWord imm8, 3866 HWord gstOffL, HWord gstOffR 3867 ) 3868 { 3869 // where the args are 3870 V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); 3871 V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); 3872 3873 // We have to create the result in a temporary in the 3874 // case where the src and dst regs are the same. See #341698. 3875 V128 tmp; 3876 3877 tmp.w32[3] = RotWord (SubWord (argL->w32[3])) ^ imm8; 3878 tmp.w32[2] = SubWord (argL->w32[3]); 3879 tmp.w32[1] = RotWord (SubWord (argL->w32[1])) ^ imm8; 3880 tmp.w32[0] = SubWord (argL->w32[1]); 3881 3882 argR->w32[3] = tmp.w32[3]; 3883 argR->w32[2] = tmp.w32[2]; 3884 argR->w32[1] = tmp.w32[1]; 3885 argR->w32[0] = tmp.w32[0]; 3886 } 3887 3888 3889 3890 /*---------------------------------------------------------------*/ 3891 /*--- Helpers for dealing with, and describing, ---*/ 3892 /*--- guest state as a whole. ---*/ 3893 /*---------------------------------------------------------------*/ 3894 3895 /* Initialise the entire amd64 guest state. */ 3896 /* VISIBLE TO LIBVEX CLIENT */ 3897 void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state ) 3898 { 3899 vex_state->host_EvC_FAILADDR = 0; 3900 vex_state->host_EvC_COUNTER = 0; 3901 vex_state->pad0 = 0; 3902 3903 vex_state->guest_RAX = 0; 3904 vex_state->guest_RCX = 0; 3905 vex_state->guest_RDX = 0; 3906 vex_state->guest_RBX = 0; 3907 vex_state->guest_RSP = 0; 3908 vex_state->guest_RBP = 0; 3909 vex_state->guest_RSI = 0; 3910 vex_state->guest_RDI = 0; 3911 vex_state->guest_R8 = 0; 3912 vex_state->guest_R9 = 0; 3913 vex_state->guest_R10 = 0; 3914 vex_state->guest_R11 = 0; 3915 vex_state->guest_R12 = 0; 3916 vex_state->guest_R13 = 0; 3917 vex_state->guest_R14 = 0; 3918 vex_state->guest_R15 = 0; 3919 3920 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY; 3921 vex_state->guest_CC_DEP1 = 0; 3922 vex_state->guest_CC_DEP2 = 0; 3923 vex_state->guest_CC_NDEP = 0; 3924 3925 vex_state->guest_DFLAG = 1; /* forwards */ 3926 vex_state->guest_IDFLAG = 0; 3927 vex_state->guest_ACFLAG = 0; 3928 3929 /* HACK: represent the offset associated with a constant %fs. 3930 Typically, on linux, this assumes that %fs is only ever zero (main 3931 thread) or 0x63. */ 3932 vex_state->guest_FS_CONST = 0; 3933 3934 vex_state->guest_RIP = 0; 3935 3936 /* Initialise the simulated FPU */ 3937 amd64g_dirtyhelper_FINIT( vex_state ); 3938 3939 /* Initialise the AVX state. */ 3940 # define AVXZERO(_ymm) \ 3941 do { _ymm[0]=_ymm[1]=_ymm[2]=_ymm[3] = 0; \ 3942 _ymm[4]=_ymm[5]=_ymm[6]=_ymm[7] = 0; \ 3943 } while (0) 3944 vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST; 3945 AVXZERO(vex_state->guest_YMM0); 3946 AVXZERO(vex_state->guest_YMM1); 3947 AVXZERO(vex_state->guest_YMM2); 3948 AVXZERO(vex_state->guest_YMM3); 3949 AVXZERO(vex_state->guest_YMM4); 3950 AVXZERO(vex_state->guest_YMM5); 3951 AVXZERO(vex_state->guest_YMM6); 3952 AVXZERO(vex_state->guest_YMM7); 3953 AVXZERO(vex_state->guest_YMM8); 3954 AVXZERO(vex_state->guest_YMM9); 3955 AVXZERO(vex_state->guest_YMM10); 3956 AVXZERO(vex_state->guest_YMM11); 3957 AVXZERO(vex_state->guest_YMM12); 3958 AVXZERO(vex_state->guest_YMM13); 3959 AVXZERO(vex_state->guest_YMM14); 3960 AVXZERO(vex_state->guest_YMM15); 3961 AVXZERO(vex_state->guest_YMM16); 3962 3963 # undef AVXZERO 3964 3965 vex_state->guest_EMNOTE = EmNote_NONE; 3966 3967 /* These should not ever be either read or written, but we 3968 initialise them anyway. */ 3969 vex_state->guest_CMSTART = 0; 3970 vex_state->guest_CMLEN = 0; 3971 3972 vex_state->guest_NRADDR = 0; 3973 vex_state->guest_SC_CLASS = 0; 3974 vex_state->guest_GS_CONST = 0; 3975 3976 vex_state->guest_IP_AT_SYSCALL = 0; 3977 vex_state->pad1 = 0; 3978 } 3979 3980 3981 /* Figure out if any part of the guest state contained in minoff 3982 .. maxoff requires precise memory exceptions. If in doubt return 3983 True (but this generates significantly slower code). 3984 3985 By default we enforce precise exns for guest %RSP, %RBP and %RIP 3986 only. These are the minimum needed to extract correct stack 3987 backtraces from amd64 code. 3988 3989 Only %RSP is needed in mode VexRegUpdSpAtMemAccess. 3990 */ 3991 Bool guest_amd64_state_requires_precise_mem_exns ( 3992 Int minoff, Int maxoff, VexRegisterUpdates pxControl 3993 ) 3994 { 3995 Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP); 3996 Int rbp_max = rbp_min + 8 - 1; 3997 Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP); 3998 Int rsp_max = rsp_min + 8 - 1; 3999 Int rip_min = offsetof(VexGuestAMD64State, guest_RIP); 4000 Int rip_max = rip_min + 8 - 1; 4001 4002 if (maxoff < rsp_min || minoff > rsp_max) { 4003 /* no overlap with rsp */ 4004 if (pxControl == VexRegUpdSpAtMemAccess) 4005 return False; // We only need to check stack pointer. 4006 } else { 4007 return True; 4008 } 4009 4010 if (maxoff < rbp_min || minoff > rbp_max) { 4011 /* no overlap with rbp */ 4012 } else { 4013 return True; 4014 } 4015 4016 if (maxoff < rip_min || minoff > rip_max) { 4017 /* no overlap with eip */ 4018 } else { 4019 return True; 4020 } 4021 4022 return False; 4023 } 4024 4025 4026 #define ALWAYSDEFD(field) \ 4027 { offsetof(VexGuestAMD64State, field), \ 4028 (sizeof ((VexGuestAMD64State*)0)->field) } 4029 4030 VexGuestLayout 4031 amd64guest_layout 4032 = { 4033 /* Total size of the guest state, in bytes. */ 4034 .total_sizeB = sizeof(VexGuestAMD64State), 4035 4036 /* Describe the stack pointer. */ 4037 .offset_SP = offsetof(VexGuestAMD64State,guest_RSP), 4038 .sizeof_SP = 8, 4039 4040 /* Describe the frame pointer. */ 4041 .offset_FP = offsetof(VexGuestAMD64State,guest_RBP), 4042 .sizeof_FP = 8, 4043 4044 /* Describe the instruction pointer. */ 4045 .offset_IP = offsetof(VexGuestAMD64State,guest_RIP), 4046 .sizeof_IP = 8, 4047 4048 /* Describe any sections to be regarded by Memcheck as 4049 'always-defined'. */ 4050 .n_alwaysDefd = 16, 4051 4052 /* flags thunk: OP and NDEP are always defd, whereas DEP1 4053 and DEP2 have to be tracked. See detailed comment in 4054 gdefs.h on meaning of thunk fields. */ 4055 .alwaysDefd 4056 = { /* 0 */ ALWAYSDEFD(guest_CC_OP), 4057 /* 1 */ ALWAYSDEFD(guest_CC_NDEP), 4058 /* 2 */ ALWAYSDEFD(guest_DFLAG), 4059 /* 3 */ ALWAYSDEFD(guest_IDFLAG), 4060 /* 4 */ ALWAYSDEFD(guest_RIP), 4061 /* 5 */ ALWAYSDEFD(guest_FS_CONST), 4062 /* 6 */ ALWAYSDEFD(guest_FTOP), 4063 /* 7 */ ALWAYSDEFD(guest_FPTAG), 4064 /* 8 */ ALWAYSDEFD(guest_FPROUND), 4065 /* 9 */ ALWAYSDEFD(guest_FC3210), 4066 // /* */ ALWAYSDEFD(guest_CS), 4067 // /* */ ALWAYSDEFD(guest_DS), 4068 // /* */ ALWAYSDEFD(guest_ES), 4069 // /* */ ALWAYSDEFD(guest_FS), 4070 // /* */ ALWAYSDEFD(guest_GS), 4071 // /* */ ALWAYSDEFD(guest_SS), 4072 // /* */ ALWAYSDEFD(guest_LDT), 4073 // /* */ ALWAYSDEFD(guest_GDT), 4074 /* 10 */ ALWAYSDEFD(guest_EMNOTE), 4075 /* 11 */ ALWAYSDEFD(guest_SSEROUND), 4076 /* 12 */ ALWAYSDEFD(guest_CMSTART), 4077 /* 13 */ ALWAYSDEFD(guest_CMLEN), 4078 /* 14 */ ALWAYSDEFD(guest_SC_CLASS), 4079 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL) 4080 } 4081 }; 4082 4083 4084 /*---------------------------------------------------------------*/ 4085 /*--- end guest_amd64_helpers.c ---*/ 4086 /*---------------------------------------------------------------*/ 4087