1 2 /*--------------------------------------------------------------------*/ 3 /*--- Instrument IR to perform memory checking operations. ---*/ 4 /*--- mc_translate.c ---*/ 5 /*--------------------------------------------------------------------*/ 6 7 /* 8 This file is part of MemCheck, a heavyweight Valgrind tool for 9 detecting memory errors. 10 11 Copyright (C) 2000-2011 Julian Seward 12 jseward (at) acm.org 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30 */ 31 32 #include "pub_tool_basics.h" 33 #include "pub_tool_hashtable.h" // For mc_include.h 34 #include "pub_tool_libcassert.h" 35 #include "pub_tool_libcprint.h" 36 #include "pub_tool_tooliface.h" 37 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry) 38 #include "pub_tool_xarray.h" 39 #include "pub_tool_mallocfree.h" 40 #include "pub_tool_libcbase.h" 41 42 #include "mc_include.h" 43 44 45 /* FIXMEs JRS 2011-June-16. 46 47 Check the interpretation for vector narrowing and widening ops, 48 particularly the saturating ones. I suspect they are either overly 49 pessimistic and/or wrong. 50 */ 51 52 /* This file implements the Memcheck instrumentation, and in 53 particular contains the core of its undefined value detection 54 machinery. For a comprehensive background of the terminology, 55 algorithms and rationale used herein, read: 56 57 Using Valgrind to detect undefined value errors with 58 bit-precision 59 60 Julian Seward and Nicholas Nethercote 61 62 2005 USENIX Annual Technical Conference (General Track), 63 Anaheim, CA, USA, April 10-15, 2005. 64 65 ---- 66 67 Here is as good a place as any to record exactly when V bits are and 68 should be checked, why, and what function is responsible. 69 70 71 Memcheck complains when an undefined value is used: 72 73 1. In the condition of a conditional branch. Because it could cause 74 incorrect control flow, and thus cause incorrect externally-visible 75 behaviour. [mc_translate.c:complainIfUndefined] 76 77 2. As an argument to a system call, or as the value that specifies 78 the system call number. Because it could cause an incorrect 79 externally-visible side effect. [mc_translate.c:mc_pre_reg_read] 80 81 3. As the address in a load or store. Because it could cause an 82 incorrect value to be used later, which could cause externally-visible 83 behaviour (eg. via incorrect control flow or an incorrect system call 84 argument) [complainIfUndefined] 85 86 4. As the target address of a branch. Because it could cause incorrect 87 control flow. [complainIfUndefined] 88 89 5. As an argument to setenv, unsetenv, or putenv. Because it could put 90 an incorrect value into the external environment. 91 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)] 92 93 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn). 94 [complainIfUndefined] 95 96 7. 
As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
   VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
   requested it.  [in memcheck.h]


   Memcheck also complains, but should not, when an undefined value is used:

   8. As the shift value in certain SIMD shift operations (but not in the
   standard integer shift operations).  This inconsistency is due to
   historical reasons.  [complainIfUndefined]


   Memcheck does not complain, but should, when an undefined value is used:

   9. As an input to a client request.  Because the client request may
   affect the visible behaviour -- see bug #144362 for an example
   involving the malloc replacements in vg_replace_malloc.c and
   VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
   isn't identified.  That bug report also has some info on how to solve
   the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]


   In practice, 1 and 2 account for the vast majority of cases.
*/

/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowTypeV ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );

static IRExpr *i128_const_zero(void);

/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value,
   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;
      IRTemp   shadowV;
      IRTemp   shadowB;
   }
   TempMapEnt;


/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         the total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
         instrumentation process.
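
         As an illustrative sketch, the two lookups for a temp 't'
         therefore run in parallel:

            IRType      ty  = mce->sb->tyenv->types[t];
            TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, (Word)t );

         and the invariant is simply that both containers always have
         the same number of entries.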
*/
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_types-1], which gives the current shadow for
   each original tmp, or INVALID_IRTEMP if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
   both the table in mce->sb and to our auxiliary mapping.  Note that
   newTemp may cause mce->tmpMap to resize, hence previous results
   from VG_(indexXA)(mce->tmpMap) are invalidated. */
static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
{
   Word       newIx;
   TempMapEnt ent;
   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
   ent.kind    = kind;
   ent.shadowV = IRTemp_INVALID;
   ent.shadowB = IRTemp_INVALID;
   newIx = VG_(addToXA)( mce->tmpMap, &ent );
   tl_assert(newIx == (Word)tmp);
   return tmp;
}


/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.
Instead we must abandon the old shadow, allocate a new one 272 and use that instead. 273 274 This is the same as findShadowTmpV, except we don't bother to see 275 if a shadow temp already existed -- we simply allocate a new one 276 regardless. */ 277 static void newShadowTmpV ( MCEnv* mce, IRTemp orig ) 278 { 279 TempMapEnt* ent; 280 /* VG_(indexXA) range-checks 'orig', hence no need to check 281 here. */ 282 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 283 tl_assert(ent->kind == Orig); 284 if (1) { 285 IRTemp tmpV 286 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh ); 287 /* newTemp may cause mce->tmpMap to resize, hence previous results 288 from VG_(indexXA) are invalid. */ 289 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 290 tl_assert(ent->kind == Orig); 291 ent->shadowV = tmpV; 292 } 293 } 294 295 296 /*------------------------------------------------------------*/ 297 /*--- IRAtoms -- a subset of IRExprs ---*/ 298 /*------------------------------------------------------------*/ 299 300 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by 301 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat 302 input, most of this code deals in atoms. Usefully, a value atom 303 always has a V-value which is also an atom: constants are shadowed 304 by constants, and temps are shadowed by the corresponding shadow 305 temporary. */ 306 307 typedef IRExpr IRAtom; 308 309 /* (used for sanity checks only): is this an atom which looks 310 like it's from original code? */ 311 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 ) 312 { 313 if (a1->tag == Iex_Const) 314 return True; 315 if (a1->tag == Iex_RdTmp) { 316 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp ); 317 return ent->kind == Orig; 318 } 319 return False; 320 } 321 322 /* (used for sanity checks only): is this an atom which looks 323 like it's from shadow code? */ 324 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 ) 325 { 326 if (a1->tag == Iex_Const) 327 return True; 328 if (a1->tag == Iex_RdTmp) { 329 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp ); 330 return ent->kind == VSh || ent->kind == BSh; 331 } 332 return False; 333 } 334 335 /* (used for sanity checks only): check that both args are atoms and 336 are identically-kinded. */ 337 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 ) 338 { 339 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp) 340 return True; 341 if (a1->tag == Iex_Const && a2->tag == Iex_Const) 342 return True; 343 return False; 344 } 345 346 347 /*------------------------------------------------------------*/ 348 /*--- Type management ---*/ 349 /*------------------------------------------------------------*/ 350 351 /* Shadow state is always accessed using integer types. This returns 352 an integer type with the same size (as per sizeofIRType) as the 353 given type. The only valid shadow types are Bit, I8, I16, I32, 354 I64, I128, V128. */ 355 356 static IRType shadowTypeV ( IRType ty ) 357 { 358 switch (ty) { 359 case Ity_I1: 360 case Ity_I8: 361 case Ity_I16: 362 case Ity_I32: 363 case Ity_I64: 364 case Ity_I128: return ty; 365 case Ity_F32: return Ity_I32; 366 case Ity_F64: return Ity_I64; 367 case Ity_F128: return Ity_I128; 368 case Ity_V128: return Ity_V128; 369 default: ppIRType(ty); 370 VG_(tool_panic)("memcheck:shadowTypeV"); 371 } 372 } 373 374 /* Produce a 'defined' value of the given shadow type. Should only be 375 supplied shadow types (Bit/I8/I16/I32/UI64). 
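
   For example (illustrative): since a V bit of 0 means 'defined', the
   V128 case below returns IRConst_V128(0x0000), whose 16-bit mask selects
   the byte 0x00 for every lane, i.e. an all-zeroes and hence all-defined
   128-bit value.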
*/ 376 static IRExpr* definedOfType ( IRType ty ) { 377 switch (ty) { 378 case Ity_I1: return IRExpr_Const(IRConst_U1(False)); 379 case Ity_I8: return IRExpr_Const(IRConst_U8(0)); 380 case Ity_I16: return IRExpr_Const(IRConst_U16(0)); 381 case Ity_I32: return IRExpr_Const(IRConst_U32(0)); 382 case Ity_I64: return IRExpr_Const(IRConst_U64(0)); 383 case Ity_I128: return i128_const_zero(); 384 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000)); 385 default: VG_(tool_panic)("memcheck:definedOfType"); 386 } 387 } 388 389 390 /*------------------------------------------------------------*/ 391 /*--- Constructing IR fragments ---*/ 392 /*------------------------------------------------------------*/ 393 394 /* add stmt to a bb */ 395 static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) { 396 if (mce->trace) { 397 VG_(printf)(" %c: ", cat); 398 ppIRStmt(st); 399 VG_(printf)("\n"); 400 } 401 addStmtToIRSB(mce->sb, st); 402 } 403 404 /* assign value to tmp */ 405 static inline 406 void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) { 407 stmt(cat, mce, IRStmt_WrTmp(tmp,expr)); 408 } 409 410 /* build various kinds of expressions */ 411 #define triop(_op, _arg1, _arg2, _arg3) \ 412 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3)) 413 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2)) 414 #define unop(_op, _arg) IRExpr_Unop((_op),(_arg)) 415 #define mkU8(_n) IRExpr_Const(IRConst_U8(_n)) 416 #define mkU16(_n) IRExpr_Const(IRConst_U16(_n)) 417 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n)) 418 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n)) 419 #define mkV128(_n) IRExpr_Const(IRConst_V128(_n)) 420 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp)) 421 422 /* Bind the given expression to a new temporary, and return the 423 temporary. This effectively converts an arbitrary expression into 424 an atom. 425 426 'ty' is the type of 'e' and hence the type that the new temporary 427 needs to be. But passing it in is redundant, since we can deduce 428 the type merely by inspecting 'e'. So at least use that fact to 429 assert that the two types agree. */ 430 static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e ) 431 { 432 TempKind k; 433 IRTemp t; 434 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e); 435 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */ 436 switch (cat) { 437 case 'V': k = VSh; break; 438 case 'B': k = BSh; break; 439 case 'C': k = Orig; break; 440 /* happens when we are making up new "orig" 441 expressions, for IRCAS handling */ 442 default: tl_assert(0); 443 } 444 t = newTemp(mce, ty, k); 445 assign(cat, mce, t, e); 446 return mkexpr(t); 447 } 448 449 450 /*------------------------------------------------------------*/ 451 /*--- Helper functions for 128-bit ops ---*/ 452 /*------------------------------------------------------------*/ 453 static IRExpr *i128_const_zero(void) 454 { 455 return binop(Iop_64HLto128, IRExpr_Const(IRConst_U64(0)), 456 IRExpr_Const(IRConst_U64(0))); 457 } 458 459 /* There are no 128-bit loads and/or stores. 
So we do not need to worry 460 about that in expr2vbits_Load */ 461 462 /*------------------------------------------------------------*/ 463 /*--- Constructing definedness primitive ops ---*/ 464 /*------------------------------------------------------------*/ 465 466 /* --------- Defined-if-either-defined --------- */ 467 468 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 469 tl_assert(isShadowAtom(mce,a1)); 470 tl_assert(isShadowAtom(mce,a2)); 471 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2)); 472 } 473 474 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 475 tl_assert(isShadowAtom(mce,a1)); 476 tl_assert(isShadowAtom(mce,a2)); 477 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2)); 478 } 479 480 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 481 tl_assert(isShadowAtom(mce,a1)); 482 tl_assert(isShadowAtom(mce,a2)); 483 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2)); 484 } 485 486 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 487 tl_assert(isShadowAtom(mce,a1)); 488 tl_assert(isShadowAtom(mce,a2)); 489 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2)); 490 } 491 492 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 493 tl_assert(isShadowAtom(mce,a1)); 494 tl_assert(isShadowAtom(mce,a2)); 495 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2)); 496 } 497 498 /* --------- Undefined-if-either-undefined --------- */ 499 500 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 501 tl_assert(isShadowAtom(mce,a1)); 502 tl_assert(isShadowAtom(mce,a2)); 503 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2)); 504 } 505 506 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 507 tl_assert(isShadowAtom(mce,a1)); 508 tl_assert(isShadowAtom(mce,a2)); 509 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2)); 510 } 511 512 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 513 tl_assert(isShadowAtom(mce,a1)); 514 tl_assert(isShadowAtom(mce,a2)); 515 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2)); 516 } 517 518 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 519 tl_assert(isShadowAtom(mce,a1)); 520 tl_assert(isShadowAtom(mce,a2)); 521 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2)); 522 } 523 524 static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 525 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6; 526 tl_assert(isShadowAtom(mce,a1)); 527 tl_assert(isShadowAtom(mce,a2)); 528 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1)); 529 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1)); 530 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2)); 531 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2)); 532 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3)); 533 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4)); 534 535 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5)); 536 } 537 538 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 539 tl_assert(isShadowAtom(mce,a1)); 540 tl_assert(isShadowAtom(mce,a2)); 541 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2)); 542 } 543 544 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) { 545 switch (vty) { 546 case Ity_I8: return mkUifU8(mce, a1, a2); 547 case Ity_I16: return mkUifU16(mce, a1, a2); 548 case Ity_I32: return mkUifU32(mce, a1, a2); 549 case Ity_I64: return mkUifU64(mce, 
a1, a2); 550 case Ity_I128: return mkUifU128(mce, a1, a2); 551 case Ity_V128: return mkUifUV128(mce, a1, a2); 552 default: 553 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n"); 554 VG_(tool_panic)("memcheck:mkUifU"); 555 } 556 } 557 558 /* --------- The Left-family of operations. --------- */ 559 560 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) { 561 tl_assert(isShadowAtom(mce,a1)); 562 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1)); 563 } 564 565 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) { 566 tl_assert(isShadowAtom(mce,a1)); 567 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1)); 568 } 569 570 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) { 571 tl_assert(isShadowAtom(mce,a1)); 572 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1)); 573 } 574 575 static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) { 576 tl_assert(isShadowAtom(mce,a1)); 577 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1)); 578 } 579 580 /* --------- 'Improvement' functions for AND/OR. --------- */ 581 582 /* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give 583 defined (0); all other -> undefined (1). 584 */ 585 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 586 { 587 tl_assert(isOriginalAtom(mce, data)); 588 tl_assert(isShadowAtom(mce, vbits)); 589 tl_assert(sameKindedAtoms(data, vbits)); 590 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits)); 591 } 592 593 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 594 { 595 tl_assert(isOriginalAtom(mce, data)); 596 tl_assert(isShadowAtom(mce, vbits)); 597 tl_assert(sameKindedAtoms(data, vbits)); 598 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits)); 599 } 600 601 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 602 { 603 tl_assert(isOriginalAtom(mce, data)); 604 tl_assert(isShadowAtom(mce, vbits)); 605 tl_assert(sameKindedAtoms(data, vbits)); 606 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits)); 607 } 608 609 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 610 { 611 tl_assert(isOriginalAtom(mce, data)); 612 tl_assert(isShadowAtom(mce, vbits)); 613 tl_assert(sameKindedAtoms(data, vbits)); 614 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits)); 615 } 616 617 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 618 { 619 tl_assert(isOriginalAtom(mce, data)); 620 tl_assert(isShadowAtom(mce, vbits)); 621 tl_assert(sameKindedAtoms(data, vbits)); 622 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits)); 623 } 624 625 /* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give 626 defined (0); all other -> undefined (1). 
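
   As a worked illustration (sketch only), for 8-bit values with

      data  = 0x81   -- bits 7 and 0 of the original value are 1
      vbits = 0x0F   -- the low four bits of 'data' are undefined

   mkImproveOR8 below computes ~data | vbits = 0x7E | 0x0F = 0x7F.  Bit 7
   is 0: data bit 7 is a defined 1, so the OR result bit is forced to 1
   and is therefore known-defined.  Bit 0 stays 1 because that data bit is
   itself undefined.  The term is suitable for DifD-ing (And-ing) onto the
   plain UifU approximation, in the same style as the CmpEQ/CmpNE
   improvement term further down.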
627 */ 628 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 629 { 630 tl_assert(isOriginalAtom(mce, data)); 631 tl_assert(isShadowAtom(mce, vbits)); 632 tl_assert(sameKindedAtoms(data, vbits)); 633 return assignNew( 634 'V', mce, Ity_I8, 635 binop(Iop_Or8, 636 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)), 637 vbits) ); 638 } 639 640 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 641 { 642 tl_assert(isOriginalAtom(mce, data)); 643 tl_assert(isShadowAtom(mce, vbits)); 644 tl_assert(sameKindedAtoms(data, vbits)); 645 return assignNew( 646 'V', mce, Ity_I16, 647 binop(Iop_Or16, 648 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)), 649 vbits) ); 650 } 651 652 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 653 { 654 tl_assert(isOriginalAtom(mce, data)); 655 tl_assert(isShadowAtom(mce, vbits)); 656 tl_assert(sameKindedAtoms(data, vbits)); 657 return assignNew( 658 'V', mce, Ity_I32, 659 binop(Iop_Or32, 660 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)), 661 vbits) ); 662 } 663 664 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 665 { 666 tl_assert(isOriginalAtom(mce, data)); 667 tl_assert(isShadowAtom(mce, vbits)); 668 tl_assert(sameKindedAtoms(data, vbits)); 669 return assignNew( 670 'V', mce, Ity_I64, 671 binop(Iop_Or64, 672 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)), 673 vbits) ); 674 } 675 676 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 677 { 678 tl_assert(isOriginalAtom(mce, data)); 679 tl_assert(isShadowAtom(mce, vbits)); 680 tl_assert(sameKindedAtoms(data, vbits)); 681 return assignNew( 682 'V', mce, Ity_V128, 683 binop(Iop_OrV128, 684 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)), 685 vbits) ); 686 } 687 688 /* --------- Pessimising casts. --------- */ 689 690 /* The function returns an expression of type DST_TY. If any of the VBITS 691 is undefined (value == 1) the resulting expression has all bits set to 692 1. Otherwise, all bits are 0. */ 693 694 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits ) 695 { 696 IRType src_ty; 697 IRAtom* tmp1; 698 /* Note, dst_ty is a shadow type, not an original type. */ 699 /* First of all, collapse vbits down to a single bit. */ 700 tl_assert(isShadowAtom(mce,vbits)); 701 src_ty = typeOfIRExpr(mce->sb->tyenv, vbits); 702 703 /* Fast-track some common cases */ 704 if (src_ty == Ity_I32 && dst_ty == Ity_I32) 705 return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits)); 706 707 if (src_ty == Ity_I64 && dst_ty == Ity_I64) 708 return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits)); 709 710 if (src_ty == Ity_I32 && dst_ty == Ity_I64) { 711 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits)); 712 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp)); 713 } 714 715 /* Else do it the slow way .. */ 716 tmp1 = NULL; 717 switch (src_ty) { 718 case Ity_I1: 719 tmp1 = vbits; 720 break; 721 case Ity_I8: 722 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits)); 723 break; 724 case Ity_I16: 725 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits)); 726 break; 727 case Ity_I32: 728 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits)); 729 break; 730 case Ity_I64: 731 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits)); 732 break; 733 case Ity_I128: { 734 /* Gah. Chop it in half, OR the halves together, and compare 735 that with zero. 
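
         An illustrative note on why this is sound: Or64 of the two halves
         is nonzero exactly when some bit of the 128-bit value is nonzero,
         so CmpNEZ64 of it yields 1 precisely when any V bit is undefined;
         the widening switch below then smears that single bit across all
         of dst_ty.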
*/ 736 IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits)); 737 IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits)); 738 IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3)); 739 tmp1 = assignNew('V', mce, Ity_I1, 740 unop(Iop_CmpNEZ64, tmp4)); 741 break; 742 } 743 default: 744 ppIRType(src_ty); 745 VG_(tool_panic)("mkPCastTo(1)"); 746 } 747 tl_assert(tmp1); 748 /* Now widen up to the dst type. */ 749 switch (dst_ty) { 750 case Ity_I1: 751 return tmp1; 752 case Ity_I8: 753 return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1)); 754 case Ity_I16: 755 return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1)); 756 case Ity_I32: 757 return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1)); 758 case Ity_I64: 759 return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1)); 760 case Ity_V128: 761 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1)); 762 tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1)); 763 return tmp1; 764 case Ity_I128: 765 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1)); 766 tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1)); 767 return tmp1; 768 default: 769 ppIRType(dst_ty); 770 VG_(tool_panic)("mkPCastTo(2)"); 771 } 772 } 773 774 /* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */ 775 /* 776 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and 777 PCasting to Ity_U1. However, sometimes it is necessary to be more 778 accurate. The insight is that the result is defined if two 779 corresponding bits can be found, one from each argument, so that 780 both bits are defined but are different -- that makes EQ say "No" 781 and NE say "Yes". Hence, we compute an improvement term and DifD 782 it onto the "normal" (UifU) result. 783 784 The result is: 785 786 PCastTo<1> ( 787 -- naive version 788 PCastTo<sz>( UifU<sz>(vxx, vyy) ) 789 790 `DifD<sz>` 791 792 -- improvement term 793 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) ) 794 ) 795 796 where 797 vec contains 0 (defined) bits where the corresponding arg bits 798 are defined but different, and 1 bits otherwise. 799 800 vec = Or<sz>( vxx, // 0 iff bit defined 801 vyy, // 0 iff bit defined 802 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different 803 ) 804 805 If any bit of vec is 0, the result is defined and so the 806 improvement term should produce 0...0, else it should produce 807 1...1. 808 809 Hence require for the improvement term: 810 811 if vec == 1...1 then 1...1 else 0...0 812 -> 813 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) 814 815 This was extensively re-analysed and checked on 6 July 05. 
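
   A concrete illustration (a sketch, using 4-bit values for brevity;
   the code below only handles the 32- and 64-bit cases):

      xx  = 1010   vxx = 0001    -- bit 0 of xx is undefined
      yy  = 0010   vyy = 0000    -- yy is fully defined

      vec = Or( vxx, vyy, Not(Xor(xx,yy)) )
          = 0001 | 0000 | Not(1000)
          = 0001 | 0000 | 0111
          = 0111

   Bit 3 of vec is 0 (both argument bits defined, and different), so
   vec != 1111, CmpEQ(vec, 1...1) is false, and the improvement term
   PCasts to 0...0.  DifD-ing that onto the naive term forces the final
   result to 'defined', which is right: the two values are certainly
   unequal regardless of the undefined bit 0.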
816 */ 817 static IRAtom* expensiveCmpEQorNE ( MCEnv* mce, 818 IRType ty, 819 IRAtom* vxx, IRAtom* vyy, 820 IRAtom* xx, IRAtom* yy ) 821 { 822 IRAtom *naive, *vec, *improvement_term; 823 IRAtom *improved, *final_cast, *top; 824 IROp opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR; 825 826 tl_assert(isShadowAtom(mce,vxx)); 827 tl_assert(isShadowAtom(mce,vyy)); 828 tl_assert(isOriginalAtom(mce,xx)); 829 tl_assert(isOriginalAtom(mce,yy)); 830 tl_assert(sameKindedAtoms(vxx,xx)); 831 tl_assert(sameKindedAtoms(vyy,yy)); 832 833 switch (ty) { 834 case Ity_I32: 835 opOR = Iop_Or32; 836 opDIFD = Iop_And32; 837 opUIFU = Iop_Or32; 838 opNOT = Iop_Not32; 839 opXOR = Iop_Xor32; 840 opCMP = Iop_CmpEQ32; 841 top = mkU32(0xFFFFFFFF); 842 break; 843 case Ity_I64: 844 opOR = Iop_Or64; 845 opDIFD = Iop_And64; 846 opUIFU = Iop_Or64; 847 opNOT = Iop_Not64; 848 opXOR = Iop_Xor64; 849 opCMP = Iop_CmpEQ64; 850 top = mkU64(0xFFFFFFFFFFFFFFFFULL); 851 break; 852 default: 853 VG_(tool_panic)("expensiveCmpEQorNE"); 854 } 855 856 naive 857 = mkPCastTo(mce,ty, 858 assignNew('V', mce, ty, binop(opUIFU, vxx, vyy))); 859 860 vec 861 = assignNew( 862 'V', mce,ty, 863 binop( opOR, 864 assignNew('V', mce,ty, binop(opOR, vxx, vyy)), 865 assignNew( 866 'V', mce,ty, 867 unop( opNOT, 868 assignNew('V', mce,ty, binop(opXOR, xx, yy)))))); 869 870 improvement_term 871 = mkPCastTo( mce,ty, 872 assignNew('V', mce,Ity_I1, binop(opCMP, vec, top))); 873 874 improved 875 = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) ); 876 877 final_cast 878 = mkPCastTo( mce, Ity_I1, improved ); 879 880 return final_cast; 881 } 882 883 884 /* --------- Semi-accurate interpretation of CmpORD. --------- */ 885 886 /* CmpORD32{S,U} does PowerPC-style 3-way comparisons: 887 888 CmpORD32S(x,y) = 1<<3 if x <s y 889 = 1<<2 if x >s y 890 = 1<<1 if x == y 891 892 and similarly the unsigned variant. The default interpretation is: 893 894 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#) 895 & (7<<1) 896 897 The "& (7<<1)" reflects the fact that all result bits except 3,2,1 898 are zero and therefore defined (viz, zero). 899 900 Also deal with a special case better: 901 902 CmpORD32S(x,0) 903 904 Here, bit 3 (LT) of the result is a copy of the top bit of x and 905 will be defined even if the rest of x isn't. In which case we do: 906 907 CmpORD32S#(x,x#,0,{impliedly 0}#) 908 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ# 909 | (x# >>u 31) << 3 -- LT# = x#[31] 910 911 Analogous handling for CmpORD64{S,U}. 912 */ 913 static Bool isZeroU32 ( IRAtom* e ) 914 { 915 return 916 toBool( e->tag == Iex_Const 917 && e->Iex.Const.con->tag == Ico_U32 918 && e->Iex.Const.con->Ico.U32 == 0 ); 919 } 920 921 static Bool isZeroU64 ( IRAtom* e ) 922 { 923 return 924 toBool( e->tag == Iex_Const 925 && e->Iex.Const.con->tag == Ico_U64 926 && e->Iex.Const.con->Ico.U64 == 0 ); 927 } 928 929 static IRAtom* doCmpORD ( MCEnv* mce, 930 IROp cmp_op, 931 IRAtom* xxhash, IRAtom* yyhash, 932 IRAtom* xx, IRAtom* yy ) 933 { 934 Bool m64 = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U; 935 Bool syned = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S; 936 IROp opOR = m64 ? Iop_Or64 : Iop_Or32; 937 IROp opAND = m64 ? Iop_And64 : Iop_And32; 938 IROp opSHL = m64 ? Iop_Shl64 : Iop_Shl32; 939 IROp opSHR = m64 ? Iop_Shr64 : Iop_Shr32; 940 IRType ty = m64 ? Ity_I64 : Ity_I32; 941 Int width = m64 ? 64 : 32; 942 943 Bool (*isZero)(IRAtom*) = m64 ? 
isZeroU64 : isZeroU32; 944 945 IRAtom* threeLeft1 = NULL; 946 IRAtom* sevenLeft1 = NULL; 947 948 tl_assert(isShadowAtom(mce,xxhash)); 949 tl_assert(isShadowAtom(mce,yyhash)); 950 tl_assert(isOriginalAtom(mce,xx)); 951 tl_assert(isOriginalAtom(mce,yy)); 952 tl_assert(sameKindedAtoms(xxhash,xx)); 953 tl_assert(sameKindedAtoms(yyhash,yy)); 954 tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U 955 || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U); 956 957 if (0) { 958 ppIROp(cmp_op); VG_(printf)(" "); 959 ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n"); 960 } 961 962 if (syned && isZero(yy)) { 963 /* fancy interpretation */ 964 /* if yy is zero, then it must be fully defined (zero#). */ 965 tl_assert(isZero(yyhash)); 966 threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1); 967 return 968 binop( 969 opOR, 970 assignNew( 971 'V', mce,ty, 972 binop( 973 opAND, 974 mkPCastTo(mce,ty, xxhash), 975 threeLeft1 976 )), 977 assignNew( 978 'V', mce,ty, 979 binop( 980 opSHL, 981 assignNew( 982 'V', mce,ty, 983 binop(opSHR, xxhash, mkU8(width-1))), 984 mkU8(3) 985 )) 986 ); 987 } else { 988 /* standard interpretation */ 989 sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1); 990 return 991 binop( 992 opAND, 993 mkPCastTo( mce,ty, 994 mkUifU(mce,ty, xxhash,yyhash)), 995 sevenLeft1 996 ); 997 } 998 } 999 1000 1001 /*------------------------------------------------------------*/ 1002 /*--- Emit a test and complaint if something is undefined. ---*/ 1003 /*------------------------------------------------------------*/ 1004 1005 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */ 1006 1007 1008 /* Set the annotations on a dirty helper to indicate that the stack 1009 pointer and instruction pointers might be read. This is the 1010 behaviour of all 'emit-a-complaint' style functions we might 1011 call. */ 1012 1013 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) { 1014 di->nFxState = 2; 1015 di->fxState[0].fx = Ifx_Read; 1016 di->fxState[0].offset = mce->layout->offset_SP; 1017 di->fxState[0].size = mce->layout->sizeof_SP; 1018 di->fxState[1].fx = Ifx_Read; 1019 di->fxState[1].offset = mce->layout->offset_IP; 1020 di->fxState[1].size = mce->layout->sizeof_IP; 1021 } 1022 1023 1024 /* Check the supplied **original** atom for undefinedness, and emit a 1025 complaint if so. Once that happens, mark it as defined. This is 1026 possible because the atom is either a tmp or literal. If it's a 1027 tmp, it will be shadowed by a tmp, and so we can set the shadow to 1028 be defined. In fact as mentioned above, we will have to allocate a 1029 new tmp to carry the new 'defined' shadow value, and update the 1030 original->tmp mapping accordingly; we cannot simply assign a new 1031 value to an existing shadow tmp as this breaks SSAness -- resulting 1032 in the post-instrumentation sanity checker spluttering in disapproval. 1033 */ 1034 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom ) 1035 { 1036 IRAtom* vatom; 1037 IRType ty; 1038 Int sz; 1039 IRDirty* di; 1040 IRAtom* cond; 1041 IRAtom* origin; 1042 void* fn; 1043 HChar* nm; 1044 IRExpr** args; 1045 Int nargs; 1046 1047 // Don't do V bit tests if we're not reporting undefined value errors. 1048 if (MC_(clo_mc_level) == 1) 1049 return; 1050 1051 /* Since the original expression is atomic, there's no duplicated 1052 work generated by making multiple V-expressions for it. So we 1053 don't really care about the possibility that someone else may 1054 also create a V-interpretion for it. 
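
      Schematically, the code below emits (an illustrative sketch of the
      resulting IR, not literal output):

         cond = PCastTo<Ity_I1>( V bits of atom )  -- 1 iff any bit undefined
         DIRTY cond ::: one of MC_(helperc_value_check*_fail_*)( [origin] )

      and then, if the atom is a tmp, rebinds its shadow to a fresh temp
      holding an all-zeroes ('defined') value, as described in the comment
      preceding this function.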
*/ 1055 tl_assert(isOriginalAtom(mce, atom)); 1056 vatom = expr2vbits( mce, atom ); 1057 tl_assert(isShadowAtom(mce, vatom)); 1058 tl_assert(sameKindedAtoms(atom, vatom)); 1059 1060 ty = typeOfIRExpr(mce->sb->tyenv, vatom); 1061 1062 /* sz is only used for constructing the error message */ 1063 sz = ty==Ity_I1 ? 0 : sizeofIRType(ty); 1064 1065 cond = mkPCastTo( mce, Ity_I1, vatom ); 1066 /* cond will be 0 if all defined, and 1 if any not defined. */ 1067 1068 /* Get the origin info for the value we are about to check. At 1069 least, if we are doing origin tracking. If not, use a dummy 1070 zero origin. */ 1071 if (MC_(clo_mc_level) == 3) { 1072 origin = schemeE( mce, atom ); 1073 if (mce->hWordTy == Ity_I64) { 1074 origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) ); 1075 } 1076 } else { 1077 origin = NULL; 1078 } 1079 1080 fn = NULL; 1081 nm = NULL; 1082 args = NULL; 1083 nargs = -1; 1084 1085 switch (sz) { 1086 case 0: 1087 if (origin) { 1088 fn = &MC_(helperc_value_check0_fail_w_o); 1089 nm = "MC_(helperc_value_check0_fail_w_o)"; 1090 args = mkIRExprVec_1(origin); 1091 nargs = 1; 1092 } else { 1093 fn = &MC_(helperc_value_check0_fail_no_o); 1094 nm = "MC_(helperc_value_check0_fail_no_o)"; 1095 args = mkIRExprVec_0(); 1096 nargs = 0; 1097 } 1098 break; 1099 case 1: 1100 if (origin) { 1101 fn = &MC_(helperc_value_check1_fail_w_o); 1102 nm = "MC_(helperc_value_check1_fail_w_o)"; 1103 args = mkIRExprVec_1(origin); 1104 nargs = 1; 1105 } else { 1106 fn = &MC_(helperc_value_check1_fail_no_o); 1107 nm = "MC_(helperc_value_check1_fail_no_o)"; 1108 args = mkIRExprVec_0(); 1109 nargs = 0; 1110 } 1111 break; 1112 case 4: 1113 if (origin) { 1114 fn = &MC_(helperc_value_check4_fail_w_o); 1115 nm = "MC_(helperc_value_check4_fail_w_o)"; 1116 args = mkIRExprVec_1(origin); 1117 nargs = 1; 1118 } else { 1119 fn = &MC_(helperc_value_check4_fail_no_o); 1120 nm = "MC_(helperc_value_check4_fail_no_o)"; 1121 args = mkIRExprVec_0(); 1122 nargs = 0; 1123 } 1124 break; 1125 case 8: 1126 if (origin) { 1127 fn = &MC_(helperc_value_check8_fail_w_o); 1128 nm = "MC_(helperc_value_check8_fail_w_o)"; 1129 args = mkIRExprVec_1(origin); 1130 nargs = 1; 1131 } else { 1132 fn = &MC_(helperc_value_check8_fail_no_o); 1133 nm = "MC_(helperc_value_check8_fail_no_o)"; 1134 args = mkIRExprVec_0(); 1135 nargs = 0; 1136 } 1137 break; 1138 case 2: 1139 case 16: 1140 if (origin) { 1141 fn = &MC_(helperc_value_checkN_fail_w_o); 1142 nm = "MC_(helperc_value_checkN_fail_w_o)"; 1143 args = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin); 1144 nargs = 2; 1145 } else { 1146 fn = &MC_(helperc_value_checkN_fail_no_o); 1147 nm = "MC_(helperc_value_checkN_fail_no_o)"; 1148 args = mkIRExprVec_1( mkIRExpr_HWord( sz ) ); 1149 nargs = 1; 1150 } 1151 break; 1152 default: 1153 VG_(tool_panic)("unexpected szB"); 1154 } 1155 1156 tl_assert(fn); 1157 tl_assert(nm); 1158 tl_assert(args); 1159 tl_assert(nargs >= 0 && nargs <= 2); 1160 tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL) 1161 || (MC_(clo_mc_level) == 2 && origin == NULL) ); 1162 1163 di = unsafeIRDirty_0_N( nargs/*regparms*/, nm, 1164 VG_(fnptr_to_fnentry)( fn ), args ); 1165 di->guard = cond; 1166 setHelperAnns( mce, di ); 1167 stmt( 'V', mce, IRStmt_Dirty(di)); 1168 1169 /* Set the shadow tmp to be defined. First, update the 1170 orig->shadow tmp mapping to reflect the fact that this shadow is 1171 getting a new value. */ 1172 tl_assert(isIRAtom(vatom)); 1173 /* sameKindedAtoms ... 
*/
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}


/* Generate into bb suitable actions to shadow this PutI (passed in in
   pieces).  If the state slice is marked 'always defined', do nothing;
   otherwise write the supplied V bits to the shadow state.
*/
static
void do_shadow_PUTI ( MCEnv* mce,
                      IRRegArray* descr,
                      IRAtom* ix, Int bias, IRAtom* atom )
{
   IRAtom* vatom;
   IRType  ty, tyS;
   Int     arrSize;

   // Don't do shadow PUTIs if we're not doing undefined value checking.
1271 // Their absence lets Vex's optimiser remove all the shadow computation 1272 // that they depend on, which includes GETIs of the shadow registers. 1273 if (MC_(clo_mc_level) == 1) 1274 return; 1275 1276 tl_assert(isOriginalAtom(mce,atom)); 1277 vatom = expr2vbits( mce, atom ); 1278 tl_assert(sameKindedAtoms(atom, vatom)); 1279 ty = descr->elemTy; 1280 tyS = shadowTypeV(ty); 1281 arrSize = descr->nElems * sizeofIRType(ty); 1282 tl_assert(ty != Ity_I1); 1283 tl_assert(isOriginalAtom(mce,ix)); 1284 complainIfUndefined(mce,ix); 1285 if (isAlwaysDefd(mce, descr->base, arrSize)) { 1286 /* later: no ... */ 1287 /* emit code to emit a complaint if any of the vbits are 1. */ 1288 /* complainIfUndefined(mce, atom); */ 1289 } else { 1290 /* Do a cloned version of the Put that refers to the shadow 1291 area. */ 1292 IRRegArray* new_descr 1293 = mkIRRegArray( descr->base + mce->layout->total_sizeB, 1294 tyS, descr->nElems); 1295 stmt( 'V', mce, IRStmt_PutI( new_descr, ix, bias, vatom )); 1296 } 1297 } 1298 1299 1300 /* Return an expression which contains the V bits corresponding to the 1301 given GET (passed in in pieces). 1302 */ 1303 static 1304 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty ) 1305 { 1306 IRType tyS = shadowTypeV(ty); 1307 tl_assert(ty != Ity_I1); 1308 tl_assert(ty != Ity_I128); 1309 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) { 1310 /* Always defined, return all zeroes of the relevant type */ 1311 return definedOfType(tyS); 1312 } else { 1313 /* return a cloned version of the Get that refers to the shadow 1314 area. */ 1315 /* FIXME: this isn't an atom! */ 1316 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS ); 1317 } 1318 } 1319 1320 1321 /* Return an expression which contains the V bits corresponding to the 1322 given GETI (passed in in pieces). 1323 */ 1324 static 1325 IRExpr* shadow_GETI ( MCEnv* mce, 1326 IRRegArray* descr, IRAtom* ix, Int bias ) 1327 { 1328 IRType ty = descr->elemTy; 1329 IRType tyS = shadowTypeV(ty); 1330 Int arrSize = descr->nElems * sizeofIRType(ty); 1331 tl_assert(ty != Ity_I1); 1332 tl_assert(isOriginalAtom(mce,ix)); 1333 complainIfUndefined(mce,ix); 1334 if (isAlwaysDefd(mce, descr->base, arrSize)) { 1335 /* Always defined, return all zeroes of the relevant type */ 1336 return definedOfType(tyS); 1337 } else { 1338 /* return a cloned version of the Get that refers to the shadow 1339 area. */ 1340 IRRegArray* new_descr 1341 = mkIRRegArray( descr->base + mce->layout->total_sizeB, 1342 tyS, descr->nElems); 1343 return IRExpr_GetI( new_descr, ix, bias ); 1344 } 1345 } 1346 1347 1348 /*------------------------------------------------------------*/ 1349 /*--- Generating approximations for unknown operations, ---*/ 1350 /*--- using lazy-propagate semantics ---*/ 1351 /*------------------------------------------------------------*/ 1352 1353 /* Lazy propagation of undefinedness from two values, resulting in the 1354 specified shadow type. 1355 */ 1356 static 1357 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 ) 1358 { 1359 IRAtom* at; 1360 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1); 1361 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2); 1362 tl_assert(isShadowAtom(mce,va1)); 1363 tl_assert(isShadowAtom(mce,va2)); 1364 1365 /* The general case is inefficient because PCast is an expensive 1366 operation. Here are some special cases which use PCast only 1367 once rather than twice. 
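
      For instance (illustrative), the I64 x I64 -> I64 case immediately
      below boils down to

         Iop_CmpwNEZ64( Iop_Or64( va1, va2 ) )

      -- one UifU and a single PCast -- whereas the general path at the
      end PCasts each argument to I32 separately before merging.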
*/ 1368 1369 /* I64 x I64 -> I64 */ 1370 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) { 1371 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n"); 1372 at = mkUifU(mce, Ity_I64, va1, va2); 1373 at = mkPCastTo(mce, Ity_I64, at); 1374 return at; 1375 } 1376 1377 /* I64 x I64 -> I32 */ 1378 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) { 1379 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n"); 1380 at = mkUifU(mce, Ity_I64, va1, va2); 1381 at = mkPCastTo(mce, Ity_I32, at); 1382 return at; 1383 } 1384 1385 if (0) { 1386 VG_(printf)("mkLazy2 "); 1387 ppIRType(t1); 1388 VG_(printf)("_"); 1389 ppIRType(t2); 1390 VG_(printf)("_"); 1391 ppIRType(finalVty); 1392 VG_(printf)("\n"); 1393 } 1394 1395 /* General case: force everything via 32-bit intermediaries. */ 1396 at = mkPCastTo(mce, Ity_I32, va1); 1397 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2)); 1398 at = mkPCastTo(mce, finalVty, at); 1399 return at; 1400 } 1401 1402 1403 /* 3-arg version of the above. */ 1404 static 1405 IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty, 1406 IRAtom* va1, IRAtom* va2, IRAtom* va3 ) 1407 { 1408 IRAtom* at; 1409 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1); 1410 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2); 1411 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3); 1412 tl_assert(isShadowAtom(mce,va1)); 1413 tl_assert(isShadowAtom(mce,va2)); 1414 tl_assert(isShadowAtom(mce,va3)); 1415 1416 /* The general case is inefficient because PCast is an expensive 1417 operation. Here are some special cases which use PCast only 1418 twice rather than three times. */ 1419 1420 /* I32 x I64 x I64 -> I64 */ 1421 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */ 1422 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 1423 && finalVty == Ity_I64) { 1424 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n"); 1425 /* Widen 1st arg to I64. Since 1st arg is typically a rounding 1426 mode indication which is fully defined, this should get 1427 folded out later. */ 1428 at = mkPCastTo(mce, Ity_I64, va1); 1429 /* Now fold in 2nd and 3rd args. */ 1430 at = mkUifU(mce, Ity_I64, at, va2); 1431 at = mkUifU(mce, Ity_I64, at, va3); 1432 /* and PCast once again. */ 1433 at = mkPCastTo(mce, Ity_I64, at); 1434 return at; 1435 } 1436 1437 /* I32 x I64 x I64 -> I32 */ 1438 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 1439 && finalVty == Ity_I32) { 1440 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n"); 1441 at = mkPCastTo(mce, Ity_I64, va1); 1442 at = mkUifU(mce, Ity_I64, at, va2); 1443 at = mkUifU(mce, Ity_I64, at, va3); 1444 at = mkPCastTo(mce, Ity_I32, at); 1445 return at; 1446 } 1447 1448 /* I32 x I32 x I32 -> I32 */ 1449 /* 32-bit FP idiom, as (eg) happens on ARM */ 1450 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 1451 && finalVty == Ity_I32) { 1452 if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n"); 1453 at = va1; 1454 at = mkUifU(mce, Ity_I32, at, va2); 1455 at = mkUifU(mce, Ity_I32, at, va3); 1456 at = mkPCastTo(mce, Ity_I32, at); 1457 return at; 1458 } 1459 1460 /* I32 x I128 x I128 -> I128 */ 1461 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */ 1462 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128 1463 && finalVty == Ity_I128) { 1464 if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n"); 1465 /* Widen 1st arg to I128. Since 1st arg is typically a rounding 1466 mode indication which is fully defined, this should get 1467 folded out later. */ 1468 at = mkPCastTo(mce, Ity_I128, va1); 1469 /* Now fold in 2nd and 3rd args. 
*/ 1470 at = mkUifU(mce, Ity_I128, at, va2); 1471 at = mkUifU(mce, Ity_I128, at, va3); 1472 /* and PCast once again. */ 1473 at = mkPCastTo(mce, Ity_I128, at); 1474 return at; 1475 } 1476 if (1) { 1477 VG_(printf)("mkLazy3: "); 1478 ppIRType(t1); 1479 VG_(printf)(" x "); 1480 ppIRType(t2); 1481 VG_(printf)(" x "); 1482 ppIRType(t3); 1483 VG_(printf)(" -> "); 1484 ppIRType(finalVty); 1485 VG_(printf)("\n"); 1486 } 1487 1488 tl_assert(0); 1489 /* General case: force everything via 32-bit intermediaries. */ 1490 /* 1491 at = mkPCastTo(mce, Ity_I32, va1); 1492 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2)); 1493 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3)); 1494 at = mkPCastTo(mce, finalVty, at); 1495 return at; 1496 */ 1497 } 1498 1499 1500 /* 4-arg version of the above. */ 1501 static 1502 IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty, 1503 IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 ) 1504 { 1505 IRAtom* at; 1506 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1); 1507 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2); 1508 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3); 1509 IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4); 1510 tl_assert(isShadowAtom(mce,va1)); 1511 tl_assert(isShadowAtom(mce,va2)); 1512 tl_assert(isShadowAtom(mce,va3)); 1513 tl_assert(isShadowAtom(mce,va4)); 1514 1515 /* The general case is inefficient because PCast is an expensive 1516 operation. Here are some special cases which use PCast only 1517 twice rather than three times. */ 1518 1519 /* I32 x I64 x I64 x I64 -> I64 */ 1520 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */ 1521 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64 1522 && finalVty == Ity_I64) { 1523 if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n"); 1524 /* Widen 1st arg to I64. Since 1st arg is typically a rounding 1525 mode indication which is fully defined, this should get 1526 folded out later. */ 1527 at = mkPCastTo(mce, Ity_I64, va1); 1528 /* Now fold in 2nd, 3rd, 4th args. */ 1529 at = mkUifU(mce, Ity_I64, at, va2); 1530 at = mkUifU(mce, Ity_I64, at, va3); 1531 at = mkUifU(mce, Ity_I64, at, va4); 1532 /* and PCast once again. */ 1533 at = mkPCastTo(mce, Ity_I64, at); 1534 return at; 1535 } 1536 /* I32 x I32 x I32 x I32 -> I32 */ 1537 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */ 1538 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32 1539 && finalVty == Ity_I32) { 1540 if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n"); 1541 at = va1; 1542 /* Now fold in 2nd, 3rd, 4th args. */ 1543 at = mkUifU(mce, Ity_I32, at, va2); 1544 at = mkUifU(mce, Ity_I32, at, va3); 1545 at = mkUifU(mce, Ity_I32, at, va4); 1546 at = mkPCastTo(mce, Ity_I32, at); 1547 return at; 1548 } 1549 1550 if (1) { 1551 VG_(printf)("mkLazy4: "); 1552 ppIRType(t1); 1553 VG_(printf)(" x "); 1554 ppIRType(t2); 1555 VG_(printf)(" x "); 1556 ppIRType(t3); 1557 VG_(printf)(" x "); 1558 ppIRType(t4); 1559 VG_(printf)(" -> "); 1560 ppIRType(finalVty); 1561 VG_(printf)("\n"); 1562 } 1563 1564 tl_assert(0); 1565 } 1566 1567 1568 /* Do the lazy propagation game from a null-terminated vector of 1569 atoms. This is presumably the arguments to a helper call, so the 1570 IRCallee info is also supplied in order that we can know which 1571 arguments should be ignored (via the .mcx_mask field). 
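
   For example (illustrative): a callee whose mcx_mask has bit 0 set
   (mcx_mask == 1, say) causes argument 0 to be skipped by the
   'cee->mcx_mask & (1<<i)' test below, so that argument's definedness
   never reaches the merged result.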
1572 */ 1573 static 1574 IRAtom* mkLazyN ( MCEnv* mce, 1575 IRAtom** exprvec, IRType finalVtype, IRCallee* cee ) 1576 { 1577 Int i; 1578 IRAtom* here; 1579 IRAtom* curr; 1580 IRType mergeTy; 1581 Bool mergeTy64 = True; 1582 1583 /* Decide on the type of the merge intermediary. If all relevant 1584 args are I64, then it's I64. In all other circumstances, use 1585 I32. */ 1586 for (i = 0; exprvec[i]; i++) { 1587 tl_assert(i < 32); 1588 tl_assert(isOriginalAtom(mce, exprvec[i])); 1589 if (cee->mcx_mask & (1<<i)) 1590 continue; 1591 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64) 1592 mergeTy64 = False; 1593 } 1594 1595 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32; 1596 curr = definedOfType(mergeTy); 1597 1598 for (i = 0; exprvec[i]; i++) { 1599 tl_assert(i < 32); 1600 tl_assert(isOriginalAtom(mce, exprvec[i])); 1601 /* Only take notice of this arg if the callee's mc-exclusion 1602 mask does not say it is to be excluded. */ 1603 if (cee->mcx_mask & (1<<i)) { 1604 /* the arg is to be excluded from definedness checking. Do 1605 nothing. */ 1606 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i); 1607 } else { 1608 /* calculate the arg's definedness, and pessimistically merge 1609 it in. */ 1610 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) ); 1611 curr = mergeTy64 1612 ? mkUifU64(mce, here, curr) 1613 : mkUifU32(mce, here, curr); 1614 } 1615 } 1616 return mkPCastTo(mce, finalVtype, curr ); 1617 } 1618 1619 1620 /*------------------------------------------------------------*/ 1621 /*--- Generating expensive sequences for exact carry-chain ---*/ 1622 /*--- propagation in add/sub and related operations. ---*/ 1623 /*------------------------------------------------------------*/ 1624 1625 static 1626 IRAtom* expensiveAddSub ( MCEnv* mce, 1627 Bool add, 1628 IRType ty, 1629 IRAtom* qaa, IRAtom* qbb, 1630 IRAtom* aa, IRAtom* bb ) 1631 { 1632 IRAtom *a_min, *b_min, *a_max, *b_max; 1633 IROp opAND, opOR, opXOR, opNOT, opADD, opSUB; 1634 1635 tl_assert(isShadowAtom(mce,qaa)); 1636 tl_assert(isShadowAtom(mce,qbb)); 1637 tl_assert(isOriginalAtom(mce,aa)); 1638 tl_assert(isOriginalAtom(mce,bb)); 1639 tl_assert(sameKindedAtoms(qaa,aa)); 1640 tl_assert(sameKindedAtoms(qbb,bb)); 1641 1642 switch (ty) { 1643 case Ity_I32: 1644 opAND = Iop_And32; 1645 opOR = Iop_Or32; 1646 opXOR = Iop_Xor32; 1647 opNOT = Iop_Not32; 1648 opADD = Iop_Add32; 1649 opSUB = Iop_Sub32; 1650 break; 1651 case Ity_I64: 1652 opAND = Iop_And64; 1653 opOR = Iop_Or64; 1654 opXOR = Iop_Xor64; 1655 opNOT = Iop_Not64; 1656 opADD = Iop_Add64; 1657 opSUB = Iop_Sub64; 1658 break; 1659 default: 1660 VG_(tool_panic)("expensiveAddSub"); 1661 } 1662 1663 // a_min = aa & ~qaa 1664 a_min = assignNew('V', mce,ty, 1665 binop(opAND, aa, 1666 assignNew('V', mce,ty, unop(opNOT, qaa)))); 1667 1668 // b_min = bb & ~qbb 1669 b_min = assignNew('V', mce,ty, 1670 binop(opAND, bb, 1671 assignNew('V', mce,ty, unop(opNOT, qbb)))); 1672 1673 // a_max = aa | qaa 1674 a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa)); 1675 1676 // b_max = bb | qbb 1677 b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb)); 1678 1679 if (add) { 1680 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max)) 1681 return 1682 assignNew('V', mce,ty, 1683 binop( opOR, 1684 assignNew('V', mce,ty, binop(opOR, qaa, qbb)), 1685 assignNew('V', mce,ty, 1686 binop( opXOR, 1687 assignNew('V', mce,ty, binop(opADD, a_min, b_min)), 1688 assignNew('V', mce,ty, binop(opADD, a_max, b_max)) 1689 ) 1690 ) 1691 ) 1692 ); 1693 } else { 1694 // result = (qaa | qbb) | ((a_min - b_max) 
^ (a_max - b_min))
1695 return
1696 assignNew('V', mce,ty,
1697 binop( opOR,
1698 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1699 assignNew('V', mce,ty,
1700 binop( opXOR,
1701 assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
1702 assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
1703 )
1704 )
1705 )
1706 );
1707 }
1708
1709 }
1710
1711
1712 /*------------------------------------------------------------*/
1713 /*--- Scalar shifts. ---*/
1714 /*------------------------------------------------------------*/
1715
1716 /* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
1717 idea is to shift the definedness bits by the original shift amount.
1718 This introduces 0s ("defined") in new positions for left shifts and
1719 unsigned right shifts, and copies the top definedness bit for
1720 signed right shifts. So, conveniently, applying the original shift
1721 operator to the definedness bits for the left arg is exactly the
1722 right thing to do:
1723
1724 (qaa << bb)
1725
1726 However, if the shift amount is undefined then the whole result
1727 is undefined. Hence we need:
1728
1729 (qaa << bb) `UifU` PCast(qbb)
1730
1731 If the shift amount bb is a literal then qbb will say 'all defined'
1732 and the UifU and PCast will get folded out by post-instrumentation
1733 optimisation.
1734 */
1735 static IRAtom* scalarShift ( MCEnv* mce,
1736 IRType ty,
1737 IROp original_op,
1738 IRAtom* qaa, IRAtom* qbb,
1739 IRAtom* aa, IRAtom* bb )
1740 {
1741 tl_assert(isShadowAtom(mce,qaa));
1742 tl_assert(isShadowAtom(mce,qbb));
1743 tl_assert(isOriginalAtom(mce,aa));
1744 tl_assert(isOriginalAtom(mce,bb));
1745 tl_assert(sameKindedAtoms(qaa,aa));
1746 tl_assert(sameKindedAtoms(qbb,bb));
1747 return
1748 assignNew(
1749 'V', mce, ty,
1750 mkUifU( mce, ty,
1751 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
1752 mkPCastTo(mce, ty, qbb)
1753 )
1754 );
1755 }
1756
1757
1758 /*------------------------------------------------------------*/
1759 /*--- Helpers for dealing with vector primops. ---*/
1760 /*------------------------------------------------------------*/
1761
1762 /* Vector pessimisation -- pessimise within each lane individually.
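For example (an illustrative reading of the helpers below, not a change of scheme): mkPCast8x16 applies Iop_CmpNEZ8x16 to the shadow, so each 8-bit lane becomes 0x00 if every V bit in that lane is 0 (defined) and 0xFF if any V bit in it is 1 (undefined). A single undefined bit therefore poisons its whole lane, but never leaks into neighbouring lanes.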
*/
1763
1764 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1765 {
1766 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1767 }
1768
1769 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1770 {
1771 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1772 }
1773
1774 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1775 {
1776 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1777 }
1778
1779 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1780 {
1781 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1782 }
1783
1784 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1785 {
1786 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
1787 }
1788
1789 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1790 {
1791 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
1792 }
1793
1794 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1795 {
1796 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
1797 }
1798
1799 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
1800 {
1801 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
1802 }
1803
1804 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
1805 {
1806 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
1807 }
1808
1809
1810 /* Here's a simple scheme capable of handling ops derived from SSE1
1811 code while generating only ops that can be efficiently
1812 implemented in SSE1. */
1813
1814 /* All-lanes versions are straightforward:
1815
1816 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
1817
1818 unary32Fx4(x) ==> PCast32x4(x#)
1819
1820 Lowest-lane-only versions are more complex:
1821
1822 binary32F0x4(x,y) ==> SetV128lo32(
1823 x#,
1824 PCast32(V128to32(UifUV128(x#,y#)))
1825 )
1826
1827 This is perhaps not so obvious. In particular, it's faster to
1828 do a V128-bit UifU and then take the bottom 32 bits than the more
1829 obvious scheme of taking the bottom 32 bits of each operand
1830 and doing a 32-bit UifU. Basically, this is because UifU is fast and
1831 chopping lanes off vector values is slow.
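Spelled out (an illustrative reading of the rule above): UifUV128(x#,y#) merges the two shadows at full width, V128to32 keeps only the low 32 bits of the merge, PCast32 collapses those to all-defined or all-undefined, and SetV128lo32 drops the result back into the low lane of x#, leaving the upper three lanes of x# untouched -- which mirrors the original op, since it writes only the lowest lane.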
1832 1833 Finally: 1834 1835 unary32F0x4(x) ==> SetV128lo32( 1836 x#, 1837 PCast32(V128to32(x#)) 1838 ) 1839 1840 Where: 1841 1842 PCast32(v#) = 1Sto32(CmpNE32(v#,0)) 1843 PCast32x4(v#) = CmpNEZ32x4(v#) 1844 */ 1845 1846 static 1847 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1848 { 1849 IRAtom* at; 1850 tl_assert(isShadowAtom(mce, vatomX)); 1851 tl_assert(isShadowAtom(mce, vatomY)); 1852 at = mkUifUV128(mce, vatomX, vatomY); 1853 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at)); 1854 return at; 1855 } 1856 1857 static 1858 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX ) 1859 { 1860 IRAtom* at; 1861 tl_assert(isShadowAtom(mce, vatomX)); 1862 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX)); 1863 return at; 1864 } 1865 1866 static 1867 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1868 { 1869 IRAtom* at; 1870 tl_assert(isShadowAtom(mce, vatomX)); 1871 tl_assert(isShadowAtom(mce, vatomY)); 1872 at = mkUifUV128(mce, vatomX, vatomY); 1873 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at)); 1874 at = mkPCastTo(mce, Ity_I32, at); 1875 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 1876 return at; 1877 } 1878 1879 static 1880 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX ) 1881 { 1882 IRAtom* at; 1883 tl_assert(isShadowAtom(mce, vatomX)); 1884 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX)); 1885 at = mkPCastTo(mce, Ity_I32, at); 1886 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 1887 return at; 1888 } 1889 1890 /* --- ... and ... 64Fx2 versions of the same ... --- */ 1891 1892 static 1893 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1894 { 1895 IRAtom* at; 1896 tl_assert(isShadowAtom(mce, vatomX)); 1897 tl_assert(isShadowAtom(mce, vatomY)); 1898 at = mkUifUV128(mce, vatomX, vatomY); 1899 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at)); 1900 return at; 1901 } 1902 1903 static 1904 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX ) 1905 { 1906 IRAtom* at; 1907 tl_assert(isShadowAtom(mce, vatomX)); 1908 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX)); 1909 return at; 1910 } 1911 1912 static 1913 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1914 { 1915 IRAtom* at; 1916 tl_assert(isShadowAtom(mce, vatomX)); 1917 tl_assert(isShadowAtom(mce, vatomY)); 1918 at = mkUifUV128(mce, vatomX, vatomY); 1919 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at)); 1920 at = mkPCastTo(mce, Ity_I64, at); 1921 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 1922 return at; 1923 } 1924 1925 static 1926 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX ) 1927 { 1928 IRAtom* at; 1929 tl_assert(isShadowAtom(mce, vatomX)); 1930 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX)); 1931 at = mkPCastTo(mce, Ity_I64, at); 1932 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 1933 return at; 1934 } 1935 1936 /* --- --- ... and ... 
32Fx2 versions of the same --- --- */ 1937 1938 static 1939 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1940 { 1941 IRAtom* at; 1942 tl_assert(isShadowAtom(mce, vatomX)); 1943 tl_assert(isShadowAtom(mce, vatomY)); 1944 at = mkUifU64(mce, vatomX, vatomY); 1945 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at)); 1946 return at; 1947 } 1948 1949 static 1950 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX ) 1951 { 1952 IRAtom* at; 1953 tl_assert(isShadowAtom(mce, vatomX)); 1954 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX)); 1955 return at; 1956 } 1957 1958 /* --- --- Vector saturated narrowing --- --- */ 1959 1960 /* We used to do something very clever here, but on closer inspection 1961 (2011-Jun-15), and in particular bug #279698, it turns out to be 1962 wrong. Part of the problem came from the fact that for a long 1963 time, the IR primops to do with saturated narrowing were 1964 underspecified and managed to confuse multiple cases which needed 1965 to be separate: the op names had a signedness qualifier, but in 1966 fact the source and destination signednesses needed to be specified 1967 independently, so the op names really need two independent 1968 signedness specifiers. 1969 1970 As of 2011-Jun-15 (ish) the underspecification was sorted out 1971 properly. The incorrect instrumentation remained, though. That 1972 has now (2011-Oct-22) been fixed. 1973 1974 What we now do is simple: 1975 1976 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a 1977 number of lanes, X is the source lane width and signedness, and Y 1978 is the destination lane width and signedness. In all cases the 1979 destination lane width is half the source lane width, so the names 1980 have a bit of redundancy, but are at least easy to read. 1981 1982 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s 1983 to unsigned 16s. 1984 1985 Let Vanilla(OP) be a function that takes OP, one of these 1986 saturating narrowing ops, and produces the same "shaped" narrowing 1987 op which is not saturating, but merely dumps the most significant 1988 bits. "same shape" means that the lane numbers and widths are the 1989 same as with OP. 1990 1991 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8) 1992 = Iop_NarrowBin32to16x8, 1993 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by 1994 dumping the top half of each lane. 1995 1996 So, with that in place, the scheme is simple, and it is simple to 1997 pessimise each lane individually and then apply Vanilla(OP) so as 1998 to get the result in the right "shape". 
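(Per-lane pessimisation is the right treatment here because saturation can make any output bit of a lane depend on any input bit of that lane, so an undefined input bit must be assumed to taint the whole output lane.)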
If the original OP is 1999 QNarrowBinXtoYxZ then we produce 2000 2001 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) ) 2002 2003 or for the case when OP is unary (Iop_QNarrowUn*) 2004 2005 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) ) 2006 */ 2007 static 2008 IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp ) 2009 { 2010 switch (qnarrowOp) { 2011 /* Binary: (128, 128) -> 128 */ 2012 case Iop_QNarrowBin16Sto8Ux16: 2013 case Iop_QNarrowBin16Sto8Sx16: 2014 case Iop_QNarrowBin16Uto8Ux16: 2015 return Iop_NarrowBin16to8x16; 2016 case Iop_QNarrowBin32Sto16Ux8: 2017 case Iop_QNarrowBin32Sto16Sx8: 2018 case Iop_QNarrowBin32Uto16Ux8: 2019 return Iop_NarrowBin32to16x8; 2020 /* Binary: (64, 64) -> 64 */ 2021 case Iop_QNarrowBin32Sto16Sx4: 2022 return Iop_NarrowBin32to16x4; 2023 case Iop_QNarrowBin16Sto8Ux8: 2024 case Iop_QNarrowBin16Sto8Sx8: 2025 return Iop_NarrowBin16to8x8; 2026 /* Unary: 128 -> 64 */ 2027 case Iop_QNarrowUn64Uto32Ux2: 2028 case Iop_QNarrowUn64Sto32Sx2: 2029 case Iop_QNarrowUn64Sto32Ux2: 2030 return Iop_NarrowUn64to32x2; 2031 case Iop_QNarrowUn32Uto16Ux4: 2032 case Iop_QNarrowUn32Sto16Sx4: 2033 case Iop_QNarrowUn32Sto16Ux4: 2034 return Iop_NarrowUn32to16x4; 2035 case Iop_QNarrowUn16Uto8Ux8: 2036 case Iop_QNarrowUn16Sto8Sx8: 2037 case Iop_QNarrowUn16Sto8Ux8: 2038 return Iop_NarrowUn16to8x8; 2039 default: 2040 ppIROp(qnarrowOp); 2041 VG_(tool_panic)("vanillaNarrowOpOfShape"); 2042 } 2043 } 2044 2045 static 2046 IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op, 2047 IRAtom* vatom1, IRAtom* vatom2) 2048 { 2049 IRAtom *at1, *at2, *at3; 2050 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2051 switch (narrow_op) { 2052 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break; 2053 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break; 2054 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break; 2055 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break; 2056 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break; 2057 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break; 2058 default: VG_(tool_panic)("vectorNarrowBinV128"); 2059 } 2060 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2061 tl_assert(isShadowAtom(mce,vatom1)); 2062 tl_assert(isShadowAtom(mce,vatom2)); 2063 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1)); 2064 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2)); 2065 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2)); 2066 return at3; 2067 } 2068 2069 static 2070 IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op, 2071 IRAtom* vatom1, IRAtom* vatom2) 2072 { 2073 IRAtom *at1, *at2, *at3; 2074 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2075 switch (narrow_op) { 2076 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break; 2077 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break; 2078 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break; 2079 default: VG_(tool_panic)("vectorNarrowBin64"); 2080 } 2081 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2082 tl_assert(isShadowAtom(mce,vatom1)); 2083 tl_assert(isShadowAtom(mce,vatom2)); 2084 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1)); 2085 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2)); 2086 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2)); 2087 return at3; 2088 } 2089 2090 static 2091 IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op, 2092 IRAtom* vatom1) 2093 { 2094 IRAtom *at1, *at2; 2095 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2096 tl_assert(isShadowAtom(mce,vatom1)); 2097 /* For vanilla narrowing 
(non-saturating), we can just apply 2098 the op directly to the V bits. */ 2099 switch (narrow_op) { 2100 case Iop_NarrowUn16to8x8: 2101 case Iop_NarrowUn32to16x4: 2102 case Iop_NarrowUn64to32x2: 2103 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1)); 2104 return at1; 2105 default: 2106 break; /* Do Plan B */ 2107 } 2108 /* Plan B: for ops that involve a saturation operation on the args, 2109 we must PCast before the vanilla narrow. */ 2110 switch (narrow_op) { 2111 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break; 2112 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break; 2113 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break; 2114 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break; 2115 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break; 2116 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break; 2117 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break; 2118 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break; 2119 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break; 2120 default: VG_(tool_panic)("vectorNarrowUnV128"); 2121 } 2122 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2123 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1)); 2124 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1)); 2125 return at2; 2126 } 2127 2128 static 2129 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op, 2130 IRAtom* vatom1) 2131 { 2132 IRAtom *at1, *at2; 2133 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2134 switch (longen_op) { 2135 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break; 2136 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break; 2137 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break; 2138 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break; 2139 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break; 2140 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break; 2141 default: VG_(tool_panic)("vectorWidenI64"); 2142 } 2143 tl_assert(isShadowAtom(mce,vatom1)); 2144 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1)); 2145 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1)); 2146 return at2; 2147 } 2148 2149 2150 /* --- --- Vector integer arithmetic --- --- */ 2151 2152 /* Simple ... UifU the args and per-lane pessimise the results. 
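In other words (illustrative), binary8Ix16(x#,y#) is PCast8x16(UifUV128(x#,y#)): merge the two shadows bitwise, then collapse each 8-bit lane of the merge to all-defined or all-undefined. The other widths below follow the same pattern at their own lane size.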
*/ 2153 2154 /* --- V128-bit versions --- */ 2155 2156 static 2157 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2158 { 2159 IRAtom* at; 2160 at = mkUifUV128(mce, vatom1, vatom2); 2161 at = mkPCast8x16(mce, at); 2162 return at; 2163 } 2164 2165 static 2166 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2167 { 2168 IRAtom* at; 2169 at = mkUifUV128(mce, vatom1, vatom2); 2170 at = mkPCast16x8(mce, at); 2171 return at; 2172 } 2173 2174 static 2175 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2176 { 2177 IRAtom* at; 2178 at = mkUifUV128(mce, vatom1, vatom2); 2179 at = mkPCast32x4(mce, at); 2180 return at; 2181 } 2182 2183 static 2184 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2185 { 2186 IRAtom* at; 2187 at = mkUifUV128(mce, vatom1, vatom2); 2188 at = mkPCast64x2(mce, at); 2189 return at; 2190 } 2191 2192 /* --- 64-bit versions --- */ 2193 2194 static 2195 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2196 { 2197 IRAtom* at; 2198 at = mkUifU64(mce, vatom1, vatom2); 2199 at = mkPCast8x8(mce, at); 2200 return at; 2201 } 2202 2203 static 2204 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2205 { 2206 IRAtom* at; 2207 at = mkUifU64(mce, vatom1, vatom2); 2208 at = mkPCast16x4(mce, at); 2209 return at; 2210 } 2211 2212 static 2213 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2214 { 2215 IRAtom* at; 2216 at = mkUifU64(mce, vatom1, vatom2); 2217 at = mkPCast32x2(mce, at); 2218 return at; 2219 } 2220 2221 static 2222 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2223 { 2224 IRAtom* at; 2225 at = mkUifU64(mce, vatom1, vatom2); 2226 at = mkPCastTo(mce, Ity_I64, at); 2227 return at; 2228 } 2229 2230 /* --- 32-bit versions --- */ 2231 2232 static 2233 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2234 { 2235 IRAtom* at; 2236 at = mkUifU32(mce, vatom1, vatom2); 2237 at = mkPCast8x4(mce, at); 2238 return at; 2239 } 2240 2241 static 2242 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2243 { 2244 IRAtom* at; 2245 at = mkUifU32(mce, vatom1, vatom2); 2246 at = mkPCast16x2(mce, at); 2247 return at; 2248 } 2249 2250 2251 /*------------------------------------------------------------*/ 2252 /*--- Generate shadow values from all kinds of IRExprs. 
---*/
2253 /*------------------------------------------------------------*/
2254
2255 static
2256 IRAtom* expr2vbits_Qop ( MCEnv* mce,
2257 IROp op,
2258 IRAtom* atom1, IRAtom* atom2,
2259 IRAtom* atom3, IRAtom* atom4 )
2260 {
2261 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2262 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2263 IRAtom* vatom3 = expr2vbits( mce, atom3 );
2264 IRAtom* vatom4 = expr2vbits( mce, atom4 );
2265
2266 tl_assert(isOriginalAtom(mce,atom1));
2267 tl_assert(isOriginalAtom(mce,atom2));
2268 tl_assert(isOriginalAtom(mce,atom3));
2269 tl_assert(isOriginalAtom(mce,atom4));
2270 tl_assert(isShadowAtom(mce,vatom1));
2271 tl_assert(isShadowAtom(mce,vatom2));
2272 tl_assert(isShadowAtom(mce,vatom3));
2273 tl_assert(isShadowAtom(mce,vatom4));
2274 tl_assert(sameKindedAtoms(atom1,vatom1));
2275 tl_assert(sameKindedAtoms(atom2,vatom2));
2276 tl_assert(sameKindedAtoms(atom3,vatom3));
2277 tl_assert(sameKindedAtoms(atom4,vatom4));
2278 switch (op) {
2279 case Iop_MAddF64:
2280 case Iop_MAddF64r32:
2281 case Iop_MSubF64:
2282 case Iop_MSubF64r32:
2283 /* I32(rm) x F64 x F64 x F64 -> F64 */
2284 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
2285
2286 case Iop_MAddF32:
2287 case Iop_MSubF32:
2288 /* I32(rm) x F32 x F32 x F32 -> F32 */
2289 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
2290
2291 default:
2292 ppIROp(op);
2293 VG_(tool_panic)("memcheck:expr2vbits_Qop");
2294 }
2295 }
2296
2297
2298 static
2299 IRAtom* expr2vbits_Triop ( MCEnv* mce,
2300 IROp op,
2301 IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
2302 {
2303 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2304 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2305 IRAtom* vatom3 = expr2vbits( mce, atom3 );
2306
2307 tl_assert(isOriginalAtom(mce,atom1));
2308 tl_assert(isOriginalAtom(mce,atom2));
2309 tl_assert(isOriginalAtom(mce,atom3));
2310 tl_assert(isShadowAtom(mce,vatom1));
2311 tl_assert(isShadowAtom(mce,vatom2));
2312 tl_assert(isShadowAtom(mce,vatom3));
2313 tl_assert(sameKindedAtoms(atom1,vatom1));
2314 tl_assert(sameKindedAtoms(atom2,vatom2));
2315 tl_assert(sameKindedAtoms(atom3,vatom3));
2316 switch (op) {
2317 case Iop_AddF128:
2318 case Iop_SubF128:
2319 case Iop_MulF128:
2320 case Iop_DivF128:
2321 /* I32(rm) x F128 x F128 -> F128 */
2322 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
2323 case Iop_AddF64:
2324 case Iop_AddF64r32:
2325 case Iop_SubF64:
2326 case Iop_SubF64r32:
2327 case Iop_MulF64:
2328 case Iop_MulF64r32:
2329 case Iop_DivF64:
2330 case Iop_DivF64r32:
2331 case Iop_ScaleF64:
2332 case Iop_Yl2xF64:
2333 case Iop_Yl2xp1F64:
2334 case Iop_AtanF64:
2335 case Iop_PRemF64:
2336 case Iop_PRem1F64:
2337 /* I32(rm) x F64 x F64 -> F64 */
2338 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2339 case Iop_PRemC3210F64:
2340 case Iop_PRem1C3210F64:
2341 /* I32(rm) x F64 x F64 -> I32 */
2342 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2343 case Iop_AddF32:
2344 case Iop_SubF32:
2345 case Iop_MulF32:
2346 case Iop_DivF32:
2347 /* I32(rm) x F32 x F32 -> F32 */
2348 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2349 case Iop_ExtractV128:
2350 complainIfUndefined(mce, atom3);
2351 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
2352 case Iop_Extract64:
2353 complainIfUndefined(mce, atom3);
2354 return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
2355 case Iop_SetElem8x8:
2356 case Iop_SetElem16x4:
2357 case Iop_SetElem32x2:
2358 complainIfUndefined(mce, atom2);
2359 return assignNew('V', mce, Ity_I64, triop(op,
vatom1, atom2, vatom3)); 2360 default: 2361 ppIROp(op); 2362 VG_(tool_panic)("memcheck:expr2vbits_Triop"); 2363 } 2364 } 2365 2366 2367 static 2368 IRAtom* expr2vbits_Binop ( MCEnv* mce, 2369 IROp op, 2370 IRAtom* atom1, IRAtom* atom2 ) 2371 { 2372 IRType and_or_ty; 2373 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*); 2374 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*); 2375 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*); 2376 2377 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2378 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2379 2380 tl_assert(isOriginalAtom(mce,atom1)); 2381 tl_assert(isOriginalAtom(mce,atom2)); 2382 tl_assert(isShadowAtom(mce,vatom1)); 2383 tl_assert(isShadowAtom(mce,vatom2)); 2384 tl_assert(sameKindedAtoms(atom1,vatom1)); 2385 tl_assert(sameKindedAtoms(atom2,vatom2)); 2386 switch (op) { 2387 2388 /* 32-bit SIMD */ 2389 2390 case Iop_Add16x2: 2391 case Iop_HAdd16Ux2: 2392 case Iop_HAdd16Sx2: 2393 case Iop_Sub16x2: 2394 case Iop_HSub16Ux2: 2395 case Iop_HSub16Sx2: 2396 case Iop_QAdd16Sx2: 2397 case Iop_QSub16Sx2: 2398 return binary16Ix2(mce, vatom1, vatom2); 2399 2400 case Iop_Add8x4: 2401 case Iop_HAdd8Ux4: 2402 case Iop_HAdd8Sx4: 2403 case Iop_Sub8x4: 2404 case Iop_HSub8Ux4: 2405 case Iop_HSub8Sx4: 2406 case Iop_QSub8Ux4: 2407 case Iop_QAdd8Ux4: 2408 case Iop_QSub8Sx4: 2409 case Iop_QAdd8Sx4: 2410 return binary8Ix4(mce, vatom1, vatom2); 2411 2412 /* 64-bit SIMD */ 2413 2414 case Iop_ShrN8x8: 2415 case Iop_ShrN16x4: 2416 case Iop_ShrN32x2: 2417 case Iop_SarN8x8: 2418 case Iop_SarN16x4: 2419 case Iop_SarN32x2: 2420 case Iop_ShlN16x4: 2421 case Iop_ShlN32x2: 2422 case Iop_ShlN8x8: 2423 /* Same scheme as with all other shifts. */ 2424 complainIfUndefined(mce, atom2); 2425 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 2426 2427 case Iop_QNarrowBin32Sto16Sx4: 2428 case Iop_QNarrowBin16Sto8Sx8: 2429 case Iop_QNarrowBin16Sto8Ux8: 2430 return vectorNarrowBin64(mce, op, vatom1, vatom2); 2431 2432 case Iop_Min8Ux8: 2433 case Iop_Min8Sx8: 2434 case Iop_Max8Ux8: 2435 case Iop_Max8Sx8: 2436 case Iop_Avg8Ux8: 2437 case Iop_QSub8Sx8: 2438 case Iop_QSub8Ux8: 2439 case Iop_Sub8x8: 2440 case Iop_CmpGT8Sx8: 2441 case Iop_CmpGT8Ux8: 2442 case Iop_CmpEQ8x8: 2443 case Iop_QAdd8Sx8: 2444 case Iop_QAdd8Ux8: 2445 case Iop_QSal8x8: 2446 case Iop_QShl8x8: 2447 case Iop_Add8x8: 2448 case Iop_Mul8x8: 2449 case Iop_PolynomialMul8x8: 2450 return binary8Ix8(mce, vatom1, vatom2); 2451 2452 case Iop_Min16Sx4: 2453 case Iop_Min16Ux4: 2454 case Iop_Max16Sx4: 2455 case Iop_Max16Ux4: 2456 case Iop_Avg16Ux4: 2457 case Iop_QSub16Ux4: 2458 case Iop_QSub16Sx4: 2459 case Iop_Sub16x4: 2460 case Iop_Mul16x4: 2461 case Iop_MulHi16Sx4: 2462 case Iop_MulHi16Ux4: 2463 case Iop_CmpGT16Sx4: 2464 case Iop_CmpGT16Ux4: 2465 case Iop_CmpEQ16x4: 2466 case Iop_QAdd16Sx4: 2467 case Iop_QAdd16Ux4: 2468 case Iop_QSal16x4: 2469 case Iop_QShl16x4: 2470 case Iop_Add16x4: 2471 case Iop_QDMulHi16Sx4: 2472 case Iop_QRDMulHi16Sx4: 2473 return binary16Ix4(mce, vatom1, vatom2); 2474 2475 case Iop_Sub32x2: 2476 case Iop_Mul32x2: 2477 case Iop_Max32Sx2: 2478 case Iop_Max32Ux2: 2479 case Iop_Min32Sx2: 2480 case Iop_Min32Ux2: 2481 case Iop_CmpGT32Sx2: 2482 case Iop_CmpGT32Ux2: 2483 case Iop_CmpEQ32x2: 2484 case Iop_Add32x2: 2485 case Iop_QAdd32Ux2: 2486 case Iop_QAdd32Sx2: 2487 case Iop_QSub32Ux2: 2488 case Iop_QSub32Sx2: 2489 case Iop_QSal32x2: 2490 case Iop_QShl32x2: 2491 case Iop_QDMulHi32Sx2: 2492 case Iop_QRDMulHi32Sx2: 2493 return binary32Ix2(mce, vatom1, vatom2); 2494 2495 case Iop_QSub64Ux1: 2496 case Iop_QSub64Sx1: 2497 case 
Iop_QAdd64Ux1: 2498 case Iop_QAdd64Sx1: 2499 case Iop_QSal64x1: 2500 case Iop_QShl64x1: 2501 case Iop_Sal64x1: 2502 return binary64Ix1(mce, vatom1, vatom2); 2503 2504 case Iop_QShlN8Sx8: 2505 case Iop_QShlN8x8: 2506 case Iop_QSalN8x8: 2507 complainIfUndefined(mce, atom2); 2508 return mkPCast8x8(mce, vatom1); 2509 2510 case Iop_QShlN16Sx4: 2511 case Iop_QShlN16x4: 2512 case Iop_QSalN16x4: 2513 complainIfUndefined(mce, atom2); 2514 return mkPCast16x4(mce, vatom1); 2515 2516 case Iop_QShlN32Sx2: 2517 case Iop_QShlN32x2: 2518 case Iop_QSalN32x2: 2519 complainIfUndefined(mce, atom2); 2520 return mkPCast32x2(mce, vatom1); 2521 2522 case Iop_QShlN64Sx1: 2523 case Iop_QShlN64x1: 2524 case Iop_QSalN64x1: 2525 complainIfUndefined(mce, atom2); 2526 return mkPCast32x2(mce, vatom1); 2527 2528 case Iop_PwMax32Sx2: 2529 case Iop_PwMax32Ux2: 2530 case Iop_PwMin32Sx2: 2531 case Iop_PwMin32Ux2: 2532 case Iop_PwMax32Fx2: 2533 case Iop_PwMin32Fx2: 2534 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax32Ux2, mkPCast32x2(mce, vatom1), 2535 mkPCast32x2(mce, vatom2))); 2536 2537 case Iop_PwMax16Sx4: 2538 case Iop_PwMax16Ux4: 2539 case Iop_PwMin16Sx4: 2540 case Iop_PwMin16Ux4: 2541 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax16Ux4, mkPCast16x4(mce, vatom1), 2542 mkPCast16x4(mce, vatom2))); 2543 2544 case Iop_PwMax8Sx8: 2545 case Iop_PwMax8Ux8: 2546 case Iop_PwMin8Sx8: 2547 case Iop_PwMin8Ux8: 2548 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax8Ux8, mkPCast8x8(mce, vatom1), 2549 mkPCast8x8(mce, vatom2))); 2550 2551 case Iop_PwAdd32x2: 2552 case Iop_PwAdd32Fx2: 2553 return mkPCast32x2(mce, 2554 assignNew('V', mce, Ity_I64, binop(Iop_PwAdd32x2, mkPCast32x2(mce, vatom1), 2555 mkPCast32x2(mce, vatom2)))); 2556 2557 case Iop_PwAdd16x4: 2558 return mkPCast16x4(mce, 2559 assignNew('V', mce, Ity_I64, binop(op, mkPCast16x4(mce, vatom1), 2560 mkPCast16x4(mce, vatom2)))); 2561 2562 case Iop_PwAdd8x8: 2563 return mkPCast8x8(mce, 2564 assignNew('V', mce, Ity_I64, binop(op, mkPCast8x8(mce, vatom1), 2565 mkPCast8x8(mce, vatom2)))); 2566 2567 case Iop_Shl8x8: 2568 case Iop_Shr8x8: 2569 case Iop_Sar8x8: 2570 case Iop_Sal8x8: 2571 return mkUifU64(mce, 2572 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2573 mkPCast8x8(mce,vatom2) 2574 ); 2575 2576 case Iop_Shl16x4: 2577 case Iop_Shr16x4: 2578 case Iop_Sar16x4: 2579 case Iop_Sal16x4: 2580 return mkUifU64(mce, 2581 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2582 mkPCast16x4(mce,vatom2) 2583 ); 2584 2585 case Iop_Shl32x2: 2586 case Iop_Shr32x2: 2587 case Iop_Sar32x2: 2588 case Iop_Sal32x2: 2589 return mkUifU64(mce, 2590 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2591 mkPCast32x2(mce,vatom2) 2592 ); 2593 2594 /* 64-bit data-steering */ 2595 case Iop_InterleaveLO32x2: 2596 case Iop_InterleaveLO16x4: 2597 case Iop_InterleaveLO8x8: 2598 case Iop_InterleaveHI32x2: 2599 case Iop_InterleaveHI16x4: 2600 case Iop_InterleaveHI8x8: 2601 case Iop_CatOddLanes8x8: 2602 case Iop_CatEvenLanes8x8: 2603 case Iop_CatOddLanes16x4: 2604 case Iop_CatEvenLanes16x4: 2605 case Iop_InterleaveOddLanes8x8: 2606 case Iop_InterleaveEvenLanes8x8: 2607 case Iop_InterleaveOddLanes16x4: 2608 case Iop_InterleaveEvenLanes16x4: 2609 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2)); 2610 2611 case Iop_GetElem8x8: 2612 complainIfUndefined(mce, atom2); 2613 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 2614 case Iop_GetElem16x4: 2615 complainIfUndefined(mce, atom2); 2616 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 2617 case 
Iop_GetElem32x2: 2618 complainIfUndefined(mce, atom2); 2619 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 2620 2621 /* Perm8x8: rearrange values in left arg using steering values 2622 from right arg. So rearrange the vbits in the same way but 2623 pessimise wrt steering values. */ 2624 case Iop_Perm8x8: 2625 return mkUifU64( 2626 mce, 2627 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2628 mkPCast8x8(mce, vatom2) 2629 ); 2630 2631 /* V128-bit SIMD */ 2632 2633 case Iop_ShrN8x16: 2634 case Iop_ShrN16x8: 2635 case Iop_ShrN32x4: 2636 case Iop_ShrN64x2: 2637 case Iop_SarN8x16: 2638 case Iop_SarN16x8: 2639 case Iop_SarN32x4: 2640 case Iop_SarN64x2: 2641 case Iop_ShlN8x16: 2642 case Iop_ShlN16x8: 2643 case Iop_ShlN32x4: 2644 case Iop_ShlN64x2: 2645 /* Same scheme as with all other shifts. Note: 22 Oct 05: 2646 this is wrong now, scalar shifts are done properly lazily. 2647 Vector shifts should be fixed too. */ 2648 complainIfUndefined(mce, atom2); 2649 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 2650 2651 /* V x V shifts/rotates are done using the standard lazy scheme. */ 2652 case Iop_Shl8x16: 2653 case Iop_Shr8x16: 2654 case Iop_Sar8x16: 2655 case Iop_Sal8x16: 2656 case Iop_Rol8x16: 2657 return mkUifUV128(mce, 2658 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2659 mkPCast8x16(mce,vatom2) 2660 ); 2661 2662 case Iop_Shl16x8: 2663 case Iop_Shr16x8: 2664 case Iop_Sar16x8: 2665 case Iop_Sal16x8: 2666 case Iop_Rol16x8: 2667 return mkUifUV128(mce, 2668 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2669 mkPCast16x8(mce,vatom2) 2670 ); 2671 2672 case Iop_Shl32x4: 2673 case Iop_Shr32x4: 2674 case Iop_Sar32x4: 2675 case Iop_Sal32x4: 2676 case Iop_Rol32x4: 2677 return mkUifUV128(mce, 2678 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2679 mkPCast32x4(mce,vatom2) 2680 ); 2681 2682 case Iop_Shl64x2: 2683 case Iop_Shr64x2: 2684 case Iop_Sar64x2: 2685 case Iop_Sal64x2: 2686 return mkUifUV128(mce, 2687 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2688 mkPCast64x2(mce,vatom2) 2689 ); 2690 2691 case Iop_F32ToFixed32Ux4_RZ: 2692 case Iop_F32ToFixed32Sx4_RZ: 2693 case Iop_Fixed32UToF32x4_RN: 2694 case Iop_Fixed32SToF32x4_RN: 2695 complainIfUndefined(mce, atom2); 2696 return mkPCast32x4(mce, vatom1); 2697 2698 case Iop_F32ToFixed32Ux2_RZ: 2699 case Iop_F32ToFixed32Sx2_RZ: 2700 case Iop_Fixed32UToF32x2_RN: 2701 case Iop_Fixed32SToF32x2_RN: 2702 complainIfUndefined(mce, atom2); 2703 return mkPCast32x2(mce, vatom1); 2704 2705 case Iop_QSub8Ux16: 2706 case Iop_QSub8Sx16: 2707 case Iop_Sub8x16: 2708 case Iop_Min8Ux16: 2709 case Iop_Min8Sx16: 2710 case Iop_Max8Ux16: 2711 case Iop_Max8Sx16: 2712 case Iop_CmpGT8Sx16: 2713 case Iop_CmpGT8Ux16: 2714 case Iop_CmpEQ8x16: 2715 case Iop_Avg8Ux16: 2716 case Iop_Avg8Sx16: 2717 case Iop_QAdd8Ux16: 2718 case Iop_QAdd8Sx16: 2719 case Iop_QSal8x16: 2720 case Iop_QShl8x16: 2721 case Iop_Add8x16: 2722 case Iop_Mul8x16: 2723 case Iop_PolynomialMul8x16: 2724 return binary8Ix16(mce, vatom1, vatom2); 2725 2726 case Iop_QSub16Ux8: 2727 case Iop_QSub16Sx8: 2728 case Iop_Sub16x8: 2729 case Iop_Mul16x8: 2730 case Iop_MulHi16Sx8: 2731 case Iop_MulHi16Ux8: 2732 case Iop_Min16Sx8: 2733 case Iop_Min16Ux8: 2734 case Iop_Max16Sx8: 2735 case Iop_Max16Ux8: 2736 case Iop_CmpGT16Sx8: 2737 case Iop_CmpGT16Ux8: 2738 case Iop_CmpEQ16x8: 2739 case Iop_Avg16Ux8: 2740 case Iop_Avg16Sx8: 2741 case Iop_QAdd16Ux8: 2742 case Iop_QAdd16Sx8: 2743 case Iop_QSal16x8: 2744 case Iop_QShl16x8: 2745 case Iop_Add16x8: 2746 case 
Iop_QDMulHi16Sx8: 2747 case Iop_QRDMulHi16Sx8: 2748 return binary16Ix8(mce, vatom1, vatom2); 2749 2750 case Iop_Sub32x4: 2751 case Iop_CmpGT32Sx4: 2752 case Iop_CmpGT32Ux4: 2753 case Iop_CmpEQ32x4: 2754 case Iop_QAdd32Sx4: 2755 case Iop_QAdd32Ux4: 2756 case Iop_QSub32Sx4: 2757 case Iop_QSub32Ux4: 2758 case Iop_QSal32x4: 2759 case Iop_QShl32x4: 2760 case Iop_Avg32Ux4: 2761 case Iop_Avg32Sx4: 2762 case Iop_Add32x4: 2763 case Iop_Max32Ux4: 2764 case Iop_Max32Sx4: 2765 case Iop_Min32Ux4: 2766 case Iop_Min32Sx4: 2767 case Iop_Mul32x4: 2768 case Iop_QDMulHi32Sx4: 2769 case Iop_QRDMulHi32Sx4: 2770 return binary32Ix4(mce, vatom1, vatom2); 2771 2772 case Iop_Sub64x2: 2773 case Iop_Add64x2: 2774 case Iop_CmpEQ64x2: 2775 case Iop_CmpGT64Sx2: 2776 case Iop_QSal64x2: 2777 case Iop_QShl64x2: 2778 case Iop_QAdd64Ux2: 2779 case Iop_QAdd64Sx2: 2780 case Iop_QSub64Ux2: 2781 case Iop_QSub64Sx2: 2782 return binary64Ix2(mce, vatom1, vatom2); 2783 2784 case Iop_QNarrowBin32Sto16Sx8: 2785 case Iop_QNarrowBin32Uto16Ux8: 2786 case Iop_QNarrowBin32Sto16Ux8: 2787 case Iop_QNarrowBin16Sto8Sx16: 2788 case Iop_QNarrowBin16Uto8Ux16: 2789 case Iop_QNarrowBin16Sto8Ux16: 2790 return vectorNarrowBinV128(mce, op, vatom1, vatom2); 2791 2792 case Iop_Sub64Fx2: 2793 case Iop_Mul64Fx2: 2794 case Iop_Min64Fx2: 2795 case Iop_Max64Fx2: 2796 case Iop_Div64Fx2: 2797 case Iop_CmpLT64Fx2: 2798 case Iop_CmpLE64Fx2: 2799 case Iop_CmpEQ64Fx2: 2800 case Iop_CmpUN64Fx2: 2801 case Iop_Add64Fx2: 2802 return binary64Fx2(mce, vatom1, vatom2); 2803 2804 case Iop_Sub64F0x2: 2805 case Iop_Mul64F0x2: 2806 case Iop_Min64F0x2: 2807 case Iop_Max64F0x2: 2808 case Iop_Div64F0x2: 2809 case Iop_CmpLT64F0x2: 2810 case Iop_CmpLE64F0x2: 2811 case Iop_CmpEQ64F0x2: 2812 case Iop_CmpUN64F0x2: 2813 case Iop_Add64F0x2: 2814 return binary64F0x2(mce, vatom1, vatom2); 2815 2816 case Iop_Sub32Fx4: 2817 case Iop_Mul32Fx4: 2818 case Iop_Min32Fx4: 2819 case Iop_Max32Fx4: 2820 case Iop_Div32Fx4: 2821 case Iop_CmpLT32Fx4: 2822 case Iop_CmpLE32Fx4: 2823 case Iop_CmpEQ32Fx4: 2824 case Iop_CmpUN32Fx4: 2825 case Iop_CmpGT32Fx4: 2826 case Iop_CmpGE32Fx4: 2827 case Iop_Add32Fx4: 2828 case Iop_Recps32Fx4: 2829 case Iop_Rsqrts32Fx4: 2830 return binary32Fx4(mce, vatom1, vatom2); 2831 2832 case Iop_Sub32Fx2: 2833 case Iop_Mul32Fx2: 2834 case Iop_Min32Fx2: 2835 case Iop_Max32Fx2: 2836 case Iop_CmpEQ32Fx2: 2837 case Iop_CmpGT32Fx2: 2838 case Iop_CmpGE32Fx2: 2839 case Iop_Add32Fx2: 2840 case Iop_Recps32Fx2: 2841 case Iop_Rsqrts32Fx2: 2842 return binary32Fx2(mce, vatom1, vatom2); 2843 2844 case Iop_Sub32F0x4: 2845 case Iop_Mul32F0x4: 2846 case Iop_Min32F0x4: 2847 case Iop_Max32F0x4: 2848 case Iop_Div32F0x4: 2849 case Iop_CmpLT32F0x4: 2850 case Iop_CmpLE32F0x4: 2851 case Iop_CmpEQ32F0x4: 2852 case Iop_CmpUN32F0x4: 2853 case Iop_Add32F0x4: 2854 return binary32F0x4(mce, vatom1, vatom2); 2855 2856 case Iop_QShlN8Sx16: 2857 case Iop_QShlN8x16: 2858 case Iop_QSalN8x16: 2859 complainIfUndefined(mce, atom2); 2860 return mkPCast8x16(mce, vatom1); 2861 2862 case Iop_QShlN16Sx8: 2863 case Iop_QShlN16x8: 2864 case Iop_QSalN16x8: 2865 complainIfUndefined(mce, atom2); 2866 return mkPCast16x8(mce, vatom1); 2867 2868 case Iop_QShlN32Sx4: 2869 case Iop_QShlN32x4: 2870 case Iop_QSalN32x4: 2871 complainIfUndefined(mce, atom2); 2872 return mkPCast32x4(mce, vatom1); 2873 2874 case Iop_QShlN64Sx2: 2875 case Iop_QShlN64x2: 2876 case Iop_QSalN64x2: 2877 complainIfUndefined(mce, atom2); 2878 return mkPCast32x4(mce, vatom1); 2879 2880 case Iop_Mull32Sx2: 2881 case Iop_Mull32Ux2: 2882 case Iop_QDMulLong32Sx2: 
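/* Widening multiplies (sketch of the scheme used here and in the
   following widening cases): UifU the two 64-bit shadows, widen the
   merged shadow to V128 with a Widen op of the matching shape, and
   let vectorWidenI64 re-pessimise at the destination lane size.  The
   signedness of the chosen Widen op is immaterial, since every
   widened lane gets PCast'd afterwards. */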
2883 return vectorWidenI64(mce, Iop_Widen32Sto64x2, 2884 mkUifU64(mce, vatom1, vatom2)); 2885 2886 case Iop_Mull16Sx4: 2887 case Iop_Mull16Ux4: 2888 case Iop_QDMulLong16Sx4: 2889 return vectorWidenI64(mce, Iop_Widen16Sto32x4, 2890 mkUifU64(mce, vatom1, vatom2)); 2891 2892 case Iop_Mull8Sx8: 2893 case Iop_Mull8Ux8: 2894 case Iop_PolynomialMull8x8: 2895 return vectorWidenI64(mce, Iop_Widen8Sto16x8, 2896 mkUifU64(mce, vatom1, vatom2)); 2897 2898 case Iop_PwAdd32x4: 2899 return mkPCast32x4(mce, 2900 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1), 2901 mkPCast32x4(mce, vatom2)))); 2902 2903 case Iop_PwAdd16x8: 2904 return mkPCast16x8(mce, 2905 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1), 2906 mkPCast16x8(mce, vatom2)))); 2907 2908 case Iop_PwAdd8x16: 2909 return mkPCast8x16(mce, 2910 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1), 2911 mkPCast8x16(mce, vatom2)))); 2912 2913 /* V128-bit data-steering */ 2914 case Iop_SetV128lo32: 2915 case Iop_SetV128lo64: 2916 case Iop_64HLtoV128: 2917 case Iop_InterleaveLO64x2: 2918 case Iop_InterleaveLO32x4: 2919 case Iop_InterleaveLO16x8: 2920 case Iop_InterleaveLO8x16: 2921 case Iop_InterleaveHI64x2: 2922 case Iop_InterleaveHI32x4: 2923 case Iop_InterleaveHI16x8: 2924 case Iop_InterleaveHI8x16: 2925 case Iop_CatOddLanes8x16: 2926 case Iop_CatOddLanes16x8: 2927 case Iop_CatOddLanes32x4: 2928 case Iop_CatEvenLanes8x16: 2929 case Iop_CatEvenLanes16x8: 2930 case Iop_CatEvenLanes32x4: 2931 case Iop_InterleaveOddLanes8x16: 2932 case Iop_InterleaveOddLanes16x8: 2933 case Iop_InterleaveOddLanes32x4: 2934 case Iop_InterleaveEvenLanes8x16: 2935 case Iop_InterleaveEvenLanes16x8: 2936 case Iop_InterleaveEvenLanes32x4: 2937 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2)); 2938 2939 case Iop_GetElem8x16: 2940 complainIfUndefined(mce, atom2); 2941 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 2942 case Iop_GetElem16x8: 2943 complainIfUndefined(mce, atom2); 2944 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 2945 case Iop_GetElem32x4: 2946 complainIfUndefined(mce, atom2); 2947 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 2948 case Iop_GetElem64x2: 2949 complainIfUndefined(mce, atom2); 2950 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 2951 2952 /* Perm8x16: rearrange values in left arg using steering values 2953 from right arg. So rearrange the vbits in the same way but 2954 pessimise wrt steering values. */ 2955 case Iop_Perm8x16: 2956 return mkUifUV128( 2957 mce, 2958 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2959 mkPCast8x16(mce, vatom2) 2960 ); 2961 2962 /* These two take the lower half of each 16-bit lane, sign/zero 2963 extend it to 32, and multiply together, producing a 32x4 2964 result (and implicitly ignoring half the operand bits). So 2965 treat it as a bunch of independent 16x8 operations, but then 2966 do 32-bit shifts left-right to copy the lower half results 2967 (which are all 0s or all 1s due to PCasting in binary16Ix8) 2968 into the upper half of each result lane. 
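An illustrative trace of one 32-bit result lane: binary16Ix8 leaves each 16-bit half as either 0x0000 or 0xFFFF; ShlN32x4 by 16 moves the lower half to the top of the lane, and SarN32x4 by 16 then sign-extends it back down, so the lane ends up 0x00000000 or 0xFFFFFFFF according to that lower half.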
*/ 2969 case Iop_MullEven16Ux8: 2970 case Iop_MullEven16Sx8: { 2971 IRAtom* at; 2972 at = binary16Ix8(mce,vatom1,vatom2); 2973 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16))); 2974 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16))); 2975 return at; 2976 } 2977 2978 /* Same deal as Iop_MullEven16{S,U}x8 */ 2979 case Iop_MullEven8Ux16: 2980 case Iop_MullEven8Sx16: { 2981 IRAtom* at; 2982 at = binary8Ix16(mce,vatom1,vatom2); 2983 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8))); 2984 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8))); 2985 return at; 2986 } 2987 2988 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x 2989 32x4 -> 16x8 laneage, discarding the upper half of each lane. 2990 Simply apply same op to the V bits, since this really no more 2991 than a data steering operation. */ 2992 case Iop_NarrowBin32to16x8: 2993 case Iop_NarrowBin16to8x16: 2994 return assignNew('V', mce, Ity_V128, 2995 binop(op, vatom1, vatom2)); 2996 2997 case Iop_ShrV128: 2998 case Iop_ShlV128: 2999 /* Same scheme as with all other shifts. Note: 10 Nov 05: 3000 this is wrong now, scalar shifts are done properly lazily. 3001 Vector shifts should be fixed too. */ 3002 complainIfUndefined(mce, atom2); 3003 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 3004 3005 /* I128-bit data-steering */ 3006 case Iop_64HLto128: 3007 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2)); 3008 3009 /* Scalar floating point */ 3010 3011 case Iop_F32toI64S: 3012 /* I32(rm) x F32 -> I64 */ 3013 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3014 3015 case Iop_I64StoF32: 3016 /* I32(rm) x I64 -> F32 */ 3017 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3018 3019 case Iop_RoundF64toInt: 3020 case Iop_RoundF64toF32: 3021 case Iop_F64toI64S: 3022 case Iop_F64toI64U: 3023 case Iop_I64StoF64: 3024 case Iop_I64UtoF64: 3025 case Iop_SinF64: 3026 case Iop_CosF64: 3027 case Iop_TanF64: 3028 case Iop_2xm1F64: 3029 case Iop_SqrtF64: 3030 /* I32(rm) x I64/F64 -> I64/F64 */ 3031 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3032 3033 case Iop_RoundF32toInt: 3034 case Iop_SqrtF32: 3035 /* I32(rm) x I32/F32 -> I32/F32 */ 3036 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3037 3038 case Iop_SqrtF128: 3039 /* I32(rm) x F128 -> F128 */ 3040 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3041 3042 case Iop_I32StoF32: 3043 case Iop_F32toI32S: 3044 /* First arg is I32 (rounding mode), second is F32/I32 (data). */ 3045 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3046 3047 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */ 3048 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */ 3049 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3050 3051 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */ 3052 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */ 3053 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3054 3055 case Iop_F64HLtoF128: 3056 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vatom1, vatom2)); 3057 3058 case Iop_F64toI32U: 3059 case Iop_F64toI32S: 3060 case Iop_F64toF32: 3061 case Iop_I64UtoF32: 3062 /* First arg is I32 (rounding mode), second is F64 (data). */ 3063 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3064 3065 case Iop_F64toI16S: 3066 /* First arg is I32 (rounding mode), second is F64 (data). 
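Since the I16 result's shadow is Ity_I16, the lazy merge below is done at that type: any undefined bit in either the rounding mode or the F64 data makes the whole 16-bit result undefined.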
*/ 3067 return mkLazy2(mce, Ity_I16, vatom1, vatom2); 3068 3069 case Iop_CmpF32: 3070 case Iop_CmpF64: 3071 case Iop_CmpF128: 3072 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3073 3074 /* non-FP after here */ 3075 3076 case Iop_DivModU64to32: 3077 case Iop_DivModS64to32: 3078 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3079 3080 case Iop_DivModU128to64: 3081 case Iop_DivModS128to64: 3082 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3083 3084 case Iop_16HLto32: 3085 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2)); 3086 case Iop_32HLto64: 3087 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2)); 3088 3089 case Iop_DivModS64to64: 3090 case Iop_MullS64: 3091 case Iop_MullU64: { 3092 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2)); 3093 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64); 3094 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64)); 3095 } 3096 3097 case Iop_MullS32: 3098 case Iop_MullU32: { 3099 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2)); 3100 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32); 3101 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32)); 3102 } 3103 3104 case Iop_MullS16: 3105 case Iop_MullU16: { 3106 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2)); 3107 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16); 3108 return assignNew('V', mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16)); 3109 } 3110 3111 case Iop_MullS8: 3112 case Iop_MullU8: { 3113 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2)); 3114 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8); 3115 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8)); 3116 } 3117 3118 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */ 3119 case Iop_DivS32: 3120 case Iop_DivU32: 3121 case Iop_DivU32E: 3122 case Iop_DivS32E: 3123 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3124 3125 case Iop_DivS64: 3126 case Iop_DivU64: 3127 case Iop_DivS64E: 3128 case Iop_DivU64E: 3129 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3130 3131 case Iop_Add32: 3132 if (mce->bogusLiterals) 3133 return expensiveAddSub(mce,True,Ity_I32, 3134 vatom1,vatom2, atom1,atom2); 3135 else 3136 goto cheap_AddSub32; 3137 case Iop_Sub32: 3138 if (mce->bogusLiterals) 3139 return expensiveAddSub(mce,False,Ity_I32, 3140 vatom1,vatom2, atom1,atom2); 3141 else 3142 goto cheap_AddSub32; 3143 3144 cheap_AddSub32: 3145 case Iop_Mul32: 3146 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2)); 3147 3148 case Iop_CmpORD32S: 3149 case Iop_CmpORD32U: 3150 case Iop_CmpORD64S: 3151 case Iop_CmpORD64U: 3152 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2); 3153 3154 case Iop_Add64: 3155 if (mce->bogusLiterals) 3156 return expensiveAddSub(mce,True,Ity_I64, 3157 vatom1,vatom2, atom1,atom2); 3158 else 3159 goto cheap_AddSub64; 3160 case Iop_Sub64: 3161 if (mce->bogusLiterals) 3162 return expensiveAddSub(mce,False,Ity_I64, 3163 vatom1,vatom2, atom1,atom2); 3164 else 3165 goto cheap_AddSub64; 3166 3167 cheap_AddSub64: 3168 case Iop_Mul64: 3169 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2)); 3170 3171 case Iop_Mul16: 3172 case Iop_Add16: 3173 case Iop_Sub16: 3174 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2)); 3175 3176 case Iop_Sub8: 3177 case Iop_Add8: 3178 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2)); 3179 3180 case Iop_CmpEQ64: 3181 case Iop_CmpNE64: 3182 if (mce->bogusLiterals) 3183 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 ); 3184 else 3185 goto cheap_cmp64; 3186 cheap_cmp64: 3187 case Iop_CmpLE64S: case 
Iop_CmpLE64U: 3188 case Iop_CmpLT64U: case Iop_CmpLT64S: 3189 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2)); 3190 3191 case Iop_CmpEQ32: 3192 case Iop_CmpNE32: 3193 if (mce->bogusLiterals) 3194 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 ); 3195 else 3196 goto cheap_cmp32; 3197 cheap_cmp32: 3198 case Iop_CmpLE32S: case Iop_CmpLE32U: 3199 case Iop_CmpLT32U: case Iop_CmpLT32S: 3200 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2)); 3201 3202 case Iop_CmpEQ16: case Iop_CmpNE16: 3203 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2)); 3204 3205 case Iop_CmpEQ8: case Iop_CmpNE8: 3206 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2)); 3207 3208 case Iop_CasCmpEQ8: case Iop_CasCmpNE8: 3209 case Iop_CasCmpEQ16: case Iop_CasCmpNE16: 3210 case Iop_CasCmpEQ32: case Iop_CasCmpNE32: 3211 case Iop_CasCmpEQ64: case Iop_CasCmpNE64: 3212 /* Just say these all produce a defined result, regardless 3213 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */ 3214 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1)); 3215 3216 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64: 3217 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 ); 3218 3219 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32: 3220 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 ); 3221 3222 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16: 3223 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 ); 3224 3225 case Iop_Shl8: case Iop_Shr8: 3226 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 ); 3227 3228 case Iop_AndV128: 3229 uifu = mkUifUV128; difd = mkDifDV128; 3230 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or; 3231 case Iop_And64: 3232 uifu = mkUifU64; difd = mkDifD64; 3233 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or; 3234 case Iop_And32: 3235 uifu = mkUifU32; difd = mkDifD32; 3236 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or; 3237 case Iop_And16: 3238 uifu = mkUifU16; difd = mkDifD16; 3239 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or; 3240 case Iop_And8: 3241 uifu = mkUifU8; difd = mkDifD8; 3242 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or; 3243 3244 case Iop_OrV128: 3245 uifu = mkUifUV128; difd = mkDifDV128; 3246 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or; 3247 case Iop_Or64: 3248 uifu = mkUifU64; difd = mkDifD64; 3249 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or; 3250 case Iop_Or32: 3251 uifu = mkUifU32; difd = mkDifD32; 3252 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or; 3253 case Iop_Or16: 3254 uifu = mkUifU16; difd = mkDifD16; 3255 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or; 3256 case Iop_Or8: 3257 uifu = mkUifU8; difd = mkDifD8; 3258 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or; 3259 3260 do_And_Or: 3261 return 3262 assignNew( 3263 'V', mce, 3264 and_or_ty, 3265 difd(mce, uifu(mce, vatom1, vatom2), 3266 difd(mce, improve(mce, atom1, vatom1), 3267 improve(mce, atom2, vatom2) ) ) ); 3268 3269 case Iop_Xor8: 3270 return mkUifU8(mce, vatom1, vatom2); 3271 case Iop_Xor16: 3272 return mkUifU16(mce, vatom1, vatom2); 3273 case Iop_Xor32: 3274 return mkUifU32(mce, vatom1, vatom2); 3275 case Iop_Xor64: 3276 return mkUifU64(mce, vatom1, vatom2); 3277 case Iop_XorV128: 3278 return mkUifUV128(mce, vatom1, vatom2); 3279 3280 default: 3281 ppIROp(op); 3282 VG_(tool_panic)("memcheck:expr2vbits_Binop"); 3283 } 3284 } 3285 3286 3287 static 3288 IRExpr* 
expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom ) 3289 { 3290 IRAtom* vatom = expr2vbits( mce, atom ); 3291 tl_assert(isOriginalAtom(mce,atom)); 3292 switch (op) { 3293 3294 case Iop_Sqrt64Fx2: 3295 return unary64Fx2(mce, vatom); 3296 3297 case Iop_Sqrt64F0x2: 3298 return unary64F0x2(mce, vatom); 3299 3300 case Iop_Sqrt32Fx4: 3301 case Iop_RSqrt32Fx4: 3302 case Iop_Recip32Fx4: 3303 case Iop_I32UtoFx4: 3304 case Iop_I32StoFx4: 3305 case Iop_QFtoI32Ux4_RZ: 3306 case Iop_QFtoI32Sx4_RZ: 3307 case Iop_RoundF32x4_RM: 3308 case Iop_RoundF32x4_RP: 3309 case Iop_RoundF32x4_RN: 3310 case Iop_RoundF32x4_RZ: 3311 case Iop_Recip32x4: 3312 case Iop_Abs32Fx4: 3313 case Iop_Neg32Fx4: 3314 case Iop_Rsqrte32Fx4: 3315 return unary32Fx4(mce, vatom); 3316 3317 case Iop_I32UtoFx2: 3318 case Iop_I32StoFx2: 3319 case Iop_Recip32Fx2: 3320 case Iop_Recip32x2: 3321 case Iop_Abs32Fx2: 3322 case Iop_Neg32Fx2: 3323 case Iop_Rsqrte32Fx2: 3324 return unary32Fx2(mce, vatom); 3325 3326 case Iop_Sqrt32F0x4: 3327 case Iop_RSqrt32F0x4: 3328 case Iop_Recip32F0x4: 3329 return unary32F0x4(mce, vatom); 3330 3331 case Iop_32UtoV128: 3332 case Iop_64UtoV128: 3333 case Iop_Dup8x16: 3334 case Iop_Dup16x8: 3335 case Iop_Dup32x4: 3336 case Iop_Reverse16_8x16: 3337 case Iop_Reverse32_8x16: 3338 case Iop_Reverse32_16x8: 3339 case Iop_Reverse64_8x16: 3340 case Iop_Reverse64_16x8: 3341 case Iop_Reverse64_32x4: 3342 return assignNew('V', mce, Ity_V128, unop(op, vatom)); 3343 3344 case Iop_F128HItoF64: /* F128 -> high half of F128 */ 3345 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom)); 3346 case Iop_F128LOtoF64: /* F128 -> low half of F128 */ 3347 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom)); 3348 3349 case Iop_NegF128: 3350 case Iop_AbsF128: 3351 return mkPCastTo(mce, Ity_I128, vatom); 3352 3353 case Iop_I32StoF128: /* signed I32 -> F128 */ 3354 case Iop_I64StoF128: /* signed I64 -> F128 */ 3355 case Iop_F32toF128: /* F32 -> F128 */ 3356 case Iop_F64toF128: /* F64 -> F128 */ 3357 return mkPCastTo(mce, Ity_I128, vatom); 3358 3359 case Iop_F32toF64: 3360 case Iop_I32StoF64: 3361 case Iop_I32UtoF64: 3362 case Iop_NegF64: 3363 case Iop_AbsF64: 3364 case Iop_Est5FRSqrt: 3365 case Iop_RoundF64toF64_NEAREST: 3366 case Iop_RoundF64toF64_NegINF: 3367 case Iop_RoundF64toF64_PosINF: 3368 case Iop_RoundF64toF64_ZERO: 3369 case Iop_Clz64: 3370 case Iop_Ctz64: 3371 return mkPCastTo(mce, Ity_I64, vatom); 3372 3373 case Iop_Clz32: 3374 case Iop_Ctz32: 3375 case Iop_TruncF64asF32: 3376 case Iop_NegF32: 3377 case Iop_AbsF32: 3378 return mkPCastTo(mce, Ity_I32, vatom); 3379 3380 case Iop_1Uto64: 3381 case Iop_1Sto64: 3382 case Iop_8Uto64: 3383 case Iop_8Sto64: 3384 case Iop_16Uto64: 3385 case Iop_16Sto64: 3386 case Iop_32Sto64: 3387 case Iop_32Uto64: 3388 case Iop_V128to64: 3389 case Iop_V128HIto64: 3390 case Iop_128HIto64: 3391 case Iop_128to64: 3392 case Iop_Dup8x8: 3393 case Iop_Dup16x4: 3394 case Iop_Dup32x2: 3395 case Iop_Reverse16_8x8: 3396 case Iop_Reverse32_8x8: 3397 case Iop_Reverse32_16x4: 3398 case Iop_Reverse64_8x8: 3399 case Iop_Reverse64_16x4: 3400 case Iop_Reverse64_32x2: 3401 return assignNew('V', mce, Ity_I64, unop(op, vatom)); 3402 3403 case Iop_I16StoF32: 3404 case Iop_64to32: 3405 case Iop_64HIto32: 3406 case Iop_1Uto32: 3407 case Iop_1Sto32: 3408 case Iop_8Uto32: 3409 case Iop_16Uto32: 3410 case Iop_16Sto32: 3411 case Iop_8Sto32: 3412 case Iop_V128to32: 3413 return assignNew('V', mce, Ity_I32, unop(op, vatom)); 3414 3415 case Iop_8Sto16: 3416 case Iop_8Uto16: 3417 case Iop_32to16: 3418 case 
Iop_32HIto16: 3419 case Iop_64to16: 3420 return assignNew('V', mce, Ity_I16, unop(op, vatom)); 3421 3422 case Iop_1Uto8: 3423 case Iop_1Sto8: 3424 case Iop_16to8: 3425 case Iop_16HIto8: 3426 case Iop_32to8: 3427 case Iop_64to8: 3428 return assignNew('V', mce, Ity_I8, unop(op, vatom)); 3429 3430 case Iop_32to1: 3431 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom)); 3432 3433 case Iop_64to1: 3434 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom)); 3435 3436 case Iop_ReinterpF64asI64: 3437 case Iop_ReinterpI64asF64: 3438 case Iop_ReinterpI32asF32: 3439 case Iop_ReinterpF32asI32: 3440 case Iop_NotV128: 3441 case Iop_Not64: 3442 case Iop_Not32: 3443 case Iop_Not16: 3444 case Iop_Not8: 3445 case Iop_Not1: 3446 return vatom; 3447 3448 case Iop_CmpNEZ8x8: 3449 case Iop_Cnt8x8: 3450 case Iop_Clz8Sx8: 3451 case Iop_Cls8Sx8: 3452 case Iop_Abs8x8: 3453 return mkPCast8x8(mce, vatom); 3454 3455 case Iop_CmpNEZ8x16: 3456 case Iop_Cnt8x16: 3457 case Iop_Clz8Sx16: 3458 case Iop_Cls8Sx16: 3459 case Iop_Abs8x16: 3460 return mkPCast8x16(mce, vatom); 3461 3462 case Iop_CmpNEZ16x4: 3463 case Iop_Clz16Sx4: 3464 case Iop_Cls16Sx4: 3465 case Iop_Abs16x4: 3466 return mkPCast16x4(mce, vatom); 3467 3468 case Iop_CmpNEZ16x8: 3469 case Iop_Clz16Sx8: 3470 case Iop_Cls16Sx8: 3471 case Iop_Abs16x8: 3472 return mkPCast16x8(mce, vatom); 3473 3474 case Iop_CmpNEZ32x2: 3475 case Iop_Clz32Sx2: 3476 case Iop_Cls32Sx2: 3477 case Iop_FtoI32Ux2_RZ: 3478 case Iop_FtoI32Sx2_RZ: 3479 case Iop_Abs32x2: 3480 return mkPCast32x2(mce, vatom); 3481 3482 case Iop_CmpNEZ32x4: 3483 case Iop_Clz32Sx4: 3484 case Iop_Cls32Sx4: 3485 case Iop_FtoI32Ux4_RZ: 3486 case Iop_FtoI32Sx4_RZ: 3487 case Iop_Abs32x4: 3488 return mkPCast32x4(mce, vatom); 3489 3490 case Iop_CmpwNEZ64: 3491 return mkPCastTo(mce, Ity_I64, vatom); 3492 3493 case Iop_CmpNEZ64x2: 3494 return mkPCast64x2(mce, vatom); 3495 3496 case Iop_NarrowUn16to8x8: 3497 case Iop_NarrowUn32to16x4: 3498 case Iop_NarrowUn64to32x2: 3499 case Iop_QNarrowUn16Sto8Sx8: 3500 case Iop_QNarrowUn16Sto8Ux8: 3501 case Iop_QNarrowUn16Uto8Ux8: 3502 case Iop_QNarrowUn32Sto16Sx4: 3503 case Iop_QNarrowUn32Sto16Ux4: 3504 case Iop_QNarrowUn32Uto16Ux4: 3505 case Iop_QNarrowUn64Sto32Sx2: 3506 case Iop_QNarrowUn64Sto32Ux2: 3507 case Iop_QNarrowUn64Uto32Ux2: 3508 return vectorNarrowUnV128(mce, op, vatom); 3509 3510 case Iop_Widen8Sto16x8: 3511 case Iop_Widen8Uto16x8: 3512 case Iop_Widen16Sto32x4: 3513 case Iop_Widen16Uto32x4: 3514 case Iop_Widen32Sto64x2: 3515 case Iop_Widen32Uto64x2: 3516 return vectorWidenI64(mce, op, vatom); 3517 3518 case Iop_PwAddL32Ux2: 3519 case Iop_PwAddL32Sx2: 3520 return mkPCastTo(mce, Ity_I64, 3521 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom)))); 3522 3523 case Iop_PwAddL16Ux4: 3524 case Iop_PwAddL16Sx4: 3525 return mkPCast32x2(mce, 3526 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom)))); 3527 3528 case Iop_PwAddL8Ux8: 3529 case Iop_PwAddL8Sx8: 3530 return mkPCast16x4(mce, 3531 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom)))); 3532 3533 case Iop_PwAddL32Ux4: 3534 case Iop_PwAddL32Sx4: 3535 return mkPCast64x2(mce, 3536 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom)))); 3537 3538 case Iop_PwAddL16Ux8: 3539 case Iop_PwAddL16Sx8: 3540 return mkPCast32x4(mce, 3541 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom)))); 3542 3543 case Iop_PwAddL8Ux16: 3544 case Iop_PwAddL8Sx16: 3545 return mkPCast16x8(mce, 3546 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom)))); 3547 3548 case Iop_I64UtoF32: 
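/* Note: Iop_I64UtoF32 has no specific handling here; it simply falls
   through to the panic below. */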
3549 default: 3550 ppIROp(op); 3551 VG_(tool_panic)("memcheck:expr2vbits_Unop"); 3552 } 3553 } 3554 3555 3556 /* Worker function; do not call directly. */ 3557 static 3558 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, 3559 IREndness end, IRType ty, 3560 IRAtom* addr, UInt bias ) 3561 { 3562 void* helper; 3563 Char* hname; 3564 IRDirty* di; 3565 IRTemp datavbits; 3566 IRAtom* addrAct; 3567 3568 tl_assert(isOriginalAtom(mce,addr)); 3569 tl_assert(end == Iend_LE || end == Iend_BE); 3570 3571 /* First, emit a definedness test for the address. This also sets 3572 the address (shadow) to 'defined' following the test. */ 3573 complainIfUndefined( mce, addr ); 3574 3575 /* Now cook up a call to the relevant helper function, to read the 3576 data V bits from shadow memory. */ 3577 ty = shadowTypeV(ty); 3578 3579 if (end == Iend_LE) { 3580 switch (ty) { 3581 case Ity_I64: helper = &MC_(helperc_LOADV64le); 3582 hname = "MC_(helperc_LOADV64le)"; 3583 break; 3584 case Ity_I32: helper = &MC_(helperc_LOADV32le); 3585 hname = "MC_(helperc_LOADV32le)"; 3586 break; 3587 case Ity_I16: helper = &MC_(helperc_LOADV16le); 3588 hname = "MC_(helperc_LOADV16le)"; 3589 break; 3590 case Ity_I8: helper = &MC_(helperc_LOADV8); 3591 hname = "MC_(helperc_LOADV8)"; 3592 break; 3593 default: ppIRType(ty); 3594 VG_(tool_panic)("memcheck:do_shadow_Load(LE)"); 3595 } 3596 } else { 3597 switch (ty) { 3598 case Ity_I64: helper = &MC_(helperc_LOADV64be); 3599 hname = "MC_(helperc_LOADV64be)"; 3600 break; 3601 case Ity_I32: helper = &MC_(helperc_LOADV32be); 3602 hname = "MC_(helperc_LOADV32be)"; 3603 break; 3604 case Ity_I16: helper = &MC_(helperc_LOADV16be); 3605 hname = "MC_(helperc_LOADV16be)"; 3606 break; 3607 case Ity_I8: helper = &MC_(helperc_LOADV8); 3608 hname = "MC_(helperc_LOADV8)"; 3609 break; 3610 default: ppIRType(ty); 3611 VG_(tool_panic)("memcheck:do_shadow_Load(BE)"); 3612 } 3613 } 3614 3615 /* Generate the actual address into addrAct. */ 3616 if (bias == 0) { 3617 addrAct = addr; 3618 } else { 3619 IROp mkAdd; 3620 IRAtom* eBias; 3621 IRType tyAddr = mce->hWordTy; 3622 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); 3623 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; 3624 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias); 3625 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) ); 3626 } 3627 3628 /* We need to have a place to park the V bits we're just about to 3629 read. 
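What follows is, in sketch form: allocate a fresh V-shadow temp, emit a dirty call whose single argument is addrAct and whose return value is written to that temp (e.g. MC_(helperc_LOADV32le)(addrAct) for a little-endian 32-bit load), and hand the temp back as the load's V bits.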
*/ 3630 datavbits = newTemp(mce, ty, VSh); 3631 di = unsafeIRDirty_1_N( datavbits, 3632 1/*regparms*/, 3633 hname, VG_(fnptr_to_fnentry)( helper ), 3634 mkIRExprVec_1( addrAct )); 3635 setHelperAnns( mce, di ); 3636 stmt( 'V', mce, IRStmt_Dirty(di) ); 3637 3638 return mkexpr(datavbits); 3639 } 3640 3641 3642 static 3643 IRAtom* expr2vbits_Load ( MCEnv* mce, 3644 IREndness end, IRType ty, 3645 IRAtom* addr, UInt bias ) 3646 { 3647 IRAtom *v64hi, *v64lo; 3648 tl_assert(end == Iend_LE || end == Iend_BE); 3649 switch (shadowTypeV(ty)) { 3650 case Ity_I8: 3651 case Ity_I16: 3652 case Ity_I32: 3653 case Ity_I64: 3654 return expr2vbits_Load_WRK(mce, end, ty, addr, bias); 3655 case Ity_V128: 3656 if (end == Iend_LE) { 3657 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias); 3658 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8); 3659 } else { 3660 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias); 3661 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8); 3662 } 3663 return assignNew( 'V', mce, 3664 Ity_V128, 3665 binop(Iop_64HLtoV128, v64hi, v64lo)); 3666 default: 3667 VG_(tool_panic)("expr2vbits_Load"); 3668 } 3669 } 3670 3671 3672 static 3673 IRAtom* expr2vbits_Mux0X ( MCEnv* mce, 3674 IRAtom* cond, IRAtom* expr0, IRAtom* exprX ) 3675 { 3676 IRAtom *vbitsC, *vbits0, *vbitsX; 3677 IRType ty; 3678 /* Given Mux0X(cond,expr0,exprX), generate 3679 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#) 3680 That is, steer the V bits like the originals, but trash the 3681 result if the steering value is undefined. This gives 3682 lazy propagation. */ 3683 tl_assert(isOriginalAtom(mce, cond)); 3684 tl_assert(isOriginalAtom(mce, expr0)); 3685 tl_assert(isOriginalAtom(mce, exprX)); 3686 3687 vbitsC = expr2vbits(mce, cond); 3688 vbits0 = expr2vbits(mce, expr0); 3689 vbitsX = expr2vbits(mce, exprX); 3690 ty = typeOfIRExpr(mce->sb->tyenv, vbits0); 3691 3692 return 3693 mkUifU(mce, ty, assignNew('V', mce, ty, 3694 IRExpr_Mux0X(cond, vbits0, vbitsX)), 3695 mkPCastTo(mce, ty, vbitsC) ); 3696 } 3697 3698 /* --------- This is the main expression-handling function. 
--------- */ 3699 3700 static 3701 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e ) 3702 { 3703 switch (e->tag) { 3704 3705 case Iex_Get: 3706 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty ); 3707 3708 case Iex_GetI: 3709 return shadow_GETI( mce, e->Iex.GetI.descr, 3710 e->Iex.GetI.ix, e->Iex.GetI.bias ); 3711 3712 case Iex_RdTmp: 3713 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) ); 3714 3715 case Iex_Const: 3716 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e))); 3717 3718 case Iex_Qop: 3719 return expr2vbits_Qop( 3720 mce, 3721 e->Iex.Qop.op, 3722 e->Iex.Qop.arg1, e->Iex.Qop.arg2, 3723 e->Iex.Qop.arg3, e->Iex.Qop.arg4 3724 ); 3725 3726 case Iex_Triop: 3727 return expr2vbits_Triop( 3728 mce, 3729 e->Iex.Triop.op, 3730 e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3 3731 ); 3732 3733 case Iex_Binop: 3734 return expr2vbits_Binop( 3735 mce, 3736 e->Iex.Binop.op, 3737 e->Iex.Binop.arg1, e->Iex.Binop.arg2 3738 ); 3739 3740 case Iex_Unop: 3741 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg ); 3742 3743 case Iex_Load: 3744 return expr2vbits_Load( mce, e->Iex.Load.end, 3745 e->Iex.Load.ty, 3746 e->Iex.Load.addr, 0/*addr bias*/ ); 3747 3748 case Iex_CCall: 3749 return mkLazyN( mce, e->Iex.CCall.args, 3750 e->Iex.CCall.retty, 3751 e->Iex.CCall.cee ); 3752 3753 case Iex_Mux0X: 3754 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0, 3755 e->Iex.Mux0X.exprX); 3756 3757 default: 3758 VG_(printf)("\n"); 3759 ppIRExpr(e); 3760 VG_(printf)("\n"); 3761 VG_(tool_panic)("memcheck: expr2vbits"); 3762 } 3763 } 3764 3765 /*------------------------------------------------------------*/ 3766 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/ 3767 /*------------------------------------------------------------*/ 3768 3769 /* Widen a value to the host word size. */ 3770 3771 static 3772 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom ) 3773 { 3774 IRType ty, tyH; 3775 3776 /* vatom is vbits-value and as such can only have a shadow type. */ 3777 tl_assert(isShadowAtom(mce,vatom)); 3778 3779 ty = typeOfIRExpr(mce->sb->tyenv, vatom); 3780 tyH = mce->hWordTy; 3781 3782 if (tyH == Ity_I32) { 3783 switch (ty) { 3784 case Ity_I32: 3785 return vatom; 3786 case Ity_I16: 3787 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom)); 3788 case Ity_I8: 3789 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom)); 3790 default: 3791 goto unhandled; 3792 } 3793 } else 3794 if (tyH == Ity_I64) { 3795 switch (ty) { 3796 case Ity_I32: 3797 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom)); 3798 case Ity_I16: 3799 return assignNew('V', mce, tyH, unop(Iop_32Uto64, 3800 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom)))); 3801 case Ity_I8: 3802 return assignNew('V', mce, tyH, unop(Iop_32Uto64, 3803 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom)))); 3804 default: 3805 goto unhandled; 3806 } 3807 } else { 3808 goto unhandled; 3809 } 3810 unhandled: 3811 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n"); 3812 VG_(tool_panic)("zwidenToHostWord"); 3813 } 3814 3815 3816 /* Generate a shadow store. addr is always the original address atom. 3817 You can pass in either originals or V-bits for the data atom, but 3818 obviously not both. guard :: Ity_I1 controls whether the store 3819 really happens; NULL means it unconditionally does. Note that 3820 guard itself is not checked for definedness; the caller of this 3821 function must do that if necessary. 
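   Usage sketch (see the call sites elsewhere in this file): the plain
   Ist_Store case in MC_(instrument) passes guard == NULL, so the shadow
   store is unconditional, whereas do_shadow_CAS_single/_double pass their
   'expd_eq_old' atom as the guard; in that case the emitted dirty call
   behaves, schematically, like
      if (expd_eq_old) STOREV_helper(addr, vdata)
   so the shadow write only takes effect when the CAS itself succeeded.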
*/ 3822 3823 static 3824 void do_shadow_Store ( MCEnv* mce, 3825 IREndness end, 3826 IRAtom* addr, UInt bias, 3827 IRAtom* data, IRAtom* vdata, 3828 IRAtom* guard ) 3829 { 3830 IROp mkAdd; 3831 IRType ty, tyAddr; 3832 void* helper = NULL; 3833 Char* hname = NULL; 3834 IRConst* c; 3835 3836 tyAddr = mce->hWordTy; 3837 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; 3838 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); 3839 tl_assert( end == Iend_LE || end == Iend_BE ); 3840 3841 if (data) { 3842 tl_assert(!vdata); 3843 tl_assert(isOriginalAtom(mce, data)); 3844 tl_assert(bias == 0); 3845 vdata = expr2vbits( mce, data ); 3846 } else { 3847 tl_assert(vdata); 3848 } 3849 3850 tl_assert(isOriginalAtom(mce,addr)); 3851 tl_assert(isShadowAtom(mce,vdata)); 3852 3853 if (guard) { 3854 tl_assert(isOriginalAtom(mce, guard)); 3855 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); 3856 } 3857 3858 ty = typeOfIRExpr(mce->sb->tyenv, vdata); 3859 3860 // If we're not doing undefined value checking, pretend that this value 3861 // is "all valid". That lets Vex's optimiser remove some of the V bit 3862 // shadow computation ops that precede it. 3863 if (MC_(clo_mc_level) == 1) { 3864 switch (ty) { 3865 case Ity_V128: // V128 weirdness 3866 c = IRConst_V128(V_BITS16_DEFINED); break; 3867 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break; 3868 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break; 3869 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break; 3870 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break; 3871 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)"); 3872 } 3873 vdata = IRExpr_Const( c ); 3874 } 3875 3876 /* First, emit a definedness test for the address. This also sets 3877 the address (shadow) to 'defined' following the test. */ 3878 complainIfUndefined( mce, addr ); 3879 3880 /* Now decide which helper function to call to write the data V 3881 bits into shadow memory. 
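      (For example, a little-endian Ity_I32 store is routed to
      MC_(helperc_STOREV32le); sub-word V bits are zero-widened to the
      host word size via zwidenToHostWord before being passed; and the
      Ity_V128 case deliberately picks the 64-bit helper, which is then
      called twice, once per 64-bit half -- see further down.)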
*/ 3882 if (end == Iend_LE) { 3883 switch (ty) { 3884 case Ity_V128: /* we'll use the helper twice */ 3885 case Ity_I64: helper = &MC_(helperc_STOREV64le); 3886 hname = "MC_(helperc_STOREV64le)"; 3887 break; 3888 case Ity_I32: helper = &MC_(helperc_STOREV32le); 3889 hname = "MC_(helperc_STOREV32le)"; 3890 break; 3891 case Ity_I16: helper = &MC_(helperc_STOREV16le); 3892 hname = "MC_(helperc_STOREV16le)"; 3893 break; 3894 case Ity_I8: helper = &MC_(helperc_STOREV8); 3895 hname = "MC_(helperc_STOREV8)"; 3896 break; 3897 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)"); 3898 } 3899 } else { 3900 switch (ty) { 3901 case Ity_V128: /* we'll use the helper twice */ 3902 case Ity_I64: helper = &MC_(helperc_STOREV64be); 3903 hname = "MC_(helperc_STOREV64be)"; 3904 break; 3905 case Ity_I32: helper = &MC_(helperc_STOREV32be); 3906 hname = "MC_(helperc_STOREV32be)"; 3907 break; 3908 case Ity_I16: helper = &MC_(helperc_STOREV16be); 3909 hname = "MC_(helperc_STOREV16be)"; 3910 break; 3911 case Ity_I8: helper = &MC_(helperc_STOREV8); 3912 hname = "MC_(helperc_STOREV8)"; 3913 break; 3914 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)"); 3915 } 3916 } 3917 3918 if (ty == Ity_V128) { 3919 3920 /* V128-bit case */ 3921 /* See comment in next clause re 64-bit regparms */ 3922 /* also, need to be careful about endianness */ 3923 3924 Int offLo64, offHi64; 3925 IRDirty *diLo64, *diHi64; 3926 IRAtom *addrLo64, *addrHi64; 3927 IRAtom *vdataLo64, *vdataHi64; 3928 IRAtom *eBiasLo64, *eBiasHi64; 3929 3930 if (end == Iend_LE) { 3931 offLo64 = 0; 3932 offHi64 = 8; 3933 } else { 3934 offLo64 = 8; 3935 offHi64 = 0; 3936 } 3937 3938 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64); 3939 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) ); 3940 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata)); 3941 diLo64 = unsafeIRDirty_0_N( 3942 1/*regparms*/, 3943 hname, VG_(fnptr_to_fnentry)( helper ), 3944 mkIRExprVec_2( addrLo64, vdataLo64 ) 3945 ); 3946 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64); 3947 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) ); 3948 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata)); 3949 diHi64 = unsafeIRDirty_0_N( 3950 1/*regparms*/, 3951 hname, VG_(fnptr_to_fnentry)( helper ), 3952 mkIRExprVec_2( addrHi64, vdataHi64 ) 3953 ); 3954 if (guard) diLo64->guard = guard; 3955 if (guard) diHi64->guard = guard; 3956 setHelperAnns( mce, diLo64 ); 3957 setHelperAnns( mce, diHi64 ); 3958 stmt( 'V', mce, IRStmt_Dirty(diLo64) ); 3959 stmt( 'V', mce, IRStmt_Dirty(diHi64) ); 3960 3961 } else { 3962 3963 IRDirty *di; 3964 IRAtom *addrAct; 3965 3966 /* 8/16/32/64-bit cases */ 3967 /* Generate the actual address into addrAct. */ 3968 if (bias == 0) { 3969 addrAct = addr; 3970 } else { 3971 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias); 3972 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias)); 3973 } 3974 3975 if (ty == Ity_I64) { 3976 /* We can't do this with regparm 2 on 32-bit platforms, since 3977 the back ends aren't clever enough to handle 64-bit 3978 regparm args. Therefore be different. 
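         (Concretely: the Ity_I64 case just below uses regparms 1,
         presumably so that only the address is register-parameterised,
         while the sub-64-bit cases use regparms 2 for both the address
         and the host-word-widened V bits.)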
*/ 3979 di = unsafeIRDirty_0_N( 3980 1/*regparms*/, 3981 hname, VG_(fnptr_to_fnentry)( helper ), 3982 mkIRExprVec_2( addrAct, vdata ) 3983 ); 3984 } else { 3985 di = unsafeIRDirty_0_N( 3986 2/*regparms*/, 3987 hname, VG_(fnptr_to_fnentry)( helper ), 3988 mkIRExprVec_2( addrAct, 3989 zwidenToHostWord( mce, vdata )) 3990 ); 3991 } 3992 if (guard) di->guard = guard; 3993 setHelperAnns( mce, di ); 3994 stmt( 'V', mce, IRStmt_Dirty(di) ); 3995 } 3996 3997 } 3998 3999 4000 /* Do lazy pessimistic propagation through a dirty helper call, by 4001 looking at the annotations on it. This is the most complex part of 4002 Memcheck. */ 4003 4004 static IRType szToITy ( Int n ) 4005 { 4006 switch (n) { 4007 case 1: return Ity_I8; 4008 case 2: return Ity_I16; 4009 case 4: return Ity_I32; 4010 case 8: return Ity_I64; 4011 default: VG_(tool_panic)("szToITy(memcheck)"); 4012 } 4013 } 4014 4015 static 4016 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d ) 4017 { 4018 Int i, n, toDo, gSz, gOff; 4019 IRAtom *src, *here, *curr; 4020 IRType tySrc, tyDst; 4021 IRTemp dst; 4022 IREndness end; 4023 4024 /* What's the native endianness? We need to know this. */ 4025 # if defined(VG_BIGENDIAN) 4026 end = Iend_BE; 4027 # elif defined(VG_LITTLEENDIAN) 4028 end = Iend_LE; 4029 # else 4030 # error "Unknown endianness" 4031 # endif 4032 4033 /* First check the guard. */ 4034 complainIfUndefined(mce, d->guard); 4035 4036 /* Now round up all inputs and PCast over them. */ 4037 curr = definedOfType(Ity_I32); 4038 4039 /* Inputs: unmasked args */ 4040 for (i = 0; d->args[i]; i++) { 4041 if (d->cee->mcx_mask & (1<<i)) { 4042 /* ignore this arg */ 4043 } else { 4044 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) ); 4045 curr = mkUifU32(mce, here, curr); 4046 } 4047 } 4048 4049 /* Inputs: guest state that we read. */ 4050 for (i = 0; i < d->nFxState; i++) { 4051 tl_assert(d->fxState[i].fx != Ifx_None); 4052 if (d->fxState[i].fx == Ifx_Write) 4053 continue; 4054 4055 /* Ignore any sections marked as 'always defined'. */ 4056 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) { 4057 if (0) 4058 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n", 4059 d->fxState[i].offset, d->fxState[i].size ); 4060 continue; 4061 } 4062 4063 /* This state element is read or modified. So we need to 4064 consider it. If larger than 8 bytes, deal with it in 8-byte 4065 chunks. */ 4066 gSz = d->fxState[i].size; 4067 gOff = d->fxState[i].offset; 4068 tl_assert(gSz > 0); 4069 while (True) { 4070 if (gSz == 0) break; 4071 n = gSz <= 8 ? gSz : 8; 4072 /* update 'curr' with UifU of the state slice 4073 gOff .. gOff+n-1 */ 4074 tySrc = szToITy( n ); 4075 src = assignNew( 'V', mce, tySrc, 4076 shadow_GET(mce, gOff, tySrc ) ); 4077 here = mkPCastTo( mce, Ity_I32, src ); 4078 curr = mkUifU32(mce, here, curr); 4079 gSz -= n; 4080 gOff += n; 4081 } 4082 4083 } 4084 4085 /* Inputs: memory. First set up some info needed regardless of 4086 whether we're doing reads or writes. */ 4087 4088 if (d->mFx != Ifx_None) { 4089 /* Because we may do multiple shadow loads/stores from the same 4090 base address, it's best to do a single test of its 4091 definedness right now. Post-instrumentation optimisation 4092 should remove all but this test. 
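         (For instance -- illustrative only -- a helper annotated as
         reading a 16-byte area at d->mAddr gives rise below to four
         Ity_I32 shadow loads at biases 0, 4, 8 and 12 from that same base
         address; the single complainIfUndefined call here is intended to
         cover all of them.)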
*/ 4093 IRType tyAddr; 4094 tl_assert(d->mAddr); 4095 complainIfUndefined(mce, d->mAddr); 4096 4097 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr); 4098 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64); 4099 tl_assert(tyAddr == mce->hWordTy); /* not really right */ 4100 } 4101 4102 /* Deal with memory inputs (reads or modifies) */ 4103 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) { 4104 toDo = d->mSize; 4105 /* chew off 32-bit chunks. We don't care about the endianness 4106 since it's all going to be condensed down to a single bit, 4107 but nevertheless choose an endianness which is hopefully 4108 native to the platform. */ 4109 while (toDo >= 4) { 4110 here = mkPCastTo( 4111 mce, Ity_I32, 4112 expr2vbits_Load ( mce, end, Ity_I32, 4113 d->mAddr, d->mSize - toDo ) 4114 ); 4115 curr = mkUifU32(mce, here, curr); 4116 toDo -= 4; 4117 } 4118 /* chew off 16-bit chunks */ 4119 while (toDo >= 2) { 4120 here = mkPCastTo( 4121 mce, Ity_I32, 4122 expr2vbits_Load ( mce, end, Ity_I16, 4123 d->mAddr, d->mSize - toDo ) 4124 ); 4125 curr = mkUifU32(mce, here, curr); 4126 toDo -= 2; 4127 } 4128 tl_assert(toDo == 0); /* also need to handle 1-byte excess */ 4129 } 4130 4131 /* Whew! So curr is a 32-bit V-value summarising pessimistically 4132 all the inputs to the helper. Now we need to re-distribute the 4133 results to all destinations. */ 4134 4135 /* Outputs: the destination temporary, if there is one. */ 4136 if (d->tmp != IRTemp_INVALID) { 4137 dst = findShadowTmpV(mce, d->tmp); 4138 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp); 4139 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) ); 4140 } 4141 4142 /* Outputs: guest state that we write or modify. */ 4143 for (i = 0; i < d->nFxState; i++) { 4144 tl_assert(d->fxState[i].fx != Ifx_None); 4145 if (d->fxState[i].fx == Ifx_Read) 4146 continue; 4147 /* Ignore any sections marked as 'always defined'. */ 4148 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) 4149 continue; 4150 /* This state element is written or modified. So we need to 4151 consider it. If larger than 8 bytes, deal with it in 8-byte 4152 chunks. */ 4153 gSz = d->fxState[i].size; 4154 gOff = d->fxState[i].offset; 4155 tl_assert(gSz > 0); 4156 while (True) { 4157 if (gSz == 0) break; 4158 n = gSz <= 8 ? gSz : 8; 4159 /* Write suitably-casted 'curr' to the state slice 4160 gOff .. gOff+n-1 */ 4161 tyDst = szToITy( n ); 4162 do_shadow_PUT( mce, gOff, 4163 NULL, /* original atom */ 4164 mkPCastTo( mce, tyDst, curr ) ); 4165 gSz -= n; 4166 gOff += n; 4167 } 4168 } 4169 4170 /* Outputs: memory that we write or modify. Same comments about 4171 endianness as above apply. */ 4172 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) { 4173 toDo = d->mSize; 4174 /* chew off 32-bit chunks */ 4175 while (toDo >= 4) { 4176 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 4177 NULL, /* original data */ 4178 mkPCastTo( mce, Ity_I32, curr ), 4179 NULL/*guard*/ ); 4180 toDo -= 4; 4181 } 4182 /* chew off 16-bit chunks */ 4183 while (toDo >= 2) { 4184 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 4185 NULL, /* original data */ 4186 mkPCastTo( mce, Ity_I16, curr ), 4187 NULL/*guard*/ ); 4188 toDo -= 2; 4189 } 4190 tl_assert(toDo == 0); /* also need to handle 1-byte excess */ 4191 } 4192 4193 } 4194 4195 4196 /* We have an ABI hint telling us that [base .. base+len-1] is to 4197 become undefined ("writable"). Generate code to call a helper to 4198 notify the A/V bit machinery of this fact. 
4199 4200 We call 4201 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, 4202 Addr nia ); 4203 */ 4204 static 4205 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia ) 4206 { 4207 IRDirty* di; 4208 /* Minor optimisation: if not doing origin tracking, ignore the 4209 supplied nia and pass zero instead. This is on the basis that 4210 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can 4211 almost always generate a shorter instruction to put zero into a 4212 register than any other value. */ 4213 if (MC_(clo_mc_level) < 3) 4214 nia = mkIRExpr_HWord(0); 4215 4216 di = unsafeIRDirty_0_N( 4217 0/*regparms*/, 4218 "MC_(helperc_MAKE_STACK_UNINIT)", 4219 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ), 4220 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia ) 4221 ); 4222 stmt( 'V', mce, IRStmt_Dirty(di) ); 4223 } 4224 4225 4226 /* ------ Dealing with IRCAS (big and complex) ------ */ 4227 4228 /* FWDS */ 4229 static IRAtom* gen_load_b ( MCEnv* mce, Int szB, 4230 IRAtom* baseaddr, Int offset ); 4231 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 ); 4232 static void gen_store_b ( MCEnv* mce, Int szB, 4233 IRAtom* baseaddr, Int offset, IRAtom* dataB, 4234 IRAtom* guard ); 4235 4236 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas ); 4237 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ); 4238 4239 4240 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both 4241 IRExpr.Consts, else this asserts. If they are both Consts, it 4242 doesn't do anything. So that just leaves the RdTmp case. 4243 4244 In which case: this assigns the shadow value SHADOW to the IR 4245 shadow temporary associated with ORIG. That is, ORIG, being an 4246 original temporary, will have a shadow temporary associated with 4247 it. However, in the case envisaged here, there will so far have 4248 been no IR emitted to actually write a shadow value into that 4249 temporary. What this routine does is to (emit IR to) copy the 4250 value in SHADOW into said temporary, so that after this call, 4251 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the 4252 value in SHADOW. 4253 4254 Point is to allow callers to compute "by hand" a shadow value for 4255 ORIG, and force it to be associated with ORIG. 4256 4257 How do we know that that shadow associated with ORIG has not so far 4258 been assigned to? Well, we don't per se know that, but supposing 4259 it had. Then this routine would create a second assignment to it, 4260 and later the IR sanity checker would barf. But that never 4261 happens. QED. 4262 */ 4263 static void bind_shadow_tmp_to_orig ( UChar how, 4264 MCEnv* mce, 4265 IRAtom* orig, IRAtom* shadow ) 4266 { 4267 tl_assert(isOriginalAtom(mce, orig)); 4268 tl_assert(isShadowAtom(mce, shadow)); 4269 switch (orig->tag) { 4270 case Iex_Const: 4271 tl_assert(shadow->tag == Iex_Const); 4272 break; 4273 case Iex_RdTmp: 4274 tl_assert(shadow->tag == Iex_RdTmp); 4275 if (how == 'V') { 4276 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp), 4277 shadow); 4278 } else { 4279 tl_assert(how == 'B'); 4280 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp), 4281 shadow); 4282 } 4283 break; 4284 default: 4285 tl_assert(0); 4286 } 4287 } 4288 4289 4290 static 4291 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas ) 4292 { 4293 /* Scheme is (both single- and double- cases): 4294 4295 1. fetch data#,dataB (the proposed new value) 4296 4297 2. fetch expd#,expdB (what we expect to see at the address) 4298 4299 3. 
check definedness of address
4300
4301    4. load old#,oldB from shadow memory; this also checks
4302       addressibility of the address
4303
4304    5. the CAS itself
4305
4306    6. compute "expected == old". See COMMENT_ON_CasCmpEQ below.
4307
4308    7. if "expected == old" (as computed by (6))
4309          store data#,dataB to shadow memory
4310
4311    Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores
4312    'data' but 7 stores 'data#'. Hence it is possible for the
4313    shadow data to be incorrectly checked and/or updated:
4314
4315    * 7 is at least gated correctly, since the 'expected == old'
4316      condition is derived from outputs of 5. However, the shadow
4317      write could happen too late: imagine after 5 we are
4318      descheduled, a different thread runs, writes a different
4319      (shadow) value at the address, and then we resume, hence
4320      overwriting the shadow value written by the other thread.
4321
4322    Because the original memory access is atomic, there's no way to
4323    make both the original and shadow accesses into a single atomic
4324    thing, hence this is unavoidable.
4325
4326    At least as Valgrind stands, I don't think it's a problem, since
4327    we're single threaded *and* we guarantee that there are no
4328    context switches during the execution of any specific superblock
4329    -- context switches can only happen at superblock boundaries.
4330
4331    If Valgrind ever becomes MT in the future, then it might be more
4332    of a problem. A possible kludge would be to artificially
4333    associate a lock with the location, which we must acquire and
4334    release around the transaction as a whole. Hmm, that probably
4335    wouldn't work properly since it only guards us against other
4336    threads doing CASs on the same location, not against other
4337    threads doing normal reads and writes.
4338
4339    ------------------------------------------------------------
4340
4341    COMMENT_ON_CasCmpEQ:
4342
4343    Note two things. Firstly, in the sequence above, we compute
4344    "expected == old", but we don't check definedness of it. Why
4345    not? Also, the x86 and amd64 front ends use
4346    Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
4347    determination (expected == old ?) for themselves, and we also
4348    don't check definedness for those primops; we just say that the
4349    result is defined. Why? Details follow.
4350
4351    x86/amd64 contains various forms of locked insns:
4352    * lock prefix before all basic arithmetic insns;
4353      eg lock xorl %reg1,(%reg2)
4354    * atomic exchange reg-mem
4355    * compare-and-swaps
4356
4357    Rather than attempt to represent them all, which would be a
4358    royal PITA, I used a result from Maurice Herlihy
4359    (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
4360    demonstrates that compare-and-swap is a primitive more general
4361    than the other two, and so can be used to represent all of them.
4362    So the translation scheme for (eg) lock incl (%reg) is as
4363    follows:
4364
4365      again:
4366       old = * %reg
4367       new = old + 1
4368       atomically { if (* %reg == old) { * %reg = new } else { goto again } }
4369
4370    The "atomically" is the CAS bit. The scheme is always the same:
4371    get old value from memory, compute new value, atomically stuff
4372    new value back in memory iff the old value has not changed (iow,
4373    no other thread modified it in the meantime). If it has changed
4374    then we've been out-raced and we have to start over.
4375
4376    Now that's all very neat, but it has the bad side effect of
4377    introducing an explicit equality test into the translation.
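   (Schematically, and with invented temp names, the IR produced for the
   'lock incl (%reg)' example above therefore contains something like

      t_old  = load32(t_addr)
      t_new  = Add32(t_old, 1)
      CAS(t_addr, expected: t_old, new: t_new) -> t_seen
      t_ok   = CasCmpEQ32(t_old, t_seen)
      branch back to 'again' if !t_ok

   and it is that trailing CasCmpEQ32 which the rest of this comment is
   concerned with.)
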
4378 Consider the behaviour of said code on a memory location which 4379 is uninitialised. We will wind up doing a comparison on 4380 uninitialised data, and mc duly complains. 4381 4382 What's difficult about this is, the common case is that the 4383 location is uncontended, and so we're usually comparing the same 4384 value (* %reg) with itself. So we shouldn't complain even if it 4385 is undefined. But mc doesn't know that. 4386 4387 My solution is to mark the == in the IR specially, so as to tell 4388 mc that it almost certainly compares a value with itself, and we 4389 should just regard the result as always defined. Rather than 4390 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into 4391 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else. 4392 4393 So there's always the question of, can this give a false 4394 negative? eg, imagine that initially, * %reg is defined; and we 4395 read that; but then in the gap between the read and the CAS, a 4396 different thread writes an undefined (and different) value at 4397 the location. Then the CAS in this thread will fail and we will 4398 go back to "again:", but without knowing that the trip back 4399 there was based on an undefined comparison. No matter; at least 4400 the other thread won the race and the location is correctly 4401 marked as undefined. What if it wrote an uninitialised version 4402 of the same value that was there originally, though? 4403 4404 etc etc. Seems like there's a small corner case in which we 4405 might lose the fact that something's defined -- we're out-raced 4406 in between the "old = * reg" and the "atomically {", _and_ the 4407 other thread is writing in an undefined version of what's 4408 already there. Well, that seems pretty unlikely. 4409 4410 --- 4411 4412 If we ever need to reinstate it .. code which generates a 4413 definedness test for "expected == old" was removed at r10432 of 4414 this file. 4415 */ 4416 if (cas->oldHi == IRTemp_INVALID) { 4417 do_shadow_CAS_single( mce, cas ); 4418 } else { 4419 do_shadow_CAS_double( mce, cas ); 4420 } 4421 } 4422 4423 4424 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas ) 4425 { 4426 IRAtom *vdataLo = NULL, *bdataLo = NULL; 4427 IRAtom *vexpdLo = NULL, *bexpdLo = NULL; 4428 IRAtom *voldLo = NULL, *boldLo = NULL; 4429 IRAtom *expd_eq_old = NULL; 4430 IROp opCasCmpEQ; 4431 Int elemSzB; 4432 IRType elemTy; 4433 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */ 4434 4435 /* single CAS */ 4436 tl_assert(cas->oldHi == IRTemp_INVALID); 4437 tl_assert(cas->expdHi == NULL); 4438 tl_assert(cas->dataHi == NULL); 4439 4440 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); 4441 switch (elemTy) { 4442 case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break; 4443 case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break; 4444 case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break; 4445 case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break; 4446 default: tl_assert(0); /* IR defn disallows any other types */ 4447 } 4448 4449 /* 1. fetch data# (the proposed new value) */ 4450 tl_assert(isOriginalAtom(mce, cas->dataLo)); 4451 vdataLo 4452 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo)); 4453 tl_assert(isShadowAtom(mce, vdataLo)); 4454 if (otrak) { 4455 bdataLo 4456 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo)); 4457 tl_assert(isShadowAtom(mce, bdataLo)); 4458 } 4459 4460 /* 2. 
fetch expected# (what we expect to see at the address) */ 4461 tl_assert(isOriginalAtom(mce, cas->expdLo)); 4462 vexpdLo 4463 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo)); 4464 tl_assert(isShadowAtom(mce, vexpdLo)); 4465 if (otrak) { 4466 bexpdLo 4467 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo)); 4468 tl_assert(isShadowAtom(mce, bexpdLo)); 4469 } 4470 4471 /* 3. check definedness of address */ 4472 /* 4. fetch old# from shadow memory; this also checks 4473 addressibility of the address */ 4474 voldLo 4475 = assignNew( 4476 'V', mce, elemTy, 4477 expr2vbits_Load( 4478 mce, 4479 cas->end, elemTy, cas->addr, 0/*Addr bias*/ 4480 )); 4481 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo); 4482 if (otrak) { 4483 boldLo 4484 = assignNew('B', mce, Ity_I32, 4485 gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/)); 4486 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo); 4487 } 4488 4489 /* 5. the CAS itself */ 4490 stmt( 'C', mce, IRStmt_CAS(cas) ); 4491 4492 /* 6. compute "expected == old" */ 4493 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */ 4494 /* Note that 'C' is kinda faking it; it is indeed a non-shadow 4495 tree, but it's not copied from the input block. */ 4496 expd_eq_old 4497 = assignNew('C', mce, Ity_I1, 4498 binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo))); 4499 4500 /* 7. if "expected == old" 4501 store data# to shadow memory */ 4502 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/, 4503 NULL/*data*/, vdataLo/*vdata*/, 4504 expd_eq_old/*guard for store*/ ); 4505 if (otrak) { 4506 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/, 4507 bdataLo/*bdata*/, 4508 expd_eq_old/*guard for store*/ ); 4509 } 4510 } 4511 4512 4513 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ) 4514 { 4515 IRAtom *vdataHi = NULL, *bdataHi = NULL; 4516 IRAtom *vdataLo = NULL, *bdataLo = NULL; 4517 IRAtom *vexpdHi = NULL, *bexpdHi = NULL; 4518 IRAtom *vexpdLo = NULL, *bexpdLo = NULL; 4519 IRAtom *voldHi = NULL, *boldHi = NULL; 4520 IRAtom *voldLo = NULL, *boldLo = NULL; 4521 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL; 4522 IRAtom *expd_eq_old = NULL, *zero = NULL; 4523 IROp opCasCmpEQ, opOr, opXor; 4524 Int elemSzB, memOffsLo, memOffsHi; 4525 IRType elemTy; 4526 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */ 4527 4528 /* double CAS */ 4529 tl_assert(cas->oldHi != IRTemp_INVALID); 4530 tl_assert(cas->expdHi != NULL); 4531 tl_assert(cas->dataHi != NULL); 4532 4533 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); 4534 switch (elemTy) { 4535 case Ity_I8: 4536 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8; 4537 elemSzB = 1; zero = mkU8(0); 4538 break; 4539 case Ity_I16: 4540 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16; 4541 elemSzB = 2; zero = mkU16(0); 4542 break; 4543 case Ity_I32: 4544 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32; 4545 elemSzB = 4; zero = mkU32(0); 4546 break; 4547 case Ity_I64: 4548 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64; 4549 elemSzB = 8; zero = mkU64(0); 4550 break; 4551 default: 4552 tl_assert(0); /* IR defn disallows any other types */ 4553 } 4554 4555 /* 1. 
fetch data# (the proposed new value) */ 4556 tl_assert(isOriginalAtom(mce, cas->dataHi)); 4557 tl_assert(isOriginalAtom(mce, cas->dataLo)); 4558 vdataHi 4559 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi)); 4560 vdataLo 4561 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo)); 4562 tl_assert(isShadowAtom(mce, vdataHi)); 4563 tl_assert(isShadowAtom(mce, vdataLo)); 4564 if (otrak) { 4565 bdataHi 4566 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi)); 4567 bdataLo 4568 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo)); 4569 tl_assert(isShadowAtom(mce, bdataHi)); 4570 tl_assert(isShadowAtom(mce, bdataLo)); 4571 } 4572 4573 /* 2. fetch expected# (what we expect to see at the address) */ 4574 tl_assert(isOriginalAtom(mce, cas->expdHi)); 4575 tl_assert(isOriginalAtom(mce, cas->expdLo)); 4576 vexpdHi 4577 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi)); 4578 vexpdLo 4579 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo)); 4580 tl_assert(isShadowAtom(mce, vexpdHi)); 4581 tl_assert(isShadowAtom(mce, vexpdLo)); 4582 if (otrak) { 4583 bexpdHi 4584 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi)); 4585 bexpdLo 4586 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo)); 4587 tl_assert(isShadowAtom(mce, bexpdHi)); 4588 tl_assert(isShadowAtom(mce, bexpdLo)); 4589 } 4590 4591 /* 3. check definedness of address */ 4592 /* 4. fetch old# from shadow memory; this also checks 4593 addressibility of the address */ 4594 if (cas->end == Iend_LE) { 4595 memOffsLo = 0; 4596 memOffsHi = elemSzB; 4597 } else { 4598 tl_assert(cas->end == Iend_BE); 4599 memOffsLo = elemSzB; 4600 memOffsHi = 0; 4601 } 4602 voldHi 4603 = assignNew( 4604 'V', mce, elemTy, 4605 expr2vbits_Load( 4606 mce, 4607 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/ 4608 )); 4609 voldLo 4610 = assignNew( 4611 'V', mce, elemTy, 4612 expr2vbits_Load( 4613 mce, 4614 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/ 4615 )); 4616 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi); 4617 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo); 4618 if (otrak) { 4619 boldHi 4620 = assignNew('B', mce, Ity_I32, 4621 gen_load_b(mce, elemSzB, cas->addr, 4622 memOffsHi/*addr bias*/)); 4623 boldLo 4624 = assignNew('B', mce, Ity_I32, 4625 gen_load_b(mce, elemSzB, cas->addr, 4626 memOffsLo/*addr bias*/)); 4627 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi); 4628 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo); 4629 } 4630 4631 /* 5. the CAS itself */ 4632 stmt( 'C', mce, IRStmt_CAS(cas) ); 4633 4634 /* 6. compute "expected == old" */ 4635 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */ 4636 /* Note that 'C' is kinda faking it; it is indeed a non-shadow 4637 tree, but it's not copied from the input block. */ 4638 /* 4639 xHi = oldHi ^ expdHi; 4640 xLo = oldLo ^ expdLo; 4641 xHL = xHi | xLo; 4642 expd_eq_old = xHL == 0; 4643 */ 4644 xHi = assignNew('C', mce, elemTy, 4645 binop(opXor, cas->expdHi, mkexpr(cas->oldHi))); 4646 xLo = assignNew('C', mce, elemTy, 4647 binop(opXor, cas->expdLo, mkexpr(cas->oldLo))); 4648 xHL = assignNew('C', mce, elemTy, 4649 binop(opOr, xHi, xLo)); 4650 expd_eq_old 4651 = assignNew('C', mce, Ity_I1, 4652 binop(opCasCmpEQ, xHL, zero)); 4653 4654 /* 7. 
if "expected == old" 4655 store data# to shadow memory */ 4656 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/, 4657 NULL/*data*/, vdataHi/*vdata*/, 4658 expd_eq_old/*guard for store*/ ); 4659 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/, 4660 NULL/*data*/, vdataLo/*vdata*/, 4661 expd_eq_old/*guard for store*/ ); 4662 if (otrak) { 4663 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/, 4664 bdataHi/*bdata*/, 4665 expd_eq_old/*guard for store*/ ); 4666 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/, 4667 bdataLo/*bdata*/, 4668 expd_eq_old/*guard for store*/ ); 4669 } 4670 } 4671 4672 4673 /* ------ Dealing with LL/SC (not difficult) ------ */ 4674 4675 static void do_shadow_LLSC ( MCEnv* mce, 4676 IREndness stEnd, 4677 IRTemp stResult, 4678 IRExpr* stAddr, 4679 IRExpr* stStoredata ) 4680 { 4681 /* In short: treat a load-linked like a normal load followed by an 4682 assignment of the loaded (shadow) data to the result temporary. 4683 Treat a store-conditional like a normal store, and mark the 4684 result temporary as defined. */ 4685 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult); 4686 IRTemp resTmp = findShadowTmpV(mce, stResult); 4687 4688 tl_assert(isIRAtom(stAddr)); 4689 if (stStoredata) 4690 tl_assert(isIRAtom(stStoredata)); 4691 4692 if (stStoredata == NULL) { 4693 /* Load Linked */ 4694 /* Just treat this as a normal load, followed by an assignment of 4695 the value to .result. */ 4696 /* Stay sane */ 4697 tl_assert(resTy == Ity_I64 || resTy == Ity_I32 4698 || resTy == Ity_I16 || resTy == Ity_I8); 4699 assign( 'V', mce, resTmp, 4700 expr2vbits_Load( 4701 mce, stEnd, resTy, stAddr, 0/*addr bias*/)); 4702 } else { 4703 /* Store Conditional */ 4704 /* Stay sane */ 4705 IRType dataTy = typeOfIRExpr(mce->sb->tyenv, 4706 stStoredata); 4707 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32 4708 || dataTy == Ity_I16 || dataTy == Ity_I8); 4709 do_shadow_Store( mce, stEnd, 4710 stAddr, 0/* addr bias */, 4711 stStoredata, 4712 NULL /* shadow data */, 4713 NULL/*guard*/ ); 4714 /* This is a store conditional, so it writes to .result a value 4715 indicating whether or not the store succeeded. Just claim 4716 this value is always defined. In the PowerPC interpretation 4717 of store-conditional, definedness of the success indication 4718 depends on whether the address of the store matches the 4719 reservation address. But we can't tell that here (and 4720 anyway, we're not being PowerPC-specific). At least we are 4721 guaranteed that the definedness of the store address, and its 4722 addressibility, will be checked as per normal. So it seems 4723 pretty safe to just say that the success indication is always 4724 defined. 4725 4726 In schemeS, for origin tracking, we must correspondingly set 4727 a no-origin value for the origin shadow of .result. 
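      As a sketch (invented names), for a 32-bit little-endian SC the net
      effect of this function is roughly
         complainIfUndefined(addr)              -- definedness of the address
         MC_(helperc_STOREV32le)(addr, vdata)   -- via do_shadow_Store
         result# := 'defined'                   -- the assign just below
      i.e. exactly a normal shadow store plus an unconditional claim that
      the success flag is defined.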
4728 */ 4729 tl_assert(resTy == Ity_I1); 4730 assign( 'V', mce, resTmp, definedOfType(resTy) ); 4731 } 4732 } 4733 4734 4735 /*------------------------------------------------------------*/ 4736 /*--- Memcheck main ---*/ 4737 /*------------------------------------------------------------*/ 4738 4739 static void schemeS ( MCEnv* mce, IRStmt* st ); 4740 4741 static Bool isBogusAtom ( IRAtom* at ) 4742 { 4743 ULong n = 0; 4744 IRConst* con; 4745 tl_assert(isIRAtom(at)); 4746 if (at->tag == Iex_RdTmp) 4747 return False; 4748 tl_assert(at->tag == Iex_Const); 4749 con = at->Iex.Const.con; 4750 switch (con->tag) { 4751 case Ico_U1: return False; 4752 case Ico_U8: n = (ULong)con->Ico.U8; break; 4753 case Ico_U16: n = (ULong)con->Ico.U16; break; 4754 case Ico_U32: n = (ULong)con->Ico.U32; break; 4755 case Ico_U64: n = (ULong)con->Ico.U64; break; 4756 case Ico_F64: return False; 4757 case Ico_F32i: return False; 4758 case Ico_F64i: return False; 4759 case Ico_V128: return False; 4760 default: ppIRExpr(at); tl_assert(0); 4761 } 4762 /* VG_(printf)("%llx\n", n); */ 4763 return (/*32*/ n == 0xFEFEFEFFULL 4764 /*32*/ || n == 0x80808080ULL 4765 /*32*/ || n == 0x7F7F7F7FULL 4766 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL 4767 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL 4768 /*64*/ || n == 0x0000000000008080ULL 4769 /*64*/ || n == 0x8080808080808080ULL 4770 /*64*/ || n == 0x0101010101010101ULL 4771 ); 4772 } 4773 4774 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st ) 4775 { 4776 Int i; 4777 IRExpr* e; 4778 IRDirty* d; 4779 IRCAS* cas; 4780 switch (st->tag) { 4781 case Ist_WrTmp: 4782 e = st->Ist.WrTmp.data; 4783 switch (e->tag) { 4784 case Iex_Get: 4785 case Iex_RdTmp: 4786 return False; 4787 case Iex_Const: 4788 return isBogusAtom(e); 4789 case Iex_Unop: 4790 return isBogusAtom(e->Iex.Unop.arg); 4791 case Iex_GetI: 4792 return isBogusAtom(e->Iex.GetI.ix); 4793 case Iex_Binop: 4794 return isBogusAtom(e->Iex.Binop.arg1) 4795 || isBogusAtom(e->Iex.Binop.arg2); 4796 case Iex_Triop: 4797 return isBogusAtom(e->Iex.Triop.arg1) 4798 || isBogusAtom(e->Iex.Triop.arg2) 4799 || isBogusAtom(e->Iex.Triop.arg3); 4800 case Iex_Qop: 4801 return isBogusAtom(e->Iex.Qop.arg1) 4802 || isBogusAtom(e->Iex.Qop.arg2) 4803 || isBogusAtom(e->Iex.Qop.arg3) 4804 || isBogusAtom(e->Iex.Qop.arg4); 4805 case Iex_Mux0X: 4806 return isBogusAtom(e->Iex.Mux0X.cond) 4807 || isBogusAtom(e->Iex.Mux0X.expr0) 4808 || isBogusAtom(e->Iex.Mux0X.exprX); 4809 case Iex_Load: 4810 return isBogusAtom(e->Iex.Load.addr); 4811 case Iex_CCall: 4812 for (i = 0; e->Iex.CCall.args[i]; i++) 4813 if (isBogusAtom(e->Iex.CCall.args[i])) 4814 return True; 4815 return False; 4816 default: 4817 goto unhandled; 4818 } 4819 case Ist_Dirty: 4820 d = st->Ist.Dirty.details; 4821 for (i = 0; d->args[i]; i++) 4822 if (isBogusAtom(d->args[i])) 4823 return True; 4824 if (d->guard && isBogusAtom(d->guard)) 4825 return True; 4826 if (d->mAddr && isBogusAtom(d->mAddr)) 4827 return True; 4828 return False; 4829 case Ist_Put: 4830 return isBogusAtom(st->Ist.Put.data); 4831 case Ist_PutI: 4832 return isBogusAtom(st->Ist.PutI.ix) 4833 || isBogusAtom(st->Ist.PutI.data); 4834 case Ist_Store: 4835 return isBogusAtom(st->Ist.Store.addr) 4836 || isBogusAtom(st->Ist.Store.data); 4837 case Ist_Exit: 4838 return isBogusAtom(st->Ist.Exit.guard); 4839 case Ist_AbiHint: 4840 return isBogusAtom(st->Ist.AbiHint.base) 4841 || isBogusAtom(st->Ist.AbiHint.nia); 4842 case Ist_NoOp: 4843 case Ist_IMark: 4844 case Ist_MBE: 4845 return False; 4846 case Ist_CAS: 4847 cas = st->Ist.CAS.details; 4848 return 
isBogusAtom(cas->addr) 4849 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False) 4850 || isBogusAtom(cas->expdLo) 4851 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False) 4852 || isBogusAtom(cas->dataLo); 4853 case Ist_LLSC: 4854 return isBogusAtom(st->Ist.LLSC.addr) 4855 || (st->Ist.LLSC.storedata 4856 ? isBogusAtom(st->Ist.LLSC.storedata) 4857 : False); 4858 default: 4859 unhandled: 4860 ppIRStmt(st); 4861 VG_(tool_panic)("hasBogusLiterals"); 4862 } 4863 } 4864 4865 4866 IRSB* MC_(instrument) ( VgCallbackClosure* closure, 4867 IRSB* sb_in, 4868 VexGuestLayout* layout, 4869 VexGuestExtents* vge, 4870 IRType gWordTy, IRType hWordTy ) 4871 { 4872 Bool verboze = 0||False; 4873 Bool bogus; 4874 Int i, j, first_stmt; 4875 IRStmt* st; 4876 MCEnv mce; 4877 IRSB* sb_out; 4878 4879 if (gWordTy != hWordTy) { 4880 /* We don't currently support this case. */ 4881 VG_(tool_panic)("host/guest word size mismatch"); 4882 } 4883 4884 /* Check we're not completely nuts */ 4885 tl_assert(sizeof(UWord) == sizeof(void*)); 4886 tl_assert(sizeof(Word) == sizeof(void*)); 4887 tl_assert(sizeof(Addr) == sizeof(void*)); 4888 tl_assert(sizeof(ULong) == 8); 4889 tl_assert(sizeof(Long) == 8); 4890 tl_assert(sizeof(Addr64) == 8); 4891 tl_assert(sizeof(UInt) == 4); 4892 tl_assert(sizeof(Int) == 4); 4893 4894 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3); 4895 4896 /* Set up SB */ 4897 sb_out = deepCopyIRSBExceptStmts(sb_in); 4898 4899 /* Set up the running environment. Both .sb and .tmpMap are 4900 modified as we go along. Note that tmps are added to both 4901 .sb->tyenv and .tmpMap together, so the valid index-set for 4902 those two arrays should always be identical. */ 4903 VG_(memset)(&mce, 0, sizeof(mce)); 4904 mce.sb = sb_out; 4905 mce.trace = verboze; 4906 mce.layout = layout; 4907 mce.hWordTy = hWordTy; 4908 mce.bogusLiterals = False; 4909 4910 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free), 4911 sizeof(TempMapEnt)); 4912 for (i = 0; i < sb_in->tyenv->types_used; i++) { 4913 TempMapEnt ent; 4914 ent.kind = Orig; 4915 ent.shadowV = IRTemp_INVALID; 4916 ent.shadowB = IRTemp_INVALID; 4917 VG_(addToXA)( mce.tmpMap, &ent ); 4918 } 4919 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used ); 4920 4921 /* Make a preliminary inspection of the statements, to see if there 4922 are any dodgy-looking literals. If there are, we generate 4923 extra-detailed (hence extra-expensive) instrumentation in 4924 places. Scan the whole bb even if dodgyness is found earlier, 4925 so that the flatness assertion is applied to all stmts. */ 4926 4927 bogus = False; 4928 4929 for (i = 0; i < sb_in->stmts_used; i++) { 4930 4931 st = sb_in->stmts[i]; 4932 tl_assert(st); 4933 tl_assert(isFlatIRStmt(st)); 4934 4935 if (!bogus) { 4936 bogus = checkForBogusLiterals(st); 4937 if (0 && bogus) { 4938 VG_(printf)("bogus: "); 4939 ppIRStmt(st); 4940 VG_(printf)("\n"); 4941 } 4942 } 4943 4944 } 4945 4946 mce.bogusLiterals = bogus; 4947 4948 /* Copy verbatim any IR preamble preceding the first IMark */ 4949 4950 tl_assert(mce.sb == sb_out); 4951 tl_assert(mce.sb != sb_in); 4952 4953 i = 0; 4954 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) { 4955 4956 st = sb_in->stmts[i]; 4957 tl_assert(st); 4958 tl_assert(isFlatIRStmt(st)); 4959 4960 stmt( 'C', &mce, sb_in->stmts[i] ); 4961 i++; 4962 } 4963 4964 /* Nasty problem. IR optimisation of the pre-instrumented IR may 4965 cause the IR following the preamble to contain references to IR 4966 temporaries defined in the preamble. 
Because the preamble isn't
4967       instrumented, these temporaries don't have any shadows.
4968       Nevertheless uses of them following the preamble will cause
4969       memcheck to generate references to their shadows. The end effect
4970       is to cause IR sanity check failures, due to references to
4971       non-existent shadows. This is only evident for the complex
4972       preambles used for function wrapping on TOC-afflicted platforms
4973       (ppc64-linux).
4974
4975       The following loop therefore scans the preamble looking for
4976       assignments to temporaries. For each one found it creates an
4977       assignment to the corresponding (V) shadow temp, marking it as
4978       'defined'. This is the same resulting IR as if the main
4979       instrumentation loop below had been applied to the statement
4980       'tmp = CONSTANT'.
4981
4982       Similarly, if origin tracking is enabled, we must generate an
4983       assignment for the corresponding origin (B) shadow, claiming
4984       no-origin, as appropriate for a defined value.
4985    */
4986    for (j = 0; j < i; j++) {
4987       if (sb_in->stmts[j]->tag == Ist_WrTmp) {
4988          /* findShadowTmpV checks its arg is an original tmp;
4989             no need to assert that here. */
4990          IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
4991          IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
4992          IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v);
4993          assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
4994          if (MC_(clo_mc_level) == 3) {
4995             IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
4996             tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
4997             assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
4998          }
4999          if (0) {
5000             VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
5001             ppIRType( ty_v );
5002             VG_(printf)("\n");
5003          }
5004       }
5005    }
5006
5007    /* Iterate over the remaining stmts to generate instrumentation. */
5008
5009    tl_assert(sb_in->stmts_used > 0);
5010    tl_assert(i >= 0);
5011    tl_assert(i < sb_in->stmts_used);
5012    tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
5013
5014    for (/* use current i*/; i < sb_in->stmts_used; i++) {
5015
5016       st = sb_in->stmts[i];
5017       first_stmt = sb_out->stmts_used;
5018
5019       if (verboze) {
5020          VG_(printf)("\n");
5021          ppIRStmt(st);
5022          VG_(printf)("\n");
5023       }
5024
5025       if (MC_(clo_mc_level) == 3) {
5026          /* See comments on case Ist_CAS below. */
5027          if (st->tag != Ist_CAS)
5028             schemeS( &mce, st );
5029       }
5030
5031       /* Generate instrumentation code for each stmt ...
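         (For example: each original 'tN = <expr>' (Ist_WrTmp) gets a
         parallel shadow assignment 'tN# = expr2vbits(<expr>)'; an
         Ist_Exit gets a definedness check on its guard; and, except for
         Ist_CAS, the original statement itself is then copied through
         unchanged at the bottom of the loop.)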
*/ 5032 5033 switch (st->tag) { 5034 5035 case Ist_WrTmp: 5036 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp), 5037 expr2vbits( &mce, st->Ist.WrTmp.data) ); 5038 break; 5039 5040 case Ist_Put: 5041 do_shadow_PUT( &mce, 5042 st->Ist.Put.offset, 5043 st->Ist.Put.data, 5044 NULL /* shadow atom */ ); 5045 break; 5046 5047 case Ist_PutI: 5048 do_shadow_PUTI( &mce, 5049 st->Ist.PutI.descr, 5050 st->Ist.PutI.ix, 5051 st->Ist.PutI.bias, 5052 st->Ist.PutI.data ); 5053 break; 5054 5055 case Ist_Store: 5056 do_shadow_Store( &mce, st->Ist.Store.end, 5057 st->Ist.Store.addr, 0/* addr bias */, 5058 st->Ist.Store.data, 5059 NULL /* shadow data */, 5060 NULL/*guard*/ ); 5061 break; 5062 5063 case Ist_Exit: 5064 complainIfUndefined( &mce, st->Ist.Exit.guard ); 5065 break; 5066 5067 case Ist_IMark: 5068 break; 5069 5070 case Ist_NoOp: 5071 case Ist_MBE: 5072 break; 5073 5074 case Ist_Dirty: 5075 do_shadow_Dirty( &mce, st->Ist.Dirty.details ); 5076 break; 5077 5078 case Ist_AbiHint: 5079 do_AbiHint( &mce, st->Ist.AbiHint.base, 5080 st->Ist.AbiHint.len, 5081 st->Ist.AbiHint.nia ); 5082 break; 5083 5084 case Ist_CAS: 5085 do_shadow_CAS( &mce, st->Ist.CAS.details ); 5086 /* Note, do_shadow_CAS copies the CAS itself to the output 5087 block, because it needs to add instrumentation both 5088 before and after it. Hence skip the copy below. Also 5089 skip the origin-tracking stuff (call to schemeS) above, 5090 since that's all tangled up with it too; do_shadow_CAS 5091 does it all. */ 5092 break; 5093 5094 case Ist_LLSC: 5095 do_shadow_LLSC( &mce, 5096 st->Ist.LLSC.end, 5097 st->Ist.LLSC.result, 5098 st->Ist.LLSC.addr, 5099 st->Ist.LLSC.storedata ); 5100 break; 5101 5102 default: 5103 VG_(printf)("\n"); 5104 ppIRStmt(st); 5105 VG_(printf)("\n"); 5106 VG_(tool_panic)("memcheck: unhandled IRStmt"); 5107 5108 } /* switch (st->tag) */ 5109 5110 if (0 && verboze) { 5111 for (j = first_stmt; j < sb_out->stmts_used; j++) { 5112 VG_(printf)(" "); 5113 ppIRStmt(sb_out->stmts[j]); 5114 VG_(printf)("\n"); 5115 } 5116 VG_(printf)("\n"); 5117 } 5118 5119 /* ... and finally copy the stmt itself to the output. Except, 5120 skip the copy of IRCASs; see comments on case Ist_CAS 5121 above. */ 5122 if (st->tag != Ist_CAS) 5123 stmt('C', &mce, st); 5124 } 5125 5126 /* Now we need to complain if the jump target is undefined. */ 5127 first_stmt = sb_out->stmts_used; 5128 5129 if (verboze) { 5130 VG_(printf)("sb_in->next = "); 5131 ppIRExpr(sb_in->next); 5132 VG_(printf)("\n\n"); 5133 } 5134 5135 complainIfUndefined( &mce, sb_in->next ); 5136 5137 if (0 && verboze) { 5138 for (j = first_stmt; j < sb_out->stmts_used; j++) { 5139 VG_(printf)(" "); 5140 ppIRStmt(sb_out->stmts[j]); 5141 VG_(printf)("\n"); 5142 } 5143 VG_(printf)("\n"); 5144 } 5145 5146 /* If this fails, there's been some serious snafu with tmp management, 5147 that should be investigated. */ 5148 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used ); 5149 VG_(deleteXA)( mce.tmpMap ); 5150 5151 tl_assert(mce.sb == sb_out); 5152 return sb_out; 5153 } 5154 5155 /*------------------------------------------------------------*/ 5156 /*--- Post-tree-build final tidying ---*/ 5157 /*------------------------------------------------------------*/ 5158 5159 /* This exploits the observation that Memcheck often produces 5160 repeated conditional calls of the form 5161 5162 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag) 5163 5164 with the same guard expression G guarding the same helper call. 5165 The second and subsequent calls are redundant. 
This usually 5166 results from instrumentation of guest code containing multiple 5167 memory references at different constant offsets from the same base 5168 register. After optimisation of the instrumentation, you get a 5169 test for the definedness of the base register for each memory 5170 reference, which is kinda pointless. MC_(final_tidy) therefore 5171 looks for such repeated calls and removes all but the first. */ 5172 5173 /* A struct for recording which (helper, guard) pairs we have already 5174 seen. */ 5175 typedef 5176 struct { void* entry; IRExpr* guard; } 5177 Pair; 5178 5179 /* Return True if e1 and e2 definitely denote the same value (used to 5180 compare guards). Return False if unknown; False is the safe 5181 answer. Since guest registers and guest memory do not have the 5182 SSA property we must return False if any Gets or Loads appear in 5183 the expression. */ 5184 5185 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 ) 5186 { 5187 if (e1->tag != e2->tag) 5188 return False; 5189 switch (e1->tag) { 5190 case Iex_Const: 5191 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con ); 5192 case Iex_Binop: 5193 return e1->Iex.Binop.op == e2->Iex.Binop.op 5194 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1) 5195 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2); 5196 case Iex_Unop: 5197 return e1->Iex.Unop.op == e2->Iex.Unop.op 5198 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg); 5199 case Iex_RdTmp: 5200 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp; 5201 case Iex_Mux0X: 5202 return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond ) 5203 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 ) 5204 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX ); 5205 case Iex_Qop: 5206 case Iex_Triop: 5207 case Iex_CCall: 5208 /* be lazy. Could define equality for these, but they never 5209 appear to be used. */ 5210 return False; 5211 case Iex_Get: 5212 case Iex_GetI: 5213 case Iex_Load: 5214 /* be conservative - these may not give the same value each 5215 time */ 5216 return False; 5217 case Iex_Binder: 5218 /* should never see this */ 5219 /* fallthrough */ 5220 default: 5221 VG_(printf)("mc_translate.c: sameIRValue: unhandled: "); 5222 ppIRExpr(e1); 5223 VG_(tool_panic)("memcheck:sameIRValue"); 5224 return False; 5225 } 5226 } 5227 5228 /* See if 'pairs' already has an entry for (entry, guard). Return 5229 True if so. If not, add an entry. 
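   Usage sketch: MC_(final_tidy) below calls this once for each guarded
   call to one of the MC_(helperc_value_checkN_fail) variants. So if two
   Dirty statements both amount to, say,
      if (t5) MC_(helperc_value_check4_fail_no_o)()
   the first call registers the (helper address, t5) pair and the second
   is reported as already present and turned into an Ist_NoOp.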
*/ 5230 5231 static 5232 Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry ) 5233 { 5234 Pair p; 5235 Pair* pp; 5236 Int i, n = VG_(sizeXA)( pairs ); 5237 for (i = 0; i < n; i++) { 5238 pp = VG_(indexXA)( pairs, i ); 5239 if (pp->entry == entry && sameIRValue(pp->guard, guard)) 5240 return True; 5241 } 5242 p.guard = guard; 5243 p.entry = entry; 5244 VG_(addToXA)( pairs, &p ); 5245 return False; 5246 } 5247 5248 static Bool is_helperc_value_checkN_fail ( HChar* name ) 5249 { 5250 return 5251 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)") 5252 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)") 5253 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)") 5254 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)") 5255 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)") 5256 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)") 5257 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)") 5258 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)"); 5259 } 5260 5261 IRSB* MC_(final_tidy) ( IRSB* sb_in ) 5262 { 5263 Int i; 5264 IRStmt* st; 5265 IRDirty* di; 5266 IRExpr* guard; 5267 IRCallee* cee; 5268 Bool alreadyPresent; 5269 XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1", 5270 VG_(free), sizeof(Pair) ); 5271 /* Scan forwards through the statements. Each time a call to one 5272 of the relevant helpers is seen, check if we have made a 5273 previous call to the same helper using the same guard 5274 expression, and if so, delete the call. */ 5275 for (i = 0; i < sb_in->stmts_used; i++) { 5276 st = sb_in->stmts[i]; 5277 tl_assert(st); 5278 if (st->tag != Ist_Dirty) 5279 continue; 5280 di = st->Ist.Dirty.details; 5281 guard = di->guard; 5282 if (!guard) 5283 continue; 5284 if (0) { ppIRExpr(guard); VG_(printf)("\n"); } 5285 cee = di->cee; 5286 if (!is_helperc_value_checkN_fail( cee->name )) 5287 continue; 5288 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with 5289 guard 'guard'. Check if we have already seen a call to this 5290 function with the same guard. If so, delete it. If not, 5291 add it to the set of calls we do know about. */ 5292 alreadyPresent = check_or_add( pairs, guard, cee->addr ); 5293 if (alreadyPresent) { 5294 sb_in->stmts[i] = IRStmt_NoOp(); 5295 if (0) VG_(printf)("XX\n"); 5296 } 5297 } 5298 VG_(deleteXA)( pairs ); 5299 return sb_in; 5300 } 5301 5302 5303 /*------------------------------------------------------------*/ 5304 /*--- Origin tracking stuff ---*/ 5305 /*------------------------------------------------------------*/ 5306 5307 /* Almost identical to findShadowTmpV. */ 5308 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig ) 5309 { 5310 TempMapEnt* ent; 5311 /* VG_(indexXA) range-checks 'orig', hence no need to check 5312 here. */ 5313 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 5314 tl_assert(ent->kind == Orig); 5315 if (ent->shadowB == IRTemp_INVALID) { 5316 IRTemp tmpB 5317 = newTemp( mce, Ity_I32, BSh ); 5318 /* newTemp may cause mce->tmpMap to resize, hence previous results 5319 from VG_(indexXA) are invalid. 
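         (In other words, the 'ent' fetched before the newTemp call may be
         a dangling pointer once the XArray has been resized, hence the
         re-lookup on the next line.)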
*/ 5320 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 5321 tl_assert(ent->kind == Orig); 5322 tl_assert(ent->shadowB == IRTemp_INVALID); 5323 ent->shadowB = tmpB; 5324 } 5325 return ent->shadowB; 5326 } 5327 5328 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 ) 5329 { 5330 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) ); 5331 } 5332 5333 static IRAtom* gen_load_b ( MCEnv* mce, Int szB, 5334 IRAtom* baseaddr, Int offset ) 5335 { 5336 void* hFun; 5337 HChar* hName; 5338 IRTemp bTmp; 5339 IRDirty* di; 5340 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); 5341 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64; 5342 IRAtom* ea = baseaddr; 5343 if (offset != 0) { 5344 IRAtom* off = aTy == Ity_I32 ? mkU32( offset ) 5345 : mkU64( (Long)(Int)offset ); 5346 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off)); 5347 } 5348 bTmp = newTemp(mce, mce->hWordTy, BSh); 5349 5350 switch (szB) { 5351 case 1: hFun = (void*)&MC_(helperc_b_load1); 5352 hName = "MC_(helperc_b_load1)"; 5353 break; 5354 case 2: hFun = (void*)&MC_(helperc_b_load2); 5355 hName = "MC_(helperc_b_load2)"; 5356 break; 5357 case 4: hFun = (void*)&MC_(helperc_b_load4); 5358 hName = "MC_(helperc_b_load4)"; 5359 break; 5360 case 8: hFun = (void*)&MC_(helperc_b_load8); 5361 hName = "MC_(helperc_b_load8)"; 5362 break; 5363 case 16: hFun = (void*)&MC_(helperc_b_load16); 5364 hName = "MC_(helperc_b_load16)"; 5365 break; 5366 default: 5367 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB); 5368 tl_assert(0); 5369 } 5370 di = unsafeIRDirty_1_N( 5371 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ), 5372 mkIRExprVec_1( ea ) 5373 ); 5374 /* no need to mess with any annotations. This call accesses 5375 neither guest state nor guest memory. */ 5376 stmt( 'B', mce, IRStmt_Dirty(di) ); 5377 if (mce->hWordTy == Ity_I64) { 5378 /* 64-bit host */ 5379 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh); 5380 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) ); 5381 return mkexpr(bTmp32); 5382 } else { 5383 /* 32-bit host */ 5384 return mkexpr(bTmp); 5385 } 5386 } 5387 5388 /* Generate a shadow store. guard :: Ity_I1 controls whether the 5389 store really happens; NULL means it unconditionally does. */ 5390 static void gen_store_b ( MCEnv* mce, Int szB, 5391 IRAtom* baseaddr, Int offset, IRAtom* dataB, 5392 IRAtom* guard ) 5393 { 5394 void* hFun; 5395 HChar* hName; 5396 IRDirty* di; 5397 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); 5398 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64; 5399 IRAtom* ea = baseaddr; 5400 if (guard) { 5401 tl_assert(isOriginalAtom(mce, guard)); 5402 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); 5403 } 5404 if (offset != 0) { 5405 IRAtom* off = aTy == Ity_I32 ? 

/* Generate a shadow store.  guard :: Ity_I1 controls whether the
   store really happens; NULL means it unconditionally does. */
static void gen_store_b ( MCEnv* mce, Int szB,
                          IRAtom* baseaddr, Int offset, IRAtom* dataB,
                          IRAtom* guard )
{
   void*    hFun;
   HChar*   hName;
   IRDirty* di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   if (mce->hWordTy == Ity_I64)
      dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));

   switch (szB) {
      case 1: hFun  = (void*)&MC_(helperc_b_store1);
              hName = "MC_(helperc_b_store1)";
              break;
      case 2: hFun  = (void*)&MC_(helperc_b_store2);
              hName = "MC_(helperc_b_store2)";
              break;
      case 4: hFun  = (void*)&MC_(helperc_b_store4);
              hName = "MC_(helperc_b_store4)";
              break;
      case 8: hFun  = (void*)&MC_(helperc_b_store8);
              hName = "MC_(helperc_b_store8)";
              break;
      case 16: hFun  = (void*)&MC_(helperc_b_store16);
               hName = "MC_(helperc_b_store16)";
               break;
      default:
         tl_assert(0);
   }
   di = unsafeIRDirty_0_N( 2/*regparms*/,
           hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_2( ea, dataB )
        );
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   if (guard) di->guard = guard;
   stmt( 'B', mce, IRStmt_Dirty(di) );
}

static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   if (eTy == Ity_I64)
      return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
   if (eTy == Ity_I32)
      return e;
   tl_assert(0);
}

static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   tl_assert(eTy == Ity_I32);
   if (dstTy == Ity_I64)
      return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
   tl_assert(0);
}
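
/* Illustrative sketch of how the two conversions above are used,
   with invented names.  B (origin) values are always carried as
   Ity_I32, but the shadow reg-array used for GetI/PutI origin
   tracking may have an I64 element type, so roughly:

      b    = narrowTo32( mce, t64 );           // emits 64to32(t64)
      t64w = zWidenFrom32( mce, Ity_I64, b );  // emits 32Uto64(b)

   which is the shape used by the Iex_GetI case of schemeE below and
   the Ist_PutI case of schemeS further down. */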

static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (e->tag) {

      case Iex_GetI: {
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = e->Iex.GetI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason, use the
            usual approximation. */
         if (equivIntTy == Ity_INVALID)
            return mkU32(0);
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                                 equivIntTy, descr->nElems );
         /* Do a shadow indexed get of the same size, giving t1.  Take
            the bottom 32 bits of it, giving t2.  Compute into t3 the
            origin for the index (almost certainly zero, but there's
            no harm in being completely general here, since iropt will
            remove any useless code), and fold it in, giving a final
            value t4. */
         t1 = assignNew( 'B', mce, equivIntTy,
                         IRExpr_GetI( descr_b, e->Iex.GetI.ix,
                                      e->Iex.GetI.bias ));
         t2 = narrowTo32( mce, t1 );
         t3 = schemeE( mce, e->Iex.GetI.ix );
         t4 = gen_maxU32( mce, t2, t3 );
         return t4;
      }
      case Iex_CCall: {
         Int i;
         IRAtom*  here;
         IRExpr** args = e->Iex.CCall.args;
         IRAtom*  curr = mkU32(0);
         for (i = 0; args[i]; i++) {
            tl_assert(i < 32);
            tl_assert(isOriginalAtom(mce, args[i]));
            /* Only take notice of this arg if the callee's
               mc-exclusion mask does not say it is to be excluded. */
            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
               /* the arg is to be excluded from definedness checking.
                  Do nothing. */
               if (0) VG_(printf)("excluding %s(%d)\n",
                                  e->Iex.CCall.cee->name, i);
            } else {
               /* calculate the arg's definedness, and pessimistically
                  merge it in. */
               here = schemeE( mce, args[i] );
               curr = gen_maxU32( mce, curr, here );
            }
         }
         return curr;
      }
      case Iex_Load: {
         Int dszB;
         dszB = sizeofIRType(e->Iex.Load.ty);
         /* assert that the B value for the address is already
            available (somewhere) */
         tl_assert(isIRAtom(e->Iex.Load.addr));
         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
      }
      case Iex_Mux0X: {
         IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
         IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
         IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
      }
      case Iex_Qop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Qop.arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Qop.arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Qop.arg3 );
         IRAtom* b4 = schemeE( mce, e->Iex.Qop.arg4 );
         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
                                 gen_maxU32( mce, b3, b4 ) );
      }
      case Iex_Triop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Triop.arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Triop.arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Triop.arg3 );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
      }
      case Iex_Binop: {
         switch (e->Iex.Binop.op) {
            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
               /* Just say these all produce a defined result,
                  regardless of their arguments.  See
                  COMMENT_ON_CasCmpEQ in this file. */
               return mkU32(0);
            default: {
               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
               return gen_maxU32( mce, b1, b2 );
            }
         }
         tl_assert(0);
         /*NOTREACHED*/
      }
      case Iex_Unop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
         return b1;
      }
      case Iex_Const:
         return mkU32(0);
      case Iex_RdTmp:
         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
      case Iex_Get: {
         Int b_offset = MC_(get_otrack_shadow_offset)(
                           e->Iex.Get.offset,
                           sizeofIRType(e->Iex.Get.ty)
                        );
         tl_assert(b_offset >= -1
                   && b_offset <= mce->layout->total_sizeB -4);
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
                               Ity_I32 );
         }
         return mkU32(0);
      }
      default:
         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
         ppIRExpr(e);
         VG_(tool_panic)("memcheck:schemeE");
   }
}
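
/* Worked example for schemeE, for illustration only.  For a flat
   expression such as Add32(t7,t9), where t7 and t9 are Orig temps,
   the cases above combine as follows (b7/b9 are the B-shadows that
   findShadowTmpB hands back; all names are invented here):

      schemeE(t7)           -->  b7
      schemeE(t9)           -->  b9
      schemeE(Add32(t7,t9)) -->  Max32U(b7,b9)

   Constants contribute mkU32(0), meaning "no interesting origin",
   so they never dominate the max. */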

static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
{
   // This is a hacked version of do_shadow_Dirty
   Int       i, n, toDo, gSz, gOff;
   IRAtom    *here, *curr;
   IRTemp    dst;

   /* First check the guard. */
   curr = schemeE( mce, d->guard );

   /* Now round up all inputs and maxU32 over them. */

   /* Inputs: unmasked args */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = schemeE( mce, d->args[i] );
         curr = gen_maxU32( mce, curr, here );
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
         if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        d->fxState[i].offset, d->fxState[i].size );
         continue;
      }

      /* This state element is read or modified.  So we need to
         consider it.  If larger than 4 bytes, deal with it in 4-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         Int b_offset;
         if (gSz == 0) break;
         n = gSz <= 4 ? gSz : 4;
         /* update 'curr' with maxU32 of the state slice 
            gOff .. gOff+n-1 */
         b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
         if (b_offset != -1) {
            here = assignNew( 'B', mce,
                              Ity_I32,
                              IRExpr_Get(b_offset + 2*mce->layout->total_sizeB,
                                         Ity_I32));
            curr = gen_maxU32( mce, curr, here );
         }
         gSz -= n;
         gOff += n;
      }

   }

   /* Inputs: memory */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      here = schemeE( mce, d->mAddr );
      curr = gen_maxU32( mce, curr, here );
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = gen_load_b( mce, 4, d->mAddr, d->mSize - toDo );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         here = gen_load_b( mce, 2, d->mAddr, d->mSize - toDo );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

   /* Whew!  So curr is a 32-bit B-value which should give an origin
      of some use if any of the inputs to the helper are undefined.
      Now we need to re-distribute the results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst = findShadowTmpB(mce, d->tmp);
      assign( 'B', mce, dst, curr );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;

      /* This state element is written or modified.  So we need to
         consider it.  If larger than 4 bytes, deal with it in 4-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         Int b_offset;
         if (gSz == 0) break;
         n = gSz <= 4 ? gSz : 4;
         /* Write 'curr' to the state slice gOff .. gOff+n-1 */
         b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
         if (b_offset != -1) {
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       curr ));
         }
         gSz -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
                      NULL/*guard*/ );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
                      NULL/*guard*/ );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }
}
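
/* Worked example of the guest-state chunking above, for illustration
   only.  Suppose a dirty helper declares a read of 10 bytes of guest
   state at offset gOff.  The loop visits it as chunks of 4, 4 and 2
   bytes; for each chunk MC_(get_otrack_shadow_offset)(gOff, 4) either
   locates the corresponding 32-bit B-slot, whose Get is then
   maxU32'd into 'curr', or returns -1, meaning that slice carries no
   origin info and is skipped.  The 10-byte size is made up purely to
   show the 4/4/2 split. */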

static void do_origins_Store ( MCEnv* mce,
                               IREndness stEnd,
                               IRExpr* stAddr,
                               IRExpr* stData )
{
   Int     dszB;
   IRAtom* dataB;
   /* assert that the B value for the address is already available
      (somewhere), since the call to schemeE will want to see it.
      XXXX how does this actually ensure that?? */
   tl_assert(isIRAtom(stAddr));
   tl_assert(isIRAtom(stData));
   dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
   dataB = schemeE( mce, stData );
   gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
                NULL/*guard*/ );
}
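
/* Illustrative sketch: for an original statement STle(t9) = t7 with
   t7 :: Ity_I64, the function above amounts to, roughly,

      gen_store_b( mce, 8, t9, 0, schemeE(mce, t7), NULL );

   i.e. an unconditional dirty call to MC_(helperc_b_store8), passing
   the store address and the 32-bit origin of the stored data (widened
   to 64 bits first on a 64-bit host).  't7' and 't9' are invented
   names for this example. */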

static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = st->Ist.PutI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, st->Ist.PutI.data );
         t2 = schemeE( mce, st->Ist.PutI.ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( descr_b, st->Ist.PutI.ix,
                                      st->Ist.PutI.bias, t4 ));
         break;
      }

      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store( mce, st->Ist.Store.end,
                                st->Ist.Store.addr,
                                st->Ist.Store.data );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data to the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy 
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store( mce, st->Ist.LLSC.end,
                                   st->Ist.LLSC.addr,
                                   st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}


/*--------------------------------------------------------------------*/
/*--- end                                           mc_translate.c ---*/
/*--------------------------------------------------------------------*/