/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2012 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"     // For mc_include.h
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_machine.h"       // VG_(fnptr_to_fnentry)
#include "pub_tool_xarray.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_libcbase.h"

#include "mc_include.h"


/* FIXMEs JRS 2011-June-16.

   Check the interpretation for vector narrowing and widening ops,
   particularly the saturating ones.  I suspect they are either overly
   pessimistic and/or wrong.
*/

/* This file implements the Memcheck instrumentation, and in
   particular contains the core of its undefined value detection
   machinery.  For a comprehensive background of the terminology,
   algorithms and rationale used herein, read:

     Using Valgrind to detect undefined value errors with
     bit-precision

     Julian Seward and Nicholas Nethercote

     2005 USENIX Annual Technical Conference (General Track),
     Anaheim, CA, USA, April 10-15, 2005.

   ----

   Here is as good a place as any to record exactly when V bits are
   and should be checked, why, and what function is responsible.


   Memcheck complains when an undefined value is used:

   1. In the condition of a conditional branch.  Because it could cause
      incorrect control flow, and thus cause incorrect externally-visible
      behaviour.  [mc_translate.c:complainIfUndefined]

   2. As an argument to a system call, or as the value that specifies
      the system call number.  Because it could cause an incorrect
      externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]

   3. As the address in a load or store.  Because it could cause an
      incorrect value to be used later, which could cause externally-visible
      behaviour (eg. via incorrect control flow or an incorrect system call
      argument)  [complainIfUndefined]

   4. As the target address of a branch.  Because it could cause incorrect
      control flow.  [complainIfUndefined]

   5. As an argument to setenv, unsetenv, or putenv.  Because it could put
      an incorrect value into the external environment.
      [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]

   6. As the index in a GETI or PUTI operation.
      I'm not sure why... (njn).
      [complainIfUndefined]

   7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
      VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
      requested it.  [in memcheck.h]


   Memcheck also complains, but should not, when an undefined value is used:

   8. As the shift value in certain SIMD shift operations (but not in the
      standard integer shift operations).  This inconsistency is due to
      historical reasons.  [complainIfUndefined]


   Memcheck does not complain, but should, when an undefined value is used:

   9. As an input to a client request.  Because the client request may
      affect the visible behaviour -- see bug #144362 for an example
      involving the malloc replacements in vg_replace_malloc.c and
      VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
      isn't identified.  That bug report also has some info on how to solve
      the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]


   In practice, 1 and 2 account for the vast majority of cases.
*/

/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowTypeV ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );

static IRExpr *i128_const_zero(void);

/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value,
   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;
      IRTemp   shadowV;
      IRTemp   shadowB;
   }
   TempMapEnt;


/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         the total number of Orig, V- and B- temps allocated so far.
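         (For instance, creating a V-shadow for an original tmp via
         newTemp, below, adds exactly one entry to tmpMap and one slot
         to sb->tyenv, keeping the two indexed in step.)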

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: indicates whether we should use expensive
         interpretations of integer adds, since unfortunately LLVM
         uses them to do ORs in some circumstances.  Defaulted to True
         on MacOS and False everywhere else. */
      Bool useLLVMworkarounds;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_types-1], which gives the current shadow for
   each original tmp, or IRTemp_INVALID if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
   both the table in mce->sb and to our auxiliary mapping.  Note that
   newTemp may cause mce->tmpMap to resize, hence previous results
   from VG_(indexXA)(mce->tmpMap) are invalidated. */
static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
{
   Word       newIx;
   TempMapEnt ent;
   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
   ent.kind    = kind;
   ent.shadowV = IRTemp_INVALID;
   ent.shadowB = IRTemp_INVALID;
   newIx = VG_(addToXA)( mce->tmpMap, &ent );
   tl_assert(newIx == (Word)tmp);
   return tmp;
}


/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead.

   This is the same as findShadowTmpV, except we don't bother to see
   if a shadow temp already existed -- we simply allocate a new one
   regardless. */
static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (1) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      ent->shadowV = tmpV;
   }
}


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == Orig;
   }
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == VSh || ent->kind == BSh;
   }
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, I128, V128, V256.
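   For example, a guest F64 value is shadowed by an I64 of V bits, and
   a V128 vector value is shadowed by a V128 of V bits.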
*/

static IRType shadowTypeV ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_D32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_D64:  return Ity_I64;
      case Ity_F128: return Ity_I128;
      case Ity_D128: return Ity_I128;
      case Ity_V128: return Ity_V128;
      case Ity_V256: return Ity_V256;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowTypeV");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (I1/I8/I16/I32/I64/I128/V128). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_I128: return i128_const_zero();
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* add stmt to a bb */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   if (mce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}

/* assign value to tmp */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}

/* build various kinds of expressions */
#define triop(_op, _arg1, _arg2, _arg3) \
                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))

/* Bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom.

   'ty' is the type of 'e' and hence the type that the new temporary
   needs to be.  But passing it in is redundant, since we can deduce
   the type merely by inspecting 'e'.  So at least use that fact to
   assert that the two types agree. */
static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
{
   TempKind k;
   IRTemp   t;
   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);

   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
   switch (cat) {
      case 'V': k = VSh;  break;
      case 'B': k = BSh;  break;
      case 'C': k = Orig; break;
                /* happens when we are making up new "orig"
                   expressions, for IRCAS handling */
      default: tl_assert(0);
   }
   t = newTemp(mce, ty, k);
   assign(cat, mce, t, e);
   return mkexpr(t);
}


/*------------------------------------------------------------*/
/*--- Helper functions for 128-bit ops                     ---*/
/*------------------------------------------------------------*/

static IRExpr *i128_const_zero(void)
{
   IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
   return binop(Iop_64HLto128, z64, z64);
}

/* There are no I128 loads and/or stores [as generated by any
   current front ends].  So we do not need to worry about that in
   expr2vbits_Load */


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
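   /* Split both args into 64-bit halves, OR the corresponding halves,
      then reassemble the I128 result. */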
   tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
   tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
   tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
   tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
   tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
   tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));

   return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_I128: return mkUifU128(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
}

/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
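   For example: with data = 0b0011 and vbits = 0b0100 (bit 2 of data
   undefined), data OR vbits = 0b0111.  Only bit 3 -- a defined 0 in
   data -- comes out as 0, meaning that bit of an AND result can be
   treated as defined regardless of the other operand.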
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
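   For example: with data = 0b1100 and vbits = 0b0001 (bit 0 of data
   undefined), ~data OR vbits = 0b0011 in the low four bits.  Bits 2
   and 3 -- defined 1s in data -- come out as 0, meaning those bits of
   an OR result can be treated as defined regardless of the other
   operand.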
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V256,
             binop(Iop_OrV256,
                   assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
                   vbits) );
}

/* --------- Pessimising casts. --------- */

/* The function returns an expression of type DST_TY.  If any of the VBITS
   is undefined (value == 1) the resulting expression has all bits set to
   1.  Otherwise, all bits are 0. */

static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;

   /* Note, dst_ty is a shadow type, not an original type. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* PCast the arg, then clone it. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
         the top half. */
      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
   }

   /* Else do it the slow way ..
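      (collapse the source to a single V bit with CmpNEZ, then
      sign-extend that bit out to the width of dst_ty -- e.g. an I8
      source with any undefined bit becomes 1, which 1Sto32 smears to
      0xFFFFFFFF for an I32 destination)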
    */
   /* First of all, collapse vbits down to a single bit. */
   tmp1 = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}

/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
/*
   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_I1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

   PCastTo<1> (
      -- naive version
      PCastTo<sz>( UifU<sz>(vxx, vyy) )

      `DifD<sz>`

      -- improvement term
      PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
   )

   where
     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise.

     vec = Or<sz>( vxx,   // 0 iff bit defined
                   vyy,   // 0 iff bit defined
                   Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
                 )

     If any bit of vec is 0, the result is defined and so the
     improvement term should produce 0...0, else it should produce
     1...1.

     Hence require for the improvement term:

        if vec == 1...1 then 1...1 else 0...0
     ->
        PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )

   This was extensively re-analysed and checked on 6 July 05.
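   Worked example: with xx = 5, yy = 7 and the top byte of both
   undefined (vxx = vyy = 0xFF000000), xx Xor yy = 2, so
   Not32(xx Xor yy) = 0xFFFFFFFD and vec = 0xFFFFFFFD.  Bit 1 of vec
   is 0 (both bit-1 values are defined, and they differ), so
   vec != 1...1, the improvement term is 0...0, and the DifD forces
   the final result to 'defined' -- correctly, since the comparison
   must say "not equal" whatever the undefined top bytes hold.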
*/
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   switch (ty) {
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}


/* --------- Semi-accurate interpretation of CmpORD. --------- */

/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:

      CmpORD32S(x,y) = 1<<3   if  x <s y
                     = 1<<2   if  x >s y
                     = 1<<1   if  x == y

   and similarly the unsigned variant.  The default interpretation is:

      CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
                                  & (7<<1)

   The "& (7<<1)" reflects the fact that all result bits except 3,2,1
   are zero and therefore defined (viz, zero).

   Also deal with a special case better:

      CmpORD32S(x,0)

   Here, bit 3 (LT) of the result is a copy of the top bit of x and
   will be defined even if the rest of x isn't.  In which case we do:

      CmpORD32S#(x,x#,0,{impliedly 0}#)
         = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
           | (x# >>u 31) << 3      -- LT# = x#[31]

   Analogous handling for CmpORD64{S,U}.
*/
static Bool isZeroU32 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == 0 );
}

static Bool isZeroU64 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U64
              && e->Iex.Const.con->Ico.U64 == 0 );
}

static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx,     IRAtom* yy )
{
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}


/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */


/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx        = Ifx_Read;
   di->fxState[0].offset    = mce->layout->offset_SP;
   di->fxState[0].size      = mce->layout->sizeof_SP;
   di->fxState[0].nRepeats  = 0;
   di->fxState[0].repeatLen = 0;
   di->fxState[1].fx        = Ifx_Read;
   di->fxState[1].offset    = mce->layout->offset_IP;
   di->fxState[1].size      = mce->layout->sizeof_IP;
   di->fxState[1].nRepeats  = 0;
   di->fxState[1].repeatLen = 0;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   HChar*   nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
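   // (MC_(clo_mc_level) is 1 when undefined-value errors are disabled,
   // e.g. via --undef-value-errors=no.)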
   if (MC_(clo_mc_level) == 1)
      return;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond;

   /* If the complaint is to be issued under a guard condition, AND
      that guard condition. */
   if (guard) {
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));

      di->guard = assignNew('V', mce, Ity_I1,  unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
   We assume here that the definedness of GUARD has already been checked.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      if (guard) {
         /* If the guard expression evaluates to false we simply Put the value
            that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom   = assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, vatom));
      }
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}


/* Generate into bb suitable actions to shadow this PutI (passed in
   in pieces).
*/
static
void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
{
   IRAtom*     vatom;
   IRType      ty, tyS;
   Int         arrSize;
   IRRegArray* descr = puti->descr;
   IRAtom*     ix    = puti->ix;
   Int         bias  = puti->bias;
   IRAtom*     atom  = puti->data;

   // Don't do shadow PUTIs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETIs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty   = descr->elemTy;
   tyS  = shadowTypeV(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowTypeV(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      /* FIXME: this isn't an atom! */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowTypeV(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area.
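         (i.e. the same IRRegArray descriptor with its base shifted up
         by layout->total_sizeB, so the index arithmetic is unchanged)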
      */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I128 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }
   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}


/* 4-arg version of the above. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than four times. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }
   /* I32 x I32 x I32 x I32 -> I32 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
      at = va1;
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkUifU(mce, Ity_I32, at, va4);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int i;
   IRAtom* here;
   IRAtom* curr;
   IRType  mergeTy;
   Bool    mergeTy64 = True;

   /* Decide on the type of the merge intermediary.  If all relevant
      args are I64, then it's I64.  In all other circumstances, use
      I32. */
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      if (cee->mcx_mask & (1<<i))
         continue;
      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
         mergeTy64 = False;
   }

   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
   curr    = definedOfType(mergeTy);

   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
         curr = mergeTy64
                   ? mkUifU64(mce, here, curr)
                   : mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}


/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
---*/ 1707 /*------------------------------------------------------------*/ 1708 1709 static 1710 IRAtom* expensiveAddSub ( MCEnv* mce, 1711 Bool add, 1712 IRType ty, 1713 IRAtom* qaa, IRAtom* qbb, 1714 IRAtom* aa, IRAtom* bb ) 1715 { 1716 IRAtom *a_min, *b_min, *a_max, *b_max; 1717 IROp opAND, opOR, opXOR, opNOT, opADD, opSUB; 1718 1719 tl_assert(isShadowAtom(mce,qaa)); 1720 tl_assert(isShadowAtom(mce,qbb)); 1721 tl_assert(isOriginalAtom(mce,aa)); 1722 tl_assert(isOriginalAtom(mce,bb)); 1723 tl_assert(sameKindedAtoms(qaa,aa)); 1724 tl_assert(sameKindedAtoms(qbb,bb)); 1725 1726 switch (ty) { 1727 case Ity_I32: 1728 opAND = Iop_And32; 1729 opOR = Iop_Or32; 1730 opXOR = Iop_Xor32; 1731 opNOT = Iop_Not32; 1732 opADD = Iop_Add32; 1733 opSUB = Iop_Sub32; 1734 break; 1735 case Ity_I64: 1736 opAND = Iop_And64; 1737 opOR = Iop_Or64; 1738 opXOR = Iop_Xor64; 1739 opNOT = Iop_Not64; 1740 opADD = Iop_Add64; 1741 opSUB = Iop_Sub64; 1742 break; 1743 default: 1744 VG_(tool_panic)("expensiveAddSub"); 1745 } 1746 1747 // a_min = aa & ~qaa 1748 a_min = assignNew('V', mce,ty, 1749 binop(opAND, aa, 1750 assignNew('V', mce,ty, unop(opNOT, qaa)))); 1751 1752 // b_min = bb & ~qbb 1753 b_min = assignNew('V', mce,ty, 1754 binop(opAND, bb, 1755 assignNew('V', mce,ty, unop(opNOT, qbb)))); 1756 1757 // a_max = aa | qaa 1758 a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa)); 1759 1760 // b_max = bb | qbb 1761 b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb)); 1762 1763 if (add) { 1764 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max)) 1765 return 1766 assignNew('V', mce,ty, 1767 binop( opOR, 1768 assignNew('V', mce,ty, binop(opOR, qaa, qbb)), 1769 assignNew('V', mce,ty, 1770 binop( opXOR, 1771 assignNew('V', mce,ty, binop(opADD, a_min, b_min)), 1772 assignNew('V', mce,ty, binop(opADD, a_max, b_max)) 1773 ) 1774 ) 1775 ) 1776 ); 1777 } else { 1778 // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max + b_min)) 1779 return 1780 assignNew('V', mce,ty, 1781 binop( opOR, 1782 assignNew('V', mce,ty, binop(opOR, qaa, qbb)), 1783 assignNew('V', mce,ty, 1784 binop( opXOR, 1785 assignNew('V', mce,ty, binop(opSUB, a_min, b_max)), 1786 assignNew('V', mce,ty, binop(opSUB, a_max, b_min)) 1787 ) 1788 ) 1789 ) 1790 ); 1791 } 1792 1793 } 1794 1795 1796 /*------------------------------------------------------------*/ 1797 /*--- Scalar shifts. ---*/ 1798 /*------------------------------------------------------------*/ 1799 1800 /* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic 1801 idea is to shift the definedness bits by the original shift amount. 1802 This introduces 0s ("defined") in new positions for left shifts and 1803 unsigned right shifts, and copies the top definedness bit for 1804 signed right shifts. So, conveniently, applying the original shift 1805 operator to the definedness bits for the left arg is exactly the 1806 right thing to do: 1807 1808 (qaa << bb) 1809 1810 However if the shift amount is undefined then the whole result 1811 is undefined. Hence need: 1812 1813 (qaa << bb) `UifU` PCast(qbb) 1814 1815 If the shift amount bb is a literal than qbb will say 'all defined' 1816 and the UifU and PCast will get folded out by post-instrumentation 1817 optimisation. 
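
   As a concrete, standalone sketch of the scheme (illustrative only --
   not Memcheck code; the helper name is invented; as elsewhere in this
   file, a 1 bit in a V value means "undefined"):

      #include <assert.h>
      #include <stdint.h>

      // Model of the 32-bit case: 'qaa' is the V bits of the value
      // being shifted, 'bb' the original shift amount, 'qbb' its V
      // bits.  UifU is modelled as bitwise OR, PCast as all-or-nothing.
      static uint32_t shadow_shl32 ( uint32_t qaa, uint32_t qbb,
                                     uint32_t bb )
      {
         uint32_t shifted = qaa << bb;              // (qaa << bb)
         uint32_t pcast   = (qbb != 0) ? ~0u : 0u;  // PCast(qbb)
         return shifted | pcast;                    // `UifU`
      }

      int main ( void )
      {
         // Defined shift amount: the undefined bit simply moves left.
         assert(shadow_shl32(0x00000001, 0, 4) == 0x00000010);
         // Undefined shift amount: the whole result is undefined.
         assert(shadow_shl32(0x00000000, 1, 4) == 0xFFFFFFFFu);
         return 0;
      }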
1818 */ 1819 static IRAtom* scalarShift ( MCEnv* mce, 1820 IRType ty, 1821 IROp original_op, 1822 IRAtom* qaa, IRAtom* qbb, 1823 IRAtom* aa, IRAtom* bb ) 1824 { 1825 tl_assert(isShadowAtom(mce,qaa)); 1826 tl_assert(isShadowAtom(mce,qbb)); 1827 tl_assert(isOriginalAtom(mce,aa)); 1828 tl_assert(isOriginalAtom(mce,bb)); 1829 tl_assert(sameKindedAtoms(qaa,aa)); 1830 tl_assert(sameKindedAtoms(qbb,bb)); 1831 return 1832 assignNew( 1833 'V', mce, ty, 1834 mkUifU( mce, ty, 1835 assignNew('V', mce, ty, binop(original_op, qaa, bb)), 1836 mkPCastTo(mce, ty, qbb) 1837 ) 1838 ); 1839 } 1840 1841 1842 /*------------------------------------------------------------*/ 1843 /*--- Helpers for dealing with vector primops. ---*/ 1844 /*------------------------------------------------------------*/ 1845 1846 /* Vector pessimisation -- pessimise within each lane individually. */ 1847 1848 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at ) 1849 { 1850 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at)); 1851 } 1852 1853 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at ) 1854 { 1855 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at)); 1856 } 1857 1858 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at ) 1859 { 1860 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at)); 1861 } 1862 1863 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at ) 1864 { 1865 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at)); 1866 } 1867 1868 static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at ) 1869 { 1870 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at)); 1871 } 1872 1873 static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at ) 1874 { 1875 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at)); 1876 } 1877 1878 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at ) 1879 { 1880 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at)); 1881 } 1882 1883 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at ) 1884 { 1885 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at)); 1886 } 1887 1888 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at ) 1889 { 1890 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at)); 1891 } 1892 1893 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at ) 1894 { 1895 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at)); 1896 } 1897 1898 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at ) 1899 { 1900 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at)); 1901 } 1902 1903 1904 /* Here's a simple scheme capable of handling ops derived from SSE1 1905 code and while only generating ops that can be efficiently 1906 implemented in SSE1. */ 1907 1908 /* All-lanes versions are straightforward: 1909 1910 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#)) 1911 1912 unary32Fx4(x,y) ==> PCast32x4(x#) 1913 1914 Lowest-lane-only versions are more complex: 1915 1916 binary32F0x4(x,y) ==> SetV128lo32( 1917 x#, 1918 PCast32(V128to32(UifUV128(x#,y#))) 1919 ) 1920 1921 This is perhaps not so obvious. In particular, it's faster to 1922 do a V128-bit UifU and then take the bottom 32 bits than the more 1923 obvious scheme of taking the bottom 32 bits of each operand 1924 and doing a 32-bit UifU. Basically since UifU is fast and 1925 chopping lanes off vector values is slow. 
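
   As a standalone sketch of what the binary lowest-lane case computes
   (illustrative only -- not Memcheck code; the helper name is invented;
   a V128 V value is modelled as four 32-bit lanes with lane 0 lowest,
   and 1 bits mean "undefined"):

      #include <stdint.h>

      // binary32F0x4 shadow semantics: lanes 1..3 are taken from x#
      // unchanged (SetV128lo32 keeps them); lane 0 is the PCast32 of
      // the UifU (bitwise OR) of the two shadows' lane 0.  The real
      // code ORs the whole V128 first and then extracts lane 0, which
      // yields the same lane-0 result.
      void shadow_binary32F0x4 ( const uint32_t xV[4], const uint32_t yV[4],
                                 uint32_t out[4] )
      {
         uint32_t lo = xV[0] | yV[0];
         out[0] = (lo != 0) ? ~0u : 0u;
         out[1] = xV[1];
         out[2] = xV[2];
         out[3] = xV[3];
      }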
1926 1927 Finally: 1928 1929 unary32F0x4(x) ==> SetV128lo32( 1930 x#, 1931 PCast32(V128to32(x#)) 1932 ) 1933 1934 Where: 1935 1936 PCast32(v#) = 1Sto32(CmpNE32(v#,0)) 1937 PCast32x4(v#) = CmpNEZ32x4(v#) 1938 */ 1939 1940 static 1941 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1942 { 1943 IRAtom* at; 1944 tl_assert(isShadowAtom(mce, vatomX)); 1945 tl_assert(isShadowAtom(mce, vatomY)); 1946 at = mkUifUV128(mce, vatomX, vatomY); 1947 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at)); 1948 return at; 1949 } 1950 1951 static 1952 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX ) 1953 { 1954 IRAtom* at; 1955 tl_assert(isShadowAtom(mce, vatomX)); 1956 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX)); 1957 return at; 1958 } 1959 1960 static 1961 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1962 { 1963 IRAtom* at; 1964 tl_assert(isShadowAtom(mce, vatomX)); 1965 tl_assert(isShadowAtom(mce, vatomY)); 1966 at = mkUifUV128(mce, vatomX, vatomY); 1967 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at)); 1968 at = mkPCastTo(mce, Ity_I32, at); 1969 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 1970 return at; 1971 } 1972 1973 static 1974 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX ) 1975 { 1976 IRAtom* at; 1977 tl_assert(isShadowAtom(mce, vatomX)); 1978 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX)); 1979 at = mkPCastTo(mce, Ity_I32, at); 1980 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 1981 return at; 1982 } 1983 1984 /* --- ... and ... 64Fx2 versions of the same ... --- */ 1985 1986 static 1987 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 1988 { 1989 IRAtom* at; 1990 tl_assert(isShadowAtom(mce, vatomX)); 1991 tl_assert(isShadowAtom(mce, vatomY)); 1992 at = mkUifUV128(mce, vatomX, vatomY); 1993 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at)); 1994 return at; 1995 } 1996 1997 static 1998 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX ) 1999 { 2000 IRAtom* at; 2001 tl_assert(isShadowAtom(mce, vatomX)); 2002 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX)); 2003 return at; 2004 } 2005 2006 static 2007 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2008 { 2009 IRAtom* at; 2010 tl_assert(isShadowAtom(mce, vatomX)); 2011 tl_assert(isShadowAtom(mce, vatomY)); 2012 at = mkUifUV128(mce, vatomX, vatomY); 2013 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at)); 2014 at = mkPCastTo(mce, Ity_I64, at); 2015 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 2016 return at; 2017 } 2018 2019 static 2020 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX ) 2021 { 2022 IRAtom* at; 2023 tl_assert(isShadowAtom(mce, vatomX)); 2024 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX)); 2025 at = mkPCastTo(mce, Ity_I64, at); 2026 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 2027 return at; 2028 } 2029 2030 /* --- --- ... and ... 
32Fx2 versions of the same --- --- */ 2031 2032 static 2033 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2034 { 2035 IRAtom* at; 2036 tl_assert(isShadowAtom(mce, vatomX)); 2037 tl_assert(isShadowAtom(mce, vatomY)); 2038 at = mkUifU64(mce, vatomX, vatomY); 2039 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at)); 2040 return at; 2041 } 2042 2043 static 2044 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX ) 2045 { 2046 IRAtom* at; 2047 tl_assert(isShadowAtom(mce, vatomX)); 2048 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX)); 2049 return at; 2050 } 2051 2052 /* --- ... and ... 64Fx4 versions of the same ... --- */ 2053 2054 static 2055 IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2056 { 2057 IRAtom* at; 2058 tl_assert(isShadowAtom(mce, vatomX)); 2059 tl_assert(isShadowAtom(mce, vatomY)); 2060 at = mkUifUV256(mce, vatomX, vatomY); 2061 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at)); 2062 return at; 2063 } 2064 2065 static 2066 IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX ) 2067 { 2068 IRAtom* at; 2069 tl_assert(isShadowAtom(mce, vatomX)); 2070 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX)); 2071 return at; 2072 } 2073 2074 /* --- ... and ... 32Fx8 versions of the same ... --- */ 2075 2076 static 2077 IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2078 { 2079 IRAtom* at; 2080 tl_assert(isShadowAtom(mce, vatomX)); 2081 tl_assert(isShadowAtom(mce, vatomY)); 2082 at = mkUifUV256(mce, vatomX, vatomY); 2083 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at)); 2084 return at; 2085 } 2086 2087 static 2088 IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX ) 2089 { 2090 IRAtom* at; 2091 tl_assert(isShadowAtom(mce, vatomX)); 2092 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX)); 2093 return at; 2094 } 2095 2096 /* --- --- Vector saturated narrowing --- --- */ 2097 2098 /* We used to do something very clever here, but on closer inspection 2099 (2011-Jun-15), and in particular bug #279698, it turns out to be 2100 wrong. Part of the problem came from the fact that for a long 2101 time, the IR primops to do with saturated narrowing were 2102 underspecified and managed to confuse multiple cases which needed 2103 to be separate: the op names had a signedness qualifier, but in 2104 fact the source and destination signednesses needed to be specified 2105 independently, so the op names really need two independent 2106 signedness specifiers. 2107 2108 As of 2011-Jun-15 (ish) the underspecification was sorted out 2109 properly. The incorrect instrumentation remained, though. That 2110 has now (2011-Oct-22) been fixed. 2111 2112 What we now do is simple: 2113 2114 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a 2115 number of lanes, X is the source lane width and signedness, and Y 2116 is the destination lane width and signedness. In all cases the 2117 destination lane width is half the source lane width, so the names 2118 have a bit of redundancy, but are at least easy to read. 2119 2120 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s 2121 to unsigned 16s. 2122 2123 Let Vanilla(OP) be a function that takes OP, one of these 2124 saturating narrowing ops, and produces the same "shaped" narrowing 2125 op which is not saturating, but merely dumps the most significant 2126 bits. "same shape" means that the lane numbers and widths are the 2127 same as with OP. 
2128 2129 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8) 2130 = Iop_NarrowBin32to16x8, 2131 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by 2132 dumping the top half of each lane. 2133 2134 So, with that in place, the scheme is simple, and it is simple to 2135 pessimise each lane individually and then apply Vanilla(OP) so as 2136 to get the result in the right "shape". If the original OP is 2137 QNarrowBinXtoYxZ then we produce 2138 2139 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) ) 2140 2141 or for the case when OP is unary (Iop_QNarrowUn*) 2142 2143 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) ) 2144 */ 2145 static 2146 IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp ) 2147 { 2148 switch (qnarrowOp) { 2149 /* Binary: (128, 128) -> 128 */ 2150 case Iop_QNarrowBin16Sto8Ux16: 2151 case Iop_QNarrowBin16Sto8Sx16: 2152 case Iop_QNarrowBin16Uto8Ux16: 2153 return Iop_NarrowBin16to8x16; 2154 case Iop_QNarrowBin32Sto16Ux8: 2155 case Iop_QNarrowBin32Sto16Sx8: 2156 case Iop_QNarrowBin32Uto16Ux8: 2157 return Iop_NarrowBin32to16x8; 2158 /* Binary: (64, 64) -> 64 */ 2159 case Iop_QNarrowBin32Sto16Sx4: 2160 return Iop_NarrowBin32to16x4; 2161 case Iop_QNarrowBin16Sto8Ux8: 2162 case Iop_QNarrowBin16Sto8Sx8: 2163 return Iop_NarrowBin16to8x8; 2164 /* Unary: 128 -> 64 */ 2165 case Iop_QNarrowUn64Uto32Ux2: 2166 case Iop_QNarrowUn64Sto32Sx2: 2167 case Iop_QNarrowUn64Sto32Ux2: 2168 return Iop_NarrowUn64to32x2; 2169 case Iop_QNarrowUn32Uto16Ux4: 2170 case Iop_QNarrowUn32Sto16Sx4: 2171 case Iop_QNarrowUn32Sto16Ux4: 2172 return Iop_NarrowUn32to16x4; 2173 case Iop_QNarrowUn16Uto8Ux8: 2174 case Iop_QNarrowUn16Sto8Sx8: 2175 case Iop_QNarrowUn16Sto8Ux8: 2176 return Iop_NarrowUn16to8x8; 2177 default: 2178 ppIROp(qnarrowOp); 2179 VG_(tool_panic)("vanillaNarrowOpOfShape"); 2180 } 2181 } 2182 2183 static 2184 IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op, 2185 IRAtom* vatom1, IRAtom* vatom2) 2186 { 2187 IRAtom *at1, *at2, *at3; 2188 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2189 switch (narrow_op) { 2190 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break; 2191 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break; 2192 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break; 2193 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break; 2194 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break; 2195 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break; 2196 default: VG_(tool_panic)("vectorNarrowBinV128"); 2197 } 2198 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2199 tl_assert(isShadowAtom(mce,vatom1)); 2200 tl_assert(isShadowAtom(mce,vatom2)); 2201 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1)); 2202 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2)); 2203 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2)); 2204 return at3; 2205 } 2206 2207 static 2208 IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op, 2209 IRAtom* vatom1, IRAtom* vatom2) 2210 { 2211 IRAtom *at1, *at2, *at3; 2212 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2213 switch (narrow_op) { 2214 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break; 2215 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break; 2216 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break; 2217 default: VG_(tool_panic)("vectorNarrowBin64"); 2218 } 2219 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2220 tl_assert(isShadowAtom(mce,vatom1)); 2221 tl_assert(isShadowAtom(mce,vatom2)); 2222 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1)); 2223 at2 = assignNew('V', 
mce, Ity_I64, pcast(mce, vatom2)); 2224 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2)); 2225 return at3; 2226 } 2227 2228 static 2229 IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op, 2230 IRAtom* vatom1) 2231 { 2232 IRAtom *at1, *at2; 2233 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2234 tl_assert(isShadowAtom(mce,vatom1)); 2235 /* For vanilla narrowing (non-saturating), we can just apply 2236 the op directly to the V bits. */ 2237 switch (narrow_op) { 2238 case Iop_NarrowUn16to8x8: 2239 case Iop_NarrowUn32to16x4: 2240 case Iop_NarrowUn64to32x2: 2241 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1)); 2242 return at1; 2243 default: 2244 break; /* Do Plan B */ 2245 } 2246 /* Plan B: for ops that involve a saturation operation on the args, 2247 we must PCast before the vanilla narrow. */ 2248 switch (narrow_op) { 2249 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break; 2250 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break; 2251 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break; 2252 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break; 2253 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break; 2254 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break; 2255 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break; 2256 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break; 2257 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break; 2258 default: VG_(tool_panic)("vectorNarrowUnV128"); 2259 } 2260 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2261 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1)); 2262 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1)); 2263 return at2; 2264 } 2265 2266 static 2267 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op, 2268 IRAtom* vatom1) 2269 { 2270 IRAtom *at1, *at2; 2271 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2272 switch (longen_op) { 2273 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break; 2274 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break; 2275 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break; 2276 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break; 2277 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break; 2278 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break; 2279 default: VG_(tool_panic)("vectorWidenI64"); 2280 } 2281 tl_assert(isShadowAtom(mce,vatom1)); 2282 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1)); 2283 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1)); 2284 return at2; 2285 } 2286 2287 2288 /* --- --- Vector integer arithmetic --- --- */ 2289 2290 /* Simple ... UifU the args and per-lane pessimise the results. 
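   For instance, the 32Ix4 case boils down to the following (standalone
   sketch, not Memcheck code; the helper name is invented; 1 bits mean
   "undefined"):

      #include <stdint.h>

      // Shadow of a 4 x 32-bit lane integer op: UifU (bitwise OR) the
      // two V values, then pessimise each lane to all-0s or all-1s,
      // exactly what the CmpNEZ32x4 inside mkPCast32x4 above does.
      void shadow_binary32Ix4 ( const uint32_t xV[4], const uint32_t yV[4],
                                uint32_t out[4] )
      {
         for (int i = 0; i < 4; i++) {
            uint32_t u = xV[i] | yV[i];
            out[i] = (u != 0) ? ~0u : 0u;
         }
      }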
*/ 2291 2292 /* --- V128-bit versions --- */ 2293 2294 static 2295 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2296 { 2297 IRAtom* at; 2298 at = mkUifUV128(mce, vatom1, vatom2); 2299 at = mkPCast8x16(mce, at); 2300 return at; 2301 } 2302 2303 static 2304 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2305 { 2306 IRAtom* at; 2307 at = mkUifUV128(mce, vatom1, vatom2); 2308 at = mkPCast16x8(mce, at); 2309 return at; 2310 } 2311 2312 static 2313 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2314 { 2315 IRAtom* at; 2316 at = mkUifUV128(mce, vatom1, vatom2); 2317 at = mkPCast32x4(mce, at); 2318 return at; 2319 } 2320 2321 static 2322 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2323 { 2324 IRAtom* at; 2325 at = mkUifUV128(mce, vatom1, vatom2); 2326 at = mkPCast64x2(mce, at); 2327 return at; 2328 } 2329 2330 /* --- 64-bit versions --- */ 2331 2332 static 2333 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2334 { 2335 IRAtom* at; 2336 at = mkUifU64(mce, vatom1, vatom2); 2337 at = mkPCast8x8(mce, at); 2338 return at; 2339 } 2340 2341 static 2342 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2343 { 2344 IRAtom* at; 2345 at = mkUifU64(mce, vatom1, vatom2); 2346 at = mkPCast16x4(mce, at); 2347 return at; 2348 } 2349 2350 static 2351 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2352 { 2353 IRAtom* at; 2354 at = mkUifU64(mce, vatom1, vatom2); 2355 at = mkPCast32x2(mce, at); 2356 return at; 2357 } 2358 2359 static 2360 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2361 { 2362 IRAtom* at; 2363 at = mkUifU64(mce, vatom1, vatom2); 2364 at = mkPCastTo(mce, Ity_I64, at); 2365 return at; 2366 } 2367 2368 /* --- 32-bit versions --- */ 2369 2370 static 2371 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2372 { 2373 IRAtom* at; 2374 at = mkUifU32(mce, vatom1, vatom2); 2375 at = mkPCast8x4(mce, at); 2376 return at; 2377 } 2378 2379 static 2380 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2381 { 2382 IRAtom* at; 2383 at = mkUifU32(mce, vatom1, vatom2); 2384 at = mkPCast16x2(mce, at); 2385 return at; 2386 } 2387 2388 2389 /*------------------------------------------------------------*/ 2390 /*--- Generate shadow values from all kinds of IRExprs. 
---*/ 2391 /*------------------------------------------------------------*/ 2392 2393 static 2394 IRAtom* expr2vbits_Qop ( MCEnv* mce, 2395 IROp op, 2396 IRAtom* atom1, IRAtom* atom2, 2397 IRAtom* atom3, IRAtom* atom4 ) 2398 { 2399 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2400 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2401 IRAtom* vatom3 = expr2vbits( mce, atom3 ); 2402 IRAtom* vatom4 = expr2vbits( mce, atom4 ); 2403 2404 tl_assert(isOriginalAtom(mce,atom1)); 2405 tl_assert(isOriginalAtom(mce,atom2)); 2406 tl_assert(isOriginalAtom(mce,atom3)); 2407 tl_assert(isOriginalAtom(mce,atom4)); 2408 tl_assert(isShadowAtom(mce,vatom1)); 2409 tl_assert(isShadowAtom(mce,vatom2)); 2410 tl_assert(isShadowAtom(mce,vatom3)); 2411 tl_assert(isShadowAtom(mce,vatom4)); 2412 tl_assert(sameKindedAtoms(atom1,vatom1)); 2413 tl_assert(sameKindedAtoms(atom2,vatom2)); 2414 tl_assert(sameKindedAtoms(atom3,vatom3)); 2415 tl_assert(sameKindedAtoms(atom4,vatom4)); 2416 switch (op) { 2417 case Iop_MAddF64: 2418 case Iop_MAddF64r32: 2419 case Iop_MSubF64: 2420 case Iop_MSubF64r32: 2421 /* I32(rm) x F64 x F64 x F64 -> F64 */ 2422 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4); 2423 2424 case Iop_MAddF32: 2425 case Iop_MSubF32: 2426 /* I32(rm) x F32 x F32 x F32 -> F32 */ 2427 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4); 2428 2429 /* V256-bit data-steering */ 2430 case Iop_64x4toV256: 2431 return assignNew('V', mce, Ity_V256, 2432 IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4)); 2433 2434 default: 2435 ppIROp(op); 2436 VG_(tool_panic)("memcheck:expr2vbits_Qop"); 2437 } 2438 } 2439 2440 2441 static 2442 IRAtom* expr2vbits_Triop ( MCEnv* mce, 2443 IROp op, 2444 IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 ) 2445 { 2446 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2447 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2448 IRAtom* vatom3 = expr2vbits( mce, atom3 ); 2449 2450 tl_assert(isOriginalAtom(mce,atom1)); 2451 tl_assert(isOriginalAtom(mce,atom2)); 2452 tl_assert(isOriginalAtom(mce,atom3)); 2453 tl_assert(isShadowAtom(mce,vatom1)); 2454 tl_assert(isShadowAtom(mce,vatom2)); 2455 tl_assert(isShadowAtom(mce,vatom3)); 2456 tl_assert(sameKindedAtoms(atom1,vatom1)); 2457 tl_assert(sameKindedAtoms(atom2,vatom2)); 2458 tl_assert(sameKindedAtoms(atom3,vatom3)); 2459 switch (op) { 2460 case Iop_AddF128: 2461 case Iop_AddD128: 2462 case Iop_SubF128: 2463 case Iop_SubD128: 2464 case Iop_MulF128: 2465 case Iop_MulD128: 2466 case Iop_DivF128: 2467 case Iop_DivD128: 2468 case Iop_QuantizeD128: 2469 /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */ 2470 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3); 2471 case Iop_AddF64: 2472 case Iop_AddD64: 2473 case Iop_AddF64r32: 2474 case Iop_SubF64: 2475 case Iop_SubD64: 2476 case Iop_SubF64r32: 2477 case Iop_MulF64: 2478 case Iop_MulD64: 2479 case Iop_MulF64r32: 2480 case Iop_DivF64: 2481 case Iop_DivD64: 2482 case Iop_DivF64r32: 2483 case Iop_ScaleF64: 2484 case Iop_Yl2xF64: 2485 case Iop_Yl2xp1F64: 2486 case Iop_AtanF64: 2487 case Iop_PRemF64: 2488 case Iop_PRem1F64: 2489 case Iop_QuantizeD64: 2490 /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */ 2491 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3); 2492 case Iop_PRemC3210F64: 2493 case Iop_PRem1C3210F64: 2494 /* I32(rm) x F64 x F64 -> I32 */ 2495 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3); 2496 case Iop_AddF32: 2497 case Iop_SubF32: 2498 case Iop_MulF32: 2499 case Iop_DivF32: 2500 /* I32(rm) x F32 x F32 -> I32 */ 2501 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3); 2502 case 
Iop_SignificanceRoundD64: 2503 /* IRRoundingModeDFP(I32) x I8 x D64 -> D64 */ 2504 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3); 2505 case Iop_SignificanceRoundD128: 2506 /* IRRoundingModeDFP(I32) x I8 x D128 -> D128 */ 2507 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3); 2508 case Iop_ExtractV128: 2509 complainIfUndefined(mce, atom3, NULL); 2510 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3)); 2511 case Iop_Extract64: 2512 complainIfUndefined(mce, atom3, NULL); 2513 return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3)); 2514 case Iop_SetElem8x8: 2515 case Iop_SetElem16x4: 2516 case Iop_SetElem32x2: 2517 complainIfUndefined(mce, atom2, NULL); 2518 return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3)); 2519 default: 2520 ppIROp(op); 2521 VG_(tool_panic)("memcheck:expr2vbits_Triop"); 2522 } 2523 } 2524 2525 2526 static 2527 IRAtom* expr2vbits_Binop ( MCEnv* mce, 2528 IROp op, 2529 IRAtom* atom1, IRAtom* atom2 ) 2530 { 2531 IRType and_or_ty; 2532 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*); 2533 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*); 2534 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*); 2535 2536 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2537 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2538 2539 tl_assert(isOriginalAtom(mce,atom1)); 2540 tl_assert(isOriginalAtom(mce,atom2)); 2541 tl_assert(isShadowAtom(mce,vatom1)); 2542 tl_assert(isShadowAtom(mce,vatom2)); 2543 tl_assert(sameKindedAtoms(atom1,vatom1)); 2544 tl_assert(sameKindedAtoms(atom2,vatom2)); 2545 switch (op) { 2546 2547 /* 32-bit SIMD */ 2548 2549 case Iop_Add16x2: 2550 case Iop_HAdd16Ux2: 2551 case Iop_HAdd16Sx2: 2552 case Iop_Sub16x2: 2553 case Iop_HSub16Ux2: 2554 case Iop_HSub16Sx2: 2555 case Iop_QAdd16Sx2: 2556 case Iop_QSub16Sx2: 2557 case Iop_QSub16Ux2: 2558 return binary16Ix2(mce, vatom1, vatom2); 2559 2560 case Iop_Add8x4: 2561 case Iop_HAdd8Ux4: 2562 case Iop_HAdd8Sx4: 2563 case Iop_Sub8x4: 2564 case Iop_HSub8Ux4: 2565 case Iop_HSub8Sx4: 2566 case Iop_QSub8Ux4: 2567 case Iop_QAdd8Ux4: 2568 case Iop_QSub8Sx4: 2569 case Iop_QAdd8Sx4: 2570 return binary8Ix4(mce, vatom1, vatom2); 2571 2572 /* 64-bit SIMD */ 2573 2574 case Iop_ShrN8x8: 2575 case Iop_ShrN16x4: 2576 case Iop_ShrN32x2: 2577 case Iop_SarN8x8: 2578 case Iop_SarN16x4: 2579 case Iop_SarN32x2: 2580 case Iop_ShlN16x4: 2581 case Iop_ShlN32x2: 2582 case Iop_ShlN8x8: 2583 /* Same scheme as with all other shifts. 
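         That is: insist (via complainIfUndefined) that the scalar shift
         amount is defined, then shift the V bits of the vector operand
         by that same original amount, so undefinedness moves with the
         data lanes.  As the note further down next to the V128 shifts
         observes, a lazier treatment along the lines of scalarShift
         would be preferable; that remark applies here too.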
*/ 2584 complainIfUndefined(mce, atom2, NULL); 2585 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 2586 2587 case Iop_QNarrowBin32Sto16Sx4: 2588 case Iop_QNarrowBin16Sto8Sx8: 2589 case Iop_QNarrowBin16Sto8Ux8: 2590 return vectorNarrowBin64(mce, op, vatom1, vatom2); 2591 2592 case Iop_Min8Ux8: 2593 case Iop_Min8Sx8: 2594 case Iop_Max8Ux8: 2595 case Iop_Max8Sx8: 2596 case Iop_Avg8Ux8: 2597 case Iop_QSub8Sx8: 2598 case Iop_QSub8Ux8: 2599 case Iop_Sub8x8: 2600 case Iop_CmpGT8Sx8: 2601 case Iop_CmpGT8Ux8: 2602 case Iop_CmpEQ8x8: 2603 case Iop_QAdd8Sx8: 2604 case Iop_QAdd8Ux8: 2605 case Iop_QSal8x8: 2606 case Iop_QShl8x8: 2607 case Iop_Add8x8: 2608 case Iop_Mul8x8: 2609 case Iop_PolynomialMul8x8: 2610 return binary8Ix8(mce, vatom1, vatom2); 2611 2612 case Iop_Min16Sx4: 2613 case Iop_Min16Ux4: 2614 case Iop_Max16Sx4: 2615 case Iop_Max16Ux4: 2616 case Iop_Avg16Ux4: 2617 case Iop_QSub16Ux4: 2618 case Iop_QSub16Sx4: 2619 case Iop_Sub16x4: 2620 case Iop_Mul16x4: 2621 case Iop_MulHi16Sx4: 2622 case Iop_MulHi16Ux4: 2623 case Iop_CmpGT16Sx4: 2624 case Iop_CmpGT16Ux4: 2625 case Iop_CmpEQ16x4: 2626 case Iop_QAdd16Sx4: 2627 case Iop_QAdd16Ux4: 2628 case Iop_QSal16x4: 2629 case Iop_QShl16x4: 2630 case Iop_Add16x4: 2631 case Iop_QDMulHi16Sx4: 2632 case Iop_QRDMulHi16Sx4: 2633 return binary16Ix4(mce, vatom1, vatom2); 2634 2635 case Iop_Sub32x2: 2636 case Iop_Mul32x2: 2637 case Iop_Max32Sx2: 2638 case Iop_Max32Ux2: 2639 case Iop_Min32Sx2: 2640 case Iop_Min32Ux2: 2641 case Iop_CmpGT32Sx2: 2642 case Iop_CmpGT32Ux2: 2643 case Iop_CmpEQ32x2: 2644 case Iop_Add32x2: 2645 case Iop_QAdd32Ux2: 2646 case Iop_QAdd32Sx2: 2647 case Iop_QSub32Ux2: 2648 case Iop_QSub32Sx2: 2649 case Iop_QSal32x2: 2650 case Iop_QShl32x2: 2651 case Iop_QDMulHi32Sx2: 2652 case Iop_QRDMulHi32Sx2: 2653 return binary32Ix2(mce, vatom1, vatom2); 2654 2655 case Iop_QSub64Ux1: 2656 case Iop_QSub64Sx1: 2657 case Iop_QAdd64Ux1: 2658 case Iop_QAdd64Sx1: 2659 case Iop_QSal64x1: 2660 case Iop_QShl64x1: 2661 case Iop_Sal64x1: 2662 return binary64Ix1(mce, vatom1, vatom2); 2663 2664 case Iop_QShlN8Sx8: 2665 case Iop_QShlN8x8: 2666 case Iop_QSalN8x8: 2667 complainIfUndefined(mce, atom2, NULL); 2668 return mkPCast8x8(mce, vatom1); 2669 2670 case Iop_QShlN16Sx4: 2671 case Iop_QShlN16x4: 2672 case Iop_QSalN16x4: 2673 complainIfUndefined(mce, atom2, NULL); 2674 return mkPCast16x4(mce, vatom1); 2675 2676 case Iop_QShlN32Sx2: 2677 case Iop_QShlN32x2: 2678 case Iop_QSalN32x2: 2679 complainIfUndefined(mce, atom2, NULL); 2680 return mkPCast32x2(mce, vatom1); 2681 2682 case Iop_QShlN64Sx1: 2683 case Iop_QShlN64x1: 2684 case Iop_QSalN64x1: 2685 complainIfUndefined(mce, atom2, NULL); 2686 return mkPCast32x2(mce, vatom1); 2687 2688 case Iop_PwMax32Sx2: 2689 case Iop_PwMax32Ux2: 2690 case Iop_PwMin32Sx2: 2691 case Iop_PwMin32Ux2: 2692 case Iop_PwMax32Fx2: 2693 case Iop_PwMin32Fx2: 2694 return assignNew('V', mce, Ity_I64, 2695 binop(Iop_PwMax32Ux2, 2696 mkPCast32x2(mce, vatom1), 2697 mkPCast32x2(mce, vatom2))); 2698 2699 case Iop_PwMax16Sx4: 2700 case Iop_PwMax16Ux4: 2701 case Iop_PwMin16Sx4: 2702 case Iop_PwMin16Ux4: 2703 return assignNew('V', mce, Ity_I64, 2704 binop(Iop_PwMax16Ux4, 2705 mkPCast16x4(mce, vatom1), 2706 mkPCast16x4(mce, vatom2))); 2707 2708 case Iop_PwMax8Sx8: 2709 case Iop_PwMax8Ux8: 2710 case Iop_PwMin8Sx8: 2711 case Iop_PwMin8Ux8: 2712 return assignNew('V', mce, Ity_I64, 2713 binop(Iop_PwMax8Ux8, 2714 mkPCast8x8(mce, vatom1), 2715 mkPCast8x8(mce, vatom2))); 2716 2717 case Iop_PwAdd32x2: 2718 case Iop_PwAdd32Fx2: 2719 return 
mkPCast32x2(mce, 2720 assignNew('V', mce, Ity_I64, 2721 binop(Iop_PwAdd32x2, 2722 mkPCast32x2(mce, vatom1), 2723 mkPCast32x2(mce, vatom2)))); 2724 2725 case Iop_PwAdd16x4: 2726 return mkPCast16x4(mce, 2727 assignNew('V', mce, Ity_I64, 2728 binop(op, mkPCast16x4(mce, vatom1), 2729 mkPCast16x4(mce, vatom2)))); 2730 2731 case Iop_PwAdd8x8: 2732 return mkPCast8x8(mce, 2733 assignNew('V', mce, Ity_I64, 2734 binop(op, mkPCast8x8(mce, vatom1), 2735 mkPCast8x8(mce, vatom2)))); 2736 2737 case Iop_Shl8x8: 2738 case Iop_Shr8x8: 2739 case Iop_Sar8x8: 2740 case Iop_Sal8x8: 2741 return mkUifU64(mce, 2742 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2743 mkPCast8x8(mce,vatom2) 2744 ); 2745 2746 case Iop_Shl16x4: 2747 case Iop_Shr16x4: 2748 case Iop_Sar16x4: 2749 case Iop_Sal16x4: 2750 return mkUifU64(mce, 2751 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2752 mkPCast16x4(mce,vatom2) 2753 ); 2754 2755 case Iop_Shl32x2: 2756 case Iop_Shr32x2: 2757 case Iop_Sar32x2: 2758 case Iop_Sal32x2: 2759 return mkUifU64(mce, 2760 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2761 mkPCast32x2(mce,vatom2) 2762 ); 2763 2764 /* 64-bit data-steering */ 2765 case Iop_InterleaveLO32x2: 2766 case Iop_InterleaveLO16x4: 2767 case Iop_InterleaveLO8x8: 2768 case Iop_InterleaveHI32x2: 2769 case Iop_InterleaveHI16x4: 2770 case Iop_InterleaveHI8x8: 2771 case Iop_CatOddLanes8x8: 2772 case Iop_CatEvenLanes8x8: 2773 case Iop_CatOddLanes16x4: 2774 case Iop_CatEvenLanes16x4: 2775 case Iop_InterleaveOddLanes8x8: 2776 case Iop_InterleaveEvenLanes8x8: 2777 case Iop_InterleaveOddLanes16x4: 2778 case Iop_InterleaveEvenLanes16x4: 2779 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2)); 2780 2781 case Iop_GetElem8x8: 2782 complainIfUndefined(mce, atom2, NULL); 2783 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 2784 case Iop_GetElem16x4: 2785 complainIfUndefined(mce, atom2, NULL); 2786 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 2787 case Iop_GetElem32x2: 2788 complainIfUndefined(mce, atom2, NULL); 2789 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 2790 2791 /* Perm8x8: rearrange values in left arg using steering values 2792 from right arg. So rearrange the vbits in the same way but 2793 pessimise wrt steering values. */ 2794 case Iop_Perm8x8: 2795 return mkUifU64( 2796 mce, 2797 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 2798 mkPCast8x8(mce, vatom2) 2799 ); 2800 2801 /* V128-bit SIMD */ 2802 2803 case Iop_ShrN8x16: 2804 case Iop_ShrN16x8: 2805 case Iop_ShrN32x4: 2806 case Iop_ShrN64x2: 2807 case Iop_SarN8x16: 2808 case Iop_SarN16x8: 2809 case Iop_SarN32x4: 2810 case Iop_SarN64x2: 2811 case Iop_ShlN8x16: 2812 case Iop_ShlN16x8: 2813 case Iop_ShlN32x4: 2814 case Iop_ShlN64x2: 2815 /* Same scheme as with all other shifts. Note: 22 Oct 05: 2816 this is wrong now, scalar shifts are done properly lazily. 2817 Vector shifts should be fixed too. */ 2818 complainIfUndefined(mce, atom2, NULL); 2819 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 2820 2821 /* V x V shifts/rotates are done using the standard lazy scheme. 
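         That is: shift vatom1's V bits lane-by-lane by the original
         per-lane amounts in atom2, then UifU in a per-lane PCast of
         atom2's own V bits.  So an undefined shift amount in lane i
         makes (only) lane i of the result undefined, unlike the scalar
         case where the whole result would be tainted.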
*/ 2822 case Iop_Shl8x16: 2823 case Iop_Shr8x16: 2824 case Iop_Sar8x16: 2825 case Iop_Sal8x16: 2826 case Iop_Rol8x16: 2827 return mkUifUV128(mce, 2828 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2829 mkPCast8x16(mce,vatom2) 2830 ); 2831 2832 case Iop_Shl16x8: 2833 case Iop_Shr16x8: 2834 case Iop_Sar16x8: 2835 case Iop_Sal16x8: 2836 case Iop_Rol16x8: 2837 return mkUifUV128(mce, 2838 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2839 mkPCast16x8(mce,vatom2) 2840 ); 2841 2842 case Iop_Shl32x4: 2843 case Iop_Shr32x4: 2844 case Iop_Sar32x4: 2845 case Iop_Sal32x4: 2846 case Iop_Rol32x4: 2847 return mkUifUV128(mce, 2848 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2849 mkPCast32x4(mce,vatom2) 2850 ); 2851 2852 case Iop_Shl64x2: 2853 case Iop_Shr64x2: 2854 case Iop_Sar64x2: 2855 case Iop_Sal64x2: 2856 return mkUifUV128(mce, 2857 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 2858 mkPCast64x2(mce,vatom2) 2859 ); 2860 2861 case Iop_F32ToFixed32Ux4_RZ: 2862 case Iop_F32ToFixed32Sx4_RZ: 2863 case Iop_Fixed32UToF32x4_RN: 2864 case Iop_Fixed32SToF32x4_RN: 2865 complainIfUndefined(mce, atom2, NULL); 2866 return mkPCast32x4(mce, vatom1); 2867 2868 case Iop_F32ToFixed32Ux2_RZ: 2869 case Iop_F32ToFixed32Sx2_RZ: 2870 case Iop_Fixed32UToF32x2_RN: 2871 case Iop_Fixed32SToF32x2_RN: 2872 complainIfUndefined(mce, atom2, NULL); 2873 return mkPCast32x2(mce, vatom1); 2874 2875 case Iop_QSub8Ux16: 2876 case Iop_QSub8Sx16: 2877 case Iop_Sub8x16: 2878 case Iop_Min8Ux16: 2879 case Iop_Min8Sx16: 2880 case Iop_Max8Ux16: 2881 case Iop_Max8Sx16: 2882 case Iop_CmpGT8Sx16: 2883 case Iop_CmpGT8Ux16: 2884 case Iop_CmpEQ8x16: 2885 case Iop_Avg8Ux16: 2886 case Iop_Avg8Sx16: 2887 case Iop_QAdd8Ux16: 2888 case Iop_QAdd8Sx16: 2889 case Iop_QSal8x16: 2890 case Iop_QShl8x16: 2891 case Iop_Add8x16: 2892 case Iop_Mul8x16: 2893 case Iop_PolynomialMul8x16: 2894 return binary8Ix16(mce, vatom1, vatom2); 2895 2896 case Iop_QSub16Ux8: 2897 case Iop_QSub16Sx8: 2898 case Iop_Sub16x8: 2899 case Iop_Mul16x8: 2900 case Iop_MulHi16Sx8: 2901 case Iop_MulHi16Ux8: 2902 case Iop_Min16Sx8: 2903 case Iop_Min16Ux8: 2904 case Iop_Max16Sx8: 2905 case Iop_Max16Ux8: 2906 case Iop_CmpGT16Sx8: 2907 case Iop_CmpGT16Ux8: 2908 case Iop_CmpEQ16x8: 2909 case Iop_Avg16Ux8: 2910 case Iop_Avg16Sx8: 2911 case Iop_QAdd16Ux8: 2912 case Iop_QAdd16Sx8: 2913 case Iop_QSal16x8: 2914 case Iop_QShl16x8: 2915 case Iop_Add16x8: 2916 case Iop_QDMulHi16Sx8: 2917 case Iop_QRDMulHi16Sx8: 2918 return binary16Ix8(mce, vatom1, vatom2); 2919 2920 case Iop_Sub32x4: 2921 case Iop_CmpGT32Sx4: 2922 case Iop_CmpGT32Ux4: 2923 case Iop_CmpEQ32x4: 2924 case Iop_QAdd32Sx4: 2925 case Iop_QAdd32Ux4: 2926 case Iop_QSub32Sx4: 2927 case Iop_QSub32Ux4: 2928 case Iop_QSal32x4: 2929 case Iop_QShl32x4: 2930 case Iop_Avg32Ux4: 2931 case Iop_Avg32Sx4: 2932 case Iop_Add32x4: 2933 case Iop_Max32Ux4: 2934 case Iop_Max32Sx4: 2935 case Iop_Min32Ux4: 2936 case Iop_Min32Sx4: 2937 case Iop_Mul32x4: 2938 case Iop_QDMulHi32Sx4: 2939 case Iop_QRDMulHi32Sx4: 2940 return binary32Ix4(mce, vatom1, vatom2); 2941 2942 case Iop_Sub64x2: 2943 case Iop_Add64x2: 2944 case Iop_CmpEQ64x2: 2945 case Iop_CmpGT64Sx2: 2946 case Iop_QSal64x2: 2947 case Iop_QShl64x2: 2948 case Iop_QAdd64Ux2: 2949 case Iop_QAdd64Sx2: 2950 case Iop_QSub64Ux2: 2951 case Iop_QSub64Sx2: 2952 return binary64Ix2(mce, vatom1, vatom2); 2953 2954 case Iop_QNarrowBin32Sto16Sx8: 2955 case Iop_QNarrowBin32Uto16Ux8: 2956 case Iop_QNarrowBin32Sto16Ux8: 2957 case Iop_QNarrowBin16Sto8Sx16: 2958 case 
Iop_QNarrowBin16Uto8Ux16: 2959 case Iop_QNarrowBin16Sto8Ux16: 2960 return vectorNarrowBinV128(mce, op, vatom1, vatom2); 2961 2962 case Iop_Sub64Fx2: 2963 case Iop_Mul64Fx2: 2964 case Iop_Min64Fx2: 2965 case Iop_Max64Fx2: 2966 case Iop_Div64Fx2: 2967 case Iop_CmpLT64Fx2: 2968 case Iop_CmpLE64Fx2: 2969 case Iop_CmpEQ64Fx2: 2970 case Iop_CmpUN64Fx2: 2971 case Iop_Add64Fx2: 2972 return binary64Fx2(mce, vatom1, vatom2); 2973 2974 case Iop_Sub64F0x2: 2975 case Iop_Mul64F0x2: 2976 case Iop_Min64F0x2: 2977 case Iop_Max64F0x2: 2978 case Iop_Div64F0x2: 2979 case Iop_CmpLT64F0x2: 2980 case Iop_CmpLE64F0x2: 2981 case Iop_CmpEQ64F0x2: 2982 case Iop_CmpUN64F0x2: 2983 case Iop_Add64F0x2: 2984 return binary64F0x2(mce, vatom1, vatom2); 2985 2986 case Iop_Sub32Fx4: 2987 case Iop_Mul32Fx4: 2988 case Iop_Min32Fx4: 2989 case Iop_Max32Fx4: 2990 case Iop_Div32Fx4: 2991 case Iop_CmpLT32Fx4: 2992 case Iop_CmpLE32Fx4: 2993 case Iop_CmpEQ32Fx4: 2994 case Iop_CmpUN32Fx4: 2995 case Iop_CmpGT32Fx4: 2996 case Iop_CmpGE32Fx4: 2997 case Iop_Add32Fx4: 2998 case Iop_Recps32Fx4: 2999 case Iop_Rsqrts32Fx4: 3000 return binary32Fx4(mce, vatom1, vatom2); 3001 3002 case Iop_Sub32Fx2: 3003 case Iop_Mul32Fx2: 3004 case Iop_Min32Fx2: 3005 case Iop_Max32Fx2: 3006 case Iop_CmpEQ32Fx2: 3007 case Iop_CmpGT32Fx2: 3008 case Iop_CmpGE32Fx2: 3009 case Iop_Add32Fx2: 3010 case Iop_Recps32Fx2: 3011 case Iop_Rsqrts32Fx2: 3012 return binary32Fx2(mce, vatom1, vatom2); 3013 3014 case Iop_Sub32F0x4: 3015 case Iop_Mul32F0x4: 3016 case Iop_Min32F0x4: 3017 case Iop_Max32F0x4: 3018 case Iop_Div32F0x4: 3019 case Iop_CmpLT32F0x4: 3020 case Iop_CmpLE32F0x4: 3021 case Iop_CmpEQ32F0x4: 3022 case Iop_CmpUN32F0x4: 3023 case Iop_Add32F0x4: 3024 return binary32F0x4(mce, vatom1, vatom2); 3025 3026 case Iop_QShlN8Sx16: 3027 case Iop_QShlN8x16: 3028 case Iop_QSalN8x16: 3029 complainIfUndefined(mce, atom2, NULL); 3030 return mkPCast8x16(mce, vatom1); 3031 3032 case Iop_QShlN16Sx8: 3033 case Iop_QShlN16x8: 3034 case Iop_QSalN16x8: 3035 complainIfUndefined(mce, atom2, NULL); 3036 return mkPCast16x8(mce, vatom1); 3037 3038 case Iop_QShlN32Sx4: 3039 case Iop_QShlN32x4: 3040 case Iop_QSalN32x4: 3041 complainIfUndefined(mce, atom2, NULL); 3042 return mkPCast32x4(mce, vatom1); 3043 3044 case Iop_QShlN64Sx2: 3045 case Iop_QShlN64x2: 3046 case Iop_QSalN64x2: 3047 complainIfUndefined(mce, atom2, NULL); 3048 return mkPCast32x4(mce, vatom1); 3049 3050 case Iop_Mull32Sx2: 3051 case Iop_Mull32Ux2: 3052 case Iop_QDMulLong32Sx2: 3053 return vectorWidenI64(mce, Iop_Widen32Sto64x2, 3054 mkUifU64(mce, vatom1, vatom2)); 3055 3056 case Iop_Mull16Sx4: 3057 case Iop_Mull16Ux4: 3058 case Iop_QDMulLong16Sx4: 3059 return vectorWidenI64(mce, Iop_Widen16Sto32x4, 3060 mkUifU64(mce, vatom1, vatom2)); 3061 3062 case Iop_Mull8Sx8: 3063 case Iop_Mull8Ux8: 3064 case Iop_PolynomialMull8x8: 3065 return vectorWidenI64(mce, Iop_Widen8Sto16x8, 3066 mkUifU64(mce, vatom1, vatom2)); 3067 3068 case Iop_PwAdd32x4: 3069 return mkPCast32x4(mce, 3070 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1), 3071 mkPCast32x4(mce, vatom2)))); 3072 3073 case Iop_PwAdd16x8: 3074 return mkPCast16x8(mce, 3075 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1), 3076 mkPCast16x8(mce, vatom2)))); 3077 3078 case Iop_PwAdd8x16: 3079 return mkPCast8x16(mce, 3080 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1), 3081 mkPCast8x16(mce, vatom2)))); 3082 3083 /* V128-bit data-steering */ 3084 case Iop_SetV128lo32: 3085 case Iop_SetV128lo64: 3086 case Iop_64HLtoV128: 3087 case 
Iop_InterleaveLO64x2: 3088 case Iop_InterleaveLO32x4: 3089 case Iop_InterleaveLO16x8: 3090 case Iop_InterleaveLO8x16: 3091 case Iop_InterleaveHI64x2: 3092 case Iop_InterleaveHI32x4: 3093 case Iop_InterleaveHI16x8: 3094 case Iop_InterleaveHI8x16: 3095 case Iop_CatOddLanes8x16: 3096 case Iop_CatOddLanes16x8: 3097 case Iop_CatOddLanes32x4: 3098 case Iop_CatEvenLanes8x16: 3099 case Iop_CatEvenLanes16x8: 3100 case Iop_CatEvenLanes32x4: 3101 case Iop_InterleaveOddLanes8x16: 3102 case Iop_InterleaveOddLanes16x8: 3103 case Iop_InterleaveOddLanes32x4: 3104 case Iop_InterleaveEvenLanes8x16: 3105 case Iop_InterleaveEvenLanes16x8: 3106 case Iop_InterleaveEvenLanes32x4: 3107 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2)); 3108 3109 case Iop_GetElem8x16: 3110 complainIfUndefined(mce, atom2, NULL); 3111 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 3112 case Iop_GetElem16x8: 3113 complainIfUndefined(mce, atom2, NULL); 3114 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 3115 case Iop_GetElem32x4: 3116 complainIfUndefined(mce, atom2, NULL); 3117 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 3118 case Iop_GetElem64x2: 3119 complainIfUndefined(mce, atom2, NULL); 3120 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 3121 3122 /* Perm8x16: rearrange values in left arg using steering values 3123 from right arg. So rearrange the vbits in the same way but 3124 pessimise wrt steering values. Perm32x4 ditto. */ 3125 case Iop_Perm8x16: 3126 return mkUifUV128( 3127 mce, 3128 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3129 mkPCast8x16(mce, vatom2) 3130 ); 3131 case Iop_Perm32x4: 3132 return mkUifUV128( 3133 mce, 3134 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3135 mkPCast32x4(mce, vatom2) 3136 ); 3137 3138 /* These two take the lower half of each 16-bit lane, sign/zero 3139 extend it to 32, and multiply together, producing a 32x4 3140 result (and implicitly ignoring half the operand bits). So 3141 treat it as a bunch of independent 16x8 operations, but then 3142 do 32-bit shifts left-right to copy the lower half results 3143 (which are all 0s or all 1s due to PCasting in binary16Ix8) 3144 into the upper half of each result lane. */ 3145 case Iop_MullEven16Ux8: 3146 case Iop_MullEven16Sx8: { 3147 IRAtom* at; 3148 at = binary16Ix8(mce,vatom1,vatom2); 3149 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16))); 3150 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16))); 3151 return at; 3152 } 3153 3154 /* Same deal as Iop_MullEven16{S,U}x8 */ 3155 case Iop_MullEven8Ux16: 3156 case Iop_MullEven8Sx16: { 3157 IRAtom* at; 3158 at = binary8Ix16(mce,vatom1,vatom2); 3159 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8))); 3160 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8))); 3161 return at; 3162 } 3163 3164 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x 3165 32x4 -> 16x8 laneage, discarding the upper half of each lane. 3166 Simply apply same op to the V bits, since this really no more 3167 than a data steering operation. */ 3168 case Iop_NarrowBin32to16x8: 3169 case Iop_NarrowBin16to8x16: 3170 return assignNew('V', mce, Ity_V128, 3171 binop(op, vatom1, vatom2)); 3172 3173 case Iop_ShrV128: 3174 case Iop_ShlV128: 3175 /* Same scheme as with all other shifts. Note: 10 Nov 05: 3176 this is wrong now, scalar shifts are done properly lazily. 3177 Vector shifts should be fixed too. 
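            Presumably "fixed" would mean treating these like scalarShift
            above: shift the V bits by the original amount and UifU in a
            whole-vector PCast of the shift amount's V bits, instead of
            complaining eagerly here.  (This is a reading of the FIXME,
            not something the surrounding code does.)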
*/ 3178 complainIfUndefined(mce, atom2, NULL); 3179 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 3180 3181 /* I128-bit data-steering */ 3182 case Iop_64HLto128: 3183 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2)); 3184 3185 /* V256-bit SIMD */ 3186 3187 case Iop_Add64Fx4: 3188 case Iop_Sub64Fx4: 3189 case Iop_Mul64Fx4: 3190 case Iop_Div64Fx4: 3191 case Iop_Max64Fx4: 3192 case Iop_Min64Fx4: 3193 return binary64Fx4(mce, vatom1, vatom2); 3194 3195 case Iop_Add32Fx8: 3196 case Iop_Sub32Fx8: 3197 case Iop_Mul32Fx8: 3198 case Iop_Div32Fx8: 3199 case Iop_Max32Fx8: 3200 case Iop_Min32Fx8: 3201 return binary32Fx8(mce, vatom1, vatom2); 3202 3203 /* V256-bit data-steering */ 3204 case Iop_V128HLtoV256: 3205 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2)); 3206 3207 /* Scalar floating point */ 3208 3209 case Iop_F32toI64S: 3210 /* I32(rm) x F32 -> I64 */ 3211 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3212 3213 case Iop_I64StoF32: 3214 /* I32(rm) x I64 -> F32 */ 3215 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3216 3217 case Iop_RoundF64toInt: 3218 case Iop_RoundF64toF32: 3219 case Iop_F64toI64S: 3220 case Iop_F64toI64U: 3221 case Iop_I64StoF64: 3222 case Iop_I64UtoF64: 3223 case Iop_SinF64: 3224 case Iop_CosF64: 3225 case Iop_TanF64: 3226 case Iop_2xm1F64: 3227 case Iop_SqrtF64: 3228 /* I32(rm) x I64/F64 -> I64/F64 */ 3229 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3230 3231 case Iop_ShlD64: 3232 case Iop_ShrD64: 3233 case Iop_RoundD64toInt: 3234 /* I32(DFP rm) x D64 -> D64 */ 3235 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3236 3237 case Iop_ShlD128: 3238 case Iop_ShrD128: 3239 case Iop_RoundD128toInt: 3240 /* I32(DFP rm) x D128 -> D128 */ 3241 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3242 3243 case Iop_D64toI64S: 3244 case Iop_I64StoD64: 3245 /* I64(DFP rm) x I64 -> D64 */ 3246 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3247 3248 case Iop_RoundF32toInt: 3249 case Iop_SqrtF32: 3250 /* I32(rm) x I32/F32 -> I32/F32 */ 3251 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3252 3253 case Iop_SqrtF128: 3254 /* I32(rm) x F128 -> F128 */ 3255 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3256 3257 case Iop_I32StoF32: 3258 case Iop_F32toI32S: 3259 /* First arg is I32 (rounding mode), second is F32/I32 (data). */ 3260 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3261 3262 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */ 3263 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */ 3264 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3265 3266 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */ 3267 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */ 3268 case Iop_D128toD64: /* IRRoundingModeDFP(I64) x D128 -> D64 */ 3269 case Iop_D128toI64S: /* IRRoundingModeDFP(I64) x D128 -> signed I64 */ 3270 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3271 3272 case Iop_F64HLtoF128: 3273 case Iop_D64HLtoD128: 3274 return assignNew('V', mce, Ity_I128, 3275 binop(Iop_64HLto128, vatom1, vatom2)); 3276 3277 case Iop_F64toI32U: 3278 case Iop_F64toI32S: 3279 case Iop_F64toF32: 3280 case Iop_I64UtoF32: 3281 /* First arg is I32 (rounding mode), second is F64 (data). */ 3282 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3283 3284 case Iop_D64toD32: 3285 /* First arg is I64 (DFProunding mode), second is D64 (data). */ 3286 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3287 3288 case Iop_F64toI16S: 3289 /* First arg is I32 (rounding mode), second is F64 (data). 
*/ 3290 return mkLazy2(mce, Ity_I16, vatom1, vatom2); 3291 3292 case Iop_InsertExpD64: 3293 /* I64 x I64 -> D64 */ 3294 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3295 3296 case Iop_InsertExpD128: 3297 /* I64 x I128 -> D128 */ 3298 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3299 3300 case Iop_CmpF32: 3301 case Iop_CmpF64: 3302 case Iop_CmpF128: 3303 case Iop_CmpD64: 3304 case Iop_CmpD128: 3305 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3306 3307 /* non-FP after here */ 3308 3309 case Iop_DivModU64to32: 3310 case Iop_DivModS64to32: 3311 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3312 3313 case Iop_DivModU128to64: 3314 case Iop_DivModS128to64: 3315 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3316 3317 case Iop_16HLto32: 3318 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2)); 3319 case Iop_32HLto64: 3320 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2)); 3321 3322 case Iop_DivModS64to64: 3323 case Iop_MullS64: 3324 case Iop_MullU64: { 3325 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2)); 3326 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64); 3327 return assignNew('V', mce, Ity_I128, 3328 binop(Iop_64HLto128, vHi64, vLo64)); 3329 } 3330 3331 case Iop_MullS32: 3332 case Iop_MullU32: { 3333 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2)); 3334 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32); 3335 return assignNew('V', mce, Ity_I64, 3336 binop(Iop_32HLto64, vHi32, vLo32)); 3337 } 3338 3339 case Iop_MullS16: 3340 case Iop_MullU16: { 3341 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2)); 3342 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16); 3343 return assignNew('V', mce, Ity_I32, 3344 binop(Iop_16HLto32, vHi16, vLo16)); 3345 } 3346 3347 case Iop_MullS8: 3348 case Iop_MullU8: { 3349 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2)); 3350 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8); 3351 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8)); 3352 } 3353 3354 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. 
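         ("ftm" = for the moment.  mkLazy2 pessimistically merges the
          two args' V bits and PCasts to I32, so any undefined input
          bit marks the whole 32-bit result undefined -- coarse but
          sound.)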
*/ 3355 case Iop_DivS32: 3356 case Iop_DivU32: 3357 case Iop_DivU32E: 3358 case Iop_DivS32E: 3359 case Iop_QAdd32S: /* could probably do better */ 3360 case Iop_QSub32S: /* could probably do better */ 3361 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3362 3363 case Iop_DivS64: 3364 case Iop_DivU64: 3365 case Iop_DivS64E: 3366 case Iop_DivU64E: 3367 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3368 3369 case Iop_Add32: 3370 if (mce->bogusLiterals || mce->useLLVMworkarounds) 3371 return expensiveAddSub(mce,True,Ity_I32, 3372 vatom1,vatom2, atom1,atom2); 3373 else 3374 goto cheap_AddSub32; 3375 case Iop_Sub32: 3376 if (mce->bogusLiterals) 3377 return expensiveAddSub(mce,False,Ity_I32, 3378 vatom1,vatom2, atom1,atom2); 3379 else 3380 goto cheap_AddSub32; 3381 3382 cheap_AddSub32: 3383 case Iop_Mul32: 3384 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2)); 3385 3386 case Iop_CmpORD32S: 3387 case Iop_CmpORD32U: 3388 case Iop_CmpORD64S: 3389 case Iop_CmpORD64U: 3390 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2); 3391 3392 case Iop_Add64: 3393 if (mce->bogusLiterals || mce->useLLVMworkarounds) 3394 return expensiveAddSub(mce,True,Ity_I64, 3395 vatom1,vatom2, atom1,atom2); 3396 else 3397 goto cheap_AddSub64; 3398 case Iop_Sub64: 3399 if (mce->bogusLiterals) 3400 return expensiveAddSub(mce,False,Ity_I64, 3401 vatom1,vatom2, atom1,atom2); 3402 else 3403 goto cheap_AddSub64; 3404 3405 cheap_AddSub64: 3406 case Iop_Mul64: 3407 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2)); 3408 3409 case Iop_Mul16: 3410 case Iop_Add16: 3411 case Iop_Sub16: 3412 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2)); 3413 3414 case Iop_Sub8: 3415 case Iop_Add8: 3416 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2)); 3417 3418 case Iop_CmpEQ64: 3419 case Iop_CmpNE64: 3420 if (mce->bogusLiterals) 3421 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 ); 3422 else 3423 goto cheap_cmp64; 3424 cheap_cmp64: 3425 case Iop_CmpLE64S: case Iop_CmpLE64U: 3426 case Iop_CmpLT64U: case Iop_CmpLT64S: 3427 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2)); 3428 3429 case Iop_CmpEQ32: 3430 case Iop_CmpNE32: 3431 if (mce->bogusLiterals) 3432 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 ); 3433 else 3434 goto cheap_cmp32; 3435 cheap_cmp32: 3436 case Iop_CmpLE32S: case Iop_CmpLE32U: 3437 case Iop_CmpLT32U: case Iop_CmpLT32S: 3438 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2)); 3439 3440 case Iop_CmpEQ16: case Iop_CmpNE16: 3441 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2)); 3442 3443 case Iop_CmpEQ8: case Iop_CmpNE8: 3444 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2)); 3445 3446 case Iop_CasCmpEQ8: case Iop_CasCmpNE8: 3447 case Iop_CasCmpEQ16: case Iop_CasCmpNE16: 3448 case Iop_CasCmpEQ32: case Iop_CasCmpNE32: 3449 case Iop_CasCmpEQ64: case Iop_CasCmpNE64: 3450 /* Just say these all produce a defined result, regardless 3451 of their arguments. See COMMENT_ON_CasCmpEQ in this file. 
*/ 3452 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1)); 3453 3454 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64: 3455 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 ); 3456 3457 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32: 3458 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 ); 3459 3460 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16: 3461 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 ); 3462 3463 case Iop_Shl8: case Iop_Shr8: 3464 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 ); 3465 3466 case Iop_AndV256: 3467 uifu = mkUifUV256; difd = mkDifDV256; 3468 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or; 3469 case Iop_AndV128: 3470 uifu = mkUifUV128; difd = mkDifDV128; 3471 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or; 3472 case Iop_And64: 3473 uifu = mkUifU64; difd = mkDifD64; 3474 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or; 3475 case Iop_And32: 3476 uifu = mkUifU32; difd = mkDifD32; 3477 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or; 3478 case Iop_And16: 3479 uifu = mkUifU16; difd = mkDifD16; 3480 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or; 3481 case Iop_And8: 3482 uifu = mkUifU8; difd = mkDifD8; 3483 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or; 3484 3485 case Iop_OrV256: 3486 uifu = mkUifUV256; difd = mkDifDV256; 3487 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or; 3488 case Iop_OrV128: 3489 uifu = mkUifUV128; difd = mkDifDV128; 3490 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or; 3491 case Iop_Or64: 3492 uifu = mkUifU64; difd = mkDifD64; 3493 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or; 3494 case Iop_Or32: 3495 uifu = mkUifU32; difd = mkDifD32; 3496 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or; 3497 case Iop_Or16: 3498 uifu = mkUifU16; difd = mkDifD16; 3499 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or; 3500 case Iop_Or8: 3501 uifu = mkUifU8; difd = mkDifD8; 3502 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or; 3503 3504 do_And_Or: 3505 return 3506 assignNew( 3507 'V', mce, 3508 and_or_ty, 3509 difd(mce, uifu(mce, vatom1, vatom2), 3510 difd(mce, improve(mce, atom1, vatom1), 3511 improve(mce, atom2, vatom2) ) ) ); 3512 3513 case Iop_Xor8: 3514 return mkUifU8(mce, vatom1, vatom2); 3515 case Iop_Xor16: 3516 return mkUifU16(mce, vatom1, vatom2); 3517 case Iop_Xor32: 3518 return mkUifU32(mce, vatom1, vatom2); 3519 case Iop_Xor64: 3520 return mkUifU64(mce, vatom1, vatom2); 3521 case Iop_XorV128: 3522 return mkUifUV128(mce, vatom1, vatom2); 3523 case Iop_XorV256: 3524 return mkUifUV256(mce, vatom1, vatom2); 3525 3526 default: 3527 ppIROp(op); 3528 VG_(tool_panic)("memcheck:expr2vbits_Binop"); 3529 } 3530 } 3531 3532 3533 static 3534 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom ) 3535 { 3536 IRAtom* vatom = expr2vbits( mce, atom ); 3537 tl_assert(isOriginalAtom(mce,atom)); 3538 switch (op) { 3539 3540 case Iop_Sqrt64Fx2: 3541 return unary64Fx2(mce, vatom); 3542 3543 case Iop_Sqrt64F0x2: 3544 return unary64F0x2(mce, vatom); 3545 3546 case Iop_Sqrt32Fx8: 3547 case Iop_RSqrt32Fx8: 3548 case Iop_Recip32Fx8: 3549 return unary32Fx8(mce, vatom); 3550 3551 case Iop_Sqrt64Fx4: 3552 return unary64Fx4(mce, vatom); 3553 3554 case Iop_Sqrt32Fx4: 3555 case Iop_RSqrt32Fx4: 3556 case Iop_Recip32Fx4: 3557 case Iop_I32UtoFx4: 3558 case Iop_I32StoFx4: 3559 case Iop_QFtoI32Ux4_RZ: 3560 case Iop_QFtoI32Sx4_RZ: 3561 case 
Iop_RoundF32x4_RM: 3562 case Iop_RoundF32x4_RP: 3563 case Iop_RoundF32x4_RN: 3564 case Iop_RoundF32x4_RZ: 3565 case Iop_Recip32x4: 3566 case Iop_Abs32Fx4: 3567 case Iop_Neg32Fx4: 3568 case Iop_Rsqrte32Fx4: 3569 return unary32Fx4(mce, vatom); 3570 3571 case Iop_I32UtoFx2: 3572 case Iop_I32StoFx2: 3573 case Iop_Recip32Fx2: 3574 case Iop_Recip32x2: 3575 case Iop_Abs32Fx2: 3576 case Iop_Neg32Fx2: 3577 case Iop_Rsqrte32Fx2: 3578 return unary32Fx2(mce, vatom); 3579 3580 case Iop_Sqrt32F0x4: 3581 case Iop_RSqrt32F0x4: 3582 case Iop_Recip32F0x4: 3583 return unary32F0x4(mce, vatom); 3584 3585 case Iop_32UtoV128: 3586 case Iop_64UtoV128: 3587 case Iop_Dup8x16: 3588 case Iop_Dup16x8: 3589 case Iop_Dup32x4: 3590 case Iop_Reverse16_8x16: 3591 case Iop_Reverse32_8x16: 3592 case Iop_Reverse32_16x8: 3593 case Iop_Reverse64_8x16: 3594 case Iop_Reverse64_16x8: 3595 case Iop_Reverse64_32x4: 3596 case Iop_V256toV128_1: case Iop_V256toV128_0: 3597 return assignNew('V', mce, Ity_V128, unop(op, vatom)); 3598 3599 case Iop_F128HItoF64: /* F128 -> high half of F128 */ 3600 case Iop_D128HItoD64: /* D128 -> high half of D128 */ 3601 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom)); 3602 case Iop_F128LOtoF64: /* F128 -> low half of F128 */ 3603 case Iop_D128LOtoD64: /* D128 -> low half of D128 */ 3604 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom)); 3605 3606 case Iop_NegF128: 3607 case Iop_AbsF128: 3608 return mkPCastTo(mce, Ity_I128, vatom); 3609 3610 case Iop_I32StoF128: /* signed I32 -> F128 */ 3611 case Iop_I64StoF128: /* signed I64 -> F128 */ 3612 case Iop_F32toF128: /* F32 -> F128 */ 3613 case Iop_F64toF128: /* F64 -> F128 */ 3614 case Iop_I64StoD128: /* signed I64 -> D128 */ 3615 return mkPCastTo(mce, Ity_I128, vatom); 3616 3617 case Iop_F32toF64: 3618 case Iop_I32StoF64: 3619 case Iop_I32UtoF64: 3620 case Iop_NegF64: 3621 case Iop_AbsF64: 3622 case Iop_Est5FRSqrt: 3623 case Iop_RoundF64toF64_NEAREST: 3624 case Iop_RoundF64toF64_NegINF: 3625 case Iop_RoundF64toF64_PosINF: 3626 case Iop_RoundF64toF64_ZERO: 3627 case Iop_Clz64: 3628 case Iop_Ctz64: 3629 case Iop_D32toD64: 3630 case Iop_ExtractExpD64: /* D64 -> I64 */ 3631 case Iop_ExtractExpD128: /* D128 -> I64 */ 3632 case Iop_DPBtoBCD: 3633 case Iop_BCDtoDPB: 3634 return mkPCastTo(mce, Ity_I64, vatom); 3635 3636 case Iop_D64toD128: 3637 return mkPCastTo(mce, Ity_I128, vatom); 3638 3639 case Iop_Clz32: 3640 case Iop_Ctz32: 3641 case Iop_TruncF64asF32: 3642 case Iop_NegF32: 3643 case Iop_AbsF32: 3644 return mkPCastTo(mce, Ity_I32, vatom); 3645 3646 case Iop_1Uto64: 3647 case Iop_1Sto64: 3648 case Iop_8Uto64: 3649 case Iop_8Sto64: 3650 case Iop_16Uto64: 3651 case Iop_16Sto64: 3652 case Iop_32Sto64: 3653 case Iop_32Uto64: 3654 case Iop_V128to64: 3655 case Iop_V128HIto64: 3656 case Iop_128HIto64: 3657 case Iop_128to64: 3658 case Iop_Dup8x8: 3659 case Iop_Dup16x4: 3660 case Iop_Dup32x2: 3661 case Iop_Reverse16_8x8: 3662 case Iop_Reverse32_8x8: 3663 case Iop_Reverse32_16x4: 3664 case Iop_Reverse64_8x8: 3665 case Iop_Reverse64_16x4: 3666 case Iop_Reverse64_32x2: 3667 case Iop_V256to64_0: case Iop_V256to64_1: 3668 case Iop_V256to64_2: case Iop_V256to64_3: 3669 return assignNew('V', mce, Ity_I64, unop(op, vatom)); 3670 3671 case Iop_I16StoF32: 3672 case Iop_64to32: 3673 case Iop_64HIto32: 3674 case Iop_1Uto32: 3675 case Iop_1Sto32: 3676 case Iop_8Uto32: 3677 case Iop_16Uto32: 3678 case Iop_16Sto32: 3679 case Iop_8Sto32: 3680 case Iop_V128to32: 3681 return assignNew('V', mce, Ity_I32, unop(op, vatom)); 3682 3683 case Iop_8Sto16: 3684 case 
Iop_8Uto16: 3685 case Iop_32to16: 3686 case Iop_32HIto16: 3687 case Iop_64to16: 3688 return assignNew('V', mce, Ity_I16, unop(op, vatom)); 3689 3690 case Iop_1Uto8: 3691 case Iop_1Sto8: 3692 case Iop_16to8: 3693 case Iop_16HIto8: 3694 case Iop_32to8: 3695 case Iop_64to8: 3696 return assignNew('V', mce, Ity_I8, unop(op, vatom)); 3697 3698 case Iop_32to1: 3699 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom)); 3700 3701 case Iop_64to1: 3702 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom)); 3703 3704 case Iop_ReinterpF64asI64: 3705 case Iop_ReinterpI64asF64: 3706 case Iop_ReinterpI32asF32: 3707 case Iop_ReinterpF32asI32: 3708 case Iop_ReinterpI64asD64: 3709 case Iop_ReinterpD64asI64: 3710 case Iop_NotV256: 3711 case Iop_NotV128: 3712 case Iop_Not64: 3713 case Iop_Not32: 3714 case Iop_Not16: 3715 case Iop_Not8: 3716 case Iop_Not1: 3717 return vatom; 3718 3719 case Iop_CmpNEZ8x8: 3720 case Iop_Cnt8x8: 3721 case Iop_Clz8Sx8: 3722 case Iop_Cls8Sx8: 3723 case Iop_Abs8x8: 3724 return mkPCast8x8(mce, vatom); 3725 3726 case Iop_CmpNEZ8x16: 3727 case Iop_Cnt8x16: 3728 case Iop_Clz8Sx16: 3729 case Iop_Cls8Sx16: 3730 case Iop_Abs8x16: 3731 return mkPCast8x16(mce, vatom); 3732 3733 case Iop_CmpNEZ16x4: 3734 case Iop_Clz16Sx4: 3735 case Iop_Cls16Sx4: 3736 case Iop_Abs16x4: 3737 return mkPCast16x4(mce, vatom); 3738 3739 case Iop_CmpNEZ16x8: 3740 case Iop_Clz16Sx8: 3741 case Iop_Cls16Sx8: 3742 case Iop_Abs16x8: 3743 return mkPCast16x8(mce, vatom); 3744 3745 case Iop_CmpNEZ32x2: 3746 case Iop_Clz32Sx2: 3747 case Iop_Cls32Sx2: 3748 case Iop_FtoI32Ux2_RZ: 3749 case Iop_FtoI32Sx2_RZ: 3750 case Iop_Abs32x2: 3751 return mkPCast32x2(mce, vatom); 3752 3753 case Iop_CmpNEZ32x4: 3754 case Iop_Clz32Sx4: 3755 case Iop_Cls32Sx4: 3756 case Iop_FtoI32Ux4_RZ: 3757 case Iop_FtoI32Sx4_RZ: 3758 case Iop_Abs32x4: 3759 return mkPCast32x4(mce, vatom); 3760 3761 case Iop_CmpwNEZ64: 3762 return mkPCastTo(mce, Ity_I64, vatom); 3763 3764 case Iop_CmpNEZ64x2: 3765 return mkPCast64x2(mce, vatom); 3766 3767 case Iop_NarrowUn16to8x8: 3768 case Iop_NarrowUn32to16x4: 3769 case Iop_NarrowUn64to32x2: 3770 case Iop_QNarrowUn16Sto8Sx8: 3771 case Iop_QNarrowUn16Sto8Ux8: 3772 case Iop_QNarrowUn16Uto8Ux8: 3773 case Iop_QNarrowUn32Sto16Sx4: 3774 case Iop_QNarrowUn32Sto16Ux4: 3775 case Iop_QNarrowUn32Uto16Ux4: 3776 case Iop_QNarrowUn64Sto32Sx2: 3777 case Iop_QNarrowUn64Sto32Ux2: 3778 case Iop_QNarrowUn64Uto32Ux2: 3779 return vectorNarrowUnV128(mce, op, vatom); 3780 3781 case Iop_Widen8Sto16x8: 3782 case Iop_Widen8Uto16x8: 3783 case Iop_Widen16Sto32x4: 3784 case Iop_Widen16Uto32x4: 3785 case Iop_Widen32Sto64x2: 3786 case Iop_Widen32Uto64x2: 3787 return vectorWidenI64(mce, op, vatom); 3788 3789 case Iop_PwAddL32Ux2: 3790 case Iop_PwAddL32Sx2: 3791 return mkPCastTo(mce, Ity_I64, 3792 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom)))); 3793 3794 case Iop_PwAddL16Ux4: 3795 case Iop_PwAddL16Sx4: 3796 return mkPCast32x2(mce, 3797 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom)))); 3798 3799 case Iop_PwAddL8Ux8: 3800 case Iop_PwAddL8Sx8: 3801 return mkPCast16x4(mce, 3802 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom)))); 3803 3804 case Iop_PwAddL32Ux4: 3805 case Iop_PwAddL32Sx4: 3806 return mkPCast64x2(mce, 3807 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom)))); 3808 3809 case Iop_PwAddL16Ux8: 3810 case Iop_PwAddL16Sx8: 3811 return mkPCast32x4(mce, 3812 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom)))); 3813 3814 case Iop_PwAddL8Ux16: 3815 case Iop_PwAddL8Sx16: 
3816 return mkPCast16x8(mce, 3817 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom)))); 3818 3819 case Iop_I64UtoF32: 3820 default: 3821 ppIROp(op); 3822 VG_(tool_panic)("memcheck:expr2vbits_Unop"); 3823 } 3824 } 3825 3826 3827 /* Worker function; do not call directly. */ 3828 static 3829 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, 3830 IREndness end, IRType ty, 3831 IRAtom* addr, UInt bias ) 3832 { 3833 void* helper; 3834 Char* hname; 3835 IRDirty* di; 3836 IRTemp datavbits; 3837 IRAtom* addrAct; 3838 3839 tl_assert(isOriginalAtom(mce,addr)); 3840 tl_assert(end == Iend_LE || end == Iend_BE); 3841 3842 /* First, emit a definedness test for the address. This also sets 3843 the address (shadow) to 'defined' following the test. */ 3844 complainIfUndefined( mce, addr, NULL ); 3845 3846 /* Now cook up a call to the relevant helper function, to read the 3847 data V bits from shadow memory. */ 3848 ty = shadowTypeV(ty); 3849 3850 if (end == Iend_LE) { 3851 switch (ty) { 3852 case Ity_I64: helper = &MC_(helperc_LOADV64le); 3853 hname = "MC_(helperc_LOADV64le)"; 3854 break; 3855 case Ity_I32: helper = &MC_(helperc_LOADV32le); 3856 hname = "MC_(helperc_LOADV32le)"; 3857 break; 3858 case Ity_I16: helper = &MC_(helperc_LOADV16le); 3859 hname = "MC_(helperc_LOADV16le)"; 3860 break; 3861 case Ity_I8: helper = &MC_(helperc_LOADV8); 3862 hname = "MC_(helperc_LOADV8)"; 3863 break; 3864 default: ppIRType(ty); 3865 VG_(tool_panic)("memcheck:do_shadow_Load(LE)"); 3866 } 3867 } else { 3868 switch (ty) { 3869 case Ity_I64: helper = &MC_(helperc_LOADV64be); 3870 hname = "MC_(helperc_LOADV64be)"; 3871 break; 3872 case Ity_I32: helper = &MC_(helperc_LOADV32be); 3873 hname = "MC_(helperc_LOADV32be)"; 3874 break; 3875 case Ity_I16: helper = &MC_(helperc_LOADV16be); 3876 hname = "MC_(helperc_LOADV16be)"; 3877 break; 3878 case Ity_I8: helper = &MC_(helperc_LOADV8); 3879 hname = "MC_(helperc_LOADV8)"; 3880 break; 3881 default: ppIRType(ty); 3882 VG_(tool_panic)("memcheck:do_shadow_Load(BE)"); 3883 } 3884 } 3885 3886 /* Generate the actual address into addrAct. */ 3887 if (bias == 0) { 3888 addrAct = addr; 3889 } else { 3890 IROp mkAdd; 3891 IRAtom* eBias; 3892 IRType tyAddr = mce->hWordTy; 3893 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); 3894 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; 3895 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias); 3896 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) ); 3897 } 3898 3899 /* We need to have a place to park the V bits we're just about to 3900 read. 
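
      To make the effect concrete: for a 32-bit little-endian load from
      an address 'a' (a placeholder name; the rendering below is only a
      rough sketch, not literal IR-printer output), the instrumented
      block gains, in addition to the address-definedness check emitted
      above, something along the lines of

         t_v = DIRTY ::: MC_(helperc_LOADV32le)(a)

      where t_v is a fresh V-shadow temp of type I32, and it is t_v
      that this function hands back to its caller.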
*/ 3901 datavbits = newTemp(mce, ty, VSh); 3902 di = unsafeIRDirty_1_N( datavbits, 3903 1/*regparms*/, 3904 hname, VG_(fnptr_to_fnentry)( helper ), 3905 mkIRExprVec_1( addrAct )); 3906 setHelperAnns( mce, di ); 3907 stmt( 'V', mce, IRStmt_Dirty(di) ); 3908 3909 return mkexpr(datavbits); 3910 } 3911 3912 3913 static 3914 IRAtom* expr2vbits_Load ( MCEnv* mce, 3915 IREndness end, IRType ty, 3916 IRAtom* addr, UInt bias ) 3917 { 3918 tl_assert(end == Iend_LE || end == Iend_BE); 3919 switch (shadowTypeV(ty)) { 3920 case Ity_I8: 3921 case Ity_I16: 3922 case Ity_I32: 3923 case Ity_I64: 3924 return expr2vbits_Load_WRK(mce, end, ty, addr, bias); 3925 case Ity_V128: { 3926 IRAtom *v64hi, *v64lo; 3927 if (end == Iend_LE) { 3928 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0); 3929 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8); 3930 } else { 3931 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0); 3932 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8); 3933 } 3934 return assignNew( 'V', mce, 3935 Ity_V128, 3936 binop(Iop_64HLtoV128, v64hi, v64lo)); 3937 } 3938 case Ity_V256: { 3939 /* V256-bit case -- phrased in terms of 64 bit units (Qs), 3940 with Q3 being the most significant lane. */ 3941 if (end == Iend_BE) goto unhandled; 3942 IRAtom* v64Q0 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0); 3943 IRAtom* v64Q1 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8); 3944 IRAtom* v64Q2 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+16); 3945 IRAtom* v64Q3 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+24); 3946 return assignNew( 'V', mce, 3947 Ity_V256, 3948 IRExpr_Qop(Iop_64x4toV256, 3949 v64Q3, v64Q2, v64Q1, v64Q0)); 3950 } 3951 unhandled: 3952 default: 3953 VG_(tool_panic)("expr2vbits_Load"); 3954 } 3955 } 3956 3957 3958 /* If there is no guard expression or the guard is always TRUE this function 3959 behaves like expr2vbits_Load. If the guard is not true at runtime, an 3960 all-bits-defined bit pattern will be returned. 3961 It is assumed that definedness of GUARD has already been checked at the call 3962 site. */ 3963 static 3964 IRAtom* expr2vbits_guarded_Load ( MCEnv* mce, 3965 IREndness end, IRType ty, 3966 IRAtom* addr, UInt bias, IRAtom *guard ) 3967 { 3968 if (guard) { 3969 IRAtom *cond, *iffalse, *iftrue; 3970 3971 cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard)); 3972 iftrue = assignNew('V', mce, ty, 3973 expr2vbits_Load(mce, end, ty, addr, bias)); 3974 iffalse = assignNew('V', mce, ty, definedOfType(ty)); 3975 3976 return assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, iftrue)); 3977 } 3978 3979 /* No guard expression or unconditional load */ 3980 return expr2vbits_Load(mce, end, ty, addr, bias); 3981 } 3982 3983 3984 static 3985 IRAtom* expr2vbits_Mux0X ( MCEnv* mce, 3986 IRAtom* cond, IRAtom* expr0, IRAtom* exprX ) 3987 { 3988 IRAtom *vbitsC, *vbits0, *vbitsX; 3989 IRType ty; 3990 /* Given Mux0X(cond,expr0,exprX), generate 3991 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#) 3992 That is, steer the V bits like the originals, but trash the 3993 result if the steering value is undefined. This gives 3994 lazy propagation. 
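
      A small worked instance, for a 32-bit result: if cond# is 0 (cond
      fully defined) then PCast32(cond#) is 0x00000000 and the UifU
      leaves the result's V bits as exactly those of whichever arm was
      selected; if cond# is 1 (cond undefined) then PCast32(cond#) is
      0xFFFFFFFF and the UifU marks every bit of the result undefined,
      regardless of expr0# and exprX#.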
*/ 3995 tl_assert(isOriginalAtom(mce, cond)); 3996 tl_assert(isOriginalAtom(mce, expr0)); 3997 tl_assert(isOriginalAtom(mce, exprX)); 3998 3999 vbitsC = expr2vbits(mce, cond); 4000 vbits0 = expr2vbits(mce, expr0); 4001 vbitsX = expr2vbits(mce, exprX); 4002 ty = typeOfIRExpr(mce->sb->tyenv, vbits0); 4003 4004 return 4005 mkUifU(mce, ty, assignNew('V', mce, ty, 4006 IRExpr_Mux0X(cond, vbits0, vbitsX)), 4007 mkPCastTo(mce, ty, vbitsC) ); 4008 } 4009 4010 /* --------- This is the main expression-handling function. --------- */ 4011 4012 static 4013 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e ) 4014 { 4015 switch (e->tag) { 4016 4017 case Iex_Get: 4018 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty ); 4019 4020 case Iex_GetI: 4021 return shadow_GETI( mce, e->Iex.GetI.descr, 4022 e->Iex.GetI.ix, e->Iex.GetI.bias ); 4023 4024 case Iex_RdTmp: 4025 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) ); 4026 4027 case Iex_Const: 4028 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e))); 4029 4030 case Iex_Qop: 4031 return expr2vbits_Qop( 4032 mce, 4033 e->Iex.Qop.details->op, 4034 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2, 4035 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4 4036 ); 4037 4038 case Iex_Triop: 4039 return expr2vbits_Triop( 4040 mce, 4041 e->Iex.Triop.details->op, 4042 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2, 4043 e->Iex.Triop.details->arg3 4044 ); 4045 4046 case Iex_Binop: 4047 return expr2vbits_Binop( 4048 mce, 4049 e->Iex.Binop.op, 4050 e->Iex.Binop.arg1, e->Iex.Binop.arg2 4051 ); 4052 4053 case Iex_Unop: 4054 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg ); 4055 4056 case Iex_Load: 4057 return expr2vbits_Load( mce, e->Iex.Load.end, 4058 e->Iex.Load.ty, 4059 e->Iex.Load.addr, 0/*addr bias*/ ); 4060 4061 case Iex_CCall: 4062 return mkLazyN( mce, e->Iex.CCall.args, 4063 e->Iex.CCall.retty, 4064 e->Iex.CCall.cee ); 4065 4066 case Iex_Mux0X: 4067 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0, 4068 e->Iex.Mux0X.exprX); 4069 4070 default: 4071 VG_(printf)("\n"); 4072 ppIRExpr(e); 4073 VG_(printf)("\n"); 4074 VG_(tool_panic)("memcheck: expr2vbits"); 4075 } 4076 } 4077 4078 /*------------------------------------------------------------*/ 4079 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/ 4080 /*------------------------------------------------------------*/ 4081 4082 /* Widen a value to the host word size. */ 4083 4084 static 4085 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom ) 4086 { 4087 IRType ty, tyH; 4088 4089 /* vatom is vbits-value and as such can only have a shadow type. 
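
      For example, on a 64-bit host an Ity_I8 vatom comes back as

         32Uto64( 8Uto32( vatom ) )

      so the payload V bits sit in the low 8 bits and the zero-filled
      padding reads as 'defined'.  That is harmless: the only use of
      the widened word here is to pass vdata to a STOREV helper, which
      is asked to store only the original (narrow) number of V-bit
      bytes.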
*/ 4090 tl_assert(isShadowAtom(mce,vatom)); 4091 4092 ty = typeOfIRExpr(mce->sb->tyenv, vatom); 4093 tyH = mce->hWordTy; 4094 4095 if (tyH == Ity_I32) { 4096 switch (ty) { 4097 case Ity_I32: 4098 return vatom; 4099 case Ity_I16: 4100 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom)); 4101 case Ity_I8: 4102 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom)); 4103 default: 4104 goto unhandled; 4105 } 4106 } else 4107 if (tyH == Ity_I64) { 4108 switch (ty) { 4109 case Ity_I32: 4110 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom)); 4111 case Ity_I16: 4112 return assignNew('V', mce, tyH, unop(Iop_32Uto64, 4113 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom)))); 4114 case Ity_I8: 4115 return assignNew('V', mce, tyH, unop(Iop_32Uto64, 4116 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom)))); 4117 default: 4118 goto unhandled; 4119 } 4120 } else { 4121 goto unhandled; 4122 } 4123 unhandled: 4124 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n"); 4125 VG_(tool_panic)("zwidenToHostWord"); 4126 } 4127 4128 4129 /* Generate a shadow store. addr is always the original address atom. 4130 You can pass in either originals or V-bits for the data atom, but 4131 obviously not both. guard :: Ity_I1 controls whether the store 4132 really happens; NULL means it unconditionally does. Note that 4133 guard itself is not checked for definedness; the caller of this 4134 function must do that if necessary. */ 4135 4136 static 4137 void do_shadow_Store ( MCEnv* mce, 4138 IREndness end, 4139 IRAtom* addr, UInt bias, 4140 IRAtom* data, IRAtom* vdata, 4141 IRAtom* guard ) 4142 { 4143 IROp mkAdd; 4144 IRType ty, tyAddr; 4145 void* helper = NULL; 4146 Char* hname = NULL; 4147 IRConst* c; 4148 4149 tyAddr = mce->hWordTy; 4150 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; 4151 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); 4152 tl_assert( end == Iend_LE || end == Iend_BE ); 4153 4154 if (data) { 4155 tl_assert(!vdata); 4156 tl_assert(isOriginalAtom(mce, data)); 4157 tl_assert(bias == 0); 4158 vdata = expr2vbits( mce, data ); 4159 } else { 4160 tl_assert(vdata); 4161 } 4162 4163 tl_assert(isOriginalAtom(mce,addr)); 4164 tl_assert(isShadowAtom(mce,vdata)); 4165 4166 if (guard) { 4167 tl_assert(isOriginalAtom(mce, guard)); 4168 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); 4169 } 4170 4171 ty = typeOfIRExpr(mce->sb->tyenv, vdata); 4172 4173 // If we're not doing undefined value checking, pretend that this value 4174 // is "all valid". That lets Vex's optimiser remove some of the V bit 4175 // shadow computation ops that precede it. 4176 if (MC_(clo_mc_level) == 1) { 4177 switch (ty) { 4178 case Ity_V256: // V256 weirdness -- used four times 4179 c = IRConst_V256(V_BITS32_DEFINED); break; 4180 case Ity_V128: // V128 weirdness -- used twice 4181 c = IRConst_V128(V_BITS16_DEFINED); break; 4182 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break; 4183 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break; 4184 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break; 4185 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break; 4186 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)"); 4187 } 4188 vdata = IRExpr_Const( c ); 4189 } 4190 4191 /* First, emit a definedness test for the address. This also sets 4192 the address (shadow) to 'defined' following the test. */ 4193 complainIfUndefined( mce, addr, guard ); 4194 4195 /* Now decide which helper function to call to write the data V 4196 bits into shadow memory. 
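
      As an illustrative summary of what follows (placeholder names:
      'a' for the address, 'd#' for vdata): a 32-bit little-endian
      store ends up as roughly

         DIRTY ::: MC_(helperc_STOREV32le)( a+bias, zwidenToHostWord(d#) )

      while V128 and V256 data are first split into 64-bit lanes with
      Iop_V128to64 / Iop_V128HIto64 / Iop_V256to64_0..3, and the 64-bit
      helper is called once per lane at the appropriate offset.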
*/ 4197 if (end == Iend_LE) { 4198 switch (ty) { 4199 case Ity_V256: /* we'll use the helper four times */ 4200 case Ity_V128: /* we'll use the helper twice */ 4201 case Ity_I64: helper = &MC_(helperc_STOREV64le); 4202 hname = "MC_(helperc_STOREV64le)"; 4203 break; 4204 case Ity_I32: helper = &MC_(helperc_STOREV32le); 4205 hname = "MC_(helperc_STOREV32le)"; 4206 break; 4207 case Ity_I16: helper = &MC_(helperc_STOREV16le); 4208 hname = "MC_(helperc_STOREV16le)"; 4209 break; 4210 case Ity_I8: helper = &MC_(helperc_STOREV8); 4211 hname = "MC_(helperc_STOREV8)"; 4212 break; 4213 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)"); 4214 } 4215 } else { 4216 switch (ty) { 4217 case Ity_V128: /* we'll use the helper twice */ 4218 case Ity_I64: helper = &MC_(helperc_STOREV64be); 4219 hname = "MC_(helperc_STOREV64be)"; 4220 break; 4221 case Ity_I32: helper = &MC_(helperc_STOREV32be); 4222 hname = "MC_(helperc_STOREV32be)"; 4223 break; 4224 case Ity_I16: helper = &MC_(helperc_STOREV16be); 4225 hname = "MC_(helperc_STOREV16be)"; 4226 break; 4227 case Ity_I8: helper = &MC_(helperc_STOREV8); 4228 hname = "MC_(helperc_STOREV8)"; 4229 break; 4230 /* Note, no V256 case here, because no big-endian target that 4231 we support, has 256 vectors. */ 4232 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)"); 4233 } 4234 } 4235 4236 if (UNLIKELY(ty == Ity_V256)) { 4237 4238 /* V256-bit case -- phrased in terms of 64 bit units (Qs), with 4239 Q3 being the most significant lane. */ 4240 /* These are the offsets of the Qs in memory. */ 4241 Int offQ0, offQ1, offQ2, offQ3; 4242 4243 /* Various bits for constructing the 4 lane helper calls */ 4244 IRDirty *diQ0, *diQ1, *diQ2, *diQ3; 4245 IRAtom *addrQ0, *addrQ1, *addrQ2, *addrQ3; 4246 IRAtom *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3; 4247 IRAtom *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3; 4248 4249 if (end == Iend_LE) { 4250 offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24; 4251 } else { 4252 offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24; 4253 } 4254 4255 eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0); 4256 addrQ0 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) ); 4257 vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata)); 4258 diQ0 = unsafeIRDirty_0_N( 4259 1/*regparms*/, 4260 hname, VG_(fnptr_to_fnentry)( helper ), 4261 mkIRExprVec_2( addrQ0, vdataQ0 ) 4262 ); 4263 4264 eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1); 4265 addrQ1 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) ); 4266 vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata)); 4267 diQ1 = unsafeIRDirty_0_N( 4268 1/*regparms*/, 4269 hname, VG_(fnptr_to_fnentry)( helper ), 4270 mkIRExprVec_2( addrQ1, vdataQ1 ) 4271 ); 4272 4273 eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2); 4274 addrQ2 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) ); 4275 vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata)); 4276 diQ2 = unsafeIRDirty_0_N( 4277 1/*regparms*/, 4278 hname, VG_(fnptr_to_fnentry)( helper ), 4279 mkIRExprVec_2( addrQ2, vdataQ2 ) 4280 ); 4281 4282 eBiasQ3 = tyAddr==Ity_I32 ? 
mkU32(bias+offQ3) : mkU64(bias+offQ3); 4283 addrQ3 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) ); 4284 vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata)); 4285 diQ3 = unsafeIRDirty_0_N( 4286 1/*regparms*/, 4287 hname, VG_(fnptr_to_fnentry)( helper ), 4288 mkIRExprVec_2( addrQ3, vdataQ3 ) 4289 ); 4290 4291 if (guard) 4292 diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard; 4293 4294 setHelperAnns( mce, diQ0 ); 4295 setHelperAnns( mce, diQ1 ); 4296 setHelperAnns( mce, diQ2 ); 4297 setHelperAnns( mce, diQ3 ); 4298 stmt( 'V', mce, IRStmt_Dirty(diQ0) ); 4299 stmt( 'V', mce, IRStmt_Dirty(diQ1) ); 4300 stmt( 'V', mce, IRStmt_Dirty(diQ2) ); 4301 stmt( 'V', mce, IRStmt_Dirty(diQ3) ); 4302 4303 } 4304 else if (UNLIKELY(ty == Ity_V128)) { 4305 4306 /* V128-bit case */ 4307 /* See comment in next clause re 64-bit regparms */ 4308 /* also, need to be careful about endianness */ 4309 4310 Int offLo64, offHi64; 4311 IRDirty *diLo64, *diHi64; 4312 IRAtom *addrLo64, *addrHi64; 4313 IRAtom *vdataLo64, *vdataHi64; 4314 IRAtom *eBiasLo64, *eBiasHi64; 4315 4316 if (end == Iend_LE) { 4317 offLo64 = 0; 4318 offHi64 = 8; 4319 } else { 4320 offLo64 = 8; 4321 offHi64 = 0; 4322 } 4323 4324 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64); 4325 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) ); 4326 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata)); 4327 diLo64 = unsafeIRDirty_0_N( 4328 1/*regparms*/, 4329 hname, VG_(fnptr_to_fnentry)( helper ), 4330 mkIRExprVec_2( addrLo64, vdataLo64 ) 4331 ); 4332 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64); 4333 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) ); 4334 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata)); 4335 diHi64 = unsafeIRDirty_0_N( 4336 1/*regparms*/, 4337 hname, VG_(fnptr_to_fnentry)( helper ), 4338 mkIRExprVec_2( addrHi64, vdataHi64 ) 4339 ); 4340 if (guard) diLo64->guard = guard; 4341 if (guard) diHi64->guard = guard; 4342 setHelperAnns( mce, diLo64 ); 4343 setHelperAnns( mce, diHi64 ); 4344 stmt( 'V', mce, IRStmt_Dirty(diLo64) ); 4345 stmt( 'V', mce, IRStmt_Dirty(diHi64) ); 4346 4347 } else { 4348 4349 IRDirty *di; 4350 IRAtom *addrAct; 4351 4352 /* 8/16/32/64-bit cases */ 4353 /* Generate the actual address into addrAct. */ 4354 if (bias == 0) { 4355 addrAct = addr; 4356 } else { 4357 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias); 4358 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias)); 4359 } 4360 4361 if (ty == Ity_I64) { 4362 /* We can't do this with regparm 2 on 32-bit platforms, since 4363 the back ends aren't clever enough to handle 64-bit 4364 regparm args. Therefore be different. */ 4365 di = unsafeIRDirty_0_N( 4366 1/*regparms*/, 4367 hname, VG_(fnptr_to_fnentry)( helper ), 4368 mkIRExprVec_2( addrAct, vdata ) 4369 ); 4370 } else { 4371 di = unsafeIRDirty_0_N( 4372 2/*regparms*/, 4373 hname, VG_(fnptr_to_fnentry)( helper ), 4374 mkIRExprVec_2( addrAct, 4375 zwidenToHostWord( mce, vdata )) 4376 ); 4377 } 4378 if (guard) di->guard = guard; 4379 setHelperAnns( mce, di ); 4380 stmt( 'V', mce, IRStmt_Dirty(di) ); 4381 } 4382 4383 } 4384 4385 4386 /* Do lazy pessimistic propagation through a dirty helper call, by 4387 looking at the annotations on it. This is the most complex part of 4388 Memcheck. 
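
      In outline, the scheme implemented below is (ignoring guards,
      repeat-regions and the chunking details for the moment):

         curr = defined:I32
         for each unmasked helper argument A:
            curr = UifU32( curr, PCast-to-I32( A# ) )
         for each guest-state region the call reads:
            curr = UifU32( curr, PCast-to-I32( shadow of that region ) )
         for each 4/2/1-byte chunk of memory the call reads:
            curr = UifU32( curr, PCast-to-I32( shadow of that chunk ) )

      and then 'curr', PCast'd to each destination's type, is written
      to the return temp's shadow, to the shadows of the guest-state
      regions written, and to the shadows of the memory written.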
*/ 4389 4390 static IRType szToITy ( Int n ) 4391 { 4392 switch (n) { 4393 case 1: return Ity_I8; 4394 case 2: return Ity_I16; 4395 case 4: return Ity_I32; 4396 case 8: return Ity_I64; 4397 default: VG_(tool_panic)("szToITy(memcheck)"); 4398 } 4399 } 4400 4401 static 4402 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d ) 4403 { 4404 Int i, k, n, toDo, gSz, gOff; 4405 IRAtom *src, *here, *curr; 4406 IRType tySrc, tyDst; 4407 IRTemp dst; 4408 IREndness end; 4409 4410 /* What's the native endianness? We need to know this. */ 4411 # if defined(VG_BIGENDIAN) 4412 end = Iend_BE; 4413 # elif defined(VG_LITTLEENDIAN) 4414 end = Iend_LE; 4415 # else 4416 # error "Unknown endianness" 4417 # endif 4418 4419 /* First check the guard. */ 4420 complainIfUndefined(mce, d->guard, NULL); 4421 4422 /* Now round up all inputs and PCast over them. */ 4423 curr = definedOfType(Ity_I32); 4424 4425 /* Inputs: unmasked args 4426 Note: arguments are evaluated REGARDLESS of the guard expression */ 4427 for (i = 0; d->args[i]; i++) { 4428 if (d->cee->mcx_mask & (1<<i)) { 4429 /* ignore this arg */ 4430 } else { 4431 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) ); 4432 curr = mkUifU32(mce, here, curr); 4433 } 4434 } 4435 4436 /* Inputs: guest state that we read. */ 4437 for (i = 0; i < d->nFxState; i++) { 4438 tl_assert(d->fxState[i].fx != Ifx_None); 4439 if (d->fxState[i].fx == Ifx_Write) 4440 continue; 4441 4442 /* Enumerate the described state segments */ 4443 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 4444 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 4445 gSz = d->fxState[i].size; 4446 4447 /* Ignore any sections marked as 'always defined'. */ 4448 if (isAlwaysDefd(mce, gOff, gSz)) { 4449 if (0) 4450 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n", 4451 gOff, gSz); 4452 continue; 4453 } 4454 4455 /* This state element is read or modified. So we need to 4456 consider it. If larger than 8 bytes, deal with it in 4457 8-byte chunks. */ 4458 while (True) { 4459 tl_assert(gSz >= 0); 4460 if (gSz == 0) break; 4461 n = gSz <= 8 ? gSz : 8; 4462 /* update 'curr' with UifU of the state slice 4463 gOff .. gOff+n-1 */ 4464 tySrc = szToITy( n ); 4465 4466 /* Observe the guard expression. If it is false use an 4467 all-bits-defined bit pattern */ 4468 IRAtom *cond, *iffalse, *iftrue; 4469 4470 cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, d->guard)); 4471 iftrue = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc)); 4472 iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc)); 4473 src = assignNew('V', mce, tySrc, 4474 IRExpr_Mux0X(cond, iffalse, iftrue)); 4475 4476 here = mkPCastTo( mce, Ity_I32, src ); 4477 curr = mkUifU32(mce, here, curr); 4478 gSz -= n; 4479 gOff += n; 4480 } 4481 } 4482 } 4483 4484 /* Inputs: memory. First set up some info needed regardless of 4485 whether we're doing reads or writes. */ 4486 4487 if (d->mFx != Ifx_None) { 4488 /* Because we may do multiple shadow loads/stores from the same 4489 base address, it's best to do a single test of its 4490 definedness right now. Post-instrumentation optimisation 4491 should remove all but this test. 
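
      For instance, if the call reads 7 bytes at mAddr, the loop below
      issues three guarded shadow loads (an I32 at offset 0, an I16 at
      offset 4 and an I8 at offset 6), and each result is PCast'd to
      I32 and UifU'd into curr.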
*/ 4492 IRType tyAddr; 4493 tl_assert(d->mAddr); 4494 complainIfUndefined(mce, d->mAddr, d->guard); 4495 4496 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr); 4497 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64); 4498 tl_assert(tyAddr == mce->hWordTy); /* not really right */ 4499 } 4500 4501 /* Deal with memory inputs (reads or modifies) */ 4502 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) { 4503 toDo = d->mSize; 4504 /* chew off 32-bit chunks. We don't care about the endianness 4505 since it's all going to be condensed down to a single bit, 4506 but nevertheless choose an endianness which is hopefully 4507 native to the platform. */ 4508 while (toDo >= 4) { 4509 here = mkPCastTo( 4510 mce, Ity_I32, 4511 expr2vbits_guarded_Load ( mce, end, Ity_I32, d->mAddr, 4512 d->mSize - toDo, d->guard ) 4513 ); 4514 curr = mkUifU32(mce, here, curr); 4515 toDo -= 4; 4516 } 4517 /* chew off 16-bit chunks */ 4518 while (toDo >= 2) { 4519 here = mkPCastTo( 4520 mce, Ity_I32, 4521 expr2vbits_guarded_Load ( mce, end, Ity_I16, d->mAddr, 4522 d->mSize - toDo, d->guard ) 4523 ); 4524 curr = mkUifU32(mce, here, curr); 4525 toDo -= 2; 4526 } 4527 /* chew off the remaining 8-bit chunk, if any */ 4528 if (toDo == 1) { 4529 here = mkPCastTo( 4530 mce, Ity_I32, 4531 expr2vbits_guarded_Load ( mce, end, Ity_I8, d->mAddr, 4532 d->mSize - toDo, d->guard ) 4533 ); 4534 curr = mkUifU32(mce, here, curr); 4535 toDo -= 1; 4536 } 4537 tl_assert(toDo == 0); 4538 } 4539 4540 /* Whew! So curr is a 32-bit V-value summarising pessimistically 4541 all the inputs to the helper. Now we need to re-distribute the 4542 results to all destinations. */ 4543 4544 /* Outputs: the destination temporary, if there is one. */ 4545 if (d->tmp != IRTemp_INVALID) { 4546 dst = findShadowTmpV(mce, d->tmp); 4547 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp); 4548 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) ); 4549 } 4550 4551 /* Outputs: guest state that we write or modify. */ 4552 for (i = 0; i < d->nFxState; i++) { 4553 tl_assert(d->fxState[i].fx != Ifx_None); 4554 if (d->fxState[i].fx == Ifx_Read) 4555 continue; 4556 4557 /* Enumerate the described state segments */ 4558 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 4559 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 4560 gSz = d->fxState[i].size; 4561 4562 /* Ignore any sections marked as 'always defined'. */ 4563 if (isAlwaysDefd(mce, gOff, gSz)) 4564 continue; 4565 4566 /* This state element is written or modified. So we need to 4567 consider it. If larger than 8 bytes, deal with it in 4568 8-byte chunks. */ 4569 while (True) { 4570 tl_assert(gSz >= 0); 4571 if (gSz == 0) break; 4572 n = gSz <= 8 ? gSz : 8; 4573 /* Write suitably-casted 'curr' to the state slice 4574 gOff .. gOff+n-1 */ 4575 tyDst = szToITy( n ); 4576 do_shadow_PUT( mce, gOff, 4577 NULL, /* original atom */ 4578 mkPCastTo( mce, tyDst, curr ), d->guard ); 4579 gSz -= n; 4580 gOff += n; 4581 } 4582 } 4583 } 4584 4585 /* Outputs: memory that we write or modify. Same comments about 4586 endianness as above apply. 
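
      For instance, a 6-byte written area is updated with two guarded
      shadow stores: PCast-to-I32 of curr at offset 0 and PCast-to-I16
      of curr at offset 4.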
*/ 4587 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) { 4588 toDo = d->mSize; 4589 /* chew off 32-bit chunks */ 4590 while (toDo >= 4) { 4591 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 4592 NULL, /* original data */ 4593 mkPCastTo( mce, Ity_I32, curr ), 4594 d->guard ); 4595 toDo -= 4; 4596 } 4597 /* chew off 16-bit chunks */ 4598 while (toDo >= 2) { 4599 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 4600 NULL, /* original data */ 4601 mkPCastTo( mce, Ity_I16, curr ), 4602 d->guard ); 4603 toDo -= 2; 4604 } 4605 /* chew off the remaining 8-bit chunk, if any */ 4606 if (toDo == 1) { 4607 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 4608 NULL, /* original data */ 4609 mkPCastTo( mce, Ity_I8, curr ), 4610 d->guard ); 4611 toDo -= 1; 4612 } 4613 tl_assert(toDo == 0); 4614 } 4615 4616 } 4617 4618 4619 /* We have an ABI hint telling us that [base .. base+len-1] is to 4620 become undefined ("writable"). Generate code to call a helper to 4621 notify the A/V bit machinery of this fact. 4622 4623 We call 4624 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, 4625 Addr nia ); 4626 */ 4627 static 4628 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia ) 4629 { 4630 IRDirty* di; 4631 /* Minor optimisation: if not doing origin tracking, ignore the 4632 supplied nia and pass zero instead. This is on the basis that 4633 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can 4634 almost always generate a shorter instruction to put zero into a 4635 register than any other value. */ 4636 if (MC_(clo_mc_level) < 3) 4637 nia = mkIRExpr_HWord(0); 4638 4639 di = unsafeIRDirty_0_N( 4640 0/*regparms*/, 4641 "MC_(helperc_MAKE_STACK_UNINIT)", 4642 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ), 4643 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia ) 4644 ); 4645 stmt( 'V', mce, IRStmt_Dirty(di) ); 4646 } 4647 4648 4649 /* ------ Dealing with IRCAS (big and complex) ------ */ 4650 4651 /* FWDS */ 4652 static IRAtom* gen_load_b ( MCEnv* mce, Int szB, 4653 IRAtom* baseaddr, Int offset ); 4654 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 ); 4655 static void gen_store_b ( MCEnv* mce, Int szB, 4656 IRAtom* baseaddr, Int offset, IRAtom* dataB, 4657 IRAtom* guard ); 4658 4659 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas ); 4660 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ); 4661 4662 4663 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both 4664 IRExpr.Consts, else this asserts. If they are both Consts, it 4665 doesn't do anything. So that just leaves the RdTmp case. 4666 4667 In which case: this assigns the shadow value SHADOW to the IR 4668 shadow temporary associated with ORIG. That is, ORIG, being an 4669 original temporary, will have a shadow temporary associated with 4670 it. However, in the case envisaged here, there will so far have 4671 been no IR emitted to actually write a shadow value into that 4672 temporary. What this routine does is to (emit IR to) copy the 4673 value in SHADOW into said temporary, so that after this call, 4674 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the 4675 value in SHADOW. 4676 4677 Point is to allow callers to compute "by hand" a shadow value for 4678 ORIG, and force it to be associated with ORIG. 4679 4680 How do we know that that shadow associated with ORIG has not so far 4681 been assigned to? Well, we don't per se know that, but supposing 4682 it had. 
Then this routine would create a second assignment to it, 4683 and later the IR sanity checker would barf. But that never 4684 happens. QED. 4685 */ 4686 static void bind_shadow_tmp_to_orig ( UChar how, 4687 MCEnv* mce, 4688 IRAtom* orig, IRAtom* shadow ) 4689 { 4690 tl_assert(isOriginalAtom(mce, orig)); 4691 tl_assert(isShadowAtom(mce, shadow)); 4692 switch (orig->tag) { 4693 case Iex_Const: 4694 tl_assert(shadow->tag == Iex_Const); 4695 break; 4696 case Iex_RdTmp: 4697 tl_assert(shadow->tag == Iex_RdTmp); 4698 if (how == 'V') { 4699 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp), 4700 shadow); 4701 } else { 4702 tl_assert(how == 'B'); 4703 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp), 4704 shadow); 4705 } 4706 break; 4707 default: 4708 tl_assert(0); 4709 } 4710 } 4711 4712 4713 static 4714 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas ) 4715 { 4716 /* Scheme is (both single- and double- cases): 4717 4718 1. fetch data#,dataB (the proposed new value) 4719 4720 2. fetch expd#,expdB (what we expect to see at the address) 4721 4722 3. check definedness of address 4723 4724 4. load old#,oldB from shadow memory; this also checks 4725 addressibility of the address 4726 4727 5. the CAS itself 4728 4729 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below. 4730 4731 7. if "expected == old" (as computed by (6)) 4732 store data#,dataB to shadow memory 4733 4734 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores 4735 'data' but 7 stores 'data#'. Hence it is possible for the 4736 shadow data to be incorrectly checked and/or updated: 4737 4738 * 7 is at least gated correctly, since the 'expected == old' 4739 condition is derived from outputs of 5. However, the shadow 4740 write could happen too late: imagine after 5 we are 4741 descheduled, a different thread runs, writes a different 4742 (shadow) value at the address, and then we resume, hence 4743 overwriting the shadow value written by the other thread. 4744 4745 Because the original memory access is atomic, there's no way to 4746 make both the original and shadow accesses into a single atomic 4747 thing, hence this is unavoidable. 4748 4749 At least as Valgrind stands, I don't think it's a problem, since 4750 we're single threaded *and* we guarantee that there are no 4751 context switches during the execution of any specific superblock 4752 -- context switches can only happen at superblock boundaries. 4753 4754 If Valgrind ever becomes MT in the future, then it might be more 4755 of a problem. A possible kludge would be to artificially 4756 associate with the location, a lock, which we must acquire and 4757 release around the transaction as a whole. Hmm, that probably 4758 would't work properly since it only guards us against other 4759 threads doing CASs on the same location, not against other 4760 threads doing normal reads and writes. 4761 4762 ------------------------------------------------------------ 4763 4764 COMMENT_ON_CasCmpEQ: 4765 4766 Note two things. Firstly, in the sequence above, we compute 4767 "expected == old", but we don't check definedness of it. Why 4768 not? Also, the x86 and amd64 front ends use 4769 Iop_CmpCas{EQ,NE}{8,16,32,64} comparisons to make the equivalent 4770 determination (expected == old ?) for themselves, and we also 4771 don't check definedness for those primops; we just say that the 4772 result is defined. Why? Details follow. 
4773 4774 x86/amd64 contains various forms of locked insns: 4775 * lock prefix before all basic arithmetic insn; 4776 eg lock xorl %reg1,(%reg2) 4777 * atomic exchange reg-mem 4778 * compare-and-swaps 4779 4780 Rather than attempt to represent them all, which would be a 4781 royal PITA, I used a result from Maurice Herlihy 4782 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he 4783 demonstrates that compare-and-swap is a primitive more general 4784 than the other two, and so can be used to represent all of them. 4785 So the translation scheme for (eg) lock incl (%reg) is as 4786 follows: 4787 4788 again: 4789 old = * %reg 4790 new = old + 1 4791 atomically { if (* %reg == old) { * %reg = new } else { goto again } } 4792 4793 The "atomically" is the CAS bit. The scheme is always the same: 4794 get old value from memory, compute new value, atomically stuff 4795 new value back in memory iff the old value has not changed (iow, 4796 no other thread modified it in the meantime). If it has changed 4797 then we've been out-raced and we have to start over. 4798 4799 Now that's all very neat, but it has the bad side effect of 4800 introducing an explicit equality test into the translation. 4801 Consider the behaviour of said code on a memory location which 4802 is uninitialised. We will wind up doing a comparison on 4803 uninitialised data, and mc duly complains. 4804 4805 What's difficult about this is, the common case is that the 4806 location is uncontended, and so we're usually comparing the same 4807 value (* %reg) with itself. So we shouldn't complain even if it 4808 is undefined. But mc doesn't know that. 4809 4810 My solution is to mark the == in the IR specially, so as to tell 4811 mc that it almost certainly compares a value with itself, and we 4812 should just regard the result as always defined. Rather than 4813 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into 4814 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else. 4815 4816 So there's always the question of, can this give a false 4817 negative? eg, imagine that initially, * %reg is defined; and we 4818 read that; but then in the gap between the read and the CAS, a 4819 different thread writes an undefined (and different) value at 4820 the location. Then the CAS in this thread will fail and we will 4821 go back to "again:", but without knowing that the trip back 4822 there was based on an undefined comparison. No matter; at least 4823 the other thread won the race and the location is correctly 4824 marked as undefined. What if it wrote an uninitialised version 4825 of the same value that was there originally, though? 4826 4827 etc etc. Seems like there's a small corner case in which we 4828 might lose the fact that something's defined -- we're out-raced 4829 in between the "old = * reg" and the "atomically {", _and_ the 4830 other thread is writing in an undefined version of what's 4831 already there. Well, that seems pretty unlikely. 4832 4833 --- 4834 4835 If we ever need to reinstate it .. code which generates a 4836 definedness test for "expected == old" was removed at r10432 of 4837 this file. 
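
      To summarise what the single-CAS case below actually emits, here
      is a sketch for a 32-bit CAS (placeholder names: 'a' for .addr,
      'd'/'e' for .dataLo/.expdLo, 'old' for .oldLo; origin-tracking
      statements omitted):

         1/2.  compute d# and e#
         3.    definedness check on 'a'
         4.    old# = LOADV32le/be(a)    (also checks 'a' is addressable)
         5.    the IRCAS itself, copied through to the output block
         6.    eq  = CasCmpEQ32(e, old)  (treated as always defined)
         7.    if (eq)  STOREV32le/be(a, d#)   (a guarded shadow store)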
4838 */ 4839 if (cas->oldHi == IRTemp_INVALID) { 4840 do_shadow_CAS_single( mce, cas ); 4841 } else { 4842 do_shadow_CAS_double( mce, cas ); 4843 } 4844 } 4845 4846 4847 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas ) 4848 { 4849 IRAtom *vdataLo = NULL, *bdataLo = NULL; 4850 IRAtom *vexpdLo = NULL, *bexpdLo = NULL; 4851 IRAtom *voldLo = NULL, *boldLo = NULL; 4852 IRAtom *expd_eq_old = NULL; 4853 IROp opCasCmpEQ; 4854 Int elemSzB; 4855 IRType elemTy; 4856 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */ 4857 4858 /* single CAS */ 4859 tl_assert(cas->oldHi == IRTemp_INVALID); 4860 tl_assert(cas->expdHi == NULL); 4861 tl_assert(cas->dataHi == NULL); 4862 4863 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); 4864 switch (elemTy) { 4865 case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break; 4866 case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break; 4867 case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break; 4868 case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break; 4869 default: tl_assert(0); /* IR defn disallows any other types */ 4870 } 4871 4872 /* 1. fetch data# (the proposed new value) */ 4873 tl_assert(isOriginalAtom(mce, cas->dataLo)); 4874 vdataLo 4875 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo)); 4876 tl_assert(isShadowAtom(mce, vdataLo)); 4877 if (otrak) { 4878 bdataLo 4879 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo)); 4880 tl_assert(isShadowAtom(mce, bdataLo)); 4881 } 4882 4883 /* 2. fetch expected# (what we expect to see at the address) */ 4884 tl_assert(isOriginalAtom(mce, cas->expdLo)); 4885 vexpdLo 4886 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo)); 4887 tl_assert(isShadowAtom(mce, vexpdLo)); 4888 if (otrak) { 4889 bexpdLo 4890 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo)); 4891 tl_assert(isShadowAtom(mce, bexpdLo)); 4892 } 4893 4894 /* 3. check definedness of address */ 4895 /* 4. fetch old# from shadow memory; this also checks 4896 addressibility of the address */ 4897 voldLo 4898 = assignNew( 4899 'V', mce, elemTy, 4900 expr2vbits_Load( 4901 mce, 4902 cas->end, elemTy, cas->addr, 0/*Addr bias*/ 4903 )); 4904 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo); 4905 if (otrak) { 4906 boldLo 4907 = assignNew('B', mce, Ity_I32, 4908 gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/)); 4909 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo); 4910 } 4911 4912 /* 5. the CAS itself */ 4913 stmt( 'C', mce, IRStmt_CAS(cas) ); 4914 4915 /* 6. compute "expected == old" */ 4916 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */ 4917 /* Note that 'C' is kinda faking it; it is indeed a non-shadow 4918 tree, but it's not copied from the input block. */ 4919 expd_eq_old 4920 = assignNew('C', mce, Ity_I1, 4921 binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo))); 4922 4923 /* 7. 
if "expected == old" 4924 store data# to shadow memory */ 4925 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/, 4926 NULL/*data*/, vdataLo/*vdata*/, 4927 expd_eq_old/*guard for store*/ ); 4928 if (otrak) { 4929 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/, 4930 bdataLo/*bdata*/, 4931 expd_eq_old/*guard for store*/ ); 4932 } 4933 } 4934 4935 4936 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ) 4937 { 4938 IRAtom *vdataHi = NULL, *bdataHi = NULL; 4939 IRAtom *vdataLo = NULL, *bdataLo = NULL; 4940 IRAtom *vexpdHi = NULL, *bexpdHi = NULL; 4941 IRAtom *vexpdLo = NULL, *bexpdLo = NULL; 4942 IRAtom *voldHi = NULL, *boldHi = NULL; 4943 IRAtom *voldLo = NULL, *boldLo = NULL; 4944 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL; 4945 IRAtom *expd_eq_old = NULL, *zero = NULL; 4946 IROp opCasCmpEQ, opOr, opXor; 4947 Int elemSzB, memOffsLo, memOffsHi; 4948 IRType elemTy; 4949 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */ 4950 4951 /* double CAS */ 4952 tl_assert(cas->oldHi != IRTemp_INVALID); 4953 tl_assert(cas->expdHi != NULL); 4954 tl_assert(cas->dataHi != NULL); 4955 4956 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); 4957 switch (elemTy) { 4958 case Ity_I8: 4959 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8; 4960 elemSzB = 1; zero = mkU8(0); 4961 break; 4962 case Ity_I16: 4963 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16; 4964 elemSzB = 2; zero = mkU16(0); 4965 break; 4966 case Ity_I32: 4967 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32; 4968 elemSzB = 4; zero = mkU32(0); 4969 break; 4970 case Ity_I64: 4971 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64; 4972 elemSzB = 8; zero = mkU64(0); 4973 break; 4974 default: 4975 tl_assert(0); /* IR defn disallows any other types */ 4976 } 4977 4978 /* 1. fetch data# (the proposed new value) */ 4979 tl_assert(isOriginalAtom(mce, cas->dataHi)); 4980 tl_assert(isOriginalAtom(mce, cas->dataLo)); 4981 vdataHi 4982 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi)); 4983 vdataLo 4984 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo)); 4985 tl_assert(isShadowAtom(mce, vdataHi)); 4986 tl_assert(isShadowAtom(mce, vdataLo)); 4987 if (otrak) { 4988 bdataHi 4989 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi)); 4990 bdataLo 4991 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo)); 4992 tl_assert(isShadowAtom(mce, bdataHi)); 4993 tl_assert(isShadowAtom(mce, bdataLo)); 4994 } 4995 4996 /* 2. fetch expected# (what we expect to see at the address) */ 4997 tl_assert(isOriginalAtom(mce, cas->expdHi)); 4998 tl_assert(isOriginalAtom(mce, cas->expdLo)); 4999 vexpdHi 5000 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi)); 5001 vexpdLo 5002 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo)); 5003 tl_assert(isShadowAtom(mce, vexpdHi)); 5004 tl_assert(isShadowAtom(mce, vexpdLo)); 5005 if (otrak) { 5006 bexpdHi 5007 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi)); 5008 bexpdLo 5009 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo)); 5010 tl_assert(isShadowAtom(mce, bexpdHi)); 5011 tl_assert(isShadowAtom(mce, bexpdLo)); 5012 } 5013 5014 /* 3. check definedness of address */ 5015 /* 4. 
fetch old# from shadow memory; this also checks 5016 addressibility of the address */ 5017 if (cas->end == Iend_LE) { 5018 memOffsLo = 0; 5019 memOffsHi = elemSzB; 5020 } else { 5021 tl_assert(cas->end == Iend_BE); 5022 memOffsLo = elemSzB; 5023 memOffsHi = 0; 5024 } 5025 voldHi 5026 = assignNew( 5027 'V', mce, elemTy, 5028 expr2vbits_Load( 5029 mce, 5030 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/ 5031 )); 5032 voldLo 5033 = assignNew( 5034 'V', mce, elemTy, 5035 expr2vbits_Load( 5036 mce, 5037 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/ 5038 )); 5039 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi); 5040 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo); 5041 if (otrak) { 5042 boldHi 5043 = assignNew('B', mce, Ity_I32, 5044 gen_load_b(mce, elemSzB, cas->addr, 5045 memOffsHi/*addr bias*/)); 5046 boldLo 5047 = assignNew('B', mce, Ity_I32, 5048 gen_load_b(mce, elemSzB, cas->addr, 5049 memOffsLo/*addr bias*/)); 5050 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi); 5051 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo); 5052 } 5053 5054 /* 5. the CAS itself */ 5055 stmt( 'C', mce, IRStmt_CAS(cas) ); 5056 5057 /* 6. compute "expected == old" */ 5058 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */ 5059 /* Note that 'C' is kinda faking it; it is indeed a non-shadow 5060 tree, but it's not copied from the input block. */ 5061 /* 5062 xHi = oldHi ^ expdHi; 5063 xLo = oldLo ^ expdLo; 5064 xHL = xHi | xLo; 5065 expd_eq_old = xHL == 0; 5066 */ 5067 xHi = assignNew('C', mce, elemTy, 5068 binop(opXor, cas->expdHi, mkexpr(cas->oldHi))); 5069 xLo = assignNew('C', mce, elemTy, 5070 binop(opXor, cas->expdLo, mkexpr(cas->oldLo))); 5071 xHL = assignNew('C', mce, elemTy, 5072 binop(opOr, xHi, xLo)); 5073 expd_eq_old 5074 = assignNew('C', mce, Ity_I1, 5075 binop(opCasCmpEQ, xHL, zero)); 5076 5077 /* 7. if "expected == old" 5078 store data# to shadow memory */ 5079 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/, 5080 NULL/*data*/, vdataHi/*vdata*/, 5081 expd_eq_old/*guard for store*/ ); 5082 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/, 5083 NULL/*data*/, vdataLo/*vdata*/, 5084 expd_eq_old/*guard for store*/ ); 5085 if (otrak) { 5086 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/, 5087 bdataHi/*bdata*/, 5088 expd_eq_old/*guard for store*/ ); 5089 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/, 5090 bdataLo/*bdata*/, 5091 expd_eq_old/*guard for store*/ ); 5092 } 5093 } 5094 5095 5096 /* ------ Dealing with LL/SC (not difficult) ------ */ 5097 5098 static void do_shadow_LLSC ( MCEnv* mce, 5099 IREndness stEnd, 5100 IRTemp stResult, 5101 IRExpr* stAddr, 5102 IRExpr* stStoredata ) 5103 { 5104 /* In short: treat a load-linked like a normal load followed by an 5105 assignment of the loaded (shadow) data to the result temporary. 5106 Treat a store-conditional like a normal store, and mark the 5107 result temporary as defined. */ 5108 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult); 5109 IRTemp resTmp = findShadowTmpV(mce, stResult); 5110 5111 tl_assert(isIRAtom(stAddr)); 5112 if (stStoredata) 5113 tl_assert(isIRAtom(stStoredata)); 5114 5115 if (stStoredata == NULL) { 5116 /* Load Linked */ 5117 /* Just treat this as a normal load, followed by an assignment of 5118 the value to .result. 
*/ 5119 /* Stay sane */ 5120 tl_assert(resTy == Ity_I64 || resTy == Ity_I32 5121 || resTy == Ity_I16 || resTy == Ity_I8); 5122 assign( 'V', mce, resTmp, 5123 expr2vbits_Load( 5124 mce, stEnd, resTy, stAddr, 0/*addr bias*/)); 5125 } else { 5126 /* Store Conditional */ 5127 /* Stay sane */ 5128 IRType dataTy = typeOfIRExpr(mce->sb->tyenv, 5129 stStoredata); 5130 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32 5131 || dataTy == Ity_I16 || dataTy == Ity_I8); 5132 do_shadow_Store( mce, stEnd, 5133 stAddr, 0/* addr bias */, 5134 stStoredata, 5135 NULL /* shadow data */, 5136 NULL/*guard*/ ); 5137 /* This is a store conditional, so it writes to .result a value 5138 indicating whether or not the store succeeded. Just claim 5139 this value is always defined. In the PowerPC interpretation 5140 of store-conditional, definedness of the success indication 5141 depends on whether the address of the store matches the 5142 reservation address. But we can't tell that here (and 5143 anyway, we're not being PowerPC-specific). At least we are 5144 guaranteed that the definedness of the store address, and its 5145 addressibility, will be checked as per normal. So it seems 5146 pretty safe to just say that the success indication is always 5147 defined. 5148 5149 In schemeS, for origin tracking, we must correspondingly set 5150 a no-origin value for the origin shadow of .result. 5151 */ 5152 tl_assert(resTy == Ity_I1); 5153 assign( 'V', mce, resTmp, definedOfType(resTy) ); 5154 } 5155 } 5156 5157 5158 /*------------------------------------------------------------*/ 5159 /*--- Memcheck main ---*/ 5160 /*------------------------------------------------------------*/ 5161 5162 static void schemeS ( MCEnv* mce, IRStmt* st ); 5163 5164 static Bool isBogusAtom ( IRAtom* at ) 5165 { 5166 ULong n = 0; 5167 IRConst* con; 5168 tl_assert(isIRAtom(at)); 5169 if (at->tag == Iex_RdTmp) 5170 return False; 5171 tl_assert(at->tag == Iex_Const); 5172 con = at->Iex.Const.con; 5173 switch (con->tag) { 5174 case Ico_U1: return False; 5175 case Ico_U8: n = (ULong)con->Ico.U8; break; 5176 case Ico_U16: n = (ULong)con->Ico.U16; break; 5177 case Ico_U32: n = (ULong)con->Ico.U32; break; 5178 case Ico_U64: n = (ULong)con->Ico.U64; break; 5179 case Ico_F64: return False; 5180 case Ico_F32i: return False; 5181 case Ico_F64i: return False; 5182 case Ico_V128: return False; 5183 default: ppIRExpr(at); tl_assert(0); 5184 } 5185 /* VG_(printf)("%llx\n", n); */ 5186 return (/*32*/ n == 0xFEFEFEFFULL 5187 /*32*/ || n == 0x80808080ULL 5188 /*32*/ || n == 0x7F7F7F7FULL 5189 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL 5190 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL 5191 /*64*/ || n == 0x0000000000008080ULL 5192 /*64*/ || n == 0x8080808080808080ULL 5193 /*64*/ || n == 0x0101010101010101ULL 5194 ); 5195 } 5196 5197 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st ) 5198 { 5199 Int i; 5200 IRExpr* e; 5201 IRDirty* d; 5202 IRCAS* cas; 5203 switch (st->tag) { 5204 case Ist_WrTmp: 5205 e = st->Ist.WrTmp.data; 5206 switch (e->tag) { 5207 case Iex_Get: 5208 case Iex_RdTmp: 5209 return False; 5210 case Iex_Const: 5211 return isBogusAtom(e); 5212 case Iex_Unop: 5213 return isBogusAtom(e->Iex.Unop.arg); 5214 case Iex_GetI: 5215 return isBogusAtom(e->Iex.GetI.ix); 5216 case Iex_Binop: 5217 return isBogusAtom(e->Iex.Binop.arg1) 5218 || isBogusAtom(e->Iex.Binop.arg2); 5219 case Iex_Triop: 5220 return isBogusAtom(e->Iex.Triop.details->arg1) 5221 || isBogusAtom(e->Iex.Triop.details->arg2) 5222 || isBogusAtom(e->Iex.Triop.details->arg3); 5223 case Iex_Qop: 5224 return 
isBogusAtom(e->Iex.Qop.details->arg1) 5225 || isBogusAtom(e->Iex.Qop.details->arg2) 5226 || isBogusAtom(e->Iex.Qop.details->arg3) 5227 || isBogusAtom(e->Iex.Qop.details->arg4); 5228 case Iex_Mux0X: 5229 return isBogusAtom(e->Iex.Mux0X.cond) 5230 || isBogusAtom(e->Iex.Mux0X.expr0) 5231 || isBogusAtom(e->Iex.Mux0X.exprX); 5232 case Iex_Load: 5233 return isBogusAtom(e->Iex.Load.addr); 5234 case Iex_CCall: 5235 for (i = 0; e->Iex.CCall.args[i]; i++) 5236 if (isBogusAtom(e->Iex.CCall.args[i])) 5237 return True; 5238 return False; 5239 default: 5240 goto unhandled; 5241 } 5242 case Ist_Dirty: 5243 d = st->Ist.Dirty.details; 5244 for (i = 0; d->args[i]; i++) 5245 if (isBogusAtom(d->args[i])) 5246 return True; 5247 if (d->guard && isBogusAtom(d->guard)) 5248 return True; 5249 if (d->mAddr && isBogusAtom(d->mAddr)) 5250 return True; 5251 return False; 5252 case Ist_Put: 5253 return isBogusAtom(st->Ist.Put.data); 5254 case Ist_PutI: 5255 return isBogusAtom(st->Ist.PutI.details->ix) 5256 || isBogusAtom(st->Ist.PutI.details->data); 5257 case Ist_Store: 5258 return isBogusAtom(st->Ist.Store.addr) 5259 || isBogusAtom(st->Ist.Store.data); 5260 case Ist_Exit: 5261 return isBogusAtom(st->Ist.Exit.guard); 5262 case Ist_AbiHint: 5263 return isBogusAtom(st->Ist.AbiHint.base) 5264 || isBogusAtom(st->Ist.AbiHint.nia); 5265 case Ist_NoOp: 5266 case Ist_IMark: 5267 case Ist_MBE: 5268 return False; 5269 case Ist_CAS: 5270 cas = st->Ist.CAS.details; 5271 return isBogusAtom(cas->addr) 5272 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False) 5273 || isBogusAtom(cas->expdLo) 5274 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False) 5275 || isBogusAtom(cas->dataLo); 5276 case Ist_LLSC: 5277 return isBogusAtom(st->Ist.LLSC.addr) 5278 || (st->Ist.LLSC.storedata 5279 ? isBogusAtom(st->Ist.LLSC.storedata) 5280 : False); 5281 default: 5282 unhandled: 5283 ppIRStmt(st); 5284 VG_(tool_panic)("hasBogusLiterals"); 5285 } 5286 } 5287 5288 5289 IRSB* MC_(instrument) ( VgCallbackClosure* closure, 5290 IRSB* sb_in, 5291 VexGuestLayout* layout, 5292 VexGuestExtents* vge, 5293 IRType gWordTy, IRType hWordTy ) 5294 { 5295 Bool verboze = 0||False; 5296 Bool bogus; 5297 Int i, j, first_stmt; 5298 IRStmt* st; 5299 MCEnv mce; 5300 IRSB* sb_out; 5301 5302 if (gWordTy != hWordTy) { 5303 /* We don't currently support this case. */ 5304 VG_(tool_panic)("host/guest word size mismatch"); 5305 } 5306 5307 /* Check we're not completely nuts */ 5308 tl_assert(sizeof(UWord) == sizeof(void*)); 5309 tl_assert(sizeof(Word) == sizeof(void*)); 5310 tl_assert(sizeof(Addr) == sizeof(void*)); 5311 tl_assert(sizeof(ULong) == 8); 5312 tl_assert(sizeof(Long) == 8); 5313 tl_assert(sizeof(Addr64) == 8); 5314 tl_assert(sizeof(UInt) == 4); 5315 tl_assert(sizeof(Int) == 4); 5316 5317 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3); 5318 5319 /* Set up SB */ 5320 sb_out = deepCopyIRSBExceptStmts(sb_in); 5321 5322 /* Set up the running environment. Both .sb and .tmpMap are 5323 modified as we go along. Note that tmps are added to both 5324 .sb->tyenv and .tmpMap together, so the valid index-set for 5325 those two arrays should always be identical. */ 5326 VG_(memset)(&mce, 0, sizeof(mce)); 5327 mce.sb = sb_out; 5328 mce.trace = verboze; 5329 mce.layout = layout; 5330 mce.hWordTy = hWordTy; 5331 mce.bogusLiterals = False; 5332 5333 /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on 5334 Darwin. 
10.7 is mostly built with LLVM, which uses these for 5335 bitfield inserts, and we get a lot of false errors if the cheap 5336 interpretation is used, alas. Could solve this much better if 5337 we knew which of such adds came from x86/amd64 LEA instructions, 5338 since these are the only ones really needing the expensive 5339 interpretation, but that would require some way to tag them in 5340 the _toIR.c front ends, which is a lot of faffing around. So 5341 for now just use the slow and blunt-instrument solution. */ 5342 mce.useLLVMworkarounds = False; 5343 # if defined(VGO_darwin) 5344 mce.useLLVMworkarounds = True; 5345 # endif 5346 5347 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free), 5348 sizeof(TempMapEnt)); 5349 for (i = 0; i < sb_in->tyenv->types_used; i++) { 5350 TempMapEnt ent; 5351 ent.kind = Orig; 5352 ent.shadowV = IRTemp_INVALID; 5353 ent.shadowB = IRTemp_INVALID; 5354 VG_(addToXA)( mce.tmpMap, &ent ); 5355 } 5356 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used ); 5357 5358 /* Make a preliminary inspection of the statements, to see if there 5359 are any dodgy-looking literals. If there are, we generate 5360 extra-detailed (hence extra-expensive) instrumentation in 5361 places. Scan the whole bb even if dodgyness is found earlier, 5362 so that the flatness assertion is applied to all stmts. */ 5363 5364 bogus = False; 5365 5366 for (i = 0; i < sb_in->stmts_used; i++) { 5367 5368 st = sb_in->stmts[i]; 5369 tl_assert(st); 5370 tl_assert(isFlatIRStmt(st)); 5371 5372 if (!bogus) { 5373 bogus = checkForBogusLiterals(st); 5374 if (0 && bogus) { 5375 VG_(printf)("bogus: "); 5376 ppIRStmt(st); 5377 VG_(printf)("\n"); 5378 } 5379 } 5380 5381 } 5382 5383 mce.bogusLiterals = bogus; 5384 5385 /* Copy verbatim any IR preamble preceding the first IMark */ 5386 5387 tl_assert(mce.sb == sb_out); 5388 tl_assert(mce.sb != sb_in); 5389 5390 i = 0; 5391 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) { 5392 5393 st = sb_in->stmts[i]; 5394 tl_assert(st); 5395 tl_assert(isFlatIRStmt(st)); 5396 5397 stmt( 'C', &mce, sb_in->stmts[i] ); 5398 i++; 5399 } 5400 5401 /* Nasty problem. IR optimisation of the pre-instrumented IR may 5402 cause the IR following the preamble to contain references to IR 5403 temporaries defined in the preamble. Because the preamble isn't 5404 instrumented, these temporaries don't have any shadows. 5405 Nevertheless uses of them following the preamble will cause 5406 memcheck to generate references to their shadows. End effect is 5407 to cause IR sanity check failures, due to references to 5408 non-existent shadows. This is only evident for the complex 5409 preambles used for function wrapping on TOC-afflicted platforms 5410 (ppc64-linux). 5411 5412 The following loop therefore scans the preamble looking for 5413 assignments to temporaries. For each one found it creates an 5414 assignment to the corresponding (V) shadow temp, marking it as 5415 'defined'. This is the same resulting IR as if the main 5416 instrumentation loop before had been applied to the statement 5417 'tmp = CONSTANT'. 5418 5419 Similarly, if origin tracking is enabled, we must generate an 5420 assignment for the corresponding origin (B) shadow, claiming 5421 no-origin, as appropriate for a defined value. 5422 */ 5423 for (j = 0; j < i; j++) { 5424 if (sb_in->stmts[j]->tag == Ist_WrTmp) { 5425 /* findShadowTmpV checks its arg is an original tmp; 5426 no need to assert that here. 
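
      Concretely, for a preamble statement of the form 't17 = ...'
      ('t17' being just a placeholder), the loop below emits roughly

         t17# := all-zeroes constant of t17's shadow type   (defined)

      and, when origin tracking is enabled (MC_(clo_mc_level) == 3),

         t17B := 0x0:I32   (no origin)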
*/ 5427 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp; 5428 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o); 5429 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v); 5430 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) ); 5431 if (MC_(clo_mc_level) == 3) { 5432 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o); 5433 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32); 5434 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */); 5435 } 5436 if (0) { 5437 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j); 5438 ppIRType( ty_v ); 5439 VG_(printf)("\n"); 5440 } 5441 } 5442 } 5443 5444 /* Iterate over the remaining stmts to generate instrumentation. */ 5445 5446 tl_assert(sb_in->stmts_used > 0); 5447 tl_assert(i >= 0); 5448 tl_assert(i < sb_in->stmts_used); 5449 tl_assert(sb_in->stmts[i]->tag == Ist_IMark); 5450 5451 for (/* use current i*/; i < sb_in->stmts_used; i++) { 5452 5453 st = sb_in->stmts[i]; 5454 first_stmt = sb_out->stmts_used; 5455 5456 if (verboze) { 5457 VG_(printf)("\n"); 5458 ppIRStmt(st); 5459 VG_(printf)("\n"); 5460 } 5461 5462 if (MC_(clo_mc_level) == 3) { 5463 /* See comments on case Ist_CAS below. */ 5464 if (st->tag != Ist_CAS) 5465 schemeS( &mce, st ); 5466 } 5467 5468 /* Generate instrumentation code for each stmt ... */ 5469 5470 switch (st->tag) { 5471 5472 case Ist_WrTmp: 5473 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp), 5474 expr2vbits( &mce, st->Ist.WrTmp.data) ); 5475 break; 5476 5477 case Ist_Put: 5478 do_shadow_PUT( &mce, 5479 st->Ist.Put.offset, 5480 st->Ist.Put.data, 5481 NULL /* shadow atom */, NULL /* guard */ ); 5482 break; 5483 5484 case Ist_PutI: 5485 do_shadow_PUTI( &mce, st->Ist.PutI.details); 5486 break; 5487 5488 case Ist_Store: 5489 do_shadow_Store( &mce, st->Ist.Store.end, 5490 st->Ist.Store.addr, 0/* addr bias */, 5491 st->Ist.Store.data, 5492 NULL /* shadow data */, 5493 NULL/*guard*/ ); 5494 break; 5495 5496 case Ist_Exit: 5497 complainIfUndefined( &mce, st->Ist.Exit.guard, NULL ); 5498 break; 5499 5500 case Ist_IMark: 5501 break; 5502 5503 case Ist_NoOp: 5504 case Ist_MBE: 5505 break; 5506 5507 case Ist_Dirty: 5508 do_shadow_Dirty( &mce, st->Ist.Dirty.details ); 5509 break; 5510 5511 case Ist_AbiHint: 5512 do_AbiHint( &mce, st->Ist.AbiHint.base, 5513 st->Ist.AbiHint.len, 5514 st->Ist.AbiHint.nia ); 5515 break; 5516 5517 case Ist_CAS: 5518 do_shadow_CAS( &mce, st->Ist.CAS.details ); 5519 /* Note, do_shadow_CAS copies the CAS itself to the output 5520 block, because it needs to add instrumentation both 5521 before and after it. Hence skip the copy below. Also 5522 skip the origin-tracking stuff (call to schemeS) above, 5523 since that's all tangled up with it too; do_shadow_CAS 5524 does it all. */ 5525 break; 5526 5527 case Ist_LLSC: 5528 do_shadow_LLSC( &mce, 5529 st->Ist.LLSC.end, 5530 st->Ist.LLSC.result, 5531 st->Ist.LLSC.addr, 5532 st->Ist.LLSC.storedata ); 5533 break; 5534 5535 default: 5536 VG_(printf)("\n"); 5537 ppIRStmt(st); 5538 VG_(printf)("\n"); 5539 VG_(tool_panic)("memcheck: unhandled IRStmt"); 5540 5541 } /* switch (st->tag) */ 5542 5543 if (0 && verboze) { 5544 for (j = first_stmt; j < sb_out->stmts_used; j++) { 5545 VG_(printf)(" "); 5546 ppIRStmt(sb_out->stmts[j]); 5547 VG_(printf)("\n"); 5548 } 5549 VG_(printf)("\n"); 5550 } 5551 5552 /* ... and finally copy the stmt itself to the output. Except, 5553 skip the copy of IRCASs; see comments on case Ist_CAS 5554 above. */ 5555 if (st->tag != Ist_CAS) 5556 stmt('C', &mce, st); 5557 } 5558 5559 /* Now we need to complain if the jump target is undefined. 
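      In sketch form (tmp names and width purely illustrative): if the
      block ends with a computed jump through t9 on a 64-bit guest, the
      call below appends something like

         t33 = CmpNE64(t9_V, 0x0:I64)     is any V bit of t9 set?
         DIRTY t33 ::: MC_(helperc_value_check8_fail_no_o)()

      just before the final jump, so the error-reporting helper runs
      only when the target address is at least partly undefined.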
*/ 5560 first_stmt = sb_out->stmts_used; 5561 5562 if (verboze) { 5563 VG_(printf)("sb_in->next = "); 5564 ppIRExpr(sb_in->next); 5565 VG_(printf)("\n\n"); 5566 } 5567 5568 complainIfUndefined( &mce, sb_in->next, NULL ); 5569 5570 if (0 && verboze) { 5571 for (j = first_stmt; j < sb_out->stmts_used; j++) { 5572 VG_(printf)(" "); 5573 ppIRStmt(sb_out->stmts[j]); 5574 VG_(printf)("\n"); 5575 } 5576 VG_(printf)("\n"); 5577 } 5578 5579 /* If this fails, there's been some serious snafu with tmp management, 5580 that should be investigated. */ 5581 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used ); 5582 VG_(deleteXA)( mce.tmpMap ); 5583 5584 tl_assert(mce.sb == sb_out); 5585 return sb_out; 5586 } 5587 5588 /*------------------------------------------------------------*/ 5589 /*--- Post-tree-build final tidying ---*/ 5590 /*------------------------------------------------------------*/ 5591 5592 /* This exploits the observation that Memcheck often produces 5593 repeated conditional calls of the form 5594 5595 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag) 5596 5597 with the same guard expression G guarding the same helper call. 5598 The second and subsequent calls are redundant. This usually 5599 results from instrumentation of guest code containing multiple 5600 memory references at different constant offsets from the same base 5601 register. After optimisation of the instrumentation, you get a 5602 test for the definedness of the base register for each memory 5603 reference, which is kinda pointless. MC_(final_tidy) therefore 5604 looks for such repeated calls and removes all but the first. */ 5605 5606 /* A struct for recording which (helper, guard) pairs we have already 5607 seen. */ 5608 typedef 5609 struct { void* entry; IRExpr* guard; } 5610 Pair; 5611 5612 /* Return True if e1 and e2 definitely denote the same value (used to 5613 compare guards). Return False if unknown; False is the safe 5614 answer. Since guest registers and guest memory do not have the 5615 SSA property we must return False if any Gets or Loads appear in 5616 the expression. */ 5617 5618 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 ) 5619 { 5620 if (e1->tag != e2->tag) 5621 return False; 5622 switch (e1->tag) { 5623 case Iex_Const: 5624 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con ); 5625 case Iex_Binop: 5626 return e1->Iex.Binop.op == e2->Iex.Binop.op 5627 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1) 5628 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2); 5629 case Iex_Unop: 5630 return e1->Iex.Unop.op == e2->Iex.Unop.op 5631 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg); 5632 case Iex_RdTmp: 5633 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp; 5634 case Iex_Mux0X: 5635 return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond ) 5636 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 ) 5637 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX ); 5638 case Iex_Qop: 5639 case Iex_Triop: 5640 case Iex_CCall: 5641 /* be lazy. Could define equality for these, but they never 5642 appear to be used. 
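         Were that ever to change, the obvious shape (illustrative,
         untested) would mirror the Binop case above; e.g. for Iex_Triop:

            return e1->Iex.Triop.details->op == e2->Iex.Triop.details->op
                   && sameIRValue(e1->Iex.Triop.details->arg1,
                                  e2->Iex.Triop.details->arg1)
                   && sameIRValue(e1->Iex.Triop.details->arg2,
                                  e2->Iex.Triop.details->arg2)
                   && sameIRValue(e1->Iex.Triop.details->arg3,
                                  e2->Iex.Triop.details->arg3);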
*/ 5643 return False; 5644 case Iex_Get: 5645 case Iex_GetI: 5646 case Iex_Load: 5647 /* be conservative - these may not give the same value each 5648 time */ 5649 return False; 5650 case Iex_Binder: 5651 /* should never see this */ 5652 /* fallthrough */ 5653 default: 5654 VG_(printf)("mc_translate.c: sameIRValue: unhandled: "); 5655 ppIRExpr(e1); 5656 VG_(tool_panic)("memcheck:sameIRValue"); 5657 return False; 5658 } 5659 } 5660 5661 /* See if 'pairs' already has an entry for (entry, guard). Return 5662 True if so. If not, add an entry. */ 5663 5664 static 5665 Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry ) 5666 { 5667 Pair p; 5668 Pair* pp; 5669 Int i, n = VG_(sizeXA)( pairs ); 5670 for (i = 0; i < n; i++) { 5671 pp = VG_(indexXA)( pairs, i ); 5672 if (pp->entry == entry && sameIRValue(pp->guard, guard)) 5673 return True; 5674 } 5675 p.guard = guard; 5676 p.entry = entry; 5677 VG_(addToXA)( pairs, &p ); 5678 return False; 5679 } 5680 5681 static Bool is_helperc_value_checkN_fail ( HChar* name ) 5682 { 5683 return 5684 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)") 5685 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)") 5686 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)") 5687 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)") 5688 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)") 5689 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)") 5690 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)") 5691 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)"); 5692 } 5693 5694 IRSB* MC_(final_tidy) ( IRSB* sb_in ) 5695 { 5696 Int i; 5697 IRStmt* st; 5698 IRDirty* di; 5699 IRExpr* guard; 5700 IRCallee* cee; 5701 Bool alreadyPresent; 5702 XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1", 5703 VG_(free), sizeof(Pair) ); 5704 /* Scan forwards through the statements. Each time a call to one 5705 of the relevant helpers is seen, check if we have made a 5706 previous call to the same helper using the same guard 5707 expression, and if so, delete the call. */ 5708 for (i = 0; i < sb_in->stmts_used; i++) { 5709 st = sb_in->stmts[i]; 5710 tl_assert(st); 5711 if (st->tag != Ist_Dirty) 5712 continue; 5713 di = st->Ist.Dirty.details; 5714 guard = di->guard; 5715 if (!guard) 5716 continue; 5717 if (0) { ppIRExpr(guard); VG_(printf)("\n"); } 5718 cee = di->cee; 5719 if (!is_helperc_value_checkN_fail( cee->name )) 5720 continue; 5721 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with 5722 guard 'guard'. Check if we have already seen a call to this 5723 function with the same guard. If so, delete it. If not, 5724 add it to the set of calls we do know about. */ 5725 alreadyPresent = check_or_add( pairs, guard, cee->addr ); 5726 if (alreadyPresent) { 5727 sb_in->stmts[i] = IRStmt_NoOp(); 5728 if (0) VG_(printf)("XX\n"); 5729 } 5730 } 5731 VG_(deleteXA)( pairs ); 5732 return sb_in; 5733 } 5734 5735 5736 /*------------------------------------------------------------*/ 5737 /*--- Origin tracking stuff ---*/ 5738 /*------------------------------------------------------------*/ 5739 5740 /* Almost identical to findShadowTmpV. */ 5741 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig ) 5742 { 5743 TempMapEnt* ent; 5744 /* VG_(indexXA) range-checks 'orig', hence no need to check 5745 here. 
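      Behaviourally: the first call for a given original tmp, say t7,
      allocates a fresh Ity_I32 tmp below via newTemp(.., BSh), records
      it in ent->shadowB and returns it; any later findShadowTmpB(mce,
      t7) then hands back that same tmp.  The B shadow is always 32 bits
      wide because it carries an origin tag (otag), with zero meaning
      'no origin known'.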
*/ 5746 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 5747 tl_assert(ent->kind == Orig); 5748 if (ent->shadowB == IRTemp_INVALID) { 5749 IRTemp tmpB 5750 = newTemp( mce, Ity_I32, BSh ); 5751 /* newTemp may cause mce->tmpMap to resize, hence previous results 5752 from VG_(indexXA) are invalid. */ 5753 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 5754 tl_assert(ent->kind == Orig); 5755 tl_assert(ent->shadowB == IRTemp_INVALID); 5756 ent->shadowB = tmpB; 5757 } 5758 return ent->shadowB; 5759 } 5760 5761 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 ) 5762 { 5763 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) ); 5764 } 5765 5766 static IRAtom* gen_load_b ( MCEnv* mce, Int szB, 5767 IRAtom* baseaddr, Int offset ) 5768 { 5769 void* hFun; 5770 HChar* hName; 5771 IRTemp bTmp; 5772 IRDirty* di; 5773 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); 5774 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64; 5775 IRAtom* ea = baseaddr; 5776 if (offset != 0) { 5777 IRAtom* off = aTy == Ity_I32 ? mkU32( offset ) 5778 : mkU64( (Long)(Int)offset ); 5779 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off)); 5780 } 5781 bTmp = newTemp(mce, mce->hWordTy, BSh); 5782 5783 switch (szB) { 5784 case 1: hFun = (void*)&MC_(helperc_b_load1); 5785 hName = "MC_(helperc_b_load1)"; 5786 break; 5787 case 2: hFun = (void*)&MC_(helperc_b_load2); 5788 hName = "MC_(helperc_b_load2)"; 5789 break; 5790 case 4: hFun = (void*)&MC_(helperc_b_load4); 5791 hName = "MC_(helperc_b_load4)"; 5792 break; 5793 case 8: hFun = (void*)&MC_(helperc_b_load8); 5794 hName = "MC_(helperc_b_load8)"; 5795 break; 5796 case 16: hFun = (void*)&MC_(helperc_b_load16); 5797 hName = "MC_(helperc_b_load16)"; 5798 break; 5799 case 32: hFun = (void*)&MC_(helperc_b_load32); 5800 hName = "MC_(helperc_b_load32)"; 5801 break; 5802 default: 5803 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB); 5804 tl_assert(0); 5805 } 5806 di = unsafeIRDirty_1_N( 5807 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ), 5808 mkIRExprVec_1( ea ) 5809 ); 5810 /* no need to mess with any annotations. This call accesses 5811 neither guest state nor guest memory. */ 5812 stmt( 'B', mce, IRStmt_Dirty(di) ); 5813 if (mce->hWordTy == Ity_I64) { 5814 /* 64-bit host */ 5815 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh); 5816 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) ); 5817 return mkexpr(bTmp32); 5818 } else { 5819 /* 32-bit host */ 5820 return mkexpr(bTmp); 5821 } 5822 } 5823 5824 static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr, 5825 Int offset, IRAtom* guard ) 5826 { 5827 if (guard) { 5828 IRAtom *cond, *iffalse, *iftrue; 5829 5830 cond = assignNew('B', mce, Ity_I8, unop(Iop_1Uto8, guard)); 5831 iftrue = assignNew('B', mce, Ity_I32, 5832 gen_load_b(mce, szB, baseaddr, offset)); 5833 iffalse = mkU32(0); 5834 5835 return assignNew('B', mce, Ity_I32, IRExpr_Mux0X(cond, iffalse, iftrue)); 5836 } 5837 5838 return gen_load_b(mce, szB, baseaddr, offset); 5839 } 5840 5841 /* Generate a shadow store. guard :: Ity_I1 controls whether the 5842 store really happens; NULL means it unconditionally does. */ 5843 static void gen_store_b ( MCEnv* mce, Int szB, 5844 IRAtom* baseaddr, Int offset, IRAtom* dataB, 5845 IRAtom* guard ) 5846 { 5847 void* hFun; 5848 HChar* hName; 5849 IRDirty* di; 5850 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); 5851 IROp opAdd = aTy == Ity_I32 ? 
Iop_Add32 : Iop_Add64; 5852 IRAtom* ea = baseaddr; 5853 if (guard) { 5854 tl_assert(isOriginalAtom(mce, guard)); 5855 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); 5856 } 5857 if (offset != 0) { 5858 IRAtom* off = aTy == Ity_I32 ? mkU32( offset ) 5859 : mkU64( (Long)(Int)offset ); 5860 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off)); 5861 } 5862 if (mce->hWordTy == Ity_I64) 5863 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB)); 5864 5865 switch (szB) { 5866 case 1: hFun = (void*)&MC_(helperc_b_store1); 5867 hName = "MC_(helperc_b_store1)"; 5868 break; 5869 case 2: hFun = (void*)&MC_(helperc_b_store2); 5870 hName = "MC_(helperc_b_store2)"; 5871 break; 5872 case 4: hFun = (void*)&MC_(helperc_b_store4); 5873 hName = "MC_(helperc_b_store4)"; 5874 break; 5875 case 8: hFun = (void*)&MC_(helperc_b_store8); 5876 hName = "MC_(helperc_b_store8)"; 5877 break; 5878 case 16: hFun = (void*)&MC_(helperc_b_store16); 5879 hName = "MC_(helperc_b_store16)"; 5880 break; 5881 case 32: hFun = (void*)&MC_(helperc_b_store32); 5882 hName = "MC_(helperc_b_store32)"; 5883 break; 5884 default: 5885 tl_assert(0); 5886 } 5887 di = unsafeIRDirty_0_N( 2/*regparms*/, 5888 hName, VG_(fnptr_to_fnentry)( hFun ), 5889 mkIRExprVec_2( ea, dataB ) 5890 ); 5891 /* no need to mess with any annotations. This call accesses 5892 neither guest state nor guest memory. */ 5893 if (guard) di->guard = guard; 5894 stmt( 'B', mce, IRStmt_Dirty(di) ); 5895 } 5896 5897 static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) { 5898 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e); 5899 if (eTy == Ity_I64) 5900 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) ); 5901 if (eTy == Ity_I32) 5902 return e; 5903 tl_assert(0); 5904 } 5905 5906 static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) { 5907 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e); 5908 tl_assert(eTy == Ity_I32); 5909 if (dstTy == Ity_I64) 5910 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) ); 5911 tl_assert(0); 5912 } 5913 5914 5915 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ) 5916 { 5917 tl_assert(MC_(clo_mc_level) == 3); 5918 5919 switch (e->tag) { 5920 5921 case Iex_GetI: { 5922 IRRegArray* descr_b; 5923 IRAtom *t1, *t2, *t3, *t4; 5924 IRRegArray* descr = e->Iex.GetI.descr; 5925 IRType equivIntTy 5926 = MC_(get_otrack_reg_array_equiv_int_type)(descr); 5927 /* If this array is unshadowable for whatever reason, use the 5928 usual approximation. */ 5929 if (equivIntTy == Ity_INVALID) 5930 return mkU32(0); 5931 tl_assert(sizeofIRType(equivIntTy) >= 4); 5932 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy)); 5933 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB, 5934 equivIntTy, descr->nElems ); 5935 /* Do a shadow indexed get of the same size, giving t1. Take 5936 the bottom 32 bits of it, giving t2. Compute into t3 the 5937 origin for the index (almost certainly zero, but there's 5938 no harm in being completely general here, since iropt will 5939 remove any useless code), and fold it in, giving a final 5940 value t4. 
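      A note on the '+ 2*mce->layout->total_sizeB' displacement used for
      descr_b just above: the origin (B) shadow of the guest state is
      taken to live at twice the guest state size beyond the original,
      in the same way that the V shadow lives at '+ total_sizeB'
      elsewhere in this file.  With purely illustrative numbers,
      total_sizeB == 1000 and an array based at offset 200 has its
      origin tags read from an identically-shaped array based at 2200.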
*/ 5941 t1 = assignNew( 'B', mce, equivIntTy, 5942 IRExpr_GetI( descr_b, e->Iex.GetI.ix, 5943 e->Iex.GetI.bias )); 5944 t2 = narrowTo32( mce, t1 ); 5945 t3 = schemeE( mce, e->Iex.GetI.ix ); 5946 t4 = gen_maxU32( mce, t2, t3 ); 5947 return t4; 5948 } 5949 case Iex_CCall: { 5950 Int i; 5951 IRAtom* here; 5952 IRExpr** args = e->Iex.CCall.args; 5953 IRAtom* curr = mkU32(0); 5954 for (i = 0; args[i]; i++) { 5955 tl_assert(i < 32); 5956 tl_assert(isOriginalAtom(mce, args[i])); 5957 /* Only take notice of this arg if the callee's 5958 mc-exclusion mask does not say it is to be excluded. */ 5959 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) { 5960 /* the arg is to be excluded from definedness checking. 5961 Do nothing. */ 5962 if (0) VG_(printf)("excluding %s(%d)\n", 5963 e->Iex.CCall.cee->name, i); 5964 } else { 5965 /* calculate the arg's definedness, and pessimistically 5966 merge it in. */ 5967 here = schemeE( mce, args[i] ); 5968 curr = gen_maxU32( mce, curr, here ); 5969 } 5970 } 5971 return curr; 5972 } 5973 case Iex_Load: { 5974 Int dszB; 5975 dszB = sizeofIRType(e->Iex.Load.ty); 5976 /* assert that the B value for the address is already 5977 available (somewhere) */ 5978 tl_assert(isIRAtom(e->Iex.Load.addr)); 5979 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64); 5980 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 ); 5981 } 5982 case Iex_Mux0X: { 5983 IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond ); 5984 IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 ); 5985 IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX ); 5986 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 )); 5987 } 5988 case Iex_Qop: { 5989 IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 ); 5990 IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 ); 5991 IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 ); 5992 IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 ); 5993 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ), 5994 gen_maxU32( mce, b3, b4 ) ); 5995 } 5996 case Iex_Triop: { 5997 IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 ); 5998 IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 ); 5999 IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 ); 6000 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) ); 6001 } 6002 case Iex_Binop: { 6003 switch (e->Iex.Binop.op) { 6004 case Iop_CasCmpEQ8: case Iop_CasCmpNE8: 6005 case Iop_CasCmpEQ16: case Iop_CasCmpNE16: 6006 case Iop_CasCmpEQ32: case Iop_CasCmpNE32: 6007 case Iop_CasCmpEQ64: case Iop_CasCmpNE64: 6008 /* Just say these all produce a defined result, 6009 regardless of their arguments. See 6010 COMMENT_ON_CasCmpEQ in this file. */ 6011 return mkU32(0); 6012 default: { 6013 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 ); 6014 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 ); 6015 return gen_maxU32( mce, b1, b2 ); 6016 } 6017 } 6018 tl_assert(0); 6019 /*NOTREACHED*/ 6020 } 6021 case Iex_Unop: { 6022 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg ); 6023 return b1; 6024 } 6025 case Iex_Const: 6026 return mkU32(0); 6027 case Iex_RdTmp: 6028 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp )); 6029 case Iex_Get: { 6030 Int b_offset = MC_(get_otrack_shadow_offset)( 6031 e->Iex.Get.offset, 6032 sizeofIRType(e->Iex.Get.ty) 6033 ); 6034 tl_assert(b_offset >= -1 6035 && b_offset <= mce->layout->total_sizeB -4); 6036 if (b_offset >= 0) { 6037 /* FIXME: this isn't an atom! 
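         A sketch of the obvious tidy-up, should it ever matter, would
         be to bind the Get to a tmp first so that an atom is returned:

            return assignNew( 'B', mce, Ity_I32,
                              IRExpr_Get( b_offset
                                          + 2*mce->layout->total_sizeB,
                                          Ity_I32 ) );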
*/ 6038 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB, 6039 Ity_I32 ); 6040 } 6041 return mkU32(0); 6042 } 6043 default: 6044 VG_(printf)("mc_translate.c: schemeE: unhandled: "); 6045 ppIRExpr(e); 6046 VG_(tool_panic)("memcheck:schemeE"); 6047 } 6048 } 6049 6050 6051 static void do_origins_Dirty ( MCEnv* mce, IRDirty* d ) 6052 { 6053 // This is a hacked version of do_shadow_Dirty 6054 Int i, k, n, toDo, gSz, gOff; 6055 IRAtom *here, *curr; 6056 IRTemp dst; 6057 6058 /* First check the guard. */ 6059 curr = schemeE( mce, d->guard ); 6060 6061 /* Now round up all inputs and maxU32 over them. */ 6062 6063 /* Inputs: unmasked args 6064 Note: arguments are evaluated REGARDLESS of the guard expression */ 6065 for (i = 0; d->args[i]; i++) { 6066 if (d->cee->mcx_mask & (1<<i)) { 6067 /* ignore this arg */ 6068 } else { 6069 here = schemeE( mce, d->args[i] ); 6070 curr = gen_maxU32( mce, curr, here ); 6071 } 6072 } 6073 6074 /* Inputs: guest state that we read. */ 6075 for (i = 0; i < d->nFxState; i++) { 6076 tl_assert(d->fxState[i].fx != Ifx_None); 6077 if (d->fxState[i].fx == Ifx_Write) 6078 continue; 6079 6080 /* Enumerate the described state segments */ 6081 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 6082 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 6083 gSz = d->fxState[i].size; 6084 6085 /* Ignore any sections marked as 'always defined'. */ 6086 if (isAlwaysDefd(mce, gOff, gSz)) { 6087 if (0) 6088 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n", 6089 gOff, gSz); 6090 continue; 6091 } 6092 6093 /* This state element is read or modified. So we need to 6094 consider it. If larger than 4 bytes, deal with it in 6095 4-byte chunks. */ 6096 while (True) { 6097 Int b_offset; 6098 tl_assert(gSz >= 0); 6099 if (gSz == 0) break; 6100 n = gSz <= 4 ? gSz : 4; 6101 /* update 'curr' with maxU32 of the state slice 6102 gOff .. gOff+n-1 */ 6103 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4); 6104 if (b_offset != -1) { 6105 /* Observe the guard expression. If it is false use 0, i.e. 6106 nothing is known about the origin */ 6107 IRAtom *cond, *iffalse, *iftrue; 6108 6109 cond = assignNew( 'B', mce, Ity_I8, unop(Iop_1Uto8, d->guard)); 6110 iffalse = mkU32(0); 6111 iftrue = assignNew( 'B', mce, Ity_I32, 6112 IRExpr_Get(b_offset 6113 + 2*mce->layout->total_sizeB, 6114 Ity_I32)); 6115 here = assignNew( 'B', mce, Ity_I32, 6116 IRExpr_Mux0X(cond, iffalse, iftrue)); 6117 curr = gen_maxU32( mce, curr, here ); 6118 } 6119 gSz -= n; 6120 gOff += n; 6121 } 6122 } 6123 } 6124 6125 /* Inputs: memory */ 6126 6127 if (d->mFx != Ifx_None) { 6128 /* Because we may do multiple shadow loads/stores from the same 6129 base address, it's best to do a single test of its 6130 definedness right now. Post-instrumentation optimisation 6131 should remove all but this test. */ 6132 tl_assert(d->mAddr); 6133 here = schemeE( mce, d->mAddr ); 6134 curr = gen_maxU32( mce, curr, here ); 6135 } 6136 6137 /* Deal with memory inputs (reads or modifies) */ 6138 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) { 6139 toDo = d->mSize; 6140 /* chew off 32-bit chunks. We don't care about the endianness 6141 since it's all going to be condensed down to a single bit, 6142 but nevertheless choose an endianness which is hopefully 6143 native to the platform. 
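      For example, with d->mSize == 7 (purely illustrative), the loops
      below issue guarded B loads of 4 bytes at offset 0, 2 bytes at
      offset 4 and 1 byte at offset 6, maxU32-ing each result into
      curr.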
*/ 6144 while (toDo >= 4) { 6145 here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo, 6146 d->guard ); 6147 curr = gen_maxU32( mce, curr, here ); 6148 toDo -= 4; 6149 } 6150 /* handle possible 16-bit excess */ 6151 while (toDo >= 2) { 6152 here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo, 6153 d->guard ); 6154 curr = gen_maxU32( mce, curr, here ); 6155 toDo -= 2; 6156 } 6157 /* chew off the remaining 8-bit chunk, if any */ 6158 if (toDo == 1) { 6159 here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo, 6160 d->guard ); 6161 curr = gen_maxU32( mce, curr, here ); 6162 toDo -= 1; 6163 } 6164 tl_assert(toDo == 0); 6165 } 6166 6167 /* Whew! So curr is a 32-bit B-value which should give an origin 6168 of some use if any of the inputs to the helper are undefined. 6169 Now we need to re-distribute the results to all destinations. */ 6170 6171 /* Outputs: the destination temporary, if there is one. */ 6172 if (d->tmp != IRTemp_INVALID) { 6173 dst = findShadowTmpB(mce, d->tmp); 6174 assign( 'V', mce, dst, curr ); 6175 } 6176 6177 /* Outputs: guest state that we write or modify. */ 6178 for (i = 0; i < d->nFxState; i++) { 6179 tl_assert(d->fxState[i].fx != Ifx_None); 6180 if (d->fxState[i].fx == Ifx_Read) 6181 continue; 6182 6183 /* Enumerate the described state segments */ 6184 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 6185 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 6186 gSz = d->fxState[i].size; 6187 6188 /* Ignore any sections marked as 'always defined'. */ 6189 if (isAlwaysDefd(mce, gOff, gSz)) 6190 continue; 6191 6192 /* This state element is written or modified. So we need to 6193 consider it. If larger than 4 bytes, deal with it in 6194 4-byte chunks. */ 6195 while (True) { 6196 Int b_offset; 6197 tl_assert(gSz >= 0); 6198 if (gSz == 0) break; 6199 n = gSz <= 4 ? gSz : 4; 6200 /* Write 'curr' to the state slice gOff .. gOff+n-1 */ 6201 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4); 6202 if (b_offset != -1) { 6203 if (d->guard) { 6204 /* If the guard expression evaluates to false we simply Put 6205 the value that is already stored in the guest state slot */ 6206 IRAtom *cond, *iffalse; 6207 6208 cond = assignNew('B', mce, Ity_I8, 6209 unop(Iop_1Uto8, d->guard)); 6210 iffalse = assignNew('B', mce, Ity_I32, 6211 IRExpr_Get(b_offset + 6212 2*mce->layout->total_sizeB, 6213 Ity_I32)); 6214 curr = assignNew('V', mce, Ity_I32, 6215 IRExpr_Mux0X(cond, iffalse, curr)); 6216 } 6217 stmt( 'B', mce, IRStmt_Put(b_offset 6218 + 2*mce->layout->total_sizeB, 6219 curr )); 6220 } 6221 gSz -= n; 6222 gOff += n; 6223 } 6224 } 6225 } 6226 6227 /* Outputs: memory that we write or modify. Same comments about 6228 endianness as above apply. 
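      The chunking below is the mirror image of the read case, e.g. an
      illustrative mSize of 7 stores curr as the origin for 4 bytes at
      offset 0, 2 bytes at offset 4 and 1 byte at offset 6.  Unlike the
      read case, a false guard must suppress the store itself;
      gen_store_b arranges that by attaching d->guard to each helper
      call as di->guard.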
*/ 6229 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) { 6230 toDo = d->mSize; 6231 /* chew off 32-bit chunks */ 6232 while (toDo >= 4) { 6233 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr, 6234 d->guard ); 6235 toDo -= 4; 6236 } 6237 /* handle possible 16-bit excess */ 6238 while (toDo >= 2) { 6239 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr, 6240 d->guard ); 6241 toDo -= 2; 6242 } 6243 /* chew off the remaining 8-bit chunk, if any */ 6244 if (toDo == 1) { 6245 gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr, 6246 d->guard ); 6247 toDo -= 1; 6248 } 6249 tl_assert(toDo == 0); 6250 } 6251 } 6252 6253 6254 static void do_origins_Store ( MCEnv* mce, 6255 IREndness stEnd, 6256 IRExpr* stAddr, 6257 IRExpr* stData ) 6258 { 6259 Int dszB; 6260 IRAtom* dataB; 6261 /* assert that the B value for the address is already available 6262 (somewhere), since the call to schemeE will want to see it. 6263 XXXX how does this actually ensure that?? */ 6264 tl_assert(isIRAtom(stAddr)); 6265 tl_assert(isIRAtom(stData)); 6266 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) ); 6267 dataB = schemeE( mce, stData ); 6268 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, 6269 NULL/*guard*/ ); 6270 } 6271 6272 6273 static void schemeS ( MCEnv* mce, IRStmt* st ) 6274 { 6275 tl_assert(MC_(clo_mc_level) == 3); 6276 6277 switch (st->tag) { 6278 6279 case Ist_AbiHint: 6280 /* The value-check instrumenter handles this - by arranging 6281 to pass the address of the next instruction to 6282 MC_(helperc_MAKE_STACK_UNINIT). This is all that needs to 6283 happen for origin tracking w.r.t. AbiHints. So there is 6284 nothing to do here. */ 6285 break; 6286 6287 case Ist_PutI: { 6288 IRPutI *puti = st->Ist.PutI.details; 6289 IRRegArray* descr_b; 6290 IRAtom *t1, *t2, *t3, *t4; 6291 IRRegArray* descr = puti->descr; 6292 IRType equivIntTy 6293 = MC_(get_otrack_reg_array_equiv_int_type)(descr); 6294 /* If this array is unshadowable for whatever reason, 6295 generate no code. */ 6296 if (equivIntTy == Ity_INVALID) 6297 break; 6298 tl_assert(sizeofIRType(equivIntTy) >= 4); 6299 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy)); 6300 descr_b 6301 = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB, 6302 equivIntTy, descr->nElems ); 6303 /* Compute a value to Put - the conjoinment of the origin for 6304 the data to be Put-ted (obviously) and of the index value 6305 (not so obviously). */ 6306 t1 = schemeE( mce, puti->data ); 6307 t2 = schemeE( mce, puti->ix ); 6308 t3 = gen_maxU32( mce, t1, t2 ); 6309 t4 = zWidenFrom32( mce, equivIntTy, t3 ); 6310 stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix, 6311 puti->bias, t4) )); 6312 break; 6313 } 6314 6315 case Ist_Dirty: 6316 do_origins_Dirty( mce, st->Ist.Dirty.details ); 6317 break; 6318 6319 case Ist_Store: 6320 do_origins_Store( mce, st->Ist.Store.end, 6321 st->Ist.Store.addr, 6322 st->Ist.Store.data ); 6323 break; 6324 6325 case Ist_LLSC: { 6326 /* In short: treat a load-linked like a normal load followed 6327 by an assignment of the loaded (shadow) data the result 6328 temporary. Treat a store-conditional like a normal store, 6329 and mark the result temporary as defined. 
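         Concretely, and purely as a sketch: for a 32-bit load-linked
         whose result goes to t3, the code below computes t3's B shadow
         exactly as it would for an ordinary 32-bit load from the same
         address; for the matching store-conditional the stored data is
         handled like an ordinary store, and the success/failure result
         tmp gets B value 0, i.e. no origin, matching its always-defined
         V shadow.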
         */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                    schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store( mce, st->Ist.LLSC.end,
                              st->Ist.LLSC.addr,
                              st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                    mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                 schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}


/*--------------------------------------------------------------------*/
/*--- end                                             mc_translate.c ---*/
/*--------------------------------------------------------------------*/