/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2013 Julian Seward
      jseward (at) acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"     // For mc_include.h
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_machine.h"       // VG_(fnptr_to_fnentry)
#include "pub_tool_xarray.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_libcbase.h"

#include "mc_include.h"


/* FIXMEs JRS 2011-June-16.

   Check the interpretation for vector narrowing and widening ops,
   particularly the saturating ones.  I suspect they are either overly
   pessimistic and/or wrong.
*/

/* This file implements the Memcheck instrumentation, and in
   particular contains the core of its undefined value detection
   machinery.  For a comprehensive background of the terminology,
   algorithms and rationale used herein, read:

     Using Valgrind to detect undefined value errors with
     bit-precision

     Julian Seward and Nicholas Nethercote

     2005 USENIX Annual Technical Conference (General Track),
     Anaheim, CA, USA, April 10-15, 2005.

   ----

   Here is as good a place as any to record exactly when V bits are and
   should be checked, why, and what function is responsible.

   Memcheck complains when an undefined value is used:

   1. In the condition of a conditional branch.  Because it could cause
      incorrect control flow, and thus cause incorrect externally-visible
      behaviour.  [mc_translate.c:complainIfUndefined]

   2. As an argument to a system call, or as the value that specifies
      the system call number.  Because it could cause an incorrect
      externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]

   3. As the address in a load or store.  Because it could cause an
      incorrect value to be used later, which could cause externally-visible
      behaviour (eg. via incorrect control flow or an incorrect system call
      argument)  [complainIfUndefined]

   4. As the target address of a branch.  Because it could cause incorrect
      control flow.  [complainIfUndefined]

   5. As an argument to setenv, unsetenv, or putenv.  Because it could put
      an incorrect value into the external environment.
      [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]

   6. As the index in a GETI or PUTI operation.
      I'm not sure why... (njn).  [complainIfUndefined]

   7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
      VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
      requested it.  [in memcheck.h]

   Memcheck also complains, but should not, when an undefined value is used:

   8. As the shift value in certain SIMD shift operations (but not in the
      standard integer shift operations).  This inconsistency is due to
      historical reasons.  [complainIfUndefined]

   Memcheck does not complain, but should, when an undefined value is used:

   9. As an input to a client request.  Because the client request may
      affect the visible behaviour -- see bug #144362 for an example
      involving the malloc replacements in vg_replace_malloc.c and
      VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
      isn't identified.  That bug report also has some info on how to solve
      the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]

   In practice, 1 and 2 account for the vast majority of cases.
*/

/* Generation of addr-definedness, addr-validity and
   guard-definedness checks pertaining to loads and stores (Iex_Load,
   Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
   loads/stores) was re-checked 11 May 2013. */

/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowTypeV ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );

static IRExpr *i128_const_zero(void);

/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value,
   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;
      IRTemp   shadowV;
      IRTemp   shadowB;
   }
   TempMapEnt;


/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         the total number of Orig, V- and B- temps allocated so far.
         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: indicates whether we should use expensive
         interpretations of integer adds, since unfortunately LLVM
         uses them to do ORs in some circumstances.  Defaulted to True
         on MacOS and False everywhere else. */
      Bool useLLVMworkarounds;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in tables
   tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_temps-1], which gives the current shadow for
   each original tmp, or INVALID_IRTEMP if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
   both the table in mce->sb and to our auxiliary mapping.  Note that
   newTemp may cause mce->tmpMap to resize, hence previous results
   from VG_(indexXA)(mce->tmpMap) are invalidated. */
static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
{
   Word       newIx;
   TempMapEnt ent;
   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
   ent.kind    = kind;
   ent.shadowV = IRTemp_INVALID;
   ent.shadowB = IRTemp_INVALID;
   newIx = VG_(addToXA)( mce->tmpMap, &ent );
   tl_assert(newIx == (Word)tmp);
   return tmp;
}


/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}
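
/* To make the rebinding concrete, here is a minimal sketch (the temp
   numbers are invented for illustration, not taken from real IR).
   Suppose t5 is an original tmp whose current V-shadow is t17.  After
   a definedness test of t17 we must force the shadow to 'defined',
   but SSA forbids writing t17 twice.  So the instrumentation emits,
   in effect:

      t17 = <V-bits of t5>                  -- existing binding
      DIRTY <complaint>, guard = PCast-to-I1(t17)
      t23 = 0x0                             -- fresh all-defined shadow
      -- tmpMap[t5].shadowV now maps to t23, not t17

   and every later read of t5's shadow sees t23.  This is exactly what
   findShadowTmpV (above) and newShadowTmpV (below) implement. */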
/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead.

   This is the same as findShadowTmpV, except we don't bother to see
   if a shadow temp already existed -- we simply allocate a new one
   regardless. */
static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (1) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      ent->shadowV = tmpV;
   }
}


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == Orig;
   }
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == VSh || ent->kind == BSh;
   }
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}
/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, I128, V128, V256. */

static IRType shadowTypeV ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_D32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_D64:  return Ity_I64;
      case Ity_F128: return Ity_I128;
      case Ity_D128: return Ity_I128;
      case Ity_V128: return Ity_V128;
      case Ity_V256: return Ity_V256;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowTypeV");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/I64/I128/V128/V256). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_I128: return i128_const_zero();
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      case Ity_V256: return IRExpr_Const(IRConst_V256(0x00000000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* add stmt to a bb */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   if (mce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}

/* assign value to tmp */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}

/* build various kinds of expressions */
#define triop(_op, _arg1, _arg2, _arg3) \
                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU1(_n)                 IRExpr_Const(IRConst_U1(_n))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
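
/* As a usage sketch: to append the V-bit statement "t3v = Or32(t1v,t2v)"
   to the superblock under construction, one would write

      assign('V', mce, t3v, binop(Iop_Or32, mkexpr(t1v), mkexpr(t2v)));

   (temp names invented for the example).  In assign/stmt the 'V' tag
   only controls tracing output; the statement is appended to mce->sb
   either way. */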
/* Bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom.

   'ty' is the type of 'e' and hence the type that the new temporary
   needs to be.  But passing it in is redundant, since we can deduce
   the type merely by inspecting 'e'.  So at least use that fact to
   assert that the two types agree. */
static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
{
   TempKind k;
   IRTemp   t;
   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);

   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
   switch (cat) {
      case 'V': k = VSh;  break;
      case 'B': k = BSh;  break;
      case 'C': k = Orig; break;
                /* happens when we are making up new "orig"
                   expressions, for IRCAS handling */
      default: tl_assert(0);
   }
   t = newTemp(mce, ty, k);
   assign(cat, mce, t, e);
   return mkexpr(t);
}


/*------------------------------------------------------------*/
/*--- Helper functions for 128-bit ops                     ---*/
/*------------------------------------------------------------*/

static IRExpr *i128_const_zero(void)
{
   IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
   return binop(Iop_64HLto128, z64, z64);
}

/* There are no I128-bit loads and/or stores [as generated by any
   current front ends].  So we do not need to worry about that in
   expr2vbits_Load */


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
}
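
/* Concrete 8-bit illustration of the two combiners (0 = defined bit,
   1 = undefined bit; values invented):

      a1# = 0x0F, a2# = 0x3C
      DifD8(a1#,a2#) = a1# & a2# = 0x0C  -- defined if EITHER is defined
      UifU8(a1#,a2#) = a1# | a2# = 0x3F  -- undefined if EITHER is undefined

   UifU is the pessimistic default combiner; DifD is used when one
   operand on its own can force the result, as in the AND/OR
   improvement terms below. */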
static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
   tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
   tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
   tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
   tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
   tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));

   return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_I128: return mkUifU128(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      case Ity_V256: return mkUifUV256(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
}
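
/* The Left ops compute x | -x: everything from the lowest 1 bit
   upwards becomes 1.  An invented 8-bit example:

      vbits  = 0x14  (0b00010100)    -- bits 2 and 4 undefined
      -vbits = 0xEC  (0b11101100)
      Left8  = 0xFC  (0b11111100)    -- bits 2..7 undefined

   Applied to V-bits this is the cheap model of carry propagation: a
   carry out of an undefined bit can disturb any higher bit, but never
   a lower one. */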
/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
}
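
/* Worked 8-bit example of the AND improvement (values invented):

      data = 0x05 (0b00000101),  vbits = 0x06 (0b00000110)
      ImproveAND8(data,vbits) = data | vbits = 0x07 (0b00000111)

   Bits 3..7 of the result are 0: those bits of 'data' are defined
   zeroes, and 0 AND x == 0 regardless of x, so the corresponding
   output bits of the AND are defined whatever the other operand
   holds.  The caller DifDs this term onto the pessimistic UifU
   estimate. */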
/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V256,
             binop(Iop_OrV256,
                   assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
                   vbits) );
}
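
/* And the mirror-image example for OR (values invented):

      data = 0x81 (0b10000001),  vbits = 0x03 (0b00000011)
      ImproveOR8(data,vbits) = ~data | vbits = 0x7F (0b01111111)

   Bit 7 of the result is 0: that bit of 'data' is a defined 1, and
   1 OR x == 1 regardless of x, so bit 7 of the OR is defined no
   matter what the other operand holds. */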
/* --------- Pessimising casts. --------- */

/* The function returns an expression of type DST_TY.  If any of the
   VBITS is undefined (value == 1) the resulting expression has all bits
   set to 1.  Otherwise, all bits are 0. */

static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;

   /* Note, dst_ty is a shadow type, not an original type. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* PCast the arg, then clone it. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   if (src_ty == Ity_I32 && dst_ty == Ity_V128) {
      /* PCast the arg, then clone it 4 times. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
      return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
   }

   if (src_ty == Ity_I32 && dst_ty == Ity_V256) {
      /* PCast the arg, then clone it 8 times. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      tmp = assignNew('V', mce, Ity_I64,  binop(Iop_32HLto64, tmp, tmp));
      tmp = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
      return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, tmp, tmp));
   }

   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
         the top half. */
      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
   }

   /* Else do it the slow way .. */
   /* First of all, collapse vbits down to a single bit. */
   tmp1 = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      case Ity_V256:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
                                                    tmp1, tmp1));
         tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
                                                    tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
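
/* In short, PCast means "any undefinedness in, total undefinedness
   out".  An invented I32 -> I16 example:

      vbits = 0x00000400  ->  mkPCastTo(.., Ity_I16, ..) == 0xFFFF
      vbits = 0x00000000  ->  mkPCastTo(.., Ity_I16, ..) == 0x0000

   This is the summary used whenever tracking an operation
   bit-precisely is not considered worthwhile. */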
/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
/*
   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_I1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

   PCastTo<1> (
      -- naive version
      PCastTo<sz>( UifU<sz>(vxx, vyy) )

      `DifD<sz>`

      -- improvement term
      PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
   )

   where
     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise.

     vec = Or<sz>( vxx,   // 0 iff bit defined
                   vyy,   // 0 iff bit defined
                   Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
                 )

   If any bit of vec is 0, the result is defined and so the
   improvement term should produce 0...0, else it should produce
   1...1.

   Hence require for the improvement term:

      if vec == 1...1 then 1...1 else 0...0
   ->
      PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )

   This was extensively re-analysed and checked on 6 July 05.
*/
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   switch (ty) {
      case Ity_I16:
         opOR   = Iop_Or16;
         opDIFD = Iop_And16;
         opUIFU = Iop_Or16;
         opNOT  = Iop_Not16;
         opXOR  = Iop_Xor16;
         opCMP  = Iop_CmpEQ16;
         top    = mkU16(0xFFFF);
         break;
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
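
/* A concrete 16-bit run of the scheme (operands invented):

      xx = 0x0005, vxx = 0xFF00     -- high bytes undefined,
      yy = 0x0009, vyy = 0xFF00     -- low bytes defined

      naive       = PCast(vxx `UifU` vyy)       = 0xFFFF
      vec         = vxx | vyy | ~(xx ^ yy)
                  = 0xFF00 | 0xFF00 | 0xFFF3    = 0xFFF3
      improvement = PCast(CmpEQ(vec, 0xFFFF))   = 0x0000
      improved    = naive `DifD` improvement    = 0x0000

   Bits 2 and 3 of xx and yy are defined and differ, so EQ/NE is fully
   decided even though the high bytes are undefined; the final
   PCast-to-I1 therefore yields 'defined'. */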
/* --------- Semi-accurate interpretation of CmpORD. --------- */

/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:

      CmpORD32S(x,y) = 1<<3   if  x <s y
                     = 1<<2   if  x >s y
                     = 1<<1   if  x == y

   and similarly the unsigned variant.  The default interpretation is:

      CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
                                  & (7<<1)

   The "& (7<<1)" reflects the fact that all result bits except 3,2,1
   are zero and therefore defined (viz, zero).

   Also deal with a special case better:

      CmpORD32S(x,0)

   Here, bit 3 (LT) of the result is a copy of the top bit of x and
   will be defined even if the rest of x isn't.  In which case we do:

      CmpORD32S#(x,x#,0,{impliedly 0}#)
         = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
           | (x# >>u 31) << 3      -- LT# = x#[31]

   Analogous handling for CmpORD64{S,U}.
*/
static Bool isZeroU32 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == 0 );
}

static Bool isZeroU64 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U64
              && e->Iex.Const.con->Ico.U64 == 0 );
}

static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64 : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
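
/* Invented 32-bit values for the special case: doCmpORD for
   CmpORD32S(x,0) with x# = 0x80000000 (only the sign bit of x
   undefined) gives

      PCast(x#) & (3<<1)   = 0xFFFFFFFF & 6  = 6   -- GT#,EQ# undefined
      (x# >>u 31) << 3     = 1 << 3          = 8   -- LT# undefined too

   whereas x# = 0x7FFFFFFF gives LT# = 0 (defined): bit 31 of x alone
   decides x <s 0, and here it is a defined bit. */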
/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */


/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */
static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx        = Ifx_Read;
   di->fxState[0].offset    = mce->layout->offset_SP;
   di->fxState[0].size      = mce->layout->sizeof_SP;
   di->fxState[0].nRepeats  = 0;
   di->fxState[0].repeatLen = 0;
   di->fxState[1].fx        = Ifx_Read;
   di->fxState[1].offset    = mce->layout->offset_IP;
   di->fxState[1].size      = mce->layout->sizeof_IP;
   di->fxState[1].nRepeats  = 0;
   di->fxState[1].repeatLen = 0;
}
/* Check the supplied *original* |atom| for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness.

   The checks are performed, any resulting complaint emitted, and
   |atom|'s shadow temp set to 'defined', ONLY in the case that
   |guard| evaluates to True at run-time.  If it evaluates to False
   then no action is performed.  If |guard| is NULL (the usual case)
   then it is assumed to be always-true, and hence these actions are
   performed unconditionally.

   This routine does not generate code to check the definedness of
   |guard|.  The caller is assumed to have taken care of that already.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   const HChar* nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   if (guard)
      tl_assert(isOriginalAtom(mce, guard));

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond; // and cond is PCast-to-1(atom#)

   /* If the complaint is to be issued under a guard condition, AND
      that into the guard condition for the helper call. */
   if (guard) {
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
      di->guard  = assignNew('V', mce, Ity_I1,  unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
      defined -- but only in the case where the guard evaluates to
      True at run-time.  Do the update by setting the orig->shadow
      mapping for tmp to reflect the fact that this shadow is getting
      a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      if (guard == NULL) {
         // guard is 'always True', hence update unconditionally
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                          definedOfType(ty));
      } else {
         // update the temp only conditionally.  Do this by copying
         // its old value when the guard is False.
         // The old value ..
         IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         IRAtom* new_tmpV
            = assignNew('V', mce, shadowTypeV(ty),
                        IRExpr_ITE(guard, definedOfType(ty),
                                          mkexpr(old_tmpV)));
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
      }
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
   We assume here that the definedness of GUARD has already been checked.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      if (guard) {
         /* If the guard expression evaluates to false we simply Put the value
            that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond    = assignNew('V', mce, Ity_I1, guard);
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom   = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse));
      }
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}


/* Generate into bb suitable actions to shadow this PutI (passed in
   in pieces).
*/
static
void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
{
   IRAtom*     vatom;
   IRType      ty, tyS;
   Int         arrSize;
   IRRegArray* descr = puti->descr;
   IRAtom*     ix    = puti->ix;
   Int         bias  = puti->bias;
   IRAtom*     atom  = puti->data;

   // Don't do shadow PUTIs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETIs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty  = descr->elemTy;
   tyS = shadowTypeV(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowTypeV(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      /* FIXME: this isn't an atom! */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}
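
/* Layout sketch for the GET/PUT translation (offsets invented): with
   mce->layout->total_sizeB == 1024, the shadow of guest state byte
   range [16 .. 19] lives at [1040 .. 1043], so

      PUT(16) = e          -->  also emit  PUT(1040) = e#
      GET(16) :: Ity_I32   -->  V-bits are GET(1040) :: Ity_I32

   i.e. shadowing a register access is the same access displaced by
   total_sizeB, unless the slice is in an always-defined region. */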
/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowTypeV(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}
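
/* Lazy propagation in one line, for a hypothetical unknown binop
   D = f(A,B) with A,B,D :: I64:

      D# = PCast64( UifU64(A#, B#) )

   If ANY bit of A or B is undefined, EVERY bit of D is reported
   undefined.  Always sound, often overly pessimistic -- which is why
   the 'expensive' exact sequences later in this file exist. */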
/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I8 x I64 -> I64 */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n");
      /* Widen 1st and 2nd args to I64.  Since 1st arg is typically a
       * rounding mode indication which is fully defined, this should
       * get folded out later.
       */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I128 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }

   /* I32 x I8 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n");
      /* Use I64 as an intermediate type, which means PCasting all 3
         args to I64 to start with.  1st arg is typically a rounding
         mode indication which is fully defined, so we hope that it
         will get folded out later. */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3);
      /* Now UifU all three together. */
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, at3);   // ... `UifU` PCast(va3)
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }
   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
*/
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than four times. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }
   /* I32 x I32 x I32 x I32 -> I32 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
      at = va1;
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkUifU(mce, Ity_I32, at, va4);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int     i;
   IRAtom* here;
   IRAtom* curr;
   IRType  mergeTy;
   Bool    mergeTy64 = True;

   /* Decide on the type of the merge intermediary.  If all relevant
      args are I64, then it's I64.  In all other circumstances, use
      I32. */
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      if (cee->mcx_mask & (1<<i))
         continue;
      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
         mergeTy64 = False;
   }

   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
   curr    = definedOfType(mergeTy);

   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded.
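         Bit i of .mcx_mask corresponds to args[i], so for example a
         callee registered with mcx_mask == 3 has its first two args
         skipped.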
*/
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
         curr = mergeTy64
                   ? mkUifU64(mce, here, curr)
                   : mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}


/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/

static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa
   a_min = assignNew('V', mce,ty,
                     binop(opAND, aa,
                                  assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty,
                     binop(opAND, bb,
                                  assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
         binop( opOR,
                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                assignNew('V', mce,ty,
                   binop( opXOR,
                          assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                          assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                   )
                )
         )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew('V', mce,ty,
         binop( opOR,
                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                assignNew('V', mce,ty,
                   binop( opXOR,
                          assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                          assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                   )
                )
         )
      );
   }

}


static
IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
                                       IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   IROp xorOp, subOp, andOp;
   IRExpr *one;
   IRAtom *improver, *improved;
   tl_assert(isShadowAtom(mce,vatom));
   tl_assert(isOriginalAtom(mce,atom));
   tl_assert(sameKindedAtoms(atom,vatom));

   switch (czop) {
      case Iop_Ctz32:
         ty = Ity_I32;
         xorOp = Iop_Xor32;
         subOp = Iop_Sub32;
         andOp = Iop_And32;
         one = mkU32(1);
         break;
      case Iop_Ctz64:
         ty = Ity_I64;
         xorOp = Iop_Xor64;
         subOp = Iop_Sub64;
         andOp = Iop_And64;
         one = mkU64(1);
         break;
      default:
         ppIROp(czop);
         VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
   }

   // improver = atom ^ (atom - 1)
   //
   // That is, improver has its low ctz(atom)+1 bits equal to one;
   // higher bits (if any) equal to zero.
   improver = assignNew('V', mce,ty,
                        binop(xorOp,
                              atom,
                              assignNew('V', mce, ty,
                                        binop(subOp, atom, one))));

   // improved = vatom & improver
   //
   // That is, treat any V bits above the first ctz(atom)+1 bits as
   // "defined".
   improved = assignNew('V', mce, ty,
                        binop(andOp, vatom, improver));

   // Return pessimizing cast of improved.
   return mkPCastTo(mce, ty, improved);
}


/*------------------------------------------------------------*/
/*--- Scalar shifts.                                       ---*/
/*------------------------------------------------------------*/

/* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
   idea is to shift the definedness bits by the original shift amount.
   This introduces 0s ("defined") in new positions for left shifts and
   unsigned right shifts, and copies the top definedness bit for
   signed right shifts.  So, conveniently, applying the original shift
   operator to the definedness bits for the left arg is exactly the
   right thing to do:

      (qaa << bb)

   However if the shift amount is undefined then the whole result
   is undefined.  Hence we need:

      (qaa << bb) `UifU` PCast(qbb)

   If the shift amount bb is a literal then qbb will say 'all defined'
   and the UifU and PCast will get folded out by post-instrumentation
   optimisation.
*/
static IRAtom* scalarShift ( MCEnv*  mce,
                             IRType  ty,
                             IROp    original_op,
                             IRAtom* qaa, IRAtom* qbb,
                             IRAtom* aa,  IRAtom* bb )
{
   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));
   return
      assignNew(
         'V', mce, ty,
         mkUifU( mce, ty,
                 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
                 mkPCastTo(mce, ty, qbb)
         )
      );
}


/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.             ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually.
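   Each helper is just the matching CmpNEZ lane op applied to the V
   bits: a lane becomes all ones (undefined) if it contains any set V
   bit, and all zeroes (defined) otherwise.  E.g. PCast16x8 maps a
   16-bit lane holding 0x0001 to 0xFFFF and leaves 0x0000 unchanged.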
*/

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}

static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
}

static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
}

static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
}

static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at));
}

static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
}

static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at));
}

static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
}

static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
}

static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
}


/* Here's a simple scheme capable of handling ops derived from SSE1
   code while only generating ops that can be efficiently implemented
   in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))

   unary32Fx4(x)      ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(UifUV128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a V128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU -- basically because UifU is fast and
   chopping lanes off vector values is slow.
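   In IR terms, binary32F0x4 below emits exactly this sequence:
   UifUV128 of the two shadows, Iop_V128to32, a PCast to I32, and
   Iop_SetV128lo32 to put the pessimised low lane back into x#.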
2093 2094 Finally: 2095 2096 unary32F0x4(x) ==> SetV128lo32( 2097 x#, 2098 PCast32(V128to32(x#)) 2099 ) 2100 2101 Where: 2102 2103 PCast32(v#) = 1Sto32(CmpNE32(v#,0)) 2104 PCast32x4(v#) = CmpNEZ32x4(v#) 2105 */ 2106 2107 static 2108 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2109 { 2110 IRAtom* at; 2111 tl_assert(isShadowAtom(mce, vatomX)); 2112 tl_assert(isShadowAtom(mce, vatomY)); 2113 at = mkUifUV128(mce, vatomX, vatomY); 2114 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at)); 2115 return at; 2116 } 2117 2118 static 2119 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX ) 2120 { 2121 IRAtom* at; 2122 tl_assert(isShadowAtom(mce, vatomX)); 2123 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX)); 2124 return at; 2125 } 2126 2127 static 2128 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2129 { 2130 IRAtom* at; 2131 tl_assert(isShadowAtom(mce, vatomX)); 2132 tl_assert(isShadowAtom(mce, vatomY)); 2133 at = mkUifUV128(mce, vatomX, vatomY); 2134 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at)); 2135 at = mkPCastTo(mce, Ity_I32, at); 2136 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 2137 return at; 2138 } 2139 2140 static 2141 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX ) 2142 { 2143 IRAtom* at; 2144 tl_assert(isShadowAtom(mce, vatomX)); 2145 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX)); 2146 at = mkPCastTo(mce, Ity_I32, at); 2147 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 2148 return at; 2149 } 2150 2151 /* --- ... and ... 64Fx2 versions of the same ... --- */ 2152 2153 static 2154 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2155 { 2156 IRAtom* at; 2157 tl_assert(isShadowAtom(mce, vatomX)); 2158 tl_assert(isShadowAtom(mce, vatomY)); 2159 at = mkUifUV128(mce, vatomX, vatomY); 2160 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at)); 2161 return at; 2162 } 2163 2164 static 2165 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX ) 2166 { 2167 IRAtom* at; 2168 tl_assert(isShadowAtom(mce, vatomX)); 2169 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX)); 2170 return at; 2171 } 2172 2173 static 2174 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2175 { 2176 IRAtom* at; 2177 tl_assert(isShadowAtom(mce, vatomX)); 2178 tl_assert(isShadowAtom(mce, vatomY)); 2179 at = mkUifUV128(mce, vatomX, vatomY); 2180 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at)); 2181 at = mkPCastTo(mce, Ity_I64, at); 2182 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 2183 return at; 2184 } 2185 2186 static 2187 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX ) 2188 { 2189 IRAtom* at; 2190 tl_assert(isShadowAtom(mce, vatomX)); 2191 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX)); 2192 at = mkPCastTo(mce, Ity_I64, at); 2193 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 2194 return at; 2195 } 2196 2197 /* --- --- ... and ... 
32Fx2 versions of the same --- --- */ 2198 2199 static 2200 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2201 { 2202 IRAtom* at; 2203 tl_assert(isShadowAtom(mce, vatomX)); 2204 tl_assert(isShadowAtom(mce, vatomY)); 2205 at = mkUifU64(mce, vatomX, vatomY); 2206 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at)); 2207 return at; 2208 } 2209 2210 static 2211 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX ) 2212 { 2213 IRAtom* at; 2214 tl_assert(isShadowAtom(mce, vatomX)); 2215 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX)); 2216 return at; 2217 } 2218 2219 /* --- ... and ... 64Fx4 versions of the same ... --- */ 2220 2221 static 2222 IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2223 { 2224 IRAtom* at; 2225 tl_assert(isShadowAtom(mce, vatomX)); 2226 tl_assert(isShadowAtom(mce, vatomY)); 2227 at = mkUifUV256(mce, vatomX, vatomY); 2228 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at)); 2229 return at; 2230 } 2231 2232 static 2233 IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX ) 2234 { 2235 IRAtom* at; 2236 tl_assert(isShadowAtom(mce, vatomX)); 2237 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX)); 2238 return at; 2239 } 2240 2241 /* --- ... and ... 32Fx8 versions of the same ... --- */ 2242 2243 static 2244 IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2245 { 2246 IRAtom* at; 2247 tl_assert(isShadowAtom(mce, vatomX)); 2248 tl_assert(isShadowAtom(mce, vatomY)); 2249 at = mkUifUV256(mce, vatomX, vatomY); 2250 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at)); 2251 return at; 2252 } 2253 2254 static 2255 IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX ) 2256 { 2257 IRAtom* at; 2258 tl_assert(isShadowAtom(mce, vatomX)); 2259 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX)); 2260 return at; 2261 } 2262 2263 /* --- 64Fx2 binary FP ops, with rounding mode --- */ 2264 2265 static 2266 IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM, 2267 IRAtom* vatomX, IRAtom* vatomY ) 2268 { 2269 /* This is the same as binary64Fx2, except that we subsequently 2270 pessimise vRM (definedness of the rounding mode), widen to 128 2271 bits and UifU it into the result. As with the scalar cases, if 2272 the RM is a constant then it is defined and so this extra bit 2273 will get constant-folded out later. */ 2274 // "do" the vector args 2275 IRAtom* t1 = binary64Fx2(mce, vatomX, vatomY); 2276 // PCast the RM, and widen it to 128 bits 2277 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM); 2278 // Roll it into the result 2279 t1 = mkUifUV128(mce, t1, t2); 2280 return t1; 2281 } 2282 2283 /* --- ... and ... 32Fx4 versions of the same --- */ 2284 2285 static 2286 IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, 2287 IRAtom* vatomX, IRAtom* vatomY ) 2288 { 2289 IRAtom* t1 = binary32Fx4(mce, vatomX, vatomY); 2290 // PCast the RM, and widen it to 128 bits 2291 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM); 2292 // Roll it into the result 2293 t1 = mkUifUV128(mce, t1, t2); 2294 return t1; 2295 } 2296 2297 /* --- ... and ... 64Fx4 versions of the same --- */ 2298 2299 static 2300 IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, 2301 IRAtom* vatomX, IRAtom* vatomY ) 2302 { 2303 IRAtom* t1 = binary64Fx4(mce, vatomX, vatomY); 2304 // PCast the RM, and widen it to 256 bits 2305 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM); 2306 // Roll it into the result 2307 t1 = mkUifUV256(mce, t1, t2); 2308 return t1; 2309 } 2310 2311 /* --- ... and ... 
32Fx8 versions of the same --- */ 2312 2313 static 2314 IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM, 2315 IRAtom* vatomX, IRAtom* vatomY ) 2316 { 2317 IRAtom* t1 = binary32Fx8(mce, vatomX, vatomY); 2318 // PCast the RM, and widen it to 256 bits 2319 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM); 2320 // Roll it into the result 2321 t1 = mkUifUV256(mce, t1, t2); 2322 return t1; 2323 } 2324 2325 2326 /* --- --- Vector saturated narrowing --- --- */ 2327 2328 /* We used to do something very clever here, but on closer inspection 2329 (2011-Jun-15), and in particular bug #279698, it turns out to be 2330 wrong. Part of the problem came from the fact that for a long 2331 time, the IR primops to do with saturated narrowing were 2332 underspecified and managed to confuse multiple cases which needed 2333 to be separate: the op names had a signedness qualifier, but in 2334 fact the source and destination signednesses needed to be specified 2335 independently, so the op names really need two independent 2336 signedness specifiers. 2337 2338 As of 2011-Jun-15 (ish) the underspecification was sorted out 2339 properly. The incorrect instrumentation remained, though. That 2340 has now (2011-Oct-22) been fixed. 2341 2342 What we now do is simple: 2343 2344 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a 2345 number of lanes, X is the source lane width and signedness, and Y 2346 is the destination lane width and signedness. In all cases the 2347 destination lane width is half the source lane width, so the names 2348 have a bit of redundancy, but are at least easy to read. 2349 2350 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s 2351 to unsigned 16s. 2352 2353 Let Vanilla(OP) be a function that takes OP, one of these 2354 saturating narrowing ops, and produces the same "shaped" narrowing 2355 op which is not saturating, but merely dumps the most significant 2356 bits. "same shape" means that the lane numbers and widths are the 2357 same as with OP. 2358 2359 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8) 2360 = Iop_NarrowBin32to16x8, 2361 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by 2362 dumping the top half of each lane. 2363 2364 So, with that in place, the scheme is simple, and it is simple to 2365 pessimise each lane individually and then apply Vanilla(OP) so as 2366 to get the result in the right "shape". 
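   For example, Iop_QNarrowBin32Sto16Ux8 is instrumented as
   Iop_NarrowBin32to16x8 applied to the CmpNEZ32x4-pessimised V bits
   of each arg (see vectorNarrowBinV128 below).  More generally: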
   If the original OP is
   QNarrowBinXtoYxZ then we produce

      Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )

   or for the case when OP is unary (Iop_QNarrowUn*)

      Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
*/
static
IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
{
   switch (qnarrowOp) {
      /* Binary: (128, 128) -> 128 */
      case Iop_QNarrowBin16Sto8Ux16:
      case Iop_QNarrowBin16Sto8Sx16:
      case Iop_QNarrowBin16Uto8Ux16:
         return Iop_NarrowBin16to8x16;
      case Iop_QNarrowBin32Sto16Ux8:
      case Iop_QNarrowBin32Sto16Sx8:
      case Iop_QNarrowBin32Uto16Ux8:
         return Iop_NarrowBin32to16x8;
      case Iop_QNarrowBin64Sto32Sx4:
      case Iop_QNarrowBin64Uto32Ux4:
         return Iop_NarrowBin64to32x4;
      /* Binary: (64, 64) -> 64 */
      case Iop_QNarrowBin32Sto16Sx4:
         return Iop_NarrowBin32to16x4;
      case Iop_QNarrowBin16Sto8Ux8:
      case Iop_QNarrowBin16Sto8Sx8:
         return Iop_NarrowBin16to8x8;
      /* Unary: 128 -> 64 */
      case Iop_QNarrowUn64Uto32Ux2:
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
         return Iop_NarrowUn64to32x2;
      case Iop_QNarrowUn32Uto16Ux4:
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
         return Iop_NarrowUn32to16x4;
      case Iop_QNarrowUn16Uto8Ux8:
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
         return Iop_NarrowUn16to8x8;
      default:
         ppIROp(qnarrowOp);
         VG_(tool_panic)("vanillaNarrowOpOfShape");
   }
}

static
IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
                              IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrowBin64Sto32Sx4: pcast = mkPCast64x2; break;
      case Iop_QNarrowBin64Uto32Ux4: pcast = mkPCast64x2; break;
      case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowBinV128");
   }
   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
   return at3;
}

static
IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
                            IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
      case Iop_QNarrowBin16Sto8Sx8:  pcast = mkPCast16x4; break;
      case Iop_QNarrowBin16Sto8Ux8:  pcast = mkPCast16x4; break;
      default: VG_(tool_panic)("vectorNarrowBin64");
   }
   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
   return at3;
}

static
IRAtom* vectorNarrowUnV128 (
MCEnv* mce, IROp narrow_op, 2464 IRAtom* vatom1) 2465 { 2466 IRAtom *at1, *at2; 2467 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2468 tl_assert(isShadowAtom(mce,vatom1)); 2469 /* For vanilla narrowing (non-saturating), we can just apply 2470 the op directly to the V bits. */ 2471 switch (narrow_op) { 2472 case Iop_NarrowUn16to8x8: 2473 case Iop_NarrowUn32to16x4: 2474 case Iop_NarrowUn64to32x2: 2475 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1)); 2476 return at1; 2477 default: 2478 break; /* Do Plan B */ 2479 } 2480 /* Plan B: for ops that involve a saturation operation on the args, 2481 we must PCast before the vanilla narrow. */ 2482 switch (narrow_op) { 2483 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break; 2484 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break; 2485 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break; 2486 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break; 2487 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break; 2488 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break; 2489 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break; 2490 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break; 2491 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break; 2492 default: VG_(tool_panic)("vectorNarrowUnV128"); 2493 } 2494 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2495 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1)); 2496 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1)); 2497 return at2; 2498 } 2499 2500 static 2501 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op, 2502 IRAtom* vatom1) 2503 { 2504 IRAtom *at1, *at2; 2505 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2506 switch (longen_op) { 2507 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break; 2508 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break; 2509 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break; 2510 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break; 2511 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break; 2512 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break; 2513 default: VG_(tool_panic)("vectorWidenI64"); 2514 } 2515 tl_assert(isShadowAtom(mce,vatom1)); 2516 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1)); 2517 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1)); 2518 return at2; 2519 } 2520 2521 2522 /* --- --- Vector integer arithmetic --- --- */ 2523 2524 /* Simple ... UifU the args and per-lane pessimise the results. 
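   E.g. binary32Ix4 below computes CmpNEZ32x4(UifUV128(vatom1, vatom2)):
   a 32-bit result lane is marked wholly undefined iff either arg has
   any undefined bit anywhere in that lane.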
*/ 2525 2526 /* --- V256-bit versions --- */ 2527 2528 static 2529 IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2530 { 2531 IRAtom* at; 2532 at = mkUifUV256(mce, vatom1, vatom2); 2533 at = mkPCast8x32(mce, at); 2534 return at; 2535 } 2536 2537 static 2538 IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2539 { 2540 IRAtom* at; 2541 at = mkUifUV256(mce, vatom1, vatom2); 2542 at = mkPCast16x16(mce, at); 2543 return at; 2544 } 2545 2546 static 2547 IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2548 { 2549 IRAtom* at; 2550 at = mkUifUV256(mce, vatom1, vatom2); 2551 at = mkPCast32x8(mce, at); 2552 return at; 2553 } 2554 2555 static 2556 IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2557 { 2558 IRAtom* at; 2559 at = mkUifUV256(mce, vatom1, vatom2); 2560 at = mkPCast64x4(mce, at); 2561 return at; 2562 } 2563 2564 /* --- V128-bit versions --- */ 2565 2566 static 2567 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2568 { 2569 IRAtom* at; 2570 at = mkUifUV128(mce, vatom1, vatom2); 2571 at = mkPCast8x16(mce, at); 2572 return at; 2573 } 2574 2575 static 2576 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2577 { 2578 IRAtom* at; 2579 at = mkUifUV128(mce, vatom1, vatom2); 2580 at = mkPCast16x8(mce, at); 2581 return at; 2582 } 2583 2584 static 2585 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2586 { 2587 IRAtom* at; 2588 at = mkUifUV128(mce, vatom1, vatom2); 2589 at = mkPCast32x4(mce, at); 2590 return at; 2591 } 2592 2593 static 2594 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2595 { 2596 IRAtom* at; 2597 at = mkUifUV128(mce, vatom1, vatom2); 2598 at = mkPCast64x2(mce, at); 2599 return at; 2600 } 2601 2602 /* --- 64-bit versions --- */ 2603 2604 static 2605 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2606 { 2607 IRAtom* at; 2608 at = mkUifU64(mce, vatom1, vatom2); 2609 at = mkPCast8x8(mce, at); 2610 return at; 2611 } 2612 2613 static 2614 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2615 { 2616 IRAtom* at; 2617 at = mkUifU64(mce, vatom1, vatom2); 2618 at = mkPCast16x4(mce, at); 2619 return at; 2620 } 2621 2622 static 2623 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2624 { 2625 IRAtom* at; 2626 at = mkUifU64(mce, vatom1, vatom2); 2627 at = mkPCast32x2(mce, at); 2628 return at; 2629 } 2630 2631 static 2632 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2633 { 2634 IRAtom* at; 2635 at = mkUifU64(mce, vatom1, vatom2); 2636 at = mkPCastTo(mce, Ity_I64, at); 2637 return at; 2638 } 2639 2640 /* --- 32-bit versions --- */ 2641 2642 static 2643 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2644 { 2645 IRAtom* at; 2646 at = mkUifU32(mce, vatom1, vatom2); 2647 at = mkPCast8x4(mce, at); 2648 return at; 2649 } 2650 2651 static 2652 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2653 { 2654 IRAtom* at; 2655 at = mkUifU32(mce, vatom1, vatom2); 2656 at = mkPCast16x2(mce, at); 2657 return at; 2658 } 2659 2660 2661 /*------------------------------------------------------------*/ 2662 /*--- Generate shadow values from all kinds of IRExprs. 
---*/
/*------------------------------------------------------------*/

static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );
   IRAtom* vatom4 = expr2vbits( mce, atom4 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64 */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);

      case Iop_MAddF32:
      case Iop_MSubF32:
         /* I32(rm) x F32 x F32 x F32 -> F32 */
         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);

      /* V256-bit data-steering */
      case Iop_64x4toV256:
         return assignNew('V', mce, Ity_V256,
                          IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}


static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF128:
      case Iop_AddD128:
      case Iop_SubF128:
      case Iop_SubD128:
      case Iop_MulF128:
      case Iop_MulD128:
      case Iop_DivF128:
      case Iop_DivD128:
      case Iop_QuantizeD128:
         /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_AddF64:
      case Iop_AddD64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubD64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulD64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivD64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
      case Iop_QuantizeD64:
         /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> F32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case
Iop_SignificanceRoundD64: 2775 /* IRRoundingMode(I32) x I8 x D64 -> D64 */ 2776 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3); 2777 case Iop_SignificanceRoundD128: 2778 /* IRRoundingMode(I32) x I8 x D128 -> D128 */ 2779 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3); 2780 case Iop_ExtractV128: 2781 complainIfUndefined(mce, atom3, NULL); 2782 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3)); 2783 case Iop_Extract64: 2784 complainIfUndefined(mce, atom3, NULL); 2785 return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3)); 2786 case Iop_SetElem8x8: 2787 case Iop_SetElem16x4: 2788 case Iop_SetElem32x2: 2789 complainIfUndefined(mce, atom2, NULL); 2790 return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3)); 2791 /* BCDIops */ 2792 case Iop_BCDAdd: 2793 case Iop_BCDSub: 2794 complainIfUndefined(mce, atom3, NULL); 2795 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3)); 2796 2797 /* Vector FP with rounding mode as the first arg */ 2798 case Iop_Add64Fx2: 2799 case Iop_Sub64Fx2: 2800 case Iop_Mul64Fx2: 2801 case Iop_Div64Fx2: 2802 return binary64Fx2_w_rm(mce, vatom1, vatom2, vatom3); 2803 2804 case Iop_Add32Fx4: 2805 case Iop_Sub32Fx4: 2806 case Iop_Mul32Fx4: 2807 case Iop_Div32Fx4: 2808 return binary32Fx4_w_rm(mce, vatom1, vatom2, vatom3); 2809 2810 case Iop_Add64Fx4: 2811 case Iop_Sub64Fx4: 2812 case Iop_Mul64Fx4: 2813 case Iop_Div64Fx4: 2814 return binary64Fx4_w_rm(mce, vatom1, vatom2, vatom3); 2815 2816 case Iop_Add32Fx8: 2817 case Iop_Sub32Fx8: 2818 case Iop_Mul32Fx8: 2819 case Iop_Div32Fx8: 2820 return binary32Fx8_w_rm(mce, vatom1, vatom2, vatom3); 2821 2822 default: 2823 ppIROp(op); 2824 VG_(tool_panic)("memcheck:expr2vbits_Triop"); 2825 } 2826 } 2827 2828 2829 static 2830 IRAtom* expr2vbits_Binop ( MCEnv* mce, 2831 IROp op, 2832 IRAtom* atom1, IRAtom* atom2 ) 2833 { 2834 IRType and_or_ty; 2835 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*); 2836 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*); 2837 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*); 2838 2839 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2840 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2841 2842 tl_assert(isOriginalAtom(mce,atom1)); 2843 tl_assert(isOriginalAtom(mce,atom2)); 2844 tl_assert(isShadowAtom(mce,vatom1)); 2845 tl_assert(isShadowAtom(mce,vatom2)); 2846 tl_assert(sameKindedAtoms(atom1,vatom1)); 2847 tl_assert(sameKindedAtoms(atom2,vatom2)); 2848 switch (op) { 2849 2850 /* 32-bit SIMD */ 2851 2852 case Iop_Add16x2: 2853 case Iop_HAdd16Ux2: 2854 case Iop_HAdd16Sx2: 2855 case Iop_Sub16x2: 2856 case Iop_HSub16Ux2: 2857 case Iop_HSub16Sx2: 2858 case Iop_QAdd16Sx2: 2859 case Iop_QSub16Sx2: 2860 case Iop_QSub16Ux2: 2861 case Iop_QAdd16Ux2: 2862 return binary16Ix2(mce, vatom1, vatom2); 2863 2864 case Iop_Add8x4: 2865 case Iop_HAdd8Ux4: 2866 case Iop_HAdd8Sx4: 2867 case Iop_Sub8x4: 2868 case Iop_HSub8Ux4: 2869 case Iop_HSub8Sx4: 2870 case Iop_QSub8Ux4: 2871 case Iop_QAdd8Ux4: 2872 case Iop_QSub8Sx4: 2873 case Iop_QAdd8Sx4: 2874 return binary8Ix4(mce, vatom1, vatom2); 2875 2876 /* 64-bit SIMD */ 2877 2878 case Iop_ShrN8x8: 2879 case Iop_ShrN16x4: 2880 case Iop_ShrN32x2: 2881 case Iop_SarN8x8: 2882 case Iop_SarN16x4: 2883 case Iop_SarN32x2: 2884 case Iop_ShlN16x4: 2885 case Iop_ShlN32x2: 2886 case Iop_ShlN8x8: 2887 /* Same scheme as with all other shifts. 
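         That is: complain if the (scalar) shift amount in atom2 is
         undefined, then shift the V bits of the first arg by the
         original amount.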
*/ 2888 complainIfUndefined(mce, atom2, NULL); 2889 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 2890 2891 case Iop_QNarrowBin32Sto16Sx4: 2892 case Iop_QNarrowBin16Sto8Sx8: 2893 case Iop_QNarrowBin16Sto8Ux8: 2894 return vectorNarrowBin64(mce, op, vatom1, vatom2); 2895 2896 case Iop_Min8Ux8: 2897 case Iop_Min8Sx8: 2898 case Iop_Max8Ux8: 2899 case Iop_Max8Sx8: 2900 case Iop_Avg8Ux8: 2901 case Iop_QSub8Sx8: 2902 case Iop_QSub8Ux8: 2903 case Iop_Sub8x8: 2904 case Iop_CmpGT8Sx8: 2905 case Iop_CmpGT8Ux8: 2906 case Iop_CmpEQ8x8: 2907 case Iop_QAdd8Sx8: 2908 case Iop_QAdd8Ux8: 2909 case Iop_QSal8x8: 2910 case Iop_QShl8x8: 2911 case Iop_Add8x8: 2912 case Iop_Mul8x8: 2913 case Iop_PolynomialMul8x8: 2914 return binary8Ix8(mce, vatom1, vatom2); 2915 2916 case Iop_Min16Sx4: 2917 case Iop_Min16Ux4: 2918 case Iop_Max16Sx4: 2919 case Iop_Max16Ux4: 2920 case Iop_Avg16Ux4: 2921 case Iop_QSub16Ux4: 2922 case Iop_QSub16Sx4: 2923 case Iop_Sub16x4: 2924 case Iop_Mul16x4: 2925 case Iop_MulHi16Sx4: 2926 case Iop_MulHi16Ux4: 2927 case Iop_CmpGT16Sx4: 2928 case Iop_CmpGT16Ux4: 2929 case Iop_CmpEQ16x4: 2930 case Iop_QAdd16Sx4: 2931 case Iop_QAdd16Ux4: 2932 case Iop_QSal16x4: 2933 case Iop_QShl16x4: 2934 case Iop_Add16x4: 2935 case Iop_QDMulHi16Sx4: 2936 case Iop_QRDMulHi16Sx4: 2937 return binary16Ix4(mce, vatom1, vatom2); 2938 2939 case Iop_Sub32x2: 2940 case Iop_Mul32x2: 2941 case Iop_Max32Sx2: 2942 case Iop_Max32Ux2: 2943 case Iop_Min32Sx2: 2944 case Iop_Min32Ux2: 2945 case Iop_CmpGT32Sx2: 2946 case Iop_CmpGT32Ux2: 2947 case Iop_CmpEQ32x2: 2948 case Iop_Add32x2: 2949 case Iop_QAdd32Ux2: 2950 case Iop_QAdd32Sx2: 2951 case Iop_QSub32Ux2: 2952 case Iop_QSub32Sx2: 2953 case Iop_QSal32x2: 2954 case Iop_QShl32x2: 2955 case Iop_QDMulHi32Sx2: 2956 case Iop_QRDMulHi32Sx2: 2957 return binary32Ix2(mce, vatom1, vatom2); 2958 2959 case Iop_QSub64Ux1: 2960 case Iop_QSub64Sx1: 2961 case Iop_QAdd64Ux1: 2962 case Iop_QAdd64Sx1: 2963 case Iop_QSal64x1: 2964 case Iop_QShl64x1: 2965 case Iop_Sal64x1: 2966 return binary64Ix1(mce, vatom1, vatom2); 2967 2968 case Iop_QShlN8Sx8: 2969 case Iop_QShlN8x8: 2970 case Iop_QSalN8x8: 2971 complainIfUndefined(mce, atom2, NULL); 2972 return mkPCast8x8(mce, vatom1); 2973 2974 case Iop_QShlN16Sx4: 2975 case Iop_QShlN16x4: 2976 case Iop_QSalN16x4: 2977 complainIfUndefined(mce, atom2, NULL); 2978 return mkPCast16x4(mce, vatom1); 2979 2980 case Iop_QShlN32Sx2: 2981 case Iop_QShlN32x2: 2982 case Iop_QSalN32x2: 2983 complainIfUndefined(mce, atom2, NULL); 2984 return mkPCast32x2(mce, vatom1); 2985 2986 case Iop_QShlN64Sx1: 2987 case Iop_QShlN64x1: 2988 case Iop_QSalN64x1: 2989 complainIfUndefined(mce, atom2, NULL); 2990 return mkPCast32x2(mce, vatom1); 2991 2992 case Iop_PwMax32Sx2: 2993 case Iop_PwMax32Ux2: 2994 case Iop_PwMin32Sx2: 2995 case Iop_PwMin32Ux2: 2996 case Iop_PwMax32Fx2: 2997 case Iop_PwMin32Fx2: 2998 return assignNew('V', mce, Ity_I64, 2999 binop(Iop_PwMax32Ux2, 3000 mkPCast32x2(mce, vatom1), 3001 mkPCast32x2(mce, vatom2))); 3002 3003 case Iop_PwMax16Sx4: 3004 case Iop_PwMax16Ux4: 3005 case Iop_PwMin16Sx4: 3006 case Iop_PwMin16Ux4: 3007 return assignNew('V', mce, Ity_I64, 3008 binop(Iop_PwMax16Ux4, 3009 mkPCast16x4(mce, vatom1), 3010 mkPCast16x4(mce, vatom2))); 3011 3012 case Iop_PwMax8Sx8: 3013 case Iop_PwMax8Ux8: 3014 case Iop_PwMin8Sx8: 3015 case Iop_PwMin8Ux8: 3016 return assignNew('V', mce, Ity_I64, 3017 binop(Iop_PwMax8Ux8, 3018 mkPCast8x8(mce, vatom1), 3019 mkPCast8x8(mce, vatom2))); 3020 3021 case Iop_PwAdd32x2: 3022 case Iop_PwAdd32Fx2: 3023 return 
mkPCast32x2(mce, 3024 assignNew('V', mce, Ity_I64, 3025 binop(Iop_PwAdd32x2, 3026 mkPCast32x2(mce, vatom1), 3027 mkPCast32x2(mce, vatom2)))); 3028 3029 case Iop_PwAdd16x4: 3030 return mkPCast16x4(mce, 3031 assignNew('V', mce, Ity_I64, 3032 binop(op, mkPCast16x4(mce, vatom1), 3033 mkPCast16x4(mce, vatom2)))); 3034 3035 case Iop_PwAdd8x8: 3036 return mkPCast8x8(mce, 3037 assignNew('V', mce, Ity_I64, 3038 binop(op, mkPCast8x8(mce, vatom1), 3039 mkPCast8x8(mce, vatom2)))); 3040 3041 case Iop_Shl8x8: 3042 case Iop_Shr8x8: 3043 case Iop_Sar8x8: 3044 case Iop_Sal8x8: 3045 return mkUifU64(mce, 3046 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 3047 mkPCast8x8(mce,vatom2) 3048 ); 3049 3050 case Iop_Shl16x4: 3051 case Iop_Shr16x4: 3052 case Iop_Sar16x4: 3053 case Iop_Sal16x4: 3054 return mkUifU64(mce, 3055 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 3056 mkPCast16x4(mce,vatom2) 3057 ); 3058 3059 case Iop_Shl32x2: 3060 case Iop_Shr32x2: 3061 case Iop_Sar32x2: 3062 case Iop_Sal32x2: 3063 return mkUifU64(mce, 3064 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 3065 mkPCast32x2(mce,vatom2) 3066 ); 3067 3068 /* 64-bit data-steering */ 3069 case Iop_InterleaveLO32x2: 3070 case Iop_InterleaveLO16x4: 3071 case Iop_InterleaveLO8x8: 3072 case Iop_InterleaveHI32x2: 3073 case Iop_InterleaveHI16x4: 3074 case Iop_InterleaveHI8x8: 3075 case Iop_CatOddLanes8x8: 3076 case Iop_CatEvenLanes8x8: 3077 case Iop_CatOddLanes16x4: 3078 case Iop_CatEvenLanes16x4: 3079 case Iop_InterleaveOddLanes8x8: 3080 case Iop_InterleaveEvenLanes8x8: 3081 case Iop_InterleaveOddLanes16x4: 3082 case Iop_InterleaveEvenLanes16x4: 3083 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2)); 3084 3085 case Iop_GetElem8x8: 3086 complainIfUndefined(mce, atom2, NULL); 3087 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 3088 case Iop_GetElem16x4: 3089 complainIfUndefined(mce, atom2, NULL); 3090 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 3091 case Iop_GetElem32x2: 3092 complainIfUndefined(mce, atom2, NULL); 3093 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 3094 3095 /* Perm8x8: rearrange values in left arg using steering values 3096 from right arg. So rearrange the vbits in the same way but 3097 pessimise wrt steering values. */ 3098 case Iop_Perm8x8: 3099 return mkUifU64( 3100 mce, 3101 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 3102 mkPCast8x8(mce, vatom2) 3103 ); 3104 3105 /* V128-bit SIMD */ 3106 3107 case Iop_ShrN8x16: 3108 case Iop_ShrN16x8: 3109 case Iop_ShrN32x4: 3110 case Iop_ShrN64x2: 3111 case Iop_SarN8x16: 3112 case Iop_SarN16x8: 3113 case Iop_SarN32x4: 3114 case Iop_SarN64x2: 3115 case Iop_ShlN8x16: 3116 case Iop_ShlN16x8: 3117 case Iop_ShlN32x4: 3118 case Iop_ShlN64x2: 3119 /* Same scheme as with all other shifts. Note: 22 Oct 05: 3120 this is wrong now, scalar shifts are done properly lazily. 3121 Vector shifts should be fixed too. */ 3122 complainIfUndefined(mce, atom2, NULL); 3123 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 3124 3125 /* V x V shifts/rotates are done using the standard lazy scheme. 
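   That is, shadow = (vatom1 OP atom2) `UifU` PCast-per-lane(vatom2):
   the shiftee's V bits are steered by the original per-lane shift
   amounts, and each result lane is additionally poisoned if its
   shift-amount lane is anywhere undefined.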
*/
      case Iop_Shl8x16:
      case Iop_Shr8x16:
      case Iop_Sar8x16:
      case Iop_Sal8x16:
      case Iop_Rol8x16:
         return mkUifUV128(mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast8x16(mce,vatom2)
                );

      case Iop_Shl16x8:
      case Iop_Shr16x8:
      case Iop_Sar16x8:
      case Iop_Sal16x8:
      case Iop_Rol16x8:
         return mkUifUV128(mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast16x8(mce,vatom2)
                );

      case Iop_Shl32x4:
      case Iop_Shr32x4:
      case Iop_Sar32x4:
      case Iop_Sal32x4:
      case Iop_Rol32x4:
         return mkUifUV128(mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast32x4(mce,vatom2)
                );

      case Iop_Shl64x2:
      case Iop_Shr64x2:
      case Iop_Sar64x2:
      case Iop_Sal64x2:
      case Iop_Rol64x2:
         return mkUifUV128(mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast64x2(mce,vatom2)
                );

      case Iop_F32ToFixed32Ux4_RZ:
      case Iop_F32ToFixed32Sx4_RZ:
      case Iop_Fixed32UToF32x4_RN:
      case Iop_Fixed32SToF32x4_RN:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast32x4(mce, vatom1);

      case Iop_F32ToFixed32Ux2_RZ:
      case Iop_F32ToFixed32Sx2_RZ:
      case Iop_Fixed32UToF32x2_RN:
      case Iop_Fixed32SToF32x2_RN:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast32x2(mce, vatom1);

      case Iop_QSub8Ux16:
      case Iop_QSub8Sx16:
      case Iop_Sub8x16:
      case Iop_Min8Ux16:
      case Iop_Min8Sx16:
      case Iop_Max8Ux16:
      case Iop_Max8Sx16:
      case Iop_CmpGT8Sx16:
      case Iop_CmpGT8Ux16:
      case Iop_CmpEQ8x16:
      case Iop_Avg8Ux16:
      case Iop_Avg8Sx16:
      case Iop_QAdd8Ux16:
      case Iop_QAdd8Sx16:
      case Iop_QSal8x16:
      case Iop_QShl8x16:
      case Iop_Add8x16:
      case Iop_Mul8x16:
      case Iop_PolynomialMul8x16:
      case Iop_PolynomialMulAdd8x16:
         return binary8Ix16(mce, vatom1, vatom2);

      case Iop_QSub16Ux8:
      case Iop_QSub16Sx8:
      case Iop_Sub16x8:
      case Iop_Mul16x8:
      case Iop_MulHi16Sx8:
      case Iop_MulHi16Ux8:
      case Iop_Min16Sx8:
      case Iop_Min16Ux8:
      case Iop_Max16Sx8:
      case Iop_Max16Ux8:
      case Iop_CmpGT16Sx8:
      case Iop_CmpGT16Ux8:
      case Iop_CmpEQ16x8:
      case Iop_Avg16Ux8:
      case Iop_Avg16Sx8:
      case Iop_QAdd16Ux8:
      case Iop_QAdd16Sx8:
      case Iop_QSal16x8:
      case Iop_QShl16x8:
      case Iop_Add16x8:
      case Iop_QDMulHi16Sx8:
      case Iop_QRDMulHi16Sx8:
      case Iop_PolynomialMulAdd16x8:
         return binary16Ix8(mce, vatom1, vatom2);

      case Iop_Sub32x4:
      case Iop_CmpGT32Sx4:
      case Iop_CmpGT32Ux4:
      case Iop_CmpEQ32x4:
      case Iop_QAdd32Sx4:
      case Iop_QAdd32Ux4:
      case Iop_QSub32Sx4:
      case Iop_QSub32Ux4:
      case Iop_QSal32x4:
      case Iop_QShl32x4:
      case Iop_Avg32Ux4:
      case Iop_Avg32Sx4:
      case Iop_Add32x4:
      case Iop_Max32Ux4:
      case Iop_Max32Sx4:
      case Iop_Min32Ux4:
      case Iop_Min32Sx4:
      case Iop_Mul32x4:
      case Iop_QDMulHi32Sx4:
      case Iop_QRDMulHi32Sx4:
      case Iop_PolynomialMulAdd32x4:
         return binary32Ix4(mce, vatom1, vatom2);

      case Iop_Sub64x2:
      case Iop_Add64x2:
      case Iop_Max64Sx2:
      case Iop_Max64Ux2:
      case Iop_Min64Sx2:
      case Iop_Min64Ux2:
      case Iop_CmpEQ64x2:
      case Iop_CmpGT64Sx2:
      case Iop_CmpGT64Ux2:
      case Iop_QSal64x2:
      case Iop_QShl64x2:
      case Iop_QAdd64Ux2:
      case Iop_QAdd64Sx2:
      case Iop_QSub64Ux2:
3264 case Iop_QSub64Sx2: 3265 case Iop_PolynomialMulAdd64x2: 3266 case Iop_CipherV128: 3267 case Iop_CipherLV128: 3268 case Iop_NCipherV128: 3269 case Iop_NCipherLV128: 3270 return binary64Ix2(mce, vatom1, vatom2); 3271 3272 case Iop_QNarrowBin64Sto32Sx4: 3273 case Iop_QNarrowBin64Uto32Ux4: 3274 case Iop_QNarrowBin32Sto16Sx8: 3275 case Iop_QNarrowBin32Uto16Ux8: 3276 case Iop_QNarrowBin32Sto16Ux8: 3277 case Iop_QNarrowBin16Sto8Sx16: 3278 case Iop_QNarrowBin16Uto8Ux16: 3279 case Iop_QNarrowBin16Sto8Ux16: 3280 return vectorNarrowBinV128(mce, op, vatom1, vatom2); 3281 3282 case Iop_Min64Fx2: 3283 case Iop_Max64Fx2: 3284 case Iop_CmpLT64Fx2: 3285 case Iop_CmpLE64Fx2: 3286 case Iop_CmpEQ64Fx2: 3287 case Iop_CmpUN64Fx2: 3288 return binary64Fx2(mce, vatom1, vatom2); 3289 3290 case Iop_Sub64F0x2: 3291 case Iop_Mul64F0x2: 3292 case Iop_Min64F0x2: 3293 case Iop_Max64F0x2: 3294 case Iop_Div64F0x2: 3295 case Iop_CmpLT64F0x2: 3296 case Iop_CmpLE64F0x2: 3297 case Iop_CmpEQ64F0x2: 3298 case Iop_CmpUN64F0x2: 3299 case Iop_Add64F0x2: 3300 return binary64F0x2(mce, vatom1, vatom2); 3301 3302 case Iop_Min32Fx4: 3303 case Iop_Max32Fx4: 3304 case Iop_CmpLT32Fx4: 3305 case Iop_CmpLE32Fx4: 3306 case Iop_CmpEQ32Fx4: 3307 case Iop_CmpUN32Fx4: 3308 case Iop_CmpGT32Fx4: 3309 case Iop_CmpGE32Fx4: 3310 case Iop_Recps32Fx4: 3311 case Iop_Rsqrts32Fx4: 3312 return binary32Fx4(mce, vatom1, vatom2); 3313 3314 case Iop_Sub32Fx2: 3315 case Iop_Mul32Fx2: 3316 case Iop_Min32Fx2: 3317 case Iop_Max32Fx2: 3318 case Iop_CmpEQ32Fx2: 3319 case Iop_CmpGT32Fx2: 3320 case Iop_CmpGE32Fx2: 3321 case Iop_Add32Fx2: 3322 case Iop_Recps32Fx2: 3323 case Iop_Rsqrts32Fx2: 3324 return binary32Fx2(mce, vatom1, vatom2); 3325 3326 case Iop_Sub32F0x4: 3327 case Iop_Mul32F0x4: 3328 case Iop_Min32F0x4: 3329 case Iop_Max32F0x4: 3330 case Iop_Div32F0x4: 3331 case Iop_CmpLT32F0x4: 3332 case Iop_CmpLE32F0x4: 3333 case Iop_CmpEQ32F0x4: 3334 case Iop_CmpUN32F0x4: 3335 case Iop_Add32F0x4: 3336 return binary32F0x4(mce, vatom1, vatom2); 3337 3338 case Iop_QShlN8Sx16: 3339 case Iop_QShlN8x16: 3340 case Iop_QSalN8x16: 3341 complainIfUndefined(mce, atom2, NULL); 3342 return mkPCast8x16(mce, vatom1); 3343 3344 case Iop_QShlN16Sx8: 3345 case Iop_QShlN16x8: 3346 case Iop_QSalN16x8: 3347 complainIfUndefined(mce, atom2, NULL); 3348 return mkPCast16x8(mce, vatom1); 3349 3350 case Iop_QShlN32Sx4: 3351 case Iop_QShlN32x4: 3352 case Iop_QSalN32x4: 3353 complainIfUndefined(mce, atom2, NULL); 3354 return mkPCast32x4(mce, vatom1); 3355 3356 case Iop_QShlN64Sx2: 3357 case Iop_QShlN64x2: 3358 case Iop_QSalN64x2: 3359 complainIfUndefined(mce, atom2, NULL); 3360 return mkPCast32x4(mce, vatom1); 3361 3362 case Iop_Mull32Sx2: 3363 case Iop_Mull32Ux2: 3364 case Iop_QDMulLong32Sx2: 3365 return vectorWidenI64(mce, Iop_Widen32Sto64x2, 3366 mkUifU64(mce, vatom1, vatom2)); 3367 3368 case Iop_Mull16Sx4: 3369 case Iop_Mull16Ux4: 3370 case Iop_QDMulLong16Sx4: 3371 return vectorWidenI64(mce, Iop_Widen16Sto32x4, 3372 mkUifU64(mce, vatom1, vatom2)); 3373 3374 case Iop_Mull8Sx8: 3375 case Iop_Mull8Ux8: 3376 case Iop_PolynomialMull8x8: 3377 return vectorWidenI64(mce, Iop_Widen8Sto16x8, 3378 mkUifU64(mce, vatom1, vatom2)); 3379 3380 case Iop_PwAdd32x4: 3381 return mkPCast32x4(mce, 3382 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1), 3383 mkPCast32x4(mce, vatom2)))); 3384 3385 case Iop_PwAdd16x8: 3386 return mkPCast16x8(mce, 3387 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1), 3388 mkPCast16x8(mce, vatom2)))); 3389 3390 case Iop_PwAdd8x16: 3391 return 
mkPCast8x16(mce, 3392 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1), 3393 mkPCast8x16(mce, vatom2)))); 3394 3395 /* V128-bit data-steering */ 3396 case Iop_SetV128lo32: 3397 case Iop_SetV128lo64: 3398 case Iop_64HLtoV128: 3399 case Iop_InterleaveLO64x2: 3400 case Iop_InterleaveLO32x4: 3401 case Iop_InterleaveLO16x8: 3402 case Iop_InterleaveLO8x16: 3403 case Iop_InterleaveHI64x2: 3404 case Iop_InterleaveHI32x4: 3405 case Iop_InterleaveHI16x8: 3406 case Iop_InterleaveHI8x16: 3407 case Iop_CatOddLanes8x16: 3408 case Iop_CatOddLanes16x8: 3409 case Iop_CatOddLanes32x4: 3410 case Iop_CatEvenLanes8x16: 3411 case Iop_CatEvenLanes16x8: 3412 case Iop_CatEvenLanes32x4: 3413 case Iop_InterleaveOddLanes8x16: 3414 case Iop_InterleaveOddLanes16x8: 3415 case Iop_InterleaveOddLanes32x4: 3416 case Iop_InterleaveEvenLanes8x16: 3417 case Iop_InterleaveEvenLanes16x8: 3418 case Iop_InterleaveEvenLanes32x4: 3419 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2)); 3420 3421 case Iop_GetElem8x16: 3422 complainIfUndefined(mce, atom2, NULL); 3423 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 3424 case Iop_GetElem16x8: 3425 complainIfUndefined(mce, atom2, NULL); 3426 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 3427 case Iop_GetElem32x4: 3428 complainIfUndefined(mce, atom2, NULL); 3429 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 3430 case Iop_GetElem64x2: 3431 complainIfUndefined(mce, atom2, NULL); 3432 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 3433 3434 /* Perm8x16: rearrange values in left arg using steering values 3435 from right arg. So rearrange the vbits in the same way but 3436 pessimise wrt steering values. Perm32x4 ditto. */ 3437 case Iop_Perm8x16: 3438 return mkUifUV128( 3439 mce, 3440 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3441 mkPCast8x16(mce, vatom2) 3442 ); 3443 case Iop_Perm32x4: 3444 return mkUifUV128( 3445 mce, 3446 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3447 mkPCast32x4(mce, vatom2) 3448 ); 3449 3450 /* These two take the lower half of each 16-bit lane, sign/zero 3451 extend it to 32, and multiply together, producing a 32x4 3452 result (and implicitly ignoring half the operand bits). So 3453 treat it as a bunch of independent 16x8 operations, but then 3454 do 32-bit shifts left-right to copy the lower half results 3455 (which are all 0s or all 1s due to PCasting in binary16Ix8) 3456 into the upper half of each result lane. 
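         E.g. if after binary16Ix8 a 32-bit lane holds 0x0000FFFF (low
         half undefined), the ShlN32x4 by 16 gives 0xFFFF0000 and the
         SarN32x4 by 16 then gives 0xFFFFFFFF, poisoning the whole
         result lane; 0xFFFF0000 would instead end up as 0x00000000,
         since the discarded half-lane's V bits are irrelevant to the
         result.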
*/
      case Iop_MullEven16Ux8:
      case Iop_MullEven16Sx8: {
         IRAtom* at;
         at = binary16Ix8(mce,vatom1,vatom2);
         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
         return at;
      }

      /* Same deal as Iop_MullEven16{S,U}x8 */
      case Iop_MullEven8Ux16:
      case Iop_MullEven8Sx16: {
         IRAtom* at;
         at = binary8Ix16(mce,vatom1,vatom2);
         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
         return at;
      }

      /* Same deal as Iop_MullEven16{S,U}x8 */
      case Iop_MullEven32Ux4:
      case Iop_MullEven32Sx4: {
         IRAtom* at;
         at = binary32Ix4(mce,vatom1,vatom2);
         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN64x2, at, mkU8(32)));
         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN64x2, at, mkU8(32)));
         return at;
      }

      /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
         32x4 -> 16x8 laneage (and similarly for the other widths),
         discarding the upper half of each lane.  Simply apply the same
         op to the V bits, since this is really no more than a
         data-steering operation. */
      case Iop_NarrowBin32to16x8:
      case Iop_NarrowBin16to8x16:
      case Iop_NarrowBin64to32x4:
         return assignNew('V', mce, Ity_V128,
                                    binop(op, vatom1, vatom2));

      case Iop_ShrV128:
      case Iop_ShlV128:
         /* Same scheme as with all other shifts.  Note: 10 Nov 05:
            this is wrong now, scalar shifts are done properly lazily.
            Vector shifts should be fixed too. */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));

      /* SHA Iops */
      case Iop_SHA256:
      case Iop_SHA512:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));

      /* I128-bit data-steering */
      case Iop_64HLto128:
         return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));

      /* V256-bit SIMD */

      case Iop_Max64Fx4:
      case Iop_Min64Fx4:
         return binary64Fx4(mce, vatom1, vatom2);

      case Iop_Max32Fx8:
      case Iop_Min32Fx8:
         return binary32Fx8(mce, vatom1, vatom2);

      /* V256-bit data-steering */
      case Iop_V128HLtoV256:
         return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));

      /* Scalar floating point */

      case Iop_F32toI64S:
      case Iop_F32toI64U:
         /* I32(rm) x F32 -> I64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_I64StoF32:
         /* I32(rm) x I64 -> F32 */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_RoundF64toInt:
      case Iop_RoundF64toF32:
      case Iop_F64toI64S:
      case Iop_F64toI64U:
      case Iop_I64StoF64:
      case Iop_I64UtoF64:
      case Iop_SinF64:
      case Iop_CosF64:
      case Iop_TanF64:
      case Iop_2xm1F64:
      case Iop_SqrtF64:
         /* I32(rm) x I64/F64 -> I64/F64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_ShlD64:
      case Iop_ShrD64:
      case Iop_RoundD64toInt:
         /* I32(rm) x D64 -> D64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_ShlD128:
      case Iop_ShrD128:
      case Iop_RoundD128toInt:
         /* I32(rm) x D128 -> D128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_D64toI64S:
      case Iop_D64toI64U:
      case Iop_I64StoD64:
      case Iop_I64UtoD64:
         /* I32(rm) x I64/D64 -> D64/I64 */
      case Iop_I64StoF32:
         /* I32(rm) x I64 -> F32 */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_RoundF64toInt:
      case Iop_RoundF64toF32:
      case Iop_F64toI64S:
      case Iop_F64toI64U:
      case Iop_I64StoF64:
      case Iop_I64UtoF64:
      case Iop_SinF64:
      case Iop_CosF64:
      case Iop_TanF64:
      case Iop_2xm1F64:
      case Iop_SqrtF64:
         /* I32(rm) x I64/F64 -> I64/F64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_ShlD64:
      case Iop_ShrD64:
      case Iop_RoundD64toInt:
         /* I32(rm) x D64 -> D64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_ShlD128:
      case Iop_ShrD128:
      case Iop_RoundD128toInt:
         /* I32(rm) x D128 -> D128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_D64toI64S:
      case Iop_D64toI64U:
      case Iop_I64StoD64:
      case Iop_I64UtoD64:
         /* I32(rm) x I64/D64 -> D64/I64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_F32toD32:
      case Iop_F64toD32:
      case Iop_F128toD32:
      case Iop_D32toF32:
      case Iop_D64toF32:
      case Iop_D128toF32:
         /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_F32toD64:
      case Iop_F64toD64:
      case Iop_F128toD64:
      case Iop_D32toF64:
      case Iop_D64toF64:
      case Iop_D128toF64:
         /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_F32toD128:
      case Iop_F64toD128:
      case Iop_F128toD128:
      case Iop_D32toF128:
      case Iop_D64toF128:
      case Iop_D128toF128:
         /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_RoundF32toInt:
      case Iop_SqrtF32:
         /* I32(rm) x I32/F32 -> I32/F32 */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_SqrtF128:
         /* I32(rm) x F128 -> F128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_I32StoF32:
      case Iop_I32UtoF32:
      case Iop_F32toI32S:
      case Iop_F32toI32U:
         /* First arg is I32 (rounding mode), second is F32/I32 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32   */
      case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
      case Iop_F128toF32:  /* IRRoundingMode(I32) x F128 -> F32          */
      case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32   */
      case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32 */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64   */
      case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
      case Iop_F128toF64:  /* IRRoundingMode(I32) x F128 -> F64          */
      case Iop_D128toD64:  /* IRRoundingMode(I64) x D128 -> D64          */
      case Iop_D128toI64S: /* IRRoundingMode(I64) x D128 -> signed I64   */
      case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_F64HLtoF128:
      case Iop_D64HLtoD128:
         return assignNew('V', mce, Ity_I128,
                          binop(Iop_64HLto128, vatom1, vatom2));

      case Iop_F64toI32U:
      case Iop_F64toI32S:
      case Iop_F64toF32:
      case Iop_I64UtoF32:
      case Iop_D64toI32U:
      case Iop_D64toI32S:
         /* First arg is I32 (rounding mode), second is F64/D64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_D64toD32:
         /* First arg is I32 (rounding mode), second is D64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_F64toI16S:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I16, vatom1, vatom2);

      case Iop_InsertExpD64:
         /* I64 x I64 -> D64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_InsertExpD128:
         /* I64 x I128 -> D128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_CmpF32:
      case Iop_CmpF64:
      case Iop_CmpF128:
      case Iop_CmpD64:
      case Iop_CmpD128:
      case Iop_CmpExpD64:
      case Iop_CmpExpD128:
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      /* non-FP after here */

      case Iop_DivModU64to32:
      case Iop_DivModS64to32:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_DivModU128to64:
      case Iop_DivModS128to64:
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_8HLto16:
         return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
      case Iop_16HLto32:
         return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
      case Iop_32HLto64:
         return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));

      case Iop_DivModS64to64:
      case Iop_MullS64:
      case Iop_MullU64: {
         IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
         IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
         return assignNew('V', mce, Ity_I128,
                          binop(Iop_64HLto128, vHi64, vLo64));
      }

      case Iop_MullS32:
      case Iop_MullU32: {
         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
         return assignNew('V', mce, Ity_I64,
                          binop(Iop_32HLto64, vHi32, vLo32));
      }

      case Iop_MullS16:
      case Iop_MullU16: {
         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
         return assignNew('V', mce, Ity_I32,
                          binop(Iop_16HLto32, vHi16, vLo16));
      }

      case Iop_MullS8:
      case Iop_MullU8: {
         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
         return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
      }
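      /* Added worked sketch of the widening-multiply scheme above,
         for MullU32 with vatom1 = 0x0000FF00 (byte 1 undefined) and
         vatom2 = 0x00000000 (fully defined):

            mkUifU32                   -> 0x0000FF00
            mkLeft32 (roughly x | -x)  -> 0xFFFFFF00  (undefinedness may
                                                       propagate leftwards
                                                       via carries)
            vHi32 = mkPCastTo          -> 0xFFFFFFFF  (any doubt in the low
                                                       half taints the whole
                                                       high half)

         giving a 64-bit shadow of 0xFFFFFFFFFFFFFF00. */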
      case Iop_Sad8Ux4: /* maybe we could do better?  For the moment,
                           do mkLazy2. */
      case Iop_DivS32:
      case Iop_DivU32:
      case Iop_DivU32E:
      case Iop_DivS32E:
      case Iop_QAdd32S: /* could probably do better */
      case Iop_QSub32S: /* could probably do better */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_DivS64:
      case Iop_DivU64:
      case Iop_DivS64E:
      case Iop_DivU64E:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_Add32:
         if (mce->bogusLiterals || mce->useLLVMworkarounds)
            return expensiveAddSub(mce,True,Ity_I32,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub32;
      case Iop_Sub32:
         if (mce->bogusLiterals)
            return expensiveAddSub(mce,False,Ity_I32,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub32;

      cheap_AddSub32:
      case Iop_Mul32:
         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
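      /* Added illustration of the cheap scheme: mkLeft32(x) is
         (roughly) x | -x, which smears any undefined bit leftwards,
         modelling the worst case for add/sub/mul, where a carry or
         borrow out of an undefined bit position can disturb every
         higher bit while bits below the lowest undefined bit stay
         trustworthy.  E.g. a UifU32 of 0x00000010 yields mkLeft32 =
         0xFFFFFFF0: bits 0..3 remain defined, bits 4..31 do not. */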
      case Iop_CmpORD32S:
      case Iop_CmpORD32U:
      case Iop_CmpORD64S:
      case Iop_CmpORD64U:
         return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);

      case Iop_Add64:
         if (mce->bogusLiterals || mce->useLLVMworkarounds)
            return expensiveAddSub(mce,True,Ity_I64,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub64;
      case Iop_Sub64:
         if (mce->bogusLiterals)
            return expensiveAddSub(mce,False,Ity_I64,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub64;

      cheap_AddSub64:
      case Iop_Mul64:
         return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));

      case Iop_Mul16:
      case Iop_Add16:
      case Iop_Sub16:
         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));

      case Iop_Mul8:
      case Iop_Sub8:
      case Iop_Add8:
         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));

      case Iop_CmpEQ64:
      case Iop_CmpNE64:
         if (mce->bogusLiterals)
            goto expensive_cmp64;
         else
            goto cheap_cmp64;

      expensive_cmp64:
      case Iop_ExpCmpNE64:
         return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );

      cheap_cmp64:
      case Iop_CmpLE64S: case Iop_CmpLE64U:
      case Iop_CmpLT64U: case Iop_CmpLT64S:
         return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));

      case Iop_CmpEQ32:
      case Iop_CmpNE32:
         if (mce->bogusLiterals)
            goto expensive_cmp32;
         else
            goto cheap_cmp32;

      expensive_cmp32:
      case Iop_ExpCmpNE32:
         return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );

      cheap_cmp32:
      case Iop_CmpLE32S: case Iop_CmpLE32U:
      case Iop_CmpLT32U: case Iop_CmpLT32S:
         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));

      case Iop_CmpEQ16: case Iop_CmpNE16:
         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));

      case Iop_ExpCmpNE16:
         return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 );

      case Iop_CmpEQ8: case Iop_CmpNE8:
         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
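      /* Added note on the cheap-vs-expensive equality split above:
         the cheap scheme, PCast(UifU(vatom1,vatom2)), declares the
         comparison undefined as soon as either operand has any
         undefined bit.  expensiveCmpEQorNE can do better when the
         defined bits alone already decide the outcome: comparing
         0x??01 with 0x0002 (?? = an undefined byte) must yield "not
         equal" whatever the ?? bits hold, so EQ/NE may be reported
         as defined.  That matters for code which compares
         partially-initialised words against literals, hence the
         bogusLiterals trigger. */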
      case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
      case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
      case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
      case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
         /* Just say these all produce a defined result, regardless
            of their arguments.  See COMMENT_ON_CasCmpEQ in this file. */
         return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));

      case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
         return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );

      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
         return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );

      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
         return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );

      case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
         return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );

      case Iop_AndV256:
         uifu = mkUifUV256; difd = mkDifDV256;
         and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
      case Iop_AndV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
      case Iop_And64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
      case Iop_And32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
      case Iop_And16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
      case Iop_And8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;

      case Iop_OrV256:
         uifu = mkUifUV256; difd = mkDifDV256;
         and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
      case Iop_OrV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
      case Iop_Or64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
      case Iop_Or32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
      case Iop_Or16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
      case Iop_Or8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;

      do_And_Or:
         return
         assignNew(
            'V', mce,
            and_or_ty,
            difd(mce, uifu(mce, vatom1, vatom2),
                      difd(mce, improve(mce, atom1, vatom1),
                                improve(mce, atom2, vatom2) ) ) );
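      /* Added worked example for the And/Or scheme above, for And32
         (the Or cases are the mirror image).  Take
            atom1 = 0x000000FF, vatom1 = 0x00000000 (fully defined)
            atom2 = anything,   vatom2 = 0xFFFFFFFF (fully undefined)
         Then
            uifu(vatom1,vatom2)                   = 0xFFFFFFFF
            improve(atom1,vatom1) = atom1|vatom1  = 0x000000FF
            improve(atom2,vatom2) = atom2|vatom2  = 0xFFFFFFFF
            difd(0xFFFFFFFF, difd(0x000000FF, 0xFFFFFFFF))
                                                  = 0x000000FF
         i.e. bits 8..31 of the result are known-defined (a defined 0
         in atom1 forces those AND result bits to 0), and only the low
         8 bits stay undefined rather than the whole word. */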
      case Iop_Xor8:
         return mkUifU8(mce, vatom1, vatom2);
      case Iop_Xor16:
         return mkUifU16(mce, vatom1, vatom2);
      case Iop_Xor32:
         return mkUifU32(mce, vatom1, vatom2);
      case Iop_Xor64:
         return mkUifU64(mce, vatom1, vatom2);
      case Iop_XorV128:
         return mkUifUV128(mce, vatom1, vatom2);
      case Iop_XorV256:
         return mkUifUV256(mce, vatom1, vatom2);

      /* V256-bit SIMD */

      case Iop_ShrN16x16:
      case Iop_ShrN32x8:
      case Iop_ShrN64x4:
      case Iop_SarN16x16:
      case Iop_SarN32x8:
      case Iop_ShlN16x16:
      case Iop_ShlN32x8:
      case Iop_ShlN64x4:
         /* Same scheme as with all other shifts.  Note: 22 Oct 05:
            this is wrong now, scalar shifts are done properly lazily.
            Vector shifts should be fixed too. */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));

      case Iop_QSub8Ux32:
      case Iop_QSub8Sx32:
      case Iop_Sub8x32:
      case Iop_Min8Ux32:
      case Iop_Min8Sx32:
      case Iop_Max8Ux32:
      case Iop_Max8Sx32:
      case Iop_CmpGT8Sx32:
      case Iop_CmpEQ8x32:
      case Iop_Avg8Ux32:
      case Iop_QAdd8Ux32:
      case Iop_QAdd8Sx32:
      case Iop_Add8x32:
         return binary8Ix32(mce, vatom1, vatom2);

      case Iop_QSub16Ux16:
      case Iop_QSub16Sx16:
      case Iop_Sub16x16:
      case Iop_Mul16x16:
      case Iop_MulHi16Sx16:
      case Iop_MulHi16Ux16:
      case Iop_Min16Sx16:
      case Iop_Min16Ux16:
      case Iop_Max16Sx16:
      case Iop_Max16Ux16:
      case Iop_CmpGT16Sx16:
      case Iop_CmpEQ16x16:
      case Iop_Avg16Ux16:
      case Iop_QAdd16Ux16:
      case Iop_QAdd16Sx16:
      case Iop_Add16x16:
         return binary16Ix16(mce, vatom1, vatom2);

      case Iop_Sub32x8:
      case Iop_CmpGT32Sx8:
      case Iop_CmpEQ32x8:
      case Iop_Add32x8:
      case Iop_Max32Ux8:
      case Iop_Max32Sx8:
      case Iop_Min32Ux8:
      case Iop_Min32Sx8:
      case Iop_Mul32x8:
         return binary32Ix8(mce, vatom1, vatom2);

      case Iop_Sub64x4:
      case Iop_Add64x4:
      case Iop_CmpEQ64x4:
      case Iop_CmpGT64Sx4:
         return binary64Ix4(mce, vatom1, vatom2);

      /* Perm32x8: rearrange values in left arg using steering values
         from right arg.  So rearrange the vbits in the same way but
         pessimise wrt steering values. */
      case Iop_Perm32x8:
         return mkUifUV256(
                   mce,
                   assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
                   mkPCast32x8(mce, vatom2)
                );

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Binop");
   }
}


static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   /* For the widening operations {8,16,32}{U,S}to{16,32,64}, the
      selection of shadow operation implicitly duplicates the logic in
      do_shadow_LoadG and should be kept in sync (in the very unlikely
      event that the interpretation of such widening ops changes in
      future).  See comment in do_shadow_LoadG. */
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_Sqrt64Fx2:
      case Iop_Abs64Fx2:
      case Iop_Neg64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_Sqrt32Fx8:
      case Iop_RSqrt32Fx8:
      case Iop_Recip32Fx8:
         return unary32Fx8(mce, vatom);

      case Iop_Sqrt64Fx4:
         return unary64Fx4(mce, vatom);

      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
      case Iop_I32UtoFx4:
      case Iop_I32StoFx4:
      case Iop_QFtoI32Ux4_RZ:
      case Iop_QFtoI32Sx4_RZ:
      case Iop_RoundF32x4_RM:
      case Iop_RoundF32x4_RP:
      case Iop_RoundF32x4_RN:
      case Iop_RoundF32x4_RZ:
      case Iop_Recip32x4:
      case Iop_Abs32Fx4:
      case Iop_Neg32Fx4:
      case Iop_Rsqrte32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_I32UtoFx2:
      case Iop_I32StoFx2:
      case Iop_Recip32Fx2:
      case Iop_Recip32x2:
      case Iop_Abs32Fx2:
      case Iop_Neg32Fx2:
      case Iop_Rsqrte32Fx2:
         return unary32Fx2(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      case Iop_32UtoV128:
      case Iop_64UtoV128:
      case Iop_Dup8x16:
      case Iop_Dup16x8:
      case Iop_Dup32x4:
      case Iop_Reverse16_8x16:
      case Iop_Reverse32_8x16:
      case Iop_Reverse32_16x8:
      case Iop_Reverse64_8x16:
      case Iop_Reverse64_16x8:
      case Iop_Reverse64_32x4:
      case Iop_V256toV128_1: case Iop_V256toV128_0:
      case Iop_ZeroHI64ofV128:
      case Iop_ZeroHI96ofV128:
      case Iop_ZeroHI112ofV128:
      case Iop_ZeroHI120ofV128:
         return assignNew('V', mce, Ity_V128, unop(op, vatom));

      case Iop_F128HItoF64:  /* F128 -> high half of F128 */
      case Iop_D128HItoD64:  /* D128 -> high half of D128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
      case Iop_F128LOtoF64:  /* F128 -> low  half of F128 */
      case Iop_D128LOtoD64:  /* D128 -> low  half of D128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));

      case Iop_NegF128:
      case Iop_AbsF128:
         return mkPCastTo(mce, Ity_I128, vatom);

      case Iop_I32StoF128: /* signed I32 -> F128 */
      case Iop_I64StoF128: /* signed I64 -> F128 */
      case Iop_I32UtoF128: /* unsigned I32 -> F128 */
      case Iop_I64UtoF128: /* unsigned I64 -> F128 */
      case Iop_F32toF128:  /* F32 -> F128 */
      case Iop_F64toF128:  /* F64 -> F128 */
      case Iop_I32StoD128: /* signed I32 -> D128 */
      case Iop_I64StoD128: /* signed I64 -> D128 */
      case Iop_I32UtoD128: /* unsigned I32 -> D128 */
      case Iop_I64UtoD128: /* unsigned I64 -> D128 */
         return mkPCastTo(mce, Ity_I128, vatom);

      case Iop_F32toF64:
      case Iop_I32StoF64:
      case Iop_I32UtoF64:
      case Iop_NegF64:
      case Iop_AbsF64:
      case Iop_Est5FRSqrt:
      case Iop_RoundF64toF64_NEAREST:
      case Iop_RoundF64toF64_NegINF:
      case Iop_RoundF64toF64_PosINF:
      case Iop_RoundF64toF64_ZERO:
      case Iop_Clz64:
      case Iop_D32toD64:
      case Iop_I32StoD64:
      case Iop_I32UtoD64:
      case Iop_ExtractExpD64:    /* D64  -> I64 */
      case Iop_ExtractExpD128:   /* D128 -> I64 */
      case Iop_ExtractSigD64:    /* D64  -> I64 */
      case Iop_ExtractSigD128:   /* D128 -> I64 */
      case Iop_DPBtoBCD:
      case Iop_BCDtoDPB:
         return mkPCastTo(mce, Ity_I64, vatom);
      case Iop_D64toD128:
         return mkPCastTo(mce, Ity_I128, vatom);

      case Iop_Clz32:
      case Iop_TruncF64asF32:
      case Iop_NegF32:
      case Iop_AbsF32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_Ctz32:
      case Iop_Ctz64:
         return expensiveCountTrailingZeroes(mce, op, atom, vatom);

      case Iop_1Uto64:
      case Iop_1Sto64:
      case Iop_8Uto64:
      case Iop_8Sto64:
      case Iop_16Uto64:
      case Iop_16Sto64:
      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
      case Iop_128HIto64:
      case Iop_128to64:
      case Iop_Dup8x8:
      case Iop_Dup16x4:
      case Iop_Dup32x2:
      case Iop_Reverse16_8x8:
      case Iop_Reverse32_8x8:
      case Iop_Reverse32_16x4:
      case Iop_Reverse64_8x8:
      case Iop_Reverse64_16x4:
      case Iop_Reverse64_32x2:
      case Iop_V256to64_0: case Iop_V256to64_1:
      case Iop_V256to64_2: case Iop_V256to64_3:
         return assignNew('V', mce, Ity_I64, unop(op, vatom));

      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_1Sto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
      case Iop_V128to32:
         return assignNew('V', mce, Ity_I32, unop(op, vatom));

      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
      case Iop_64to16:
      case Iop_GetMSBs8x16:
         return assignNew('V', mce, Ity_I16, unop(op, vatom));

      case Iop_1Uto8:
      case Iop_1Sto8:
      case Iop_16to8:
      case Iop_16HIto8:
      case Iop_32to8:
      case Iop_64to8:
      case Iop_GetMSBs8x8:
         return assignNew('V', mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_64to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));

      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_ReinterpF32asI32:
      case Iop_ReinterpI64asD64:
      case Iop_ReinterpD64asI64:
      case Iop_NotV256:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      case Iop_CmpNEZ8x8:
      case Iop_Cnt8x8:
      case Iop_Clz8Sx8:
      case Iop_Cls8Sx8:
      case Iop_Abs8x8:
         return mkPCast8x8(mce, vatom);

      case Iop_CmpNEZ8x16:
      case Iop_Cnt8x16:
      case Iop_Clz8Sx16:
      case Iop_Cls8Sx16:
      case Iop_Abs8x16:
         return mkPCast8x16(mce, vatom);

      case Iop_CmpNEZ16x4:
      case Iop_Clz16Sx4:
      case Iop_Cls16Sx4:
      case Iop_Abs16x4:
         return mkPCast16x4(mce, vatom);

      case Iop_CmpNEZ16x8:
      case Iop_Clz16Sx8:
      case Iop_Cls16Sx8:
      case Iop_Abs16x8:
         return mkPCast16x8(mce, vatom);

      case Iop_CmpNEZ32x2:
      case Iop_Clz32Sx2:
      case Iop_Cls32Sx2:
      case Iop_FtoI32Ux2_RZ:
      case Iop_FtoI32Sx2_RZ:
      case Iop_Abs32x2:
         return mkPCast32x2(mce, vatom);

      case Iop_CmpNEZ32x4:
      case Iop_Clz32Sx4:
      case Iop_Cls32Sx4:
      case Iop_FtoI32Ux4_RZ:
      case Iop_FtoI32Sx4_RZ:
      case Iop_Abs32x4:
         return mkPCast32x4(mce, vatom);

      case Iop_CmpwNEZ32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_CmpwNEZ64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_CmpNEZ64x2:
      case Iop_CipherSV128:
      case Iop_Clz64x2:
         return mkPCast64x2(mce, vatom);

      case Iop_PwBitMtxXpose64x2:
         return assignNew('V', mce, Ity_V128, unop(op, vatom));
      case Iop_NarrowUn16to8x8:
      case Iop_NarrowUn32to16x4:
      case Iop_NarrowUn64to32x2:
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
      case Iop_QNarrowUn16Uto8Ux8:
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
      case Iop_QNarrowUn32Uto16Ux4:
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
      case Iop_QNarrowUn64Uto32Ux2:
         return vectorNarrowUnV128(mce, op, vatom);

      case Iop_Widen8Sto16x8:
      case Iop_Widen8Uto16x8:
      case Iop_Widen16Sto32x4:
      case Iop_Widen16Uto32x4:
      case Iop_Widen32Sto64x2:
      case Iop_Widen32Uto64x2:
         return vectorWidenI64(mce, op, vatom);

      case Iop_PwAddL32Ux2:
      case Iop_PwAddL32Sx2:
         return mkPCastTo(mce, Ity_I64,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));

      case Iop_PwAddL16Ux4:
      case Iop_PwAddL16Sx4:
         return mkPCast32x2(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));

      case Iop_PwAddL8Ux8:
      case Iop_PwAddL8Sx8:
         return mkPCast16x4(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));

      case Iop_PwAddL32Ux4:
      case Iop_PwAddL32Sx4:
         return mkPCast64x2(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));

      case Iop_PwAddL16Ux8:
      case Iop_PwAddL16Sx8:
         return mkPCast32x4(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));

      case Iop_PwAddL8Ux16:
      case Iop_PwAddL8Sx16:
         return mkPCast16x8(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
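      /* Added sketch of the pairwise-add scheme above, e.g. for
         PwAddL16Ux8: the operand is PCast-ed first, so every 16-bit
         input lane is all-0s or all-1s; the pairwise add of adjacent
         lanes can then disturb any bit of a 32-bit sum, so the result
         is PCast-ed again at 32x4 to re-normalise each output lane.
         Net effect: an output lane is undefined iff either of its two
         input lanes contained any undefined bit. */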
      // TODO: is this correct?
      case Iop_AddLV8Ux16:
      case Iop_AddLV8Sx16:
         return assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom)));

      case Iop_AddLV16Ux8:
      case Iop_AddLV16Sx8:
         return assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom)));

      case Iop_AddLV32Ux4:
      case Iop_AddLV32Sx4:
         return assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom)));

      case Iop_I64UtoF32:
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}


/* Worker function -- do not call directly.  See comments on
   expr2vbits_Load for the meaning of |guard|.

   Generates IR to (1) perform a definedness test of |addr|, (2)
   perform a validity test of |addr|, and (3) return the Vbits for the
   location indicated by |addr|.  All of this only happens when
   |guard| is NULL or |guard| evaluates to True at run time.

   If |guard| evaluates to False at run time, the returned value is
   the IR-mandated 0x55..55 value, and no checks nor shadow loads are
   performed.

   The definedness of |guard| itself is not checked.  That is assumed
   to have been done before this point, by the caller. */
static
IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
                              IREndness end, IRType ty,
                              IRAtom* addr, UInt bias, IRAtom* guard )
{
   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(end == Iend_LE || end == Iend_BE);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr, guard );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowTypeV(ty);

   void*        helper           = NULL;
   const HChar* hname            = NULL;
   Bool         ret_via_outparam = False;

   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V256: helper = &MC_(helperc_LOADV256le);
                        hname = "MC_(helperc_LOADV256le)";
                        ret_via_outparam = True;
                        break;
         case Ity_V128: helper = &MC_(helperc_LOADV128le);
                        hname = "MC_(helperc_LOADV128le)";
                        ret_via_outparam = True;
                        break;
         case Ity_I64:  helper = &MC_(helperc_LOADV64le);
                        hname = "MC_(helperc_LOADV64le)";
                        break;
         case Ity_I32:  helper = &MC_(helperc_LOADV32le);
                        hname = "MC_(helperc_LOADV32le)";
                        break;
         case Ity_I16:  helper = &MC_(helperc_LOADV16le);
                        hname = "MC_(helperc_LOADV16le)";
                        break;
         case Ity_I8:   helper = &MC_(helperc_LOADV8);
                        hname = "MC_(helperc_LOADV8)";
                        break;
         default:       ppIRType(ty);
                        VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V256: helper = &MC_(helperc_LOADV256be);
                        hname = "MC_(helperc_LOADV256be)";
                        ret_via_outparam = True;
                        break;
         case Ity_V128: helper = &MC_(helperc_LOADV128be);
                        hname = "MC_(helperc_LOADV128be)";
                        ret_via_outparam = True;
                        break;
         case Ity_I64:  helper = &MC_(helperc_LOADV64be);
                        hname = "MC_(helperc_LOADV64be)";
                        break;
         case Ity_I32:  helper = &MC_(helperc_LOADV32be);
                        hname = "MC_(helperc_LOADV32be)";
                        break;
         case Ity_I16:  helper = &MC_(helperc_LOADV16be);
                        hname = "MC_(helperc_LOADV16be)";
                        break;
         case Ity_I8:   helper = &MC_(helperc_LOADV8);
                        hname = "MC_(helperc_LOADV8)";
                        break;
         default:       ppIRType(ty);
                        VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)");
      }
   }

   tl_assert(helper);
   tl_assert(hname);

   /* Generate the actual address into addrAct. */
   IRAtom* addrAct;
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   IRTemp datavbits = newTemp(mce, ty, VSh);

   /* Here's the call. */
   IRDirty* di;
   if (ret_via_outparam) {
      di = unsafeIRDirty_1_N( datavbits,
                              2/*regparms*/,
                              hname, VG_(fnptr_to_fnentry)( helper ),
                              mkIRExprVec_2( IRExpr_VECRET(), addrAct ) );
   } else {
      di = unsafeIRDirty_1_N( datavbits,
                              1/*regparms*/,
                              hname, VG_(fnptr_to_fnentry)( helper ),
                              mkIRExprVec_1( addrAct ) );
   }

   setHelperAnns( mce, di );
   if (guard) {
      di->guard = guard;
      /* Ideally the didn't-happen return value here would be all-ones
         (all-undefined), so it'd be obvious if it got used
         inadvertently.  We can get by with the IR-mandated default
         value (0b01 repeating, 0x55 etc) as that'll still look pretty
         undefined if it ever leaks out. */
   }
   stmt( 'V', mce, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}
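/* For orientation, this is roughly what the above emits for a 32-bit
   little-endian load with zero bias and no guard (a hedged sketch;
   the temp name is invented and the dirty-call syntax is only
   approximately as ppIRStmt would render it):

      t_v = DIRTY 1:I32 ::: MC_(helperc_LOADV32le)(addrAct)

   i.e. a single unconditional dirty call whose result temp carries
   the 32 V bits for the addressed location, preceded by whatever
   definedness test complainIfUndefined generated for |addr|. */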
/* Generate IR to do a shadow load.  The helper is expected to check
   the validity of the address and return the V bits for that address.
   This can optionally be controlled by a guard, which is assumed to
   be True if NULL.  In the case where the guard is False at runtime,
   the helper will return the didn't-do-the-call value of 0x55..55.
   Since that means "completely undefined result", the caller of
   this function will need to fix up the result somehow in that
   case.

   Caller of this function is also expected to have checked the
   definedness of |guard| before this point.
*/
static
IRAtom* expr2vbits_Load ( MCEnv* mce,
                          IREndness end, IRType ty,
                          IRAtom* addr, UInt bias,
                          IRAtom* guard )
{
   tl_assert(end == Iend_LE || end == Iend_BE);
   switch (shadowTypeV(ty)) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_V128:
      case Ity_V256:
         return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
      default:
         VG_(tool_panic)("expr2vbits_Load");
   }
}


/* The most general handler for guarded loads.  Assumes the
   definedness of GUARD has already been checked by the caller.  A
   GUARD of NULL is assumed to mean "always True".  Generates code to
   check the definedness and validity of ADDR.

   Generate IR to do a shadow load from ADDR and return the V bits.
   The loaded type is TY.  The loaded data is then (shadow) widened by
   using VWIDEN, which can be Iop_INVALID to denote a no-op.  If GUARD
   evaluates to False at run time then the returned Vbits are simply
   VALT instead.  Note therefore that the argument type of VWIDEN must
   be TY and the result type of VWIDEN must equal the type of VALT.
*/
static
IRAtom* expr2vbits_Load_guarded_General ( MCEnv* mce,
                                          IREndness end, IRType ty,
                                          IRAtom* addr, UInt bias,
                                          IRAtom* guard,
                                          IROp vwiden, IRAtom* valt )
{
   /* Sanity check the conversion operation, and also set TYWIDE. */
   IRType tyWide = Ity_INVALID;
   switch (vwiden) {
      case Iop_INVALID:
         tyWide = ty;
         break;
      case Iop_16Uto32: case Iop_16Sto32: case Iop_8Uto32: case Iop_8Sto32:
         tyWide = Ity_I32;
         break;
      default:
         VG_(tool_panic)("memcheck:expr2vbits_Load_guarded_General");
   }

   /* If the guard evaluates to True, this will hold the loaded V bits
      at TY.  If the guard evaluates to False, this will be the
      didn't-do-the-call value (0x55..55, mostly undefined), in which
      case we will have to replace it using an ITE below. */
   IRAtom* iftrue1
      = assignNew('V', mce, ty,
                  expr2vbits_Load(mce, end, ty, addr, bias, guard));
   /* Now (shadow-) widen the loaded V bits to the desired width.  In
      the guard-is-False case, the allowable widening operators will
      in the worst case (unsigned widening) at least leave the
      pre-widened part as being marked all-undefined, and in the best
      case (signed widening) mark the whole widened result as
      undefined.  Anyway, it doesn't matter really, since in this case
      we will replace said value with the default value |valt| using an
      ITE. */
   IRAtom* iftrue2
      = vwiden == Iop_INVALID
           ? iftrue1
           : assignNew('V', mce, tyWide, unop(vwiden, iftrue1));
   /* These are the V bits we will return if the load doesn't take
      place. */
   IRAtom* iffalse
      = valt;
   /* Prepare the cond for the ITE.  Convert a NULL cond into
      something that iropt knows how to fold out later. */
   IRAtom* cond
      = guard == NULL  ? mkU1(1)  : guard;
   /* And assemble the final result. */
   return assignNew('V', mce, tyWide, IRExpr_ITE(cond, iftrue2, iffalse));
}
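/* Added usage sketch (hypothetical call, for illustration only): a
   guarded widening sub-word load such as "if (g) x = (UInt)*(UShort*)p"
   could be shadowed by

      vbits = expr2vbits_Load_guarded_General(
                 mce, Iend_LE, Ity_I16, p, 0, g, Iop_16Uto32, valt );

   where |valt| supplies the I32-typed V bits to use when |g| is False
   at run time.  The real call sites are in the IRLoadG handling
   (do_shadow_LoadG), which must pick |vwiden| to match the LoadG's
   conversion. */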
/* A simpler handler for guarded loads, in which there is no
   conversion operation, and the default V bit return (when the guard
   evaluates to False at runtime) is "all defined".  If there is no
   guard expression or the guard is always TRUE this function behaves
   like expr2vbits_Load.  It is assumed that definedness of GUARD has
   already been checked at the call site. */
static
IRAtom* expr2vbits_Load_guarded_Simple ( MCEnv* mce,
                                         IREndness end, IRType ty,
                                         IRAtom* addr, UInt bias,
                                         IRAtom *guard )
{
   return expr2vbits_Load_guarded_General(
             mce, end, ty, addr, bias, guard, Iop_INVALID, definedOfType(ty)
          );
}


static
IRAtom* expr2vbits_ITE ( MCEnv* mce,
                         IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
{
   IRAtom *vbitsC, *vbits0, *vbits1;
   IRType ty;
   /* Given ITE(cond, iftrue, iffalse), generate
         ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#)
      That is, steer the V bits like the originals, but trash the
      result if the steering value is undefined.  This gives
      lazy propagation. */
   tl_assert(isOriginalAtom(mce, cond));
   tl_assert(isOriginalAtom(mce, iftrue));
   tl_assert(isOriginalAtom(mce, iffalse));

   vbitsC = expr2vbits(mce, cond);
   vbits1 = expr2vbits(mce, iftrue);
   vbits0 = expr2vbits(mce, iffalse);
   ty = typeOfIRExpr(mce->sb->tyenv, vbits0);

   return
      mkUifU(mce, ty, assignNew('V', mce, ty,
                                IRExpr_ITE(cond, vbits1, vbits0)),
                      mkPCastTo(mce, ty, vbitsC) );
}
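/* Worked instance of the ITE rule above (added commentary): with a
   fully undefined condition, PCast(cond#) is all-1s, and UifU-ing
   that into the ITE of the arms' V bits forces the result to
   all-undefined -- rightly so, since we cannot know which arm was
   selected.  With a defined condition, PCast(cond#) is all-0s and
   the selected arm's V bits pass through unchanged. */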
/* --------- This is the main expression-handling function. --------- */

static
IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
{
   switch (e->tag) {

      case Iex_Get:
         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );

      case Iex_GetI:
         return shadow_GETI( mce, e->Iex.GetI.descr,
                                  e->Iex.GetI.ix, e->Iex.GetI.bias );

      case Iex_RdTmp:
         return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );

      case Iex_Const:
         return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));

      case Iex_Qop:
         return expr2vbits_Qop(
                   mce,
                   e->Iex.Qop.details->op,
                   e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
                   e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
                );

      case Iex_Triop:
         return expr2vbits_Triop(
                   mce,
                   e->Iex.Triop.details->op,
                   e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
                   e->Iex.Triop.details->arg3
                );

      case Iex_Binop:
         return expr2vbits_Binop(
                   mce,
                   e->Iex.Binop.op,
                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
                );

      case Iex_Unop:
         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );

      case Iex_Load:
         return expr2vbits_Load( mce, e->Iex.Load.end,
                                      e->Iex.Load.ty,
                                      e->Iex.Load.addr, 0/*addr bias*/,
                                      NULL/* guard == "always True"*/ );

      case Iex_CCall:
         return mkLazyN( mce, e->Iex.CCall.args,
                              e->Iex.CCall.retty,
                              e->Iex.CCall.cee );

      case Iex_ITE:
         return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
                                e->Iex.ITE.iffalse);

      default:
         VG_(printf)("\n");
         ppIRExpr(e);
         VG_(printf)("\n");
         VG_(tool_panic)("memcheck: expr2vbits");
   }
}

/*------------------------------------------------------------*/
/*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
/*------------------------------------------------------------*/

/* Widen a value to the host word size. */

static
IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
{
   IRType ty, tyH;

   /* vatom is vbits-value and as such can only have a shadow type. */
   tl_assert(isShadowAtom(mce,vatom));

   ty  = typeOfIRExpr(mce->sb->tyenv, vatom);
   tyH = mce->hWordTy;

   if (tyH == Ity_I32) {
      switch (ty) {
         case Ity_I32:
            return vatom;
         case Ity_I16:
            return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
         case Ity_I8:
            return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
         default:
            goto unhandled;
      }
   } else
   if (tyH == Ity_I64) {
      switch (ty) {
         case Ity_I32:
            return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
         case Ity_I16:
            return assignNew('V', mce, tyH, unop(Iop_32Uto64,
                   assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
         case Ity_I8:
            return assignNew('V', mce, tyH, unop(Iop_32Uto64,
                   assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
         default:
            goto unhandled;
      }
   } else {
      goto unhandled;
   }
  unhandled:
   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   VG_(tool_panic)("zwidenToHostWord");
}


/* Generate a shadow store.  |addr| is always the original address
   atom.  You can pass in either originals or V-bits for the data
   atom, but obviously not both.  This function generates a check for
   the definedness and (indirectly) the validity of |addr|, but only
   when |guard| evaluates to True at run time (or is NULL).

   |guard| :: Ity_I1 controls whether the store really happens; NULL
   means it unconditionally does.  Note that |guard| itself is not
   checked for definedness; the caller of this function must do that
   if necessary.
*/
static
void do_shadow_Store ( MCEnv* mce,
                       IREndness end,
                       IRAtom* addr, UInt bias,
                       IRAtom* data, IRAtom* vdata,
                       IRAtom* guard )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   void*    helper = NULL;
   const HChar* hname = NULL;
   IRConst* c;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   tl_assert( end == Iend_LE || end == Iend_BE );

   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vdata);

   // If we're not doing undefined value checking, pretend that this value
   // is "all valid".  That lets Vex's optimiser remove some of the V bit
   // shadow computation ops that precede it.
   if (MC_(clo_mc_level) == 1) {
      switch (ty) {
         case Ity_V256: // V256 weirdness -- used four times
                        c = IRConst_V256(V_BITS32_DEFINED); break;
         case Ity_V128: // V128 weirdness -- used twice
                        c = IRConst_V128(V_BITS16_DEFINED); break;
         case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
         case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
         case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
         case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
         default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
      vdata = IRExpr_Const( c );
   }

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test.  Both of
      those actions are gated on |guard|. */
   complainIfUndefined( mce, addr, guard );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V256: /* we'll use the helper four times */
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64le);
                       hname = "MC_(helperc_STOREV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32le);
                       hname = "MC_(helperc_STOREV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16le);
                       hname = "MC_(helperc_STOREV16le)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64be);
                       hname = "MC_(helperc_STOREV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32be);
                       hname = "MC_(helperc_STOREV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16be);
                       hname = "MC_(helperc_STOREV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         /* Note, no V256 case here, because no big-endian target that
            we support has 256-bit vectors. */
         default:      VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
      }
   }

   if (UNLIKELY(ty == Ity_V256)) {

      /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
         Q3 being the most significant lane. */
      /* These are the offsets of the Qs in memory. */
      Int     offQ0, offQ1, offQ2, offQ3;

      /* Various bits for constructing the 4 lane helper calls */
      IRDirty *diQ0,    *diQ1,    *diQ2,    *diQ3;
      IRAtom  *addrQ0,  *addrQ1,  *addrQ2,  *addrQ3;
      IRAtom  *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
      IRAtom  *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;

      if (end == Iend_LE) {
         offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
      } else {
         offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
      }

      eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
      addrQ0  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
      vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
      diQ0    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ0, vdataQ0 )
                );
      eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
      addrQ1  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
      vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
      diQ1    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ1, vdataQ1 )
                );

      eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
      addrQ2  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
      vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
      diQ2    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ2, vdataQ2 )
                );

      eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
      addrQ3  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
      vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
      diQ3    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ3, vdataQ3 )
                );

      if (guard)
         diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;

      setHelperAnns( mce, diQ0 );
      setHelperAnns( mce, diQ1 );
      setHelperAnns( mce, diQ2 );
      setHelperAnns( mce, diQ3 );
      stmt( 'V', mce, IRStmt_Dirty(diQ0) );
      stmt( 'V', mce, IRStmt_Dirty(diQ1) );
      stmt( 'V', mce, IRStmt_Dirty(diQ2) );
      stmt( 'V', mce, IRStmt_Dirty(diQ3) );

   }
   else if (UNLIKELY(ty == Ity_V128)) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      /* also, need to be careful about endianness */

      Int     offLo64, offHi64;
      IRDirty *diLo64, *diHi64;
      IRAtom  *addrLo64,  *addrHi64;
      IRAtom  *vdataLo64, *vdataHi64;
      IRAtom  *eBiasLo64, *eBiasHi64;

      if (end == Iend_LE) {
         offLo64 = 0;
         offHi64 = 8;
      } else {
         offLo64 = 8;
         offHi64 = 0;
      }

      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrLo64, vdataLo64 )
                  );
      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrHi64, vdataHi64 )
                  );
      if (guard) diLo64->guard = guard;
      if (guard) diHi64->guard = guard;
      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
      stmt( 'V', mce, IRStmt_Dirty(diHi64) );
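
      /* Added note: for a little-endian V128 store to address A the
         above issues two 64-bit helper calls,

            MC_(helperc_STOREV64le)( A+0, V128to64(vdata)   )
            MC_(helperc_STOREV64le)( A+8, V128HIto64(vdata) )

         and the big-endian variant swaps the two offsets, so the
         memory image of the shadow data always matches that of the
         real data. */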
   } else {

      IRDirty *di;
      IRAtom  *addrAct;

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct, vdata )
              );
      } else {
         di = unsafeIRDirty_0_N(
                 2/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata ))
              );
      }
      if (guard) di->guard = guard;
      setHelperAnns( mce, di );
      stmt( 'V', mce, IRStmt_Dirty(di) );
   }

}


/* Do lazy pessimistic propagation through a dirty helper call, by
   looking at the annotations on it.  This is the most complex part of
   Memcheck. */

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: VG_(tool_panic)("szToITy(memcheck)");
   }
}

static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int       i, k, n, toDo, gSz, gOff;
   IRAtom    *src, *here, *curr;
   IRType    tySrc, tyDst;
   IRTemp    dst;
   IREndness end;

   /* What's the native endianness?  We need to know this. */
#  if defined(VG_BIGENDIAN)
   end = Iend_BE;
#  elif defined(VG_LITTLEENDIAN)
   end = Iend_LE;
#  else
#    error "Unknown endianness"
#  endif

   /* First check the guard. */
   complainIfUndefined(mce, d->guard, NULL);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression */
   for (i = 0; d->args[i]; i++) {
      IRAtom* arg = d->args[i];
      if ( (d->cee->mcx_mask & (1<<i))
           || UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)) ) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, arg) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* update 'curr' with UifU of the state slice
               gOff .. gOff+n-1 */
            tySrc = szToITy( n );

            /* Observe the guard expression.  If it is false use an
               all-bits-defined bit pattern */
            IRAtom *cond, *iffalse, *iftrue;

            cond    = assignNew('V', mce, Ity_I1, d->guard);
            iftrue  = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
            iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
            src     = assignNew('V', mce, tySrc,
                                IRExpr_ITE(cond, iftrue, iffalse));

            here = mkPCastTo( mce, Ity_I32, src );
            curr = mkUifU32(mce, here, curr);
            gSz -= n;
            gOff += n;
         }
      }
   }
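   /* Added worked example of the enumeration above: an fxState entry
      with offset 100, size 8, nRepeats 3 and repeatLen 16 describes
      four 8-byte guest-state slices, at offsets 100, 116, 132 and
      148.  Each slice (unless marked always-defined) is read under
      the guard, PCast-ed to I32 and UifU-ed into |curr|. */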
   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      IRType tyAddr;
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr, d->guard);

      tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I32, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I16, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I8, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }
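   /* Added example of the chunking above: for mSize == 7 the loops
      issue a 4-byte load at offset 0, a 2-byte load at offset 4 and
      a 1-byte load at offset 6 (the offset being mSize - toDo at
      each step), each PCast-ed to I32 and folded into |curr|. */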
   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmpV(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
      assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* Write suitably-casted 'curr' to the state slice
               gOff .. gOff+n-1 */
            tyDst = szToITy( n );
            do_shadow_PUT( mce, gOff,
                                NULL, /* original atom */
                                mkPCastTo( mce, tyDst, curr ), d->guard );
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I32, curr ),
                          d->guard );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I16, curr ),
                          d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I8, curr ),
                          d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

}


/* We have an ABI hint telling us that [base .. base+len-1] is to
   become undefined ("writable").  Generate code to call a helper to
   notify the A/V bit machinery of this fact.

   We call
   void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
                                         Addr nia );
*/
static
void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
{
   IRDirty* di;
   /* Minor optimisation: if not doing origin tracking, ignore the
      supplied nia and pass zero instead.  This is on the basis that
      MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
      almost always generate a shorter instruction to put zero into a
      register than any other value. */
   if (MC_(clo_mc_level) < 3)
      nia = mkIRExpr_HWord(0);

   di = unsafeIRDirty_0_N(
           0/*regparms*/,
           "MC_(helperc_MAKE_STACK_UNINIT)",
           VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
           mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
        );
   stmt( 'V', mce, IRStmt_Dirty(di) );
}


/* ------ Dealing with IRCAS (big and complex) ------ */

/* FWDS */
static IRAtom* gen_load_b  ( MCEnv* mce, Int szB,
                             IRAtom* baseaddr, Int offset );
static IRAtom* gen_maxU32  ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
static void    gen_store_b ( MCEnv* mce, Int szB,
                             IRAtom* baseaddr, Int offset, IRAtom* dataB,
                             IRAtom* guard );

static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );


/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
   IRExpr.Consts, else this asserts.  If they are both Consts, it
   doesn't do anything.  So that just leaves the RdTmp case.

   In which case: this assigns the shadow value SHADOW to the IR
   shadow temporary associated with ORIG.  That is, ORIG, being an
   original temporary, will have a shadow temporary associated with
   it.  However, in the case envisaged here, there will so far have
   been no IR emitted to actually write a shadow value into that
   temporary.  What this routine does is to (emit IR to) copy the
   value in SHADOW into said temporary, so that after this call,
   IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
   value in SHADOW.

   Point is to allow callers to compute "by hand" a shadow value for
   ORIG, and force it to be associated with ORIG.

   How do we know that that shadow associated with ORIG has not so far
   been assigned to?  Well, we don't per se know that, but supposing
   it had.
   Then this routine would create a second assignment to it,
   and later the IR sanity checker would barf.  But that never
   happens.  QED.
*/
static void bind_shadow_tmp_to_orig ( UChar how,
                                      MCEnv* mce,
                                      IRAtom* orig, IRAtom* shadow )
{
   tl_assert(isOriginalAtom(mce, orig));
   tl_assert(isShadowAtom(mce, shadow));
   switch (orig->tag) {
      case Iex_Const:
         tl_assert(shadow->tag == Iex_Const);
         break;
      case Iex_RdTmp:
         tl_assert(shadow->tag == Iex_RdTmp);
         if (how == 'V') {
            assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
                   shadow);
         } else {
            tl_assert(how == 'B');
            assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
                   shadow);
         }
         break;
      default:
         tl_assert(0);
   }
}


static
void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
{
   /* Scheme is (both single- and double- cases):

      1. fetch data#,dataB (the proposed new value)

      2. fetch expd#,expdB (what we expect to see at the address)

      3. check definedness of address

      4. load old#,oldB from shadow memory; this also checks
         addressability of the address

      5. the CAS itself

      6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.

      7. if "expected == old" (as computed by (6))
            store data#,dataB to shadow memory

      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
      'data' but 7 stores 'data#'.  Hence it is possible for the
      shadow data to be incorrectly checked and/or updated:

      * 7 is at least gated correctly, since the 'expected == old'
        condition is derived from outputs of 5.  However, the shadow
        write could happen too late: imagine after 5 we are
        descheduled, a different thread runs, writes a different
        (shadow) value at the address, and then we resume, hence
        overwriting the shadow value written by the other thread.

      Because the original memory access is atomic, there's no way to
      make both the original and shadow accesses into a single atomic
      thing, hence this is unavoidable.

      At least as Valgrind stands, I don't think it's a problem, since
      we're single threaded *and* we guarantee that there are no
      context switches during the execution of any specific superblock
      -- context switches can only happen at superblock boundaries.

      If Valgrind ever becomes MT in the future, then it might be more
      of a problem.  A possible kludge would be to artificially
      associate with the location, a lock, which we must acquire and
      release around the transaction as a whole.  Hmm, that probably
      wouldn't work properly since it only guards us against other
      threads doing CASs on the same location, not against other
      threads doing normal reads and writes.

      ------------------------------------------------------------

      COMMENT_ON_CasCmpEQ:

      Note two things.  Firstly, in the sequence above, we compute
      "expected == old", but we don't check definedness of it.  Why
      not?  Also, the x86 and amd64 front ends use
      Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
      determination (expected == old ?) for themselves, and we also
      don't check definedness for those primops; we just say that the
      result is defined.  Why?  Details follow.
5373 5374 x86/amd64 contains various forms of locked insns: 5375 * lock prefix before all basic arithmetic insn; 5376 eg lock xorl %reg1,(%reg2) 5377 * atomic exchange reg-mem 5378 * compare-and-swaps 5379 5380 Rather than attempt to represent them all, which would be a 5381 royal PITA, I used a result from Maurice Herlihy 5382 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he 5383 demonstrates that compare-and-swap is a primitive more general 5384 than the other two, and so can be used to represent all of them. 5385 So the translation scheme for (eg) lock incl (%reg) is as 5386 follows: 5387 5388 again: 5389 old = * %reg 5390 new = old + 1 5391 atomically { if (* %reg == old) { * %reg = new } else { goto again } } 5392 5393 The "atomically" is the CAS bit. The scheme is always the same: 5394 get old value from memory, compute new value, atomically stuff 5395 new value back in memory iff the old value has not changed (iow, 5396 no other thread modified it in the meantime). If it has changed 5397 then we've been out-raced and we have to start over. 5398 5399 Now that's all very neat, but it has the bad side effect of 5400 introducing an explicit equality test into the translation. 5401 Consider the behaviour of said code on a memory location which 5402 is uninitialised. We will wind up doing a comparison on 5403 uninitialised data, and mc duly complains. 5404 5405 What's difficult about this is, the common case is that the 5406 location is uncontended, and so we're usually comparing the same 5407 value (* %reg) with itself. So we shouldn't complain even if it 5408 is undefined. But mc doesn't know that. 5409 5410 My solution is to mark the == in the IR specially, so as to tell 5411 mc that it almost certainly compares a value with itself, and we 5412 should just regard the result as always defined. Rather than 5413 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into 5414 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else. 5415 5416 So there's always the question of, can this give a false 5417 negative? eg, imagine that initially, * %reg is defined; and we 5418 read that; but then in the gap between the read and the CAS, a 5419 different thread writes an undefined (and different) value at 5420 the location. Then the CAS in this thread will fail and we will 5421 go back to "again:", but without knowing that the trip back 5422 there was based on an undefined comparison. No matter; at least 5423 the other thread won the race and the location is correctly 5424 marked as undefined. What if it wrote an uninitialised version 5425 of the same value that was there originally, though? 5426 5427 etc etc. Seems like there's a small corner case in which we 5428 might lose the fact that something's defined -- we're out-raced 5429 in between the "old = * reg" and the "atomically {", _and_ the 5430 other thread is writing in an undefined version of what's 5431 already there. Well, that seems pretty unlikely. 5432 5433 --- 5434 5435 If we ever need to reinstate it .. code which generates a 5436 definedness test for "expected == old" was removed at r10432 of 5437 this file. 
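
      ------------------------------------------------------------

      To make the seven-step scheme above concrete, here is a sketch
      (not a literal dump of generated IR) of roughly what
      do_shadow_CAS_single emits for a 32-bit single CAS, writing x#
      for the V-shadow of x:

         vdata#      = V-bits of cas->dataLo
         vexpd#      = V-bits of cas->expdLo
         vold#       = V-bits loaded from shadow memory at cas->addr,
                       bound to the shadow of cas->oldLo
         <the CAS itself>
         expd_eq_old = CasCmpEQ32(cas->expdLo, cas->oldLo)
         if (expd_eq_old)
            store vdata# to shadow memory at cas->addr

      When origin tracking is enabled, the corresponding B-value
      (otag) loads and stores are generated alongside, guarded the
      same way.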
*/
   if (cas->oldHi == IRTemp_INVALID) {
      do_shadow_CAS_single( mce, cas );
   } else {
      do_shadow_CAS_double( mce, cas );
   }
}


static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *expd_eq_old = NULL;
   IROp   opCasCmpEQ;
   Int    elemSzB;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* single CAS */
   tl_assert(cas->oldHi == IRTemp_INVALID);
   tl_assert(cas->expdHi == NULL);
   tl_assert(cas->dataHi == NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
      default: tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressability of the address */
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, 0/*Addr bias*/,
              NULL/*always happens*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));

   /* 7.
if "expected == old" 5525 store data# to shadow memory */ 5526 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/, 5527 NULL/*data*/, vdataLo/*vdata*/, 5528 expd_eq_old/*guard for store*/ ); 5529 if (otrak) { 5530 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/, 5531 bdataLo/*bdata*/, 5532 expd_eq_old/*guard for store*/ ); 5533 } 5534 } 5535 5536 5537 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ) 5538 { 5539 IRAtom *vdataHi = NULL, *bdataHi = NULL; 5540 IRAtom *vdataLo = NULL, *bdataLo = NULL; 5541 IRAtom *vexpdHi = NULL, *bexpdHi = NULL; 5542 IRAtom *vexpdLo = NULL, *bexpdLo = NULL; 5543 IRAtom *voldHi = NULL, *boldHi = NULL; 5544 IRAtom *voldLo = NULL, *boldLo = NULL; 5545 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL; 5546 IRAtom *expd_eq_old = NULL, *zero = NULL; 5547 IROp opCasCmpEQ, opOr, opXor; 5548 Int elemSzB, memOffsLo, memOffsHi; 5549 IRType elemTy; 5550 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */ 5551 5552 /* double CAS */ 5553 tl_assert(cas->oldHi != IRTemp_INVALID); 5554 tl_assert(cas->expdHi != NULL); 5555 tl_assert(cas->dataHi != NULL); 5556 5557 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); 5558 switch (elemTy) { 5559 case Ity_I8: 5560 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8; 5561 elemSzB = 1; zero = mkU8(0); 5562 break; 5563 case Ity_I16: 5564 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16; 5565 elemSzB = 2; zero = mkU16(0); 5566 break; 5567 case Ity_I32: 5568 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32; 5569 elemSzB = 4; zero = mkU32(0); 5570 break; 5571 case Ity_I64: 5572 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64; 5573 elemSzB = 8; zero = mkU64(0); 5574 break; 5575 default: 5576 tl_assert(0); /* IR defn disallows any other types */ 5577 } 5578 5579 /* 1. fetch data# (the proposed new value) */ 5580 tl_assert(isOriginalAtom(mce, cas->dataHi)); 5581 tl_assert(isOriginalAtom(mce, cas->dataLo)); 5582 vdataHi 5583 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi)); 5584 vdataLo 5585 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo)); 5586 tl_assert(isShadowAtom(mce, vdataHi)); 5587 tl_assert(isShadowAtom(mce, vdataLo)); 5588 if (otrak) { 5589 bdataHi 5590 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi)); 5591 bdataLo 5592 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo)); 5593 tl_assert(isShadowAtom(mce, bdataHi)); 5594 tl_assert(isShadowAtom(mce, bdataLo)); 5595 } 5596 5597 /* 2. fetch expected# (what we expect to see at the address) */ 5598 tl_assert(isOriginalAtom(mce, cas->expdHi)); 5599 tl_assert(isOriginalAtom(mce, cas->expdLo)); 5600 vexpdHi 5601 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi)); 5602 vexpdLo 5603 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo)); 5604 tl_assert(isShadowAtom(mce, vexpdHi)); 5605 tl_assert(isShadowAtom(mce, vexpdLo)); 5606 if (otrak) { 5607 bexpdHi 5608 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi)); 5609 bexpdLo 5610 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo)); 5611 tl_assert(isShadowAtom(mce, bexpdHi)); 5612 tl_assert(isShadowAtom(mce, bexpdLo)); 5613 } 5614 5615 /* 3. check definedness of address */ 5616 /* 4. 
fetch old# from shadow memory; this also checks
         addressability of the address */
   if (cas->end == Iend_LE) {
      memOffsLo = 0;
      memOffsHi = elemSzB;
   } else {
      tl_assert(cas->end == Iend_BE);
      memOffsLo = elemSzB;
      memOffsHi = 0;
   }
   voldHi
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/,
              NULL/*always happens*/
        ));
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/,
              NULL/*always happens*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldHi
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsHi/*addr bias*/));
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsLo/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   /*
      xHi = oldHi ^ expdHi;
      xLo = oldLo ^ expdLo;
      xHL = xHi | xLo;
      expd_eq_old = xHL == 0;
   */
   xHi = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   xLo = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   xHL = assignNew('C', mce, elemTy,
                   binop(opOr, xHi, xLo));
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, xHL, zero));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
                    NULL/*data*/, vdataHi/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
                   bdataHi/*bdata*/,
                   expd_eq_old/*guard for store*/ );
      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}


/* ------ Dealing with LL/SC (not difficult) ------ */

static void do_shadow_LLSC ( MCEnv*    mce,
                             IREndness stEnd,
                             IRTemp    stResult,
                             IRExpr*   stAddr,
                             IRExpr*   stStoredata )
{
   /* In short: treat a load-linked like a normal load followed by an
      assignment of the loaded (shadow) data to the result temporary.
      Treat a store-conditional like a normal store, and mark the
      result temporary as defined. */
   IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
   IRTemp resTmp = findShadowTmpV(mce, stResult);

   tl_assert(isIRAtom(stAddr));
   if (stStoredata)
      tl_assert(isIRAtom(stStoredata));

   if (stStoredata == NULL) {
      /* Load Linked */
      /* Just treat this as a normal load, followed by an assignment of
         the value to .result.
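         (As a sketch, for a 32-bit LL this amounts to 'result# =
         V-bits of a 32-bit load at stAddr' -- which is exactly the
         assign below.)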
*/
      /* Stay sane */
      tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                || resTy == Ity_I16 || resTy == Ity_I8);
      assign( 'V', mce, resTmp,
              expr2vbits_Load(
                 mce, stEnd, resTy, stAddr, 0/*addr bias*/,
                 NULL/*always happens*/) );
   } else {
      /* Store Conditional */
      /* Stay sane */
      IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
                                   stStoredata);
      tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
                || dataTy == Ity_I16 || dataTy == Ity_I8);
      do_shadow_Store( mce, stEnd,
                       stAddr, 0/* addr bias */,
                       stStoredata,
                       NULL /* shadow data */,
                       NULL/*guard*/ );
      /* This is a store conditional, so it writes to .result a value
         indicating whether or not the store succeeded.  Just claim
         this value is always defined.  In the PowerPC interpretation
         of store-conditional, definedness of the success indication
         depends on whether the address of the store matches the
         reservation address.  But we can't tell that here (and
         anyway, we're not being PowerPC-specific).  At least we are
         guaranteed that the definedness of the store address, and its
         addressability, will be checked as per normal.  So it seems
         pretty safe to just say that the success indication is always
         defined.

         In schemeS, for origin tracking, we must correspondingly set
         a no-origin value for the origin shadow of .result.
      */
      tl_assert(resTy == Ity_I1);
      assign( 'V', mce, resTmp, definedOfType(resTy) );
   }
}


/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */

static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg )
{
   complainIfUndefined(mce, sg->guard, NULL);
   /* do_shadow_Store will generate code to check the definedness and
      validity of sg->addr, in the case where sg->guard evaluates to
      True at run-time. */
   do_shadow_Store( mce, sg->end,
                    sg->addr, 0/* addr bias */,
                    sg->data,
                    NULL /* shadow data */,
                    sg->guard );
}

static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg )
{
   complainIfUndefined(mce, lg->guard, NULL);
   /* expr2vbits_Load_guarded_General will generate code to check the
      definedness and validity of lg->addr, in the case where
      lg->guard evaluates to True at run-time. */

   /* Look at the LoadG's built-in conversion operation, to determine
      the source (actual loaded data) type, and the equivalent IROp.
      NOTE that implicitly we are taking a widening operation that
      applies to original atoms and producing one that applies to V
      bits.  Since signed and unsigned widening are self-shadowing,
      this is a straight copy of the op (modulo swapping from the
      IRLoadGOp form to the IROp form).  Note also therefore that this
      implicitly duplicates the logic to do with said widening ops in
      expr2vbits_Unop.  See comment at the start of expr2vbits_Unop.
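
      For example (a sketch): for ILGop_16Uto32, the 16 bits of V-bits
      fetched from shadow memory are widened with Iop_16Uto32; the
      zeroes in the upper half mark the zero-filled data bits as
      defined.  For ILGop_16Sto32, Iop_16Sto32 replicates V-bit 15
      into the upper half, matching the result's dependence on the
      data's sign bit.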
*/ 5793 IROp vwiden = Iop_INVALID; 5794 IRType loadedTy = Ity_INVALID; 5795 switch (lg->cvt) { 5796 case ILGop_Ident32: loadedTy = Ity_I32; vwiden = Iop_INVALID; break; 5797 case ILGop_16Uto32: loadedTy = Ity_I16; vwiden = Iop_16Uto32; break; 5798 case ILGop_16Sto32: loadedTy = Ity_I16; vwiden = Iop_16Sto32; break; 5799 case ILGop_8Uto32: loadedTy = Ity_I8; vwiden = Iop_8Uto32; break; 5800 case ILGop_8Sto32: loadedTy = Ity_I8; vwiden = Iop_8Sto32; break; 5801 default: VG_(tool_panic)("do_shadow_LoadG"); 5802 } 5803 5804 IRAtom* vbits_alt 5805 = expr2vbits( mce, lg->alt ); 5806 IRAtom* vbits_final 5807 = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy, 5808 lg->addr, 0/*addr bias*/, 5809 lg->guard, vwiden, vbits_alt ); 5810 /* And finally, bind the V bits to the destination temporary. */ 5811 assign( 'V', mce, findShadowTmpV(mce, lg->dst), vbits_final ); 5812 } 5813 5814 5815 /*------------------------------------------------------------*/ 5816 /*--- Memcheck main ---*/ 5817 /*------------------------------------------------------------*/ 5818 5819 static void schemeS ( MCEnv* mce, IRStmt* st ); 5820 5821 static Bool isBogusAtom ( IRAtom* at ) 5822 { 5823 ULong n = 0; 5824 IRConst* con; 5825 tl_assert(isIRAtom(at)); 5826 if (at->tag == Iex_RdTmp) 5827 return False; 5828 tl_assert(at->tag == Iex_Const); 5829 con = at->Iex.Const.con; 5830 switch (con->tag) { 5831 case Ico_U1: return False; 5832 case Ico_U8: n = (ULong)con->Ico.U8; break; 5833 case Ico_U16: n = (ULong)con->Ico.U16; break; 5834 case Ico_U32: n = (ULong)con->Ico.U32; break; 5835 case Ico_U64: n = (ULong)con->Ico.U64; break; 5836 case Ico_F64: return False; 5837 case Ico_F32i: return False; 5838 case Ico_F64i: return False; 5839 case Ico_V128: return False; 5840 case Ico_V256: return False; 5841 default: ppIRExpr(at); tl_assert(0); 5842 } 5843 /* VG_(printf)("%llx\n", n); */ 5844 return (/*32*/ n == 0xFEFEFEFFULL 5845 /*32*/ || n == 0x80808080ULL 5846 /*32*/ || n == 0x7F7F7F7FULL 5847 /*32*/ || n == 0x7EFEFEFFULL 5848 /*32*/ || n == 0x81010100ULL 5849 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL 5850 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL 5851 /*64*/ || n == 0x0000000000008080ULL 5852 /*64*/ || n == 0x8080808080808080ULL 5853 /*64*/ || n == 0x0101010101010101ULL 5854 ); 5855 } 5856 5857 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st ) 5858 { 5859 Int i; 5860 IRExpr* e; 5861 IRDirty* d; 5862 IRCAS* cas; 5863 switch (st->tag) { 5864 case Ist_WrTmp: 5865 e = st->Ist.WrTmp.data; 5866 switch (e->tag) { 5867 case Iex_Get: 5868 case Iex_RdTmp: 5869 return False; 5870 case Iex_Const: 5871 return isBogusAtom(e); 5872 case Iex_Unop: 5873 return isBogusAtom(e->Iex.Unop.arg) 5874 || e->Iex.Unop.op == Iop_GetMSBs8x16; 5875 case Iex_GetI: 5876 return isBogusAtom(e->Iex.GetI.ix); 5877 case Iex_Binop: 5878 return isBogusAtom(e->Iex.Binop.arg1) 5879 || isBogusAtom(e->Iex.Binop.arg2); 5880 case Iex_Triop: 5881 return isBogusAtom(e->Iex.Triop.details->arg1) 5882 || isBogusAtom(e->Iex.Triop.details->arg2) 5883 || isBogusAtom(e->Iex.Triop.details->arg3); 5884 case Iex_Qop: 5885 return isBogusAtom(e->Iex.Qop.details->arg1) 5886 || isBogusAtom(e->Iex.Qop.details->arg2) 5887 || isBogusAtom(e->Iex.Qop.details->arg3) 5888 || isBogusAtom(e->Iex.Qop.details->arg4); 5889 case Iex_ITE: 5890 return isBogusAtom(e->Iex.ITE.cond) 5891 || isBogusAtom(e->Iex.ITE.iftrue) 5892 || isBogusAtom(e->Iex.ITE.iffalse); 5893 case Iex_Load: 5894 return isBogusAtom(e->Iex.Load.addr); 5895 case Iex_CCall: 5896 for (i = 0; e->Iex.CCall.args[i]; i++) 5897 if 
(isBogusAtom(e->Iex.CCall.args[i])) 5898 return True; 5899 return False; 5900 default: 5901 goto unhandled; 5902 } 5903 case Ist_Dirty: 5904 d = st->Ist.Dirty.details; 5905 for (i = 0; d->args[i]; i++) { 5906 IRAtom* atom = d->args[i]; 5907 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(atom))) { 5908 if (isBogusAtom(atom)) 5909 return True; 5910 } 5911 } 5912 if (isBogusAtom(d->guard)) 5913 return True; 5914 if (d->mAddr && isBogusAtom(d->mAddr)) 5915 return True; 5916 return False; 5917 case Ist_Put: 5918 return isBogusAtom(st->Ist.Put.data); 5919 case Ist_PutI: 5920 return isBogusAtom(st->Ist.PutI.details->ix) 5921 || isBogusAtom(st->Ist.PutI.details->data); 5922 case Ist_Store: 5923 return isBogusAtom(st->Ist.Store.addr) 5924 || isBogusAtom(st->Ist.Store.data); 5925 case Ist_StoreG: { 5926 IRStoreG* sg = st->Ist.StoreG.details; 5927 return isBogusAtom(sg->addr) || isBogusAtom(sg->data) 5928 || isBogusAtom(sg->guard); 5929 } 5930 case Ist_LoadG: { 5931 IRLoadG* lg = st->Ist.LoadG.details; 5932 return isBogusAtom(lg->addr) || isBogusAtom(lg->alt) 5933 || isBogusAtom(lg->guard); 5934 } 5935 case Ist_Exit: 5936 return isBogusAtom(st->Ist.Exit.guard); 5937 case Ist_AbiHint: 5938 return isBogusAtom(st->Ist.AbiHint.base) 5939 || isBogusAtom(st->Ist.AbiHint.nia); 5940 case Ist_NoOp: 5941 case Ist_IMark: 5942 case Ist_MBE: 5943 return False; 5944 case Ist_CAS: 5945 cas = st->Ist.CAS.details; 5946 return isBogusAtom(cas->addr) 5947 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False) 5948 || isBogusAtom(cas->expdLo) 5949 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False) 5950 || isBogusAtom(cas->dataLo); 5951 case Ist_LLSC: 5952 return isBogusAtom(st->Ist.LLSC.addr) 5953 || (st->Ist.LLSC.storedata 5954 ? isBogusAtom(st->Ist.LLSC.storedata) 5955 : False); 5956 default: 5957 unhandled: 5958 ppIRStmt(st); 5959 VG_(tool_panic)("hasBogusLiterals"); 5960 } 5961 } 5962 5963 5964 IRSB* MC_(instrument) ( VgCallbackClosure* closure, 5965 IRSB* sb_in, 5966 VexGuestLayout* layout, 5967 VexGuestExtents* vge, 5968 VexArchInfo* archinfo_host, 5969 IRType gWordTy, IRType hWordTy ) 5970 { 5971 Bool verboze = 0||False; 5972 Bool bogus; 5973 Int i, j, first_stmt; 5974 IRStmt* st; 5975 MCEnv mce; 5976 IRSB* sb_out; 5977 5978 if (gWordTy != hWordTy) { 5979 /* We don't currently support this case. */ 5980 VG_(tool_panic)("host/guest word size mismatch"); 5981 } 5982 5983 /* Check we're not completely nuts */ 5984 tl_assert(sizeof(UWord) == sizeof(void*)); 5985 tl_assert(sizeof(Word) == sizeof(void*)); 5986 tl_assert(sizeof(Addr) == sizeof(void*)); 5987 tl_assert(sizeof(ULong) == 8); 5988 tl_assert(sizeof(Long) == 8); 5989 tl_assert(sizeof(Addr64) == 8); 5990 tl_assert(sizeof(UInt) == 4); 5991 tl_assert(sizeof(Int) == 4); 5992 5993 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3); 5994 5995 /* Set up SB */ 5996 sb_out = deepCopyIRSBExceptStmts(sb_in); 5997 5998 /* Set up the running environment. Both .sb and .tmpMap are 5999 modified as we go along. Note that tmps are added to both 6000 .sb->tyenv and .tmpMap together, so the valid index-set for 6001 those two arrays should always be identical. */ 6002 VG_(memset)(&mce, 0, sizeof(mce)); 6003 mce.sb = sb_out; 6004 mce.trace = verboze; 6005 mce.layout = layout; 6006 mce.hWordTy = hWordTy; 6007 mce.bogusLiterals = False; 6008 6009 /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on 6010 Darwin. 10.7 is mostly built with LLVM, which uses these for 6011 bitfield inserts, and we get a lot of false errors if the cheap 6012 interpretation is used, alas. 
Could solve this much better if
      we knew which of such adds came from x86/amd64 LEA instructions,
      since these are the only ones really needing the expensive
      interpretation, but that would require some way to tag them in
      the _toIR.c front ends, which is a lot of faffing around.  So
      for now just use the slow and blunt-instrument solution. */
   mce.useLLVMworkarounds = False;
#  if defined(VGO_darwin)
   mce.useLLVMworkarounds = True;
#  endif

   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
                            sizeof(TempMapEnt));
   for (i = 0; i < sb_in->tyenv->types_used; i++) {
      TempMapEnt ent;
      ent.kind    = Orig;
      ent.shadowV = IRTemp_INVALID;
      ent.shadowB = IRTemp_INVALID;
      VG_(addToXA)( mce.tmpMap, &ent );
   }
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );

   /* Make a preliminary inspection of the statements, to see if there
      are any dodgy-looking literals.  If there are, we generate
      extra-detailed (hence extra-expensive) instrumentation in
      places.  Scan the whole bb even if dodginess is found earlier,
      so that the flatness assertion is applied to all stmts. */

   bogus = False;

   for (i = 0; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      if (!bogus) {
         bogus = checkForBogusLiterals(st);
         if (0 && bogus) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

   }

   mce.bogusLiterals = bogus;

   /* Copy verbatim any IR preamble preceding the first IMark */

   tl_assert(mce.sb == sb_out);
   tl_assert(mce.sb != sb_in);

   i = 0;
   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      stmt( 'C', &mce, sb_in->stmts[i] );
      i++;
   }

   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
      cause the IR following the preamble to contain references to IR
      temporaries defined in the preamble.  Because the preamble isn't
      instrumented, these temporaries don't have any shadows.
      Nevertheless uses of them following the preamble will cause
      memcheck to generate references to their shadows.  End effect is
      to cause IR sanity check failures, due to references to
      non-existent shadows.  This is only evident for the complex
      preambles used for function wrapping on TOC-afflicted platforms
      (ppc64-linux).

      The following loop therefore scans the preamble looking for
      assignments to temporaries.  For each one found it creates an
      assignment to the corresponding (V) shadow temp, marking it as
      'defined'.  This is the same resulting IR as if the main
      instrumentation loop below had been applied to the statement
      'tmp = CONSTANT'.

      Similarly, if origin tracking is enabled, we must generate an
      assignment for the corresponding origin (B) shadow, claiming
      no-origin, as appropriate for a defined value.
   */
   for (j = 0; j < i; j++) {
      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
         /* findShadowTmpV checks its arg is an original tmp;
            no need to assert that here.
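            (As a sketch: for a preamble assignment to a 64-bit tmp t,
            this emits 't# = 0x0:I64', i.e. all bits defined, and,
            when origin tracking is on, 't_b = 0x0:I32', meaning
            no-origin.  't#' and 't_b' are illustrative names only.)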
*/ 6103 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp; 6104 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o); 6105 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v); 6106 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) ); 6107 if (MC_(clo_mc_level) == 3) { 6108 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o); 6109 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32); 6110 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */); 6111 } 6112 if (0) { 6113 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j); 6114 ppIRType( ty_v ); 6115 VG_(printf)("\n"); 6116 } 6117 } 6118 } 6119 6120 /* Iterate over the remaining stmts to generate instrumentation. */ 6121 6122 tl_assert(sb_in->stmts_used > 0); 6123 tl_assert(i >= 0); 6124 tl_assert(i < sb_in->stmts_used); 6125 tl_assert(sb_in->stmts[i]->tag == Ist_IMark); 6126 6127 for (/* use current i*/; i < sb_in->stmts_used; i++) { 6128 6129 st = sb_in->stmts[i]; 6130 first_stmt = sb_out->stmts_used; 6131 6132 if (verboze) { 6133 VG_(printf)("\n"); 6134 ppIRStmt(st); 6135 VG_(printf)("\n"); 6136 } 6137 6138 if (MC_(clo_mc_level) == 3) { 6139 /* See comments on case Ist_CAS below. */ 6140 if (st->tag != Ist_CAS) 6141 schemeS( &mce, st ); 6142 } 6143 6144 /* Generate instrumentation code for each stmt ... */ 6145 6146 switch (st->tag) { 6147 6148 case Ist_WrTmp: 6149 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp), 6150 expr2vbits( &mce, st->Ist.WrTmp.data) ); 6151 break; 6152 6153 case Ist_Put: 6154 do_shadow_PUT( &mce, 6155 st->Ist.Put.offset, 6156 st->Ist.Put.data, 6157 NULL /* shadow atom */, NULL /* guard */ ); 6158 break; 6159 6160 case Ist_PutI: 6161 do_shadow_PUTI( &mce, st->Ist.PutI.details); 6162 break; 6163 6164 case Ist_Store: 6165 do_shadow_Store( &mce, st->Ist.Store.end, 6166 st->Ist.Store.addr, 0/* addr bias */, 6167 st->Ist.Store.data, 6168 NULL /* shadow data */, 6169 NULL/*guard*/ ); 6170 break; 6171 6172 case Ist_StoreG: 6173 do_shadow_StoreG( &mce, st->Ist.StoreG.details ); 6174 break; 6175 6176 case Ist_LoadG: 6177 do_shadow_LoadG( &mce, st->Ist.LoadG.details ); 6178 break; 6179 6180 case Ist_Exit: 6181 complainIfUndefined( &mce, st->Ist.Exit.guard, NULL ); 6182 break; 6183 6184 case Ist_IMark: 6185 break; 6186 6187 case Ist_NoOp: 6188 case Ist_MBE: 6189 break; 6190 6191 case Ist_Dirty: 6192 do_shadow_Dirty( &mce, st->Ist.Dirty.details ); 6193 break; 6194 6195 case Ist_AbiHint: 6196 do_AbiHint( &mce, st->Ist.AbiHint.base, 6197 st->Ist.AbiHint.len, 6198 st->Ist.AbiHint.nia ); 6199 break; 6200 6201 case Ist_CAS: 6202 do_shadow_CAS( &mce, st->Ist.CAS.details ); 6203 /* Note, do_shadow_CAS copies the CAS itself to the output 6204 block, because it needs to add instrumentation both 6205 before and after it. Hence skip the copy below. Also 6206 skip the origin-tracking stuff (call to schemeS) above, 6207 since that's all tangled up with it too; do_shadow_CAS 6208 does it all. */ 6209 break; 6210 6211 case Ist_LLSC: 6212 do_shadow_LLSC( &mce, 6213 st->Ist.LLSC.end, 6214 st->Ist.LLSC.result, 6215 st->Ist.LLSC.addr, 6216 st->Ist.LLSC.storedata ); 6217 break; 6218 6219 default: 6220 VG_(printf)("\n"); 6221 ppIRStmt(st); 6222 VG_(printf)("\n"); 6223 VG_(tool_panic)("memcheck: unhandled IRStmt"); 6224 6225 } /* switch (st->tag) */ 6226 6227 if (0 && verboze) { 6228 for (j = first_stmt; j < sb_out->stmts_used; j++) { 6229 VG_(printf)(" "); 6230 ppIRStmt(sb_out->stmts[j]); 6231 VG_(printf)("\n"); 6232 } 6233 VG_(printf)("\n"); 6234 } 6235 6236 /* ... and finally copy the stmt itself to the output. 
Except, 6237 skip the copy of IRCASs; see comments on case Ist_CAS 6238 above. */ 6239 if (st->tag != Ist_CAS) 6240 stmt('C', &mce, st); 6241 } 6242 6243 /* Now we need to complain if the jump target is undefined. */ 6244 first_stmt = sb_out->stmts_used; 6245 6246 if (verboze) { 6247 VG_(printf)("sb_in->next = "); 6248 ppIRExpr(sb_in->next); 6249 VG_(printf)("\n\n"); 6250 } 6251 6252 complainIfUndefined( &mce, sb_in->next, NULL ); 6253 6254 if (0 && verboze) { 6255 for (j = first_stmt; j < sb_out->stmts_used; j++) { 6256 VG_(printf)(" "); 6257 ppIRStmt(sb_out->stmts[j]); 6258 VG_(printf)("\n"); 6259 } 6260 VG_(printf)("\n"); 6261 } 6262 6263 /* If this fails, there's been some serious snafu with tmp management, 6264 that should be investigated. */ 6265 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used ); 6266 VG_(deleteXA)( mce.tmpMap ); 6267 6268 tl_assert(mce.sb == sb_out); 6269 return sb_out; 6270 } 6271 6272 /*------------------------------------------------------------*/ 6273 /*--- Post-tree-build final tidying ---*/ 6274 /*------------------------------------------------------------*/ 6275 6276 /* This exploits the observation that Memcheck often produces 6277 repeated conditional calls of the form 6278 6279 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag) 6280 6281 with the same guard expression G guarding the same helper call. 6282 The second and subsequent calls are redundant. This usually 6283 results from instrumentation of guest code containing multiple 6284 memory references at different constant offsets from the same base 6285 register. After optimisation of the instrumentation, you get a 6286 test for the definedness of the base register for each memory 6287 reference, which is kinda pointless. MC_(final_tidy) therefore 6288 looks for such repeated calls and removes all but the first. */ 6289 6290 /* A struct for recording which (helper, guard) pairs we have already 6291 seen. */ 6292 typedef 6293 struct { void* entry; IRExpr* guard; } 6294 Pair; 6295 6296 /* Return True if e1 and e2 definitely denote the same value (used to 6297 compare guards). Return False if unknown; False is the safe 6298 answer. Since guest registers and guest memory do not have the 6299 SSA property we must return False if any Gets or Loads appear in 6300 the expression. */ 6301 6302 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 ) 6303 { 6304 if (e1->tag != e2->tag) 6305 return False; 6306 switch (e1->tag) { 6307 case Iex_Const: 6308 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con ); 6309 case Iex_Binop: 6310 return e1->Iex.Binop.op == e2->Iex.Binop.op 6311 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1) 6312 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2); 6313 case Iex_Unop: 6314 return e1->Iex.Unop.op == e2->Iex.Unop.op 6315 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg); 6316 case Iex_RdTmp: 6317 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp; 6318 case Iex_ITE: 6319 return sameIRValue( e1->Iex.ITE.cond, e2->Iex.ITE.cond ) 6320 && sameIRValue( e1->Iex.ITE.iftrue, e2->Iex.ITE.iftrue ) 6321 && sameIRValue( e1->Iex.ITE.iffalse, e2->Iex.ITE.iffalse ); 6322 case Iex_Qop: 6323 case Iex_Triop: 6324 case Iex_CCall: 6325 /* be lazy. Could define equality for these, but they never 6326 appear to be used. 
*/ 6327 return False; 6328 case Iex_Get: 6329 case Iex_GetI: 6330 case Iex_Load: 6331 /* be conservative - these may not give the same value each 6332 time */ 6333 return False; 6334 case Iex_Binder: 6335 /* should never see this */ 6336 /* fallthrough */ 6337 default: 6338 VG_(printf)("mc_translate.c: sameIRValue: unhandled: "); 6339 ppIRExpr(e1); 6340 VG_(tool_panic)("memcheck:sameIRValue"); 6341 return False; 6342 } 6343 } 6344 6345 /* See if 'pairs' already has an entry for (entry, guard). Return 6346 True if so. If not, add an entry. */ 6347 6348 static 6349 Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry ) 6350 { 6351 Pair p; 6352 Pair* pp; 6353 Int i, n = VG_(sizeXA)( pairs ); 6354 for (i = 0; i < n; i++) { 6355 pp = VG_(indexXA)( pairs, i ); 6356 if (pp->entry == entry && sameIRValue(pp->guard, guard)) 6357 return True; 6358 } 6359 p.guard = guard; 6360 p.entry = entry; 6361 VG_(addToXA)( pairs, &p ); 6362 return False; 6363 } 6364 6365 static Bool is_helperc_value_checkN_fail ( const HChar* name ) 6366 { 6367 return 6368 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)") 6369 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)") 6370 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)") 6371 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)") 6372 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)") 6373 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)") 6374 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)") 6375 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)"); 6376 } 6377 6378 IRSB* MC_(final_tidy) ( IRSB* sb_in ) 6379 { 6380 Int i; 6381 IRStmt* st; 6382 IRDirty* di; 6383 IRExpr* guard; 6384 IRCallee* cee; 6385 Bool alreadyPresent; 6386 XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1", 6387 VG_(free), sizeof(Pair) ); 6388 /* Scan forwards through the statements. Each time a call to one 6389 of the relevant helpers is seen, check if we have made a 6390 previous call to the same helper using the same guard 6391 expression, and if so, delete the call. */ 6392 for (i = 0; i < sb_in->stmts_used; i++) { 6393 st = sb_in->stmts[i]; 6394 tl_assert(st); 6395 if (st->tag != Ist_Dirty) 6396 continue; 6397 di = st->Ist.Dirty.details; 6398 guard = di->guard; 6399 tl_assert(guard); 6400 if (0) { ppIRExpr(guard); VG_(printf)("\n"); } 6401 cee = di->cee; 6402 if (!is_helperc_value_checkN_fail( cee->name )) 6403 continue; 6404 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with 6405 guard 'guard'. Check if we have already seen a call to this 6406 function with the same guard. If so, delete it. If not, 6407 add it to the set of calls we do know about. */ 6408 alreadyPresent = check_or_add( pairs, guard, cee->addr ); 6409 if (alreadyPresent) { 6410 sb_in->stmts[i] = IRStmt_NoOp(); 6411 if (0) VG_(printf)("XX\n"); 6412 } 6413 } 6414 VG_(deleteXA)( pairs ); 6415 return sb_in; 6416 } 6417 6418 6419 /*------------------------------------------------------------*/ 6420 /*--- Origin tracking stuff ---*/ 6421 /*------------------------------------------------------------*/ 6422 6423 /* Almost identical to findShadowTmpV. */ 6424 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig ) 6425 { 6426 TempMapEnt* ent; 6427 /* VG_(indexXA) range-checks 'orig', hence no need to check 6428 here. 
*/
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowB == IRTemp_INVALID) {
      IRTemp tmpB
        = newTemp( mce, Ity_I32, BSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowB == IRTemp_INVALID);
      ent->shadowB = tmpB;
   }
   return ent->shadowB;
}

static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
{
   return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
}


/* Make a guarded origin load, with no special handling in the
   didn't-happen case.  A GUARD of NULL is assumed to mean "always
   True".

   Generate IR to do a shadow origins load from BASEADDR+OFFSET and
   return the otag.  The loaded size is SZB.  If GUARD evaluates to
   False at run time then the returned otag is zero.
*/
static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB,
                                    IRAtom* baseaddr,
                                    Int offset, IRExpr* guard )
{
   void*        hFun;
   const HChar* hName;
   IRTemp       bTmp;
   IRDirty*     di;
   IRType       aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp         opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*      ea    = baseaddr;
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   bTmp = newTemp(mce, mce->hWordTy, BSh);

   switch (szB) {
      case 1: hFun  = (void*)&MC_(helperc_b_load1);
              hName = "MC_(helperc_b_load1)";
              break;
      case 2: hFun  = (void*)&MC_(helperc_b_load2);
              hName = "MC_(helperc_b_load2)";
              break;
      case 4: hFun  = (void*)&MC_(helperc_b_load4);
              hName = "MC_(helperc_b_load4)";
              break;
      case 8: hFun  = (void*)&MC_(helperc_b_load8);
              hName = "MC_(helperc_b_load8)";
              break;
      case 16: hFun  = (void*)&MC_(helperc_b_load16);
               hName = "MC_(helperc_b_load16)";
               break;
      case 32: hFun  = (void*)&MC_(helperc_b_load32);
               hName = "MC_(helperc_b_load32)";
               break;
      default:
         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
         tl_assert(0);
   }
   di = unsafeIRDirty_1_N(
           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_1( ea )
        );
   if (guard) {
      di->guard = guard;
      /* Ideally the didn't-happen return value here would be
         all-zeroes (unknown-origin), so it'd be harmless if it got
         used inadvertently.  We slum it out with the IR-mandated
         default value (0b01 repeating, 0x55 etc) as that'll probably
         trump all legitimate otags via Max32, and it's pretty
         obviously bogus. */
   }
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   stmt( 'B', mce, IRStmt_Dirty(di) );
   if (mce->hWordTy == Ity_I64) {
      /* 64-bit host */
      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
      return mkexpr(bTmp32);
   } else {
      /* 32-bit host */
      return mkexpr(bTmp);
   }
}


/* Generate IR to do a shadow origins load from BASEADDR+OFFSET.  The
   loaded size is SZB.  The load is regarded as unconditional (always
   happens).
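   (Implemented simply as a call to gen_guarded_load_b above, with a
   NULL guard.)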
6530 */ 6531 static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr, 6532 Int offset ) 6533 { 6534 return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/); 6535 } 6536 6537 6538 /* The most general handler for guarded origin loads. A GUARD of NULL 6539 is assumed to mean "always True". 6540 6541 Generate IR to do a shadow origin load from ADDR+BIAS and return 6542 the B bits. The loaded type is TY. If GUARD evaluates to False at 6543 run time then the returned B bits are simply BALT instead. 6544 */ 6545 static 6546 IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce, 6547 IRType ty, 6548 IRAtom* addr, UInt bias, 6549 IRAtom* guard, IRAtom* balt ) 6550 { 6551 /* If the guard evaluates to True, this will hold the loaded 6552 origin. If the guard evaluates to False, this will be zero, 6553 meaning "unknown origin", in which case we will have to replace 6554 it using an ITE below. */ 6555 IRAtom* iftrue 6556 = assignNew('B', mce, Ity_I32, 6557 gen_guarded_load_b(mce, sizeofIRType(ty), 6558 addr, bias, guard)); 6559 /* These are the bits we will return if the load doesn't take 6560 place. */ 6561 IRAtom* iffalse 6562 = balt; 6563 /* Prepare the cond for the ITE. Convert a NULL cond into 6564 something that iropt knows how to fold out later. */ 6565 IRAtom* cond 6566 = guard == NULL ? mkU1(1) : guard; 6567 /* And assemble the final result. */ 6568 return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse)); 6569 } 6570 6571 6572 /* Generate a shadow origins store. guard :: Ity_I1 controls whether 6573 the store really happens; NULL means it unconditionally does. */ 6574 static void gen_store_b ( MCEnv* mce, Int szB, 6575 IRAtom* baseaddr, Int offset, IRAtom* dataB, 6576 IRAtom* guard ) 6577 { 6578 void* hFun; 6579 const HChar* hName; 6580 IRDirty* di; 6581 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); 6582 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64; 6583 IRAtom* ea = baseaddr; 6584 if (guard) { 6585 tl_assert(isOriginalAtom(mce, guard)); 6586 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); 6587 } 6588 if (offset != 0) { 6589 IRAtom* off = aTy == Ity_I32 ? mkU32( offset ) 6590 : mkU64( (Long)(Int)offset ); 6591 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off)); 6592 } 6593 if (mce->hWordTy == Ity_I64) 6594 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB)); 6595 6596 switch (szB) { 6597 case 1: hFun = (void*)&MC_(helperc_b_store1); 6598 hName = "MC_(helperc_b_store1)"; 6599 break; 6600 case 2: hFun = (void*)&MC_(helperc_b_store2); 6601 hName = "MC_(helperc_b_store2)"; 6602 break; 6603 case 4: hFun = (void*)&MC_(helperc_b_store4); 6604 hName = "MC_(helperc_b_store4)"; 6605 break; 6606 case 8: hFun = (void*)&MC_(helperc_b_store8); 6607 hName = "MC_(helperc_b_store8)"; 6608 break; 6609 case 16: hFun = (void*)&MC_(helperc_b_store16); 6610 hName = "MC_(helperc_b_store16)"; 6611 break; 6612 case 32: hFun = (void*)&MC_(helperc_b_store32); 6613 hName = "MC_(helperc_b_store32)"; 6614 break; 6615 default: 6616 tl_assert(0); 6617 } 6618 di = unsafeIRDirty_0_N( 2/*regparms*/, 6619 hName, VG_(fnptr_to_fnentry)( hFun ), 6620 mkIRExprVec_2( ea, dataB ) 6621 ); 6622 /* no need to mess with any annotations. This call accesses 6623 neither guest state nor guest memory. 
*/ 6624 if (guard) di->guard = guard; 6625 stmt( 'B', mce, IRStmt_Dirty(di) ); 6626 } 6627 6628 static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) { 6629 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e); 6630 if (eTy == Ity_I64) 6631 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) ); 6632 if (eTy == Ity_I32) 6633 return e; 6634 tl_assert(0); 6635 } 6636 6637 static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) { 6638 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e); 6639 tl_assert(eTy == Ity_I32); 6640 if (dstTy == Ity_I64) 6641 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) ); 6642 tl_assert(0); 6643 } 6644 6645 6646 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ) 6647 { 6648 tl_assert(MC_(clo_mc_level) == 3); 6649 6650 switch (e->tag) { 6651 6652 case Iex_GetI: { 6653 IRRegArray* descr_b; 6654 IRAtom *t1, *t2, *t3, *t4; 6655 IRRegArray* descr = e->Iex.GetI.descr; 6656 IRType equivIntTy 6657 = MC_(get_otrack_reg_array_equiv_int_type)(descr); 6658 /* If this array is unshadowable for whatever reason, use the 6659 usual approximation. */ 6660 if (equivIntTy == Ity_INVALID) 6661 return mkU32(0); 6662 tl_assert(sizeofIRType(equivIntTy) >= 4); 6663 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy)); 6664 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB, 6665 equivIntTy, descr->nElems ); 6666 /* Do a shadow indexed get of the same size, giving t1. Take 6667 the bottom 32 bits of it, giving t2. Compute into t3 the 6668 origin for the index (almost certainly zero, but there's 6669 no harm in being completely general here, since iropt will 6670 remove any useless code), and fold it in, giving a final 6671 value t4. */ 6672 t1 = assignNew( 'B', mce, equivIntTy, 6673 IRExpr_GetI( descr_b, e->Iex.GetI.ix, 6674 e->Iex.GetI.bias )); 6675 t2 = narrowTo32( mce, t1 ); 6676 t3 = schemeE( mce, e->Iex.GetI.ix ); 6677 t4 = gen_maxU32( mce, t2, t3 ); 6678 return t4; 6679 } 6680 case Iex_CCall: { 6681 Int i; 6682 IRAtom* here; 6683 IRExpr** args = e->Iex.CCall.args; 6684 IRAtom* curr = mkU32(0); 6685 for (i = 0; args[i]; i++) { 6686 tl_assert(i < 32); 6687 tl_assert(isOriginalAtom(mce, args[i])); 6688 /* Only take notice of this arg if the callee's 6689 mc-exclusion mask does not say it is to be excluded. */ 6690 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) { 6691 /* the arg is to be excluded from definedness checking. 6692 Do nothing. */ 6693 if (0) VG_(printf)("excluding %s(%d)\n", 6694 e->Iex.CCall.cee->name, i); 6695 } else { 6696 /* calculate the arg's definedness, and pessimistically 6697 merge it in. 
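               ('Pessimistically merge' means folding the arg's otag
               into the running value with Iop_Max32U, via gen_maxU32
               below, so the numerically largest otag survives.)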
*/ 6698 here = schemeE( mce, args[i] ); 6699 curr = gen_maxU32( mce, curr, here ); 6700 } 6701 } 6702 return curr; 6703 } 6704 case Iex_Load: { 6705 Int dszB; 6706 dszB = sizeofIRType(e->Iex.Load.ty); 6707 /* assert that the B value for the address is already 6708 available (somewhere) */ 6709 tl_assert(isIRAtom(e->Iex.Load.addr)); 6710 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64); 6711 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 ); 6712 } 6713 case Iex_ITE: { 6714 IRAtom* b1 = schemeE( mce, e->Iex.ITE.cond ); 6715 IRAtom* b3 = schemeE( mce, e->Iex.ITE.iftrue ); 6716 IRAtom* b2 = schemeE( mce, e->Iex.ITE.iffalse ); 6717 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 )); 6718 } 6719 case Iex_Qop: { 6720 IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 ); 6721 IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 ); 6722 IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 ); 6723 IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 ); 6724 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ), 6725 gen_maxU32( mce, b3, b4 ) ); 6726 } 6727 case Iex_Triop: { 6728 IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 ); 6729 IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 ); 6730 IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 ); 6731 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) ); 6732 } 6733 case Iex_Binop: { 6734 switch (e->Iex.Binop.op) { 6735 case Iop_CasCmpEQ8: case Iop_CasCmpNE8: 6736 case Iop_CasCmpEQ16: case Iop_CasCmpNE16: 6737 case Iop_CasCmpEQ32: case Iop_CasCmpNE32: 6738 case Iop_CasCmpEQ64: case Iop_CasCmpNE64: 6739 /* Just say these all produce a defined result, 6740 regardless of their arguments. See 6741 COMMENT_ON_CasCmpEQ in this file. */ 6742 return mkU32(0); 6743 default: { 6744 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 ); 6745 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 ); 6746 return gen_maxU32( mce, b1, b2 ); 6747 } 6748 } 6749 tl_assert(0); 6750 /*NOTREACHED*/ 6751 } 6752 case Iex_Unop: { 6753 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg ); 6754 return b1; 6755 } 6756 case Iex_Const: 6757 return mkU32(0); 6758 case Iex_RdTmp: 6759 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp )); 6760 case Iex_Get: { 6761 Int b_offset = MC_(get_otrack_shadow_offset)( 6762 e->Iex.Get.offset, 6763 sizeofIRType(e->Iex.Get.ty) 6764 ); 6765 tl_assert(b_offset >= -1 6766 && b_offset <= mce->layout->total_sizeB -4); 6767 if (b_offset >= 0) { 6768 /* FIXME: this isn't an atom! */ 6769 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB, 6770 Ity_I32 ); 6771 } 6772 return mkU32(0); 6773 } 6774 default: 6775 VG_(printf)("mc_translate.c: schemeE: unhandled: "); 6776 ppIRExpr(e); 6777 VG_(tool_panic)("memcheck:schemeE"); 6778 } 6779 } 6780 6781 6782 static void do_origins_Dirty ( MCEnv* mce, IRDirty* d ) 6783 { 6784 // This is a hacked version of do_shadow_Dirty 6785 Int i, k, n, toDo, gSz, gOff; 6786 IRAtom *here, *curr; 6787 IRTemp dst; 6788 6789 /* First check the guard. */ 6790 curr = schemeE( mce, d->guard ); 6791 6792 /* Now round up all inputs and maxU32 over them. */ 6793 6794 /* Inputs: unmasked args 6795 Note: arguments are evaluated REGARDLESS of the guard expression */ 6796 for (i = 0; d->args[i]; i++) { 6797 IRAtom* arg = d->args[i]; 6798 if ( (d->cee->mcx_mask & (1<<i)) 6799 || UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)) ) { 6800 /* ignore this arg */ 6801 } else { 6802 here = schemeE( mce, arg ); 6803 curr = gen_maxU32( mce, curr, here ); 6804 } 6805 } 6806 6807 /* Inputs: guest state that we read. 
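      (That is, segments with fx Ifx_Read or Ifx_Modify; pure
      Ifx_Write segments are skipped by the test just below.)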
*/ 6808 for (i = 0; i < d->nFxState; i++) { 6809 tl_assert(d->fxState[i].fx != Ifx_None); 6810 if (d->fxState[i].fx == Ifx_Write) 6811 continue; 6812 6813 /* Enumerate the described state segments */ 6814 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 6815 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 6816 gSz = d->fxState[i].size; 6817 6818 /* Ignore any sections marked as 'always defined'. */ 6819 if (isAlwaysDefd(mce, gOff, gSz)) { 6820 if (0) 6821 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n", 6822 gOff, gSz); 6823 continue; 6824 } 6825 6826 /* This state element is read or modified. So we need to 6827 consider it. If larger than 4 bytes, deal with it in 6828 4-byte chunks. */ 6829 while (True) { 6830 Int b_offset; 6831 tl_assert(gSz >= 0); 6832 if (gSz == 0) break; 6833 n = gSz <= 4 ? gSz : 4; 6834 /* update 'curr' with maxU32 of the state slice 6835 gOff .. gOff+n-1 */ 6836 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4); 6837 if (b_offset != -1) { 6838 /* Observe the guard expression. If it is false use 0, i.e. 6839 nothing is known about the origin */ 6840 IRAtom *cond, *iffalse, *iftrue; 6841 6842 cond = assignNew( 'B', mce, Ity_I1, d->guard); 6843 iffalse = mkU32(0); 6844 iftrue = assignNew( 'B', mce, Ity_I32, 6845 IRExpr_Get(b_offset 6846 + 2*mce->layout->total_sizeB, 6847 Ity_I32)); 6848 here = assignNew( 'B', mce, Ity_I32, 6849 IRExpr_ITE(cond, iftrue, iffalse)); 6850 curr = gen_maxU32( mce, curr, here ); 6851 } 6852 gSz -= n; 6853 gOff += n; 6854 } 6855 } 6856 } 6857 6858 /* Inputs: memory */ 6859 6860 if (d->mFx != Ifx_None) { 6861 /* Because we may do multiple shadow loads/stores from the same 6862 base address, it's best to do a single test of its 6863 definedness right now. Post-instrumentation optimisation 6864 should remove all but this test. */ 6865 tl_assert(d->mAddr); 6866 here = schemeE( mce, d->mAddr ); 6867 curr = gen_maxU32( mce, curr, here ); 6868 } 6869 6870 /* Deal with memory inputs (reads or modifies) */ 6871 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) { 6872 toDo = d->mSize; 6873 /* chew off 32-bit chunks. We don't care about the endianness 6874 since it's all going to be condensed down to a single bit, 6875 but nevertheless choose an endianness which is hopefully 6876 native to the platform. */ 6877 while (toDo >= 4) { 6878 here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo, 6879 d->guard ); 6880 curr = gen_maxU32( mce, curr, here ); 6881 toDo -= 4; 6882 } 6883 /* handle possible 16-bit excess */ 6884 while (toDo >= 2) { 6885 here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo, 6886 d->guard ); 6887 curr = gen_maxU32( mce, curr, here ); 6888 toDo -= 2; 6889 } 6890 /* chew off the remaining 8-bit chunk, if any */ 6891 if (toDo == 1) { 6892 here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo, 6893 d->guard ); 6894 curr = gen_maxU32( mce, curr, here ); 6895 toDo -= 1; 6896 } 6897 tl_assert(toDo == 0); 6898 } 6899 6900 /* Whew! So curr is a 32-bit B-value which should give an origin 6901 of some use if any of the inputs to the helper are undefined. 6902 Now we need to re-distribute the results to all destinations. */ 6903 6904 /* Outputs: the destination temporary, if there is one. */ 6905 if (d->tmp != IRTemp_INVALID) { 6906 dst = findShadowTmpB(mce, d->tmp); 6907 assign( 'V', mce, dst, curr ); 6908 } 6909 6910 /* Outputs: guest state that we write or modify. 
*/ 6911 for (i = 0; i < d->nFxState; i++) { 6912 tl_assert(d->fxState[i].fx != Ifx_None); 6913 if (d->fxState[i].fx == Ifx_Read) 6914 continue; 6915 6916 /* Enumerate the described state segments */ 6917 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 6918 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 6919 gSz = d->fxState[i].size; 6920 6921 /* Ignore any sections marked as 'always defined'. */ 6922 if (isAlwaysDefd(mce, gOff, gSz)) 6923 continue; 6924 6925 /* This state element is written or modified. So we need to 6926 consider it. If larger than 4 bytes, deal with it in 6927 4-byte chunks. */ 6928 while (True) { 6929 Int b_offset; 6930 tl_assert(gSz >= 0); 6931 if (gSz == 0) break; 6932 n = gSz <= 4 ? gSz : 4; 6933 /* Write 'curr' to the state slice gOff .. gOff+n-1 */ 6934 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4); 6935 if (b_offset != -1) { 6936 6937 /* If the guard expression evaluates to false we simply Put 6938 the value that is already stored in the guest state slot */ 6939 IRAtom *cond, *iffalse; 6940 6941 cond = assignNew('B', mce, Ity_I1, 6942 d->guard); 6943 iffalse = assignNew('B', mce, Ity_I32, 6944 IRExpr_Get(b_offset + 6945 2*mce->layout->total_sizeB, 6946 Ity_I32)); 6947 curr = assignNew('V', mce, Ity_I32, 6948 IRExpr_ITE(cond, curr, iffalse)); 6949 6950 stmt( 'B', mce, IRStmt_Put(b_offset 6951 + 2*mce->layout->total_sizeB, 6952 curr )); 6953 } 6954 gSz -= n; 6955 gOff += n; 6956 } 6957 } 6958 } 6959 6960 /* Outputs: memory that we write or modify. Same comments about 6961 endianness as above apply. */ 6962 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) { 6963 toDo = d->mSize; 6964 /* chew off 32-bit chunks */ 6965 while (toDo >= 4) { 6966 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr, 6967 d->guard ); 6968 toDo -= 4; 6969 } 6970 /* handle possible 16-bit excess */ 6971 while (toDo >= 2) { 6972 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr, 6973 d->guard ); 6974 toDo -= 2; 6975 } 6976 /* chew off the remaining 8-bit chunk, if any */ 6977 if (toDo == 1) { 6978 gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr, 6979 d->guard ); 6980 toDo -= 1; 6981 } 6982 tl_assert(toDo == 0); 6983 } 6984 } 6985 6986 6987 /* Generate IR for origin shadowing for a general guarded store. */ 6988 static void do_origins_Store_guarded ( MCEnv* mce, 6989 IREndness stEnd, 6990 IRExpr* stAddr, 6991 IRExpr* stData, 6992 IRExpr* guard ) 6993 { 6994 Int dszB; 6995 IRAtom* dataB; 6996 /* assert that the B value for the address is already available 6997 (somewhere), since the call to schemeE will want to see it. 6998 XXXX how does this actually ensure that?? */ 6999 tl_assert(isIRAtom(stAddr)); 7000 tl_assert(isIRAtom(stData)); 7001 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) ); 7002 dataB = schemeE( mce, stData ); 7003 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard ); 7004 } 7005 7006 7007 /* Generate IR for origin shadowing for a plain store. 
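   (Implemented as a guarded store with a NULL, i.e. always-true,
   guard.)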
*/
static void do_origins_Store_plain ( MCEnv* mce,
                                     IREndness stEnd,
                                     IRExpr* stAddr,
                                     IRExpr* stData )
{
   do_origins_Store_guarded ( mce, stEnd, stAddr, stData,
                              NULL/*guard*/ );
}


/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */

static void do_origins_StoreG ( MCEnv* mce, IRStoreG* sg )
{
   do_origins_Store_guarded( mce, sg->end, sg->addr,
                             sg->data, sg->guard );
}

static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
{
   IRType loadedTy = Ity_INVALID;
   switch (lg->cvt) {
      case ILGop_Ident32: loadedTy = Ity_I32; break;
      case ILGop_16Uto32: loadedTy = Ity_I16; break;
      case ILGop_16Sto32: loadedTy = Ity_I16; break;
      case ILGop_8Uto32:  loadedTy = Ity_I8;  break;
      case ILGop_8Sto32:  loadedTy = Ity_I8;  break;
      default: VG_(tool_panic)("schemeS.IRLoadG");
   }
   IRAtom* ori_alt
      = schemeE( mce, lg->alt );
   IRAtom* ori_final
      = expr2ori_Load_guarded_General(mce, loadedTy,
                                      lg->addr, 0/*addr bias*/,
                                      lg->guard, ori_alt );
   /* And finally, bind the origin to the destination temporary. */
   assign( 'B', mce, findShadowTmpB(mce, lg->dst), ori_final );
}


static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRPutI *puti = st->Ist.PutI.details;
         IRRegArray* descr_b;
         IRAtom      *t1, *t2, *t3, *t4;
         IRRegArray* descr = puti->descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, puti->data );
         t2 = schemeE( mce, puti->ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
                                               puti->bias, t4) ));
         break;
      }

      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store_plain( mce, st->Ist.Store.end,
                                 st->Ist.Store.addr,
                                 st->Ist.Store.data );
         break;

      case Ist_StoreG:
         do_origins_StoreG( mce, st->Ist.StoreG.details );
         break;

      case Ist_LoadG:
         do_origins_LoadG( mce, st->Ist.LoadG.details );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data to the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined.
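            (Origin-wise, 'defined' translates to otag zero, i.e.
            no-origin -- hence the mkU32(0) below.)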
*/ 7113 if (st->Ist.LLSC.storedata == NULL) { 7114 /* Load Linked */ 7115 IRType resTy 7116 = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result); 7117 IRExpr* vanillaLoad 7118 = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr); 7119 tl_assert(resTy == Ity_I64 || resTy == Ity_I32 7120 || resTy == Ity_I16 || resTy == Ity_I8); 7121 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result), 7122 schemeE(mce, vanillaLoad)); 7123 } else { 7124 /* Store conditional */ 7125 do_origins_Store_plain( mce, st->Ist.LLSC.end, 7126 st->Ist.LLSC.addr, 7127 st->Ist.LLSC.storedata ); 7128 /* For the rationale behind this, see comments at the 7129 place where the V-shadow for .result is constructed, in 7130 do_shadow_LLSC. In short, we regard .result as 7131 always-defined. */ 7132 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result), 7133 mkU32(0) ); 7134 } 7135 break; 7136 } 7137 7138 case Ist_Put: { 7139 Int b_offset 7140 = MC_(get_otrack_shadow_offset)( 7141 st->Ist.Put.offset, 7142 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data)) 7143 ); 7144 if (b_offset >= 0) { 7145 /* FIXME: this isn't an atom! */ 7146 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB, 7147 schemeE( mce, st->Ist.Put.data )) ); 7148 } 7149 break; 7150 } 7151 7152 case Ist_WrTmp: 7153 assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp), 7154 schemeE(mce, st->Ist.WrTmp.data) ); 7155 break; 7156 7157 case Ist_MBE: 7158 case Ist_NoOp: 7159 case Ist_Exit: 7160 case Ist_IMark: 7161 break; 7162 7163 default: 7164 VG_(printf)("mc_translate.c: schemeS: unhandled: "); 7165 ppIRStmt(st); 7166 VG_(tool_panic)("memcheck:schemeS"); 7167 } 7168 } 7169 7170 7171 /*--------------------------------------------------------------------*/ 7172 /*--- end mc_translate.c ---*/ 7173 /*--------------------------------------------------------------------*/ 7174