/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.        ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2017 Julian Seward
      jseward (at) acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"     // For mc_include.h
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_machine.h"       // VG_(fnptr_to_fnentry)
#include "pub_tool_xarray.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_libcbase.h"

#include "mc_include.h"


/* FIXMEs JRS 2011-June-16.

   Check the interpretation for vector narrowing and widening ops,
   particularly the saturating ones.  I suspect they are either overly
   pessimistic and/or wrong.

   Iop_QandSQsh64x2 and friends (vector-by-vector bidirectional
   saturating shifts): the interpretation is overly pessimistic.
   See comments on the relevant cases below for details.

   Iop_Sh64Sx2 and friends (vector-by-vector bidirectional shifts,
   both rounding and non-rounding variants): ditto
*/

/* This file implements the Memcheck instrumentation, and in
   particular contains the core of its undefined value detection
   machinery.  For a comprehensive background of the terminology,
   algorithms and rationale used herein, read:

     Using Valgrind to detect undefined value errors with
     bit-precision

     Julian Seward and Nicholas Nethercote

     2005 USENIX Annual Technical Conference (General Track),
     Anaheim, CA, USA, April 10-15, 2005.

   ----

   Here is as good a place as any to record exactly when V bits are
   and should be checked, why, and what function is responsible.


   Memcheck complains when an undefined value is used:

   1. In the condition of a conditional branch.  Because it could
      cause incorrect control flow, and thus cause incorrect
      externally-visible behaviour.  [mc_translate.c:complainIfUndefined]

   2. As an argument to a system call, or as the value that specifies
      the system call number.  Because it could cause an incorrect
      externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]

   3. As the address in a load or store.  Because it could cause an
      incorrect value to be used later, which could cause
      externally-visible behaviour (eg. via incorrect control flow or
      an incorrect system call argument)  [complainIfUndefined]

   4. As the target address of a branch.
      Because it could cause incorrect control flow.
      [complainIfUndefined]

   5. As an argument to setenv, unsetenv, or putenv.  Because it could
      put an incorrect value into the external environment.
      [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]

   6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
      [complainIfUndefined]

   7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
      VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the
      user requested it.  [in memcheck.h]


   Memcheck also complains, but should not, when an undefined value is used:

   8. As the shift value in certain SIMD shift operations (but not in
      the standard integer shift operations).  This inconsistency is
      due to historical reasons.  [complainIfUndefined]


   Memcheck does not complain, but should, when an undefined value is used:

   9. As an input to a client request.  Because the client request may
      affect the visible behaviour -- see bug #144362 for an example
      involving the malloc replacements in vg_replace_malloc.c and
      VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
      isn't identified.  That bug report also has some info on how to
      solve the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]


   In practice, 1 and 2 account for the vast majority of cases.
*/

/* Generation of addr-definedness, addr-validity and
   guard-definedness checks pertaining to loads and stores (Iex_Load,
   Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
   loads/stores) was re-checked 11 May 2013. */

/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowTypeV ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );

static IRExpr *i128_const_zero(void);

/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value, and
   so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;
      IRTemp   shadowV;
      IRTemp   shadowB;
   }
   TempMapEnt;


/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.
         Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         the total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: indicates whether we should use expensive
         interpretations of integer adds, since unfortunately LLVM
         uses them to do ORs in some circumstances.  Defaulted to True
         on MacOS and False everywhere else. */
      Bool useLLVMworkarounds;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      const VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_temps-1], which gives the current shadow for
   each original tmp, or IRTemp_INVALID if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
   both the table in mce->sb and to our auxiliary mapping.  Note that
   newTemp may cause mce->tmpMap to resize, hence previous results
   from VG_(indexXA)(mce->tmpMap) are invalidated. */
static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
{
   Word       newIx;
   TempMapEnt ent;
   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
   ent.kind    = kind;
   ent.shadowV = IRTemp_INVALID;
   ent.shadowB = IRTemp_INVALID;
   newIx = VG_(addToXA)( mce->tmpMap, &ent );
   tl_assert(newIx == (Word)tmp);
   return tmp;
}


/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here.
*/ 271 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 272 tl_assert(ent->kind == Orig); 273 if (ent->shadowV == IRTemp_INVALID) { 274 IRTemp tmpV 275 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh ); 276 /* newTemp may cause mce->tmpMap to resize, hence previous results 277 from VG_(indexXA) are invalid. */ 278 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 279 tl_assert(ent->kind == Orig); 280 tl_assert(ent->shadowV == IRTemp_INVALID); 281 ent->shadowV = tmpV; 282 } 283 return ent->shadowV; 284 } 285 286 /* Allocate a new shadow for the given original tmp. This means any 287 previous shadow is abandoned. This is needed because it is 288 necessary to give a new value to a shadow once it has been tested 289 for undefinedness, but unfortunately IR's SSA property disallows 290 this. Instead we must abandon the old shadow, allocate a new one 291 and use that instead. 292 293 This is the same as findShadowTmpV, except we don't bother to see 294 if a shadow temp already existed -- we simply allocate a new one 295 regardless. */ 296 static void newShadowTmpV ( MCEnv* mce, IRTemp orig ) 297 { 298 TempMapEnt* ent; 299 /* VG_(indexXA) range-checks 'orig', hence no need to check 300 here. */ 301 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 302 tl_assert(ent->kind == Orig); 303 if (1) { 304 IRTemp tmpV 305 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh ); 306 /* newTemp may cause mce->tmpMap to resize, hence previous results 307 from VG_(indexXA) are invalid. */ 308 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 309 tl_assert(ent->kind == Orig); 310 ent->shadowV = tmpV; 311 } 312 } 313 314 315 /*------------------------------------------------------------*/ 316 /*--- IRAtoms -- a subset of IRExprs ---*/ 317 /*------------------------------------------------------------*/ 318 319 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by 320 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat 321 input, most of this code deals in atoms. Usefully, a value atom 322 always has a V-value which is also an atom: constants are shadowed 323 by constants, and temps are shadowed by the corresponding shadow 324 temporary. */ 325 326 typedef IRExpr IRAtom; 327 328 /* (used for sanity checks only): is this an atom which looks 329 like it's from original code? */ 330 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 ) 331 { 332 if (a1->tag == Iex_Const) 333 return True; 334 if (a1->tag == Iex_RdTmp) { 335 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp ); 336 return ent->kind == Orig; 337 } 338 return False; 339 } 340 341 /* (used for sanity checks only): is this an atom which looks 342 like it's from shadow code? */ 343 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 ) 344 { 345 if (a1->tag == Iex_Const) 346 return True; 347 if (a1->tag == Iex_RdTmp) { 348 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp ); 349 return ent->kind == VSh || ent->kind == BSh; 350 } 351 return False; 352 } 353 354 /* (used for sanity checks only): check that both args are atoms and 355 are identically-kinded. 
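   (Here "identically-kinded" simply means both are RdTmps or both are
   Consts; mixed Tmp/Const pairs fail the check.)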
*/
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, I128, V128, V256. */

static IRType shadowTypeV ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      case Ity_F16:  return Ity_I16;
      case Ity_F32:  return Ity_I32;
      case Ity_D32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_D64:  return Ity_I64;
      case Ity_F128: return Ity_I128;
      case Ity_D128: return Ity_I128;
      case Ity_V128: return Ity_V128;
      case Ity_V256: return Ity_V256;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowTypeV");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/I64/I128/V128/V256). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_I128: return i128_const_zero();
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      case Ity_V256: return IRExpr_Const(IRConst_V256(0x00000000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* add stmt to a bb */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   if (mce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}

/* assign value to tmp */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}

/* build various kinds of expressions */
#define triop(_op, _arg1, _arg2, _arg3) \
                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU1(_n)                 IRExpr_Const(IRConst_U1(_n))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))

/* Bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom.

   'ty' is the type of 'e' and hence the type that the new temporary
   needs to be.  But passing it in is redundant, since we can deduce
   the type merely by inspecting 'e'.  So at least use that fact to
So at least use that fact to 455 assert that the two types agree. */ 456 static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e ) 457 { 458 TempKind k; 459 IRTemp t; 460 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e); 461 462 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */ 463 switch (cat) { 464 case 'V': k = VSh; break; 465 case 'B': k = BSh; break; 466 case 'C': k = Orig; break; 467 /* happens when we are making up new "orig" 468 expressions, for IRCAS handling */ 469 default: tl_assert(0); 470 } 471 t = newTemp(mce, ty, k); 472 assign(cat, mce, t, e); 473 return mkexpr(t); 474 } 475 476 477 /*------------------------------------------------------------*/ 478 /*--- Helper functions for 128-bit ops ---*/ 479 /*------------------------------------------------------------*/ 480 481 static IRExpr *i128_const_zero(void) 482 { 483 IRAtom* z64 = IRExpr_Const(IRConst_U64(0)); 484 return binop(Iop_64HLto128, z64, z64); 485 } 486 487 /* There are no I128-bit loads and/or stores [as generated by any 488 current front ends]. So we do not need to worry about that in 489 expr2vbits_Load */ 490 491 492 /*------------------------------------------------------------*/ 493 /*--- Constructing definedness primitive ops ---*/ 494 /*------------------------------------------------------------*/ 495 496 /* --------- Defined-if-either-defined --------- */ 497 498 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 499 tl_assert(isShadowAtom(mce,a1)); 500 tl_assert(isShadowAtom(mce,a2)); 501 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2)); 502 } 503 504 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 505 tl_assert(isShadowAtom(mce,a1)); 506 tl_assert(isShadowAtom(mce,a2)); 507 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2)); 508 } 509 510 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 511 tl_assert(isShadowAtom(mce,a1)); 512 tl_assert(isShadowAtom(mce,a2)); 513 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2)); 514 } 515 516 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 517 tl_assert(isShadowAtom(mce,a1)); 518 tl_assert(isShadowAtom(mce,a2)); 519 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2)); 520 } 521 522 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 523 tl_assert(isShadowAtom(mce,a1)); 524 tl_assert(isShadowAtom(mce,a2)); 525 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2)); 526 } 527 528 static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 529 tl_assert(isShadowAtom(mce,a1)); 530 tl_assert(isShadowAtom(mce,a2)); 531 return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2)); 532 } 533 534 /* --------- Undefined-if-either-undefined --------- */ 535 536 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 537 tl_assert(isShadowAtom(mce,a1)); 538 tl_assert(isShadowAtom(mce,a2)); 539 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2)); 540 } 541 542 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 543 tl_assert(isShadowAtom(mce,a1)); 544 tl_assert(isShadowAtom(mce,a2)); 545 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2)); 546 } 547 548 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 549 tl_assert(isShadowAtom(mce,a1)); 550 tl_assert(isShadowAtom(mce,a2)); 551 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2)); 552 } 553 554 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 555 tl_assert(isShadowAtom(mce,a1)); 556 
tl_assert(isShadowAtom(mce,a2)); 557 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2)); 558 } 559 560 static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 561 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6; 562 tl_assert(isShadowAtom(mce,a1)); 563 tl_assert(isShadowAtom(mce,a2)); 564 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1)); 565 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1)); 566 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2)); 567 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2)); 568 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3)); 569 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4)); 570 571 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5)); 572 } 573 574 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 575 tl_assert(isShadowAtom(mce,a1)); 576 tl_assert(isShadowAtom(mce,a2)); 577 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2)); 578 } 579 580 static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 581 tl_assert(isShadowAtom(mce,a1)); 582 tl_assert(isShadowAtom(mce,a2)); 583 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2)); 584 } 585 586 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) { 587 switch (vty) { 588 case Ity_I8: return mkUifU8(mce, a1, a2); 589 case Ity_I16: return mkUifU16(mce, a1, a2); 590 case Ity_I32: return mkUifU32(mce, a1, a2); 591 case Ity_I64: return mkUifU64(mce, a1, a2); 592 case Ity_I128: return mkUifU128(mce, a1, a2); 593 case Ity_V128: return mkUifUV128(mce, a1, a2); 594 case Ity_V256: return mkUifUV256(mce, a1, a2); 595 default: 596 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n"); 597 VG_(tool_panic)("memcheck:mkUifU"); 598 } 599 } 600 601 /* --------- The Left-family of operations. --------- */ 602 603 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) { 604 tl_assert(isShadowAtom(mce,a1)); 605 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1)); 606 } 607 608 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) { 609 tl_assert(isShadowAtom(mce,a1)); 610 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1)); 611 } 612 613 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) { 614 tl_assert(isShadowAtom(mce,a1)); 615 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1)); 616 } 617 618 static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) { 619 tl_assert(isShadowAtom(mce,a1)); 620 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1)); 621 } 622 623 /* --------- 'Improvement' functions for AND/OR. --------- */ 624 625 /* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give 626 defined (0); all other -> undefined (1). 
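
   Worked example (8 bit): data = 0xA2, vbits = 0x03 (only bits 1:0 of
   the data are undefined).  Then data OR vbits = 0xA3: the term is
   defined (0) exactly where data has a defined 0 bit.  Those bit
   positions of an AND result are 0 regardless of the other operand,
   so they can safely be reported as defined.  A caller can then DifD
   this term onto the plain UifU interpretation of the AND.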
627 */ 628 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 629 { 630 tl_assert(isOriginalAtom(mce, data)); 631 tl_assert(isShadowAtom(mce, vbits)); 632 tl_assert(sameKindedAtoms(data, vbits)); 633 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits)); 634 } 635 636 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 637 { 638 tl_assert(isOriginalAtom(mce, data)); 639 tl_assert(isShadowAtom(mce, vbits)); 640 tl_assert(sameKindedAtoms(data, vbits)); 641 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits)); 642 } 643 644 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 645 { 646 tl_assert(isOriginalAtom(mce, data)); 647 tl_assert(isShadowAtom(mce, vbits)); 648 tl_assert(sameKindedAtoms(data, vbits)); 649 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits)); 650 } 651 652 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 653 { 654 tl_assert(isOriginalAtom(mce, data)); 655 tl_assert(isShadowAtom(mce, vbits)); 656 tl_assert(sameKindedAtoms(data, vbits)); 657 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits)); 658 } 659 660 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 661 { 662 tl_assert(isOriginalAtom(mce, data)); 663 tl_assert(isShadowAtom(mce, vbits)); 664 tl_assert(sameKindedAtoms(data, vbits)); 665 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits)); 666 } 667 668 static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 669 { 670 tl_assert(isOriginalAtom(mce, data)); 671 tl_assert(isShadowAtom(mce, vbits)); 672 tl_assert(sameKindedAtoms(data, vbits)); 673 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits)); 674 } 675 676 /* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give 677 defined (0); all other -> undefined (1). 
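
   Worked example (8 bit): data = 0xA2, vbits = 0x03.  Then
   ~data OR vbits = 0x5F: the term is defined (0) exactly where data
   has a defined 1 bit (bits 7 and 5 here).  Those bit positions of an
   OR result are 1 regardless of the other operand, so they can safely
   be reported as defined.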
678 */ 679 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 680 { 681 tl_assert(isOriginalAtom(mce, data)); 682 tl_assert(isShadowAtom(mce, vbits)); 683 tl_assert(sameKindedAtoms(data, vbits)); 684 return assignNew( 685 'V', mce, Ity_I8, 686 binop(Iop_Or8, 687 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)), 688 vbits) ); 689 } 690 691 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 692 { 693 tl_assert(isOriginalAtom(mce, data)); 694 tl_assert(isShadowAtom(mce, vbits)); 695 tl_assert(sameKindedAtoms(data, vbits)); 696 return assignNew( 697 'V', mce, Ity_I16, 698 binop(Iop_Or16, 699 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)), 700 vbits) ); 701 } 702 703 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 704 { 705 tl_assert(isOriginalAtom(mce, data)); 706 tl_assert(isShadowAtom(mce, vbits)); 707 tl_assert(sameKindedAtoms(data, vbits)); 708 return assignNew( 709 'V', mce, Ity_I32, 710 binop(Iop_Or32, 711 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)), 712 vbits) ); 713 } 714 715 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 716 { 717 tl_assert(isOriginalAtom(mce, data)); 718 tl_assert(isShadowAtom(mce, vbits)); 719 tl_assert(sameKindedAtoms(data, vbits)); 720 return assignNew( 721 'V', mce, Ity_I64, 722 binop(Iop_Or64, 723 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)), 724 vbits) ); 725 } 726 727 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 728 { 729 tl_assert(isOriginalAtom(mce, data)); 730 tl_assert(isShadowAtom(mce, vbits)); 731 tl_assert(sameKindedAtoms(data, vbits)); 732 return assignNew( 733 'V', mce, Ity_V128, 734 binop(Iop_OrV128, 735 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)), 736 vbits) ); 737 } 738 739 static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 740 { 741 tl_assert(isOriginalAtom(mce, data)); 742 tl_assert(isShadowAtom(mce, vbits)); 743 tl_assert(sameKindedAtoms(data, vbits)); 744 return assignNew( 745 'V', mce, Ity_V256, 746 binop(Iop_OrV256, 747 assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)), 748 vbits) ); 749 } 750 751 /* --------- Pessimising casts. --------- */ 752 753 /* The function returns an expression of type DST_TY. If any of the VBITS 754 is undefined (value == 1) the resulting expression has all bits set to 755 1. Otherwise, all bits are 0. */ 756 757 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits ) 758 { 759 IRType src_ty; 760 IRAtom* tmp1; 761 762 /* Note, dst_ty is a shadow type, not an original type. */ 763 tl_assert(isShadowAtom(mce,vbits)); 764 src_ty = typeOfIRExpr(mce->sb->tyenv, vbits); 765 766 /* Fast-track some common cases */ 767 if (src_ty == Ity_I32 && dst_ty == Ity_I32) 768 return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits)); 769 770 if (src_ty == Ity_I64 && dst_ty == Ity_I64) 771 return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits)); 772 773 if (src_ty == Ity_I32 && dst_ty == Ity_I64) { 774 /* PCast the arg, then clone it. */ 775 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits)); 776 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp)); 777 } 778 779 if (src_ty == Ity_I32 && dst_ty == Ity_V128) { 780 /* PCast the arg, then clone it 4 times. 
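         (Iop_CmpwNEZ32 yields all-0s or all-1s, so a single undefined
         source bit marks all four 32-bit lanes of the V128 result as
         undefined.)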
*/ 781 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits)); 782 tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp)); 783 return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp)); 784 } 785 786 if (src_ty == Ity_I32 && dst_ty == Ity_V256) { 787 /* PCast the arg, then clone it 8 times. */ 788 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits)); 789 tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp)); 790 tmp = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp)); 791 return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, tmp, tmp)); 792 } 793 794 if (src_ty == Ity_I64 && dst_ty == Ity_I32) { 795 /* PCast the arg. This gives all 0s or all 1s. Then throw away 796 the top half. */ 797 IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits)); 798 return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp)); 799 } 800 801 if (src_ty == Ity_V128 && dst_ty == Ity_I64) { 802 /* Use InterleaveHI64x2 to copy the top half of the vector into 803 the bottom half. Then we can UifU it with the original, throw 804 away the upper half of the result, and PCast-I64-to-I64 805 the lower half. */ 806 // Generates vbits[127:64] : vbits[127:64] 807 IRAtom* hi64hi64 808 = assignNew('V', mce, Ity_V128, 809 binop(Iop_InterleaveHI64x2, vbits, vbits)); 810 // Generates 811 // UifU(vbits[127:64],vbits[127:64]) : UifU(vbits[127:64],vbits[63:0]) 812 // == vbits[127:64] : UifU(vbits[127:64],vbits[63:0]) 813 IRAtom* lohi64 814 = mkUifUV128(mce, hi64hi64, vbits); 815 // Generates UifU(vbits[127:64],vbits[63:0]) 816 IRAtom* lo64 817 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, lohi64)); 818 // Generates 819 // PCast-to-I64( UifU(vbits[127:64], vbits[63:0] ) 820 // == PCast-to-I64( vbits[127:0] ) 821 IRAtom* res 822 = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, lo64)); 823 return res; 824 } 825 826 /* Else do it the slow way .. */ 827 /* First of all, collapse vbits down to a single bit. */ 828 tmp1 = NULL; 829 switch (src_ty) { 830 case Ity_I1: 831 tmp1 = vbits; 832 break; 833 case Ity_I8: 834 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits)); 835 break; 836 case Ity_I16: 837 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits)); 838 break; 839 case Ity_I32: 840 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits)); 841 break; 842 case Ity_I64: 843 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits)); 844 break; 845 case Ity_I128: { 846 /* Gah. Chop it in half, OR the halves together, and compare 847 that with zero. */ 848 IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits)); 849 IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits)); 850 IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3)); 851 tmp1 = assignNew('V', mce, Ity_I1, 852 unop(Iop_CmpNEZ64, tmp4)); 853 break; 854 } 855 case Ity_V128: { 856 /* Chop it in half, OR the halves together, and compare that 857 * with zero. 858 */ 859 IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vbits)); 860 IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vbits)); 861 IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3)); 862 tmp1 = assignNew('V', mce, Ity_I1, 863 unop(Iop_CmpNEZ64, tmp4)); 864 break; 865 } 866 default: 867 ppIRType(src_ty); 868 VG_(tool_panic)("mkPCastTo(1)"); 869 } 870 tl_assert(tmp1); 871 /* Now widen up to the dst type. 
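      (At this point tmp1 is a single I1 bit which is 1 iff any source
      bit was undefined; the Iop_1Sto* sign-extensions below smear it
      into an all-0s or all-1s value of the destination type.)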
*/ 872 switch (dst_ty) { 873 case Ity_I1: 874 return tmp1; 875 case Ity_I8: 876 return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1)); 877 case Ity_I16: 878 return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1)); 879 case Ity_I32: 880 return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1)); 881 case Ity_I64: 882 return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1)); 883 case Ity_V128: 884 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1)); 885 tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1)); 886 return tmp1; 887 case Ity_I128: 888 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1)); 889 tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1)); 890 return tmp1; 891 case Ity_V256: 892 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1)); 893 tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, 894 tmp1, tmp1)); 895 tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, 896 tmp1, tmp1)); 897 return tmp1; 898 default: 899 ppIRType(dst_ty); 900 VG_(tool_panic)("mkPCastTo(2)"); 901 } 902 } 903 904 /* This is a minor variant. It takes an arg of some type and returns 905 a value of the same type. The result consists entirely of Defined 906 (zero) bits except its least significant bit, which is a PCast of 907 the entire argument down to a single bit. */ 908 static IRAtom* mkPCastXXtoXXlsb ( MCEnv* mce, IRAtom* varg, IRType ty ) 909 { 910 if (ty == Ity_V128) { 911 /* --- Case for V128 --- */ 912 IRAtom* varg128 = varg; 913 // generates: PCast-to-I64(varg128) 914 IRAtom* pcdTo64 = mkPCastTo(mce, Ity_I64, varg128); 915 // Now introduce zeros (defined bits) in the top 63 places 916 // generates: Def--(63)--Def PCast-to-I1(varg128) 917 IRAtom* d63pc 918 = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcdTo64, mkU64(1))); 919 // generates: Def--(64)--Def 920 IRAtom* d64 921 = definedOfType(Ity_I64); 922 // generates: Def--(127)--Def PCast-to-I1(varg128) 923 IRAtom* res 924 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, d64, d63pc)); 925 return res; 926 } 927 if (ty == Ity_I64) { 928 /* --- Case for I64 --- */ 929 // PCast to 64 930 IRAtom* pcd = mkPCastTo(mce, Ity_I64, varg); 931 // Zero (Def) out the top 63 bits 932 IRAtom* res 933 = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcd, mkU64(1))); 934 return res; 935 } 936 /*NOTREACHED*/ 937 tl_assert(0); 938 } 939 940 /* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */ 941 /* 942 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and 943 PCasting to Ity_U1. However, sometimes it is necessary to be more 944 accurate. The insight is that the result is defined if two 945 corresponding bits can be found, one from each argument, so that 946 both bits are defined but are different -- that makes EQ say "No" 947 and NE say "Yes". Hence, we compute an improvement term and DifD 948 it onto the "normal" (UifU) result. 949 950 The result is: 951 952 PCastTo<1> ( 953 -- naive version 954 PCastTo<sz>( UifU<sz>(vxx, vyy) ) 955 956 `DifD<sz>` 957 958 -- improvement term 959 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) ) 960 ) 961 962 where 963 vec contains 0 (defined) bits where the corresponding arg bits 964 are defined but different, and 1 bits otherwise. 965 966 vec = Or<sz>( vxx, // 0 iff bit defined 967 vyy, // 0 iff bit defined 968 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different 969 ) 970 971 If any bit of vec is 0, the result is defined and so the 972 improvement term should produce 0...0, else it should produce 973 1...1. 
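
   For example, if xx and yy are both defined at some bit position i
   but differ there, then vxx[i] = vyy[i] = 0 and
   Not(Xor(xx,yy))[i] = 0, so vec[i] = 0: the improvement term comes
   out all-defined and the DifD forces the overall result to
   "defined", which is right, because that single defined differing
   bit decides the comparison.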
974 975 Hence require for the improvement term: 976 977 if vec == 1...1 then 1...1 else 0...0 978 -> 979 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) 980 981 This was extensively re-analysed and checked on 6 July 05. 982 */ 983 static IRAtom* expensiveCmpEQorNE ( MCEnv* mce, 984 IRType ty, 985 IRAtom* vxx, IRAtom* vyy, 986 IRAtom* xx, IRAtom* yy ) 987 { 988 IRAtom *naive, *vec, *improvement_term; 989 IRAtom *improved, *final_cast, *top; 990 IROp opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR; 991 992 tl_assert(isShadowAtom(mce,vxx)); 993 tl_assert(isShadowAtom(mce,vyy)); 994 tl_assert(isOriginalAtom(mce,xx)); 995 tl_assert(isOriginalAtom(mce,yy)); 996 tl_assert(sameKindedAtoms(vxx,xx)); 997 tl_assert(sameKindedAtoms(vyy,yy)); 998 999 switch (ty) { 1000 case Ity_I16: 1001 opOR = Iop_Or16; 1002 opDIFD = Iop_And16; 1003 opUIFU = Iop_Or16; 1004 opNOT = Iop_Not16; 1005 opXOR = Iop_Xor16; 1006 opCMP = Iop_CmpEQ16; 1007 top = mkU16(0xFFFF); 1008 break; 1009 case Ity_I32: 1010 opOR = Iop_Or32; 1011 opDIFD = Iop_And32; 1012 opUIFU = Iop_Or32; 1013 opNOT = Iop_Not32; 1014 opXOR = Iop_Xor32; 1015 opCMP = Iop_CmpEQ32; 1016 top = mkU32(0xFFFFFFFF); 1017 break; 1018 case Ity_I64: 1019 opOR = Iop_Or64; 1020 opDIFD = Iop_And64; 1021 opUIFU = Iop_Or64; 1022 opNOT = Iop_Not64; 1023 opXOR = Iop_Xor64; 1024 opCMP = Iop_CmpEQ64; 1025 top = mkU64(0xFFFFFFFFFFFFFFFFULL); 1026 break; 1027 default: 1028 VG_(tool_panic)("expensiveCmpEQorNE"); 1029 } 1030 1031 naive 1032 = mkPCastTo(mce,ty, 1033 assignNew('V', mce, ty, binop(opUIFU, vxx, vyy))); 1034 1035 vec 1036 = assignNew( 1037 'V', mce,ty, 1038 binop( opOR, 1039 assignNew('V', mce,ty, binop(opOR, vxx, vyy)), 1040 assignNew( 1041 'V', mce,ty, 1042 unop( opNOT, 1043 assignNew('V', mce,ty, binop(opXOR, xx, yy)))))); 1044 1045 improvement_term 1046 = mkPCastTo( mce,ty, 1047 assignNew('V', mce,Ity_I1, binop(opCMP, vec, top))); 1048 1049 improved 1050 = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) ); 1051 1052 final_cast 1053 = mkPCastTo( mce, Ity_I1, improved ); 1054 1055 return final_cast; 1056 } 1057 1058 1059 /* --------- Semi-accurate interpretation of CmpORD. --------- */ 1060 1061 /* CmpORD32{S,U} does PowerPC-style 3-way comparisons: 1062 1063 CmpORD32S(x,y) = 1<<3 if x <s y 1064 = 1<<2 if x >s y 1065 = 1<<1 if x == y 1066 1067 and similarly the unsigned variant. The default interpretation is: 1068 1069 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#) 1070 & (7<<1) 1071 1072 The "& (7<<1)" reflects the fact that all result bits except 3,2,1 1073 are zero and therefore defined (viz, zero). 1074 1075 Also deal with a special case better: 1076 1077 CmpORD32S(x,0) 1078 1079 Here, bit 3 (LT) of the result is a copy of the top bit of x and 1080 will be defined even if the rest of x isn't. In which case we do: 1081 1082 CmpORD32S#(x,x#,0,{impliedly 0}#) 1083 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ# 1084 | (x# >>u 31) << 3 -- LT# = x#[31] 1085 1086 Analogous handling for CmpORD64{S,U}. 
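
   Worked instance of the special case: if only x's sign bit x[31] is
   defined, then x# = 0x7FFFFFFF.  PCast(x#) & (3<<1) leaves the GT
   and EQ result bits (2 and 1) undefined, while (x# >>u 31) << 3
   contributes a defined 0 at bit 3, so LT comes out defined -- which
   matches the fact that x <s 0 is decided by x[31] alone.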
1087 */ 1088 static Bool isZeroU32 ( IRAtom* e ) 1089 { 1090 return 1091 toBool( e->tag == Iex_Const 1092 && e->Iex.Const.con->tag == Ico_U32 1093 && e->Iex.Const.con->Ico.U32 == 0 ); 1094 } 1095 1096 static Bool isZeroU64 ( IRAtom* e ) 1097 { 1098 return 1099 toBool( e->tag == Iex_Const 1100 && e->Iex.Const.con->tag == Ico_U64 1101 && e->Iex.Const.con->Ico.U64 == 0 ); 1102 } 1103 1104 static IRAtom* doCmpORD ( MCEnv* mce, 1105 IROp cmp_op, 1106 IRAtom* xxhash, IRAtom* yyhash, 1107 IRAtom* xx, IRAtom* yy ) 1108 { 1109 Bool m64 = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U; 1110 Bool syned = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S; 1111 IROp opOR = m64 ? Iop_Or64 : Iop_Or32; 1112 IROp opAND = m64 ? Iop_And64 : Iop_And32; 1113 IROp opSHL = m64 ? Iop_Shl64 : Iop_Shl32; 1114 IROp opSHR = m64 ? Iop_Shr64 : Iop_Shr32; 1115 IRType ty = m64 ? Ity_I64 : Ity_I32; 1116 Int width = m64 ? 64 : 32; 1117 1118 Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32; 1119 1120 IRAtom* threeLeft1 = NULL; 1121 IRAtom* sevenLeft1 = NULL; 1122 1123 tl_assert(isShadowAtom(mce,xxhash)); 1124 tl_assert(isShadowAtom(mce,yyhash)); 1125 tl_assert(isOriginalAtom(mce,xx)); 1126 tl_assert(isOriginalAtom(mce,yy)); 1127 tl_assert(sameKindedAtoms(xxhash,xx)); 1128 tl_assert(sameKindedAtoms(yyhash,yy)); 1129 tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U 1130 || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U); 1131 1132 if (0) { 1133 ppIROp(cmp_op); VG_(printf)(" "); 1134 ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n"); 1135 } 1136 1137 if (syned && isZero(yy)) { 1138 /* fancy interpretation */ 1139 /* if yy is zero, then it must be fully defined (zero#). */ 1140 tl_assert(isZero(yyhash)); 1141 threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1); 1142 return 1143 binop( 1144 opOR, 1145 assignNew( 1146 'V', mce,ty, 1147 binop( 1148 opAND, 1149 mkPCastTo(mce,ty, xxhash), 1150 threeLeft1 1151 )), 1152 assignNew( 1153 'V', mce,ty, 1154 binop( 1155 opSHL, 1156 assignNew( 1157 'V', mce,ty, 1158 binop(opSHR, xxhash, mkU8(width-1))), 1159 mkU8(3) 1160 )) 1161 ); 1162 } else { 1163 /* standard interpretation */ 1164 sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1); 1165 return 1166 binop( 1167 opAND, 1168 mkPCastTo( mce,ty, 1169 mkUifU(mce,ty, xxhash,yyhash)), 1170 sevenLeft1 1171 ); 1172 } 1173 } 1174 1175 1176 /*------------------------------------------------------------*/ 1177 /*--- Emit a test and complaint if something is undefined. ---*/ 1178 /*------------------------------------------------------------*/ 1179 1180 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */ 1181 1182 1183 /* Set the annotations on a dirty helper to indicate that the stack 1184 pointer and instruction pointers might be read. This is the 1185 behaviour of all 'emit-a-complaint' style functions we might 1186 call. */ 1187 1188 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) { 1189 di->nFxState = 2; 1190 di->fxState[0].fx = Ifx_Read; 1191 di->fxState[0].offset = mce->layout->offset_SP; 1192 di->fxState[0].size = mce->layout->sizeof_SP; 1193 di->fxState[0].nRepeats = 0; 1194 di->fxState[0].repeatLen = 0; 1195 di->fxState[1].fx = Ifx_Read; 1196 di->fxState[1].offset = mce->layout->offset_IP; 1197 di->fxState[1].size = mce->layout->sizeof_IP; 1198 di->fxState[1].nRepeats = 0; 1199 di->fxState[1].repeatLen = 0; 1200 } 1201 1202 1203 /* Check the supplied *original* |atom| for undefinedness, and emit a 1204 complaint if so. Once that happens, mark it as defined. 
This is 1205 possible because the atom is either a tmp or literal. If it's a 1206 tmp, it will be shadowed by a tmp, and so we can set the shadow to 1207 be defined. In fact as mentioned above, we will have to allocate a 1208 new tmp to carry the new 'defined' shadow value, and update the 1209 original->tmp mapping accordingly; we cannot simply assign a new 1210 value to an existing shadow tmp as this breaks SSAness. 1211 1212 The checks are performed, any resulting complaint emitted, and 1213 |atom|'s shadow temp set to 'defined', ONLY in the case that 1214 |guard| evaluates to True at run-time. If it evaluates to False 1215 then no action is performed. If |guard| is NULL (the usual case) 1216 then it is assumed to be always-true, and hence these actions are 1217 performed unconditionally. 1218 1219 This routine does not generate code to check the definedness of 1220 |guard|. The caller is assumed to have taken care of that already. 1221 */ 1222 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard ) 1223 { 1224 IRAtom* vatom; 1225 IRType ty; 1226 Int sz; 1227 IRDirty* di; 1228 IRAtom* cond; 1229 IRAtom* origin; 1230 void* fn; 1231 const HChar* nm; 1232 IRExpr** args; 1233 Int nargs; 1234 1235 // Don't do V bit tests if we're not reporting undefined value errors. 1236 if (MC_(clo_mc_level) == 1) 1237 return; 1238 1239 if (guard) 1240 tl_assert(isOriginalAtom(mce, guard)); 1241 1242 /* Since the original expression is atomic, there's no duplicated 1243 work generated by making multiple V-expressions for it. So we 1244 don't really care about the possibility that someone else may 1245 also create a V-interpretion for it. */ 1246 tl_assert(isOriginalAtom(mce, atom)); 1247 vatom = expr2vbits( mce, atom ); 1248 tl_assert(isShadowAtom(mce, vatom)); 1249 tl_assert(sameKindedAtoms(atom, vatom)); 1250 1251 ty = typeOfIRExpr(mce->sb->tyenv, vatom); 1252 1253 /* sz is only used for constructing the error message */ 1254 sz = ty==Ity_I1 ? 0 : sizeofIRType(ty); 1255 1256 cond = mkPCastTo( mce, Ity_I1, vatom ); 1257 /* cond will be 0 if all defined, and 1 if any not defined. */ 1258 1259 /* Get the origin info for the value we are about to check. At 1260 least, if we are doing origin tracking. If not, use a dummy 1261 zero origin. 
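      (That is, only when origin tracking is enabled, i.e.
      MC_(clo_mc_level) == 3.  schemeE computes the 32-bit origin tag
      for |atom|, which is then widened to the host word size for the
      helper call.)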
*/ 1262 if (MC_(clo_mc_level) == 3) { 1263 origin = schemeE( mce, atom ); 1264 if (mce->hWordTy == Ity_I64) { 1265 origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) ); 1266 } 1267 } else { 1268 origin = NULL; 1269 } 1270 1271 fn = NULL; 1272 nm = NULL; 1273 args = NULL; 1274 nargs = -1; 1275 1276 switch (sz) { 1277 case 0: 1278 if (origin) { 1279 fn = &MC_(helperc_value_check0_fail_w_o); 1280 nm = "MC_(helperc_value_check0_fail_w_o)"; 1281 args = mkIRExprVec_1(origin); 1282 nargs = 1; 1283 } else { 1284 fn = &MC_(helperc_value_check0_fail_no_o); 1285 nm = "MC_(helperc_value_check0_fail_no_o)"; 1286 args = mkIRExprVec_0(); 1287 nargs = 0; 1288 } 1289 break; 1290 case 1: 1291 if (origin) { 1292 fn = &MC_(helperc_value_check1_fail_w_o); 1293 nm = "MC_(helperc_value_check1_fail_w_o)"; 1294 args = mkIRExprVec_1(origin); 1295 nargs = 1; 1296 } else { 1297 fn = &MC_(helperc_value_check1_fail_no_o); 1298 nm = "MC_(helperc_value_check1_fail_no_o)"; 1299 args = mkIRExprVec_0(); 1300 nargs = 0; 1301 } 1302 break; 1303 case 4: 1304 if (origin) { 1305 fn = &MC_(helperc_value_check4_fail_w_o); 1306 nm = "MC_(helperc_value_check4_fail_w_o)"; 1307 args = mkIRExprVec_1(origin); 1308 nargs = 1; 1309 } else { 1310 fn = &MC_(helperc_value_check4_fail_no_o); 1311 nm = "MC_(helperc_value_check4_fail_no_o)"; 1312 args = mkIRExprVec_0(); 1313 nargs = 0; 1314 } 1315 break; 1316 case 8: 1317 if (origin) { 1318 fn = &MC_(helperc_value_check8_fail_w_o); 1319 nm = "MC_(helperc_value_check8_fail_w_o)"; 1320 args = mkIRExprVec_1(origin); 1321 nargs = 1; 1322 } else { 1323 fn = &MC_(helperc_value_check8_fail_no_o); 1324 nm = "MC_(helperc_value_check8_fail_no_o)"; 1325 args = mkIRExprVec_0(); 1326 nargs = 0; 1327 } 1328 break; 1329 case 2: 1330 case 16: 1331 if (origin) { 1332 fn = &MC_(helperc_value_checkN_fail_w_o); 1333 nm = "MC_(helperc_value_checkN_fail_w_o)"; 1334 args = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin); 1335 nargs = 2; 1336 } else { 1337 fn = &MC_(helperc_value_checkN_fail_no_o); 1338 nm = "MC_(helperc_value_checkN_fail_no_o)"; 1339 args = mkIRExprVec_1( mkIRExpr_HWord( sz ) ); 1340 nargs = 1; 1341 } 1342 break; 1343 default: 1344 VG_(tool_panic)("unexpected szB"); 1345 } 1346 1347 tl_assert(fn); 1348 tl_assert(nm); 1349 tl_assert(args); 1350 tl_assert(nargs >= 0 && nargs <= 2); 1351 tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL) 1352 || (MC_(clo_mc_level) == 2 && origin == NULL) ); 1353 1354 di = unsafeIRDirty_0_N( nargs/*regparms*/, nm, 1355 VG_(fnptr_to_fnentry)( fn ), args ); 1356 di->guard = cond; // and cond is PCast-to-1(atom#) 1357 1358 /* If the complaint is to be issued under a guard condition, AND 1359 that into the guard condition for the helper call. */ 1360 if (guard) { 1361 IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard)); 1362 IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard)); 1363 IRAtom *e = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2)); 1364 di->guard = assignNew('V', mce, Ity_I1, unop(Iop_32to1, e)); 1365 } 1366 1367 setHelperAnns( mce, di ); 1368 stmt( 'V', mce, IRStmt_Dirty(di)); 1369 1370 /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be 1371 defined -- but only in the case where the guard evaluates to 1372 True at run-time. Do the update by setting the orig->shadow 1373 mapping for tmp to reflect the fact that this shadow is getting 1374 a new value. */ 1375 tl_assert(isIRAtom(vatom)); 1376 /* sameKindedAtoms ... 
*/
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      if (guard == NULL) {
         // guard is 'always True', hence update unconditionally
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                          definedOfType(ty));
      } else {
         // update the temp only conditionally.  Do this by copying
         // its old value when the guard is False.
         // The old value ..
         IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         IRAtom* new_tmpV
            = assignNew('V', mce, shadowTypeV(ty),
                        IRExpr_ITE(guard, definedOfType(ty),
                                          mkexpr(old_tmpV)));
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
      }
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
   We assume here that the definedness of GUARD has already been
   checked.
*/
static
void do_shadow_PUT ( MCEnv* mce, Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put.
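         (The shadow register file lives at a fixed distance,
         layout->total_sizeB, above the real guest state, so the
         shadow Put simply targets offset + total_sizeB.  When a guard
         is present the new V bits are first ITE-merged with the V
         bits already in that slot.)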
*/
      if (guard) {
         /* If the guard expression evaluates to false we simply Put
            the value that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond    = assignNew('V', mce, Ity_I1, guard);
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom   = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse));
      }
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}


/* Generate into bb suitable actions to shadow this PutI.  If the
   state slice is marked 'always defined', do nothing; otherwise write
   the V bits of puti->data to the corresponding slice of the shadow
   state.
*/
static
void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
{
   IRAtom*     vatom;
   IRType      ty, tyS;
   Int         arrSize;
   IRRegArray* descr = puti->descr;
   IRAtom*     ix    = puti->ix;
   Int         bias  = puti->bias;
   IRAtom*     atom  = puti->data;

   // Don't do shadow PUTIs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETIs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty      = descr->elemTy;
   tyS     = shadowTypeV(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in as separate offset/type pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowTypeV(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      /* FIXME: this isn't an atom! */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in as separate descr/ix/bias pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowTypeV(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area.
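         (Same register array descriptor, index and bias, but rebased
         by layout->total_sizeB so it reads the shadow half of the
         guest state.)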
*/ 1569 IRRegArray* new_descr 1570 = mkIRRegArray( descr->base + mce->layout->total_sizeB, 1571 tyS, descr->nElems); 1572 return IRExpr_GetI( new_descr, ix, bias ); 1573 } 1574 } 1575 1576 1577 /*------------------------------------------------------------*/ 1578 /*--- Generating approximations for unknown operations, ---*/ 1579 /*--- using lazy-propagate semantics ---*/ 1580 /*------------------------------------------------------------*/ 1581 1582 /* Lazy propagation of undefinedness from two values, resulting in the 1583 specified shadow type. 1584 */ 1585 static 1586 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 ) 1587 { 1588 IRAtom* at; 1589 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1); 1590 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2); 1591 tl_assert(isShadowAtom(mce,va1)); 1592 tl_assert(isShadowAtom(mce,va2)); 1593 1594 /* The general case is inefficient because PCast is an expensive 1595 operation. Here are some special cases which use PCast only 1596 once rather than twice. */ 1597 1598 /* I64 x I64 -> I64 */ 1599 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) { 1600 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n"); 1601 at = mkUifU(mce, Ity_I64, va1, va2); 1602 at = mkPCastTo(mce, Ity_I64, at); 1603 return at; 1604 } 1605 1606 /* I64 x I64 -> I32 */ 1607 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) { 1608 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n"); 1609 at = mkUifU(mce, Ity_I64, va1, va2); 1610 at = mkPCastTo(mce, Ity_I32, at); 1611 return at; 1612 } 1613 1614 /* I32 x I32 -> I32 */ 1615 if (t1 == Ity_I32 && t2 == Ity_I32 && finalVty == Ity_I32) { 1616 if (0) VG_(printf)("mkLazy2: I32 x I32 -> I32\n"); 1617 at = mkUifU(mce, Ity_I32, va1, va2); 1618 at = mkPCastTo(mce, Ity_I32, at); 1619 return at; 1620 } 1621 1622 if (0) { 1623 VG_(printf)("mkLazy2 "); 1624 ppIRType(t1); 1625 VG_(printf)("_"); 1626 ppIRType(t2); 1627 VG_(printf)("_"); 1628 ppIRType(finalVty); 1629 VG_(printf)("\n"); 1630 } 1631 1632 /* General case: force everything via 32-bit intermediaries. */ 1633 at = mkPCastTo(mce, Ity_I32, va1); 1634 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2)); 1635 at = mkPCastTo(mce, finalVty, at); 1636 return at; 1637 } 1638 1639 1640 /* 3-arg version of the above. */ 1641 static 1642 IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty, 1643 IRAtom* va1, IRAtom* va2, IRAtom* va3 ) 1644 { 1645 IRAtom* at; 1646 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1); 1647 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2); 1648 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3); 1649 tl_assert(isShadowAtom(mce,va1)); 1650 tl_assert(isShadowAtom(mce,va2)); 1651 tl_assert(isShadowAtom(mce,va3)); 1652 1653 /* The general case is inefficient because PCast is an expensive 1654 operation. Here are some special cases which use PCast only 1655 twice rather than three times. */ 1656 1657 /* I32 x I64 x I64 -> I64 */ 1658 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */ 1659 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 1660 && finalVty == Ity_I64) { 1661 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n"); 1662 /* Widen 1st arg to I64. Since 1st arg is typically a rounding 1663 mode indication which is fully defined, this should get 1664 folded out later. */ 1665 at = mkPCastTo(mce, Ity_I64, va1); 1666 /* Now fold in 2nd and 3rd args. */ 1667 at = mkUifU(mce, Ity_I64, at, va2); 1668 at = mkUifU(mce, Ity_I64, at, va3); 1669 /* and PCast once again. 
*/ 1670 at = mkPCastTo(mce, Ity_I64, at); 1671 return at; 1672 } 1673 1674 /* I32 x I8 x I64 -> I64 */ 1675 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I64 1676 && finalVty == Ity_I64) { 1677 if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n"); 1678 /* Widen 1st and 2nd args to I64. Since 1st arg is typically a 1679 * rounding mode indication which is fully defined, this should 1680 * get folded out later. 1681 */ 1682 IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1); 1683 IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2); 1684 at = mkUifU(mce, Ity_I64, at1, at2); // UifU(PCast(va1), PCast(va2)) 1685 at = mkUifU(mce, Ity_I64, at, va3); 1686 /* and PCast once again. */ 1687 at = mkPCastTo(mce, Ity_I64, at); 1688 return at; 1689 } 1690 1691 /* I32 x I64 x I64 -> I32 */ 1692 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 1693 && finalVty == Ity_I32) { 1694 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n"); 1695 at = mkPCastTo(mce, Ity_I64, va1); 1696 at = mkUifU(mce, Ity_I64, at, va2); 1697 at = mkUifU(mce, Ity_I64, at, va3); 1698 at = mkPCastTo(mce, Ity_I32, at); 1699 return at; 1700 } 1701 1702 /* I32 x I32 x I32 -> I32 */ 1703 /* 32-bit FP idiom, as (eg) happens on ARM */ 1704 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 1705 && finalVty == Ity_I32) { 1706 if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n"); 1707 at = va1; 1708 at = mkUifU(mce, Ity_I32, at, va2); 1709 at = mkUifU(mce, Ity_I32, at, va3); 1710 at = mkPCastTo(mce, Ity_I32, at); 1711 return at; 1712 } 1713 1714 /* I32 x I128 x I128 -> I128 */ 1715 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */ 1716 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128 1717 && finalVty == Ity_I128) { 1718 if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n"); 1719 /* Widen 1st arg to I128. Since 1st arg is typically a rounding 1720 mode indication which is fully defined, this should get 1721 folded out later. */ 1722 at = mkPCastTo(mce, Ity_I128, va1); 1723 /* Now fold in 2nd and 3rd args. */ 1724 at = mkUifU(mce, Ity_I128, at, va2); 1725 at = mkUifU(mce, Ity_I128, at, va3); 1726 /* and PCast once again. */ 1727 at = mkPCastTo(mce, Ity_I128, at); 1728 return at; 1729 } 1730 1731 /* I32 x I8 x I128 -> I128 */ 1732 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */ 1733 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128 1734 && finalVty == Ity_I128) { 1735 if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n"); 1736 /* Use I64 as an intermediate type, which means PCasting all 3 1737 args to I64 to start with. 1st arg is typically a rounding 1738 mode indication which is fully defined, so we hope that it 1739 will get folded out later. */ 1740 IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1); 1741 IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2); 1742 IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3); 1743 /* Now UifU all three together. */ 1744 at = mkUifU(mce, Ity_I64, at1, at2); // UifU(PCast(va1), PCast(va2)) 1745 at = mkUifU(mce, Ity_I64, at, at3); // ... `UifU` PCast(va3) 1746 /* and PCast once again. */ 1747 at = mkPCastTo(mce, Ity_I128, at); 1748 return at; 1749 } 1750 if (1) { 1751 VG_(printf)("mkLazy3: "); 1752 ppIRType(t1); 1753 VG_(printf)(" x "); 1754 ppIRType(t2); 1755 VG_(printf)(" x "); 1756 ppIRType(t3); 1757 VG_(printf)(" -> "); 1758 ppIRType(finalVty); 1759 VG_(printf)("\n"); 1760 } 1761 1762 tl_assert(0); 1763 /* General case: force everything via 32-bit intermediaries. 
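Note that this fallback is currently unreachable: any 3-arg shape not
matched above hits the tl_assert(0) just before this comment, so that
unhandled shapes get noticed and added explicitly. The commented-out
code below only sketches what the fallback would do if it were
re-enabled.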
*/ 1764 /* 1765 at = mkPCastTo(mce, Ity_I32, va1); 1766 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2)); 1767 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3)); 1768 at = mkPCastTo(mce, finalVty, at); 1769 return at; 1770 */ 1771 } 1772 1773 1774 /* 4-arg version of the above. */ 1775 static 1776 IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty, 1777 IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 ) 1778 { 1779 IRAtom* at; 1780 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1); 1781 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2); 1782 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3); 1783 IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4); 1784 tl_assert(isShadowAtom(mce,va1)); 1785 tl_assert(isShadowAtom(mce,va2)); 1786 tl_assert(isShadowAtom(mce,va3)); 1787 tl_assert(isShadowAtom(mce,va4)); 1788 1789 /* The general case is inefficient because PCast is an expensive 1790 operation. Here are some special cases which use PCast only 1791 twice rather than three times. */ 1792 1793 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */ 1794 1795 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128 && t4 == Ity_I128 1796 && finalVty == Ity_I128) { 1797 if (0) VG_(printf)("mkLazy4: I32 x I128 x I128 x I128 -> I128\n"); 1798 /* Widen 1st arg to I128. Since 1st arg is typically a rounding 1799 mode indication which is fully defined, this should get 1800 folded out later. */ 1801 at = mkPCastTo(mce, Ity_I128, va1); 1802 /* Now fold in 2nd, 3rd, 4th args. */ 1803 at = mkUifU(mce, Ity_I128, at, va2); 1804 at = mkUifU(mce, Ity_I128, at, va3); 1805 at = mkUifU(mce, Ity_I128, at, va4); 1806 /* and PCast once again. */ 1807 at = mkPCastTo(mce, Ity_I128, at); 1808 return at; 1809 } 1810 1811 /* I32 x I64 x I64 x I64 -> I64 */ 1812 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64 1813 && finalVty == Ity_I64) { 1814 if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n"); 1815 /* Widen 1st arg to I64. Since 1st arg is typically a rounding 1816 mode indication which is fully defined, this should get 1817 folded out later. */ 1818 at = mkPCastTo(mce, Ity_I64, va1); 1819 /* Now fold in 2nd, 3rd, 4th args. */ 1820 at = mkUifU(mce, Ity_I64, at, va2); 1821 at = mkUifU(mce, Ity_I64, at, va3); 1822 at = mkUifU(mce, Ity_I64, at, va4); 1823 /* and PCast once again. */ 1824 at = mkPCastTo(mce, Ity_I64, at); 1825 return at; 1826 } 1827 /* I32 x I32 x I32 x I32 -> I32 */ 1828 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */ 1829 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32 1830 && finalVty == Ity_I32) { 1831 if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n"); 1832 at = va1; 1833 /* Now fold in 2nd, 3rd, 4th args. */ 1834 at = mkUifU(mce, Ity_I32, at, va2); 1835 at = mkUifU(mce, Ity_I32, at, va3); 1836 at = mkUifU(mce, Ity_I32, at, va4); 1837 at = mkPCastTo(mce, Ity_I32, at); 1838 return at; 1839 } 1840 1841 if (1) { 1842 VG_(printf)("mkLazy4: "); 1843 ppIRType(t1); 1844 VG_(printf)(" x "); 1845 ppIRType(t2); 1846 VG_(printf)(" x "); 1847 ppIRType(t3); 1848 VG_(printf)(" x "); 1849 ppIRType(t4); 1850 VG_(printf)(" -> "); 1851 ppIRType(finalVty); 1852 VG_(printf)("\n"); 1853 } 1854 1855 tl_assert(0); 1856 } 1857 1858 1859 /* Do the lazy propagation game from a null-terminated vector of 1860 atoms. This is presumably the arguments to a helper call, so the 1861 IRCallee info is also supplied in order that we can know which 1862 arguments should be ignored (via the .mcx_mask field). 
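For example (mask value hypothetical): a callee whose mcx_mask is
((1<<0) | (1<<3)) would have its first and fourth arguments skipped
entirely; each remaining argument's V bits are PCasted to the merge
type and UifU'd together, and the accumulated value is finally PCasted
to finalVtype.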
1863 */ 1864 static 1865 IRAtom* mkLazyN ( MCEnv* mce, 1866 IRAtom** exprvec, IRType finalVtype, IRCallee* cee ) 1867 { 1868 Int i; 1869 IRAtom* here; 1870 IRAtom* curr; 1871 IRType mergeTy; 1872 Bool mergeTy64 = True; 1873 1874 /* Decide on the type of the merge intermediary. If all relevant 1875 args are I64, then it's I64. In all other circumstances, use 1876 I32. */ 1877 for (i = 0; exprvec[i]; i++) { 1878 tl_assert(i < 32); 1879 tl_assert(isOriginalAtom(mce, exprvec[i])); 1880 if (cee->mcx_mask & (1<<i)) 1881 continue; 1882 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64) 1883 mergeTy64 = False; 1884 } 1885 1886 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32; 1887 curr = definedOfType(mergeTy); 1888 1889 for (i = 0; exprvec[i]; i++) { 1890 tl_assert(i < 32); 1891 tl_assert(isOriginalAtom(mce, exprvec[i])); 1892 /* Only take notice of this arg if the callee's mc-exclusion 1893 mask does not say it is to be excluded. */ 1894 if (cee->mcx_mask & (1<<i)) { 1895 /* the arg is to be excluded from definedness checking. Do 1896 nothing. */ 1897 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i); 1898 } else { 1899 /* calculate the arg's definedness, and pessimistically merge 1900 it in. */ 1901 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) ); 1902 curr = mergeTy64 1903 ? mkUifU64(mce, here, curr) 1904 : mkUifU32(mce, here, curr); 1905 } 1906 } 1907 return mkPCastTo(mce, finalVtype, curr ); 1908 } 1909 1910 1911 /*------------------------------------------------------------*/ 1912 /*--- Generating expensive sequences for exact carry-chain ---*/ 1913 /*--- propagation in add/sub and related operations. ---*/ 1914 /*------------------------------------------------------------*/ 1915 1916 static 1917 IRAtom* expensiveAddSub ( MCEnv* mce, 1918 Bool add, 1919 IRType ty, 1920 IRAtom* qaa, IRAtom* qbb, 1921 IRAtom* aa, IRAtom* bb ) 1922 { 1923 IRAtom *a_min, *b_min, *a_max, *b_max; 1924 IROp opAND, opOR, opXOR, opNOT, opADD, opSUB; 1925 1926 tl_assert(isShadowAtom(mce,qaa)); 1927 tl_assert(isShadowAtom(mce,qbb)); 1928 tl_assert(isOriginalAtom(mce,aa)); 1929 tl_assert(isOriginalAtom(mce,bb)); 1930 tl_assert(sameKindedAtoms(qaa,aa)); 1931 tl_assert(sameKindedAtoms(qbb,bb)); 1932 1933 switch (ty) { 1934 case Ity_I32: 1935 opAND = Iop_And32; 1936 opOR = Iop_Or32; 1937 opXOR = Iop_Xor32; 1938 opNOT = Iop_Not32; 1939 opADD = Iop_Add32; 1940 opSUB = Iop_Sub32; 1941 break; 1942 case Ity_I64: 1943 opAND = Iop_And64; 1944 opOR = Iop_Or64; 1945 opXOR = Iop_Xor64; 1946 opNOT = Iop_Not64; 1947 opADD = Iop_Add64; 1948 opSUB = Iop_Sub64; 1949 break; 1950 default: 1951 VG_(tool_panic)("expensiveAddSub"); 1952 } 1953 1954 // a_min = aa & ~qaa 1955 a_min = assignNew('V', mce,ty, 1956 binop(opAND, aa, 1957 assignNew('V', mce,ty, unop(opNOT, qaa)))); 1958 1959 // b_min = bb & ~qbb 1960 b_min = assignNew('V', mce,ty, 1961 binop(opAND, bb, 1962 assignNew('V', mce,ty, unop(opNOT, qbb)))); 1963 1964 // a_max = aa | qaa 1965 a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa)); 1966 1967 // b_max = bb | qbb 1968 b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb)); 1969 1970 if (add) { 1971 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max)) 1972 return 1973 assignNew('V', mce,ty, 1974 binop( opOR, 1975 assignNew('V', mce,ty, binop(opOR, qaa, qbb)), 1976 assignNew('V', mce,ty, 1977 binop( opXOR, 1978 assignNew('V', mce,ty, binop(opADD, a_min, b_min)), 1979 assignNew('V', mce,ty, binop(opADD, a_max, b_max)) 1980 ) 1981 ) 1982 ) 1983 ); 1984 } else { 1985 // result = (qaa | qbb) | ((a_min - b_max) 
^ (a_max - b_min))
1986 return
1987 assignNew('V', mce,ty,
1988 binop( opOR,
1989 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1990 assignNew('V', mce,ty,
1991 binop( opXOR,
1992 assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
1993 assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
1994 )
1995 )
1996 )
1997 );
1998 }
1999
2000 }
2001
2002
2003 static
2004 IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
2005 IRAtom* atom, IRAtom* vatom )
2006 {
2007 IRType ty;
2008 IROp xorOp, subOp, andOp;
2009 IRExpr *one;
2010 IRAtom *improver, *improved;
2011 tl_assert(isShadowAtom(mce,vatom));
2012 tl_assert(isOriginalAtom(mce,atom));
2013 tl_assert(sameKindedAtoms(atom,vatom));
2014
2015 switch (czop) {
2016 case Iop_Ctz32:
2017 ty = Ity_I32;
2018 xorOp = Iop_Xor32;
2019 subOp = Iop_Sub32;
2020 andOp = Iop_And32;
2021 one = mkU32(1);
2022 break;
2023 case Iop_Ctz64:
2024 ty = Ity_I64;
2025 xorOp = Iop_Xor64;
2026 subOp = Iop_Sub64;
2027 andOp = Iop_And64;
2028 one = mkU64(1);
2029 break;
2030 default:
2031 ppIROp(czop);
2032 VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
2033 }
2034
2035 // improver = atom ^ (atom - 1)
2036 //
2037 // That is, improver has its low ctz(atom)+1 bits equal to one;
2038 // higher bits (if any) equal to zero.
2039 improver = assignNew('V', mce,ty,
2040 binop(xorOp,
2041 atom,
2042 assignNew('V', mce, ty,
2043 binop(subOp, atom, one))));
2044
2045 // improved = vatom & improver
2046 //
2047 // That is, treat any V bits above the low ctz(atom)+1 bits as
2048 // "defined".
2049 improved = assignNew('V', mce, ty,
2050 binop(andOp, vatom, improver));
2051
2052 // Return pessimizing cast of improved.
2053 return mkPCastTo(mce, ty, improved);
2054 }
2055
2056
2057 /*------------------------------------------------------------*/
2058 /*--- Scalar shifts. ---*/
2059 /*------------------------------------------------------------*/
2060
2061 /* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
2062 idea is to shift the definedness bits by the original shift amount.
2063 This introduces 0s ("defined") in new positions for left shifts and
2064 unsigned right shifts, and copies the top definedness bit for
2065 signed right shifts. So, conveniently, applying the original shift
2066 operator to the definedness bits for the left arg is exactly the
2067 right thing to do:
2068
2069 (qaa << bb)
2070
2071 However if the shift amount is undefined then the whole result
2072 is undefined. Hence we need:
2073
2074 (qaa << bb) `UifU` PCast(qbb)
2075
2076 If the shift amount bb is a literal then qbb will say 'all defined'
2077 and the UifU and PCast will get folded out by post-instrumentation
2078 optimisation.
2079 */
2080 static IRAtom* scalarShift ( MCEnv* mce,
2081 IRType ty,
2082 IROp original_op,
2083 IRAtom* qaa, IRAtom* qbb,
2084 IRAtom* aa, IRAtom* bb )
2085 {
2086 tl_assert(isShadowAtom(mce,qaa));
2087 tl_assert(isShadowAtom(mce,qbb));
2088 tl_assert(isOriginalAtom(mce,aa));
2089 tl_assert(isOriginalAtom(mce,bb));
2090 tl_assert(sameKindedAtoms(qaa,aa));
2091 tl_assert(sameKindedAtoms(qbb,bb));
2092 return
2093 assignNew(
2094 'V', mce, ty,
2095 mkUifU( mce, ty,
2096 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
2097 mkPCastTo(mce, ty, qbb)
2098 )
2099 );
2100 }
2101
2102
2103 /*------------------------------------------------------------*/
2104 /*--- Helpers for dealing with vector primops.
---*/ 2105 /*------------------------------------------------------------*/ 2106 2107 /* Vector pessimisation -- pessimise within each lane individually. */ 2108 2109 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at ) 2110 { 2111 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at)); 2112 } 2113 2114 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at ) 2115 { 2116 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at)); 2117 } 2118 2119 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at ) 2120 { 2121 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at)); 2122 } 2123 2124 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at ) 2125 { 2126 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at)); 2127 } 2128 2129 static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at ) 2130 { 2131 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at)); 2132 } 2133 2134 static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at ) 2135 { 2136 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at)); 2137 } 2138 2139 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at ) 2140 { 2141 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at)); 2142 } 2143 2144 static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at ) 2145 { 2146 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at)); 2147 } 2148 2149 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at ) 2150 { 2151 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at)); 2152 } 2153 2154 static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at ) 2155 { 2156 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at)); 2157 } 2158 2159 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at ) 2160 { 2161 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at)); 2162 } 2163 2164 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at ) 2165 { 2166 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at)); 2167 } 2168 2169 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at ) 2170 { 2171 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at)); 2172 } 2173 2174 2175 /* Here's a simple scheme capable of handling ops derived from SSE1 2176 code and while only generating ops that can be efficiently 2177 implemented in SSE1. */ 2178 2179 /* All-lanes versions are straightforward: 2180 2181 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#)) 2182 2183 unary32Fx4(x,y) ==> PCast32x4(x#) 2184 2185 Lowest-lane-only versions are more complex: 2186 2187 binary32F0x4(x,y) ==> SetV128lo32( 2188 x#, 2189 PCast32(V128to32(UifUV128(x#,y#))) 2190 ) 2191 2192 This is perhaps not so obvious. In particular, it's faster to 2193 do a V128-bit UifU and then take the bottom 32 bits than the more 2194 obvious scheme of taking the bottom 32 bits of each operand 2195 and doing a 32-bit UifU. Basically since UifU is fast and 2196 chopping lanes off vector values is slow. 
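In other words, the avoided alternative would be something like
SetV128lo32( x#, PCast32(UifU32(V128to32(x#), V128to32(y#))) )
which needs two V128to32 lane extractions (one per operand) rather than
the single one used above.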
2197 2198 Finally: 2199 2200 unary32F0x4(x) ==> SetV128lo32( 2201 x#, 2202 PCast32(V128to32(x#)) 2203 ) 2204 2205 Where: 2206 2207 PCast32(v#) = 1Sto32(CmpNE32(v#,0)) 2208 PCast32x4(v#) = CmpNEZ32x4(v#) 2209 */ 2210 2211 static 2212 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2213 { 2214 IRAtom* at; 2215 tl_assert(isShadowAtom(mce, vatomX)); 2216 tl_assert(isShadowAtom(mce, vatomY)); 2217 at = mkUifUV128(mce, vatomX, vatomY); 2218 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at)); 2219 return at; 2220 } 2221 2222 static 2223 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX ) 2224 { 2225 IRAtom* at; 2226 tl_assert(isShadowAtom(mce, vatomX)); 2227 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX)); 2228 return at; 2229 } 2230 2231 static 2232 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2233 { 2234 IRAtom* at; 2235 tl_assert(isShadowAtom(mce, vatomX)); 2236 tl_assert(isShadowAtom(mce, vatomY)); 2237 at = mkUifUV128(mce, vatomX, vatomY); 2238 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at)); 2239 at = mkPCastTo(mce, Ity_I32, at); 2240 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 2241 return at; 2242 } 2243 2244 static 2245 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX ) 2246 { 2247 IRAtom* at; 2248 tl_assert(isShadowAtom(mce, vatomX)); 2249 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX)); 2250 at = mkPCastTo(mce, Ity_I32, at); 2251 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 2252 return at; 2253 } 2254 2255 /* --- ... and ... 64Fx2 versions of the same ... --- */ 2256 2257 static 2258 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2259 { 2260 IRAtom* at; 2261 tl_assert(isShadowAtom(mce, vatomX)); 2262 tl_assert(isShadowAtom(mce, vatomY)); 2263 at = mkUifUV128(mce, vatomX, vatomY); 2264 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at)); 2265 return at; 2266 } 2267 2268 static 2269 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX ) 2270 { 2271 IRAtom* at; 2272 tl_assert(isShadowAtom(mce, vatomX)); 2273 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX)); 2274 return at; 2275 } 2276 2277 static 2278 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2279 { 2280 IRAtom* at; 2281 tl_assert(isShadowAtom(mce, vatomX)); 2282 tl_assert(isShadowAtom(mce, vatomY)); 2283 at = mkUifUV128(mce, vatomX, vatomY); 2284 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at)); 2285 at = mkPCastTo(mce, Ity_I64, at); 2286 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 2287 return at; 2288 } 2289 2290 static 2291 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX ) 2292 { 2293 IRAtom* at; 2294 tl_assert(isShadowAtom(mce, vatomX)); 2295 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX)); 2296 at = mkPCastTo(mce, Ity_I64, at); 2297 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 2298 return at; 2299 } 2300 2301 /* --- --- ... and ... 
32Fx2 versions of the same --- --- */ 2302 2303 static 2304 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2305 { 2306 IRAtom* at; 2307 tl_assert(isShadowAtom(mce, vatomX)); 2308 tl_assert(isShadowAtom(mce, vatomY)); 2309 at = mkUifU64(mce, vatomX, vatomY); 2310 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at)); 2311 return at; 2312 } 2313 2314 static 2315 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX ) 2316 { 2317 IRAtom* at; 2318 tl_assert(isShadowAtom(mce, vatomX)); 2319 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX)); 2320 return at; 2321 } 2322 2323 /* --- ... and ... 64Fx4 versions of the same ... --- */ 2324 2325 static 2326 IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2327 { 2328 IRAtom* at; 2329 tl_assert(isShadowAtom(mce, vatomX)); 2330 tl_assert(isShadowAtom(mce, vatomY)); 2331 at = mkUifUV256(mce, vatomX, vatomY); 2332 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at)); 2333 return at; 2334 } 2335 2336 static 2337 IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX ) 2338 { 2339 IRAtom* at; 2340 tl_assert(isShadowAtom(mce, vatomX)); 2341 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX)); 2342 return at; 2343 } 2344 2345 /* --- ... and ... 32Fx8 versions of the same ... --- */ 2346 2347 static 2348 IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2349 { 2350 IRAtom* at; 2351 tl_assert(isShadowAtom(mce, vatomX)); 2352 tl_assert(isShadowAtom(mce, vatomY)); 2353 at = mkUifUV256(mce, vatomX, vatomY); 2354 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at)); 2355 return at; 2356 } 2357 2358 static 2359 IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX ) 2360 { 2361 IRAtom* at; 2362 tl_assert(isShadowAtom(mce, vatomX)); 2363 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX)); 2364 return at; 2365 } 2366 2367 /* --- 64Fx2 binary FP ops, with rounding mode --- */ 2368 2369 static 2370 IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM, 2371 IRAtom* vatomX, IRAtom* vatomY ) 2372 { 2373 /* This is the same as binary64Fx2, except that we subsequently 2374 pessimise vRM (definedness of the rounding mode), widen to 128 2375 bits and UifU it into the result. As with the scalar cases, if 2376 the RM is a constant then it is defined and so this extra bit 2377 will get constant-folded out later. */ 2378 // "do" the vector args 2379 IRAtom* t1 = binary64Fx2(mce, vatomX, vatomY); 2380 // PCast the RM, and widen it to 128 bits 2381 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM); 2382 // Roll it into the result 2383 t1 = mkUifUV128(mce, t1, t2); 2384 return t1; 2385 } 2386 2387 /* --- ... and ... 32Fx4 versions of the same --- */ 2388 2389 static 2390 IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, 2391 IRAtom* vatomX, IRAtom* vatomY ) 2392 { 2393 IRAtom* t1 = binary32Fx4(mce, vatomX, vatomY); 2394 // PCast the RM, and widen it to 128 bits 2395 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM); 2396 // Roll it into the result 2397 t1 = mkUifUV128(mce, t1, t2); 2398 return t1; 2399 } 2400 2401 /* --- ... and ... 64Fx4 versions of the same --- */ 2402 2403 static 2404 IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, 2405 IRAtom* vatomX, IRAtom* vatomY ) 2406 { 2407 IRAtom* t1 = binary64Fx4(mce, vatomX, vatomY); 2408 // PCast the RM, and widen it to 256 bits 2409 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM); 2410 // Roll it into the result 2411 t1 = mkUifUV256(mce, t1, t2); 2412 return t1; 2413 } 2414 2415 /* --- ... and ... 
32Fx8 versions of the same --- */ 2416 2417 static 2418 IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM, 2419 IRAtom* vatomX, IRAtom* vatomY ) 2420 { 2421 IRAtom* t1 = binary32Fx8(mce, vatomX, vatomY); 2422 // PCast the RM, and widen it to 256 bits 2423 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM); 2424 // Roll it into the result 2425 t1 = mkUifUV256(mce, t1, t2); 2426 return t1; 2427 } 2428 2429 /* --- 64Fx2 unary FP ops, with rounding mode --- */ 2430 2431 static 2432 IRAtom* unary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX ) 2433 { 2434 /* Same scheme as binary64Fx2_w_rm. */ 2435 // "do" the vector arg 2436 IRAtom* t1 = unary64Fx2(mce, vatomX); 2437 // PCast the RM, and widen it to 128 bits 2438 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM); 2439 // Roll it into the result 2440 t1 = mkUifUV128(mce, t1, t2); 2441 return t1; 2442 } 2443 2444 /* --- ... and ... 32Fx4 versions of the same --- */ 2445 2446 static 2447 IRAtom* unary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX ) 2448 { 2449 /* Same scheme as unary32Fx4_w_rm. */ 2450 IRAtom* t1 = unary32Fx4(mce, vatomX); 2451 // PCast the RM, and widen it to 128 bits 2452 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM); 2453 // Roll it into the result 2454 t1 = mkUifUV128(mce, t1, t2); 2455 return t1; 2456 } 2457 2458 2459 /* --- --- Vector saturated narrowing --- --- */ 2460 2461 /* We used to do something very clever here, but on closer inspection 2462 (2011-Jun-15), and in particular bug #279698, it turns out to be 2463 wrong. Part of the problem came from the fact that for a long 2464 time, the IR primops to do with saturated narrowing were 2465 underspecified and managed to confuse multiple cases which needed 2466 to be separate: the op names had a signedness qualifier, but in 2467 fact the source and destination signednesses needed to be specified 2468 independently, so the op names really need two independent 2469 signedness specifiers. 2470 2471 As of 2011-Jun-15 (ish) the underspecification was sorted out 2472 properly. The incorrect instrumentation remained, though. That 2473 has now (2011-Oct-22) been fixed. 2474 2475 What we now do is simple: 2476 2477 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a 2478 number of lanes, X is the source lane width and signedness, and Y 2479 is the destination lane width and signedness. In all cases the 2480 destination lane width is half the source lane width, so the names 2481 have a bit of redundancy, but are at least easy to read. 2482 2483 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s 2484 to unsigned 16s. 2485 2486 Let Vanilla(OP) be a function that takes OP, one of these 2487 saturating narrowing ops, and produces the same "shaped" narrowing 2488 op which is not saturating, but merely dumps the most significant 2489 bits. "same shape" means that the lane numbers and widths are the 2490 same as with OP. 2491 2492 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8) 2493 = Iop_NarrowBin32to16x8, 2494 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by 2495 dumping the top half of each lane. 2496 2497 So, with that in place, the scheme is simple, and it is simple to 2498 pessimise each lane individually and then apply Vanilla(OP) so as 2499 to get the result in the right "shape". 
If the original OP is 2500 QNarrowBinXtoYxZ then we produce 2501 2502 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) ) 2503 2504 or for the case when OP is unary (Iop_QNarrowUn*) 2505 2506 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) ) 2507 */ 2508 static 2509 IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp ) 2510 { 2511 switch (qnarrowOp) { 2512 /* Binary: (128, 128) -> 128 */ 2513 case Iop_QNarrowBin16Sto8Ux16: 2514 case Iop_QNarrowBin16Sto8Sx16: 2515 case Iop_QNarrowBin16Uto8Ux16: 2516 case Iop_QNarrowBin64Sto32Sx4: 2517 case Iop_QNarrowBin64Uto32Ux4: 2518 return Iop_NarrowBin16to8x16; 2519 case Iop_QNarrowBin32Sto16Ux8: 2520 case Iop_QNarrowBin32Sto16Sx8: 2521 case Iop_QNarrowBin32Uto16Ux8: 2522 return Iop_NarrowBin32to16x8; 2523 /* Binary: (64, 64) -> 64 */ 2524 case Iop_QNarrowBin32Sto16Sx4: 2525 return Iop_NarrowBin32to16x4; 2526 case Iop_QNarrowBin16Sto8Ux8: 2527 case Iop_QNarrowBin16Sto8Sx8: 2528 return Iop_NarrowBin16to8x8; 2529 /* Unary: 128 -> 64 */ 2530 case Iop_QNarrowUn64Uto32Ux2: 2531 case Iop_QNarrowUn64Sto32Sx2: 2532 case Iop_QNarrowUn64Sto32Ux2: 2533 return Iop_NarrowUn64to32x2; 2534 case Iop_QNarrowUn32Uto16Ux4: 2535 case Iop_QNarrowUn32Sto16Sx4: 2536 case Iop_QNarrowUn32Sto16Ux4: 2537 case Iop_F32toF16x4: 2538 return Iop_NarrowUn32to16x4; 2539 case Iop_QNarrowUn16Uto8Ux8: 2540 case Iop_QNarrowUn16Sto8Sx8: 2541 case Iop_QNarrowUn16Sto8Ux8: 2542 return Iop_NarrowUn16to8x8; 2543 default: 2544 ppIROp(qnarrowOp); 2545 VG_(tool_panic)("vanillaNarrowOpOfShape"); 2546 } 2547 } 2548 2549 static 2550 IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op, 2551 IRAtom* vatom1, IRAtom* vatom2) 2552 { 2553 IRAtom *at1, *at2, *at3; 2554 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2555 switch (narrow_op) { 2556 case Iop_QNarrowBin64Sto32Sx4: pcast = mkPCast32x4; break; 2557 case Iop_QNarrowBin64Uto32Ux4: pcast = mkPCast32x4; break; 2558 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break; 2559 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break; 2560 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break; 2561 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break; 2562 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break; 2563 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break; 2564 default: VG_(tool_panic)("vectorNarrowBinV128"); 2565 } 2566 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2567 tl_assert(isShadowAtom(mce,vatom1)); 2568 tl_assert(isShadowAtom(mce,vatom2)); 2569 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1)); 2570 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2)); 2571 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2)); 2572 return at3; 2573 } 2574 2575 static 2576 IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op, 2577 IRAtom* vatom1, IRAtom* vatom2) 2578 { 2579 IRAtom *at1, *at2, *at3; 2580 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2581 switch (narrow_op) { 2582 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break; 2583 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break; 2584 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break; 2585 default: VG_(tool_panic)("vectorNarrowBin64"); 2586 } 2587 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2588 tl_assert(isShadowAtom(mce,vatom1)); 2589 tl_assert(isShadowAtom(mce,vatom2)); 2590 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1)); 2591 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2)); 2592 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2)); 2593 return at3; 2594 } 2595 2596 static 2597 
IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op, 2598 IRAtom* vatom1) 2599 { 2600 IRAtom *at1, *at2; 2601 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2602 tl_assert(isShadowAtom(mce,vatom1)); 2603 /* For vanilla narrowing (non-saturating), we can just apply 2604 the op directly to the V bits. */ 2605 switch (narrow_op) { 2606 case Iop_NarrowUn16to8x8: 2607 case Iop_NarrowUn32to16x4: 2608 case Iop_NarrowUn64to32x2: 2609 case Iop_F32toF16x4: 2610 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1)); 2611 return at1; 2612 default: 2613 break; /* Do Plan B */ 2614 } 2615 /* Plan B: for ops that involve a saturation operation on the args, 2616 we must PCast before the vanilla narrow. */ 2617 switch (narrow_op) { 2618 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break; 2619 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break; 2620 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break; 2621 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break; 2622 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break; 2623 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break; 2624 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break; 2625 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break; 2626 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break; 2627 default: VG_(tool_panic)("vectorNarrowUnV128"); 2628 } 2629 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2630 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1)); 2631 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1)); 2632 return at2; 2633 } 2634 2635 static 2636 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op, 2637 IRAtom* vatom1) 2638 { 2639 IRAtom *at1, *at2; 2640 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2641 switch (longen_op) { 2642 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break; 2643 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break; 2644 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break; 2645 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break; 2646 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break; 2647 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break; 2648 case Iop_F16toF32x4: pcast = mkPCast32x4; break; 2649 default: VG_(tool_panic)("vectorWidenI64"); 2650 } 2651 tl_assert(isShadowAtom(mce,vatom1)); 2652 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1)); 2653 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1)); 2654 return at2; 2655 } 2656 2657 2658 /* --- --- Vector integer arithmetic --- --- */ 2659 2660 /* Simple ... UifU the args and per-lane pessimise the results. 
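For example, the 32x4 (V128) case below is
binary32Ix4(x#,y#) ==> PCast32x4(UifUV128(x#,y#));
the other widths follow the same pattern with the matching UifU and
per-lane PCast.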
*/ 2661 2662 /* --- V256-bit versions --- */ 2663 2664 static 2665 IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2666 { 2667 IRAtom* at; 2668 at = mkUifUV256(mce, vatom1, vatom2); 2669 at = mkPCast8x32(mce, at); 2670 return at; 2671 } 2672 2673 static 2674 IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2675 { 2676 IRAtom* at; 2677 at = mkUifUV256(mce, vatom1, vatom2); 2678 at = mkPCast16x16(mce, at); 2679 return at; 2680 } 2681 2682 static 2683 IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2684 { 2685 IRAtom* at; 2686 at = mkUifUV256(mce, vatom1, vatom2); 2687 at = mkPCast32x8(mce, at); 2688 return at; 2689 } 2690 2691 static 2692 IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2693 { 2694 IRAtom* at; 2695 at = mkUifUV256(mce, vatom1, vatom2); 2696 at = mkPCast64x4(mce, at); 2697 return at; 2698 } 2699 2700 /* --- V128-bit versions --- */ 2701 2702 static 2703 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2704 { 2705 IRAtom* at; 2706 at = mkUifUV128(mce, vatom1, vatom2); 2707 at = mkPCast8x16(mce, at); 2708 return at; 2709 } 2710 2711 static 2712 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2713 { 2714 IRAtom* at; 2715 at = mkUifUV128(mce, vatom1, vatom2); 2716 at = mkPCast16x8(mce, at); 2717 return at; 2718 } 2719 2720 static 2721 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2722 { 2723 IRAtom* at; 2724 at = mkUifUV128(mce, vatom1, vatom2); 2725 at = mkPCast32x4(mce, at); 2726 return at; 2727 } 2728 2729 static 2730 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2731 { 2732 IRAtom* at; 2733 at = mkUifUV128(mce, vatom1, vatom2); 2734 at = mkPCast64x2(mce, at); 2735 return at; 2736 } 2737 2738 /* --- 64-bit versions --- */ 2739 2740 static 2741 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2742 { 2743 IRAtom* at; 2744 at = mkUifU64(mce, vatom1, vatom2); 2745 at = mkPCast8x8(mce, at); 2746 return at; 2747 } 2748 2749 static 2750 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2751 { 2752 IRAtom* at; 2753 at = mkUifU64(mce, vatom1, vatom2); 2754 at = mkPCast16x4(mce, at); 2755 return at; 2756 } 2757 2758 static 2759 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2760 { 2761 IRAtom* at; 2762 at = mkUifU64(mce, vatom1, vatom2); 2763 at = mkPCast32x2(mce, at); 2764 return at; 2765 } 2766 2767 static 2768 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2769 { 2770 IRAtom* at; 2771 at = mkUifU64(mce, vatom1, vatom2); 2772 at = mkPCastTo(mce, Ity_I64, at); 2773 return at; 2774 } 2775 2776 /* --- 32-bit versions --- */ 2777 2778 static 2779 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2780 { 2781 IRAtom* at; 2782 at = mkUifU32(mce, vatom1, vatom2); 2783 at = mkPCast8x4(mce, at); 2784 return at; 2785 } 2786 2787 static 2788 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2789 { 2790 IRAtom* at; 2791 at = mkUifU32(mce, vatom1, vatom2); 2792 at = mkPCast16x2(mce, at); 2793 return at; 2794 } 2795 2796 2797 /*------------------------------------------------------------*/ 2798 /*--- Generate shadow values from all kinds of IRExprs. 
---*/ 2799 /*------------------------------------------------------------*/ 2800 2801 static 2802 IRAtom* expr2vbits_Qop ( MCEnv* mce, 2803 IROp op, 2804 IRAtom* atom1, IRAtom* atom2, 2805 IRAtom* atom3, IRAtom* atom4 ) 2806 { 2807 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2808 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2809 IRAtom* vatom3 = expr2vbits( mce, atom3 ); 2810 IRAtom* vatom4 = expr2vbits( mce, atom4 ); 2811 2812 tl_assert(isOriginalAtom(mce,atom1)); 2813 tl_assert(isOriginalAtom(mce,atom2)); 2814 tl_assert(isOriginalAtom(mce,atom3)); 2815 tl_assert(isOriginalAtom(mce,atom4)); 2816 tl_assert(isShadowAtom(mce,vatom1)); 2817 tl_assert(isShadowAtom(mce,vatom2)); 2818 tl_assert(isShadowAtom(mce,vatom3)); 2819 tl_assert(isShadowAtom(mce,vatom4)); 2820 tl_assert(sameKindedAtoms(atom1,vatom1)); 2821 tl_assert(sameKindedAtoms(atom2,vatom2)); 2822 tl_assert(sameKindedAtoms(atom3,vatom3)); 2823 tl_assert(sameKindedAtoms(atom4,vatom4)); 2824 switch (op) { 2825 case Iop_MAddF64: 2826 case Iop_MAddF64r32: 2827 case Iop_MSubF64: 2828 case Iop_MSubF64r32: 2829 /* I32(rm) x F64 x F64 x F64 -> F64 */ 2830 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4); 2831 2832 case Iop_MAddF32: 2833 case Iop_MSubF32: 2834 /* I32(rm) x F32 x F32 x F32 -> F32 */ 2835 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4); 2836 2837 case Iop_MAddF128: 2838 case Iop_MSubF128: 2839 case Iop_NegMAddF128: 2840 case Iop_NegMSubF128: 2841 /* I32(rm) x F128 x F128 x F128 -> F128 */ 2842 return mkLazy4(mce, Ity_I128, vatom1, vatom2, vatom3, vatom4); 2843 2844 /* V256-bit data-steering */ 2845 case Iop_64x4toV256: 2846 return assignNew('V', mce, Ity_V256, 2847 IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4)); 2848 2849 default: 2850 ppIROp(op); 2851 VG_(tool_panic)("memcheck:expr2vbits_Qop"); 2852 } 2853 } 2854 2855 2856 static 2857 IRAtom* expr2vbits_Triop ( MCEnv* mce, 2858 IROp op, 2859 IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 ) 2860 { 2861 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2862 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2863 IRAtom* vatom3 = expr2vbits( mce, atom3 ); 2864 2865 tl_assert(isOriginalAtom(mce,atom1)); 2866 tl_assert(isOriginalAtom(mce,atom2)); 2867 tl_assert(isOriginalAtom(mce,atom3)); 2868 tl_assert(isShadowAtom(mce,vatom1)); 2869 tl_assert(isShadowAtom(mce,vatom2)); 2870 tl_assert(isShadowAtom(mce,vatom3)); 2871 tl_assert(sameKindedAtoms(atom1,vatom1)); 2872 tl_assert(sameKindedAtoms(atom2,vatom2)); 2873 tl_assert(sameKindedAtoms(atom3,vatom3)); 2874 switch (op) { 2875 case Iop_AddF128: 2876 case Iop_SubF128: 2877 case Iop_MulF128: 2878 case Iop_DivF128: 2879 case Iop_AddD128: 2880 case Iop_SubD128: 2881 case Iop_MulD128: 2882 case Iop_DivD128: 2883 case Iop_QuantizeD128: 2884 /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */ 2885 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3); 2886 case Iop_AddF64: 2887 case Iop_AddD64: 2888 case Iop_AddF64r32: 2889 case Iop_SubF64: 2890 case Iop_SubD64: 2891 case Iop_SubF64r32: 2892 case Iop_MulF64: 2893 case Iop_MulD64: 2894 case Iop_MulF64r32: 2895 case Iop_DivF64: 2896 case Iop_DivD64: 2897 case Iop_DivF64r32: 2898 case Iop_ScaleF64: 2899 case Iop_Yl2xF64: 2900 case Iop_Yl2xp1F64: 2901 case Iop_AtanF64: 2902 case Iop_PRemF64: 2903 case Iop_PRem1F64: 2904 case Iop_QuantizeD64: 2905 /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */ 2906 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3); 2907 case Iop_PRemC3210F64: 2908 case Iop_PRem1C3210F64: 2909 /* I32(rm) x F64 x F64 -> I32 */ 2910 return mkLazy3(mce, Ity_I32, vatom1, vatom2, 
vatom3); 2911 case Iop_AddF32: 2912 case Iop_SubF32: 2913 case Iop_MulF32: 2914 case Iop_DivF32: 2915 /* I32(rm) x F32 x F32 -> I32 */ 2916 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3); 2917 case Iop_SignificanceRoundD64: 2918 /* IRRoundingMode(I32) x I8 x D64 -> D64 */ 2919 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3); 2920 case Iop_SignificanceRoundD128: 2921 /* IRRoundingMode(I32) x I8 x D128 -> D128 */ 2922 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3); 2923 case Iop_SliceV128: 2924 /* (V128, V128, I8) -> V128 */ 2925 complainIfUndefined(mce, atom3, NULL); 2926 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3)); 2927 case Iop_Slice64: 2928 /* (I64, I64, I8) -> I64 */ 2929 complainIfUndefined(mce, atom3, NULL); 2930 return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3)); 2931 case Iop_SetElem8x8: 2932 case Iop_SetElem16x4: 2933 case Iop_SetElem32x2: 2934 complainIfUndefined(mce, atom2, NULL); 2935 return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3)); 2936 2937 /* Vector FP with rounding mode as the first arg */ 2938 case Iop_Add64Fx2: 2939 case Iop_Sub64Fx2: 2940 case Iop_Mul64Fx2: 2941 case Iop_Div64Fx2: 2942 return binary64Fx2_w_rm(mce, vatom1, vatom2, vatom3); 2943 2944 case Iop_Add32Fx4: 2945 case Iop_Sub32Fx4: 2946 case Iop_Mul32Fx4: 2947 case Iop_Div32Fx4: 2948 return binary32Fx4_w_rm(mce, vatom1, vatom2, vatom3); 2949 2950 case Iop_Add64Fx4: 2951 case Iop_Sub64Fx4: 2952 case Iop_Mul64Fx4: 2953 case Iop_Div64Fx4: 2954 return binary64Fx4_w_rm(mce, vatom1, vatom2, vatom3); 2955 2956 case Iop_Add32Fx8: 2957 case Iop_Sub32Fx8: 2958 case Iop_Mul32Fx8: 2959 case Iop_Div32Fx8: 2960 return binary32Fx8_w_rm(mce, vatom1, vatom2, vatom3); 2961 2962 default: 2963 ppIROp(op); 2964 VG_(tool_panic)("memcheck:expr2vbits_Triop"); 2965 } 2966 } 2967 2968 2969 static 2970 IRAtom* expr2vbits_Binop ( MCEnv* mce, 2971 IROp op, 2972 IRAtom* atom1, IRAtom* atom2 ) 2973 { 2974 IRType and_or_ty; 2975 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*); 2976 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*); 2977 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*); 2978 2979 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2980 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2981 2982 tl_assert(isOriginalAtom(mce,atom1)); 2983 tl_assert(isOriginalAtom(mce,atom2)); 2984 tl_assert(isShadowAtom(mce,vatom1)); 2985 tl_assert(isShadowAtom(mce,vatom2)); 2986 tl_assert(sameKindedAtoms(atom1,vatom1)); 2987 tl_assert(sameKindedAtoms(atom2,vatom2)); 2988 switch (op) { 2989 2990 /* 32-bit SIMD */ 2991 2992 case Iop_Add16x2: 2993 case Iop_HAdd16Ux2: 2994 case Iop_HAdd16Sx2: 2995 case Iop_Sub16x2: 2996 case Iop_HSub16Ux2: 2997 case Iop_HSub16Sx2: 2998 case Iop_QAdd16Sx2: 2999 case Iop_QSub16Sx2: 3000 case Iop_QSub16Ux2: 3001 case Iop_QAdd16Ux2: 3002 return binary16Ix2(mce, vatom1, vatom2); 3003 3004 case Iop_Add8x4: 3005 case Iop_HAdd8Ux4: 3006 case Iop_HAdd8Sx4: 3007 case Iop_Sub8x4: 3008 case Iop_HSub8Ux4: 3009 case Iop_HSub8Sx4: 3010 case Iop_QSub8Ux4: 3011 case Iop_QAdd8Ux4: 3012 case Iop_QSub8Sx4: 3013 case Iop_QAdd8Sx4: 3014 return binary8Ix4(mce, vatom1, vatom2); 3015 3016 /* 64-bit SIMD */ 3017 3018 case Iop_ShrN8x8: 3019 case Iop_ShrN16x4: 3020 case Iop_ShrN32x2: 3021 case Iop_SarN8x8: 3022 case Iop_SarN16x4: 3023 case Iop_SarN32x2: 3024 case Iop_ShlN16x4: 3025 case Iop_ShlN32x2: 3026 case Iop_ShlN8x8: 3027 /* Same scheme as with all other shifts. 
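That is: complain if the shift amount (atom2) is undefined, then apply
the original shift op to the first arg's V bits using that (checked)
shift amount.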
*/ 3028 complainIfUndefined(mce, atom2, NULL); 3029 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 3030 3031 case Iop_QNarrowBin32Sto16Sx4: 3032 case Iop_QNarrowBin16Sto8Sx8: 3033 case Iop_QNarrowBin16Sto8Ux8: 3034 return vectorNarrowBin64(mce, op, vatom1, vatom2); 3035 3036 case Iop_Min8Ux8: 3037 case Iop_Min8Sx8: 3038 case Iop_Max8Ux8: 3039 case Iop_Max8Sx8: 3040 case Iop_Avg8Ux8: 3041 case Iop_QSub8Sx8: 3042 case Iop_QSub8Ux8: 3043 case Iop_Sub8x8: 3044 case Iop_CmpGT8Sx8: 3045 case Iop_CmpGT8Ux8: 3046 case Iop_CmpEQ8x8: 3047 case Iop_QAdd8Sx8: 3048 case Iop_QAdd8Ux8: 3049 case Iop_QSal8x8: 3050 case Iop_QShl8x8: 3051 case Iop_Add8x8: 3052 case Iop_Mul8x8: 3053 case Iop_PolynomialMul8x8: 3054 return binary8Ix8(mce, vatom1, vatom2); 3055 3056 case Iop_Min16Sx4: 3057 case Iop_Min16Ux4: 3058 case Iop_Max16Sx4: 3059 case Iop_Max16Ux4: 3060 case Iop_Avg16Ux4: 3061 case Iop_QSub16Ux4: 3062 case Iop_QSub16Sx4: 3063 case Iop_Sub16x4: 3064 case Iop_Mul16x4: 3065 case Iop_MulHi16Sx4: 3066 case Iop_MulHi16Ux4: 3067 case Iop_CmpGT16Sx4: 3068 case Iop_CmpGT16Ux4: 3069 case Iop_CmpEQ16x4: 3070 case Iop_QAdd16Sx4: 3071 case Iop_QAdd16Ux4: 3072 case Iop_QSal16x4: 3073 case Iop_QShl16x4: 3074 case Iop_Add16x4: 3075 case Iop_QDMulHi16Sx4: 3076 case Iop_QRDMulHi16Sx4: 3077 return binary16Ix4(mce, vatom1, vatom2); 3078 3079 case Iop_Sub32x2: 3080 case Iop_Mul32x2: 3081 case Iop_Max32Sx2: 3082 case Iop_Max32Ux2: 3083 case Iop_Min32Sx2: 3084 case Iop_Min32Ux2: 3085 case Iop_CmpGT32Sx2: 3086 case Iop_CmpGT32Ux2: 3087 case Iop_CmpEQ32x2: 3088 case Iop_Add32x2: 3089 case Iop_QAdd32Ux2: 3090 case Iop_QAdd32Sx2: 3091 case Iop_QSub32Ux2: 3092 case Iop_QSub32Sx2: 3093 case Iop_QSal32x2: 3094 case Iop_QShl32x2: 3095 case Iop_QDMulHi32Sx2: 3096 case Iop_QRDMulHi32Sx2: 3097 return binary32Ix2(mce, vatom1, vatom2); 3098 3099 case Iop_QSub64Ux1: 3100 case Iop_QSub64Sx1: 3101 case Iop_QAdd64Ux1: 3102 case Iop_QAdd64Sx1: 3103 case Iop_QSal64x1: 3104 case Iop_QShl64x1: 3105 case Iop_Sal64x1: 3106 return binary64Ix1(mce, vatom1, vatom2); 3107 3108 case Iop_QShlNsatSU8x8: 3109 case Iop_QShlNsatUU8x8: 3110 case Iop_QShlNsatSS8x8: 3111 complainIfUndefined(mce, atom2, NULL); 3112 return mkPCast8x8(mce, vatom1); 3113 3114 case Iop_QShlNsatSU16x4: 3115 case Iop_QShlNsatUU16x4: 3116 case Iop_QShlNsatSS16x4: 3117 complainIfUndefined(mce, atom2, NULL); 3118 return mkPCast16x4(mce, vatom1); 3119 3120 case Iop_QShlNsatSU32x2: 3121 case Iop_QShlNsatUU32x2: 3122 case Iop_QShlNsatSS32x2: 3123 complainIfUndefined(mce, atom2, NULL); 3124 return mkPCast32x2(mce, vatom1); 3125 3126 case Iop_QShlNsatSU64x1: 3127 case Iop_QShlNsatUU64x1: 3128 case Iop_QShlNsatSS64x1: 3129 complainIfUndefined(mce, atom2, NULL); 3130 return mkPCast32x2(mce, vatom1); 3131 3132 case Iop_PwMax32Sx2: 3133 case Iop_PwMax32Ux2: 3134 case Iop_PwMin32Sx2: 3135 case Iop_PwMin32Ux2: 3136 case Iop_PwMax32Fx2: 3137 case Iop_PwMin32Fx2: 3138 return assignNew('V', mce, Ity_I64, 3139 binop(Iop_PwMax32Ux2, 3140 mkPCast32x2(mce, vatom1), 3141 mkPCast32x2(mce, vatom2))); 3142 3143 case Iop_PwMax16Sx4: 3144 case Iop_PwMax16Ux4: 3145 case Iop_PwMin16Sx4: 3146 case Iop_PwMin16Ux4: 3147 return assignNew('V', mce, Ity_I64, 3148 binop(Iop_PwMax16Ux4, 3149 mkPCast16x4(mce, vatom1), 3150 mkPCast16x4(mce, vatom2))); 3151 3152 case Iop_PwMax8Sx8: 3153 case Iop_PwMax8Ux8: 3154 case Iop_PwMin8Sx8: 3155 case Iop_PwMin8Ux8: 3156 return assignNew('V', mce, Ity_I64, 3157 binop(Iop_PwMax8Ux8, 3158 mkPCast8x8(mce, vatom1), 3159 mkPCast8x8(mce, vatom2))); 3160 3161 case 
Iop_PwAdd32x2: 3162 case Iop_PwAdd32Fx2: 3163 return mkPCast32x2(mce, 3164 assignNew('V', mce, Ity_I64, 3165 binop(Iop_PwAdd32x2, 3166 mkPCast32x2(mce, vatom1), 3167 mkPCast32x2(mce, vatom2)))); 3168 3169 case Iop_PwAdd16x4: 3170 return mkPCast16x4(mce, 3171 assignNew('V', mce, Ity_I64, 3172 binop(op, mkPCast16x4(mce, vatom1), 3173 mkPCast16x4(mce, vatom2)))); 3174 3175 case Iop_PwAdd8x8: 3176 return mkPCast8x8(mce, 3177 assignNew('V', mce, Ity_I64, 3178 binop(op, mkPCast8x8(mce, vatom1), 3179 mkPCast8x8(mce, vatom2)))); 3180 3181 case Iop_Shl8x8: 3182 case Iop_Shr8x8: 3183 case Iop_Sar8x8: 3184 case Iop_Sal8x8: 3185 return mkUifU64(mce, 3186 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 3187 mkPCast8x8(mce,vatom2) 3188 ); 3189 3190 case Iop_Shl16x4: 3191 case Iop_Shr16x4: 3192 case Iop_Sar16x4: 3193 case Iop_Sal16x4: 3194 return mkUifU64(mce, 3195 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 3196 mkPCast16x4(mce,vatom2) 3197 ); 3198 3199 case Iop_Shl32x2: 3200 case Iop_Shr32x2: 3201 case Iop_Sar32x2: 3202 case Iop_Sal32x2: 3203 return mkUifU64(mce, 3204 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 3205 mkPCast32x2(mce,vatom2) 3206 ); 3207 3208 /* 64-bit data-steering */ 3209 case Iop_InterleaveLO32x2: 3210 case Iop_InterleaveLO16x4: 3211 case Iop_InterleaveLO8x8: 3212 case Iop_InterleaveHI32x2: 3213 case Iop_InterleaveHI16x4: 3214 case Iop_InterleaveHI8x8: 3215 case Iop_CatOddLanes8x8: 3216 case Iop_CatEvenLanes8x8: 3217 case Iop_CatOddLanes16x4: 3218 case Iop_CatEvenLanes16x4: 3219 case Iop_InterleaveOddLanes8x8: 3220 case Iop_InterleaveEvenLanes8x8: 3221 case Iop_InterleaveOddLanes16x4: 3222 case Iop_InterleaveEvenLanes16x4: 3223 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2)); 3224 3225 case Iop_GetElem8x8: 3226 complainIfUndefined(mce, atom2, NULL); 3227 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 3228 case Iop_GetElem16x4: 3229 complainIfUndefined(mce, atom2, NULL); 3230 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 3231 case Iop_GetElem32x2: 3232 complainIfUndefined(mce, atom2, NULL); 3233 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 3234 3235 /* Perm8x8: rearrange values in left arg using steering values 3236 from right arg. So rearrange the vbits in the same way but 3237 pessimise wrt steering values. */ 3238 case Iop_Perm8x8: 3239 return mkUifU64( 3240 mce, 3241 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 3242 mkPCast8x8(mce, vatom2) 3243 ); 3244 3245 /* V128-bit SIMD */ 3246 3247 case Iop_Sqrt32Fx4: 3248 return unary32Fx4_w_rm(mce, vatom1, vatom2); 3249 case Iop_Sqrt64Fx2: 3250 return unary64Fx2_w_rm(mce, vatom1, vatom2); 3251 3252 case Iop_ShrN8x16: 3253 case Iop_ShrN16x8: 3254 case Iop_ShrN32x4: 3255 case Iop_ShrN64x2: 3256 case Iop_SarN8x16: 3257 case Iop_SarN16x8: 3258 case Iop_SarN32x4: 3259 case Iop_SarN64x2: 3260 case Iop_ShlN8x16: 3261 case Iop_ShlN16x8: 3262 case Iop_ShlN32x4: 3263 case Iop_ShlN64x2: 3264 /* Same scheme as with all other shifts. Note: 22 Oct 05: 3265 this is wrong now, scalar shifts are done properly lazily. 3266 Vector shifts should be fixed too. */ 3267 complainIfUndefined(mce, atom2, NULL); 3268 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 3269 3270 /* V x V shifts/rotates are done using the standard lazy scheme. */ 3271 /* For the non-rounding variants of bi-di vector x vector 3272 shifts (the Iop_Sh.. ops, that is) we use the lazy scheme. 
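Here that means: vector-shift the first arg's V bits by the original
second arg, then UifU in a per-lane PCast of the second arg's V bits,
as in the cases just below.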
3273 But note that this is overly pessimistic, because in fact only 3274 the bottom 8 bits of each lane of the second argument are taken 3275 into account when shifting. So really we ought to ignore 3276 undefinedness in bits 8 and above of each lane in the 3277 second argument. */ 3278 case Iop_Shl8x16: 3279 case Iop_Shr8x16: 3280 case Iop_Sar8x16: 3281 case Iop_Sal8x16: 3282 case Iop_Rol8x16: 3283 case Iop_Sh8Sx16: 3284 case Iop_Sh8Ux16: 3285 return mkUifUV128(mce, 3286 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3287 mkPCast8x16(mce,vatom2) 3288 ); 3289 3290 case Iop_Shl16x8: 3291 case Iop_Shr16x8: 3292 case Iop_Sar16x8: 3293 case Iop_Sal16x8: 3294 case Iop_Rol16x8: 3295 case Iop_Sh16Sx8: 3296 case Iop_Sh16Ux8: 3297 return mkUifUV128(mce, 3298 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3299 mkPCast16x8(mce,vatom2) 3300 ); 3301 3302 case Iop_Shl32x4: 3303 case Iop_Shr32x4: 3304 case Iop_Sar32x4: 3305 case Iop_Sal32x4: 3306 case Iop_Rol32x4: 3307 case Iop_Sh32Sx4: 3308 case Iop_Sh32Ux4: 3309 return mkUifUV128(mce, 3310 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3311 mkPCast32x4(mce,vatom2) 3312 ); 3313 3314 case Iop_Shl64x2: 3315 case Iop_Shr64x2: 3316 case Iop_Sar64x2: 3317 case Iop_Sal64x2: 3318 case Iop_Rol64x2: 3319 case Iop_Sh64Sx2: 3320 case Iop_Sh64Ux2: 3321 return mkUifUV128(mce, 3322 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3323 mkPCast64x2(mce,vatom2) 3324 ); 3325 3326 /* For the rounding variants of bi-di vector x vector shifts, the 3327 rounding adjustment can cause undefinedness to propagate through 3328 the entire lane, in the worst case. Too complex to handle 3329 properly .. just UifU the arguments and then PCast them. 3330 Suboptimal but safe. */ 3331 case Iop_Rsh8Sx16: 3332 case Iop_Rsh8Ux16: 3333 return binary8Ix16(mce, vatom1, vatom2); 3334 case Iop_Rsh16Sx8: 3335 case Iop_Rsh16Ux8: 3336 return binary16Ix8(mce, vatom1, vatom2); 3337 case Iop_Rsh32Sx4: 3338 case Iop_Rsh32Ux4: 3339 return binary32Ix4(mce, vatom1, vatom2); 3340 case Iop_Rsh64Sx2: 3341 case Iop_Rsh64Ux2: 3342 return binary64Ix2(mce, vatom1, vatom2); 3343 3344 case Iop_F32ToFixed32Ux4_RZ: 3345 case Iop_F32ToFixed32Sx4_RZ: 3346 case Iop_Fixed32UToF32x4_RN: 3347 case Iop_Fixed32SToF32x4_RN: 3348 complainIfUndefined(mce, atom2, NULL); 3349 return mkPCast32x4(mce, vatom1); 3350 3351 case Iop_F32ToFixed32Ux2_RZ: 3352 case Iop_F32ToFixed32Sx2_RZ: 3353 case Iop_Fixed32UToF32x2_RN: 3354 case Iop_Fixed32SToF32x2_RN: 3355 complainIfUndefined(mce, atom2, NULL); 3356 return mkPCast32x2(mce, vatom1); 3357 3358 case Iop_QSub8Ux16: 3359 case Iop_QSub8Sx16: 3360 case Iop_Sub8x16: 3361 case Iop_Min8Ux16: 3362 case Iop_Min8Sx16: 3363 case Iop_Max8Ux16: 3364 case Iop_Max8Sx16: 3365 case Iop_CmpGT8Sx16: 3366 case Iop_CmpGT8Ux16: 3367 case Iop_CmpEQ8x16: 3368 case Iop_Avg8Ux16: 3369 case Iop_Avg8Sx16: 3370 case Iop_QAdd8Ux16: 3371 case Iop_QAdd8Sx16: 3372 case Iop_QAddExtUSsatSS8x16: 3373 case Iop_QAddExtSUsatUU8x16: 3374 case Iop_QSal8x16: 3375 case Iop_QShl8x16: 3376 case Iop_Add8x16: 3377 case Iop_Mul8x16: 3378 case Iop_PolynomialMul8x16: 3379 case Iop_PolynomialMulAdd8x16: 3380 return binary8Ix16(mce, vatom1, vatom2); 3381 3382 case Iop_QSub16Ux8: 3383 case Iop_QSub16Sx8: 3384 case Iop_Sub16x8: 3385 case Iop_Mul16x8: 3386 case Iop_MulHi16Sx8: 3387 case Iop_MulHi16Ux8: 3388 case Iop_Min16Sx8: 3389 case Iop_Min16Ux8: 3390 case Iop_Max16Sx8: 3391 case Iop_Max16Ux8: 3392 case Iop_CmpGT16Sx8: 3393 case Iop_CmpGT16Ux8: 3394 case Iop_CmpEQ16x8: 3395 case Iop_Avg16Ux8: 3396 
case Iop_Avg16Sx8: 3397 case Iop_QAdd16Ux8: 3398 case Iop_QAdd16Sx8: 3399 case Iop_QAddExtUSsatSS16x8: 3400 case Iop_QAddExtSUsatUU16x8: 3401 case Iop_QSal16x8: 3402 case Iop_QShl16x8: 3403 case Iop_Add16x8: 3404 case Iop_QDMulHi16Sx8: 3405 case Iop_QRDMulHi16Sx8: 3406 case Iop_PolynomialMulAdd16x8: 3407 return binary16Ix8(mce, vatom1, vatom2); 3408 3409 case Iop_Sub32x4: 3410 case Iop_CmpGT32Sx4: 3411 case Iop_CmpGT32Ux4: 3412 case Iop_CmpEQ32x4: 3413 case Iop_QAdd32Sx4: 3414 case Iop_QAdd32Ux4: 3415 case Iop_QSub32Sx4: 3416 case Iop_QSub32Ux4: 3417 case Iop_QAddExtUSsatSS32x4: 3418 case Iop_QAddExtSUsatUU32x4: 3419 case Iop_QSal32x4: 3420 case Iop_QShl32x4: 3421 case Iop_Avg32Ux4: 3422 case Iop_Avg32Sx4: 3423 case Iop_Add32x4: 3424 case Iop_Max32Ux4: 3425 case Iop_Max32Sx4: 3426 case Iop_Min32Ux4: 3427 case Iop_Min32Sx4: 3428 case Iop_Mul32x4: 3429 case Iop_QDMulHi32Sx4: 3430 case Iop_QRDMulHi32Sx4: 3431 case Iop_PolynomialMulAdd32x4: 3432 return binary32Ix4(mce, vatom1, vatom2); 3433 3434 case Iop_Sub64x2: 3435 case Iop_Add64x2: 3436 case Iop_Max64Sx2: 3437 case Iop_Max64Ux2: 3438 case Iop_Min64Sx2: 3439 case Iop_Min64Ux2: 3440 case Iop_CmpEQ64x2: 3441 case Iop_CmpGT64Sx2: 3442 case Iop_CmpGT64Ux2: 3443 case Iop_QSal64x2: 3444 case Iop_QShl64x2: 3445 case Iop_QAdd64Ux2: 3446 case Iop_QAdd64Sx2: 3447 case Iop_QSub64Ux2: 3448 case Iop_QSub64Sx2: 3449 case Iop_QAddExtUSsatSS64x2: 3450 case Iop_QAddExtSUsatUU64x2: 3451 case Iop_PolynomialMulAdd64x2: 3452 case Iop_CipherV128: 3453 case Iop_CipherLV128: 3454 case Iop_NCipherV128: 3455 case Iop_NCipherLV128: 3456 case Iop_MulI128by10E: 3457 case Iop_MulI128by10ECarry: 3458 return binary64Ix2(mce, vatom1, vatom2); 3459 3460 case Iop_QNarrowBin64Sto32Sx4: 3461 case Iop_QNarrowBin64Uto32Ux4: 3462 case Iop_QNarrowBin32Sto16Sx8: 3463 case Iop_QNarrowBin32Uto16Ux8: 3464 case Iop_QNarrowBin32Sto16Ux8: 3465 case Iop_QNarrowBin16Sto8Sx16: 3466 case Iop_QNarrowBin16Uto8Ux16: 3467 case Iop_QNarrowBin16Sto8Ux16: 3468 return vectorNarrowBinV128(mce, op, vatom1, vatom2); 3469 3470 case Iop_Min64Fx2: 3471 case Iop_Max64Fx2: 3472 case Iop_CmpLT64Fx2: 3473 case Iop_CmpLE64Fx2: 3474 case Iop_CmpEQ64Fx2: 3475 case Iop_CmpUN64Fx2: 3476 case Iop_RecipStep64Fx2: 3477 case Iop_RSqrtStep64Fx2: 3478 return binary64Fx2(mce, vatom1, vatom2); 3479 3480 case Iop_Sub64F0x2: 3481 case Iop_Mul64F0x2: 3482 case Iop_Min64F0x2: 3483 case Iop_Max64F0x2: 3484 case Iop_Div64F0x2: 3485 case Iop_CmpLT64F0x2: 3486 case Iop_CmpLE64F0x2: 3487 case Iop_CmpEQ64F0x2: 3488 case Iop_CmpUN64F0x2: 3489 case Iop_Add64F0x2: 3490 return binary64F0x2(mce, vatom1, vatom2); 3491 3492 case Iop_Min32Fx4: 3493 case Iop_Max32Fx4: 3494 case Iop_CmpLT32Fx4: 3495 case Iop_CmpLE32Fx4: 3496 case Iop_CmpEQ32Fx4: 3497 case Iop_CmpUN32Fx4: 3498 case Iop_CmpGT32Fx4: 3499 case Iop_CmpGE32Fx4: 3500 case Iop_RecipStep32Fx4: 3501 case Iop_RSqrtStep32Fx4: 3502 return binary32Fx4(mce, vatom1, vatom2); 3503 3504 case Iop_Sub32Fx2: 3505 case Iop_Mul32Fx2: 3506 case Iop_Min32Fx2: 3507 case Iop_Max32Fx2: 3508 case Iop_CmpEQ32Fx2: 3509 case Iop_CmpGT32Fx2: 3510 case Iop_CmpGE32Fx2: 3511 case Iop_Add32Fx2: 3512 case Iop_RecipStep32Fx2: 3513 case Iop_RSqrtStep32Fx2: 3514 return binary32Fx2(mce, vatom1, vatom2); 3515 3516 case Iop_Sub32F0x4: 3517 case Iop_Mul32F0x4: 3518 case Iop_Min32F0x4: 3519 case Iop_Max32F0x4: 3520 case Iop_Div32F0x4: 3521 case Iop_CmpLT32F0x4: 3522 case Iop_CmpLE32F0x4: 3523 case Iop_CmpEQ32F0x4: 3524 case Iop_CmpUN32F0x4: 3525 case Iop_Add32F0x4: 3526 return binary32F0x4(mce, vatom1, vatom2); 3527 
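/* Saturating shift-left by immediate, V128 versions. A sketch of the
scheme used below: the shift amount (atom2) must itself be defined (we
complain otherwise), and the result is then a lane-wise pessimisation
of the first arg's V bits, since saturation can in the worst case make
every bit of a lane depend on any undefined bit in that lane. */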
      case Iop_QShlNsatSU8x16:
      case Iop_QShlNsatUU8x16:
      case Iop_QShlNsatSS8x16:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast8x16(mce, vatom1);

      case Iop_QShlNsatSU16x8:
      case Iop_QShlNsatUU16x8:
      case Iop_QShlNsatSS16x8:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast16x8(mce, vatom1);

      case Iop_QShlNsatSU32x4:
      case Iop_QShlNsatUU32x4:
      case Iop_QShlNsatSS32x4:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast32x4(mce, vatom1);

      case Iop_QShlNsatSU64x2:
      case Iop_QShlNsatUU64x2:
      case Iop_QShlNsatSS64x2:
         complainIfUndefined(mce, atom2, NULL);
         /* These are 64x2 ops, so pessimise at 64-bit lane granularity. */
         return mkPCast64x2(mce, vatom1);

      /* Q-and-Qshift-by-imm-and-narrow of the form (V128, I8) -> V128.
         To make this simpler, do the following:
           * complain if the shift amount (the I8) is undefined
           * pcast each lane at the wide width
           * truncate each lane to half width
           * pcast the resulting 64-bit value to a single bit and use
             that as the least significant bit of the upper half of the
             result. */
      case Iop_QandQShrNnarrow64Uto32Ux2:
      case Iop_QandQSarNnarrow64Sto32Sx2:
      case Iop_QandQSarNnarrow64Sto32Ux2:
      case Iop_QandQRShrNnarrow64Uto32Ux2:
      case Iop_QandQRSarNnarrow64Sto32Sx2:
      case Iop_QandQRSarNnarrow64Sto32Ux2:
      case Iop_QandQShrNnarrow32Uto16Ux4:
      case Iop_QandQSarNnarrow32Sto16Sx4:
      case Iop_QandQSarNnarrow32Sto16Ux4:
      case Iop_QandQRShrNnarrow32Uto16Ux4:
      case Iop_QandQRSarNnarrow32Sto16Sx4:
      case Iop_QandQRSarNnarrow32Sto16Ux4:
      case Iop_QandQShrNnarrow16Uto8Ux8:
      case Iop_QandQSarNnarrow16Sto8Sx8:
      case Iop_QandQSarNnarrow16Sto8Ux8:
      case Iop_QandQRShrNnarrow16Uto8Ux8:
      case Iop_QandQRSarNnarrow16Sto8Sx8:
      case Iop_QandQRSarNnarrow16Sto8Ux8:
      {
         IRAtom* (*fnPessim) (MCEnv*, IRAtom*) = NULL;
         IROp opNarrow = Iop_INVALID;
         switch (op) {
            case Iop_QandQShrNnarrow64Uto32Ux2:
            case Iop_QandQSarNnarrow64Sto32Sx2:
            case Iop_QandQSarNnarrow64Sto32Ux2:
            case Iop_QandQRShrNnarrow64Uto32Ux2:
            case Iop_QandQRSarNnarrow64Sto32Sx2:
            case Iop_QandQRSarNnarrow64Sto32Ux2:
               fnPessim = mkPCast64x2;
               opNarrow = Iop_NarrowUn64to32x2;
               break;
            case Iop_QandQShrNnarrow32Uto16Ux4:
            case Iop_QandQSarNnarrow32Sto16Sx4:
            case Iop_QandQSarNnarrow32Sto16Ux4:
            case Iop_QandQRShrNnarrow32Uto16Ux4:
            case Iop_QandQRSarNnarrow32Sto16Sx4:
            case Iop_QandQRSarNnarrow32Sto16Ux4:
               fnPessim = mkPCast32x4;
               opNarrow = Iop_NarrowUn32to16x4;
               break;
            case Iop_QandQShrNnarrow16Uto8Ux8:
            case Iop_QandQSarNnarrow16Sto8Sx8:
            case Iop_QandQSarNnarrow16Sto8Ux8:
            case Iop_QandQRShrNnarrow16Uto8Ux8:
            case Iop_QandQRSarNnarrow16Sto8Sx8:
            case Iop_QandQRSarNnarrow16Sto8Ux8:
               fnPessim = mkPCast16x8;
               opNarrow = Iop_NarrowUn16to8x8;
               break;
            default:
               tl_assert(0);
         }
         complainIfUndefined(mce, atom2, NULL);
         // Pessimised shift result
         IRAtom* shV
            = fnPessim(mce, vatom1);
         // Narrowed, pessimised shift result
         IRAtom* shVnarrowed
            = assignNew('V', mce, Ity_I64, unop(opNarrow, shV));
         // Generates: Def--(63)--Def PCast-to-I1(narrowed)
         IRAtom* qV = mkPCastXXtoXXlsb(mce, shVnarrowed, Ity_I64);
         // and assemble the result
         return assignNew('V', mce, Ity_V128,
                          binop(Iop_64HLtoV128, qV, shVnarrowed));
      }

      case Iop_Mull32Sx2:
      case Iop_Mull32Ux2:
      case Iop_QDMull32Sx2:
3629 return vectorWidenI64(mce, Iop_Widen32Sto64x2, 3630 mkUifU64(mce, vatom1, vatom2)); 3631 3632 case Iop_Mull16Sx4: 3633 case Iop_Mull16Ux4: 3634 case Iop_QDMull16Sx4: 3635 return vectorWidenI64(mce, Iop_Widen16Sto32x4, 3636 mkUifU64(mce, vatom1, vatom2)); 3637 3638 case Iop_Mull8Sx8: 3639 case Iop_Mull8Ux8: 3640 case Iop_PolynomialMull8x8: 3641 return vectorWidenI64(mce, Iop_Widen8Sto16x8, 3642 mkUifU64(mce, vatom1, vatom2)); 3643 3644 case Iop_PwAdd32x4: 3645 return mkPCast32x4(mce, 3646 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1), 3647 mkPCast32x4(mce, vatom2)))); 3648 3649 case Iop_PwAdd16x8: 3650 return mkPCast16x8(mce, 3651 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1), 3652 mkPCast16x8(mce, vatom2)))); 3653 3654 case Iop_PwAdd8x16: 3655 return mkPCast8x16(mce, 3656 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1), 3657 mkPCast8x16(mce, vatom2)))); 3658 3659 /* V128-bit data-steering */ 3660 case Iop_SetV128lo32: 3661 case Iop_SetV128lo64: 3662 case Iop_64HLtoV128: 3663 case Iop_InterleaveLO64x2: 3664 case Iop_InterleaveLO32x4: 3665 case Iop_InterleaveLO16x8: 3666 case Iop_InterleaveLO8x16: 3667 case Iop_InterleaveHI64x2: 3668 case Iop_InterleaveHI32x4: 3669 case Iop_InterleaveHI16x8: 3670 case Iop_InterleaveHI8x16: 3671 case Iop_CatOddLanes8x16: 3672 case Iop_CatOddLanes16x8: 3673 case Iop_CatOddLanes32x4: 3674 case Iop_CatEvenLanes8x16: 3675 case Iop_CatEvenLanes16x8: 3676 case Iop_CatEvenLanes32x4: 3677 case Iop_InterleaveOddLanes8x16: 3678 case Iop_InterleaveOddLanes16x8: 3679 case Iop_InterleaveOddLanes32x4: 3680 case Iop_InterleaveEvenLanes8x16: 3681 case Iop_InterleaveEvenLanes16x8: 3682 case Iop_InterleaveEvenLanes32x4: 3683 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2)); 3684 3685 case Iop_GetElem8x16: 3686 complainIfUndefined(mce, atom2, NULL); 3687 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 3688 case Iop_GetElem16x8: 3689 complainIfUndefined(mce, atom2, NULL); 3690 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 3691 case Iop_GetElem32x4: 3692 complainIfUndefined(mce, atom2, NULL); 3693 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 3694 case Iop_GetElem64x2: 3695 complainIfUndefined(mce, atom2, NULL); 3696 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 3697 3698 /* Perm8x16: rearrange values in left arg using steering values 3699 from right arg. So rearrange the vbits in the same way but 3700 pessimise wrt steering values. Perm32x4 ditto. */ 3701 case Iop_Perm8x16: 3702 return mkUifUV128( 3703 mce, 3704 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3705 mkPCast8x16(mce, vatom2) 3706 ); 3707 case Iop_Perm32x4: 3708 return mkUifUV128( 3709 mce, 3710 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3711 mkPCast32x4(mce, vatom2) 3712 ); 3713 3714 /* These two take the lower half of each 16-bit lane, sign/zero 3715 extend it to 32, and multiply together, producing a 32x4 3716 result (and implicitly ignoring half the operand bits). So 3717 treat it as a bunch of independent 16x8 operations, but then 3718 do 32-bit shifts left-right to copy the lower half results 3719 (which are all 0s or all 1s due to PCasting in binary16Ix8) 3720 into the upper half of each result lane. 
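
         As an illustrative worked example (not a literal transcript of
         the emitted IR): consider one 32-bit group of the binary16Ix8
         result, and suppose the 16-bit lane that actually feeds the
         multiply (the lower half of the group) has shadow 0xFFFF
         (undefined) while its ignored neighbour has shadow 0x0000.  The
         group then reads 0x0000FFFF; ShlN32x4 by 16 gives 0xFFFF0000,
         and the arithmetic SarN32x4 by 16 gives 0xFFFFFFFF, so the
         whole 32-bit result lane is marked undefined.  Conversely, if
         only the ignored neighbour is undefined (0xFFFF0000), the two
         shifts yield 0x00000000, i.e. fully defined, as desired.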
*/ 3721 case Iop_MullEven16Ux8: 3722 case Iop_MullEven16Sx8: { 3723 IRAtom* at; 3724 at = binary16Ix8(mce,vatom1,vatom2); 3725 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16))); 3726 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16))); 3727 return at; 3728 } 3729 3730 /* Same deal as Iop_MullEven16{S,U}x8 */ 3731 case Iop_MullEven8Ux16: 3732 case Iop_MullEven8Sx16: { 3733 IRAtom* at; 3734 at = binary8Ix16(mce,vatom1,vatom2); 3735 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8))); 3736 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8))); 3737 return at; 3738 } 3739 3740 /* Same deal as Iop_MullEven16{S,U}x8 */ 3741 case Iop_MullEven32Ux4: 3742 case Iop_MullEven32Sx4: { 3743 IRAtom* at; 3744 at = binary32Ix4(mce,vatom1,vatom2); 3745 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN64x2, at, mkU8(32))); 3746 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN64x2, at, mkU8(32))); 3747 return at; 3748 } 3749 3750 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x 3751 32x4 -> 16x8 laneage, discarding the upper half of each lane. 3752 Simply apply same op to the V bits, since this really no more 3753 than a data steering operation. */ 3754 case Iop_NarrowBin32to16x8: 3755 case Iop_NarrowBin16to8x16: 3756 case Iop_NarrowBin64to32x4: 3757 return assignNew('V', mce, Ity_V128, 3758 binop(op, vatom1, vatom2)); 3759 3760 case Iop_ShrV128: 3761 case Iop_ShlV128: 3762 case Iop_I128StoBCD128: 3763 /* Same scheme as with all other shifts. Note: 10 Nov 05: 3764 this is wrong now, scalar shifts are done properly lazily. 3765 Vector shifts should be fixed too. */ 3766 complainIfUndefined(mce, atom2, NULL); 3767 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 3768 3769 case Iop_BCDAdd: 3770 case Iop_BCDSub: 3771 return mkLazy2(mce, Ity_V128, vatom1, vatom2); 3772 3773 /* SHA Iops */ 3774 case Iop_SHA256: 3775 case Iop_SHA512: 3776 complainIfUndefined(mce, atom2, NULL); 3777 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 3778 3779 /* I128-bit data-steering */ 3780 case Iop_64HLto128: 3781 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2)); 3782 3783 /* V256-bit SIMD */ 3784 3785 case Iop_Max64Fx4: 3786 case Iop_Min64Fx4: 3787 return binary64Fx4(mce, vatom1, vatom2); 3788 3789 case Iop_Max32Fx8: 3790 case Iop_Min32Fx8: 3791 return binary32Fx8(mce, vatom1, vatom2); 3792 3793 /* V256-bit data-steering */ 3794 case Iop_V128HLtoV256: 3795 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2)); 3796 3797 /* Scalar floating point */ 3798 3799 case Iop_F32toI64S: 3800 case Iop_F32toI64U: 3801 /* I32(rm) x F32 -> I64 */ 3802 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3803 3804 case Iop_I64StoF32: 3805 /* I32(rm) x I64 -> F32 */ 3806 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3807 3808 case Iop_RoundF64toInt: 3809 case Iop_RoundF64toF32: 3810 case Iop_F64toI64S: 3811 case Iop_F64toI64U: 3812 case Iop_I64StoF64: 3813 case Iop_I64UtoF64: 3814 case Iop_SinF64: 3815 case Iop_CosF64: 3816 case Iop_TanF64: 3817 case Iop_2xm1F64: 3818 case Iop_SqrtF64: 3819 case Iop_RecpExpF64: 3820 /* I32(rm) x I64/F64 -> I64/F64 */ 3821 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3822 3823 case Iop_ShlD64: 3824 case Iop_ShrD64: 3825 case Iop_RoundD64toInt: 3826 /* I32(rm) x D64 -> D64 */ 3827 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3828 3829 case Iop_ShlD128: 3830 case Iop_ShrD128: 3831 case Iop_RoundD128toInt: 3832 /* I32(rm) x D128 -> D128 */ 3833 return mkLazy2(mce, Ity_I128, vatom1, 
vatom2); 3834 3835 case Iop_RoundF128toInt: 3836 /* I32(rm) x F128 -> F128 */ 3837 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3838 3839 case Iop_D64toI64S: 3840 case Iop_D64toI64U: 3841 case Iop_I64StoD64: 3842 case Iop_I64UtoD64: 3843 /* I32(rm) x I64/D64 -> D64/I64 */ 3844 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3845 3846 case Iop_F32toD32: 3847 case Iop_F64toD32: 3848 case Iop_F128toD32: 3849 case Iop_D32toF32: 3850 case Iop_D64toF32: 3851 case Iop_D128toF32: 3852 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */ 3853 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3854 3855 case Iop_F32toD64: 3856 case Iop_F64toD64: 3857 case Iop_F128toD64: 3858 case Iop_D32toF64: 3859 case Iop_D64toF64: 3860 case Iop_D128toF64: 3861 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */ 3862 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3863 3864 case Iop_F32toD128: 3865 case Iop_F64toD128: 3866 case Iop_F128toD128: 3867 case Iop_D32toF128: 3868 case Iop_D64toF128: 3869 case Iop_D128toF128: 3870 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */ 3871 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3872 3873 case Iop_RoundF32toInt: 3874 case Iop_SqrtF32: 3875 case Iop_RecpExpF32: 3876 /* I32(rm) x I32/F32 -> I32/F32 */ 3877 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3878 3879 case Iop_SqrtF128: 3880 /* I32(rm) x F128 -> F128 */ 3881 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3882 3883 case Iop_I32StoF32: 3884 case Iop_I32UtoF32: 3885 case Iop_F32toI32S: 3886 case Iop_F32toI32U: 3887 /* First arg is I32 (rounding mode), second is F32/I32 (data). */ 3888 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3889 3890 case Iop_F64toF16: 3891 case Iop_F32toF16: 3892 /* First arg is I32 (rounding mode), second is F64/F32 (data). */ 3893 return mkLazy2(mce, Ity_I16, vatom1, vatom2); 3894 3895 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */ 3896 case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */ 3897 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */ 3898 case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32 */ 3899 case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32 */ 3900 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3901 3902 case Iop_F128toI128S: /* IRRoundingMode(I32) x F128 -> signed I128 */ 3903 case Iop_RndF128: /* IRRoundingMode(I32) x F128 -> F128 */ 3904 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3905 3906 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */ 3907 case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */ 3908 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */ 3909 case Iop_D128toD64: /* IRRoundingMode(I64) x D128 -> D64 */ 3910 case Iop_D128toI64S: /* IRRoundingMode(I64) x D128 -> signed I64 */ 3911 case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64 */ 3912 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3913 3914 case Iop_F64HLtoF128: 3915 case Iop_D64HLtoD128: 3916 return assignNew('V', mce, Ity_I128, 3917 binop(Iop_64HLto128, vatom1, vatom2)); 3918 3919 case Iop_F64toI32U: 3920 case Iop_F64toI32S: 3921 case Iop_F64toF32: 3922 case Iop_I64UtoF32: 3923 case Iop_D64toI32U: 3924 case Iop_D64toI32S: 3925 /* First arg is I32 (rounding mode), second is F64/D64 (data). */ 3926 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3927 3928 case Iop_D64toD32: 3929 /* First arg is I32 (rounding mode), second is D64 (data). 
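            (As with the other scalar FP/DFP cases above, the mkLazy2
            call below is, in outline, a worst-case approximation: it
            UifU's PCasts of the two argument shadows and PCasts the
            combined value to the result type, so any undefined input
            bit marks the entire result as undefined.)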
*/ 3930 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3931 3932 case Iop_F64toI16S: 3933 /* First arg is I32 (rounding mode), second is F64 (data). */ 3934 return mkLazy2(mce, Ity_I16, vatom1, vatom2); 3935 3936 case Iop_InsertExpD64: 3937 /* I64 x I64 -> D64 */ 3938 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3939 3940 case Iop_InsertExpD128: 3941 /* I64 x I128 -> D128 */ 3942 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3943 3944 case Iop_CmpF32: 3945 case Iop_CmpF64: 3946 case Iop_CmpF128: 3947 case Iop_CmpD64: 3948 case Iop_CmpD128: 3949 case Iop_CmpExpD64: 3950 case Iop_CmpExpD128: 3951 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3952 3953 case Iop_MaxNumF32: 3954 case Iop_MinNumF32: 3955 /* F32 x F32 -> F32 */ 3956 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3957 3958 case Iop_MaxNumF64: 3959 case Iop_MinNumF64: 3960 /* F64 x F64 -> F64 */ 3961 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3962 3963 /* non-FP after here */ 3964 3965 case Iop_DivModU64to32: 3966 case Iop_DivModS64to32: 3967 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3968 3969 case Iop_DivModU128to64: 3970 case Iop_DivModS128to64: 3971 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3972 3973 case Iop_8HLto16: 3974 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2)); 3975 case Iop_16HLto32: 3976 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2)); 3977 case Iop_32HLto64: 3978 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2)); 3979 3980 case Iop_DivModS64to64: 3981 case Iop_MullS64: 3982 case Iop_MullU64: { 3983 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2)); 3984 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64); 3985 return assignNew('V', mce, Ity_I128, 3986 binop(Iop_64HLto128, vHi64, vLo64)); 3987 } 3988 3989 case Iop_MullS32: 3990 case Iop_MullU32: { 3991 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2)); 3992 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32); 3993 return assignNew('V', mce, Ity_I64, 3994 binop(Iop_32HLto64, vHi32, vLo32)); 3995 } 3996 3997 case Iop_MullS16: 3998 case Iop_MullU16: { 3999 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2)); 4000 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16); 4001 return assignNew('V', mce, Ity_I32, 4002 binop(Iop_16HLto32, vHi16, vLo16)); 4003 } 4004 4005 case Iop_MullS8: 4006 case Iop_MullU8: { 4007 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2)); 4008 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8); 4009 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8)); 4010 } 4011 4012 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. 
*/ 4013 case Iop_DivS32: 4014 case Iop_DivU32: 4015 case Iop_DivU32E: 4016 case Iop_DivS32E: 4017 case Iop_QAdd32S: /* could probably do better */ 4018 case Iop_QSub32S: /* could probably do better */ 4019 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 4020 4021 case Iop_DivS64: 4022 case Iop_DivU64: 4023 case Iop_DivS64E: 4024 case Iop_DivU64E: 4025 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 4026 4027 case Iop_Add32: 4028 if (mce->bogusLiterals || mce->useLLVMworkarounds) 4029 return expensiveAddSub(mce,True,Ity_I32, 4030 vatom1,vatom2, atom1,atom2); 4031 else 4032 goto cheap_AddSub32; 4033 case Iop_Sub32: 4034 if (mce->bogusLiterals) 4035 return expensiveAddSub(mce,False,Ity_I32, 4036 vatom1,vatom2, atom1,atom2); 4037 else 4038 goto cheap_AddSub32; 4039 4040 cheap_AddSub32: 4041 case Iop_Mul32: 4042 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2)); 4043 4044 case Iop_CmpORD32S: 4045 case Iop_CmpORD32U: 4046 case Iop_CmpORD64S: 4047 case Iop_CmpORD64U: 4048 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2); 4049 4050 case Iop_Add64: 4051 if (mce->bogusLiterals || mce->useLLVMworkarounds) 4052 return expensiveAddSub(mce,True,Ity_I64, 4053 vatom1,vatom2, atom1,atom2); 4054 else 4055 goto cheap_AddSub64; 4056 case Iop_Sub64: 4057 if (mce->bogusLiterals) 4058 return expensiveAddSub(mce,False,Ity_I64, 4059 vatom1,vatom2, atom1,atom2); 4060 else 4061 goto cheap_AddSub64; 4062 4063 cheap_AddSub64: 4064 case Iop_Mul64: 4065 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2)); 4066 4067 case Iop_Mul16: 4068 case Iop_Add16: 4069 case Iop_Sub16: 4070 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2)); 4071 4072 case Iop_Mul8: 4073 case Iop_Sub8: 4074 case Iop_Add8: 4075 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2)); 4076 4077 case Iop_CmpEQ64: 4078 case Iop_CmpNE64: 4079 if (mce->bogusLiterals) 4080 goto expensive_cmp64; 4081 else 4082 goto cheap_cmp64; 4083 4084 expensive_cmp64: 4085 case Iop_ExpCmpNE64: 4086 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 ); 4087 4088 cheap_cmp64: 4089 case Iop_CmpLE64S: case Iop_CmpLE64U: 4090 case Iop_CmpLT64U: case Iop_CmpLT64S: 4091 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2)); 4092 4093 case Iop_CmpEQ32: 4094 case Iop_CmpNE32: 4095 if (mce->bogusLiterals) 4096 goto expensive_cmp32; 4097 else 4098 goto cheap_cmp32; 4099 4100 expensive_cmp32: 4101 case Iop_ExpCmpNE32: 4102 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 ); 4103 4104 cheap_cmp32: 4105 case Iop_CmpLE32S: case Iop_CmpLE32U: 4106 case Iop_CmpLT32U: case Iop_CmpLT32S: 4107 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2)); 4108 4109 case Iop_CmpEQ16: case Iop_CmpNE16: 4110 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2)); 4111 4112 case Iop_ExpCmpNE16: 4113 return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 ); 4114 4115 case Iop_CmpEQ8: case Iop_CmpNE8: 4116 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2)); 4117 4118 case Iop_CasCmpEQ8: case Iop_CasCmpNE8: 4119 case Iop_CasCmpEQ16: case Iop_CasCmpNE16: 4120 case Iop_CasCmpEQ32: case Iop_CasCmpNE32: 4121 case Iop_CasCmpEQ64: case Iop_CasCmpNE64: 4122 /* Just say these all produce a defined result, regardless 4123 of their arguments. See COMMENT_ON_CasCmpEQ in this file. 
*/ 4124 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1)); 4125 4126 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64: 4127 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 ); 4128 4129 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32: 4130 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 ); 4131 4132 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16: 4133 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 ); 4134 4135 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8: 4136 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 ); 4137 4138 case Iop_AndV256: 4139 uifu = mkUifUV256; difd = mkDifDV256; 4140 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or; 4141 case Iop_AndV128: 4142 uifu = mkUifUV128; difd = mkDifDV128; 4143 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or; 4144 case Iop_And64: 4145 uifu = mkUifU64; difd = mkDifD64; 4146 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or; 4147 case Iop_And32: 4148 uifu = mkUifU32; difd = mkDifD32; 4149 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or; 4150 case Iop_And16: 4151 uifu = mkUifU16; difd = mkDifD16; 4152 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or; 4153 case Iop_And8: 4154 uifu = mkUifU8; difd = mkDifD8; 4155 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or; 4156 4157 case Iop_OrV256: 4158 uifu = mkUifUV256; difd = mkDifDV256; 4159 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or; 4160 case Iop_OrV128: 4161 uifu = mkUifUV128; difd = mkDifDV128; 4162 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or; 4163 case Iop_Or64: 4164 uifu = mkUifU64; difd = mkDifD64; 4165 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or; 4166 case Iop_Or32: 4167 uifu = mkUifU32; difd = mkDifD32; 4168 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or; 4169 case Iop_Or16: 4170 uifu = mkUifU16; difd = mkDifD16; 4171 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or; 4172 case Iop_Or8: 4173 uifu = mkUifU8; difd = mkDifD8; 4174 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or; 4175 4176 do_And_Or: 4177 return 4178 assignNew( 4179 'V', mce, 4180 and_or_ty, 4181 difd(mce, uifu(mce, vatom1, vatom2), 4182 difd(mce, improve(mce, atom1, vatom1), 4183 improve(mce, atom2, vatom2) ) ) ); 4184 4185 case Iop_Xor8: 4186 return mkUifU8(mce, vatom1, vatom2); 4187 case Iop_Xor16: 4188 return mkUifU16(mce, vatom1, vatom2); 4189 case Iop_Xor32: 4190 return mkUifU32(mce, vatom1, vatom2); 4191 case Iop_Xor64: 4192 return mkUifU64(mce, vatom1, vatom2); 4193 case Iop_XorV128: 4194 return mkUifUV128(mce, vatom1, vatom2); 4195 case Iop_XorV256: 4196 return mkUifUV256(mce, vatom1, vatom2); 4197 4198 /* V256-bit SIMD */ 4199 4200 case Iop_ShrN16x16: 4201 case Iop_ShrN32x8: 4202 case Iop_ShrN64x4: 4203 case Iop_SarN16x16: 4204 case Iop_SarN32x8: 4205 case Iop_ShlN16x16: 4206 case Iop_ShlN32x8: 4207 case Iop_ShlN64x4: 4208 /* Same scheme as with all other shifts. Note: 22 Oct 05: 4209 this is wrong now, scalar shifts are done properly lazily. 4210 Vector shifts should be fixed too. 
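         (For comparison, the lazy scalar scheme -- see scalarShift,
         used for Iop_Shl64 and friends above -- shifts the V bits by
         the original shift amount and then UifU's in a PCast of the
         shift amount's own V bits, instead of eagerly complaining here
         about an undefined shift amount.  This is an informal summary,
         not a literal description of that code.)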
      */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));

      case Iop_QSub8Ux32:
      case Iop_QSub8Sx32:
      case Iop_Sub8x32:
      case Iop_Min8Ux32:
      case Iop_Min8Sx32:
      case Iop_Max8Ux32:
      case Iop_Max8Sx32:
      case Iop_CmpGT8Sx32:
      case Iop_CmpEQ8x32:
      case Iop_Avg8Ux32:
      case Iop_QAdd8Ux32:
      case Iop_QAdd8Sx32:
      case Iop_Add8x32:
         return binary8Ix32(mce, vatom1, vatom2);

      case Iop_QSub16Ux16:
      case Iop_QSub16Sx16:
      case Iop_Sub16x16:
      case Iop_Mul16x16:
      case Iop_MulHi16Sx16:
      case Iop_MulHi16Ux16:
      case Iop_Min16Sx16:
      case Iop_Min16Ux16:
      case Iop_Max16Sx16:
      case Iop_Max16Ux16:
      case Iop_CmpGT16Sx16:
      case Iop_CmpEQ16x16:
      case Iop_Avg16Ux16:
      case Iop_QAdd16Ux16:
      case Iop_QAdd16Sx16:
      case Iop_Add16x16:
         return binary16Ix16(mce, vatom1, vatom2);

      case Iop_Sub32x8:
      case Iop_CmpGT32Sx8:
      case Iop_CmpEQ32x8:
      case Iop_Add32x8:
      case Iop_Max32Ux8:
      case Iop_Max32Sx8:
      case Iop_Min32Ux8:
      case Iop_Min32Sx8:
      case Iop_Mul32x8:
         return binary32Ix8(mce, vatom1, vatom2);

      case Iop_Sub64x4:
      case Iop_Add64x4:
      case Iop_CmpEQ64x4:
      case Iop_CmpGT64Sx4:
         return binary64Ix4(mce, vatom1, vatom2);

      /* Perm32x8: rearrange values in left arg using steering values
         from right arg.  So rearrange the vbits in the same way but
         pessimise wrt steering values. */
      case Iop_Perm32x8:
         return mkUifUV256(
                   mce,
                   assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
                   mkPCast32x8(mce, vatom2)
                );

      /* Q-and-Qshift-by-vector of the form (V128, V128) -> V256.
         Handle the shifted results in the same way that other
         binary Q ops are handled, eg QSub: UifU the two args,
         then pessimise -- which is binaryNIxM.  But for the upper
         V128, we need to generate just 1 bit, which is the
         pessimised shift result, with 127 defined zeroes above it.

         Note that this is overly pessimistic in that in fact only the
         bottom 8 bits of each lane of the second arg determine the shift
         amount.  Really we ought to ignore any undefinedness in the
         rest of each lane of the second arg.
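
         As a sketch of the shadow value assembled below (illustrative
         rather than a transcript of the IR): shV = binaryNIxM(vatom1,
         vatom2) is the pessimised V128 covering the shifted results;
         qV is PCast-to-I1(shV) placed in the least significant bit of
         an otherwise all-defined V128, i.e. 127 zero (defined) bits
         above a bit that is 1 iff any bit of shV is undefined; and the
         result is V128HLtoV256(qV, shV), with qV shadowing the Q
         (saturation) half and shV shadowing the shifted-results half.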
*/ 4285 case Iop_QandSQsh64x2: case Iop_QandUQsh64x2: 4286 case Iop_QandSQRsh64x2: case Iop_QandUQRsh64x2: 4287 case Iop_QandSQsh32x4: case Iop_QandUQsh32x4: 4288 case Iop_QandSQRsh32x4: case Iop_QandUQRsh32x4: 4289 case Iop_QandSQsh16x8: case Iop_QandUQsh16x8: 4290 case Iop_QandSQRsh16x8: case Iop_QandUQRsh16x8: 4291 case Iop_QandSQsh8x16: case Iop_QandUQsh8x16: 4292 case Iop_QandSQRsh8x16: case Iop_QandUQRsh8x16: 4293 { 4294 // The function to generate the pessimised shift result 4295 IRAtom* (*binaryNIxM)(MCEnv*,IRAtom*,IRAtom*) = NULL; 4296 switch (op) { 4297 case Iop_QandSQsh64x2: 4298 case Iop_QandUQsh64x2: 4299 case Iop_QandSQRsh64x2: 4300 case Iop_QandUQRsh64x2: 4301 binaryNIxM = binary64Ix2; 4302 break; 4303 case Iop_QandSQsh32x4: 4304 case Iop_QandUQsh32x4: 4305 case Iop_QandSQRsh32x4: 4306 case Iop_QandUQRsh32x4: 4307 binaryNIxM = binary32Ix4; 4308 break; 4309 case Iop_QandSQsh16x8: 4310 case Iop_QandUQsh16x8: 4311 case Iop_QandSQRsh16x8: 4312 case Iop_QandUQRsh16x8: 4313 binaryNIxM = binary16Ix8; 4314 break; 4315 case Iop_QandSQsh8x16: 4316 case Iop_QandUQsh8x16: 4317 case Iop_QandSQRsh8x16: 4318 case Iop_QandUQRsh8x16: 4319 binaryNIxM = binary8Ix16; 4320 break; 4321 default: 4322 tl_assert(0); 4323 } 4324 tl_assert(binaryNIxM); 4325 // Pessimised shift result, shV[127:0] 4326 IRAtom* shV = binaryNIxM(mce, vatom1, vatom2); 4327 // Generates: Def--(127)--Def PCast-to-I1(shV) 4328 IRAtom* qV = mkPCastXXtoXXlsb(mce, shV, Ity_V128); 4329 // and assemble the result 4330 return assignNew('V', mce, Ity_V256, 4331 binop(Iop_V128HLtoV256, qV, shV)); 4332 } 4333 4334 default: 4335 ppIROp(op); 4336 VG_(tool_panic)("memcheck:expr2vbits_Binop"); 4337 } 4338 } 4339 4340 4341 static 4342 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom ) 4343 { 4344 /* For the widening operations {8,16,32}{U,S}to{16,32,64}, the 4345 selection of shadow operation implicitly duplicates the logic in 4346 do_shadow_LoadG and should be kept in sync (in the very unlikely 4347 event that the interpretation of such widening ops changes in 4348 future). See comment in do_shadow_LoadG. 
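
      (Illustratively: the shadow for Iop_8Uto64 is just Iop_8Uto64
      applied to the argument's V bits -- see the widening case group
      further down -- so the zero-filled high bits of the result are
      marked defined, while for Iop_8Sto64 the definedness of the sign
      bit is copied into them, matching what the real operation does
      with the data.)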
*/ 4349 IRAtom* vatom = expr2vbits( mce, atom ); 4350 tl_assert(isOriginalAtom(mce,atom)); 4351 switch (op) { 4352 4353 case Iop_Abs64Fx2: 4354 case Iop_Neg64Fx2: 4355 case Iop_RSqrtEst64Fx2: 4356 case Iop_RecipEst64Fx2: 4357 return unary64Fx2(mce, vatom); 4358 4359 case Iop_Sqrt64F0x2: 4360 return unary64F0x2(mce, vatom); 4361 4362 case Iop_Sqrt32Fx8: 4363 case Iop_RSqrtEst32Fx8: 4364 case Iop_RecipEst32Fx8: 4365 return unary32Fx8(mce, vatom); 4366 4367 case Iop_Sqrt64Fx4: 4368 return unary64Fx4(mce, vatom); 4369 4370 case Iop_RecipEst32Fx4: 4371 case Iop_I32UtoFx4: 4372 case Iop_I32StoFx4: 4373 case Iop_QFtoI32Ux4_RZ: 4374 case Iop_QFtoI32Sx4_RZ: 4375 case Iop_RoundF32x4_RM: 4376 case Iop_RoundF32x4_RP: 4377 case Iop_RoundF32x4_RN: 4378 case Iop_RoundF32x4_RZ: 4379 case Iop_RecipEst32Ux4: 4380 case Iop_Abs32Fx4: 4381 case Iop_Neg32Fx4: 4382 case Iop_RSqrtEst32Fx4: 4383 return unary32Fx4(mce, vatom); 4384 4385 case Iop_I32UtoFx2: 4386 case Iop_I32StoFx2: 4387 case Iop_RecipEst32Fx2: 4388 case Iop_RecipEst32Ux2: 4389 case Iop_Abs32Fx2: 4390 case Iop_Neg32Fx2: 4391 case Iop_RSqrtEst32Fx2: 4392 return unary32Fx2(mce, vatom); 4393 4394 case Iop_Sqrt32F0x4: 4395 case Iop_RSqrtEst32F0x4: 4396 case Iop_RecipEst32F0x4: 4397 return unary32F0x4(mce, vatom); 4398 4399 case Iop_32UtoV128: 4400 case Iop_64UtoV128: 4401 case Iop_Dup8x16: 4402 case Iop_Dup16x8: 4403 case Iop_Dup32x4: 4404 case Iop_Reverse1sIn8_x16: 4405 case Iop_Reverse8sIn16_x8: 4406 case Iop_Reverse8sIn32_x4: 4407 case Iop_Reverse16sIn32_x4: 4408 case Iop_Reverse8sIn64_x2: 4409 case Iop_Reverse16sIn64_x2: 4410 case Iop_Reverse32sIn64_x2: 4411 case Iop_V256toV128_1: case Iop_V256toV128_0: 4412 case Iop_ZeroHI64ofV128: 4413 case Iop_ZeroHI96ofV128: 4414 case Iop_ZeroHI112ofV128: 4415 case Iop_ZeroHI120ofV128: 4416 return assignNew('V', mce, Ity_V128, unop(op, vatom)); 4417 4418 case Iop_F128HItoF64: /* F128 -> high half of F128 */ 4419 case Iop_D128HItoD64: /* D128 -> high half of D128 */ 4420 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom)); 4421 case Iop_F128LOtoF64: /* F128 -> low half of F128 */ 4422 case Iop_D128LOtoD64: /* D128 -> low half of D128 */ 4423 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom)); 4424 4425 case Iop_NegF128: 4426 case Iop_AbsF128: 4427 case Iop_RndF128: 4428 case Iop_TruncF128toI64S: /* F128 -> I64S */ 4429 case Iop_TruncF128toI32S: /* F128 -> I32S (result stored in 64-bits) */ 4430 case Iop_TruncF128toI64U: /* F128 -> I64U */ 4431 case Iop_TruncF128toI32U: /* F128 -> I32U (result stored in 64-bits) */ 4432 return mkPCastTo(mce, Ity_I128, vatom); 4433 4434 case Iop_BCD128toI128S: 4435 case Iop_MulI128by10: 4436 case Iop_MulI128by10Carry: 4437 case Iop_F16toF64x2: 4438 case Iop_F64toF16x2: 4439 return vatom; 4440 4441 case Iop_I32StoF128: /* signed I32 -> F128 */ 4442 case Iop_I64StoF128: /* signed I64 -> F128 */ 4443 case Iop_I32UtoF128: /* unsigned I32 -> F128 */ 4444 case Iop_I64UtoF128: /* unsigned I64 -> F128 */ 4445 case Iop_F32toF128: /* F32 -> F128 */ 4446 case Iop_F64toF128: /* F64 -> F128 */ 4447 case Iop_I32StoD128: /* signed I64 -> D128 */ 4448 case Iop_I64StoD128: /* signed I64 -> D128 */ 4449 case Iop_I32UtoD128: /* unsigned I32 -> D128 */ 4450 case Iop_I64UtoD128: /* unsigned I64 -> D128 */ 4451 return mkPCastTo(mce, Ity_I128, vatom); 4452 4453 case Iop_F16toF64: 4454 case Iop_F32toF64: 4455 case Iop_I32StoF64: 4456 case Iop_I32UtoF64: 4457 case Iop_NegF64: 4458 case Iop_AbsF64: 4459 case Iop_RSqrtEst5GoodF64: 4460 case Iop_RoundF64toF64_NEAREST: 4461 case 
Iop_RoundF64toF64_NegINF: 4462 case Iop_RoundF64toF64_PosINF: 4463 case Iop_RoundF64toF64_ZERO: 4464 case Iop_Clz64: 4465 case Iop_D32toD64: 4466 case Iop_I32StoD64: 4467 case Iop_I32UtoD64: 4468 case Iop_ExtractExpD64: /* D64 -> I64 */ 4469 case Iop_ExtractExpD128: /* D128 -> I64 */ 4470 case Iop_ExtractSigD64: /* D64 -> I64 */ 4471 case Iop_ExtractSigD128: /* D128 -> I64 */ 4472 case Iop_DPBtoBCD: 4473 case Iop_BCDtoDPB: 4474 return mkPCastTo(mce, Ity_I64, vatom); 4475 4476 case Iop_D64toD128: 4477 return mkPCastTo(mce, Ity_I128, vatom); 4478 4479 case Iop_Clz32: 4480 case Iop_TruncF64asF32: 4481 case Iop_NegF32: 4482 case Iop_AbsF32: 4483 case Iop_F16toF32: 4484 return mkPCastTo(mce, Ity_I32, vatom); 4485 4486 case Iop_Ctz32: 4487 case Iop_Ctz64: 4488 return expensiveCountTrailingZeroes(mce, op, atom, vatom); 4489 4490 case Iop_1Uto64: 4491 case Iop_1Sto64: 4492 case Iop_8Uto64: 4493 case Iop_8Sto64: 4494 case Iop_16Uto64: 4495 case Iop_16Sto64: 4496 case Iop_32Sto64: 4497 case Iop_32Uto64: 4498 case Iop_V128to64: 4499 case Iop_V128HIto64: 4500 case Iop_128HIto64: 4501 case Iop_128to64: 4502 case Iop_Dup8x8: 4503 case Iop_Dup16x4: 4504 case Iop_Dup32x2: 4505 case Iop_Reverse8sIn16_x4: 4506 case Iop_Reverse8sIn32_x2: 4507 case Iop_Reverse16sIn32_x2: 4508 case Iop_Reverse8sIn64_x1: 4509 case Iop_Reverse16sIn64_x1: 4510 case Iop_Reverse32sIn64_x1: 4511 case Iop_V256to64_0: case Iop_V256to64_1: 4512 case Iop_V256to64_2: case Iop_V256to64_3: 4513 return assignNew('V', mce, Ity_I64, unop(op, vatom)); 4514 4515 case Iop_64to32: 4516 case Iop_64HIto32: 4517 case Iop_1Uto32: 4518 case Iop_1Sto32: 4519 case Iop_8Uto32: 4520 case Iop_16Uto32: 4521 case Iop_16Sto32: 4522 case Iop_8Sto32: 4523 case Iop_V128to32: 4524 return assignNew('V', mce, Ity_I32, unop(op, vatom)); 4525 4526 case Iop_8Sto16: 4527 case Iop_8Uto16: 4528 case Iop_32to16: 4529 case Iop_32HIto16: 4530 case Iop_64to16: 4531 case Iop_GetMSBs8x16: 4532 return assignNew('V', mce, Ity_I16, unop(op, vatom)); 4533 4534 case Iop_1Uto8: 4535 case Iop_1Sto8: 4536 case Iop_16to8: 4537 case Iop_16HIto8: 4538 case Iop_32to8: 4539 case Iop_64to8: 4540 case Iop_GetMSBs8x8: 4541 return assignNew('V', mce, Ity_I8, unop(op, vatom)); 4542 4543 case Iop_32to1: 4544 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom)); 4545 4546 case Iop_64to1: 4547 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom)); 4548 4549 case Iop_ReinterpF64asI64: 4550 case Iop_ReinterpI64asF64: 4551 case Iop_ReinterpI32asF32: 4552 case Iop_ReinterpF32asI32: 4553 case Iop_ReinterpI64asD64: 4554 case Iop_ReinterpD64asI64: 4555 case Iop_NotV256: 4556 case Iop_NotV128: 4557 case Iop_Not64: 4558 case Iop_Not32: 4559 case Iop_Not16: 4560 case Iop_Not8: 4561 case Iop_Not1: 4562 return vatom; 4563 4564 case Iop_CmpNEZ8x8: 4565 case Iop_Cnt8x8: 4566 case Iop_Clz8x8: 4567 case Iop_Cls8x8: 4568 case Iop_Abs8x8: 4569 return mkPCast8x8(mce, vatom); 4570 4571 case Iop_CmpNEZ8x16: 4572 case Iop_Cnt8x16: 4573 case Iop_Clz8x16: 4574 case Iop_Cls8x16: 4575 case Iop_Abs8x16: 4576 case Iop_Ctz8x16: 4577 return mkPCast8x16(mce, vatom); 4578 4579 case Iop_CmpNEZ16x4: 4580 case Iop_Clz16x4: 4581 case Iop_Cls16x4: 4582 case Iop_Abs16x4: 4583 return mkPCast16x4(mce, vatom); 4584 4585 case Iop_CmpNEZ16x8: 4586 case Iop_Clz16x8: 4587 case Iop_Cls16x8: 4588 case Iop_Abs16x8: 4589 case Iop_Ctz16x8: 4590 return mkPCast16x8(mce, vatom); 4591 4592 case Iop_CmpNEZ32x2: 4593 case Iop_Clz32x2: 4594 case Iop_Cls32x2: 4595 case Iop_FtoI32Ux2_RZ: 4596 case Iop_FtoI32Sx2_RZ: 4597 case Iop_Abs32x2: 4598 
return mkPCast32x2(mce, vatom); 4599 4600 case Iop_CmpNEZ32x4: 4601 case Iop_Clz32x4: 4602 case Iop_Cls32x4: 4603 case Iop_FtoI32Ux4_RZ: 4604 case Iop_FtoI32Sx4_RZ: 4605 case Iop_Abs32x4: 4606 case Iop_RSqrtEst32Ux4: 4607 case Iop_Ctz32x4: 4608 return mkPCast32x4(mce, vatom); 4609 4610 case Iop_CmpwNEZ32: 4611 return mkPCastTo(mce, Ity_I32, vatom); 4612 4613 case Iop_CmpwNEZ64: 4614 return mkPCastTo(mce, Ity_I64, vatom); 4615 4616 case Iop_CmpNEZ64x2: 4617 case Iop_CipherSV128: 4618 case Iop_Clz64x2: 4619 case Iop_Abs64x2: 4620 case Iop_Ctz64x2: 4621 return mkPCast64x2(mce, vatom); 4622 4623 case Iop_PwBitMtxXpose64x2: 4624 return assignNew('V', mce, Ity_V128, unop(op, vatom)); 4625 4626 case Iop_NarrowUn16to8x8: 4627 case Iop_NarrowUn32to16x4: 4628 case Iop_NarrowUn64to32x2: 4629 case Iop_QNarrowUn16Sto8Sx8: 4630 case Iop_QNarrowUn16Sto8Ux8: 4631 case Iop_QNarrowUn16Uto8Ux8: 4632 case Iop_QNarrowUn32Sto16Sx4: 4633 case Iop_QNarrowUn32Sto16Ux4: 4634 case Iop_QNarrowUn32Uto16Ux4: 4635 case Iop_QNarrowUn64Sto32Sx2: 4636 case Iop_QNarrowUn64Sto32Ux2: 4637 case Iop_QNarrowUn64Uto32Ux2: 4638 case Iop_F32toF16x4: 4639 return vectorNarrowUnV128(mce, op, vatom); 4640 4641 case Iop_Widen8Sto16x8: 4642 case Iop_Widen8Uto16x8: 4643 case Iop_Widen16Sto32x4: 4644 case Iop_Widen16Uto32x4: 4645 case Iop_Widen32Sto64x2: 4646 case Iop_Widen32Uto64x2: 4647 case Iop_F16toF32x4: 4648 return vectorWidenI64(mce, op, vatom); 4649 4650 case Iop_PwAddL32Ux2: 4651 case Iop_PwAddL32Sx2: 4652 return mkPCastTo(mce, Ity_I64, 4653 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom)))); 4654 4655 case Iop_PwAddL16Ux4: 4656 case Iop_PwAddL16Sx4: 4657 return mkPCast32x2(mce, 4658 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom)))); 4659 4660 case Iop_PwAddL8Ux8: 4661 case Iop_PwAddL8Sx8: 4662 return mkPCast16x4(mce, 4663 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom)))); 4664 4665 case Iop_PwAddL32Ux4: 4666 case Iop_PwAddL32Sx4: 4667 return mkPCast64x2(mce, 4668 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom)))); 4669 4670 case Iop_PwAddL16Ux8: 4671 case Iop_PwAddL16Sx8: 4672 return mkPCast32x4(mce, 4673 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom)))); 4674 4675 case Iop_PwAddL8Ux16: 4676 case Iop_PwAddL8Sx16: 4677 return mkPCast16x8(mce, 4678 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom)))); 4679 4680 case Iop_I64UtoF32: 4681 default: 4682 ppIROp(op); 4683 VG_(tool_panic)("memcheck:expr2vbits_Unop"); 4684 } 4685 } 4686 4687 4688 /* Worker function -- do not call directly. See comments on 4689 expr2vbits_Load for the meaning of |guard|. 4690 4691 Generates IR to (1) perform a definedness test of |addr|, (2) 4692 perform a validity test of |addr|, and (3) return the Vbits for the 4693 location indicated by |addr|. All of this only happens when 4694 |guard| is NULL or |guard| evaluates to True at run time. 4695 4696 If |guard| evaluates to False at run time, the returned value is 4697 the IR-mandated 0x55..55 value, and no checks nor shadow loads are 4698 performed. 4699 4700 The definedness of |guard| itself is not checked. That is assumed 4701 to have been done before this point, by the caller. */ 4702 static 4703 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, 4704 IREndness end, IRType ty, 4705 IRAtom* addr, UInt bias, IRAtom* guard ) 4706 { 4707 tl_assert(isOriginalAtom(mce,addr)); 4708 tl_assert(end == Iend_LE || end == Iend_BE); 4709 4710 /* First, emit a definedness test for the address. 
This also sets 4711 the address (shadow) to 'defined' following the test. */ 4712 complainIfUndefined( mce, addr, guard ); 4713 4714 /* Now cook up a call to the relevant helper function, to read the 4715 data V bits from shadow memory. */ 4716 ty = shadowTypeV(ty); 4717 4718 void* helper = NULL; 4719 const HChar* hname = NULL; 4720 Bool ret_via_outparam = False; 4721 4722 if (end == Iend_LE) { 4723 switch (ty) { 4724 case Ity_V256: helper = &MC_(helperc_LOADV256le); 4725 hname = "MC_(helperc_LOADV256le)"; 4726 ret_via_outparam = True; 4727 break; 4728 case Ity_V128: helper = &MC_(helperc_LOADV128le); 4729 hname = "MC_(helperc_LOADV128le)"; 4730 ret_via_outparam = True; 4731 break; 4732 case Ity_I64: helper = &MC_(helperc_LOADV64le); 4733 hname = "MC_(helperc_LOADV64le)"; 4734 break; 4735 case Ity_I32: helper = &MC_(helperc_LOADV32le); 4736 hname = "MC_(helperc_LOADV32le)"; 4737 break; 4738 case Ity_I16: helper = &MC_(helperc_LOADV16le); 4739 hname = "MC_(helperc_LOADV16le)"; 4740 break; 4741 case Ity_I8: helper = &MC_(helperc_LOADV8); 4742 hname = "MC_(helperc_LOADV8)"; 4743 break; 4744 default: ppIRType(ty); 4745 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)"); 4746 } 4747 } else { 4748 switch (ty) { 4749 case Ity_V256: helper = &MC_(helperc_LOADV256be); 4750 hname = "MC_(helperc_LOADV256be)"; 4751 ret_via_outparam = True; 4752 break; 4753 case Ity_V128: helper = &MC_(helperc_LOADV128be); 4754 hname = "MC_(helperc_LOADV128be)"; 4755 ret_via_outparam = True; 4756 break; 4757 case Ity_I64: helper = &MC_(helperc_LOADV64be); 4758 hname = "MC_(helperc_LOADV64be)"; 4759 break; 4760 case Ity_I32: helper = &MC_(helperc_LOADV32be); 4761 hname = "MC_(helperc_LOADV32be)"; 4762 break; 4763 case Ity_I16: helper = &MC_(helperc_LOADV16be); 4764 hname = "MC_(helperc_LOADV16be)"; 4765 break; 4766 case Ity_I8: helper = &MC_(helperc_LOADV8); 4767 hname = "MC_(helperc_LOADV8)"; 4768 break; 4769 default: ppIRType(ty); 4770 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)"); 4771 } 4772 } 4773 4774 tl_assert(helper); 4775 tl_assert(hname); 4776 4777 /* Generate the actual address into addrAct. */ 4778 IRAtom* addrAct; 4779 if (bias == 0) { 4780 addrAct = addr; 4781 } else { 4782 IROp mkAdd; 4783 IRAtom* eBias; 4784 IRType tyAddr = mce->hWordTy; 4785 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); 4786 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; 4787 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias); 4788 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) ); 4789 } 4790 4791 /* We need to have a place to park the V bits we're just about to 4792 read. */ 4793 IRTemp datavbits = newTemp(mce, ty, VSh); 4794 4795 /* Here's the call. */ 4796 IRDirty* di; 4797 if (ret_via_outparam) { 4798 di = unsafeIRDirty_1_N( datavbits, 4799 2/*regparms*/, 4800 hname, VG_(fnptr_to_fnentry)( helper ), 4801 mkIRExprVec_2( IRExpr_VECRET(), addrAct ) ); 4802 } else { 4803 di = unsafeIRDirty_1_N( datavbits, 4804 1/*regparms*/, 4805 hname, VG_(fnptr_to_fnentry)( helper ), 4806 mkIRExprVec_1( addrAct ) ); 4807 } 4808 4809 setHelperAnns( mce, di ); 4810 if (guard) { 4811 di->guard = guard; 4812 /* Ideally the didn't-happen return value here would be all-ones 4813 (all-undefined), so it'd be obvious if it got used 4814 inadvertently. We can get by with the IR-mandated default 4815 value (0b01 repeating, 0x55 etc) as that'll still look pretty 4816 undefined if it ever leaks out. 
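         (Concretely: for an Ity_I32 load whose guard is False at run
         time, |datavbits| will be 0x55555555 -- alternating defined
         and undefined bits -- rather than the all-undefined 0xFFFFFFFF
         we would ideally like.)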
*/ 4817 } 4818 stmt( 'V', mce, IRStmt_Dirty(di) ); 4819 4820 return mkexpr(datavbits); 4821 } 4822 4823 4824 /* Generate IR to do a shadow load. The helper is expected to check 4825 the validity of the address and return the V bits for that address. 4826 This can optionally be controlled by a guard, which is assumed to 4827 be True if NULL. In the case where the guard is False at runtime, 4828 the helper will return the didn't-do-the-call value of 0x55..55. 4829 Since that means "completely undefined result", the caller of 4830 this function will need to fix up the result somehow in that 4831 case. 4832 4833 Caller of this function is also expected to have checked the 4834 definedness of |guard| before this point. 4835 */ 4836 static 4837 IRAtom* expr2vbits_Load ( MCEnv* mce, 4838 IREndness end, IRType ty, 4839 IRAtom* addr, UInt bias, 4840 IRAtom* guard ) 4841 { 4842 tl_assert(end == Iend_LE || end == Iend_BE); 4843 switch (shadowTypeV(ty)) { 4844 case Ity_I8: 4845 case Ity_I16: 4846 case Ity_I32: 4847 case Ity_I64: 4848 case Ity_V128: 4849 case Ity_V256: 4850 return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard); 4851 default: 4852 VG_(tool_panic)("expr2vbits_Load"); 4853 } 4854 } 4855 4856 4857 /* The most general handler for guarded loads. Assumes the 4858 definedness of GUARD has already been checked by the caller. A 4859 GUARD of NULL is assumed to mean "always True". Generates code to 4860 check the definedness and validity of ADDR. 4861 4862 Generate IR to do a shadow load from ADDR and return the V bits. 4863 The loaded type is TY. The loaded data is then (shadow) widened by 4864 using VWIDEN, which can be Iop_INVALID to denote a no-op. If GUARD 4865 evaluates to False at run time then the returned Vbits are simply 4866 VALT instead. Note therefore that the argument type of VWIDEN must 4867 be TY and the result type of VWIDEN must equal the type of VALT. 4868 */ 4869 static 4870 IRAtom* expr2vbits_Load_guarded_General ( MCEnv* mce, 4871 IREndness end, IRType ty, 4872 IRAtom* addr, UInt bias, 4873 IRAtom* guard, 4874 IROp vwiden, IRAtom* valt ) 4875 { 4876 /* Sanity check the conversion operation, and also set TYWIDE. */ 4877 IRType tyWide = Ity_INVALID; 4878 switch (vwiden) { 4879 case Iop_INVALID: 4880 tyWide = ty; 4881 break; 4882 case Iop_16Uto32: case Iop_16Sto32: case Iop_8Uto32: case Iop_8Sto32: 4883 tyWide = Ity_I32; 4884 break; 4885 default: 4886 VG_(tool_panic)("memcheck:expr2vbits_Load_guarded_General"); 4887 } 4888 4889 /* If the guard evaluates to True, this will hold the loaded V bits 4890 at TY. If the guard evaluates to False, this will be all 4891 ones, meaning "all undefined", in which case we will have to 4892 replace it using an ITE below. */ 4893 IRAtom* iftrue1 4894 = assignNew('V', mce, ty, 4895 expr2vbits_Load(mce, end, ty, addr, bias, guard)); 4896 /* Now (shadow-) widen the loaded V bits to the desired width. In 4897 the guard-is-False case, the allowable widening operators will 4898 in the worst case (unsigned widening) at least leave the 4899 pre-widened part as being marked all-undefined, and in the best 4900 case (signed widening) mark the whole widened result as 4901 undefined. Anyway, it doesn't matter really, since in this case 4902 we will replace said value with the default value |valt| using an 4903 ITE. */ 4904 IRAtom* iftrue2 4905 = vwiden == Iop_INVALID 4906 ? iftrue1 4907 : assignNew('V', mce, tyWide, unop(vwiden, iftrue1)); 4908 /* These are the V bits we will return if the load doesn't take 4909 place. 
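      (Putting the pieces together: for ty == Ity_I16 and vwiden ==
      Iop_16Uto32, the value assembled below is, roughly,
      ITE(guard, 16Uto32(<guarded I16 shadow load>), valt) -- an
      illustrative shape rather than the exact IR emitted.)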
*/ 4910 IRAtom* iffalse 4911 = valt; 4912 /* Prepare the cond for the ITE. Convert a NULL cond into 4913 something that iropt knows how to fold out later. */ 4914 IRAtom* cond 4915 = guard == NULL ? mkU1(1) : guard; 4916 /* And assemble the final result. */ 4917 return assignNew('V', mce, tyWide, IRExpr_ITE(cond, iftrue2, iffalse)); 4918 } 4919 4920 4921 /* A simpler handler for guarded loads, in which there is no 4922 conversion operation, and the default V bit return (when the guard 4923 evaluates to False at runtime) is "all defined". If there is no 4924 guard expression or the guard is always TRUE this function behaves 4925 like expr2vbits_Load. It is assumed that definedness of GUARD has 4926 already been checked at the call site. */ 4927 static 4928 IRAtom* expr2vbits_Load_guarded_Simple ( MCEnv* mce, 4929 IREndness end, IRType ty, 4930 IRAtom* addr, UInt bias, 4931 IRAtom *guard ) 4932 { 4933 return expr2vbits_Load_guarded_General( 4934 mce, end, ty, addr, bias, guard, Iop_INVALID, definedOfType(ty) 4935 ); 4936 } 4937 4938 4939 static 4940 IRAtom* expr2vbits_ITE ( MCEnv* mce, 4941 IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse ) 4942 { 4943 IRAtom *vbitsC, *vbits0, *vbits1; 4944 IRType ty; 4945 /* Given ITE(cond, iftrue, iffalse), generate 4946 ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#) 4947 That is, steer the V bits like the originals, but trash the 4948 result if the steering value is undefined. This gives 4949 lazy propagation. */ 4950 tl_assert(isOriginalAtom(mce, cond)); 4951 tl_assert(isOriginalAtom(mce, iftrue)); 4952 tl_assert(isOriginalAtom(mce, iffalse)); 4953 4954 vbitsC = expr2vbits(mce, cond); 4955 vbits1 = expr2vbits(mce, iftrue); 4956 vbits0 = expr2vbits(mce, iffalse); 4957 ty = typeOfIRExpr(mce->sb->tyenv, vbits0); 4958 4959 return 4960 mkUifU(mce, ty, assignNew('V', mce, ty, 4961 IRExpr_ITE(cond, vbits1, vbits0)), 4962 mkPCastTo(mce, ty, vbitsC) ); 4963 } 4964 4965 /* --------- This is the main expression-handling function. 
--------- */ 4966 4967 static 4968 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e ) 4969 { 4970 switch (e->tag) { 4971 4972 case Iex_Get: 4973 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty ); 4974 4975 case Iex_GetI: 4976 return shadow_GETI( mce, e->Iex.GetI.descr, 4977 e->Iex.GetI.ix, e->Iex.GetI.bias ); 4978 4979 case Iex_RdTmp: 4980 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) ); 4981 4982 case Iex_Const: 4983 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e))); 4984 4985 case Iex_Qop: 4986 return expr2vbits_Qop( 4987 mce, 4988 e->Iex.Qop.details->op, 4989 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2, 4990 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4 4991 ); 4992 4993 case Iex_Triop: 4994 return expr2vbits_Triop( 4995 mce, 4996 e->Iex.Triop.details->op, 4997 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2, 4998 e->Iex.Triop.details->arg3 4999 ); 5000 5001 case Iex_Binop: 5002 return expr2vbits_Binop( 5003 mce, 5004 e->Iex.Binop.op, 5005 e->Iex.Binop.arg1, e->Iex.Binop.arg2 5006 ); 5007 5008 case Iex_Unop: 5009 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg ); 5010 5011 case Iex_Load: 5012 return expr2vbits_Load( mce, e->Iex.Load.end, 5013 e->Iex.Load.ty, 5014 e->Iex.Load.addr, 0/*addr bias*/, 5015 NULL/* guard == "always True"*/ ); 5016 5017 case Iex_CCall: 5018 return mkLazyN( mce, e->Iex.CCall.args, 5019 e->Iex.CCall.retty, 5020 e->Iex.CCall.cee ); 5021 5022 case Iex_ITE: 5023 return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue, 5024 e->Iex.ITE.iffalse); 5025 5026 default: 5027 VG_(printf)("\n"); 5028 ppIRExpr(e); 5029 VG_(printf)("\n"); 5030 VG_(tool_panic)("memcheck: expr2vbits"); 5031 } 5032 } 5033 5034 /*------------------------------------------------------------*/ 5035 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/ 5036 /*------------------------------------------------------------*/ 5037 5038 /* Widen a value to the host word size. */ 5039 5040 static 5041 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom ) 5042 { 5043 IRType ty, tyH; 5044 5045 /* vatom is vbits-value and as such can only have a shadow type. */ 5046 tl_assert(isShadowAtom(mce,vatom)); 5047 5048 ty = typeOfIRExpr(mce->sb->tyenv, vatom); 5049 tyH = mce->hWordTy; 5050 5051 if (tyH == Ity_I32) { 5052 switch (ty) { 5053 case Ity_I32: 5054 return vatom; 5055 case Ity_I16: 5056 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom)); 5057 case Ity_I8: 5058 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom)); 5059 default: 5060 goto unhandled; 5061 } 5062 } else 5063 if (tyH == Ity_I64) { 5064 switch (ty) { 5065 case Ity_I32: 5066 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom)); 5067 case Ity_I16: 5068 return assignNew('V', mce, tyH, unop(Iop_32Uto64, 5069 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom)))); 5070 case Ity_I8: 5071 return assignNew('V', mce, tyH, unop(Iop_32Uto64, 5072 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom)))); 5073 default: 5074 goto unhandled; 5075 } 5076 } else { 5077 goto unhandled; 5078 } 5079 unhandled: 5080 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n"); 5081 VG_(tool_panic)("zwidenToHostWord"); 5082 } 5083 5084 5085 /* Generate a shadow store. |addr| is always the original address 5086 atom. You can pass in either originals or V-bits for the data 5087 atom, but obviously not both. This function generates a check for 5088 the definedness and (indirectly) the validity of |addr|, but only 5089 when |guard| evaluates to True at run time (or is NULL). 
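
   (Typical call sites thus pass either (data, NULL), for an ordinary
   Ist_Store where this function computes the V bits itself, or
   (NULL, vdata) when the shadow value has already been computed --
   the CAS handling later in this file does the latter.  This is an
   informal note on usage, not an exhaustive list.)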
5090 5091 |guard| :: Ity_I1 controls whether the store really happens; NULL 5092 means it unconditionally does. Note that |guard| itself is not 5093 checked for definedness; the caller of this function must do that 5094 if necessary. 5095 */ 5096 static 5097 void do_shadow_Store ( MCEnv* mce, 5098 IREndness end, 5099 IRAtom* addr, UInt bias, 5100 IRAtom* data, IRAtom* vdata, 5101 IRAtom* guard ) 5102 { 5103 IROp mkAdd; 5104 IRType ty, tyAddr; 5105 void* helper = NULL; 5106 const HChar* hname = NULL; 5107 IRConst* c; 5108 5109 tyAddr = mce->hWordTy; 5110 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; 5111 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); 5112 tl_assert( end == Iend_LE || end == Iend_BE ); 5113 5114 if (data) { 5115 tl_assert(!vdata); 5116 tl_assert(isOriginalAtom(mce, data)); 5117 tl_assert(bias == 0); 5118 vdata = expr2vbits( mce, data ); 5119 } else { 5120 tl_assert(vdata); 5121 } 5122 5123 tl_assert(isOriginalAtom(mce,addr)); 5124 tl_assert(isShadowAtom(mce,vdata)); 5125 5126 if (guard) { 5127 tl_assert(isOriginalAtom(mce, guard)); 5128 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); 5129 } 5130 5131 ty = typeOfIRExpr(mce->sb->tyenv, vdata); 5132 5133 // If we're not doing undefined value checking, pretend that this value 5134 // is "all valid". That lets Vex's optimiser remove some of the V bit 5135 // shadow computation ops that precede it. 5136 if (MC_(clo_mc_level) == 1) { 5137 switch (ty) { 5138 case Ity_V256: // V256 weirdness -- used four times 5139 c = IRConst_V256(V_BITS32_DEFINED); break; 5140 case Ity_V128: // V128 weirdness -- used twice 5141 c = IRConst_V128(V_BITS16_DEFINED); break; 5142 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break; 5143 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break; 5144 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break; 5145 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break; 5146 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)"); 5147 } 5148 vdata = IRExpr_Const( c ); 5149 } 5150 5151 /* First, emit a definedness test for the address. This also sets 5152 the address (shadow) to 'defined' following the test. Both of 5153 those actions are gated on |guard|. */ 5154 complainIfUndefined( mce, addr, guard ); 5155 5156 /* Now decide which helper function to call to write the data V 5157 bits into shadow memory. 
*/ 5158 if (end == Iend_LE) { 5159 switch (ty) { 5160 case Ity_V256: /* we'll use the helper four times */ 5161 case Ity_V128: /* we'll use the helper twice */ 5162 case Ity_I64: helper = &MC_(helperc_STOREV64le); 5163 hname = "MC_(helperc_STOREV64le)"; 5164 break; 5165 case Ity_I32: helper = &MC_(helperc_STOREV32le); 5166 hname = "MC_(helperc_STOREV32le)"; 5167 break; 5168 case Ity_I16: helper = &MC_(helperc_STOREV16le); 5169 hname = "MC_(helperc_STOREV16le)"; 5170 break; 5171 case Ity_I8: helper = &MC_(helperc_STOREV8); 5172 hname = "MC_(helperc_STOREV8)"; 5173 break; 5174 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)"); 5175 } 5176 } else { 5177 switch (ty) { 5178 case Ity_V128: /* we'll use the helper twice */ 5179 case Ity_I64: helper = &MC_(helperc_STOREV64be); 5180 hname = "MC_(helperc_STOREV64be)"; 5181 break; 5182 case Ity_I32: helper = &MC_(helperc_STOREV32be); 5183 hname = "MC_(helperc_STOREV32be)"; 5184 break; 5185 case Ity_I16: helper = &MC_(helperc_STOREV16be); 5186 hname = "MC_(helperc_STOREV16be)"; 5187 break; 5188 case Ity_I8: helper = &MC_(helperc_STOREV8); 5189 hname = "MC_(helperc_STOREV8)"; 5190 break; 5191 /* Note, no V256 case here, because no big-endian target that 5192 we support, has 256 vectors. */ 5193 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)"); 5194 } 5195 } 5196 5197 if (UNLIKELY(ty == Ity_V256)) { 5198 5199 /* V256-bit case -- phrased in terms of 64 bit units (Qs), with 5200 Q3 being the most significant lane. */ 5201 /* These are the offsets of the Qs in memory. */ 5202 Int offQ0, offQ1, offQ2, offQ3; 5203 5204 /* Various bits for constructing the 4 lane helper calls */ 5205 IRDirty *diQ0, *diQ1, *diQ2, *diQ3; 5206 IRAtom *addrQ0, *addrQ1, *addrQ2, *addrQ3; 5207 IRAtom *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3; 5208 IRAtom *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3; 5209 5210 if (end == Iend_LE) { 5211 offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24; 5212 } else { 5213 offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24; 5214 } 5215 5216 eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0); 5217 addrQ0 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) ); 5218 vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata)); 5219 diQ0 = unsafeIRDirty_0_N( 5220 1/*regparms*/, 5221 hname, VG_(fnptr_to_fnentry)( helper ), 5222 mkIRExprVec_2( addrQ0, vdataQ0 ) 5223 ); 5224 5225 eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1); 5226 addrQ1 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) ); 5227 vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata)); 5228 diQ1 = unsafeIRDirty_0_N( 5229 1/*regparms*/, 5230 hname, VG_(fnptr_to_fnentry)( helper ), 5231 mkIRExprVec_2( addrQ1, vdataQ1 ) 5232 ); 5233 5234 eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2); 5235 addrQ2 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) ); 5236 vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata)); 5237 diQ2 = unsafeIRDirty_0_N( 5238 1/*regparms*/, 5239 hname, VG_(fnptr_to_fnentry)( helper ), 5240 mkIRExprVec_2( addrQ2, vdataQ2 ) 5241 ); 5242 5243 eBiasQ3 = tyAddr==Ity_I32 ? 
mkU32(bias+offQ3) : mkU64(bias+offQ3); 5244 addrQ3 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) ); 5245 vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata)); 5246 diQ3 = unsafeIRDirty_0_N( 5247 1/*regparms*/, 5248 hname, VG_(fnptr_to_fnentry)( helper ), 5249 mkIRExprVec_2( addrQ3, vdataQ3 ) 5250 ); 5251 5252 if (guard) 5253 diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard; 5254 5255 setHelperAnns( mce, diQ0 ); 5256 setHelperAnns( mce, diQ1 ); 5257 setHelperAnns( mce, diQ2 ); 5258 setHelperAnns( mce, diQ3 ); 5259 stmt( 'V', mce, IRStmt_Dirty(diQ0) ); 5260 stmt( 'V', mce, IRStmt_Dirty(diQ1) ); 5261 stmt( 'V', mce, IRStmt_Dirty(diQ2) ); 5262 stmt( 'V', mce, IRStmt_Dirty(diQ3) ); 5263 5264 } 5265 else if (UNLIKELY(ty == Ity_V128)) { 5266 5267 /* V128-bit case */ 5268 /* See comment in next clause re 64-bit regparms */ 5269 /* also, need to be careful about endianness */ 5270 5271 Int offLo64, offHi64; 5272 IRDirty *diLo64, *diHi64; 5273 IRAtom *addrLo64, *addrHi64; 5274 IRAtom *vdataLo64, *vdataHi64; 5275 IRAtom *eBiasLo64, *eBiasHi64; 5276 5277 if (end == Iend_LE) { 5278 offLo64 = 0; 5279 offHi64 = 8; 5280 } else { 5281 offLo64 = 8; 5282 offHi64 = 0; 5283 } 5284 5285 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64); 5286 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) ); 5287 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata)); 5288 diLo64 = unsafeIRDirty_0_N( 5289 1/*regparms*/, 5290 hname, VG_(fnptr_to_fnentry)( helper ), 5291 mkIRExprVec_2( addrLo64, vdataLo64 ) 5292 ); 5293 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64); 5294 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) ); 5295 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata)); 5296 diHi64 = unsafeIRDirty_0_N( 5297 1/*regparms*/, 5298 hname, VG_(fnptr_to_fnentry)( helper ), 5299 mkIRExprVec_2( addrHi64, vdataHi64 ) 5300 ); 5301 if (guard) diLo64->guard = guard; 5302 if (guard) diHi64->guard = guard; 5303 setHelperAnns( mce, diLo64 ); 5304 setHelperAnns( mce, diHi64 ); 5305 stmt( 'V', mce, IRStmt_Dirty(diLo64) ); 5306 stmt( 'V', mce, IRStmt_Dirty(diHi64) ); 5307 5308 } else { 5309 5310 IRDirty *di; 5311 IRAtom *addrAct; 5312 5313 /* 8/16/32/64-bit cases */ 5314 /* Generate the actual address into addrAct. */ 5315 if (bias == 0) { 5316 addrAct = addr; 5317 } else { 5318 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias); 5319 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias)); 5320 } 5321 5322 if (ty == Ity_I64) { 5323 /* We can't do this with regparm 2 on 32-bit platforms, since 5324 the back ends aren't clever enough to handle 64-bit 5325 regparm args. Therefore be different. */ 5326 di = unsafeIRDirty_0_N( 5327 1/*regparms*/, 5328 hname, VG_(fnptr_to_fnentry)( helper ), 5329 mkIRExprVec_2( addrAct, vdata ) 5330 ); 5331 } else { 5332 di = unsafeIRDirty_0_N( 5333 2/*regparms*/, 5334 hname, VG_(fnptr_to_fnentry)( helper ), 5335 mkIRExprVec_2( addrAct, 5336 zwidenToHostWord( mce, vdata )) 5337 ); 5338 } 5339 if (guard) di->guard = guard; 5340 setHelperAnns( mce, di ); 5341 stmt( 'V', mce, IRStmt_Dirty(di) ); 5342 } 5343 5344 } 5345 5346 5347 /* Do lazy pessimistic propagation through a dirty helper call, by 5348 looking at the annotations on it. This is the most complex part of 5349 Memcheck. 
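
   In outline (an informal summary of the code that follows): every
   input to the call -- each non-masked argument, each piece of guest
   state the helper reads, and each chunk of memory it reads -- is
   PCast-ed to a single worst-case Ity_I32 value and UifU-ed into one
   accumulator (|curr|).  That accumulator is then PCast-ed back out to
   every output: the destination temporary, each piece of guest state
   written, and each chunk of memory written.  So if any input bit is
   undefined, all of the helper's outputs are marked entirely
   undefined.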
*/ 5350 5351 static IRType szToITy ( Int n ) 5352 { 5353 switch (n) { 5354 case 1: return Ity_I8; 5355 case 2: return Ity_I16; 5356 case 4: return Ity_I32; 5357 case 8: return Ity_I64; 5358 default: VG_(tool_panic)("szToITy(memcheck)"); 5359 } 5360 } 5361 5362 static 5363 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d ) 5364 { 5365 Int i, k, n, toDo, gSz, gOff; 5366 IRAtom *src, *here, *curr; 5367 IRType tySrc, tyDst; 5368 IRTemp dst; 5369 IREndness end; 5370 5371 /* What's the native endianness? We need to know this. */ 5372 # if defined(VG_BIGENDIAN) 5373 end = Iend_BE; 5374 # elif defined(VG_LITTLEENDIAN) 5375 end = Iend_LE; 5376 # else 5377 # error "Unknown endianness" 5378 # endif 5379 5380 /* First check the guard. */ 5381 complainIfUndefined(mce, d->guard, NULL); 5382 5383 /* Now round up all inputs and PCast over them. */ 5384 curr = definedOfType(Ity_I32); 5385 5386 /* Inputs: unmasked args 5387 Note: arguments are evaluated REGARDLESS of the guard expression */ 5388 for (i = 0; d->args[i]; i++) { 5389 IRAtom* arg = d->args[i]; 5390 if ( (d->cee->mcx_mask & (1<<i)) 5391 || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)) ) { 5392 /* ignore this arg */ 5393 } else { 5394 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, arg) ); 5395 curr = mkUifU32(mce, here, curr); 5396 } 5397 } 5398 5399 /* Inputs: guest state that we read. */ 5400 for (i = 0; i < d->nFxState; i++) { 5401 tl_assert(d->fxState[i].fx != Ifx_None); 5402 if (d->fxState[i].fx == Ifx_Write) 5403 continue; 5404 5405 /* Enumerate the described state segments */ 5406 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 5407 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 5408 gSz = d->fxState[i].size; 5409 5410 /* Ignore any sections marked as 'always defined'. */ 5411 if (isAlwaysDefd(mce, gOff, gSz)) { 5412 if (0) 5413 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n", 5414 gOff, gSz); 5415 continue; 5416 } 5417 5418 /* This state element is read or modified. So we need to 5419 consider it. If larger than 8 bytes, deal with it in 5420 8-byte chunks. */ 5421 while (True) { 5422 tl_assert(gSz >= 0); 5423 if (gSz == 0) break; 5424 n = gSz <= 8 ? gSz : 8; 5425 /* update 'curr' with UifU of the state slice 5426 gOff .. gOff+n-1 */ 5427 tySrc = szToITy( n ); 5428 5429 /* Observe the guard expression. If it is false use an 5430 all-bits-defined bit pattern */ 5431 IRAtom *cond, *iffalse, *iftrue; 5432 5433 cond = assignNew('V', mce, Ity_I1, d->guard); 5434 iftrue = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc)); 5435 iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc)); 5436 src = assignNew('V', mce, tySrc, 5437 IRExpr_ITE(cond, iftrue, iffalse)); 5438 5439 here = mkPCastTo( mce, Ity_I32, src ); 5440 curr = mkUifU32(mce, here, curr); 5441 gSz -= n; 5442 gOff += n; 5443 } 5444 } 5445 } 5446 5447 /* Inputs: memory. First set up some info needed regardless of 5448 whether we're doing reads or writes. */ 5449 5450 if (d->mFx != Ifx_None) { 5451 /* Because we may do multiple shadow loads/stores from the same 5452 base address, it's best to do a single test of its 5453 definedness right now. Post-instrumentation optimisation 5454 should remove all but this test. 
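         Note also that d->guard is passed both to this address check
         and to each of the chunked shadow loads and stores below, so
         if the guard evaluates to False at run time, no complaint is
         raised and shadow memory is neither read nor written.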
*/ 5455 IRType tyAddr; 5456 tl_assert(d->mAddr); 5457 complainIfUndefined(mce, d->mAddr, d->guard); 5458 5459 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr); 5460 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64); 5461 tl_assert(tyAddr == mce->hWordTy); /* not really right */ 5462 } 5463 5464 /* Deal with memory inputs (reads or modifies) */ 5465 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) { 5466 toDo = d->mSize; 5467 /* chew off 32-bit chunks. We don't care about the endianness 5468 since it's all going to be condensed down to a single bit, 5469 but nevertheless choose an endianness which is hopefully 5470 native to the platform. */ 5471 while (toDo >= 4) { 5472 here = mkPCastTo( 5473 mce, Ity_I32, 5474 expr2vbits_Load_guarded_Simple( 5475 mce, end, Ity_I32, d->mAddr, d->mSize - toDo, d->guard ) 5476 ); 5477 curr = mkUifU32(mce, here, curr); 5478 toDo -= 4; 5479 } 5480 /* chew off 16-bit chunks */ 5481 while (toDo >= 2) { 5482 here = mkPCastTo( 5483 mce, Ity_I32, 5484 expr2vbits_Load_guarded_Simple( 5485 mce, end, Ity_I16, d->mAddr, d->mSize - toDo, d->guard ) 5486 ); 5487 curr = mkUifU32(mce, here, curr); 5488 toDo -= 2; 5489 } 5490 /* chew off the remaining 8-bit chunk, if any */ 5491 if (toDo == 1) { 5492 here = mkPCastTo( 5493 mce, Ity_I32, 5494 expr2vbits_Load_guarded_Simple( 5495 mce, end, Ity_I8, d->mAddr, d->mSize - toDo, d->guard ) 5496 ); 5497 curr = mkUifU32(mce, here, curr); 5498 toDo -= 1; 5499 } 5500 tl_assert(toDo == 0); 5501 } 5502 5503 /* Whew! So curr is a 32-bit V-value summarising pessimistically 5504 all the inputs to the helper. Now we need to re-distribute the 5505 results to all destinations. */ 5506 5507 /* Outputs: the destination temporary, if there is one. */ 5508 if (d->tmp != IRTemp_INVALID) { 5509 dst = findShadowTmpV(mce, d->tmp); 5510 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp); 5511 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) ); 5512 } 5513 5514 /* Outputs: guest state that we write or modify. */ 5515 for (i = 0; i < d->nFxState; i++) { 5516 tl_assert(d->fxState[i].fx != Ifx_None); 5517 if (d->fxState[i].fx == Ifx_Read) 5518 continue; 5519 5520 /* Enumerate the described state segments */ 5521 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 5522 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 5523 gSz = d->fxState[i].size; 5524 5525 /* Ignore any sections marked as 'always defined'. */ 5526 if (isAlwaysDefd(mce, gOff, gSz)) 5527 continue; 5528 5529 /* This state element is written or modified. So we need to 5530 consider it. If larger than 8 bytes, deal with it in 5531 8-byte chunks. */ 5532 while (True) { 5533 tl_assert(gSz >= 0); 5534 if (gSz == 0) break; 5535 n = gSz <= 8 ? gSz : 8; 5536 /* Write suitably-casted 'curr' to the state slice 5537 gOff .. gOff+n-1 */ 5538 tyDst = szToITy( n ); 5539 do_shadow_PUT( mce, gOff, 5540 NULL, /* original atom */ 5541 mkPCastTo( mce, tyDst, curr ), d->guard ); 5542 gSz -= n; 5543 gOff += n; 5544 } 5545 } 5546 } 5547 5548 /* Outputs: memory that we write or modify. Same comments about 5549 endianness as above apply. 
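      For example (a sketch): with d->mSize == 11, the code below
      emits guarded shadow stores of suitably PCast-ed 'curr' at
      offsets 0 and 4 (32 bits each), then at offset 8 (16 bits),
      then at offset 10 (8 bits), exactly covering the 11 bytes.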
*/ 5550 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) { 5551 toDo = d->mSize; 5552 /* chew off 32-bit chunks */ 5553 while (toDo >= 4) { 5554 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 5555 NULL, /* original data */ 5556 mkPCastTo( mce, Ity_I32, curr ), 5557 d->guard ); 5558 toDo -= 4; 5559 } 5560 /* chew off 16-bit chunks */ 5561 while (toDo >= 2) { 5562 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 5563 NULL, /* original data */ 5564 mkPCastTo( mce, Ity_I16, curr ), 5565 d->guard ); 5566 toDo -= 2; 5567 } 5568 /* chew off the remaining 8-bit chunk, if any */ 5569 if (toDo == 1) { 5570 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 5571 NULL, /* original data */ 5572 mkPCastTo( mce, Ity_I8, curr ), 5573 d->guard ); 5574 toDo -= 1; 5575 } 5576 tl_assert(toDo == 0); 5577 } 5578 5579 } 5580 5581 5582 /* We have an ABI hint telling us that [base .. base+len-1] is to 5583 become undefined ("writable"). Generate code to call a helper to 5584 notify the A/V bit machinery of this fact. 5585 5586 We call 5587 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, 5588 Addr nia ); 5589 */ 5590 static 5591 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia ) 5592 { 5593 IRDirty* di; 5594 5595 if (MC_(clo_mc_level) == 3) { 5596 di = unsafeIRDirty_0_N( 5597 3/*regparms*/, 5598 "MC_(helperc_MAKE_STACK_UNINIT_w_o)", 5599 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_w_o) ), 5600 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia ) 5601 ); 5602 } else { 5603 /* We ignore the supplied nia, since it is irrelevant. */ 5604 tl_assert(MC_(clo_mc_level) == 2 || MC_(clo_mc_level) == 1); 5605 /* Special-case the len==128 case, since that is for amd64-ELF, 5606 which is a very common target. */ 5607 if (len == 128) { 5608 di = unsafeIRDirty_0_N( 5609 1/*regparms*/, 5610 "MC_(helperc_MAKE_STACK_UNINIT_128_no_o)", 5611 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_128_no_o)), 5612 mkIRExprVec_1( base ) 5613 ); 5614 } else { 5615 di = unsafeIRDirty_0_N( 5616 2/*regparms*/, 5617 "MC_(helperc_MAKE_STACK_UNINIT_no_o)", 5618 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_no_o) ), 5619 mkIRExprVec_2( base, mkIRExpr_HWord( (UInt)len) ) 5620 ); 5621 } 5622 } 5623 5624 stmt( 'V', mce, IRStmt_Dirty(di) ); 5625 } 5626 5627 5628 /* ------ Dealing with IRCAS (big and complex) ------ */ 5629 5630 /* FWDS */ 5631 static IRAtom* gen_load_b ( MCEnv* mce, Int szB, 5632 IRAtom* baseaddr, Int offset ); 5633 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 ); 5634 static void gen_store_b ( MCEnv* mce, Int szB, 5635 IRAtom* baseaddr, Int offset, IRAtom* dataB, 5636 IRAtom* guard ); 5637 5638 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas ); 5639 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ); 5640 5641 5642 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both 5643 IRExpr.Consts, else this asserts. If they are both Consts, it 5644 doesn't do anything. So that just leaves the RdTmp case. 5645 5646 In which case: this assigns the shadow value SHADOW to the IR 5647 shadow temporary associated with ORIG. That is, ORIG, being an 5648 original temporary, will have a shadow temporary associated with 5649 it. However, in the case envisaged here, there will so far have 5650 been no IR emitted to actually write a shadow value into that 5651 temporary. 
What this routine does is to (emit IR to) copy the 5652 value in SHADOW into said temporary, so that after this call, 5653 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the 5654 value in SHADOW. 5655 5656 Point is to allow callers to compute "by hand" a shadow value for 5657 ORIG, and force it to be associated with ORIG. 5658 5659 How do we know that that shadow associated with ORIG has not so far 5660 been assigned to? Well, we don't per se know that, but supposing 5661 it had. Then this routine would create a second assignment to it, 5662 and later the IR sanity checker would barf. But that never 5663 happens. QED. 5664 */ 5665 static void bind_shadow_tmp_to_orig ( UChar how, 5666 MCEnv* mce, 5667 IRAtom* orig, IRAtom* shadow ) 5668 { 5669 tl_assert(isOriginalAtom(mce, orig)); 5670 tl_assert(isShadowAtom(mce, shadow)); 5671 switch (orig->tag) { 5672 case Iex_Const: 5673 tl_assert(shadow->tag == Iex_Const); 5674 break; 5675 case Iex_RdTmp: 5676 tl_assert(shadow->tag == Iex_RdTmp); 5677 if (how == 'V') { 5678 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp), 5679 shadow); 5680 } else { 5681 tl_assert(how == 'B'); 5682 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp), 5683 shadow); 5684 } 5685 break; 5686 default: 5687 tl_assert(0); 5688 } 5689 } 5690 5691 5692 static 5693 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas ) 5694 { 5695 /* Scheme is (both single- and double- cases): 5696 5697 1. fetch data#,dataB (the proposed new value) 5698 5699 2. fetch expd#,expdB (what we expect to see at the address) 5700 5701 3. check definedness of address 5702 5703 4. load old#,oldB from shadow memory; this also checks 5704 addressibility of the address 5705 5706 5. the CAS itself 5707 5708 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below. 5709 5710 7. if "expected == old" (as computed by (6)) 5711 store data#,dataB to shadow memory 5712 5713 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores 5714 'data' but 7 stores 'data#'. Hence it is possible for the 5715 shadow data to be incorrectly checked and/or updated: 5716 5717 * 7 is at least gated correctly, since the 'expected == old' 5718 condition is derived from outputs of 5. However, the shadow 5719 write could happen too late: imagine after 5 we are 5720 descheduled, a different thread runs, writes a different 5721 (shadow) value at the address, and then we resume, hence 5722 overwriting the shadow value written by the other thread. 5723 5724 Because the original memory access is atomic, there's no way to 5725 make both the original and shadow accesses into a single atomic 5726 thing, hence this is unavoidable. 5727 5728 At least as Valgrind stands, I don't think it's a problem, since 5729 we're single threaded *and* we guarantee that there are no 5730 context switches during the execution of any specific superblock 5731 -- context switches can only happen at superblock boundaries. 5732 5733 If Valgrind ever becomes MT in the future, then it might be more 5734 of a problem. A possible kludge would be to artificially 5735 associate with the location, a lock, which we must acquire and 5736 release around the transaction as a whole. Hmm, that probably 5737 would't work properly since it only guards us against other 5738 threads doing CASs on the same location, not against other 5739 threads doing normal reads and writes. 5740 5741 ------------------------------------------------------------ 5742 5743 COMMENT_ON_CasCmpEQ: 5744 5745 Note two things. 
Firstly, in the sequence above, we compute 5746 "expected == old", but we don't check definedness of it. Why 5747 not? Also, the x86 and amd64 front ends use 5748 Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent 5749 determination (expected == old ?) for themselves, and we also 5750 don't check definedness for those primops; we just say that the 5751 result is defined. Why? Details follow. 5752 5753 x86/amd64 contains various forms of locked insns: 5754 * lock prefix before all basic arithmetic insn; 5755 eg lock xorl %reg1,(%reg2) 5756 * atomic exchange reg-mem 5757 * compare-and-swaps 5758 5759 Rather than attempt to represent them all, which would be a 5760 royal PITA, I used a result from Maurice Herlihy 5761 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he 5762 demonstrates that compare-and-swap is a primitive more general 5763 than the other two, and so can be used to represent all of them. 5764 So the translation scheme for (eg) lock incl (%reg) is as 5765 follows: 5766 5767 again: 5768 old = * %reg 5769 new = old + 1 5770 atomically { if (* %reg == old) { * %reg = new } else { goto again } } 5771 5772 The "atomically" is the CAS bit. The scheme is always the same: 5773 get old value from memory, compute new value, atomically stuff 5774 new value back in memory iff the old value has not changed (iow, 5775 no other thread modified it in the meantime). If it has changed 5776 then we've been out-raced and we have to start over. 5777 5778 Now that's all very neat, but it has the bad side effect of 5779 introducing an explicit equality test into the translation. 5780 Consider the behaviour of said code on a memory location which 5781 is uninitialised. We will wind up doing a comparison on 5782 uninitialised data, and mc duly complains. 5783 5784 What's difficult about this is, the common case is that the 5785 location is uncontended, and so we're usually comparing the same 5786 value (* %reg) with itself. So we shouldn't complain even if it 5787 is undefined. But mc doesn't know that. 5788 5789 My solution is to mark the == in the IR specially, so as to tell 5790 mc that it almost certainly compares a value with itself, and we 5791 should just regard the result as always defined. Rather than 5792 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into 5793 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else. 5794 5795 So there's always the question of, can this give a false 5796 negative? eg, imagine that initially, * %reg is defined; and we 5797 read that; but then in the gap between the read and the CAS, a 5798 different thread writes an undefined (and different) value at 5799 the location. Then the CAS in this thread will fail and we will 5800 go back to "again:", but without knowing that the trip back 5801 there was based on an undefined comparison. No matter; at least 5802 the other thread won the race and the location is correctly 5803 marked as undefined. What if it wrote an uninitialised version 5804 of the same value that was there originally, though? 5805 5806 etc etc. Seems like there's a small corner case in which we 5807 might lose the fact that something's defined -- we're out-raced 5808 in between the "old = * reg" and the "atomically {", _and_ the 5809 other thread is writing in an undefined version of what's 5810 already there. Well, that seems pretty unlikely. 5811 5812 --- 5813 5814 If we ever need to reinstate it .. code which generates a 5815 definedness test for "expected == old" was removed at r10432 of 5816 this file. 
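      (The places which actually implement this special-casing are
      the Iop_CasCmp{EQ,NE}{8,16,32,64} cases in the V-bit binop
      handler earlier in this file, and in schemeE near the end of
      this file; both just declare the result to be defined,
      regardless of the arguments.)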
5817 */ 5818 if (cas->oldHi == IRTemp_INVALID) { 5819 do_shadow_CAS_single( mce, cas ); 5820 } else { 5821 do_shadow_CAS_double( mce, cas ); 5822 } 5823 } 5824 5825 5826 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas ) 5827 { 5828 IRAtom *vdataLo = NULL, *bdataLo = NULL; 5829 IRAtom *vexpdLo = NULL, *bexpdLo = NULL; 5830 IRAtom *voldLo = NULL, *boldLo = NULL; 5831 IRAtom *expd_eq_old = NULL; 5832 IROp opCasCmpEQ; 5833 Int elemSzB; 5834 IRType elemTy; 5835 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */ 5836 5837 /* single CAS */ 5838 tl_assert(cas->oldHi == IRTemp_INVALID); 5839 tl_assert(cas->expdHi == NULL); 5840 tl_assert(cas->dataHi == NULL); 5841 5842 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); 5843 switch (elemTy) { 5844 case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break; 5845 case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break; 5846 case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break; 5847 case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break; 5848 default: tl_assert(0); /* IR defn disallows any other types */ 5849 } 5850 5851 /* 1. fetch data# (the proposed new value) */ 5852 tl_assert(isOriginalAtom(mce, cas->dataLo)); 5853 vdataLo 5854 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo)); 5855 tl_assert(isShadowAtom(mce, vdataLo)); 5856 if (otrak) { 5857 bdataLo 5858 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo)); 5859 tl_assert(isShadowAtom(mce, bdataLo)); 5860 } 5861 5862 /* 2. fetch expected# (what we expect to see at the address) */ 5863 tl_assert(isOriginalAtom(mce, cas->expdLo)); 5864 vexpdLo 5865 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo)); 5866 tl_assert(isShadowAtom(mce, vexpdLo)); 5867 if (otrak) { 5868 bexpdLo 5869 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo)); 5870 tl_assert(isShadowAtom(mce, bexpdLo)); 5871 } 5872 5873 /* 3. check definedness of address */ 5874 /* 4. fetch old# from shadow memory; this also checks 5875 addressibility of the address */ 5876 voldLo 5877 = assignNew( 5878 'V', mce, elemTy, 5879 expr2vbits_Load( 5880 mce, 5881 cas->end, elemTy, cas->addr, 0/*Addr bias*/, 5882 NULL/*always happens*/ 5883 )); 5884 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo); 5885 if (otrak) { 5886 boldLo 5887 = assignNew('B', mce, Ity_I32, 5888 gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/)); 5889 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo); 5890 } 5891 5892 /* 5. the CAS itself */ 5893 stmt( 'C', mce, IRStmt_CAS(cas) ); 5894 5895 /* 6. compute "expected == old" */ 5896 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */ 5897 /* Note that 'C' is kinda faking it; it is indeed a non-shadow 5898 tree, but it's not copied from the input block. */ 5899 expd_eq_old 5900 = assignNew('C', mce, Ity_I1, 5901 binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo))); 5902 5903 /* 7. 
if "expected == old" 5904 store data# to shadow memory */ 5905 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/, 5906 NULL/*data*/, vdataLo/*vdata*/, 5907 expd_eq_old/*guard for store*/ ); 5908 if (otrak) { 5909 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/, 5910 bdataLo/*bdata*/, 5911 expd_eq_old/*guard for store*/ ); 5912 } 5913 } 5914 5915 5916 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ) 5917 { 5918 IRAtom *vdataHi = NULL, *bdataHi = NULL; 5919 IRAtom *vdataLo = NULL, *bdataLo = NULL; 5920 IRAtom *vexpdHi = NULL, *bexpdHi = NULL; 5921 IRAtom *vexpdLo = NULL, *bexpdLo = NULL; 5922 IRAtom *voldHi = NULL, *boldHi = NULL; 5923 IRAtom *voldLo = NULL, *boldLo = NULL; 5924 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL; 5925 IRAtom *expd_eq_old = NULL, *zero = NULL; 5926 IROp opCasCmpEQ, opOr, opXor; 5927 Int elemSzB, memOffsLo, memOffsHi; 5928 IRType elemTy; 5929 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */ 5930 5931 /* double CAS */ 5932 tl_assert(cas->oldHi != IRTemp_INVALID); 5933 tl_assert(cas->expdHi != NULL); 5934 tl_assert(cas->dataHi != NULL); 5935 5936 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); 5937 switch (elemTy) { 5938 case Ity_I8: 5939 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8; 5940 elemSzB = 1; zero = mkU8(0); 5941 break; 5942 case Ity_I16: 5943 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16; 5944 elemSzB = 2; zero = mkU16(0); 5945 break; 5946 case Ity_I32: 5947 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32; 5948 elemSzB = 4; zero = mkU32(0); 5949 break; 5950 case Ity_I64: 5951 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64; 5952 elemSzB = 8; zero = mkU64(0); 5953 break; 5954 default: 5955 tl_assert(0); /* IR defn disallows any other types */ 5956 } 5957 5958 /* 1. fetch data# (the proposed new value) */ 5959 tl_assert(isOriginalAtom(mce, cas->dataHi)); 5960 tl_assert(isOriginalAtom(mce, cas->dataLo)); 5961 vdataHi 5962 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi)); 5963 vdataLo 5964 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo)); 5965 tl_assert(isShadowAtom(mce, vdataHi)); 5966 tl_assert(isShadowAtom(mce, vdataLo)); 5967 if (otrak) { 5968 bdataHi 5969 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi)); 5970 bdataLo 5971 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo)); 5972 tl_assert(isShadowAtom(mce, bdataHi)); 5973 tl_assert(isShadowAtom(mce, bdataLo)); 5974 } 5975 5976 /* 2. fetch expected# (what we expect to see at the address) */ 5977 tl_assert(isOriginalAtom(mce, cas->expdHi)); 5978 tl_assert(isOriginalAtom(mce, cas->expdLo)); 5979 vexpdHi 5980 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi)); 5981 vexpdLo 5982 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo)); 5983 tl_assert(isShadowAtom(mce, vexpdHi)); 5984 tl_assert(isShadowAtom(mce, vexpdLo)); 5985 if (otrak) { 5986 bexpdHi 5987 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi)); 5988 bexpdLo 5989 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo)); 5990 tl_assert(isShadowAtom(mce, bexpdHi)); 5991 tl_assert(isShadowAtom(mce, bexpdLo)); 5992 } 5993 5994 /* 3. check definedness of address */ 5995 /* 4. 
fetch old# from shadow memory; this also checks 5996 addressibility of the address */ 5997 if (cas->end == Iend_LE) { 5998 memOffsLo = 0; 5999 memOffsHi = elemSzB; 6000 } else { 6001 tl_assert(cas->end == Iend_BE); 6002 memOffsLo = elemSzB; 6003 memOffsHi = 0; 6004 } 6005 voldHi 6006 = assignNew( 6007 'V', mce, elemTy, 6008 expr2vbits_Load( 6009 mce, 6010 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/, 6011 NULL/*always happens*/ 6012 )); 6013 voldLo 6014 = assignNew( 6015 'V', mce, elemTy, 6016 expr2vbits_Load( 6017 mce, 6018 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/, 6019 NULL/*always happens*/ 6020 )); 6021 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi); 6022 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo); 6023 if (otrak) { 6024 boldHi 6025 = assignNew('B', mce, Ity_I32, 6026 gen_load_b(mce, elemSzB, cas->addr, 6027 memOffsHi/*addr bias*/)); 6028 boldLo 6029 = assignNew('B', mce, Ity_I32, 6030 gen_load_b(mce, elemSzB, cas->addr, 6031 memOffsLo/*addr bias*/)); 6032 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi); 6033 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo); 6034 } 6035 6036 /* 5. the CAS itself */ 6037 stmt( 'C', mce, IRStmt_CAS(cas) ); 6038 6039 /* 6. compute "expected == old" */ 6040 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */ 6041 /* Note that 'C' is kinda faking it; it is indeed a non-shadow 6042 tree, but it's not copied from the input block. */ 6043 /* 6044 xHi = oldHi ^ expdHi; 6045 xLo = oldLo ^ expdLo; 6046 xHL = xHi | xLo; 6047 expd_eq_old = xHL == 0; 6048 */ 6049 xHi = assignNew('C', mce, elemTy, 6050 binop(opXor, cas->expdHi, mkexpr(cas->oldHi))); 6051 xLo = assignNew('C', mce, elemTy, 6052 binop(opXor, cas->expdLo, mkexpr(cas->oldLo))); 6053 xHL = assignNew('C', mce, elemTy, 6054 binop(opOr, xHi, xLo)); 6055 expd_eq_old 6056 = assignNew('C', mce, Ity_I1, 6057 binop(opCasCmpEQ, xHL, zero)); 6058 6059 /* 7. if "expected == old" 6060 store data# to shadow memory */ 6061 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/, 6062 NULL/*data*/, vdataHi/*vdata*/, 6063 expd_eq_old/*guard for store*/ ); 6064 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/, 6065 NULL/*data*/, vdataLo/*vdata*/, 6066 expd_eq_old/*guard for store*/ ); 6067 if (otrak) { 6068 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/, 6069 bdataHi/*bdata*/, 6070 expd_eq_old/*guard for store*/ ); 6071 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/, 6072 bdataLo/*bdata*/, 6073 expd_eq_old/*guard for store*/ ); 6074 } 6075 } 6076 6077 6078 /* ------ Dealing with LL/SC (not difficult) ------ */ 6079 6080 static void do_shadow_LLSC ( MCEnv* mce, 6081 IREndness stEnd, 6082 IRTemp stResult, 6083 IRExpr* stAddr, 6084 IRExpr* stStoredata ) 6085 { 6086 /* In short: treat a load-linked like a normal load followed by an 6087 assignment of the loaded (shadow) data to the result temporary. 6088 Treat a store-conditional like a normal store, and mark the 6089 result temporary as defined. */ 6090 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult); 6091 IRTemp resTmp = findShadowTmpV(mce, stResult); 6092 6093 tl_assert(isIRAtom(stAddr)); 6094 if (stStoredata) 6095 tl_assert(isIRAtom(stStoredata)); 6096 6097 if (stStoredata == NULL) { 6098 /* Load Linked */ 6099 /* Just treat this as a normal load, followed by an assignment of 6100 the value to .result. 
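         (No attempt is made to model the reservation that a real
         load-linked establishes.  For definedness purposes that only
         matters for the store-conditional's success flag, and the
         store-conditional case below simply declares that flag to be
         defined anyway.)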
*/ 6101 /* Stay sane */ 6102 tl_assert(resTy == Ity_I64 || resTy == Ity_I32 6103 || resTy == Ity_I16 || resTy == Ity_I8); 6104 assign( 'V', mce, resTmp, 6105 expr2vbits_Load( 6106 mce, stEnd, resTy, stAddr, 0/*addr bias*/, 6107 NULL/*always happens*/) ); 6108 } else { 6109 /* Store Conditional */ 6110 /* Stay sane */ 6111 IRType dataTy = typeOfIRExpr(mce->sb->tyenv, 6112 stStoredata); 6113 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32 6114 || dataTy == Ity_I16 || dataTy == Ity_I8); 6115 do_shadow_Store( mce, stEnd, 6116 stAddr, 0/* addr bias */, 6117 stStoredata, 6118 NULL /* shadow data */, 6119 NULL/*guard*/ ); 6120 /* This is a store conditional, so it writes to .result a value 6121 indicating whether or not the store succeeded. Just claim 6122 this value is always defined. In the PowerPC interpretation 6123 of store-conditional, definedness of the success indication 6124 depends on whether the address of the store matches the 6125 reservation address. But we can't tell that here (and 6126 anyway, we're not being PowerPC-specific). At least we are 6127 guaranteed that the definedness of the store address, and its 6128 addressibility, will be checked as per normal. So it seems 6129 pretty safe to just say that the success indication is always 6130 defined. 6131 6132 In schemeS, for origin tracking, we must correspondingly set 6133 a no-origin value for the origin shadow of .result. 6134 */ 6135 tl_assert(resTy == Ity_I1); 6136 assign( 'V', mce, resTmp, definedOfType(resTy) ); 6137 } 6138 } 6139 6140 6141 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */ 6142 6143 static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg ) 6144 { 6145 complainIfUndefined(mce, sg->guard, NULL); 6146 /* do_shadow_Store will generate code to check the definedness and 6147 validity of sg->addr, in the case where sg->guard evaluates to 6148 True at run-time. */ 6149 do_shadow_Store( mce, sg->end, 6150 sg->addr, 0/* addr bias */, 6151 sg->data, 6152 NULL /* shadow data */, 6153 sg->guard ); 6154 } 6155 6156 static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg ) 6157 { 6158 complainIfUndefined(mce, lg->guard, NULL); 6159 /* expr2vbits_Load_guarded_General will generate code to check the 6160 definedness and validity of lg->addr, in the case where 6161 lg->guard evaluates to True at run-time. */ 6162 6163 /* Look at the LoadG's built-in conversion operation, to determine 6164 the source (actual loaded data) type, and the equivalent IROp. 6165 NOTE that implicitly we are taking a widening operation to be 6166 applied to original atoms and producing one that applies to V 6167 bits. Since signed and unsigned widening are self-shadowing, 6168 this is a straight copy of the op (modulo swapping from the 6169 IRLoadGOp form to the IROp form). Note also therefore that this 6170 implicitly duplicates the logic to do with said widening ops in 6171 expr2vbits_Unop. See comment at the start of expr2vbits_Unop. 
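      For example (a sketch): for ILGop_16Uto32 the data is loaded at
      type Ity_I16, so 16 bits of V data are computed for it, and
      those V bits are then widened with Iop_16Uto32, mirroring what
      the load itself does to the data.  The zero-extension puts
      defined (zero) V bits in the upper half, which is correct,
      since the upper half of the widened result really is a known
      constant (zero).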
*/ 6172 IROp vwiden = Iop_INVALID; 6173 IRType loadedTy = Ity_INVALID; 6174 switch (lg->cvt) { 6175 case ILGop_IdentV128: loadedTy = Ity_V128; vwiden = Iop_INVALID; break; 6176 case ILGop_Ident64: loadedTy = Ity_I64; vwiden = Iop_INVALID; break; 6177 case ILGop_Ident32: loadedTy = Ity_I32; vwiden = Iop_INVALID; break; 6178 case ILGop_16Uto32: loadedTy = Ity_I16; vwiden = Iop_16Uto32; break; 6179 case ILGop_16Sto32: loadedTy = Ity_I16; vwiden = Iop_16Sto32; break; 6180 case ILGop_8Uto32: loadedTy = Ity_I8; vwiden = Iop_8Uto32; break; 6181 case ILGop_8Sto32: loadedTy = Ity_I8; vwiden = Iop_8Sto32; break; 6182 default: VG_(tool_panic)("do_shadow_LoadG"); 6183 } 6184 6185 IRAtom* vbits_alt 6186 = expr2vbits( mce, lg->alt ); 6187 IRAtom* vbits_final 6188 = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy, 6189 lg->addr, 0/*addr bias*/, 6190 lg->guard, vwiden, vbits_alt ); 6191 /* And finally, bind the V bits to the destination temporary. */ 6192 assign( 'V', mce, findShadowTmpV(mce, lg->dst), vbits_final ); 6193 } 6194 6195 6196 /*------------------------------------------------------------*/ 6197 /*--- Memcheck main ---*/ 6198 /*------------------------------------------------------------*/ 6199 6200 static void schemeS ( MCEnv* mce, IRStmt* st ); 6201 6202 static Bool isBogusAtom ( IRAtom* at ) 6203 { 6204 ULong n = 0; 6205 IRConst* con; 6206 tl_assert(isIRAtom(at)); 6207 if (at->tag == Iex_RdTmp) 6208 return False; 6209 tl_assert(at->tag == Iex_Const); 6210 con = at->Iex.Const.con; 6211 switch (con->tag) { 6212 case Ico_U1: return False; 6213 case Ico_U8: n = (ULong)con->Ico.U8; break; 6214 case Ico_U16: n = (ULong)con->Ico.U16; break; 6215 case Ico_U32: n = (ULong)con->Ico.U32; break; 6216 case Ico_U64: n = (ULong)con->Ico.U64; break; 6217 case Ico_F32: return False; 6218 case Ico_F64: return False; 6219 case Ico_F32i: return False; 6220 case Ico_F64i: return False; 6221 case Ico_V128: return False; 6222 case Ico_V256: return False; 6223 default: ppIRExpr(at); tl_assert(0); 6224 } 6225 /* VG_(printf)("%llx\n", n); */ 6226 return (/*32*/ n == 0xFEFEFEFFULL 6227 /*32*/ || n == 0x80808080ULL 6228 /*32*/ || n == 0x7F7F7F7FULL 6229 /*32*/ || n == 0x7EFEFEFFULL 6230 /*32*/ || n == 0x81010100ULL 6231 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL 6232 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL 6233 /*64*/ || n == 0x0000000000008080ULL 6234 /*64*/ || n == 0x8080808080808080ULL 6235 /*64*/ || n == 0x0101010101010101ULL 6236 ); 6237 } 6238 6239 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st ) 6240 { 6241 Int i; 6242 IRExpr* e; 6243 IRDirty* d; 6244 IRCAS* cas; 6245 switch (st->tag) { 6246 case Ist_WrTmp: 6247 e = st->Ist.WrTmp.data; 6248 switch (e->tag) { 6249 case Iex_Get: 6250 case Iex_RdTmp: 6251 return False; 6252 case Iex_Const: 6253 return isBogusAtom(e); 6254 case Iex_Unop: 6255 return isBogusAtom(e->Iex.Unop.arg) 6256 || e->Iex.Unop.op == Iop_GetMSBs8x16; 6257 case Iex_GetI: 6258 return isBogusAtom(e->Iex.GetI.ix); 6259 case Iex_Binop: 6260 return isBogusAtom(e->Iex.Binop.arg1) 6261 || isBogusAtom(e->Iex.Binop.arg2); 6262 case Iex_Triop: 6263 return isBogusAtom(e->Iex.Triop.details->arg1) 6264 || isBogusAtom(e->Iex.Triop.details->arg2) 6265 || isBogusAtom(e->Iex.Triop.details->arg3); 6266 case Iex_Qop: 6267 return isBogusAtom(e->Iex.Qop.details->arg1) 6268 || isBogusAtom(e->Iex.Qop.details->arg2) 6269 || isBogusAtom(e->Iex.Qop.details->arg3) 6270 || isBogusAtom(e->Iex.Qop.details->arg4); 6271 case Iex_ITE: 6272 return isBogusAtom(e->Iex.ITE.cond) 6273 || isBogusAtom(e->Iex.ITE.iftrue) 6274 || 
isBogusAtom(e->Iex.ITE.iffalse); 6275 case Iex_Load: 6276 return isBogusAtom(e->Iex.Load.addr); 6277 case Iex_CCall: 6278 for (i = 0; e->Iex.CCall.args[i]; i++) 6279 if (isBogusAtom(e->Iex.CCall.args[i])) 6280 return True; 6281 return False; 6282 default: 6283 goto unhandled; 6284 } 6285 case Ist_Dirty: 6286 d = st->Ist.Dirty.details; 6287 for (i = 0; d->args[i]; i++) { 6288 IRAtom* atom = d->args[i]; 6289 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(atom))) { 6290 if (isBogusAtom(atom)) 6291 return True; 6292 } 6293 } 6294 if (isBogusAtom(d->guard)) 6295 return True; 6296 if (d->mAddr && isBogusAtom(d->mAddr)) 6297 return True; 6298 return False; 6299 case Ist_Put: 6300 return isBogusAtom(st->Ist.Put.data); 6301 case Ist_PutI: 6302 return isBogusAtom(st->Ist.PutI.details->ix) 6303 || isBogusAtom(st->Ist.PutI.details->data); 6304 case Ist_Store: 6305 return isBogusAtom(st->Ist.Store.addr) 6306 || isBogusAtom(st->Ist.Store.data); 6307 case Ist_StoreG: { 6308 IRStoreG* sg = st->Ist.StoreG.details; 6309 return isBogusAtom(sg->addr) || isBogusAtom(sg->data) 6310 || isBogusAtom(sg->guard); 6311 } 6312 case Ist_LoadG: { 6313 IRLoadG* lg = st->Ist.LoadG.details; 6314 return isBogusAtom(lg->addr) || isBogusAtom(lg->alt) 6315 || isBogusAtom(lg->guard); 6316 } 6317 case Ist_Exit: 6318 return isBogusAtom(st->Ist.Exit.guard); 6319 case Ist_AbiHint: 6320 return isBogusAtom(st->Ist.AbiHint.base) 6321 || isBogusAtom(st->Ist.AbiHint.nia); 6322 case Ist_NoOp: 6323 case Ist_IMark: 6324 case Ist_MBE: 6325 return False; 6326 case Ist_CAS: 6327 cas = st->Ist.CAS.details; 6328 return isBogusAtom(cas->addr) 6329 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False) 6330 || isBogusAtom(cas->expdLo) 6331 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False) 6332 || isBogusAtom(cas->dataLo); 6333 case Ist_LLSC: 6334 return isBogusAtom(st->Ist.LLSC.addr) 6335 || (st->Ist.LLSC.storedata 6336 ? isBogusAtom(st->Ist.LLSC.storedata) 6337 : False); 6338 default: 6339 unhandled: 6340 ppIRStmt(st); 6341 VG_(tool_panic)("hasBogusLiterals"); 6342 } 6343 } 6344 6345 6346 IRSB* MC_(instrument) ( VgCallbackClosure* closure, 6347 IRSB* sb_in, 6348 const VexGuestLayout* layout, 6349 const VexGuestExtents* vge, 6350 const VexArchInfo* archinfo_host, 6351 IRType gWordTy, IRType hWordTy ) 6352 { 6353 Bool verboze = 0||False; 6354 Int i, j, first_stmt; 6355 IRStmt* st; 6356 MCEnv mce; 6357 IRSB* sb_out; 6358 6359 if (gWordTy != hWordTy) { 6360 /* We don't currently support this case. */ 6361 VG_(tool_panic)("host/guest word size mismatch"); 6362 } 6363 6364 /* Check we're not completely nuts */ 6365 tl_assert(sizeof(UWord) == sizeof(void*)); 6366 tl_assert(sizeof(Word) == sizeof(void*)); 6367 tl_assert(sizeof(Addr) == sizeof(void*)); 6368 tl_assert(sizeof(ULong) == 8); 6369 tl_assert(sizeof(Long) == 8); 6370 tl_assert(sizeof(UInt) == 4); 6371 tl_assert(sizeof(Int) == 4); 6372 6373 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3); 6374 6375 /* Set up SB */ 6376 sb_out = deepCopyIRSBExceptStmts(sb_in); 6377 6378 /* Set up the running environment. Both .sb and .tmpMap are 6379 modified as we go along. Note that tmps are added to both 6380 .sb->tyenv and .tmpMap together, so the valid index-set for 6381 those two arrays should always be identical. */ 6382 VG_(memset)(&mce, 0, sizeof(mce)); 6383 mce.sb = sb_out; 6384 mce.trace = verboze; 6385 mce.layout = layout; 6386 mce.hWordTy = hWordTy; 6387 mce.bogusLiterals = False; 6388 6389 /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on 6390 Darwin. 
10.7 is mostly built with LLVM, which uses these for 6391 bitfield inserts, and we get a lot of false errors if the cheap 6392 interpretation is used, alas. Could solve this much better if 6393 we knew which of such adds came from x86/amd64 LEA instructions, 6394 since these are the only ones really needing the expensive 6395 interpretation, but that would require some way to tag them in 6396 the _toIR.c front ends, which is a lot of faffing around. So 6397 for now just use the slow and blunt-instrument solution. */ 6398 mce.useLLVMworkarounds = False; 6399 # if defined(VGO_darwin) 6400 mce.useLLVMworkarounds = True; 6401 # endif 6402 6403 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free), 6404 sizeof(TempMapEnt)); 6405 VG_(hintSizeXA) (mce.tmpMap, sb_in->tyenv->types_used); 6406 for (i = 0; i < sb_in->tyenv->types_used; i++) { 6407 TempMapEnt ent; 6408 ent.kind = Orig; 6409 ent.shadowV = IRTemp_INVALID; 6410 ent.shadowB = IRTemp_INVALID; 6411 VG_(addToXA)( mce.tmpMap, &ent ); 6412 } 6413 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used ); 6414 6415 if (MC_(clo_expensive_definedness_checks)) { 6416 /* For expensive definedness checking skip looking for bogus 6417 literals. */ 6418 mce.bogusLiterals = True; 6419 } else { 6420 /* Make a preliminary inspection of the statements, to see if there 6421 are any dodgy-looking literals. If there are, we generate 6422 extra-detailed (hence extra-expensive) instrumentation in 6423 places. Scan the whole bb even if dodgyness is found earlier, 6424 so that the flatness assertion is applied to all stmts. */ 6425 Bool bogus = False; 6426 6427 for (i = 0; i < sb_in->stmts_used; i++) { 6428 st = sb_in->stmts[i]; 6429 tl_assert(st); 6430 tl_assert(isFlatIRStmt(st)); 6431 6432 if (!bogus) { 6433 bogus = checkForBogusLiterals(st); 6434 if (0 && bogus) { 6435 VG_(printf)("bogus: "); 6436 ppIRStmt(st); 6437 VG_(printf)("\n"); 6438 } 6439 if (bogus) break; 6440 } 6441 } 6442 mce.bogusLiterals = bogus; 6443 } 6444 6445 /* Copy verbatim any IR preamble preceding the first IMark */ 6446 6447 tl_assert(mce.sb == sb_out); 6448 tl_assert(mce.sb != sb_in); 6449 6450 i = 0; 6451 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) { 6452 6453 st = sb_in->stmts[i]; 6454 tl_assert(st); 6455 tl_assert(isFlatIRStmt(st)); 6456 6457 stmt( 'C', &mce, sb_in->stmts[i] ); 6458 i++; 6459 } 6460 6461 /* Nasty problem. IR optimisation of the pre-instrumented IR may 6462 cause the IR following the preamble to contain references to IR 6463 temporaries defined in the preamble. Because the preamble isn't 6464 instrumented, these temporaries don't have any shadows. 6465 Nevertheless uses of them following the preamble will cause 6466 memcheck to generate references to their shadows. End effect is 6467 to cause IR sanity check failures, due to references to 6468 non-existent shadows. This is only evident for the complex 6469 preambles used for function wrapping on TOC-afflicted platforms 6470 (ppc64-linux). 6471 6472 The following loop therefore scans the preamble looking for 6473 assignments to temporaries. For each one found it creates an 6474 assignment to the corresponding (V) shadow temp, marking it as 6475 'defined'. This is the same resulting IR as if the main 6476 instrumentation loop before had been applied to the statement 6477 'tmp = CONSTANT'. 
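      For example (a sketch): if the preamble contains
      "t5 = GET:I64(..)", the loop below emits an assignment of a
      zero (all-defined) I64 constant to t5's V shadow temporary.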
6478 6479 Similarly, if origin tracking is enabled, we must generate an 6480 assignment for the corresponding origin (B) shadow, claiming 6481 no-origin, as appropriate for a defined value. 6482 */ 6483 for (j = 0; j < i; j++) { 6484 if (sb_in->stmts[j]->tag == Ist_WrTmp) { 6485 /* findShadowTmpV checks its arg is an original tmp; 6486 no need to assert that here. */ 6487 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp; 6488 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o); 6489 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v); 6490 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) ); 6491 if (MC_(clo_mc_level) == 3) { 6492 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o); 6493 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32); 6494 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */); 6495 } 6496 if (0) { 6497 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j); 6498 ppIRType( ty_v ); 6499 VG_(printf)("\n"); 6500 } 6501 } 6502 } 6503 6504 /* Iterate over the remaining stmts to generate instrumentation. */ 6505 6506 tl_assert(sb_in->stmts_used > 0); 6507 tl_assert(i >= 0); 6508 tl_assert(i < sb_in->stmts_used); 6509 tl_assert(sb_in->stmts[i]->tag == Ist_IMark); 6510 6511 for (/* use current i*/; i < sb_in->stmts_used; i++) { 6512 6513 st = sb_in->stmts[i]; 6514 first_stmt = sb_out->stmts_used; 6515 6516 if (verboze) { 6517 VG_(printf)("\n"); 6518 ppIRStmt(st); 6519 VG_(printf)("\n"); 6520 } 6521 6522 if (MC_(clo_mc_level) == 3) { 6523 /* See comments on case Ist_CAS below. */ 6524 if (st->tag != Ist_CAS) 6525 schemeS( &mce, st ); 6526 } 6527 6528 /* Generate instrumentation code for each stmt ... */ 6529 6530 switch (st->tag) { 6531 6532 case Ist_WrTmp: 6533 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp), 6534 expr2vbits( &mce, st->Ist.WrTmp.data) ); 6535 break; 6536 6537 case Ist_Put: 6538 do_shadow_PUT( &mce, 6539 st->Ist.Put.offset, 6540 st->Ist.Put.data, 6541 NULL /* shadow atom */, NULL /* guard */ ); 6542 break; 6543 6544 case Ist_PutI: 6545 do_shadow_PUTI( &mce, st->Ist.PutI.details); 6546 break; 6547 6548 case Ist_Store: 6549 do_shadow_Store( &mce, st->Ist.Store.end, 6550 st->Ist.Store.addr, 0/* addr bias */, 6551 st->Ist.Store.data, 6552 NULL /* shadow data */, 6553 NULL/*guard*/ ); 6554 break; 6555 6556 case Ist_StoreG: 6557 do_shadow_StoreG( &mce, st->Ist.StoreG.details ); 6558 break; 6559 6560 case Ist_LoadG: 6561 do_shadow_LoadG( &mce, st->Ist.LoadG.details ); 6562 break; 6563 6564 case Ist_Exit: 6565 complainIfUndefined( &mce, st->Ist.Exit.guard, NULL ); 6566 break; 6567 6568 case Ist_IMark: 6569 break; 6570 6571 case Ist_NoOp: 6572 case Ist_MBE: 6573 break; 6574 6575 case Ist_Dirty: 6576 do_shadow_Dirty( &mce, st->Ist.Dirty.details ); 6577 break; 6578 6579 case Ist_AbiHint: 6580 do_AbiHint( &mce, st->Ist.AbiHint.base, 6581 st->Ist.AbiHint.len, 6582 st->Ist.AbiHint.nia ); 6583 break; 6584 6585 case Ist_CAS: 6586 do_shadow_CAS( &mce, st->Ist.CAS.details ); 6587 /* Note, do_shadow_CAS copies the CAS itself to the output 6588 block, because it needs to add instrumentation both 6589 before and after it. Hence skip the copy below. Also 6590 skip the origin-tracking stuff (call to schemeS) above, 6591 since that's all tangled up with it too; do_shadow_CAS 6592 does it all. 
*/ 6593 break; 6594 6595 case Ist_LLSC: 6596 do_shadow_LLSC( &mce, 6597 st->Ist.LLSC.end, 6598 st->Ist.LLSC.result, 6599 st->Ist.LLSC.addr, 6600 st->Ist.LLSC.storedata ); 6601 break; 6602 6603 default: 6604 VG_(printf)("\n"); 6605 ppIRStmt(st); 6606 VG_(printf)("\n"); 6607 VG_(tool_panic)("memcheck: unhandled IRStmt"); 6608 6609 } /* switch (st->tag) */ 6610 6611 if (0 && verboze) { 6612 for (j = first_stmt; j < sb_out->stmts_used; j++) { 6613 VG_(printf)(" "); 6614 ppIRStmt(sb_out->stmts[j]); 6615 VG_(printf)("\n"); 6616 } 6617 VG_(printf)("\n"); 6618 } 6619 6620 /* ... and finally copy the stmt itself to the output. Except, 6621 skip the copy of IRCASs; see comments on case Ist_CAS 6622 above. */ 6623 if (st->tag != Ist_CAS) 6624 stmt('C', &mce, st); 6625 } 6626 6627 /* Now we need to complain if the jump target is undefined. */ 6628 first_stmt = sb_out->stmts_used; 6629 6630 if (verboze) { 6631 VG_(printf)("sb_in->next = "); 6632 ppIRExpr(sb_in->next); 6633 VG_(printf)("\n\n"); 6634 } 6635 6636 complainIfUndefined( &mce, sb_in->next, NULL ); 6637 6638 if (0 && verboze) { 6639 for (j = first_stmt; j < sb_out->stmts_used; j++) { 6640 VG_(printf)(" "); 6641 ppIRStmt(sb_out->stmts[j]); 6642 VG_(printf)("\n"); 6643 } 6644 VG_(printf)("\n"); 6645 } 6646 6647 /* If this fails, there's been some serious snafu with tmp management, 6648 that should be investigated. */ 6649 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used ); 6650 VG_(deleteXA)( mce.tmpMap ); 6651 6652 tl_assert(mce.sb == sb_out); 6653 return sb_out; 6654 } 6655 6656 6657 /*------------------------------------------------------------*/ 6658 /*--- Post-tree-build final tidying ---*/ 6659 /*------------------------------------------------------------*/ 6660 6661 /* This exploits the observation that Memcheck often produces 6662 repeated conditional calls of the form 6663 6664 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag) 6665 6666 with the same guard expression G guarding the same helper call. 6667 The second and subsequent calls are redundant. This usually 6668 results from instrumentation of guest code containing multiple 6669 memory references at different constant offsets from the same base 6670 register. After optimisation of the instrumentation, you get a 6671 test for the definedness of the base register for each memory 6672 reference, which is kinda pointless. MC_(final_tidy) therefore 6673 looks for such repeated calls and removes all but the first. */ 6674 6675 6676 /* With some testing on perf/bz2.c, on amd64 and x86, compiled with 6677 gcc-5.3.1 -O2, it appears that 16 entries in the array are enough to 6678 get almost all the benefits of this transformation whilst causing 6679 the slide-back case to just often enough to be verifiably 6680 correct. 
For posterity, the numbers are: 6681 6682 bz2-32 6683 6684 1 4,336 (112,212 -> 1,709,473; ratio 15.2) 6685 2 4,336 (112,194 -> 1,669,895; ratio 14.9) 6686 3 4,336 (112,194 -> 1,660,713; ratio 14.8) 6687 4 4,336 (112,194 -> 1,658,555; ratio 14.8) 6688 5 4,336 (112,194 -> 1,655,447; ratio 14.8) 6689 6 4,336 (112,194 -> 1,655,101; ratio 14.8) 6690 7 4,336 (112,194 -> 1,654,858; ratio 14.7) 6691 8 4,336 (112,194 -> 1,654,810; ratio 14.7) 6692 10 4,336 (112,194 -> 1,654,621; ratio 14.7) 6693 12 4,336 (112,194 -> 1,654,678; ratio 14.7) 6694 16 4,336 (112,194 -> 1,654,494; ratio 14.7) 6695 32 4,336 (112,194 -> 1,654,602; ratio 14.7) 6696 inf 4,336 (112,194 -> 1,654,602; ratio 14.7) 6697 6698 bz2-64 6699 6700 1 4,113 (107,329 -> 1,822,171; ratio 17.0) 6701 2 4,113 (107,329 -> 1,806,443; ratio 16.8) 6702 3 4,113 (107,329 -> 1,803,967; ratio 16.8) 6703 4 4,113 (107,329 -> 1,802,785; ratio 16.8) 6704 5 4,113 (107,329 -> 1,802,412; ratio 16.8) 6705 6 4,113 (107,329 -> 1,802,062; ratio 16.8) 6706 7 4,113 (107,329 -> 1,801,976; ratio 16.8) 6707 8 4,113 (107,329 -> 1,801,886; ratio 16.8) 6708 10 4,113 (107,329 -> 1,801,653; ratio 16.8) 6709 12 4,113 (107,329 -> 1,801,526; ratio 16.8) 6710 16 4,113 (107,329 -> 1,801,298; ratio 16.8) 6711 32 4,113 (107,329 -> 1,800,827; ratio 16.8) 6712 inf 4,113 (107,329 -> 1,800,827; ratio 16.8) 6713 */ 6714 6715 /* Structs for recording which (helper, guard) pairs we have already 6716 seen. */ 6717 6718 #define N_TIDYING_PAIRS 16 6719 6720 typedef 6721 struct { void* entry; IRExpr* guard; } 6722 Pair; 6723 6724 typedef 6725 struct { 6726 Pair pairs[N_TIDYING_PAIRS +1/*for bounds checking*/]; 6727 UInt pairsUsed; 6728 } 6729 Pairs; 6730 6731 6732 /* Return True if e1 and e2 definitely denote the same value (used to 6733 compare guards). Return False if unknown; False is the safe 6734 answer. Since guest registers and guest memory do not have the 6735 SSA property we must return False if any Gets or Loads appear in 6736 the expression. This implicitly assumes that e1 and e2 have the 6737 same IR type, which is always true here -- the type is Ity_I1. */ 6738 6739 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 ) 6740 { 6741 if (e1->tag != e2->tag) 6742 return False; 6743 switch (e1->tag) { 6744 case Iex_Const: 6745 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con ); 6746 case Iex_Binop: 6747 return e1->Iex.Binop.op == e2->Iex.Binop.op 6748 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1) 6749 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2); 6750 case Iex_Unop: 6751 return e1->Iex.Unop.op == e2->Iex.Unop.op 6752 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg); 6753 case Iex_RdTmp: 6754 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp; 6755 case Iex_ITE: 6756 return sameIRValue( e1->Iex.ITE.cond, e2->Iex.ITE.cond ) 6757 && sameIRValue( e1->Iex.ITE.iftrue, e2->Iex.ITE.iftrue ) 6758 && sameIRValue( e1->Iex.ITE.iffalse, e2->Iex.ITE.iffalse ); 6759 case Iex_Qop: 6760 case Iex_Triop: 6761 case Iex_CCall: 6762 /* be lazy. Could define equality for these, but they never 6763 appear to be used. 
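         Returning False here is harmless in any case: the worst that
         can happen is that a redundant helper call is retained,
         never that a needed one is deleted.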
*/ 6764 return False; 6765 case Iex_Get: 6766 case Iex_GetI: 6767 case Iex_Load: 6768 /* be conservative - these may not give the same value each 6769 time */ 6770 return False; 6771 case Iex_Binder: 6772 /* should never see this */ 6773 /* fallthrough */ 6774 default: 6775 VG_(printf)("mc_translate.c: sameIRValue: unhandled: "); 6776 ppIRExpr(e1); 6777 VG_(tool_panic)("memcheck:sameIRValue"); 6778 return False; 6779 } 6780 } 6781 6782 /* See if 'pairs' already has an entry for (entry, guard). Return 6783 True if so. If not, add an entry. */ 6784 6785 static 6786 Bool check_or_add ( Pairs* tidyingEnv, IRExpr* guard, void* entry ) 6787 { 6788 UInt i, n = tidyingEnv->pairsUsed; 6789 tl_assert(n <= N_TIDYING_PAIRS); 6790 for (i = 0; i < n; i++) { 6791 if (tidyingEnv->pairs[i].entry == entry 6792 && sameIRValue(tidyingEnv->pairs[i].guard, guard)) 6793 return True; 6794 } 6795 /* (guard, entry) wasn't found in the array. Add it at the end. 6796 If the array is already full, slide the entries one slot 6797 backwards. This means we will lose to ability to detect 6798 duplicates from the pair in slot zero, but that happens so 6799 rarely that it's unlikely to have much effect on overall code 6800 quality. Also, this strategy loses the check for the oldest 6801 tracked exit (memory reference, basically) and so that is (I'd 6802 guess) least likely to be re-used after this point. */ 6803 tl_assert(i == n); 6804 if (n == N_TIDYING_PAIRS) { 6805 for (i = 1; i < N_TIDYING_PAIRS; i++) { 6806 tidyingEnv->pairs[i-1] = tidyingEnv->pairs[i]; 6807 } 6808 tidyingEnv->pairs[N_TIDYING_PAIRS-1].entry = entry; 6809 tidyingEnv->pairs[N_TIDYING_PAIRS-1].guard = guard; 6810 } else { 6811 tl_assert(n < N_TIDYING_PAIRS); 6812 tidyingEnv->pairs[n].entry = entry; 6813 tidyingEnv->pairs[n].guard = guard; 6814 n++; 6815 tidyingEnv->pairsUsed = n; 6816 } 6817 return False; 6818 } 6819 6820 static Bool is_helperc_value_checkN_fail ( const HChar* name ) 6821 { 6822 /* This is expensive because it happens a lot. We are checking to 6823 see whether |name| is one of the following 8 strings: 6824 6825 MC_(helperc_value_check8_fail_no_o) 6826 MC_(helperc_value_check4_fail_no_o) 6827 MC_(helperc_value_check0_fail_no_o) 6828 MC_(helperc_value_check1_fail_no_o) 6829 MC_(helperc_value_check8_fail_w_o) 6830 MC_(helperc_value_check0_fail_w_o) 6831 MC_(helperc_value_check1_fail_w_o) 6832 MC_(helperc_value_check4_fail_w_o) 6833 6834 To speed it up, check the common prefix just once, rather than 6835 all 8 times. 6836 */ 6837 const HChar* prefix = "MC_(helperc_value_check"; 6838 6839 HChar n, p; 6840 while (True) { 6841 n = *name; 6842 p = *prefix; 6843 if (p == 0) break; /* ran off the end of the prefix */ 6844 /* We still have some prefix to use */ 6845 if (n == 0) return False; /* have prefix, but name ran out */ 6846 if (n != p) return False; /* have both pfx and name, but no match */ 6847 name++; 6848 prefix++; 6849 } 6850 6851 /* Check the part after the prefix. 
*/ 6852 tl_assert(*prefix == 0 && *name != 0); 6853 return 0==VG_(strcmp)(name, "8_fail_no_o)") 6854 || 0==VG_(strcmp)(name, "4_fail_no_o)") 6855 || 0==VG_(strcmp)(name, "0_fail_no_o)") 6856 || 0==VG_(strcmp)(name, "1_fail_no_o)") 6857 || 0==VG_(strcmp)(name, "8_fail_w_o)") 6858 || 0==VG_(strcmp)(name, "4_fail_w_o)") 6859 || 0==VG_(strcmp)(name, "0_fail_w_o)") 6860 || 0==VG_(strcmp)(name, "1_fail_w_o)"); 6861 } 6862 6863 IRSB* MC_(final_tidy) ( IRSB* sb_in ) 6864 { 6865 Int i; 6866 IRStmt* st; 6867 IRDirty* di; 6868 IRExpr* guard; 6869 IRCallee* cee; 6870 Bool alreadyPresent; 6871 Pairs pairs; 6872 6873 pairs.pairsUsed = 0; 6874 6875 pairs.pairs[N_TIDYING_PAIRS].entry = (void*)0x123; 6876 pairs.pairs[N_TIDYING_PAIRS].guard = (IRExpr*)0x456; 6877 6878 /* Scan forwards through the statements. Each time a call to one 6879 of the relevant helpers is seen, check if we have made a 6880 previous call to the same helper using the same guard 6881 expression, and if so, delete the call. */ 6882 for (i = 0; i < sb_in->stmts_used; i++) { 6883 st = sb_in->stmts[i]; 6884 tl_assert(st); 6885 if (st->tag != Ist_Dirty) 6886 continue; 6887 di = st->Ist.Dirty.details; 6888 guard = di->guard; 6889 tl_assert(guard); 6890 if (0) { ppIRExpr(guard); VG_(printf)("\n"); } 6891 cee = di->cee; 6892 if (!is_helperc_value_checkN_fail( cee->name )) 6893 continue; 6894 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with 6895 guard 'guard'. Check if we have already seen a call to this 6896 function with the same guard. If so, delete it. If not, 6897 add it to the set of calls we do know about. */ 6898 alreadyPresent = check_or_add( &pairs, guard, cee->addr ); 6899 if (alreadyPresent) { 6900 sb_in->stmts[i] = IRStmt_NoOp(); 6901 if (0) VG_(printf)("XX\n"); 6902 } 6903 } 6904 6905 tl_assert(pairs.pairs[N_TIDYING_PAIRS].entry == (void*)0x123); 6906 tl_assert(pairs.pairs[N_TIDYING_PAIRS].guard == (IRExpr*)0x456); 6907 6908 return sb_in; 6909 } 6910 6911 #undef N_TIDYING_PAIRS 6912 6913 6914 /*------------------------------------------------------------*/ 6915 /*--- Origin tracking stuff ---*/ 6916 /*------------------------------------------------------------*/ 6917 6918 /* Almost identical to findShadowTmpV. */ 6919 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig ) 6920 { 6921 TempMapEnt* ent; 6922 /* VG_(indexXA) range-checks 'orig', hence no need to check 6923 here. */ 6924 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 6925 tl_assert(ent->kind == Orig); 6926 if (ent->shadowB == IRTemp_INVALID) { 6927 IRTemp tmpB 6928 = newTemp( mce, Ity_I32, BSh ); 6929 /* newTemp may cause mce->tmpMap to resize, hence previous results 6930 from VG_(indexXA) are invalid. */ 6931 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 6932 tl_assert(ent->kind == Orig); 6933 tl_assert(ent->shadowB == IRTemp_INVALID); 6934 ent->shadowB = tmpB; 6935 } 6936 return ent->shadowB; 6937 } 6938 6939 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 ) 6940 { 6941 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) ); 6942 } 6943 6944 6945 /* Make a guarded origin load, with no special handling in the 6946 didn't-happen case. A GUARD of NULL is assumed to mean "always 6947 True". 6948 6949 Generate IR to do a shadow origins load from BASEADDR+OFFSET and 6950 return the otag. The loaded size is SZB. If GUARD evaluates to 6951 False at run time then the returned otag is zero. 
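   (Strictly speaking, as the comment in the body below notes, the
   didn't-happen value is currently the IR-mandated default for an
   unexecuted guarded dirty call (the 0x55..55 pattern) rather than
   zero, so callers passing a real guard should not rely on it;
   expr2ori_Load_guarded_General below discards it with an ITE.)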
6952 */ 6953 static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, 6954 IRAtom* baseaddr, 6955 Int offset, IRExpr* guard ) 6956 { 6957 void* hFun; 6958 const HChar* hName; 6959 IRTemp bTmp; 6960 IRDirty* di; 6961 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); 6962 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64; 6963 IRAtom* ea = baseaddr; 6964 if (offset != 0) { 6965 IRAtom* off = aTy == Ity_I32 ? mkU32( offset ) 6966 : mkU64( (Long)(Int)offset ); 6967 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off)); 6968 } 6969 bTmp = newTemp(mce, mce->hWordTy, BSh); 6970 6971 switch (szB) { 6972 case 1: hFun = (void*)&MC_(helperc_b_load1); 6973 hName = "MC_(helperc_b_load1)"; 6974 break; 6975 case 2: hFun = (void*)&MC_(helperc_b_load2); 6976 hName = "MC_(helperc_b_load2)"; 6977 break; 6978 case 4: hFun = (void*)&MC_(helperc_b_load4); 6979 hName = "MC_(helperc_b_load4)"; 6980 break; 6981 case 8: hFun = (void*)&MC_(helperc_b_load8); 6982 hName = "MC_(helperc_b_load8)"; 6983 break; 6984 case 16: hFun = (void*)&MC_(helperc_b_load16); 6985 hName = "MC_(helperc_b_load16)"; 6986 break; 6987 case 32: hFun = (void*)&MC_(helperc_b_load32); 6988 hName = "MC_(helperc_b_load32)"; 6989 break; 6990 default: 6991 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB); 6992 tl_assert(0); 6993 } 6994 di = unsafeIRDirty_1_N( 6995 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ), 6996 mkIRExprVec_1( ea ) 6997 ); 6998 if (guard) { 6999 di->guard = guard; 7000 /* Ideally the didn't-happen return value here would be 7001 all-zeroes (unknown-origin), so it'd be harmless if it got 7002 used inadvertently. We slum it out with the IR-mandated 7003 default value (0b01 repeating, 0x55 etc) as that'll probably 7004 trump all legitimate otags via Max32, and it's pretty 7005 obviously bogus. */ 7006 } 7007 /* no need to mess with any annotations. This call accesses 7008 neither guest state nor guest memory. */ 7009 stmt( 'B', mce, IRStmt_Dirty(di) ); 7010 if (mce->hWordTy == Ity_I64) { 7011 /* 64-bit host */ 7012 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh); 7013 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) ); 7014 return mkexpr(bTmp32); 7015 } else { 7016 /* 32-bit host */ 7017 return mkexpr(bTmp); 7018 } 7019 } 7020 7021 7022 /* Generate IR to do a shadow origins load from BASEADDR+OFFSET. The 7023 loaded size is SZB. The load is regarded as unconditional (always 7024 happens). 7025 */ 7026 static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr, 7027 Int offset ) 7028 { 7029 return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/); 7030 } 7031 7032 7033 /* The most general handler for guarded origin loads. A GUARD of NULL 7034 is assumed to mean "always True". 7035 7036 Generate IR to do a shadow origin load from ADDR+BIAS and return 7037 the B bits. The loaded type is TY. If GUARD evaluates to False at 7038 run time then the returned B bits are simply BALT instead. 7039 */ 7040 static 7041 IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce, 7042 IRType ty, 7043 IRAtom* addr, UInt bias, 7044 IRAtom* guard, IRAtom* balt ) 7045 { 7046 /* If the guard evaluates to True, this will hold the loaded 7047 origin. If the guard evaluates to False, this will be zero, 7048 meaning "unknown origin", in which case we will have to replace 7049 it using an ITE below. 
   IRAtom* iftrue
      = assignNew('B', mce, Ity_I32,
                  gen_guarded_load_b(mce, sizeofIRType(ty),
                                     addr, bias, guard));
   /* These are the bits we will return if the load doesn't take
      place. */
   IRAtom* iffalse
      = balt;
   /* Prepare the cond for the ITE.  Convert a NULL cond into
      something that iropt knows how to fold out later. */
   IRAtom* cond
      = guard == NULL  ? mkU1(1)  : guard;
   /* And assemble the final result. */
   return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse));
}


/* Generate a shadow origins store.  guard :: Ity_I1 controls whether
   the store really happens; NULL means it unconditionally does. */
static void gen_store_b ( MCEnv* mce, Int szB,
                          IRAtom* baseaddr, Int offset, IRAtom* dataB,
                          IRAtom* guard )
{
   void*        hFun;
   const HChar* hName;
   IRDirty*     di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   if (mce->hWordTy == Ity_I64)
      dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));

   switch (szB) {
      case 1: hFun  = (void*)&MC_(helperc_b_store1);
              hName = "MC_(helperc_b_store1)";
              break;
      case 2: hFun  = (void*)&MC_(helperc_b_store2);
              hName = "MC_(helperc_b_store2)";
              break;
      case 4: hFun  = (void*)&MC_(helperc_b_store4);
              hName = "MC_(helperc_b_store4)";
              break;
      case 8: hFun  = (void*)&MC_(helperc_b_store8);
              hName = "MC_(helperc_b_store8)";
              break;
      case 16: hFun  = (void*)&MC_(helperc_b_store16);
               hName = "MC_(helperc_b_store16)";
               break;
      case 32: hFun  = (void*)&MC_(helperc_b_store32);
               hName = "MC_(helperc_b_store32)";
               break;
      default:
         tl_assert(0);
   }
   di = unsafeIRDirty_0_N( 2/*regparms*/,
           hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_2( ea, dataB )
        );
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   if (guard) di->guard = guard;
   stmt( 'B', mce, IRStmt_Dirty(di) );
}

static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   if (eTy == Ity_I64)
      return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
   if (eTy == Ity_I32)
      return e;
   tl_assert(0);
}

static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   tl_assert(eTy == Ity_I32);
   if (dstTy == Ity_I64)
      return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
   tl_assert(0);
}


static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (e->tag) {

      case Iex_GetI: {
         IRRegArray* descr_b;
         IRAtom     *t1, *t2, *t3, *t4;
         IRRegArray* descr = e->Iex.GetI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason, use the
            usual approximation. */
         if (equivIntTy == Ity_INVALID)
            return mkU32(0);
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                                 equivIntTy, descr->nElems );
         /* Do a shadow indexed get of the same size, giving t1.  Take
            the bottom 32 bits of it, giving t2.  Compute into t3 the
            origin for the index (almost certainly zero, but there's
            no harm in being completely general here, since iropt will
            remove any useless code), and fold it in, giving a final
            value t4. */
         t1 = assignNew( 'B', mce, equivIntTy,
                         IRExpr_GetI( descr_b, e->Iex.GetI.ix,
                                      e->Iex.GetI.bias ));
         t2 = narrowTo32( mce, t1 );
         t3 = schemeE( mce, e->Iex.GetI.ix );
         t4 = gen_maxU32( mce, t2, t3 );
         return t4;
      }
      case Iex_CCall: {
         Int i;
         IRAtom*  here;
         IRExpr** args = e->Iex.CCall.args;
         IRAtom*  curr = mkU32(0);
         for (i = 0; args[i]; i++) {
            tl_assert(i < 32);
            tl_assert(isOriginalAtom(mce, args[i]));
            /* Only take notice of this arg if the callee's
               mc-exclusion mask does not say it is to be excluded. */
            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
               /* the arg is to be excluded from definedness checking.
                  Do nothing. */
               if (0) VG_(printf)("excluding %s(%d)\n",
                                  e->Iex.CCall.cee->name, i);
            } else {
               /* calculate the arg's definedness, and pessimistically
                  merge it in. */
               here = schemeE( mce, args[i] );
               curr = gen_maxU32( mce, curr, here );
            }
         }
         return curr;
      }
      case Iex_Load: {
         Int dszB;
         dszB = sizeofIRType(e->Iex.Load.ty);
         /* assert that the B value for the address is already
            available (somewhere) */
         tl_assert(isIRAtom(e->Iex.Load.addr));
         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
      }
      case Iex_ITE: {
         IRAtom* b1 = schemeE( mce, e->Iex.ITE.cond );
         IRAtom* b3 = schemeE( mce, e->Iex.ITE.iftrue );
         IRAtom* b2 = schemeE( mce, e->Iex.ITE.iffalse );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
      }
      case Iex_Qop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
         IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
                                 gen_maxU32( mce, b3, b4 ) );
      }
      case Iex_Triop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
      }
      case Iex_Binop: {
         switch (e->Iex.Binop.op) {
            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
               /* Just say these all produce a defined result,
                  regardless of their arguments.  See
                  COMMENT_ON_CasCmpEQ in this file. */
               return mkU32(0);
            default: {
               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
               return gen_maxU32( mce, b1, b2 );
            }
         }
         tl_assert(0);
         /*NOTREACHED*/
      }
      case Iex_Unop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
         return b1;
      }
      case Iex_Const:
         return mkU32(0);
      case Iex_RdTmp:
         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
      case Iex_Get: {
         Int b_offset = MC_(get_otrack_shadow_offset)(
                           e->Iex.Get.offset,
                           sizeofIRType(e->Iex.Get.ty)
                        );
         tl_assert(b_offset >= -1
                   && b_offset <= mce->layout->total_sizeB -4);
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
                               Ity_I32 );
         }
         return mkU32(0);
      }
      default:
         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
         ppIRExpr(e);
         VG_(tool_panic)("memcheck:schemeE");
   }
}


static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
{
   // This is a hacked version of do_shadow_Dirty
   Int       i, k, n, toDo, gSz, gOff;
   IRAtom    *here, *curr;
   IRTemp    dst;

   /* First check the guard. */
   curr = schemeE( mce, d->guard );

   /* Now round up all inputs and maxU32 over them. */

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression */
   for (i = 0; d->args[i]; i++) {
      IRAtom* arg = d->args[i];
      if ( (d->cee->mcx_mask & (1<<i))
           || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)) ) {
         /* ignore this arg */
      } else {
         here = schemeE( mce, arg );
         curr = gen_maxU32( mce, curr, here );
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
               VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                           gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* update 'curr' with maxU32 of the state slice
               gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {
               /* Observe the guard expression.  If it is false use 0, i.e.
                  nothing is known about the origin */
               IRAtom *cond, *iffalse, *iftrue;

               cond    = assignNew( 'B', mce, Ity_I1, d->guard);
               iffalse = mkU32(0);
               iftrue  = assignNew( 'B', mce, Ity_I32,
                                    IRExpr_Get(b_offset
                                                  + 2*mce->layout->total_sizeB,
                                               Ity_I32));
               here = assignNew( 'B', mce, Ity_I32,
                                 IRExpr_ITE(cond, iftrue, iffalse));
               curr = gen_maxU32( mce, curr, here );
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
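      /* For illustration: the read/modify case below walks the memory
         region in 4-, 2- and 1-byte chunks.  With d->mSize == 7, say,
         it emits guarded origin loads of sizes 4, 2 and 1 at offsets
         0, 4 and 6 respectively, folding each loaded otag into 'curr'
         with Max32U. */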
      tl_assert(d->mAddr);
      here = schemeE( mce, d->mAddr );
      curr = gen_maxU32( mce, curr, here );
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit B-value which should give an origin
      of some use if any of the inputs to the helper are undefined.
      Now we need to re-distribute the results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst = findShadowTmpB(mce, d->tmp);
      assign( 'V', mce, dst, curr );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* Write 'curr' to the state slice gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {

               /* If the guard expression evaluates to false we simply Put
                  the value that is already stored in the guest state slot */
               IRAtom *cond, *iffalse;

               cond    = assignNew('B', mce, Ity_I1,
                                   d->guard);
               iffalse = assignNew('B', mce, Ity_I32,
                                   IRExpr_Get(b_offset +
                                              2*mce->layout->total_sizeB,
                                              Ity_I32));
               curr = assignNew('V', mce, Ity_I32,
                                IRExpr_ITE(cond, curr, iffalse));

               stmt( 'B', mce, IRStmt_Put(b_offset
                                             + 2*mce->layout->total_sizeB,
                                          curr ));
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }
}


/* Generate IR for origin shadowing for a general guarded store. */
static void do_origins_Store_guarded ( MCEnv* mce,
                                       IREndness stEnd,
                                       IRExpr* stAddr,
                                       IRExpr* stData,
                                       IRExpr* guard )
{
   Int     dszB;
   IRAtom* dataB;
   /* assert that the B value for the address is already available
      (somewhere), since the call to schemeE will want to see it.
      XXXX how does this actually ensure that?? */
   tl_assert(isIRAtom(stAddr));
   tl_assert(isIRAtom(stData));
   dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
   dataB = schemeE( mce, stData );
   gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard );
}


/* Generate IR for origin shadowing for a plain store. */
static void do_origins_Store_plain ( MCEnv* mce,
                                     IREndness stEnd,
                                     IRExpr* stAddr,
                                     IRExpr* stData )
{
   do_origins_Store_guarded ( mce, stEnd, stAddr, stData,
                              NULL/*guard*/ );
}


/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */

static void do_origins_StoreG ( MCEnv* mce, IRStoreG* sg )
{
   do_origins_Store_guarded( mce, sg->end, sg->addr,
                             sg->data, sg->guard );
}

static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
{
   IRType loadedTy = Ity_INVALID;
   switch (lg->cvt) {
      case ILGop_IdentV128: loadedTy = Ity_V128; break;
      case ILGop_Ident64:   loadedTy = Ity_I64;  break;
      case ILGop_Ident32:   loadedTy = Ity_I32;  break;
      case ILGop_16Uto32:   loadedTy = Ity_I16;  break;
      case ILGop_16Sto32:   loadedTy = Ity_I16;  break;
      case ILGop_8Uto32:    loadedTy = Ity_I8;   break;
      case ILGop_8Sto32:    loadedTy = Ity_I8;   break;
      default: VG_(tool_panic)("schemeS.IRLoadG");
   }
   IRAtom* ori_alt
      = schemeE( mce, lg->alt );
   IRAtom* ori_final
      = expr2ori_Load_guarded_General(mce, loadedTy,
                                      lg->addr, 0/*addr bias*/,
                                      lg->guard, ori_alt );
   /* And finally, bind the origin to the destination temporary. */
   assign( 'B', mce, findShadowTmpB(mce, lg->dst), ori_final );
}


static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRPutI *puti = st->Ist.PutI.details;
         IRRegArray* descr_b;
         IRAtom     *t1, *t2, *t3, *t4;
         IRRegArray* descr = puti->descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
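         /* For the shadowable case, a rough sketch of what the code
            below emits:

               PutI(descr_b, ix, bias,
                    zwiden(Max32U(schemeE(data), schemeE(ix))))

            where descr_b describes the B-shadow (origin) area of the
            array and zwiden is zWidenFrom32 to the array's
            integer-equivalent element type. */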
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, puti->data );
         t2 = schemeE( mce, puti->ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
                                               puti->bias, t4) ));
         break;
      }

      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store_plain( mce, st->Ist.Store.end,
                                 st->Ist.Store.addr,
                                 st->Ist.Store.data );
         break;

      case Ist_StoreG:
         do_origins_StoreG( mce, st->Ist.StoreG.details );
         break;

      case Ist_LoadG:
         do_origins_LoadG( mce, st->Ist.LoadG.details );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data to the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store_plain( mce, st->Ist.LLSC.end,
                                    st->Ist.LLSC.addr,
                                    st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}


/*------------------------------------------------------------*/
/*--- Startup assertion checking                           ---*/
/*------------------------------------------------------------*/

void MC_(do_instrumentation_startup_checks)( void )
{
   /* Make a best-effort check to see that is_helperc_value_checkN_fail
      is working as we expect. */
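   /* For illustration, the first CHECK below expands to

         tl_assert((True) ==
                   is_helperc_value_checkN_fail(
                      "MC_(helperc_value_check8_fail_no_o)"));

      so each line is simply an assertion about one candidate name. */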

#  define CHECK(_expected, _string) \
      tl_assert((_expected) == is_helperc_value_checkN_fail(_string))

   /* It should identify these 8, and no others, as targets. */
   CHECK(True, "MC_(helperc_value_check8_fail_no_o)");
   CHECK(True, "MC_(helperc_value_check4_fail_no_o)");
   CHECK(True, "MC_(helperc_value_check0_fail_no_o)");
   CHECK(True, "MC_(helperc_value_check1_fail_no_o)");
   CHECK(True, "MC_(helperc_value_check8_fail_w_o)");
   CHECK(True, "MC_(helperc_value_check0_fail_w_o)");
   CHECK(True, "MC_(helperc_value_check1_fail_w_o)");
   CHECK(True, "MC_(helperc_value_check4_fail_w_o)");

   /* Ad-hoc selection of other strings gathered via a quick test. */
   CHECK(False, "amd64g_dirtyhelper_CPUID_avx2");
   CHECK(False, "amd64g_dirtyhelper_RDTSC");
   CHECK(False, "MC_(helperc_b_load1)");
   CHECK(False, "MC_(helperc_b_load2)");
   CHECK(False, "MC_(helperc_b_load4)");
   CHECK(False, "MC_(helperc_b_load8)");
   CHECK(False, "MC_(helperc_b_load16)");
   CHECK(False, "MC_(helperc_b_load32)");
   CHECK(False, "MC_(helperc_b_store1)");
   CHECK(False, "MC_(helperc_b_store2)");
   CHECK(False, "MC_(helperc_b_store4)");
   CHECK(False, "MC_(helperc_b_store8)");
   CHECK(False, "MC_(helperc_b_store16)");
   CHECK(False, "MC_(helperc_b_store32)");
   CHECK(False, "MC_(helperc_LOADV8)");
   CHECK(False, "MC_(helperc_LOADV16le)");
   CHECK(False, "MC_(helperc_LOADV32le)");
   CHECK(False, "MC_(helperc_LOADV64le)");
   CHECK(False, "MC_(helperc_LOADV128le)");
   CHECK(False, "MC_(helperc_LOADV256le)");
   CHECK(False, "MC_(helperc_STOREV16le)");
   CHECK(False, "MC_(helperc_STOREV32le)");
   CHECK(False, "MC_(helperc_STOREV64le)");
   CHECK(False, "MC_(helperc_STOREV8)");
   CHECK(False, "track_die_mem_stack_8");
   CHECK(False, "track_new_mem_stack_8_w_ECU");
   CHECK(False, "MC_(helperc_MAKE_STACK_UNINIT_w_o)");
   CHECK(False, "VG_(unknown_SP_update_w_ECU)");

#  undef CHECK
}


/*--------------------------------------------------------------------*/
/*--- end                                           mc_translate.c ---*/
/*--------------------------------------------------------------------*/