
/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2013 Julian Seward
      jseward (at) acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"     // For mc_include.h
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_machine.h"       // VG_(fnptr_to_fnentry)
#include "pub_tool_xarray.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_libcbase.h"

#include "mc_include.h"


/* FIXMEs JRS 2011-June-16.

   Check the interpretation for vector narrowing and widening ops,
   particularly the saturating ones.  I suspect they are either overly
   pessimistic and/or wrong.

   Iop_QandSQsh64x2 and friends (vector-by-vector bidirectional
   saturating shifts): the interpretation is overly pessimistic.
   See comments on the relevant cases below for details.

   Iop_Sh64Sx2 and friends (vector-by-vector bidirectional shifts,
   both rounding and non-rounding variants): ditto
*/

/* This file implements the Memcheck instrumentation, and in
   particular contains the core of its undefined value detection
   machinery.  For a comprehensive background of the terminology,
   algorithms and rationale used herein, read:

     Using Valgrind to detect undefined value errors with
     bit-precision

     Julian Seward and Nicholas Nethercote

     2005 USENIX Annual Technical Conference (General Track),
     Anaheim, CA, USA, April 10-15, 2005.

   ----

   Here is as good a place as any to record exactly when V bits are
   and should be checked, why, and what function is responsible.


   Memcheck complains when an undefined value is used:

   1. In the condition of a conditional branch.  Because it could
      cause incorrect control flow, and thus cause incorrect
      externally-visible behaviour.  [mc_translate.c:complainIfUndefined]

   2. As an argument to a system call, or as the value that specifies
      the system call number.  Because it could cause an incorrect
      externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]

   3. As the address in a load or store.  Because it could cause an
      incorrect value to be used later, which could cause
      externally-visible behaviour (eg. via incorrect control flow or
      an incorrect system call argument)  [complainIfUndefined]

   4. As the target address of a branch.  Because it could cause
      incorrect control flow.  [complainIfUndefined]

   5. As an argument to setenv, unsetenv, or putenv.  Because it could
      put an incorrect value into the external environment.
      [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]

   6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
      [complainIfUndefined]

   7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
      VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the
      user requested it.  [in memcheck.h]


   Memcheck also complains, but should not, when an undefined value is
   used:

   8. As the shift value in certain SIMD shift operations (but not in
      the standard integer shift operations).  This inconsistency is
      due to historical reasons.  [complainIfUndefined]


   Memcheck does not complain, but should, when an undefined value is
   used:

   9. As an input to a client request.  Because the client request may
      affect the visible behaviour -- see bug #144362 for an example
      involving the malloc replacements in vg_replace_malloc.c and
      VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
      isn't identified.  That bug report also has some info on how to
      solve the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]


   In practice, 1 and 2 account for the vast majority of cases.
*/

/* Generation of addr-definedness, addr-validity and
   guard-definedness checks pertaining to loads and stores (Iex_Load,
   Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
   loads/stores) was re-checked 11 May 2013. */

/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowTypeV ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );

static IRExpr *i128_const_zero(void);

/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value, and
   so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;
      IRTemp   shadowV;
      IRTemp   shadowB;
   }
   TempMapEnt;


/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.
         Note that it does not contain the type of each tmp.  If you
         want to know the type, look at the relevant entry in
         sb->tyenv.  It follows that at all times during the
         instrumentation process, the valid indices for tmpMap and
         sb->tyenv are identical, being 0 .. N-1 where N is the total
         number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: indicates whether we should use expensive
         interpretations of integer adds, since unfortunately LLVM
         uses them to do ORs in some circumstances.  Defaulted to True
         on MacOS and False everywhere else. */
      Bool useLLVMworkarounds;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      const VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_temps-1], which gives the current shadow for
   each original tmp, or INVALID_IRTEMP if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
   both the table in mce->sb and to our auxiliary mapping.  Note that
   newTemp may cause mce->tmpMap to resize, hence previous results
   from VG_(indexXA)(mce->tmpMap) are invalidated. */
static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
{
   Word       newIx;
   TempMapEnt ent;
   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
   ent.kind    = kind;
   ent.shadowV = IRTemp_INVALID;
   ent.shadowB = IRTemp_INVALID;
   newIx = VG_(addToXA)( mce->tmpMap, &ent );
   tl_assert(newIx == (Word)tmp);
   return tmp;
}
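
/* Usage sketch (illustrative only; not code used here): how the
   functions below cooperate once a shadow has been tested for
   definedness and must then be forced to 'defined'.  Because the IR
   is SSA, the old shadow temp cannot simply be overwritten; instead a
   fresh shadow temp is bound to the original and assigned to, which
   is the pattern complainIfUndefined uses later in this file.  'mce'
   and 'orig' are assumed to be in scope; assign, definedOfType and
   shadowTypeV are defined further down.

      IRTemp oldV = findShadowTmpV(mce, orig);   // current (tested) shadow
      newShadowTmpV(mce, orig);                  // abandon it; bind a new one
      assign('V', mce, findShadowTmpV(mce, orig),
                  definedOfType(shadowTypeV(mce->sb->tyenv->types[orig])));
*/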

/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead.

   This is the same as findShadowTmpV, except we don't bother to see
   if a shadow temp already existed -- we simply allocate a new one
   regardless. */
static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (1) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      ent->shadowV = tmpV;
   }
}


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == Orig;
   }
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == VSh || ent->kind == BSh;
   }
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, I128, V128, V256. */

static IRType shadowTypeV ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      case Ity_F16:  return Ity_I16;
      case Ity_F32:  return Ity_I32;
      case Ity_D32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_D64:  return Ity_I64;
      case Ity_F128: return Ity_I128;
      case Ity_D128: return Ity_I128;
      case Ity_V128: return Ity_V128;
      case Ity_V256: return Ity_V256;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowTypeV");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/I64/I128/V128/V256). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_I128: return i128_const_zero();
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      case Ity_V256: return IRExpr_Const(IRConst_V256(0x00000000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* add stmt to a bb */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   if (mce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}

/* assign value to tmp */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}

/* build various kinds of expressions */
#define triop(_op, _arg1, _arg2, _arg3) \
                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU1(_n)                 IRExpr_Const(IRConst_U1(_n))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))

/* Bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom.

   'ty' is the type of 'e' and hence the type that the new temporary
   needs to be.  But passing it in is redundant, since we can deduce
   the type merely by inspecting 'e'.
   So at least use that fact to assert that the two types agree. */
static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
{
   TempKind k;
   IRTemp   t;
   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);

   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
   switch (cat) {
      case 'V': k = VSh;  break;
      case 'B': k = BSh;  break;
      case 'C': k = Orig; break;
                /* happens when we are making up new "orig"
                   expressions, for IRCAS handling */
      default: tl_assert(0);
   }
   t = newTemp(mce, ty, k);
   assign(cat, mce, t, e);
   return mkexpr(t);
}


/*------------------------------------------------------------*/
/*--- Helper functions for 128-bit ops                     ---*/
/*------------------------------------------------------------*/

static IRExpr *i128_const_zero(void)
{
   IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
   return binop(Iop_64HLto128, z64, z64);
}

/* There are no I128-bit loads and/or stores [as generated by any
   current front ends].  So we do not need to worry about that in
   expr2vbits_Load */


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
   tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
   tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
   tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
   tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
   tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));

   return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_I128: return mkUifU128(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      case Ity_V256: return mkUifUV256(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
}

/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V256,
             binop(Iop_OrV256,
                   assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
                   vbits) );
}
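
/* Illustrative model (not used by the instrumentation): the
   definedness algebra built above, written out on concrete 8-bit
   values, with the usual V-bit convention of 0 = defined and
   1 = undefined.  DifD keeps a bit defined if either operand is
   defined there (And); UifU makes a bit undefined if either operand
   is undefined there (Or).  ImproveAND/ImproveOR are the
   data-dependent terms: a defined 0 forces an AND result bit to a
   defined 0, and a defined 1 forces an OR result bit to a defined 1.
   These terms are presumably DifD'd onto the plain UifU
   interpretation by the binop handling later in this file. */
static UChar model_DifD8       ( UChar v1, UChar v2 )  { return v1 & v2; }
static UChar model_UifU8       ( UChar v1, UChar v2 )  { return v1 | v2; }
static UChar model_ImproveAND8 ( UChar d,  UChar v  )  { return (UChar)(d | v); }
static UChar model_ImproveOR8  ( UChar d,  UChar v  )  { return (UChar)(~d | v); }
/* Example: for And8(x,y) with x = 0x0F fully defined (x# = 0x00) and
   y wholly undefined (y# = 0xFF), model_UifU8 gives 0xFF, yet
   model_ImproveAND8(0x0F, 0x00) == 0x0F, and model_DifD8-ing that in
   leaves the top four result bits defined -- correctly, since
   0 AND anything is 0. */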

/* --------- Pessimising casts. --------- */

/* The function returns an expression of type DST_TY.  If any of the VBITS
   is undefined (value == 1) the resulting expression has all bits set to
   1.  Otherwise, all bits are 0. */

static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;

   /* Note, dst_ty is a shadow type, not an original type. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* PCast the arg, then clone it. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   if (src_ty == Ity_I32 && dst_ty == Ity_V128) {
      /* PCast the arg, then clone it 4 times. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
      return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
   }

   if (src_ty == Ity_I32 && dst_ty == Ity_V256) {
      /* PCast the arg, then clone it 8 times. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      tmp = assignNew('V', mce, Ity_I64,  binop(Iop_32HLto64, tmp, tmp));
      tmp = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
      return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, tmp, tmp));
   }

   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
         the top half. */
      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
   }

   if (src_ty == Ity_V128 && dst_ty == Ity_I64) {
      /* Use InterleaveHI64x2 to copy the top half of the vector into
         the bottom half.  Then we can UifU it with the original, throw
         away the upper half of the result, and PCast-I64-to-I64
         the lower half. */
      // Generates vbits[127:64] : vbits[127:64]
      IRAtom* hi64hi64
         = assignNew('V', mce, Ity_V128,
                     binop(Iop_InterleaveHI64x2, vbits, vbits));
      // Generates
      //   UifU(vbits[127:64],vbits[127:64]) : UifU(vbits[127:64],vbits[63:0])
      //   == vbits[127:64] : UifU(vbits[127:64],vbits[63:0])
      IRAtom* lohi64
         = mkUifUV128(mce, hi64hi64, vbits);
      // Generates UifU(vbits[127:64],vbits[63:0])
      IRAtom* lo64
         = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, lohi64));
      // Generates
      //   PCast-to-I64( UifU(vbits[127:64], vbits[63:0] )
      //   == PCast-to-I64( vbits[127:0] )
      IRAtom* res
         = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, lo64));
      return res;
   }

   /* Else do it the slow way .. */
   /* First of all, collapse vbits down to a single bit. */
   tmp1 = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      case Ity_V256:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
                                                    tmp1, tmp1));
         tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
                                                    tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}

/* This is a minor variant.  It takes an arg of some type and returns
   a value of the same type.  The result consists entirely of Defined
   (zero) bits except its least significant bit, which is a PCast of
   the entire argument down to a single bit. */
static IRAtom* mkPCastXXtoXXlsb ( MCEnv* mce, IRAtom* varg, IRType ty )
{
   if (ty == Ity_V128) {
      /* --- Case for V128 --- */
      IRAtom* varg128 = varg;
      // generates: PCast-to-I64(varg128)
      IRAtom* pcdTo64 = mkPCastTo(mce, Ity_I64, varg128);
      // Now introduce zeros (defined bits) in the top 63 places
      // generates: Def--(63)--Def PCast-to-I1(varg128)
      IRAtom* d63pc
         = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcdTo64, mkU64(1)));
      // generates: Def--(64)--Def
      IRAtom* d64
         = definedOfType(Ity_I64);
      // generates: Def--(127)--Def PCast-to-I1(varg128)
      IRAtom* res
         = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, d64, d63pc));
      return res;
   }
   if (ty == Ity_I64) {
      /* --- Case for I64 --- */
      // PCast to 64
      IRAtom* pcd = mkPCastTo(mce, Ity_I64, varg);
      // Zero (Def) out the top 63 bits
      IRAtom* res
         = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcd, mkU64(1)));
      return res;
   }
   /*NOTREACHED*/
   tl_assert(0);
}
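
/* Illustrative model (not used by the instrumentation): the net
   effect of mkPCastTo on concrete 64-bit V values -- a fully defined
   input stays all-zeroes, while a single undefined bit anywhere
   smears undefinedness across every bit of the destination. */
static ULong model_PCast64to64 ( ULong vbits )
{
   return vbits == 0 ? 0ULL : ~0ULL;   /* all defined, or all undefined */
}
/* e.g. model_PCast64to64(0x0000000000000100ULL) == ~0ULL: one
   undefined bit makes the whole result undefined. */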

/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
/*
   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_I1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

   PCastTo<1> (
      -- naive version
      PCastTo<sz>( UifU<sz>(vxx, vyy) )

      `DifD<sz>`

      -- improvement term
      PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
   )

   where
     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise.

     vec = Or<sz>( vxx,   // 0 iff bit defined
                   vyy,   // 0 iff bit defined
                   Not<sz>(Xor<sz>( xx, yy ))   // 0 iff bits different
                 )

     If any bit of vec is 0, the result is defined and so the
     improvement term should produce 0...0, else it should produce
     1...1.

     Hence require for the improvement term:

        if vec == 1...1 then 1...1 else 0...0
     ->
        PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )

   This was extensively re-analysed and checked on 6 July 05.
*/
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   switch (ty) {
      case Ity_I16:
         opOR   = Iop_Or16;
         opDIFD = Iop_And16;
         opUIFU = Iop_Or16;
         opNOT  = Iop_Not16;
         opXOR  = Iop_Xor16;
         opCMP  = Iop_CmpEQ16;
         top    = mkU16(0xFFFF);
         break;
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
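
/* Illustrative model (not used by the instrumentation): the scheme
   implemented by expensiveCmpEQorNE, written out on concrete 32-bit
   values.  It shows why a comparison can be reported as defined even
   when some input bits are undefined: it is enough that one bit
   position is defined in both operands and differs there. */
static UInt model_expensiveCmpEQ32 ( UInt vxx, UInt vyy, UInt xx, UInt yy )
{
   UInt naive       = (vxx | vyy) == 0 ? 0 : ~0u;  /* PCast(UifU(vxx,vyy)) */
   UInt vec         = vxx | vyy | ~(xx ^ yy);      /* 0 iff defined-and-different */
   UInt improvement = (vec == ~0u) ? ~0u : 0;      /* PCast(CmpEQ(vec, 1...1)) */
   UInt improved    = naive & improvement;         /* DifD */
   return improved == 0 ? 0 : 1;                   /* final PCast to I1 */
}
/* model_expensiveCmpEQ32(0x80000000, 0x00000000,
                          0x80000001, 0x00000000) == 0 (defined):
   xx's top bit is undefined, but bit 0 is defined in both args and
   differs, so EQ/NE is decided regardless of the undefined bit. */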

/* --------- Semi-accurate interpretation of CmpORD. --------- */

/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:

      CmpORD32S(x,y) = 1<<3   if  x <s y
                     = 1<<2   if  x >s y
                     = 1<<1   if  x == y

   and similarly the unsigned variant.  The default interpretation is:

      CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
                                  & (7<<1)

   The "& (7<<1)" reflects the fact that all result bits except 3,2,1
   are zero and therefore defined (viz, zero).

   Also deal with a special case better:

      CmpORD32S(x,0)

   Here, bit 3 (LT) of the result is a copy of the top bit of x and
   will be defined even if the rest of x isn't.  In which case we do:

      CmpORD32S#(x,x#,0,{impliedly 0}#)
         = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
           | (x# >>u 31) << 3      -- LT# = x#[31]

   Analogous handling for CmpORD64{S,U}.
*/
static Bool isZeroU32 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == 0 );
}

static Bool isZeroU64 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U64
              && e->Iex.Const.con->Ico.U64 == 0 );
}

static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx,     IRAtom* yy )
{
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
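
/* Illustrative model (not used by the instrumentation): doCmpORD's
   special case for CmpORD32S(x,0) on a concrete shadow word.  Only
   x's sign bit feeds the LT bit of the result, so LT# can be defined
   even when the rest of x is not. */
static UInt model_CmpORD32S_vs0_shadow ( UInt xhash )
{
   UInt pcast = (xhash == 0) ? 0 : ~0u;       /* PCast(x#) */
   return (pcast & (3u << 1))                 /* GT#, EQ#: standard interp */
          | ((xhash >> 31) << 3);             /* LT# = x#[31] */
}
/* model_CmpORD32S_vs0_shadow(0x00000001) == 0x6: GT and EQ come out
   undefined (bits 2 and 1 set), LT defined (bit 3 clear), because
   only bit 0 of x is undefined while the sign bit is defined. */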

/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */


/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointer might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx        = Ifx_Read;
   di->fxState[0].offset    = mce->layout->offset_SP;
   di->fxState[0].size      = mce->layout->sizeof_SP;
   di->fxState[0].nRepeats  = 0;
   di->fxState[0].repeatLen = 0;
   di->fxState[1].fx        = Ifx_Read;
   di->fxState[1].offset    = mce->layout->offset_IP;
   di->fxState[1].size      = mce->layout->sizeof_IP;
   di->fxState[1].nRepeats  = 0;
   di->fxState[1].repeatLen = 0;
}


/* Check the supplied *original* |atom| for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness.

   The checks are performed, any resulting complaint emitted, and
   |atom|'s shadow temp set to 'defined', ONLY in the case that
   |guard| evaluates to True at run-time.  If it evaluates to False
   then no action is performed.  If |guard| is NULL (the usual case)
   then it is assumed to be always-true, and hence these actions are
   performed unconditionally.

   This routine does not generate code to check the definedness of
   |guard|.  The caller is assumed to have taken care of that already.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   const HChar* nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   if (guard)
      tl_assert(isOriginalAtom(mce, guard));

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond; // and cond is PCast-to-1(atom#)

   /* If the complaint is to be issued under a guard condition, AND
      that into the guard condition for the helper call. */
   if (guard) {
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
      di->guard  = assignNew('V', mce, Ity_I1,  unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
      defined -- but only in the case where the guard evaluates to
      True at run-time.  Do the update by setting the orig->shadow
      mapping for tmp to reflect the fact that this shadow is getting
      a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      if (guard == NULL) {
         // guard is 'always True', hence update unconditionally
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                          definedOfType(ty));
      } else {
         // update the temp only conditionally.  Do this by copying
         // its old value when the guard is False.
         // The old value ..
         IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         IRAtom* new_tmpV
            = assignNew('V', mce, shadowTypeV(ty),
                        IRExpr_ITE(guard, definedOfType(ty),
                                          mkexpr(old_tmpV)));
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
      }
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}
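
/* Illustrative model (not used by the instrumentation): the
   containment test performed by isAlwaysDefd against a single
   always-defined section.  A guest-state slice must lie wholly
   inside or wholly outside such a section; the real function panics
   on a partial overlap, which this sketch simply reports as False. */
static Bool model_whollyInside ( Int offset, Int size,
                                 Int secOff, Int secSize )
{
   Int maxoff  = offset + size    - 1;
   Int maxoffD = secOff + secSize - 1;
   if (maxoff < secOff || maxoffD < offset)
      return False;                                  /* no overlap at all */
   return toBool(offset >= secOff && maxoff <= maxoffD); /* wholly contained */
}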

/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
   We assume here, that the definedness of GUARD has already been checked.
*/
static
void do_shadow_PUT ( MCEnv* mce, Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      if (guard) {
         /* If the guard expression evaluates to false we simply Put the value
            that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond    = assignNew('V', mce, Ity_I1, guard);
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom   = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse));
      }
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}


/* Generate into bb suitable actions to shadow this PutI (passed in
   in pieces).
*/
static
void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
{
   IRAtom*     vatom;
   IRType      ty, tyS;
   Int         arrSize;
   IRRegArray* descr = puti->descr;
   IRAtom*     ix    = puti->ix;
   Int         bias  = puti->bias;
   IRAtom*     atom  = puti->data;

   // Don't do shadow PUTIs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETIs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty  = descr->elemTy;
   tyS = shadowTypeV(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowTypeV(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      /* FIXME: this isn't an atom! */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}
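
/* Illustrative sketch (not used by the code here): the shadow guest
   state is laid out as a copy of the real guest state displaced by
   layout->total_sizeB, which is why the shadow PUT/PUTI/GET
   constructions above (and shadow_GETI below) simply add that
   constant to the original offset, or to the register-array base. */
static Int model_shadowStateOffset ( MCEnv* mce, Int offset )
{
   return offset + mce->layout->total_sizeB;
}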

/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowTypeV(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}
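
/* Illustrative model (not used by the instrumentation): the net
   effect of mkLazy2's I64 x I64 -> I64 special case on concrete
   shadow values -- any undefinedness in either argument makes the
   whole result undefined, otherwise it is fully defined. */
static ULong model_Lazy2_I64 ( ULong va1, ULong va2 )
{
   ULong u = va1 | va2;             /* UifU64 */
   return u == 0 ? 0ULL : ~0ULL;    /* PCastTo Ity_I64 */
}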
Since 1st arg is typically a 1661 * rounding mode indication which is fully defined, this should 1662 * get folded out later. 1663 */ 1664 IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1); 1665 IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2); 1666 at = mkUifU(mce, Ity_I64, at1, at2); // UifU(PCast(va1), PCast(va2)) 1667 at = mkUifU(mce, Ity_I64, at, va3); 1668 /* and PCast once again. */ 1669 at = mkPCastTo(mce, Ity_I64, at); 1670 return at; 1671 } 1672 1673 /* I32 x I64 x I64 -> I32 */ 1674 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 1675 && finalVty == Ity_I32) { 1676 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n"); 1677 at = mkPCastTo(mce, Ity_I64, va1); 1678 at = mkUifU(mce, Ity_I64, at, va2); 1679 at = mkUifU(mce, Ity_I64, at, va3); 1680 at = mkPCastTo(mce, Ity_I32, at); 1681 return at; 1682 } 1683 1684 /* I32 x I32 x I32 -> I32 */ 1685 /* 32-bit FP idiom, as (eg) happens on ARM */ 1686 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 1687 && finalVty == Ity_I32) { 1688 if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n"); 1689 at = va1; 1690 at = mkUifU(mce, Ity_I32, at, va2); 1691 at = mkUifU(mce, Ity_I32, at, va3); 1692 at = mkPCastTo(mce, Ity_I32, at); 1693 return at; 1694 } 1695 1696 /* I32 x I128 x I128 -> I128 */ 1697 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */ 1698 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128 1699 && finalVty == Ity_I128) { 1700 if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n"); 1701 /* Widen 1st arg to I128. Since 1st arg is typically a rounding 1702 mode indication which is fully defined, this should get 1703 folded out later. */ 1704 at = mkPCastTo(mce, Ity_I128, va1); 1705 /* Now fold in 2nd and 3rd args. */ 1706 at = mkUifU(mce, Ity_I128, at, va2); 1707 at = mkUifU(mce, Ity_I128, at, va3); 1708 /* and PCast once again. */ 1709 at = mkPCastTo(mce, Ity_I128, at); 1710 return at; 1711 } 1712 1713 /* I32 x I8 x I128 -> I128 */ 1714 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */ 1715 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128 1716 && finalVty == Ity_I128) { 1717 if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n"); 1718 /* Use I64 as an intermediate type, which means PCasting all 3 1719 args to I64 to start with. 1st arg is typically a rounding 1720 mode indication which is fully defined, so we hope that it 1721 will get folded out later. */ 1722 IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1); 1723 IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2); 1724 IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3); 1725 /* Now UifU all three together. */ 1726 at = mkUifU(mce, Ity_I64, at1, at2); // UifU(PCast(va1), PCast(va2)) 1727 at = mkUifU(mce, Ity_I64, at, at3); // ... `UifU` PCast(va3) 1728 /* and PCast once again. */ 1729 at = mkPCastTo(mce, Ity_I128, at); 1730 return at; 1731 } 1732 if (1) { 1733 VG_(printf)("mkLazy3: "); 1734 ppIRType(t1); 1735 VG_(printf)(" x "); 1736 ppIRType(t2); 1737 VG_(printf)(" x "); 1738 ppIRType(t3); 1739 VG_(printf)(" -> "); 1740 ppIRType(finalVty); 1741 VG_(printf)("\n"); 1742 } 1743 1744 tl_assert(0); 1745 /* General case: force everything via 32-bit intermediaries. */ 1746 /* 1747 at = mkPCastTo(mce, Ity_I32, va1); 1748 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2)); 1749 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3)); 1750 at = mkPCastTo(mce, finalVty, at); 1751 return at; 1752 */ 1753 } 1754 1755 1756 /* 4-arg version of the above. 
*/ 1757 static 1758 IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty, 1759 IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 ) 1760 { 1761 IRAtom* at; 1762 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1); 1763 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2); 1764 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3); 1765 IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4); 1766 tl_assert(isShadowAtom(mce,va1)); 1767 tl_assert(isShadowAtom(mce,va2)); 1768 tl_assert(isShadowAtom(mce,va3)); 1769 tl_assert(isShadowAtom(mce,va4)); 1770 1771 /* The general case is inefficient because PCast is an expensive 1772 operation. Here are some special cases which use PCast only 1773 twice rather than three times. */ 1774 1775 /* I32 x I64 x I64 x I64 -> I64 */ 1776 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */ 1777 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64 1778 && finalVty == Ity_I64) { 1779 if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n"); 1780 /* Widen 1st arg to I64. Since 1st arg is typically a rounding 1781 mode indication which is fully defined, this should get 1782 folded out later. */ 1783 at = mkPCastTo(mce, Ity_I64, va1); 1784 /* Now fold in 2nd, 3rd, 4th args. */ 1785 at = mkUifU(mce, Ity_I64, at, va2); 1786 at = mkUifU(mce, Ity_I64, at, va3); 1787 at = mkUifU(mce, Ity_I64, at, va4); 1788 /* and PCast once again. */ 1789 at = mkPCastTo(mce, Ity_I64, at); 1790 return at; 1791 } 1792 /* I32 x I32 x I32 x I32 -> I32 */ 1793 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */ 1794 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32 1795 && finalVty == Ity_I32) { 1796 if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n"); 1797 at = va1; 1798 /* Now fold in 2nd, 3rd, 4th args. */ 1799 at = mkUifU(mce, Ity_I32, at, va2); 1800 at = mkUifU(mce, Ity_I32, at, va3); 1801 at = mkUifU(mce, Ity_I32, at, va4); 1802 at = mkPCastTo(mce, Ity_I32, at); 1803 return at; 1804 } 1805 1806 if (1) { 1807 VG_(printf)("mkLazy4: "); 1808 ppIRType(t1); 1809 VG_(printf)(" x "); 1810 ppIRType(t2); 1811 VG_(printf)(" x "); 1812 ppIRType(t3); 1813 VG_(printf)(" x "); 1814 ppIRType(t4); 1815 VG_(printf)(" -> "); 1816 ppIRType(finalVty); 1817 VG_(printf)("\n"); 1818 } 1819 1820 tl_assert(0); 1821 } 1822 1823 1824 /* Do the lazy propagation game from a null-terminated vector of 1825 atoms. This is presumably the arguments to a helper call, so the 1826 IRCallee info is also supplied in order that we can know which 1827 arguments should be ignored (via the .mcx_mask field). 1828 */ 1829 static 1830 IRAtom* mkLazyN ( MCEnv* mce, 1831 IRAtom** exprvec, IRType finalVtype, IRCallee* cee ) 1832 { 1833 Int i; 1834 IRAtom* here; 1835 IRAtom* curr; 1836 IRType mergeTy; 1837 Bool mergeTy64 = True; 1838 1839 /* Decide on the type of the merge intermediary. If all relevant 1840 args are I64, then it's I64. In all other circumstances, use 1841 I32. */ 1842 for (i = 0; exprvec[i]; i++) { 1843 tl_assert(i < 32); 1844 tl_assert(isOriginalAtom(mce, exprvec[i])); 1845 if (cee->mcx_mask & (1<<i)) 1846 continue; 1847 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64) 1848 mergeTy64 = False; 1849 } 1850 1851 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32; 1852 curr = definedOfType(mergeTy); 1853 1854 for (i = 0; exprvec[i]; i++) { 1855 tl_assert(i < 32); 1856 tl_assert(isOriginalAtom(mce, exprvec[i])); 1857 /* Only take notice of this arg if the callee's mc-exclusion 1858 mask does not say it is to be excluded. 
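         (For example, a .mcx_mask of 0x1 would mean "ignore the
         definedness of arg 0 entirely"; bit i of the mask corresponds
         to exprvec[i] in the test just below.  The 0x1 value here is
         purely illustrative.)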
*/ 1859 if (cee->mcx_mask & (1<<i)) {
1860 /* the arg is to be excluded from definedness checking. Do
1861 nothing. */
1862 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1863 } else { 1864 /* calculate the arg's definedness, and pessimistically merge
1865 it in. */
1866 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1867 curr = mergeTy64 1868 ? mkUifU64(mce, here, curr) 1869 : mkUifU32(mce, here, curr);
1870 } 1871 } 1872 return mkPCastTo(mce, finalVtype, curr ); 1873 } 1874 1875 1876
/*------------------------------------------------------------*/
1877 /*--- Generating expensive sequences for exact carry-chain ---*/
1878 /*--- propagation in add/sub and related operations. ---*/
1879 /*------------------------------------------------------------*/ 1880 1881
static 1882 IRAtom* expensiveAddSub ( MCEnv* mce, 1883 Bool add, 1884 IRType ty, 1885 IRAtom* qaa, IRAtom* qbb, 1886 IRAtom* aa, IRAtom* bb ) 1887 {
1888 IRAtom *a_min, *b_min, *a_max, *b_max;
1889 IROp opAND, opOR, opXOR, opNOT, opADD, opSUB; 1890
1891 tl_assert(isShadowAtom(mce,qaa)); 1892 tl_assert(isShadowAtom(mce,qbb));
1893 tl_assert(isOriginalAtom(mce,aa)); 1894 tl_assert(isOriginalAtom(mce,bb));
1895 tl_assert(sameKindedAtoms(qaa,aa)); 1896 tl_assert(sameKindedAtoms(qbb,bb)); 1897
1898 switch (ty) {
1899 case Ity_I32: 1900 opAND = Iop_And32; 1901 opOR = Iop_Or32; 1902 opXOR = Iop_Xor32; 1903 opNOT = Iop_Not32; 1904 opADD = Iop_Add32; 1905 opSUB = Iop_Sub32; 1906 break;
1907 case Ity_I64: 1908 opAND = Iop_And64; 1909 opOR = Iop_Or64; 1910 opXOR = Iop_Xor64; 1911 opNOT = Iop_Not64; 1912 opADD = Iop_Add64; 1913 opSUB = Iop_Sub64; 1914 break;
1915 default: 1916 VG_(tool_panic)("expensiveAddSub"); 1917 } 1918
// a_min .. b_max are the smallest and largest values that aa and bb
// could possibly take, given which of their bits are undefined.  A
// result bit is then claimed to be defined only if it is defined in
// both inputs and the two extreme sums (or differences) agree at that
// position; any disagreement means a carry or borrow originating from
// undefined bits could reach it.
1919 // a_min = aa & ~qaa
1920 a_min = assignNew('V', mce,ty, 1921 binop(opAND, aa, 1922 assignNew('V', mce,ty, unop(opNOT, qaa)))); 1923
1924 // b_min = bb & ~qbb
1925 b_min = assignNew('V', mce,ty, 1926 binop(opAND, bb, 1927 assignNew('V', mce,ty, unop(opNOT, qbb)))); 1928
1929 // a_max = aa | qaa
1930 a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa)); 1931
1932 // b_max = bb | qbb
1933 b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb)); 1934
1935 if (add) {
1936 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1937 return 1938 assignNew('V', mce,ty, 1939 binop( opOR, 1940 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1941 assignNew('V', mce,ty, 1942 binop( opXOR, 1943 assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
1944 assignNew('V', mce,ty, binop(opADD, a_max, b_max)) 1945 ) 1946 ) 1947 ) 1948 );
1949 } else {
1950 // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
1951 return 1952 assignNew('V', mce,ty, 1953 binop( opOR, 1954 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1955 assignNew('V', mce,ty, 1956 binop( opXOR, 1957 assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
1958 assignNew('V', mce,ty, binop(opSUB, a_max, b_min)) 1959 ) 1960 ) 1961 ) 1962 );
1963 } 1964 1965 } 1966 1967 1968
static 1969 IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop, 1970 IRAtom* atom, IRAtom* vatom ) 1971 {
1972 IRType ty; 1973 IROp xorOp, subOp, andOp; 1974 IRExpr *one; 1975 IRAtom *improver, *improved;
1976 tl_assert(isShadowAtom(mce,vatom)); 1977 tl_assert(isOriginalAtom(mce,atom)); 1978 tl_assert(sameKindedAtoms(atom,vatom)); 1979
1980 switch (czop) {
1981 case Iop_Ctz32: 1982 ty = Ity_I32; 1983 xorOp = Iop_Xor32; 1984 subOp = Iop_Sub32; 1985 andOp = Iop_And32; 1986 one = mkU32(1); 1987 break;
1988 case Iop_Ctz64: 1989 ty = Ity_I64; 1990 xorOp = Iop_Xor64; 1991
subOp = Iop_Sub64; 1992 andOp = Iop_And64; 1993 one = mkU64(1); 1994 break; 1995 default: 1996 ppIROp(czop); 1997 VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes"); 1998 } 1999 2000 // improver = atom ^ (atom - 1) 2001 // 2002 // That is, improver has its low ctz(atom) bits equal to one; 2003 // higher bits (if any) equal to zero. 2004 improver = assignNew('V', mce,ty, 2005 binop(xorOp, 2006 atom, 2007 assignNew('V', mce, ty, 2008 binop(subOp, atom, one)))); 2009 2010 // improved = vatom & improver 2011 // 2012 // That is, treat any V bits above the first ctz(atom) bits as 2013 // "defined". 2014 improved = assignNew('V', mce, ty, 2015 binop(andOp, vatom, improver)); 2016 2017 // Return pessimizing cast of improved. 2018 return mkPCastTo(mce, ty, improved); 2019 } 2020 2021 2022 /*------------------------------------------------------------*/ 2023 /*--- Scalar shifts. ---*/ 2024 /*------------------------------------------------------------*/ 2025 2026 /* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic 2027 idea is to shift the definedness bits by the original shift amount. 2028 This introduces 0s ("defined") in new positions for left shifts and 2029 unsigned right shifts, and copies the top definedness bit for 2030 signed right shifts. So, conveniently, applying the original shift 2031 operator to the definedness bits for the left arg is exactly the 2032 right thing to do: 2033 2034 (qaa << bb) 2035 2036 However if the shift amount is undefined then the whole result 2037 is undefined. Hence need: 2038 2039 (qaa << bb) `UifU` PCast(qbb) 2040 2041 If the shift amount bb is a literal than qbb will say 'all defined' 2042 and the UifU and PCast will get folded out by post-instrumentation 2043 optimisation. 2044 */ 2045 static IRAtom* scalarShift ( MCEnv* mce, 2046 IRType ty, 2047 IROp original_op, 2048 IRAtom* qaa, IRAtom* qbb, 2049 IRAtom* aa, IRAtom* bb ) 2050 { 2051 tl_assert(isShadowAtom(mce,qaa)); 2052 tl_assert(isShadowAtom(mce,qbb)); 2053 tl_assert(isOriginalAtom(mce,aa)); 2054 tl_assert(isOriginalAtom(mce,bb)); 2055 tl_assert(sameKindedAtoms(qaa,aa)); 2056 tl_assert(sameKindedAtoms(qbb,bb)); 2057 return 2058 assignNew( 2059 'V', mce, ty, 2060 mkUifU( mce, ty, 2061 assignNew('V', mce, ty, binop(original_op, qaa, bb)), 2062 mkPCastTo(mce, ty, qbb) 2063 ) 2064 ); 2065 } 2066 2067 2068 /*------------------------------------------------------------*/ 2069 /*--- Helpers for dealing with vector primops. ---*/ 2070 /*------------------------------------------------------------*/ 2071 2072 /* Vector pessimisation -- pessimise within each lane individually. 
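
   As an illustrative sketch (the bit patterns are made up, not taken
   from any particular guest instruction): if a V128 shadow value,
   viewed as four 32-bit lanes of V bits (0 = defined, 1 = undefined),
   is

      0x00000000 0x00000001 0x80000000 0xFFFFFFFF

   then mkPCast32x4 of it is

      0x00000000 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF

   i.e. a lane collapses to all-zeroes if it was entirely defined, and
   to all-ones if any bit in it was undefined.  The Iop_CmpNEZ* primops
   used below compute exactly this per-lane collapse.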
*/ 2073 2074 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at ) 2075 { 2076 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at)); 2077 } 2078 2079 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at ) 2080 { 2081 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at)); 2082 } 2083 2084 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at ) 2085 { 2086 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at)); 2087 } 2088 2089 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at ) 2090 { 2091 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at)); 2092 } 2093 2094 static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at ) 2095 { 2096 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at)); 2097 } 2098 2099 static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at ) 2100 { 2101 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at)); 2102 } 2103 2104 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at ) 2105 { 2106 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at)); 2107 } 2108 2109 static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at ) 2110 { 2111 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at)); 2112 } 2113 2114 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at ) 2115 { 2116 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at)); 2117 } 2118 2119 static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at ) 2120 { 2121 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at)); 2122 } 2123 2124 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at ) 2125 { 2126 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at)); 2127 } 2128 2129 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at ) 2130 { 2131 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at)); 2132 } 2133 2134 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at ) 2135 { 2136 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at)); 2137 } 2138 2139 2140 /* Here's a simple scheme capable of handling ops derived from SSE1 2141 code and while only generating ops that can be efficiently 2142 implemented in SSE1. */ 2143 2144 /* All-lanes versions are straightforward: 2145 2146 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#)) 2147 2148 unary32Fx4(x,y) ==> PCast32x4(x#) 2149 2150 Lowest-lane-only versions are more complex: 2151 2152 binary32F0x4(x,y) ==> SetV128lo32( 2153 x#, 2154 PCast32(V128to32(UifUV128(x#,y#))) 2155 ) 2156 2157 This is perhaps not so obvious. In particular, it's faster to 2158 do a V128-bit UifU and then take the bottom 32 bits than the more 2159 obvious scheme of taking the bottom 32 bits of each operand 2160 and doing a 32-bit UifU. Basically since UifU is fast and 2161 chopping lanes off vector values is slow. 
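
   (A sketch of why the reordering is sound: UifU on V bits is just a
   bitwise Or, and V128to32 merely selects bits 0..31, so Or-then-select
   and select-then-Or produce the same bottom 32 bits.)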
2162 2163 Finally: 2164 2165 unary32F0x4(x) ==> SetV128lo32( 2166 x#, 2167 PCast32(V128to32(x#)) 2168 ) 2169 2170 Where: 2171 2172 PCast32(v#) = 1Sto32(CmpNE32(v#,0)) 2173 PCast32x4(v#) = CmpNEZ32x4(v#) 2174 */ 2175 2176 static 2177 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2178 { 2179 IRAtom* at; 2180 tl_assert(isShadowAtom(mce, vatomX)); 2181 tl_assert(isShadowAtom(mce, vatomY)); 2182 at = mkUifUV128(mce, vatomX, vatomY); 2183 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at)); 2184 return at; 2185 } 2186 2187 static 2188 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX ) 2189 { 2190 IRAtom* at; 2191 tl_assert(isShadowAtom(mce, vatomX)); 2192 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX)); 2193 return at; 2194 } 2195 2196 static 2197 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2198 { 2199 IRAtom* at; 2200 tl_assert(isShadowAtom(mce, vatomX)); 2201 tl_assert(isShadowAtom(mce, vatomY)); 2202 at = mkUifUV128(mce, vatomX, vatomY); 2203 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at)); 2204 at = mkPCastTo(mce, Ity_I32, at); 2205 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 2206 return at; 2207 } 2208 2209 static 2210 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX ) 2211 { 2212 IRAtom* at; 2213 tl_assert(isShadowAtom(mce, vatomX)); 2214 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX)); 2215 at = mkPCastTo(mce, Ity_I32, at); 2216 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at)); 2217 return at; 2218 } 2219 2220 /* --- ... and ... 64Fx2 versions of the same ... --- */ 2221 2222 static 2223 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2224 { 2225 IRAtom* at; 2226 tl_assert(isShadowAtom(mce, vatomX)); 2227 tl_assert(isShadowAtom(mce, vatomY)); 2228 at = mkUifUV128(mce, vatomX, vatomY); 2229 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at)); 2230 return at; 2231 } 2232 2233 static 2234 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX ) 2235 { 2236 IRAtom* at; 2237 tl_assert(isShadowAtom(mce, vatomX)); 2238 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX)); 2239 return at; 2240 } 2241 2242 static 2243 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2244 { 2245 IRAtom* at; 2246 tl_assert(isShadowAtom(mce, vatomX)); 2247 tl_assert(isShadowAtom(mce, vatomY)); 2248 at = mkUifUV128(mce, vatomX, vatomY); 2249 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at)); 2250 at = mkPCastTo(mce, Ity_I64, at); 2251 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 2252 return at; 2253 } 2254 2255 static 2256 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX ) 2257 { 2258 IRAtom* at; 2259 tl_assert(isShadowAtom(mce, vatomX)); 2260 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX)); 2261 at = mkPCastTo(mce, Ity_I64, at); 2262 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at)); 2263 return at; 2264 } 2265 2266 /* --- --- ... and ... 
32Fx2 versions of the same --- --- */ 2267 2268 static 2269 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2270 { 2271 IRAtom* at; 2272 tl_assert(isShadowAtom(mce, vatomX)); 2273 tl_assert(isShadowAtom(mce, vatomY)); 2274 at = mkUifU64(mce, vatomX, vatomY); 2275 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at)); 2276 return at; 2277 } 2278 2279 static 2280 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX ) 2281 { 2282 IRAtom* at; 2283 tl_assert(isShadowAtom(mce, vatomX)); 2284 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX)); 2285 return at; 2286 } 2287 2288 /* --- ... and ... 64Fx4 versions of the same ... --- */ 2289 2290 static 2291 IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2292 { 2293 IRAtom* at; 2294 tl_assert(isShadowAtom(mce, vatomX)); 2295 tl_assert(isShadowAtom(mce, vatomY)); 2296 at = mkUifUV256(mce, vatomX, vatomY); 2297 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at)); 2298 return at; 2299 } 2300 2301 static 2302 IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX ) 2303 { 2304 IRAtom* at; 2305 tl_assert(isShadowAtom(mce, vatomX)); 2306 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX)); 2307 return at; 2308 } 2309 2310 /* --- ... and ... 32Fx8 versions of the same ... --- */ 2311 2312 static 2313 IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY ) 2314 { 2315 IRAtom* at; 2316 tl_assert(isShadowAtom(mce, vatomX)); 2317 tl_assert(isShadowAtom(mce, vatomY)); 2318 at = mkUifUV256(mce, vatomX, vatomY); 2319 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at)); 2320 return at; 2321 } 2322 2323 static 2324 IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX ) 2325 { 2326 IRAtom* at; 2327 tl_assert(isShadowAtom(mce, vatomX)); 2328 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX)); 2329 return at; 2330 } 2331 2332 /* --- 64Fx2 binary FP ops, with rounding mode --- */ 2333 2334 static 2335 IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM, 2336 IRAtom* vatomX, IRAtom* vatomY ) 2337 { 2338 /* This is the same as binary64Fx2, except that we subsequently 2339 pessimise vRM (definedness of the rounding mode), widen to 128 2340 bits and UifU it into the result. As with the scalar cases, if 2341 the RM is a constant then it is defined and so this extra bit 2342 will get constant-folded out later. */ 2343 // "do" the vector args 2344 IRAtom* t1 = binary64Fx2(mce, vatomX, vatomY); 2345 // PCast the RM, and widen it to 128 bits 2346 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM); 2347 // Roll it into the result 2348 t1 = mkUifUV128(mce, t1, t2); 2349 return t1; 2350 } 2351 2352 /* --- ... and ... 32Fx4 versions of the same --- */ 2353 2354 static 2355 IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, 2356 IRAtom* vatomX, IRAtom* vatomY ) 2357 { 2358 IRAtom* t1 = binary32Fx4(mce, vatomX, vatomY); 2359 // PCast the RM, and widen it to 128 bits 2360 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM); 2361 // Roll it into the result 2362 t1 = mkUifUV128(mce, t1, t2); 2363 return t1; 2364 } 2365 2366 /* --- ... and ... 64Fx4 versions of the same --- */ 2367 2368 static 2369 IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, 2370 IRAtom* vatomX, IRAtom* vatomY ) 2371 { 2372 IRAtom* t1 = binary64Fx4(mce, vatomX, vatomY); 2373 // PCast the RM, and widen it to 256 bits 2374 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM); 2375 // Roll it into the result 2376 t1 = mkUifUV256(mce, t1, t2); 2377 return t1; 2378 } 2379 2380 /* --- ... and ... 
32Fx8 versions of the same --- */ 2381 2382 static 2383 IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM, 2384 IRAtom* vatomX, IRAtom* vatomY ) 2385 { 2386 IRAtom* t1 = binary32Fx8(mce, vatomX, vatomY); 2387 // PCast the RM, and widen it to 256 bits 2388 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM); 2389 // Roll it into the result 2390 t1 = mkUifUV256(mce, t1, t2); 2391 return t1; 2392 } 2393 2394 /* --- 64Fx2 unary FP ops, with rounding mode --- */ 2395 2396 static 2397 IRAtom* unary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX ) 2398 { 2399 /* Same scheme as binary64Fx2_w_rm. */ 2400 // "do" the vector arg 2401 IRAtom* t1 = unary64Fx2(mce, vatomX); 2402 // PCast the RM, and widen it to 128 bits 2403 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM); 2404 // Roll it into the result 2405 t1 = mkUifUV128(mce, t1, t2); 2406 return t1; 2407 } 2408 2409 /* --- ... and ... 32Fx4 versions of the same --- */ 2410 2411 static 2412 IRAtom* unary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX ) 2413 { 2414 /* Same scheme as unary32Fx4_w_rm. */ 2415 IRAtom* t1 = unary32Fx4(mce, vatomX); 2416 // PCast the RM, and widen it to 128 bits 2417 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM); 2418 // Roll it into the result 2419 t1 = mkUifUV128(mce, t1, t2); 2420 return t1; 2421 } 2422 2423 2424 /* --- --- Vector saturated narrowing --- --- */ 2425 2426 /* We used to do something very clever here, but on closer inspection 2427 (2011-Jun-15), and in particular bug #279698, it turns out to be 2428 wrong. Part of the problem came from the fact that for a long 2429 time, the IR primops to do with saturated narrowing were 2430 underspecified and managed to confuse multiple cases which needed 2431 to be separate: the op names had a signedness qualifier, but in 2432 fact the source and destination signednesses needed to be specified 2433 independently, so the op names really need two independent 2434 signedness specifiers. 2435 2436 As of 2011-Jun-15 (ish) the underspecification was sorted out 2437 properly. The incorrect instrumentation remained, though. That 2438 has now (2011-Oct-22) been fixed. 2439 2440 What we now do is simple: 2441 2442 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a 2443 number of lanes, X is the source lane width and signedness, and Y 2444 is the destination lane width and signedness. In all cases the 2445 destination lane width is half the source lane width, so the names 2446 have a bit of redundancy, but are at least easy to read. 2447 2448 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s 2449 to unsigned 16s. 2450 2451 Let Vanilla(OP) be a function that takes OP, one of these 2452 saturating narrowing ops, and produces the same "shaped" narrowing 2453 op which is not saturating, but merely dumps the most significant 2454 bits. "same shape" means that the lane numbers and widths are the 2455 same as with OP. 2456 2457 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8) 2458 = Iop_NarrowBin32to16x8, 2459 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by 2460 dumping the top half of each lane. 2461 2462 So, with that in place, the scheme is simple, and it is simple to 2463 pessimise each lane individually and then apply Vanilla(OP) so as 2464 to get the result in the right "shape". 
If the original OP is 2465 QNarrowBinXtoYxZ then we produce 2466 2467 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) ) 2468 2469 or for the case when OP is unary (Iop_QNarrowUn*) 2470 2471 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) ) 2472 */ 2473 static 2474 IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp ) 2475 { 2476 switch (qnarrowOp) { 2477 /* Binary: (128, 128) -> 128 */ 2478 case Iop_QNarrowBin16Sto8Ux16: 2479 case Iop_QNarrowBin16Sto8Sx16: 2480 case Iop_QNarrowBin16Uto8Ux16: 2481 case Iop_QNarrowBin64Sto32Sx4: 2482 case Iop_QNarrowBin64Uto32Ux4: 2483 return Iop_NarrowBin16to8x16; 2484 case Iop_QNarrowBin32Sto16Ux8: 2485 case Iop_QNarrowBin32Sto16Sx8: 2486 case Iop_QNarrowBin32Uto16Ux8: 2487 return Iop_NarrowBin32to16x8; 2488 /* Binary: (64, 64) -> 64 */ 2489 case Iop_QNarrowBin32Sto16Sx4: 2490 return Iop_NarrowBin32to16x4; 2491 case Iop_QNarrowBin16Sto8Ux8: 2492 case Iop_QNarrowBin16Sto8Sx8: 2493 return Iop_NarrowBin16to8x8; 2494 /* Unary: 128 -> 64 */ 2495 case Iop_QNarrowUn64Uto32Ux2: 2496 case Iop_QNarrowUn64Sto32Sx2: 2497 case Iop_QNarrowUn64Sto32Ux2: 2498 return Iop_NarrowUn64to32x2; 2499 case Iop_QNarrowUn32Uto16Ux4: 2500 case Iop_QNarrowUn32Sto16Sx4: 2501 case Iop_QNarrowUn32Sto16Ux4: 2502 return Iop_NarrowUn32to16x4; 2503 case Iop_QNarrowUn16Uto8Ux8: 2504 case Iop_QNarrowUn16Sto8Sx8: 2505 case Iop_QNarrowUn16Sto8Ux8: 2506 return Iop_NarrowUn16to8x8; 2507 default: 2508 ppIROp(qnarrowOp); 2509 VG_(tool_panic)("vanillaNarrowOpOfShape"); 2510 } 2511 } 2512 2513 static 2514 IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op, 2515 IRAtom* vatom1, IRAtom* vatom2) 2516 { 2517 IRAtom *at1, *at2, *at3; 2518 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2519 switch (narrow_op) { 2520 case Iop_QNarrowBin64Sto32Sx4: pcast = mkPCast32x4; break; 2521 case Iop_QNarrowBin64Uto32Ux4: pcast = mkPCast32x4; break; 2522 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break; 2523 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break; 2524 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break; 2525 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break; 2526 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break; 2527 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break; 2528 default: VG_(tool_panic)("vectorNarrowBinV128"); 2529 } 2530 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2531 tl_assert(isShadowAtom(mce,vatom1)); 2532 tl_assert(isShadowAtom(mce,vatom2)); 2533 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1)); 2534 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2)); 2535 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2)); 2536 return at3; 2537 } 2538 2539 static 2540 IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op, 2541 IRAtom* vatom1, IRAtom* vatom2) 2542 { 2543 IRAtom *at1, *at2, *at3; 2544 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2545 switch (narrow_op) { 2546 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break; 2547 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break; 2548 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break; 2549 default: VG_(tool_panic)("vectorNarrowBin64"); 2550 } 2551 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2552 tl_assert(isShadowAtom(mce,vatom1)); 2553 tl_assert(isShadowAtom(mce,vatom2)); 2554 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1)); 2555 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2)); 2556 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2)); 2557 return at3; 2558 } 2559 2560 static 2561 IRAtom* vectorNarrowUnV128 ( 
MCEnv* mce, IROp narrow_op, 2562 IRAtom* vatom1) 2563 { 2564 IRAtom *at1, *at2; 2565 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2566 tl_assert(isShadowAtom(mce,vatom1)); 2567 /* For vanilla narrowing (non-saturating), we can just apply 2568 the op directly to the V bits. */ 2569 switch (narrow_op) { 2570 case Iop_NarrowUn16to8x8: 2571 case Iop_NarrowUn32to16x4: 2572 case Iop_NarrowUn64to32x2: 2573 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1)); 2574 return at1; 2575 default: 2576 break; /* Do Plan B */ 2577 } 2578 /* Plan B: for ops that involve a saturation operation on the args, 2579 we must PCast before the vanilla narrow. */ 2580 switch (narrow_op) { 2581 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break; 2582 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break; 2583 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break; 2584 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break; 2585 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break; 2586 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break; 2587 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break; 2588 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break; 2589 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break; 2590 default: VG_(tool_panic)("vectorNarrowUnV128"); 2591 } 2592 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op); 2593 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1)); 2594 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1)); 2595 return at2; 2596 } 2597 2598 static 2599 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op, 2600 IRAtom* vatom1) 2601 { 2602 IRAtom *at1, *at2; 2603 IRAtom* (*pcast)( MCEnv*, IRAtom* ); 2604 switch (longen_op) { 2605 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break; 2606 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break; 2607 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break; 2608 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break; 2609 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break; 2610 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break; 2611 default: VG_(tool_panic)("vectorWidenI64"); 2612 } 2613 tl_assert(isShadowAtom(mce,vatom1)); 2614 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1)); 2615 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1)); 2616 return at2; 2617 } 2618 2619 2620 /* --- --- Vector integer arithmetic --- --- */ 2621 2622 /* Simple ... UifU the args and per-lane pessimise the results. 
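
   In the informal ==> notation used earlier, each helper below is a
   sketch of the form (shown here for the 32x4 case; the others differ
   only in lane geometry):

      binary32Ix4(x#,y#) ==> PCast32x4( UifUV128(x#,y#) )

   so a result lane is flagged entirely undefined as soon as either
   operand has any undefined bit anywhere in that lane.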
*/ 2623 2624 /* --- V256-bit versions --- */ 2625 2626 static 2627 IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2628 { 2629 IRAtom* at; 2630 at = mkUifUV256(mce, vatom1, vatom2); 2631 at = mkPCast8x32(mce, at); 2632 return at; 2633 } 2634 2635 static 2636 IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2637 { 2638 IRAtom* at; 2639 at = mkUifUV256(mce, vatom1, vatom2); 2640 at = mkPCast16x16(mce, at); 2641 return at; 2642 } 2643 2644 static 2645 IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2646 { 2647 IRAtom* at; 2648 at = mkUifUV256(mce, vatom1, vatom2); 2649 at = mkPCast32x8(mce, at); 2650 return at; 2651 } 2652 2653 static 2654 IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2655 { 2656 IRAtom* at; 2657 at = mkUifUV256(mce, vatom1, vatom2); 2658 at = mkPCast64x4(mce, at); 2659 return at; 2660 } 2661 2662 /* --- V128-bit versions --- */ 2663 2664 static 2665 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2666 { 2667 IRAtom* at; 2668 at = mkUifUV128(mce, vatom1, vatom2); 2669 at = mkPCast8x16(mce, at); 2670 return at; 2671 } 2672 2673 static 2674 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2675 { 2676 IRAtom* at; 2677 at = mkUifUV128(mce, vatom1, vatom2); 2678 at = mkPCast16x8(mce, at); 2679 return at; 2680 } 2681 2682 static 2683 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2684 { 2685 IRAtom* at; 2686 at = mkUifUV128(mce, vatom1, vatom2); 2687 at = mkPCast32x4(mce, at); 2688 return at; 2689 } 2690 2691 static 2692 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2693 { 2694 IRAtom* at; 2695 at = mkUifUV128(mce, vatom1, vatom2); 2696 at = mkPCast64x2(mce, at); 2697 return at; 2698 } 2699 2700 /* --- 64-bit versions --- */ 2701 2702 static 2703 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2704 { 2705 IRAtom* at; 2706 at = mkUifU64(mce, vatom1, vatom2); 2707 at = mkPCast8x8(mce, at); 2708 return at; 2709 } 2710 2711 static 2712 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2713 { 2714 IRAtom* at; 2715 at = mkUifU64(mce, vatom1, vatom2); 2716 at = mkPCast16x4(mce, at); 2717 return at; 2718 } 2719 2720 static 2721 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2722 { 2723 IRAtom* at; 2724 at = mkUifU64(mce, vatom1, vatom2); 2725 at = mkPCast32x2(mce, at); 2726 return at; 2727 } 2728 2729 static 2730 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2731 { 2732 IRAtom* at; 2733 at = mkUifU64(mce, vatom1, vatom2); 2734 at = mkPCastTo(mce, Ity_I64, at); 2735 return at; 2736 } 2737 2738 /* --- 32-bit versions --- */ 2739 2740 static 2741 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2742 { 2743 IRAtom* at; 2744 at = mkUifU32(mce, vatom1, vatom2); 2745 at = mkPCast8x4(mce, at); 2746 return at; 2747 } 2748 2749 static 2750 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) 2751 { 2752 IRAtom* at; 2753 at = mkUifU32(mce, vatom1, vatom2); 2754 at = mkPCast16x2(mce, at); 2755 return at; 2756 } 2757 2758 2759 /*------------------------------------------------------------*/ 2760 /*--- Generate shadow values from all kinds of IRExprs. 
---*/ 2761 /*------------------------------------------------------------*/ 2762 2763 static 2764 IRAtom* expr2vbits_Qop ( MCEnv* mce, 2765 IROp op, 2766 IRAtom* atom1, IRAtom* atom2, 2767 IRAtom* atom3, IRAtom* atom4 ) 2768 { 2769 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2770 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2771 IRAtom* vatom3 = expr2vbits( mce, atom3 ); 2772 IRAtom* vatom4 = expr2vbits( mce, atom4 ); 2773 2774 tl_assert(isOriginalAtom(mce,atom1)); 2775 tl_assert(isOriginalAtom(mce,atom2)); 2776 tl_assert(isOriginalAtom(mce,atom3)); 2777 tl_assert(isOriginalAtom(mce,atom4)); 2778 tl_assert(isShadowAtom(mce,vatom1)); 2779 tl_assert(isShadowAtom(mce,vatom2)); 2780 tl_assert(isShadowAtom(mce,vatom3)); 2781 tl_assert(isShadowAtom(mce,vatom4)); 2782 tl_assert(sameKindedAtoms(atom1,vatom1)); 2783 tl_assert(sameKindedAtoms(atom2,vatom2)); 2784 tl_assert(sameKindedAtoms(atom3,vatom3)); 2785 tl_assert(sameKindedAtoms(atom4,vatom4)); 2786 switch (op) { 2787 case Iop_MAddF64: 2788 case Iop_MAddF64r32: 2789 case Iop_MSubF64: 2790 case Iop_MSubF64r32: 2791 /* I32(rm) x F64 x F64 x F64 -> F64 */ 2792 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4); 2793 2794 case Iop_MAddF32: 2795 case Iop_MSubF32: 2796 /* I32(rm) x F32 x F32 x F32 -> F32 */ 2797 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4); 2798 2799 /* V256-bit data-steering */ 2800 case Iop_64x4toV256: 2801 return assignNew('V', mce, Ity_V256, 2802 IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4)); 2803 2804 default: 2805 ppIROp(op); 2806 VG_(tool_panic)("memcheck:expr2vbits_Qop"); 2807 } 2808 } 2809 2810 2811 static 2812 IRAtom* expr2vbits_Triop ( MCEnv* mce, 2813 IROp op, 2814 IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 ) 2815 { 2816 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2817 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2818 IRAtom* vatom3 = expr2vbits( mce, atom3 ); 2819 2820 tl_assert(isOriginalAtom(mce,atom1)); 2821 tl_assert(isOriginalAtom(mce,atom2)); 2822 tl_assert(isOriginalAtom(mce,atom3)); 2823 tl_assert(isShadowAtom(mce,vatom1)); 2824 tl_assert(isShadowAtom(mce,vatom2)); 2825 tl_assert(isShadowAtom(mce,vatom3)); 2826 tl_assert(sameKindedAtoms(atom1,vatom1)); 2827 tl_assert(sameKindedAtoms(atom2,vatom2)); 2828 tl_assert(sameKindedAtoms(atom3,vatom3)); 2829 switch (op) { 2830 case Iop_AddF128: 2831 case Iop_AddD128: 2832 case Iop_SubF128: 2833 case Iop_SubD128: 2834 case Iop_MulF128: 2835 case Iop_MulD128: 2836 case Iop_DivF128: 2837 case Iop_DivD128: 2838 case Iop_QuantizeD128: 2839 /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */ 2840 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3); 2841 case Iop_AddF64: 2842 case Iop_AddD64: 2843 case Iop_AddF64r32: 2844 case Iop_SubF64: 2845 case Iop_SubD64: 2846 case Iop_SubF64r32: 2847 case Iop_MulF64: 2848 case Iop_MulD64: 2849 case Iop_MulF64r32: 2850 case Iop_DivF64: 2851 case Iop_DivD64: 2852 case Iop_DivF64r32: 2853 case Iop_ScaleF64: 2854 case Iop_Yl2xF64: 2855 case Iop_Yl2xp1F64: 2856 case Iop_AtanF64: 2857 case Iop_PRemF64: 2858 case Iop_PRem1F64: 2859 case Iop_QuantizeD64: 2860 /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */ 2861 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3); 2862 case Iop_PRemC3210F64: 2863 case Iop_PRem1C3210F64: 2864 /* I32(rm) x F64 x F64 -> I32 */ 2865 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3); 2866 case Iop_AddF32: 2867 case Iop_SubF32: 2868 case Iop_MulF32: 2869 case Iop_DivF32: 2870 /* I32(rm) x F32 x F32 -> I32 */ 2871 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3); 2872 case 
Iop_SignificanceRoundD64: 2873 /* IRRoundingMode(I32) x I8 x D64 -> D64 */ 2874 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3); 2875 case Iop_SignificanceRoundD128: 2876 /* IRRoundingMode(I32) x I8 x D128 -> D128 */ 2877 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3); 2878 case Iop_SliceV128: 2879 /* (V128, V128, I8) -> V128 */ 2880 complainIfUndefined(mce, atom3, NULL); 2881 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3)); 2882 case Iop_Slice64: 2883 /* (I64, I64, I8) -> I64 */ 2884 complainIfUndefined(mce, atom3, NULL); 2885 return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3)); 2886 case Iop_SetElem8x8: 2887 case Iop_SetElem16x4: 2888 case Iop_SetElem32x2: 2889 complainIfUndefined(mce, atom2, NULL); 2890 return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3)); 2891 /* BCDIops */ 2892 case Iop_BCDAdd: 2893 case Iop_BCDSub: 2894 complainIfUndefined(mce, atom3, NULL); 2895 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3)); 2896 2897 /* Vector FP with rounding mode as the first arg */ 2898 case Iop_Add64Fx2: 2899 case Iop_Sub64Fx2: 2900 case Iop_Mul64Fx2: 2901 case Iop_Div64Fx2: 2902 return binary64Fx2_w_rm(mce, vatom1, vatom2, vatom3); 2903 2904 case Iop_Add32Fx4: 2905 case Iop_Sub32Fx4: 2906 case Iop_Mul32Fx4: 2907 case Iop_Div32Fx4: 2908 return binary32Fx4_w_rm(mce, vatom1, vatom2, vatom3); 2909 2910 case Iop_Add64Fx4: 2911 case Iop_Sub64Fx4: 2912 case Iop_Mul64Fx4: 2913 case Iop_Div64Fx4: 2914 return binary64Fx4_w_rm(mce, vatom1, vatom2, vatom3); 2915 2916 case Iop_Add32Fx8: 2917 case Iop_Sub32Fx8: 2918 case Iop_Mul32Fx8: 2919 case Iop_Div32Fx8: 2920 return binary32Fx8_w_rm(mce, vatom1, vatom2, vatom3); 2921 2922 default: 2923 ppIROp(op); 2924 VG_(tool_panic)("memcheck:expr2vbits_Triop"); 2925 } 2926 } 2927 2928 2929 static 2930 IRAtom* expr2vbits_Binop ( MCEnv* mce, 2931 IROp op, 2932 IRAtom* atom1, IRAtom* atom2 ) 2933 { 2934 IRType and_or_ty; 2935 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*); 2936 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*); 2937 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*); 2938 2939 IRAtom* vatom1 = expr2vbits( mce, atom1 ); 2940 IRAtom* vatom2 = expr2vbits( mce, atom2 ); 2941 2942 tl_assert(isOriginalAtom(mce,atom1)); 2943 tl_assert(isOriginalAtom(mce,atom2)); 2944 tl_assert(isShadowAtom(mce,vatom1)); 2945 tl_assert(isShadowAtom(mce,vatom2)); 2946 tl_assert(sameKindedAtoms(atom1,vatom1)); 2947 tl_assert(sameKindedAtoms(atom2,vatom2)); 2948 switch (op) { 2949 2950 /* 32-bit SIMD */ 2951 2952 case Iop_Add16x2: 2953 case Iop_HAdd16Ux2: 2954 case Iop_HAdd16Sx2: 2955 case Iop_Sub16x2: 2956 case Iop_HSub16Ux2: 2957 case Iop_HSub16Sx2: 2958 case Iop_QAdd16Sx2: 2959 case Iop_QSub16Sx2: 2960 case Iop_QSub16Ux2: 2961 case Iop_QAdd16Ux2: 2962 return binary16Ix2(mce, vatom1, vatom2); 2963 2964 case Iop_Add8x4: 2965 case Iop_HAdd8Ux4: 2966 case Iop_HAdd8Sx4: 2967 case Iop_Sub8x4: 2968 case Iop_HSub8Ux4: 2969 case Iop_HSub8Sx4: 2970 case Iop_QSub8Ux4: 2971 case Iop_QAdd8Ux4: 2972 case Iop_QSub8Sx4: 2973 case Iop_QAdd8Sx4: 2974 return binary8Ix4(mce, vatom1, vatom2); 2975 2976 /* 64-bit SIMD */ 2977 2978 case Iop_ShrN8x8: 2979 case Iop_ShrN16x4: 2980 case Iop_ShrN32x2: 2981 case Iop_SarN8x8: 2982 case Iop_SarN16x4: 2983 case Iop_SarN32x2: 2984 case Iop_ShlN16x4: 2985 case Iop_ShlN32x2: 2986 case Iop_ShlN8x8: 2987 /* Same scheme as with all other shifts. 
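
      That is (a sketch only): the shift amount is checked for
      definedness up front, and the shiftee's V bits are then shifted
      by the original amount, e.g. for ShlN16x4(x,amt):

         complain if amt is at all undefined, then
         shadow result = ShlN16x4(x#, amt)

      Note this complains about an undefined amt rather than folding
      PCast(amt#) into the result as scalarShift does.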
*/ 2988 complainIfUndefined(mce, atom2, NULL); 2989 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 2990 2991 case Iop_QNarrowBin32Sto16Sx4: 2992 case Iop_QNarrowBin16Sto8Sx8: 2993 case Iop_QNarrowBin16Sto8Ux8: 2994 return vectorNarrowBin64(mce, op, vatom1, vatom2); 2995 2996 case Iop_Min8Ux8: 2997 case Iop_Min8Sx8: 2998 case Iop_Max8Ux8: 2999 case Iop_Max8Sx8: 3000 case Iop_Avg8Ux8: 3001 case Iop_QSub8Sx8: 3002 case Iop_QSub8Ux8: 3003 case Iop_Sub8x8: 3004 case Iop_CmpGT8Sx8: 3005 case Iop_CmpGT8Ux8: 3006 case Iop_CmpEQ8x8: 3007 case Iop_QAdd8Sx8: 3008 case Iop_QAdd8Ux8: 3009 case Iop_QSal8x8: 3010 case Iop_QShl8x8: 3011 case Iop_Add8x8: 3012 case Iop_Mul8x8: 3013 case Iop_PolynomialMul8x8: 3014 return binary8Ix8(mce, vatom1, vatom2); 3015 3016 case Iop_Min16Sx4: 3017 case Iop_Min16Ux4: 3018 case Iop_Max16Sx4: 3019 case Iop_Max16Ux4: 3020 case Iop_Avg16Ux4: 3021 case Iop_QSub16Ux4: 3022 case Iop_QSub16Sx4: 3023 case Iop_Sub16x4: 3024 case Iop_Mul16x4: 3025 case Iop_MulHi16Sx4: 3026 case Iop_MulHi16Ux4: 3027 case Iop_CmpGT16Sx4: 3028 case Iop_CmpGT16Ux4: 3029 case Iop_CmpEQ16x4: 3030 case Iop_QAdd16Sx4: 3031 case Iop_QAdd16Ux4: 3032 case Iop_QSal16x4: 3033 case Iop_QShl16x4: 3034 case Iop_Add16x4: 3035 case Iop_QDMulHi16Sx4: 3036 case Iop_QRDMulHi16Sx4: 3037 return binary16Ix4(mce, vatom1, vatom2); 3038 3039 case Iop_Sub32x2: 3040 case Iop_Mul32x2: 3041 case Iop_Max32Sx2: 3042 case Iop_Max32Ux2: 3043 case Iop_Min32Sx2: 3044 case Iop_Min32Ux2: 3045 case Iop_CmpGT32Sx2: 3046 case Iop_CmpGT32Ux2: 3047 case Iop_CmpEQ32x2: 3048 case Iop_Add32x2: 3049 case Iop_QAdd32Ux2: 3050 case Iop_QAdd32Sx2: 3051 case Iop_QSub32Ux2: 3052 case Iop_QSub32Sx2: 3053 case Iop_QSal32x2: 3054 case Iop_QShl32x2: 3055 case Iop_QDMulHi32Sx2: 3056 case Iop_QRDMulHi32Sx2: 3057 return binary32Ix2(mce, vatom1, vatom2); 3058 3059 case Iop_QSub64Ux1: 3060 case Iop_QSub64Sx1: 3061 case Iop_QAdd64Ux1: 3062 case Iop_QAdd64Sx1: 3063 case Iop_QSal64x1: 3064 case Iop_QShl64x1: 3065 case Iop_Sal64x1: 3066 return binary64Ix1(mce, vatom1, vatom2); 3067 3068 case Iop_QShlNsatSU8x8: 3069 case Iop_QShlNsatUU8x8: 3070 case Iop_QShlNsatSS8x8: 3071 complainIfUndefined(mce, atom2, NULL); 3072 return mkPCast8x8(mce, vatom1); 3073 3074 case Iop_QShlNsatSU16x4: 3075 case Iop_QShlNsatUU16x4: 3076 case Iop_QShlNsatSS16x4: 3077 complainIfUndefined(mce, atom2, NULL); 3078 return mkPCast16x4(mce, vatom1); 3079 3080 case Iop_QShlNsatSU32x2: 3081 case Iop_QShlNsatUU32x2: 3082 case Iop_QShlNsatSS32x2: 3083 complainIfUndefined(mce, atom2, NULL); 3084 return mkPCast32x2(mce, vatom1); 3085 3086 case Iop_QShlNsatSU64x1: 3087 case Iop_QShlNsatUU64x1: 3088 case Iop_QShlNsatSS64x1: 3089 complainIfUndefined(mce, atom2, NULL); 3090 return mkPCast32x2(mce, vatom1); 3091 3092 case Iop_PwMax32Sx2: 3093 case Iop_PwMax32Ux2: 3094 case Iop_PwMin32Sx2: 3095 case Iop_PwMin32Ux2: 3096 case Iop_PwMax32Fx2: 3097 case Iop_PwMin32Fx2: 3098 return assignNew('V', mce, Ity_I64, 3099 binop(Iop_PwMax32Ux2, 3100 mkPCast32x2(mce, vatom1), 3101 mkPCast32x2(mce, vatom2))); 3102 3103 case Iop_PwMax16Sx4: 3104 case Iop_PwMax16Ux4: 3105 case Iop_PwMin16Sx4: 3106 case Iop_PwMin16Ux4: 3107 return assignNew('V', mce, Ity_I64, 3108 binop(Iop_PwMax16Ux4, 3109 mkPCast16x4(mce, vatom1), 3110 mkPCast16x4(mce, vatom2))); 3111 3112 case Iop_PwMax8Sx8: 3113 case Iop_PwMax8Ux8: 3114 case Iop_PwMin8Sx8: 3115 case Iop_PwMin8Ux8: 3116 return assignNew('V', mce, Ity_I64, 3117 binop(Iop_PwMax8Ux8, 3118 mkPCast8x8(mce, vatom1), 3119 mkPCast8x8(mce, vatom2))); 3120 3121 case 
Iop_PwAdd32x2: 3122 case Iop_PwAdd32Fx2: 3123 return mkPCast32x2(mce, 3124 assignNew('V', mce, Ity_I64, 3125 binop(Iop_PwAdd32x2, 3126 mkPCast32x2(mce, vatom1), 3127 mkPCast32x2(mce, vatom2)))); 3128 3129 case Iop_PwAdd16x4: 3130 return mkPCast16x4(mce, 3131 assignNew('V', mce, Ity_I64, 3132 binop(op, mkPCast16x4(mce, vatom1), 3133 mkPCast16x4(mce, vatom2)))); 3134 3135 case Iop_PwAdd8x8: 3136 return mkPCast8x8(mce, 3137 assignNew('V', mce, Ity_I64, 3138 binop(op, mkPCast8x8(mce, vatom1), 3139 mkPCast8x8(mce, vatom2)))); 3140 3141 case Iop_Shl8x8: 3142 case Iop_Shr8x8: 3143 case Iop_Sar8x8: 3144 case Iop_Sal8x8: 3145 return mkUifU64(mce, 3146 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 3147 mkPCast8x8(mce,vatom2) 3148 ); 3149 3150 case Iop_Shl16x4: 3151 case Iop_Shr16x4: 3152 case Iop_Sar16x4: 3153 case Iop_Sal16x4: 3154 return mkUifU64(mce, 3155 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 3156 mkPCast16x4(mce,vatom2) 3157 ); 3158 3159 case Iop_Shl32x2: 3160 case Iop_Shr32x2: 3161 case Iop_Sar32x2: 3162 case Iop_Sal32x2: 3163 return mkUifU64(mce, 3164 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 3165 mkPCast32x2(mce,vatom2) 3166 ); 3167 3168 /* 64-bit data-steering */ 3169 case Iop_InterleaveLO32x2: 3170 case Iop_InterleaveLO16x4: 3171 case Iop_InterleaveLO8x8: 3172 case Iop_InterleaveHI32x2: 3173 case Iop_InterleaveHI16x4: 3174 case Iop_InterleaveHI8x8: 3175 case Iop_CatOddLanes8x8: 3176 case Iop_CatEvenLanes8x8: 3177 case Iop_CatOddLanes16x4: 3178 case Iop_CatEvenLanes16x4: 3179 case Iop_InterleaveOddLanes8x8: 3180 case Iop_InterleaveEvenLanes8x8: 3181 case Iop_InterleaveOddLanes16x4: 3182 case Iop_InterleaveEvenLanes16x4: 3183 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2)); 3184 3185 case Iop_GetElem8x8: 3186 complainIfUndefined(mce, atom2, NULL); 3187 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 3188 case Iop_GetElem16x4: 3189 complainIfUndefined(mce, atom2, NULL); 3190 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 3191 case Iop_GetElem32x2: 3192 complainIfUndefined(mce, atom2, NULL); 3193 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 3194 3195 /* Perm8x8: rearrange values in left arg using steering values 3196 from right arg. So rearrange the vbits in the same way but 3197 pessimise wrt steering values. */ 3198 case Iop_Perm8x8: 3199 return mkUifU64( 3200 mce, 3201 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)), 3202 mkPCast8x8(mce, vatom2) 3203 ); 3204 3205 /* V128-bit SIMD */ 3206 3207 case Iop_Sqrt32Fx4: 3208 return unary32Fx4_w_rm(mce, vatom1, vatom2); 3209 case Iop_Sqrt64Fx2: 3210 return unary64Fx2_w_rm(mce, vatom1, vatom2); 3211 3212 case Iop_ShrN8x16: 3213 case Iop_ShrN16x8: 3214 case Iop_ShrN32x4: 3215 case Iop_ShrN64x2: 3216 case Iop_SarN8x16: 3217 case Iop_SarN16x8: 3218 case Iop_SarN32x4: 3219 case Iop_SarN64x2: 3220 case Iop_ShlN8x16: 3221 case Iop_ShlN16x8: 3222 case Iop_ShlN32x4: 3223 case Iop_ShlN64x2: 3224 /* Same scheme as with all other shifts. Note: 22 Oct 05: 3225 this is wrong now, scalar shifts are done properly lazily. 3226 Vector shifts should be fixed too. */ 3227 complainIfUndefined(mce, atom2, NULL); 3228 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 3229 3230 /* V x V shifts/rotates are done using the standard lazy scheme. */ 3231 /* For the non-rounding variants of bi-di vector x vector 3232 shifts (the Iop_Sh.. ops, that is) we use the lazy scheme. 
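         Concretely, for the 8-bit-lane group below this works out as
         (sketch only)

            Sh8Sx16(x,y) ==> UifUV128( Sh8Sx16(x#, y), PCast8x16(y#) )

         i.e. shift the shiftee's V bits by the original amounts, then
         fold in a per-lane pessimisation of the shift-amount vector's
         V bits.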
3233 But note that this is overly pessimistic, because in fact only 3234 the bottom 8 bits of each lane of the second argument are taken 3235 into account when shifting. So really we ought to ignore 3236 undefinedness in bits 8 and above of each lane in the 3237 second argument. */ 3238 case Iop_Shl8x16: 3239 case Iop_Shr8x16: 3240 case Iop_Sar8x16: 3241 case Iop_Sal8x16: 3242 case Iop_Rol8x16: 3243 case Iop_Sh8Sx16: 3244 case Iop_Sh8Ux16: 3245 return mkUifUV128(mce, 3246 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3247 mkPCast8x16(mce,vatom2) 3248 ); 3249 3250 case Iop_Shl16x8: 3251 case Iop_Shr16x8: 3252 case Iop_Sar16x8: 3253 case Iop_Sal16x8: 3254 case Iop_Rol16x8: 3255 case Iop_Sh16Sx8: 3256 case Iop_Sh16Ux8: 3257 return mkUifUV128(mce, 3258 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3259 mkPCast16x8(mce,vatom2) 3260 ); 3261 3262 case Iop_Shl32x4: 3263 case Iop_Shr32x4: 3264 case Iop_Sar32x4: 3265 case Iop_Sal32x4: 3266 case Iop_Rol32x4: 3267 case Iop_Sh32Sx4: 3268 case Iop_Sh32Ux4: 3269 return mkUifUV128(mce, 3270 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3271 mkPCast32x4(mce,vatom2) 3272 ); 3273 3274 case Iop_Shl64x2: 3275 case Iop_Shr64x2: 3276 case Iop_Sar64x2: 3277 case Iop_Sal64x2: 3278 case Iop_Rol64x2: 3279 case Iop_Sh64Sx2: 3280 case Iop_Sh64Ux2: 3281 return mkUifUV128(mce, 3282 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3283 mkPCast64x2(mce,vatom2) 3284 ); 3285 3286 /* For the rounding variants of bi-di vector x vector shifts, the 3287 rounding adjustment can cause undefinedness to propagate through 3288 the entire lane, in the worst case. Too complex to handle 3289 properly .. just UifU the arguments and then PCast them. 3290 Suboptimal but safe. */ 3291 case Iop_Rsh8Sx16: 3292 case Iop_Rsh8Ux16: 3293 return binary8Ix16(mce, vatom1, vatom2); 3294 case Iop_Rsh16Sx8: 3295 case Iop_Rsh16Ux8: 3296 return binary16Ix8(mce, vatom1, vatom2); 3297 case Iop_Rsh32Sx4: 3298 case Iop_Rsh32Ux4: 3299 return binary32Ix4(mce, vatom1, vatom2); 3300 case Iop_Rsh64Sx2: 3301 case Iop_Rsh64Ux2: 3302 return binary64Ix2(mce, vatom1, vatom2); 3303 3304 case Iop_F32ToFixed32Ux4_RZ: 3305 case Iop_F32ToFixed32Sx4_RZ: 3306 case Iop_Fixed32UToF32x4_RN: 3307 case Iop_Fixed32SToF32x4_RN: 3308 complainIfUndefined(mce, atom2, NULL); 3309 return mkPCast32x4(mce, vatom1); 3310 3311 case Iop_F32ToFixed32Ux2_RZ: 3312 case Iop_F32ToFixed32Sx2_RZ: 3313 case Iop_Fixed32UToF32x2_RN: 3314 case Iop_Fixed32SToF32x2_RN: 3315 complainIfUndefined(mce, atom2, NULL); 3316 return mkPCast32x2(mce, vatom1); 3317 3318 case Iop_QSub8Ux16: 3319 case Iop_QSub8Sx16: 3320 case Iop_Sub8x16: 3321 case Iop_Min8Ux16: 3322 case Iop_Min8Sx16: 3323 case Iop_Max8Ux16: 3324 case Iop_Max8Sx16: 3325 case Iop_CmpGT8Sx16: 3326 case Iop_CmpGT8Ux16: 3327 case Iop_CmpEQ8x16: 3328 case Iop_Avg8Ux16: 3329 case Iop_Avg8Sx16: 3330 case Iop_QAdd8Ux16: 3331 case Iop_QAdd8Sx16: 3332 case Iop_QAddExtUSsatSS8x16: 3333 case Iop_QAddExtSUsatUU8x16: 3334 case Iop_QSal8x16: 3335 case Iop_QShl8x16: 3336 case Iop_Add8x16: 3337 case Iop_Mul8x16: 3338 case Iop_PolynomialMul8x16: 3339 case Iop_PolynomialMulAdd8x16: 3340 return binary8Ix16(mce, vatom1, vatom2); 3341 3342 case Iop_QSub16Ux8: 3343 case Iop_QSub16Sx8: 3344 case Iop_Sub16x8: 3345 case Iop_Mul16x8: 3346 case Iop_MulHi16Sx8: 3347 case Iop_MulHi16Ux8: 3348 case Iop_Min16Sx8: 3349 case Iop_Min16Ux8: 3350 case Iop_Max16Sx8: 3351 case Iop_Max16Ux8: 3352 case Iop_CmpGT16Sx8: 3353 case Iop_CmpGT16Ux8: 3354 case Iop_CmpEQ16x8: 3355 case Iop_Avg16Ux8: 3356 
case Iop_Avg16Sx8: 3357 case Iop_QAdd16Ux8: 3358 case Iop_QAdd16Sx8: 3359 case Iop_QAddExtUSsatSS16x8: 3360 case Iop_QAddExtSUsatUU16x8: 3361 case Iop_QSal16x8: 3362 case Iop_QShl16x8: 3363 case Iop_Add16x8: 3364 case Iop_QDMulHi16Sx8: 3365 case Iop_QRDMulHi16Sx8: 3366 case Iop_PolynomialMulAdd16x8: 3367 return binary16Ix8(mce, vatom1, vatom2); 3368 3369 case Iop_Sub32x4: 3370 case Iop_CmpGT32Sx4: 3371 case Iop_CmpGT32Ux4: 3372 case Iop_CmpEQ32x4: 3373 case Iop_QAdd32Sx4: 3374 case Iop_QAdd32Ux4: 3375 case Iop_QSub32Sx4: 3376 case Iop_QSub32Ux4: 3377 case Iop_QAddExtUSsatSS32x4: 3378 case Iop_QAddExtSUsatUU32x4: 3379 case Iop_QSal32x4: 3380 case Iop_QShl32x4: 3381 case Iop_Avg32Ux4: 3382 case Iop_Avg32Sx4: 3383 case Iop_Add32x4: 3384 case Iop_Max32Ux4: 3385 case Iop_Max32Sx4: 3386 case Iop_Min32Ux4: 3387 case Iop_Min32Sx4: 3388 case Iop_Mul32x4: 3389 case Iop_QDMulHi32Sx4: 3390 case Iop_QRDMulHi32Sx4: 3391 case Iop_PolynomialMulAdd32x4: 3392 return binary32Ix4(mce, vatom1, vatom2); 3393 3394 case Iop_Sub64x2: 3395 case Iop_Add64x2: 3396 case Iop_Max64Sx2: 3397 case Iop_Max64Ux2: 3398 case Iop_Min64Sx2: 3399 case Iop_Min64Ux2: 3400 case Iop_CmpEQ64x2: 3401 case Iop_CmpGT64Sx2: 3402 case Iop_CmpGT64Ux2: 3403 case Iop_QSal64x2: 3404 case Iop_QShl64x2: 3405 case Iop_QAdd64Ux2: 3406 case Iop_QAdd64Sx2: 3407 case Iop_QSub64Ux2: 3408 case Iop_QSub64Sx2: 3409 case Iop_QAddExtUSsatSS64x2: 3410 case Iop_QAddExtSUsatUU64x2: 3411 case Iop_PolynomialMulAdd64x2: 3412 case Iop_CipherV128: 3413 case Iop_CipherLV128: 3414 case Iop_NCipherV128: 3415 case Iop_NCipherLV128: 3416 return binary64Ix2(mce, vatom1, vatom2); 3417 3418 case Iop_QNarrowBin64Sto32Sx4: 3419 case Iop_QNarrowBin64Uto32Ux4: 3420 case Iop_QNarrowBin32Sto16Sx8: 3421 case Iop_QNarrowBin32Uto16Ux8: 3422 case Iop_QNarrowBin32Sto16Ux8: 3423 case Iop_QNarrowBin16Sto8Sx16: 3424 case Iop_QNarrowBin16Uto8Ux16: 3425 case Iop_QNarrowBin16Sto8Ux16: 3426 return vectorNarrowBinV128(mce, op, vatom1, vatom2); 3427 3428 case Iop_Min64Fx2: 3429 case Iop_Max64Fx2: 3430 case Iop_CmpLT64Fx2: 3431 case Iop_CmpLE64Fx2: 3432 case Iop_CmpEQ64Fx2: 3433 case Iop_CmpUN64Fx2: 3434 case Iop_RecipStep64Fx2: 3435 case Iop_RSqrtStep64Fx2: 3436 return binary64Fx2(mce, vatom1, vatom2); 3437 3438 case Iop_Sub64F0x2: 3439 case Iop_Mul64F0x2: 3440 case Iop_Min64F0x2: 3441 case Iop_Max64F0x2: 3442 case Iop_Div64F0x2: 3443 case Iop_CmpLT64F0x2: 3444 case Iop_CmpLE64F0x2: 3445 case Iop_CmpEQ64F0x2: 3446 case Iop_CmpUN64F0x2: 3447 case Iop_Add64F0x2: 3448 return binary64F0x2(mce, vatom1, vatom2); 3449 3450 case Iop_Min32Fx4: 3451 case Iop_Max32Fx4: 3452 case Iop_CmpLT32Fx4: 3453 case Iop_CmpLE32Fx4: 3454 case Iop_CmpEQ32Fx4: 3455 case Iop_CmpUN32Fx4: 3456 case Iop_CmpGT32Fx4: 3457 case Iop_CmpGE32Fx4: 3458 case Iop_RecipStep32Fx4: 3459 case Iop_RSqrtStep32Fx4: 3460 return binary32Fx4(mce, vatom1, vatom2); 3461 3462 case Iop_Sub32Fx2: 3463 case Iop_Mul32Fx2: 3464 case Iop_Min32Fx2: 3465 case Iop_Max32Fx2: 3466 case Iop_CmpEQ32Fx2: 3467 case Iop_CmpGT32Fx2: 3468 case Iop_CmpGE32Fx2: 3469 case Iop_Add32Fx2: 3470 case Iop_RecipStep32Fx2: 3471 case Iop_RSqrtStep32Fx2: 3472 return binary32Fx2(mce, vatom1, vatom2); 3473 3474 case Iop_Sub32F0x4: 3475 case Iop_Mul32F0x4: 3476 case Iop_Min32F0x4: 3477 case Iop_Max32F0x4: 3478 case Iop_Div32F0x4: 3479 case Iop_CmpLT32F0x4: 3480 case Iop_CmpLE32F0x4: 3481 case Iop_CmpEQ32F0x4: 3482 case Iop_CmpUN32F0x4: 3483 case Iop_Add32F0x4: 3484 return binary32F0x4(mce, vatom1, vatom2); 3485 3486 case Iop_QShlNsatSU8x16: 3487 case Iop_QShlNsatUU8x16: 3488 
case Iop_QShlNsatSS8x16: 3489 complainIfUndefined(mce, atom2, NULL); 3490 return mkPCast8x16(mce, vatom1); 3491 3492 case Iop_QShlNsatSU16x8: 3493 case Iop_QShlNsatUU16x8: 3494 case Iop_QShlNsatSS16x8: 3495 complainIfUndefined(mce, atom2, NULL); 3496 return mkPCast16x8(mce, vatom1); 3497 3498 case Iop_QShlNsatSU32x4: 3499 case Iop_QShlNsatUU32x4: 3500 case Iop_QShlNsatSS32x4: 3501 complainIfUndefined(mce, atom2, NULL); 3502 return mkPCast32x4(mce, vatom1); 3503 3504 case Iop_QShlNsatSU64x2: 3505 case Iop_QShlNsatUU64x2: 3506 case Iop_QShlNsatSS64x2: 3507 complainIfUndefined(mce, atom2, NULL); 3508 return mkPCast32x4(mce, vatom1); 3509 3510 /* Q-and-Qshift-by-imm-and-narrow of the form (V128, I8) -> V128. 3511 To make this simpler, do the following: 3512 * complain if the shift amount (the I8) is undefined 3513 * pcast each lane at the wide width 3514 * truncate each lane to half width 3515 * pcast the resulting 64-bit value to a single bit and use 3516 that as the least significant bit of the upper half of the 3517 result. */ 3518 case Iop_QandQShrNnarrow64Uto32Ux2: 3519 case Iop_QandQSarNnarrow64Sto32Sx2: 3520 case Iop_QandQSarNnarrow64Sto32Ux2: 3521 case Iop_QandQRShrNnarrow64Uto32Ux2: 3522 case Iop_QandQRSarNnarrow64Sto32Sx2: 3523 case Iop_QandQRSarNnarrow64Sto32Ux2: 3524 case Iop_QandQShrNnarrow32Uto16Ux4: 3525 case Iop_QandQSarNnarrow32Sto16Sx4: 3526 case Iop_QandQSarNnarrow32Sto16Ux4: 3527 case Iop_QandQRShrNnarrow32Uto16Ux4: 3528 case Iop_QandQRSarNnarrow32Sto16Sx4: 3529 case Iop_QandQRSarNnarrow32Sto16Ux4: 3530 case Iop_QandQShrNnarrow16Uto8Ux8: 3531 case Iop_QandQSarNnarrow16Sto8Sx8: 3532 case Iop_QandQSarNnarrow16Sto8Ux8: 3533 case Iop_QandQRShrNnarrow16Uto8Ux8: 3534 case Iop_QandQRSarNnarrow16Sto8Sx8: 3535 case Iop_QandQRSarNnarrow16Sto8Ux8: 3536 { 3537 IRAtom* (*fnPessim) (MCEnv*, IRAtom*) = NULL; 3538 IROp opNarrow = Iop_INVALID; 3539 switch (op) { 3540 case Iop_QandQShrNnarrow64Uto32Ux2: 3541 case Iop_QandQSarNnarrow64Sto32Sx2: 3542 case Iop_QandQSarNnarrow64Sto32Ux2: 3543 case Iop_QandQRShrNnarrow64Uto32Ux2: 3544 case Iop_QandQRSarNnarrow64Sto32Sx2: 3545 case Iop_QandQRSarNnarrow64Sto32Ux2: 3546 fnPessim = mkPCast64x2; 3547 opNarrow = Iop_NarrowUn64to32x2; 3548 break; 3549 case Iop_QandQShrNnarrow32Uto16Ux4: 3550 case Iop_QandQSarNnarrow32Sto16Sx4: 3551 case Iop_QandQSarNnarrow32Sto16Ux4: 3552 case Iop_QandQRShrNnarrow32Uto16Ux4: 3553 case Iop_QandQRSarNnarrow32Sto16Sx4: 3554 case Iop_QandQRSarNnarrow32Sto16Ux4: 3555 fnPessim = mkPCast32x4; 3556 opNarrow = Iop_NarrowUn32to16x4; 3557 break; 3558 case Iop_QandQShrNnarrow16Uto8Ux8: 3559 case Iop_QandQSarNnarrow16Sto8Sx8: 3560 case Iop_QandQSarNnarrow16Sto8Ux8: 3561 case Iop_QandQRShrNnarrow16Uto8Ux8: 3562 case Iop_QandQRSarNnarrow16Sto8Sx8: 3563 case Iop_QandQRSarNnarrow16Sto8Ux8: 3564 fnPessim = mkPCast16x8; 3565 opNarrow = Iop_NarrowUn16to8x8; 3566 break; 3567 default: 3568 tl_assert(0); 3569 } 3570 complainIfUndefined(mce, atom2, NULL); 3571 // Pessimised shift result 3572 IRAtom* shV 3573 = fnPessim(mce, vatom1); 3574 // Narrowed, pessimised shift result 3575 IRAtom* shVnarrowed 3576 = assignNew('V', mce, Ity_I64, unop(opNarrow, shV)); 3577 // Generates: Def--(63)--Def PCast-to-I1(narrowed) 3578 IRAtom* qV = mkPCastXXtoXXlsb(mce, shVnarrowed, Ity_I64); 3579 // and assemble the result 3580 return assignNew('V', mce, Ity_V128, 3581 binop(Iop_64HLtoV128, qV, shVnarrowed)); 3582 } 3583 3584 case Iop_Mull32Sx2: 3585 case Iop_Mull32Ux2: 3586 case Iop_QDMull32Sx2: 3587 return vectorWidenI64(mce, Iop_Widen32Sto64x2, 3588 
mkUifU64(mce, vatom1, vatom2)); 3589 3590 case Iop_Mull16Sx4: 3591 case Iop_Mull16Ux4: 3592 case Iop_QDMull16Sx4: 3593 return vectorWidenI64(mce, Iop_Widen16Sto32x4, 3594 mkUifU64(mce, vatom1, vatom2)); 3595 3596 case Iop_Mull8Sx8: 3597 case Iop_Mull8Ux8: 3598 case Iop_PolynomialMull8x8: 3599 return vectorWidenI64(mce, Iop_Widen8Sto16x8, 3600 mkUifU64(mce, vatom1, vatom2)); 3601 3602 case Iop_PwAdd32x4: 3603 return mkPCast32x4(mce, 3604 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1), 3605 mkPCast32x4(mce, vatom2)))); 3606 3607 case Iop_PwAdd16x8: 3608 return mkPCast16x8(mce, 3609 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1), 3610 mkPCast16x8(mce, vatom2)))); 3611 3612 case Iop_PwAdd8x16: 3613 return mkPCast8x16(mce, 3614 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1), 3615 mkPCast8x16(mce, vatom2)))); 3616 3617 /* V128-bit data-steering */ 3618 case Iop_SetV128lo32: 3619 case Iop_SetV128lo64: 3620 case Iop_64HLtoV128: 3621 case Iop_InterleaveLO64x2: 3622 case Iop_InterleaveLO32x4: 3623 case Iop_InterleaveLO16x8: 3624 case Iop_InterleaveLO8x16: 3625 case Iop_InterleaveHI64x2: 3626 case Iop_InterleaveHI32x4: 3627 case Iop_InterleaveHI16x8: 3628 case Iop_InterleaveHI8x16: 3629 case Iop_CatOddLanes8x16: 3630 case Iop_CatOddLanes16x8: 3631 case Iop_CatOddLanes32x4: 3632 case Iop_CatEvenLanes8x16: 3633 case Iop_CatEvenLanes16x8: 3634 case Iop_CatEvenLanes32x4: 3635 case Iop_InterleaveOddLanes8x16: 3636 case Iop_InterleaveOddLanes16x8: 3637 case Iop_InterleaveOddLanes32x4: 3638 case Iop_InterleaveEvenLanes8x16: 3639 case Iop_InterleaveEvenLanes16x8: 3640 case Iop_InterleaveEvenLanes32x4: 3641 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2)); 3642 3643 case Iop_GetElem8x16: 3644 complainIfUndefined(mce, atom2, NULL); 3645 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2)); 3646 case Iop_GetElem16x8: 3647 complainIfUndefined(mce, atom2, NULL); 3648 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2)); 3649 case Iop_GetElem32x4: 3650 complainIfUndefined(mce, atom2, NULL); 3651 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2)); 3652 case Iop_GetElem64x2: 3653 complainIfUndefined(mce, atom2, NULL); 3654 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)); 3655 3656 /* Perm8x16: rearrange values in left arg using steering values 3657 from right arg. So rearrange the vbits in the same way but 3658 pessimise wrt steering values. Perm32x4 ditto. */ 3659 case Iop_Perm8x16: 3660 return mkUifUV128( 3661 mce, 3662 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3663 mkPCast8x16(mce, vatom2) 3664 ); 3665 case Iop_Perm32x4: 3666 return mkUifUV128( 3667 mce, 3668 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)), 3669 mkPCast32x4(mce, vatom2) 3670 ); 3671 3672 /* These two take the lower half of each 16-bit lane, sign/zero 3673 extend it to 32, and multiply together, producing a 32x4 3674 result (and implicitly ignoring half the operand bits). So 3675 treat it as a bunch of independent 16x8 operations, but then 3676 do 32-bit shifts left-right to copy the lower half results 3677 (which are all 0s or all 1s due to PCasting in binary16Ix8) 3678 into the upper half of each result lane. 
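         For illustration: if, after binary16Ix8, a 32-bit result lane
         holds 0x0000FFFF (lower 16-bit half undefined, upper half
         defined), then ShlN32x4 by 16 gives 0xFFFF0000 and SarN32x4 by
         16 gives 0xFFFFFFFF, marking the whole 32-bit product lane
         undefined.  Conversely a lane of 0xFFFF0000 becomes 0x00000000,
         since the vbits of the ignored operand halves are deliberately
         dropped.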
*/ 3679 case Iop_MullEven16Ux8: 3680 case Iop_MullEven16Sx8: { 3681 IRAtom* at; 3682 at = binary16Ix8(mce,vatom1,vatom2); 3683 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16))); 3684 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16))); 3685 return at; 3686 } 3687 3688 /* Same deal as Iop_MullEven16{S,U}x8 */ 3689 case Iop_MullEven8Ux16: 3690 case Iop_MullEven8Sx16: { 3691 IRAtom* at; 3692 at = binary8Ix16(mce,vatom1,vatom2); 3693 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8))); 3694 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8))); 3695 return at; 3696 } 3697 3698 /* Same deal as Iop_MullEven16{S,U}x8 */ 3699 case Iop_MullEven32Ux4: 3700 case Iop_MullEven32Sx4: { 3701 IRAtom* at; 3702 at = binary32Ix4(mce,vatom1,vatom2); 3703 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN64x2, at, mkU8(32))); 3704 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN64x2, at, mkU8(32))); 3705 return at; 3706 } 3707 3708 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x 3709 32x4 -> 16x8 laneage, discarding the upper half of each lane. 3710 Simply apply same op to the V bits, since this really no more 3711 than a data steering operation. */ 3712 case Iop_NarrowBin32to16x8: 3713 case Iop_NarrowBin16to8x16: 3714 case Iop_NarrowBin64to32x4: 3715 return assignNew('V', mce, Ity_V128, 3716 binop(op, vatom1, vatom2)); 3717 3718 case Iop_ShrV128: 3719 case Iop_ShlV128: 3720 /* Same scheme as with all other shifts. Note: 10 Nov 05: 3721 this is wrong now, scalar shifts are done properly lazily. 3722 Vector shifts should be fixed too. */ 3723 complainIfUndefined(mce, atom2, NULL); 3724 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 3725 3726 /* SHA Iops */ 3727 case Iop_SHA256: 3728 case Iop_SHA512: 3729 complainIfUndefined(mce, atom2, NULL); 3730 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)); 3731 3732 /* I128-bit data-steering */ 3733 case Iop_64HLto128: 3734 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2)); 3735 3736 /* V256-bit SIMD */ 3737 3738 case Iop_Max64Fx4: 3739 case Iop_Min64Fx4: 3740 return binary64Fx4(mce, vatom1, vatom2); 3741 3742 case Iop_Max32Fx8: 3743 case Iop_Min32Fx8: 3744 return binary32Fx8(mce, vatom1, vatom2); 3745 3746 /* V256-bit data-steering */ 3747 case Iop_V128HLtoV256: 3748 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2)); 3749 3750 /* Scalar floating point */ 3751 3752 case Iop_F32toI64S: 3753 case Iop_F32toI64U: 3754 /* I32(rm) x F32 -> I64 */ 3755 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3756 3757 case Iop_I64StoF32: 3758 /* I32(rm) x I64 -> F32 */ 3759 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3760 3761 case Iop_RoundF64toInt: 3762 case Iop_RoundF64toF32: 3763 case Iop_F64toI64S: 3764 case Iop_F64toI64U: 3765 case Iop_I64StoF64: 3766 case Iop_I64UtoF64: 3767 case Iop_SinF64: 3768 case Iop_CosF64: 3769 case Iop_TanF64: 3770 case Iop_2xm1F64: 3771 case Iop_SqrtF64: 3772 case Iop_RecpExpF64: 3773 /* I32(rm) x I64/F64 -> I64/F64 */ 3774 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3775 3776 case Iop_ShlD64: 3777 case Iop_ShrD64: 3778 case Iop_RoundD64toInt: 3779 /* I32(rm) x D64 -> D64 */ 3780 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3781 3782 case Iop_ShlD128: 3783 case Iop_ShrD128: 3784 case Iop_RoundD128toInt: 3785 /* I32(rm) x D128 -> D128 */ 3786 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3787 3788 case Iop_D64toI64S: 3789 case Iop_D64toI64U: 3790 case Iop_I64StoD64: 3791 case Iop_I64UtoD64: 3792 /* I32(rm) x 
I64/D64 -> D64/I64 */ 3793 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3794 3795 case Iop_F32toD32: 3796 case Iop_F64toD32: 3797 case Iop_F128toD32: 3798 case Iop_D32toF32: 3799 case Iop_D64toF32: 3800 case Iop_D128toF32: 3801 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */ 3802 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3803 3804 case Iop_F32toD64: 3805 case Iop_F64toD64: 3806 case Iop_F128toD64: 3807 case Iop_D32toF64: 3808 case Iop_D64toF64: 3809 case Iop_D128toF64: 3810 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */ 3811 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3812 3813 case Iop_F32toD128: 3814 case Iop_F64toD128: 3815 case Iop_F128toD128: 3816 case Iop_D32toF128: 3817 case Iop_D64toF128: 3818 case Iop_D128toF128: 3819 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */ 3820 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3821 3822 case Iop_RoundF32toInt: 3823 case Iop_SqrtF32: 3824 case Iop_RecpExpF32: 3825 /* I32(rm) x I32/F32 -> I32/F32 */ 3826 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3827 3828 case Iop_SqrtF128: 3829 /* I32(rm) x F128 -> F128 */ 3830 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3831 3832 case Iop_I32StoF32: 3833 case Iop_I32UtoF32: 3834 case Iop_F32toI32S: 3835 case Iop_F32toI32U: 3836 /* First arg is I32 (rounding mode), second is F32/I32 (data). */ 3837 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3838 3839 case Iop_F64toF16: 3840 case Iop_F32toF16: 3841 /* First arg is I32 (rounding mode), second is F64/F32 (data). */ 3842 return mkLazy2(mce, Ity_I16, vatom1, vatom2); 3843 3844 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */ 3845 case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */ 3846 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */ 3847 case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32 */ 3848 case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32 */ 3849 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3850 3851 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */ 3852 case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */ 3853 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */ 3854 case Iop_D128toD64: /* IRRoundingMode(I64) x D128 -> D64 */ 3855 case Iop_D128toI64S: /* IRRoundingMode(I64) x D128 -> signed I64 */ 3856 case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64 */ 3857 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3858 3859 case Iop_F64HLtoF128: 3860 case Iop_D64HLtoD128: 3861 return assignNew('V', mce, Ity_I128, 3862 binop(Iop_64HLto128, vatom1, vatom2)); 3863 3864 case Iop_F64toI32U: 3865 case Iop_F64toI32S: 3866 case Iop_F64toF32: 3867 case Iop_I64UtoF32: 3868 case Iop_D64toI32U: 3869 case Iop_D64toI32S: 3870 /* First arg is I32 (rounding mode), second is F64/D64 (data). */ 3871 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3872 3873 case Iop_D64toD32: 3874 /* First arg is I32 (rounding mode), second is D64 (data). */ 3875 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3876 3877 case Iop_F64toI16S: 3878 /* First arg is I32 (rounding mode), second is F64 (data). 
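            As with the other scalar FP cases above, mkLazy2 gives the
            worst-case ("lazy") interpretation: the I16 result is treated
            as wholly undefined if either the rounding mode or the data
            argument contains any undefined bits at all.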
*/ 3879 return mkLazy2(mce, Ity_I16, vatom1, vatom2); 3880 3881 case Iop_InsertExpD64: 3882 /* I64 x I64 -> D64 */ 3883 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3884 3885 case Iop_InsertExpD128: 3886 /* I64 x I128 -> D128 */ 3887 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3888 3889 case Iop_CmpF32: 3890 case Iop_CmpF64: 3891 case Iop_CmpF128: 3892 case Iop_CmpD64: 3893 case Iop_CmpD128: 3894 case Iop_CmpExpD64: 3895 case Iop_CmpExpD128: 3896 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3897 3898 /* non-FP after here */ 3899 3900 case Iop_DivModU64to32: 3901 case Iop_DivModS64to32: 3902 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3903 3904 case Iop_DivModU128to64: 3905 case Iop_DivModS128to64: 3906 return mkLazy2(mce, Ity_I128, vatom1, vatom2); 3907 3908 case Iop_8HLto16: 3909 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2)); 3910 case Iop_16HLto32: 3911 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2)); 3912 case Iop_32HLto64: 3913 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2)); 3914 3915 case Iop_DivModS64to64: 3916 case Iop_MullS64: 3917 case Iop_MullU64: { 3918 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2)); 3919 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64); 3920 return assignNew('V', mce, Ity_I128, 3921 binop(Iop_64HLto128, vHi64, vLo64)); 3922 } 3923 3924 case Iop_MullS32: 3925 case Iop_MullU32: { 3926 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2)); 3927 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32); 3928 return assignNew('V', mce, Ity_I64, 3929 binop(Iop_32HLto64, vHi32, vLo32)); 3930 } 3931 3932 case Iop_MullS16: 3933 case Iop_MullU16: { 3934 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2)); 3935 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16); 3936 return assignNew('V', mce, Ity_I32, 3937 binop(Iop_16HLto32, vHi16, vLo16)); 3938 } 3939 3940 case Iop_MullS8: 3941 case Iop_MullU8: { 3942 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2)); 3943 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8); 3944 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8)); 3945 } 3946 3947 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. 
*/ 3948 case Iop_DivS32: 3949 case Iop_DivU32: 3950 case Iop_DivU32E: 3951 case Iop_DivS32E: 3952 case Iop_QAdd32S: /* could probably do better */ 3953 case Iop_QSub32S: /* could probably do better */ 3954 return mkLazy2(mce, Ity_I32, vatom1, vatom2); 3955 3956 case Iop_DivS64: 3957 case Iop_DivU64: 3958 case Iop_DivS64E: 3959 case Iop_DivU64E: 3960 return mkLazy2(mce, Ity_I64, vatom1, vatom2); 3961 3962 case Iop_Add32: 3963 if (mce->bogusLiterals || mce->useLLVMworkarounds) 3964 return expensiveAddSub(mce,True,Ity_I32, 3965 vatom1,vatom2, atom1,atom2); 3966 else 3967 goto cheap_AddSub32; 3968 case Iop_Sub32: 3969 if (mce->bogusLiterals) 3970 return expensiveAddSub(mce,False,Ity_I32, 3971 vatom1,vatom2, atom1,atom2); 3972 else 3973 goto cheap_AddSub32; 3974 3975 cheap_AddSub32: 3976 case Iop_Mul32: 3977 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2)); 3978 3979 case Iop_CmpORD32S: 3980 case Iop_CmpORD32U: 3981 case Iop_CmpORD64S: 3982 case Iop_CmpORD64U: 3983 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2); 3984 3985 case Iop_Add64: 3986 if (mce->bogusLiterals || mce->useLLVMworkarounds) 3987 return expensiveAddSub(mce,True,Ity_I64, 3988 vatom1,vatom2, atom1,atom2); 3989 else 3990 goto cheap_AddSub64; 3991 case Iop_Sub64: 3992 if (mce->bogusLiterals) 3993 return expensiveAddSub(mce,False,Ity_I64, 3994 vatom1,vatom2, atom1,atom2); 3995 else 3996 goto cheap_AddSub64; 3997 3998 cheap_AddSub64: 3999 case Iop_Mul64: 4000 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2)); 4001 4002 case Iop_Mul16: 4003 case Iop_Add16: 4004 case Iop_Sub16: 4005 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2)); 4006 4007 case Iop_Mul8: 4008 case Iop_Sub8: 4009 case Iop_Add8: 4010 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2)); 4011 4012 case Iop_CmpEQ64: 4013 case Iop_CmpNE64: 4014 if (mce->bogusLiterals) 4015 goto expensive_cmp64; 4016 else 4017 goto cheap_cmp64; 4018 4019 expensive_cmp64: 4020 case Iop_ExpCmpNE64: 4021 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 ); 4022 4023 cheap_cmp64: 4024 case Iop_CmpLE64S: case Iop_CmpLE64U: 4025 case Iop_CmpLT64U: case Iop_CmpLT64S: 4026 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2)); 4027 4028 case Iop_CmpEQ32: 4029 case Iop_CmpNE32: 4030 if (mce->bogusLiterals) 4031 goto expensive_cmp32; 4032 else 4033 goto cheap_cmp32; 4034 4035 expensive_cmp32: 4036 case Iop_ExpCmpNE32: 4037 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 ); 4038 4039 cheap_cmp32: 4040 case Iop_CmpLE32S: case Iop_CmpLE32U: 4041 case Iop_CmpLT32U: case Iop_CmpLT32S: 4042 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2)); 4043 4044 case Iop_CmpEQ16: case Iop_CmpNE16: 4045 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2)); 4046 4047 case Iop_ExpCmpNE16: 4048 return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 ); 4049 4050 case Iop_CmpEQ8: case Iop_CmpNE8: 4051 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2)); 4052 4053 case Iop_CasCmpEQ8: case Iop_CasCmpNE8: 4054 case Iop_CasCmpEQ16: case Iop_CasCmpNE16: 4055 case Iop_CasCmpEQ32: case Iop_CasCmpNE32: 4056 case Iop_CasCmpEQ64: case Iop_CasCmpNE64: 4057 /* Just say these all produce a defined result, regardless 4058 of their arguments. See COMMENT_ON_CasCmpEQ in this file. 
*/ 4059 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1)); 4060 4061 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64: 4062 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 ); 4063 4064 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32: 4065 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 ); 4066 4067 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16: 4068 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 ); 4069 4070 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8: 4071 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 ); 4072 4073 case Iop_AndV256: 4074 uifu = mkUifUV256; difd = mkDifDV256; 4075 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or; 4076 case Iop_AndV128: 4077 uifu = mkUifUV128; difd = mkDifDV128; 4078 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or; 4079 case Iop_And64: 4080 uifu = mkUifU64; difd = mkDifD64; 4081 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or; 4082 case Iop_And32: 4083 uifu = mkUifU32; difd = mkDifD32; 4084 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or; 4085 case Iop_And16: 4086 uifu = mkUifU16; difd = mkDifD16; 4087 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or; 4088 case Iop_And8: 4089 uifu = mkUifU8; difd = mkDifD8; 4090 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or; 4091 4092 case Iop_OrV256: 4093 uifu = mkUifUV256; difd = mkDifDV256; 4094 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or; 4095 case Iop_OrV128: 4096 uifu = mkUifUV128; difd = mkDifDV128; 4097 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or; 4098 case Iop_Or64: 4099 uifu = mkUifU64; difd = mkDifD64; 4100 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or; 4101 case Iop_Or32: 4102 uifu = mkUifU32; difd = mkDifD32; 4103 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or; 4104 case Iop_Or16: 4105 uifu = mkUifU16; difd = mkDifD16; 4106 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or; 4107 case Iop_Or8: 4108 uifu = mkUifU8; difd = mkDifD8; 4109 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or; 4110 4111 do_And_Or: 4112 return 4113 assignNew( 4114 'V', mce, 4115 and_or_ty, 4116 difd(mce, uifu(mce, vatom1, vatom2), 4117 difd(mce, improve(mce, atom1, vatom1), 4118 improve(mce, atom2, vatom2) ) ) ); 4119 4120 case Iop_Xor8: 4121 return mkUifU8(mce, vatom1, vatom2); 4122 case Iop_Xor16: 4123 return mkUifU16(mce, vatom1, vatom2); 4124 case Iop_Xor32: 4125 return mkUifU32(mce, vatom1, vatom2); 4126 case Iop_Xor64: 4127 return mkUifU64(mce, vatom1, vatom2); 4128 case Iop_XorV128: 4129 return mkUifUV128(mce, vatom1, vatom2); 4130 case Iop_XorV256: 4131 return mkUifUV256(mce, vatom1, vatom2); 4132 4133 /* V256-bit SIMD */ 4134 4135 case Iop_ShrN16x16: 4136 case Iop_ShrN32x8: 4137 case Iop_ShrN64x4: 4138 case Iop_SarN16x16: 4139 case Iop_SarN32x8: 4140 case Iop_ShlN16x16: 4141 case Iop_ShlN32x8: 4142 case Iop_ShlN64x4: 4143 /* Same scheme as with all other shifts. Note: 22 Oct 05: 4144 this is wrong now, scalar shifts are done properly lazily. 4145 Vector shifts should be fixed too. 
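            For reference, a lazy scheme along the lines of scalarShift
            might look like this (a sketch only, assuming mkPCastTo can
            produce an Ity_V256 result; it is not a drop-in fix):

               IRAtom* shifted
                  = assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));
               return mkUifUV256(mce, shifted,
                                 mkPCastTo(mce, Ity_V256, vatom2));

            That is, steer the vbits of the shiftee by the original shift
            amount, and additionally trash the whole result if the shift
            amount itself has any undefined bits.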
      */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));

      case Iop_QSub8Ux32:
      case Iop_QSub8Sx32:
      case Iop_Sub8x32:
      case Iop_Min8Ux32:
      case Iop_Min8Sx32:
      case Iop_Max8Ux32:
      case Iop_Max8Sx32:
      case Iop_CmpGT8Sx32:
      case Iop_CmpEQ8x32:
      case Iop_Avg8Ux32:
      case Iop_QAdd8Ux32:
      case Iop_QAdd8Sx32:
      case Iop_Add8x32:
         return binary8Ix32(mce, vatom1, vatom2);

      case Iop_QSub16Ux16:
      case Iop_QSub16Sx16:
      case Iop_Sub16x16:
      case Iop_Mul16x16:
      case Iop_MulHi16Sx16:
      case Iop_MulHi16Ux16:
      case Iop_Min16Sx16:
      case Iop_Min16Ux16:
      case Iop_Max16Sx16:
      case Iop_Max16Ux16:
      case Iop_CmpGT16Sx16:
      case Iop_CmpEQ16x16:
      case Iop_Avg16Ux16:
      case Iop_QAdd16Ux16:
      case Iop_QAdd16Sx16:
      case Iop_Add16x16:
         return binary16Ix16(mce, vatom1, vatom2);

      case Iop_Sub32x8:
      case Iop_CmpGT32Sx8:
      case Iop_CmpEQ32x8:
      case Iop_Add32x8:
      case Iop_Max32Ux8:
      case Iop_Max32Sx8:
      case Iop_Min32Ux8:
      case Iop_Min32Sx8:
      case Iop_Mul32x8:
         return binary32Ix8(mce, vatom1, vatom2);

      case Iop_Sub64x4:
      case Iop_Add64x4:
      case Iop_CmpEQ64x4:
      case Iop_CmpGT64Sx4:
         return binary64Ix4(mce, vatom1, vatom2);

      /* Perm32x8: rearrange values in left arg using steering values
         from right arg.  So rearrange the vbits in the same way but
         pessimise wrt steering values. */
      case Iop_Perm32x8:
         return mkUifUV256(
                   mce,
                   assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
                   mkPCast32x8(mce, vatom2)
                );

      /* Q-and-Qshift-by-vector of the form (V128, V128) -> V256.
         Handle the shifted results in the same way that other
         binary Q ops are handled, eg QSub: UifU the two args,
         then pessimise -- which is binaryNIxM.  But for the upper
         V128, we need to generate just 1 bit which is the
         pessimised shift result, with 127 defined zeroes above it.

         Note that this is overly pessimistic in that in fact only the
         bottom 8 bits of each lane of the second arg determine the shift
         amount.  Really we ought to ignore any undefinedness in the
         rest of the lanes of the second arg.
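         Concretely, in the code below: shV is the 128-bit pessimised
         shift result; qV is a V128 whose bits 127:1 are all zero
         (defined) and whose bit 0 is the PCast-to-I1 of shV; the V256
         shadow result is then V128HLtoV256(qV, shV), so the upper half
         (the cumulative-saturation output of these ops) is modelled by
         a single worst-case bit.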
*/ 4220 case Iop_QandSQsh64x2: case Iop_QandUQsh64x2: 4221 case Iop_QandSQRsh64x2: case Iop_QandUQRsh64x2: 4222 case Iop_QandSQsh32x4: case Iop_QandUQsh32x4: 4223 case Iop_QandSQRsh32x4: case Iop_QandUQRsh32x4: 4224 case Iop_QandSQsh16x8: case Iop_QandUQsh16x8: 4225 case Iop_QandSQRsh16x8: case Iop_QandUQRsh16x8: 4226 case Iop_QandSQsh8x16: case Iop_QandUQsh8x16: 4227 case Iop_QandSQRsh8x16: case Iop_QandUQRsh8x16: 4228 { 4229 // The function to generate the pessimised shift result 4230 IRAtom* (*binaryNIxM)(MCEnv*,IRAtom*,IRAtom*) = NULL; 4231 switch (op) { 4232 case Iop_QandSQsh64x2: 4233 case Iop_QandUQsh64x2: 4234 case Iop_QandSQRsh64x2: 4235 case Iop_QandUQRsh64x2: 4236 binaryNIxM = binary64Ix2; 4237 break; 4238 case Iop_QandSQsh32x4: 4239 case Iop_QandUQsh32x4: 4240 case Iop_QandSQRsh32x4: 4241 case Iop_QandUQRsh32x4: 4242 binaryNIxM = binary32Ix4; 4243 break; 4244 case Iop_QandSQsh16x8: 4245 case Iop_QandUQsh16x8: 4246 case Iop_QandSQRsh16x8: 4247 case Iop_QandUQRsh16x8: 4248 binaryNIxM = binary16Ix8; 4249 break; 4250 case Iop_QandSQsh8x16: 4251 case Iop_QandUQsh8x16: 4252 case Iop_QandSQRsh8x16: 4253 case Iop_QandUQRsh8x16: 4254 binaryNIxM = binary8Ix16; 4255 break; 4256 default: 4257 tl_assert(0); 4258 } 4259 tl_assert(binaryNIxM); 4260 // Pessimised shift result, shV[127:0] 4261 IRAtom* shV = binaryNIxM(mce, vatom1, vatom2); 4262 // Generates: Def--(127)--Def PCast-to-I1(shV) 4263 IRAtom* qV = mkPCastXXtoXXlsb(mce, shV, Ity_V128); 4264 // and assemble the result 4265 return assignNew('V', mce, Ity_V256, 4266 binop(Iop_V128HLtoV256, qV, shV)); 4267 } 4268 4269 default: 4270 ppIROp(op); 4271 VG_(tool_panic)("memcheck:expr2vbits_Binop"); 4272 } 4273 } 4274 4275 4276 static 4277 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom ) 4278 { 4279 /* For the widening operations {8,16,32}{U,S}to{16,32,64}, the 4280 selection of shadow operation implicitly duplicates the logic in 4281 do_shadow_LoadG and should be kept in sync (in the very unlikely 4282 event that the interpretation of such widening ops changes in 4283 future). See comment in do_shadow_LoadG. 
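      For example, Iop_8Uto32 and Iop_8Sto32 are shadowed below simply by
      applying the same widening op to the vbits, and
      expr2vbits_Load_guarded_General applies exactly those ops (via its
      |vwiden| argument) when widening the shadow value of a guarded
      sub-word load.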
*/ 4284 IRAtom* vatom = expr2vbits( mce, atom ); 4285 tl_assert(isOriginalAtom(mce,atom)); 4286 switch (op) { 4287 4288 case Iop_Abs64Fx2: 4289 case Iop_Neg64Fx2: 4290 case Iop_RSqrtEst64Fx2: 4291 case Iop_RecipEst64Fx2: 4292 return unary64Fx2(mce, vatom); 4293 4294 case Iop_Sqrt64F0x2: 4295 return unary64F0x2(mce, vatom); 4296 4297 case Iop_Sqrt32Fx8: 4298 case Iop_RSqrtEst32Fx8: 4299 case Iop_RecipEst32Fx8: 4300 return unary32Fx8(mce, vatom); 4301 4302 case Iop_Sqrt64Fx4: 4303 return unary64Fx4(mce, vatom); 4304 4305 case Iop_RecipEst32Fx4: 4306 case Iop_I32UtoFx4: 4307 case Iop_I32StoFx4: 4308 case Iop_QFtoI32Ux4_RZ: 4309 case Iop_QFtoI32Sx4_RZ: 4310 case Iop_RoundF32x4_RM: 4311 case Iop_RoundF32x4_RP: 4312 case Iop_RoundF32x4_RN: 4313 case Iop_RoundF32x4_RZ: 4314 case Iop_RecipEst32Ux4: 4315 case Iop_Abs32Fx4: 4316 case Iop_Neg32Fx4: 4317 case Iop_RSqrtEst32Fx4: 4318 return unary32Fx4(mce, vatom); 4319 4320 case Iop_I32UtoFx2: 4321 case Iop_I32StoFx2: 4322 case Iop_RecipEst32Fx2: 4323 case Iop_RecipEst32Ux2: 4324 case Iop_Abs32Fx2: 4325 case Iop_Neg32Fx2: 4326 case Iop_RSqrtEst32Fx2: 4327 return unary32Fx2(mce, vatom); 4328 4329 case Iop_Sqrt32F0x4: 4330 case Iop_RSqrtEst32F0x4: 4331 case Iop_RecipEst32F0x4: 4332 return unary32F0x4(mce, vatom); 4333 4334 case Iop_32UtoV128: 4335 case Iop_64UtoV128: 4336 case Iop_Dup8x16: 4337 case Iop_Dup16x8: 4338 case Iop_Dup32x4: 4339 case Iop_Reverse1sIn8_x16: 4340 case Iop_Reverse8sIn16_x8: 4341 case Iop_Reverse8sIn32_x4: 4342 case Iop_Reverse16sIn32_x4: 4343 case Iop_Reverse8sIn64_x2: 4344 case Iop_Reverse16sIn64_x2: 4345 case Iop_Reverse32sIn64_x2: 4346 case Iop_V256toV128_1: case Iop_V256toV128_0: 4347 case Iop_ZeroHI64ofV128: 4348 case Iop_ZeroHI96ofV128: 4349 case Iop_ZeroHI112ofV128: 4350 case Iop_ZeroHI120ofV128: 4351 return assignNew('V', mce, Ity_V128, unop(op, vatom)); 4352 4353 case Iop_F128HItoF64: /* F128 -> high half of F128 */ 4354 case Iop_D128HItoD64: /* D128 -> high half of D128 */ 4355 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom)); 4356 case Iop_F128LOtoF64: /* F128 -> low half of F128 */ 4357 case Iop_D128LOtoD64: /* D128 -> low half of D128 */ 4358 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom)); 4359 4360 case Iop_NegF128: 4361 case Iop_AbsF128: 4362 return mkPCastTo(mce, Ity_I128, vatom); 4363 4364 case Iop_I32StoF128: /* signed I32 -> F128 */ 4365 case Iop_I64StoF128: /* signed I64 -> F128 */ 4366 case Iop_I32UtoF128: /* unsigned I32 -> F128 */ 4367 case Iop_I64UtoF128: /* unsigned I64 -> F128 */ 4368 case Iop_F32toF128: /* F32 -> F128 */ 4369 case Iop_F64toF128: /* F64 -> F128 */ 4370 case Iop_I32StoD128: /* signed I64 -> D128 */ 4371 case Iop_I64StoD128: /* signed I64 -> D128 */ 4372 case Iop_I32UtoD128: /* unsigned I32 -> D128 */ 4373 case Iop_I64UtoD128: /* unsigned I64 -> D128 */ 4374 return mkPCastTo(mce, Ity_I128, vatom); 4375 4376 case Iop_F16toF64: 4377 case Iop_F32toF64: 4378 case Iop_I32StoF64: 4379 case Iop_I32UtoF64: 4380 case Iop_NegF64: 4381 case Iop_AbsF64: 4382 case Iop_RSqrtEst5GoodF64: 4383 case Iop_RoundF64toF64_NEAREST: 4384 case Iop_RoundF64toF64_NegINF: 4385 case Iop_RoundF64toF64_PosINF: 4386 case Iop_RoundF64toF64_ZERO: 4387 case Iop_Clz64: 4388 case Iop_D32toD64: 4389 case Iop_I32StoD64: 4390 case Iop_I32UtoD64: 4391 case Iop_ExtractExpD64: /* D64 -> I64 */ 4392 case Iop_ExtractExpD128: /* D128 -> I64 */ 4393 case Iop_ExtractSigD64: /* D64 -> I64 */ 4394 case Iop_ExtractSigD128: /* D128 -> I64 */ 4395 case Iop_DPBtoBCD: 4396 case Iop_BCDtoDPB: 4397 return 
mkPCastTo(mce, Ity_I64, vatom); 4398 4399 case Iop_D64toD128: 4400 return mkPCastTo(mce, Ity_I128, vatom); 4401 4402 case Iop_Clz32: 4403 case Iop_TruncF64asF32: 4404 case Iop_NegF32: 4405 case Iop_AbsF32: 4406 case Iop_F16toF32: 4407 return mkPCastTo(mce, Ity_I32, vatom); 4408 4409 case Iop_Ctz32: 4410 case Iop_Ctz64: 4411 return expensiveCountTrailingZeroes(mce, op, atom, vatom); 4412 4413 case Iop_1Uto64: 4414 case Iop_1Sto64: 4415 case Iop_8Uto64: 4416 case Iop_8Sto64: 4417 case Iop_16Uto64: 4418 case Iop_16Sto64: 4419 case Iop_32Sto64: 4420 case Iop_32Uto64: 4421 case Iop_V128to64: 4422 case Iop_V128HIto64: 4423 case Iop_128HIto64: 4424 case Iop_128to64: 4425 case Iop_Dup8x8: 4426 case Iop_Dup16x4: 4427 case Iop_Dup32x2: 4428 case Iop_Reverse8sIn16_x4: 4429 case Iop_Reverse8sIn32_x2: 4430 case Iop_Reverse16sIn32_x2: 4431 case Iop_Reverse8sIn64_x1: 4432 case Iop_Reverse16sIn64_x1: 4433 case Iop_Reverse32sIn64_x1: 4434 case Iop_V256to64_0: case Iop_V256to64_1: 4435 case Iop_V256to64_2: case Iop_V256to64_3: 4436 return assignNew('V', mce, Ity_I64, unop(op, vatom)); 4437 4438 case Iop_64to32: 4439 case Iop_64HIto32: 4440 case Iop_1Uto32: 4441 case Iop_1Sto32: 4442 case Iop_8Uto32: 4443 case Iop_16Uto32: 4444 case Iop_16Sto32: 4445 case Iop_8Sto32: 4446 case Iop_V128to32: 4447 return assignNew('V', mce, Ity_I32, unop(op, vatom)); 4448 4449 case Iop_8Sto16: 4450 case Iop_8Uto16: 4451 case Iop_32to16: 4452 case Iop_32HIto16: 4453 case Iop_64to16: 4454 case Iop_GetMSBs8x16: 4455 return assignNew('V', mce, Ity_I16, unop(op, vatom)); 4456 4457 case Iop_1Uto8: 4458 case Iop_1Sto8: 4459 case Iop_16to8: 4460 case Iop_16HIto8: 4461 case Iop_32to8: 4462 case Iop_64to8: 4463 case Iop_GetMSBs8x8: 4464 return assignNew('V', mce, Ity_I8, unop(op, vatom)); 4465 4466 case Iop_32to1: 4467 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom)); 4468 4469 case Iop_64to1: 4470 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom)); 4471 4472 case Iop_ReinterpF64asI64: 4473 case Iop_ReinterpI64asF64: 4474 case Iop_ReinterpI32asF32: 4475 case Iop_ReinterpF32asI32: 4476 case Iop_ReinterpI64asD64: 4477 case Iop_ReinterpD64asI64: 4478 case Iop_NotV256: 4479 case Iop_NotV128: 4480 case Iop_Not64: 4481 case Iop_Not32: 4482 case Iop_Not16: 4483 case Iop_Not8: 4484 case Iop_Not1: 4485 return vatom; 4486 4487 case Iop_CmpNEZ8x8: 4488 case Iop_Cnt8x8: 4489 case Iop_Clz8x8: 4490 case Iop_Cls8x8: 4491 case Iop_Abs8x8: 4492 return mkPCast8x8(mce, vatom); 4493 4494 case Iop_CmpNEZ8x16: 4495 case Iop_Cnt8x16: 4496 case Iop_Clz8x16: 4497 case Iop_Cls8x16: 4498 case Iop_Abs8x16: 4499 return mkPCast8x16(mce, vatom); 4500 4501 case Iop_CmpNEZ16x4: 4502 case Iop_Clz16x4: 4503 case Iop_Cls16x4: 4504 case Iop_Abs16x4: 4505 return mkPCast16x4(mce, vatom); 4506 4507 case Iop_CmpNEZ16x8: 4508 case Iop_Clz16x8: 4509 case Iop_Cls16x8: 4510 case Iop_Abs16x8: 4511 return mkPCast16x8(mce, vatom); 4512 4513 case Iop_CmpNEZ32x2: 4514 case Iop_Clz32x2: 4515 case Iop_Cls32x2: 4516 case Iop_FtoI32Ux2_RZ: 4517 case Iop_FtoI32Sx2_RZ: 4518 case Iop_Abs32x2: 4519 return mkPCast32x2(mce, vatom); 4520 4521 case Iop_CmpNEZ32x4: 4522 case Iop_Clz32x4: 4523 case Iop_Cls32x4: 4524 case Iop_FtoI32Ux4_RZ: 4525 case Iop_FtoI32Sx4_RZ: 4526 case Iop_Abs32x4: 4527 case Iop_RSqrtEst32Ux4: 4528 return mkPCast32x4(mce, vatom); 4529 4530 case Iop_CmpwNEZ32: 4531 return mkPCastTo(mce, Ity_I32, vatom); 4532 4533 case Iop_CmpwNEZ64: 4534 return mkPCastTo(mce, Ity_I64, vatom); 4535 4536 case Iop_CmpNEZ64x2: 4537 case Iop_CipherSV128: 4538 case Iop_Clz64x2: 
4539 case Iop_Abs64x2: 4540 return mkPCast64x2(mce, vatom); 4541 4542 case Iop_PwBitMtxXpose64x2: 4543 return assignNew('V', mce, Ity_V128, unop(op, vatom)); 4544 4545 case Iop_NarrowUn16to8x8: 4546 case Iop_NarrowUn32to16x4: 4547 case Iop_NarrowUn64to32x2: 4548 case Iop_QNarrowUn16Sto8Sx8: 4549 case Iop_QNarrowUn16Sto8Ux8: 4550 case Iop_QNarrowUn16Uto8Ux8: 4551 case Iop_QNarrowUn32Sto16Sx4: 4552 case Iop_QNarrowUn32Sto16Ux4: 4553 case Iop_QNarrowUn32Uto16Ux4: 4554 case Iop_QNarrowUn64Sto32Sx2: 4555 case Iop_QNarrowUn64Sto32Ux2: 4556 case Iop_QNarrowUn64Uto32Ux2: 4557 return vectorNarrowUnV128(mce, op, vatom); 4558 4559 case Iop_Widen8Sto16x8: 4560 case Iop_Widen8Uto16x8: 4561 case Iop_Widen16Sto32x4: 4562 case Iop_Widen16Uto32x4: 4563 case Iop_Widen32Sto64x2: 4564 case Iop_Widen32Uto64x2: 4565 return vectorWidenI64(mce, op, vatom); 4566 4567 case Iop_PwAddL32Ux2: 4568 case Iop_PwAddL32Sx2: 4569 return mkPCastTo(mce, Ity_I64, 4570 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom)))); 4571 4572 case Iop_PwAddL16Ux4: 4573 case Iop_PwAddL16Sx4: 4574 return mkPCast32x2(mce, 4575 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom)))); 4576 4577 case Iop_PwAddL8Ux8: 4578 case Iop_PwAddL8Sx8: 4579 return mkPCast16x4(mce, 4580 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom)))); 4581 4582 case Iop_PwAddL32Ux4: 4583 case Iop_PwAddL32Sx4: 4584 return mkPCast64x2(mce, 4585 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom)))); 4586 4587 case Iop_PwAddL16Ux8: 4588 case Iop_PwAddL16Sx8: 4589 return mkPCast32x4(mce, 4590 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom)))); 4591 4592 case Iop_PwAddL8Ux16: 4593 case Iop_PwAddL8Sx16: 4594 return mkPCast16x8(mce, 4595 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom)))); 4596 4597 case Iop_I64UtoF32: 4598 default: 4599 ppIROp(op); 4600 VG_(tool_panic)("memcheck:expr2vbits_Unop"); 4601 } 4602 } 4603 4604 4605 /* Worker function -- do not call directly. See comments on 4606 expr2vbits_Load for the meaning of |guard|. 4607 4608 Generates IR to (1) perform a definedness test of |addr|, (2) 4609 perform a validity test of |addr|, and (3) return the Vbits for the 4610 location indicated by |addr|. All of this only happens when 4611 |guard| is NULL or |guard| evaluates to True at run time. 4612 4613 If |guard| evaluates to False at run time, the returned value is 4614 the IR-mandated 0x55..55 value, and no checks nor shadow loads are 4615 performed. 4616 4617 The definedness of |guard| itself is not checked. That is assumed 4618 to have been done before this point, by the caller. */ 4619 static 4620 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, 4621 IREndness end, IRType ty, 4622 IRAtom* addr, UInt bias, IRAtom* guard ) 4623 { 4624 tl_assert(isOriginalAtom(mce,addr)); 4625 tl_assert(end == Iend_LE || end == Iend_BE); 4626 4627 /* First, emit a definedness test for the address. This also sets 4628 the address (shadow) to 'defined' following the test. */ 4629 complainIfUndefined( mce, addr, guard ); 4630 4631 /* Now cook up a call to the relevant helper function, to read the 4632 data V bits from shadow memory. 
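      For example, a little-endian Ity_I32 load is shadowed by a call to
      MC_(helperc_LOADV32le), which returns the 32 V bits for the address
      directly in an integer register; the V128 and V256 cases instead
      return their result via an IRExpr_VECRET() out-parameter, since a
      vector of V bits does not fit in an integer register.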
*/ 4633 ty = shadowTypeV(ty); 4634 4635 void* helper = NULL; 4636 const HChar* hname = NULL; 4637 Bool ret_via_outparam = False; 4638 4639 if (end == Iend_LE) { 4640 switch (ty) { 4641 case Ity_V256: helper = &MC_(helperc_LOADV256le); 4642 hname = "MC_(helperc_LOADV256le)"; 4643 ret_via_outparam = True; 4644 break; 4645 case Ity_V128: helper = &MC_(helperc_LOADV128le); 4646 hname = "MC_(helperc_LOADV128le)"; 4647 ret_via_outparam = True; 4648 break; 4649 case Ity_I64: helper = &MC_(helperc_LOADV64le); 4650 hname = "MC_(helperc_LOADV64le)"; 4651 break; 4652 case Ity_I32: helper = &MC_(helperc_LOADV32le); 4653 hname = "MC_(helperc_LOADV32le)"; 4654 break; 4655 case Ity_I16: helper = &MC_(helperc_LOADV16le); 4656 hname = "MC_(helperc_LOADV16le)"; 4657 break; 4658 case Ity_I8: helper = &MC_(helperc_LOADV8); 4659 hname = "MC_(helperc_LOADV8)"; 4660 break; 4661 default: ppIRType(ty); 4662 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)"); 4663 } 4664 } else { 4665 switch (ty) { 4666 case Ity_V256: helper = &MC_(helperc_LOADV256be); 4667 hname = "MC_(helperc_LOADV256be)"; 4668 ret_via_outparam = True; 4669 break; 4670 case Ity_V128: helper = &MC_(helperc_LOADV128be); 4671 hname = "MC_(helperc_LOADV128be)"; 4672 ret_via_outparam = True; 4673 break; 4674 case Ity_I64: helper = &MC_(helperc_LOADV64be); 4675 hname = "MC_(helperc_LOADV64be)"; 4676 break; 4677 case Ity_I32: helper = &MC_(helperc_LOADV32be); 4678 hname = "MC_(helperc_LOADV32be)"; 4679 break; 4680 case Ity_I16: helper = &MC_(helperc_LOADV16be); 4681 hname = "MC_(helperc_LOADV16be)"; 4682 break; 4683 case Ity_I8: helper = &MC_(helperc_LOADV8); 4684 hname = "MC_(helperc_LOADV8)"; 4685 break; 4686 default: ppIRType(ty); 4687 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)"); 4688 } 4689 } 4690 4691 tl_assert(helper); 4692 tl_assert(hname); 4693 4694 /* Generate the actual address into addrAct. */ 4695 IRAtom* addrAct; 4696 if (bias == 0) { 4697 addrAct = addr; 4698 } else { 4699 IROp mkAdd; 4700 IRAtom* eBias; 4701 IRType tyAddr = mce->hWordTy; 4702 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); 4703 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; 4704 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias); 4705 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) ); 4706 } 4707 4708 /* We need to have a place to park the V bits we're just about to 4709 read. */ 4710 IRTemp datavbits = newTemp(mce, ty, VSh); 4711 4712 /* Here's the call. */ 4713 IRDirty* di; 4714 if (ret_via_outparam) { 4715 di = unsafeIRDirty_1_N( datavbits, 4716 2/*regparms*/, 4717 hname, VG_(fnptr_to_fnentry)( helper ), 4718 mkIRExprVec_2( IRExpr_VECRET(), addrAct ) ); 4719 } else { 4720 di = unsafeIRDirty_1_N( datavbits, 4721 1/*regparms*/, 4722 hname, VG_(fnptr_to_fnentry)( helper ), 4723 mkIRExprVec_1( addrAct ) ); 4724 } 4725 4726 setHelperAnns( mce, di ); 4727 if (guard) { 4728 di->guard = guard; 4729 /* Ideally the didn't-happen return value here would be all-ones 4730 (all-undefined), so it'd be obvious if it got used 4731 inadvertantly. We can get by with the IR-mandated default 4732 value (0b01 repeating, 0x55 etc) as that'll still look pretty 4733 undefined if it ever leaks out. */ 4734 } 4735 stmt( 'V', mce, IRStmt_Dirty(di) ); 4736 4737 return mkexpr(datavbits); 4738 } 4739 4740 4741 /* Generate IR to do a shadow load. The helper is expected to check 4742 the validity of the address and return the V bits for that address. 4743 This can optionally be controlled by a guard, which is assumed to 4744 be True if NULL. 
In the case where the guard is False at runtime, 4745 the helper will return the didn't-do-the-call value of 0x55..55. 4746 Since that means "completely undefined result", the caller of 4747 this function will need to fix up the result somehow in that 4748 case. 4749 4750 Caller of this function is also expected to have checked the 4751 definedness of |guard| before this point. 4752 */ 4753 static 4754 IRAtom* expr2vbits_Load ( MCEnv* mce, 4755 IREndness end, IRType ty, 4756 IRAtom* addr, UInt bias, 4757 IRAtom* guard ) 4758 { 4759 tl_assert(end == Iend_LE || end == Iend_BE); 4760 switch (shadowTypeV(ty)) { 4761 case Ity_I8: 4762 case Ity_I16: 4763 case Ity_I32: 4764 case Ity_I64: 4765 case Ity_V128: 4766 case Ity_V256: 4767 return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard); 4768 default: 4769 VG_(tool_panic)("expr2vbits_Load"); 4770 } 4771 } 4772 4773 4774 /* The most general handler for guarded loads. Assumes the 4775 definedness of GUARD has already been checked by the caller. A 4776 GUARD of NULL is assumed to mean "always True". Generates code to 4777 check the definedness and validity of ADDR. 4778 4779 Generate IR to do a shadow load from ADDR and return the V bits. 4780 The loaded type is TY. The loaded data is then (shadow) widened by 4781 using VWIDEN, which can be Iop_INVALID to denote a no-op. If GUARD 4782 evaluates to False at run time then the returned Vbits are simply 4783 VALT instead. Note therefore that the argument type of VWIDEN must 4784 be TY and the result type of VWIDEN must equal the type of VALT. 4785 */ 4786 static 4787 IRAtom* expr2vbits_Load_guarded_General ( MCEnv* mce, 4788 IREndness end, IRType ty, 4789 IRAtom* addr, UInt bias, 4790 IRAtom* guard, 4791 IROp vwiden, IRAtom* valt ) 4792 { 4793 /* Sanity check the conversion operation, and also set TYWIDE. */ 4794 IRType tyWide = Ity_INVALID; 4795 switch (vwiden) { 4796 case Iop_INVALID: 4797 tyWide = ty; 4798 break; 4799 case Iop_16Uto32: case Iop_16Sto32: case Iop_8Uto32: case Iop_8Sto32: 4800 tyWide = Ity_I32; 4801 break; 4802 default: 4803 VG_(tool_panic)("memcheck:expr2vbits_Load_guarded_General"); 4804 } 4805 4806 /* If the guard evaluates to True, this will hold the loaded V bits 4807 at TY. If the guard evaluates to False, this will be all 4808 ones, meaning "all undefined", in which case we will have to 4809 replace it using an ITE below. */ 4810 IRAtom* iftrue1 4811 = assignNew('V', mce, ty, 4812 expr2vbits_Load(mce, end, ty, addr, bias, guard)); 4813 /* Now (shadow-) widen the loaded V bits to the desired width. In 4814 the guard-is-False case, the allowable widening operators will 4815 in the worst case (unsigned widening) at least leave the 4816 pre-widened part as being marked all-undefined, and in the best 4817 case (signed widening) mark the whole widened result as 4818 undefined. Anyway, it doesn't matter really, since in this case 4819 we will replace said value with the default value |valt| using an 4820 ITE. */ 4821 IRAtom* iftrue2 4822 = vwiden == Iop_INVALID 4823 ? iftrue1 4824 : assignNew('V', mce, tyWide, unop(vwiden, iftrue1)); 4825 /* These are the V bits we will return if the load doesn't take 4826 place. */ 4827 IRAtom* iffalse 4828 = valt; 4829 /* Prepare the cond for the ITE. Convert a NULL cond into 4830 something that iropt knows how to fold out later. */ 4831 IRAtom* cond 4832 = guard == NULL ? mkU1(1) : guard; 4833 /* And assemble the final result. 
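      For example, a guarded 8-bit load that is sign-extended to 32 bits
      arrives here with vwiden == Iop_8Sto32 and produces
      ITE(guard, 8Sto32(loaded V bits), valt), so the caller-supplied
      V bits |valt| are used whenever the load does not actually happen.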
*/ 4834 return assignNew('V', mce, tyWide, IRExpr_ITE(cond, iftrue2, iffalse)); 4835 } 4836 4837 4838 /* A simpler handler for guarded loads, in which there is no 4839 conversion operation, and the default V bit return (when the guard 4840 evaluates to False at runtime) is "all defined". If there is no 4841 guard expression or the guard is always TRUE this function behaves 4842 like expr2vbits_Load. It is assumed that definedness of GUARD has 4843 already been checked at the call site. */ 4844 static 4845 IRAtom* expr2vbits_Load_guarded_Simple ( MCEnv* mce, 4846 IREndness end, IRType ty, 4847 IRAtom* addr, UInt bias, 4848 IRAtom *guard ) 4849 { 4850 return expr2vbits_Load_guarded_General( 4851 mce, end, ty, addr, bias, guard, Iop_INVALID, definedOfType(ty) 4852 ); 4853 } 4854 4855 4856 static 4857 IRAtom* expr2vbits_ITE ( MCEnv* mce, 4858 IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse ) 4859 { 4860 IRAtom *vbitsC, *vbits0, *vbits1; 4861 IRType ty; 4862 /* Given ITE(cond, iftrue, iffalse), generate 4863 ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#) 4864 That is, steer the V bits like the originals, but trash the 4865 result if the steering value is undefined. This gives 4866 lazy propagation. */ 4867 tl_assert(isOriginalAtom(mce, cond)); 4868 tl_assert(isOriginalAtom(mce, iftrue)); 4869 tl_assert(isOriginalAtom(mce, iffalse)); 4870 4871 vbitsC = expr2vbits(mce, cond); 4872 vbits1 = expr2vbits(mce, iftrue); 4873 vbits0 = expr2vbits(mce, iffalse); 4874 ty = typeOfIRExpr(mce->sb->tyenv, vbits0); 4875 4876 return 4877 mkUifU(mce, ty, assignNew('V', mce, ty, 4878 IRExpr_ITE(cond, vbits1, vbits0)), 4879 mkPCastTo(mce, ty, vbitsC) ); 4880 } 4881 4882 /* --------- This is the main expression-handling function. --------- */ 4883 4884 static 4885 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e ) 4886 { 4887 switch (e->tag) { 4888 4889 case Iex_Get: 4890 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty ); 4891 4892 case Iex_GetI: 4893 return shadow_GETI( mce, e->Iex.GetI.descr, 4894 e->Iex.GetI.ix, e->Iex.GetI.bias ); 4895 4896 case Iex_RdTmp: 4897 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) ); 4898 4899 case Iex_Const: 4900 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e))); 4901 4902 case Iex_Qop: 4903 return expr2vbits_Qop( 4904 mce, 4905 e->Iex.Qop.details->op, 4906 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2, 4907 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4 4908 ); 4909 4910 case Iex_Triop: 4911 return expr2vbits_Triop( 4912 mce, 4913 e->Iex.Triop.details->op, 4914 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2, 4915 e->Iex.Triop.details->arg3 4916 ); 4917 4918 case Iex_Binop: 4919 return expr2vbits_Binop( 4920 mce, 4921 e->Iex.Binop.op, 4922 e->Iex.Binop.arg1, e->Iex.Binop.arg2 4923 ); 4924 4925 case Iex_Unop: 4926 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg ); 4927 4928 case Iex_Load: 4929 return expr2vbits_Load( mce, e->Iex.Load.end, 4930 e->Iex.Load.ty, 4931 e->Iex.Load.addr, 0/*addr bias*/, 4932 NULL/* guard == "always True"*/ ); 4933 4934 case Iex_CCall: 4935 return mkLazyN( mce, e->Iex.CCall.args, 4936 e->Iex.CCall.retty, 4937 e->Iex.CCall.cee ); 4938 4939 case Iex_ITE: 4940 return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue, 4941 e->Iex.ITE.iffalse); 4942 4943 default: 4944 VG_(printf)("\n"); 4945 ppIRExpr(e); 4946 VG_(printf)("\n"); 4947 VG_(tool_panic)("memcheck: expr2vbits"); 4948 } 4949 } 4950 4951 /*------------------------------------------------------------*/ 4952 /*--- Generate shadow stmts from 
all kinds of IRStmts. ---*/ 4953 /*------------------------------------------------------------*/ 4954 4955 /* Widen a value to the host word size. */ 4956 4957 static 4958 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom ) 4959 { 4960 IRType ty, tyH; 4961 4962 /* vatom is vbits-value and as such can only have a shadow type. */ 4963 tl_assert(isShadowAtom(mce,vatom)); 4964 4965 ty = typeOfIRExpr(mce->sb->tyenv, vatom); 4966 tyH = mce->hWordTy; 4967 4968 if (tyH == Ity_I32) { 4969 switch (ty) { 4970 case Ity_I32: 4971 return vatom; 4972 case Ity_I16: 4973 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom)); 4974 case Ity_I8: 4975 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom)); 4976 default: 4977 goto unhandled; 4978 } 4979 } else 4980 if (tyH == Ity_I64) { 4981 switch (ty) { 4982 case Ity_I32: 4983 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom)); 4984 case Ity_I16: 4985 return assignNew('V', mce, tyH, unop(Iop_32Uto64, 4986 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom)))); 4987 case Ity_I8: 4988 return assignNew('V', mce, tyH, unop(Iop_32Uto64, 4989 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom)))); 4990 default: 4991 goto unhandled; 4992 } 4993 } else { 4994 goto unhandled; 4995 } 4996 unhandled: 4997 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n"); 4998 VG_(tool_panic)("zwidenToHostWord"); 4999 } 5000 5001 5002 /* Generate a shadow store. |addr| is always the original address 5003 atom. You can pass in either originals or V-bits for the data 5004 atom, but obviously not both. This function generates a check for 5005 the definedness and (indirectly) the validity of |addr|, but only 5006 when |guard| evaluates to True at run time (or is NULL). 5007 5008 |guard| :: Ity_I1 controls whether the store really happens; NULL 5009 means it unconditionally does. Note that |guard| itself is not 5010 checked for definedness; the caller of this function must do that 5011 if necessary. 5012 */ 5013 static 5014 void do_shadow_Store ( MCEnv* mce, 5015 IREndness end, 5016 IRAtom* addr, UInt bias, 5017 IRAtom* data, IRAtom* vdata, 5018 IRAtom* guard ) 5019 { 5020 IROp mkAdd; 5021 IRType ty, tyAddr; 5022 void* helper = NULL; 5023 const HChar* hname = NULL; 5024 IRConst* c; 5025 5026 tyAddr = mce->hWordTy; 5027 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; 5028 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); 5029 tl_assert( end == Iend_LE || end == Iend_BE ); 5030 5031 if (data) { 5032 tl_assert(!vdata); 5033 tl_assert(isOriginalAtom(mce, data)); 5034 tl_assert(bias == 0); 5035 vdata = expr2vbits( mce, data ); 5036 } else { 5037 tl_assert(vdata); 5038 } 5039 5040 tl_assert(isOriginalAtom(mce,addr)); 5041 tl_assert(isShadowAtom(mce,vdata)); 5042 5043 if (guard) { 5044 tl_assert(isOriginalAtom(mce, guard)); 5045 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); 5046 } 5047 5048 ty = typeOfIRExpr(mce->sb->tyenv, vdata); 5049 5050 // If we're not doing undefined value checking, pretend that this value 5051 // is "all valid". That lets Vex's optimiser remove some of the V bit 5052 // shadow computation ops that precede it. 
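   // (For reference: "all valid" means all-zero V bits.  The constants
   // below look odd because IRConst_V128 takes a 16-bit mask with one
   // bit per byte of the V128, and IRConst_V256 a 32-bit mask with one
   // bit per byte of the V256; hence the V128 and V256 cases are built
   // from V_BITS16_DEFINED and V_BITS32_DEFINED respectively.)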
5053 if (MC_(clo_mc_level) == 1) { 5054 switch (ty) { 5055 case Ity_V256: // V256 weirdness -- used four times 5056 c = IRConst_V256(V_BITS32_DEFINED); break; 5057 case Ity_V128: // V128 weirdness -- used twice 5058 c = IRConst_V128(V_BITS16_DEFINED); break; 5059 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break; 5060 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break; 5061 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break; 5062 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break; 5063 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)"); 5064 } 5065 vdata = IRExpr_Const( c ); 5066 } 5067 5068 /* First, emit a definedness test for the address. This also sets 5069 the address (shadow) to 'defined' following the test. Both of 5070 those actions are gated on |guard|. */ 5071 complainIfUndefined( mce, addr, guard ); 5072 5073 /* Now decide which helper function to call to write the data V 5074 bits into shadow memory. */ 5075 if (end == Iend_LE) { 5076 switch (ty) { 5077 case Ity_V256: /* we'll use the helper four times */ 5078 case Ity_V128: /* we'll use the helper twice */ 5079 case Ity_I64: helper = &MC_(helperc_STOREV64le); 5080 hname = "MC_(helperc_STOREV64le)"; 5081 break; 5082 case Ity_I32: helper = &MC_(helperc_STOREV32le); 5083 hname = "MC_(helperc_STOREV32le)"; 5084 break; 5085 case Ity_I16: helper = &MC_(helperc_STOREV16le); 5086 hname = "MC_(helperc_STOREV16le)"; 5087 break; 5088 case Ity_I8: helper = &MC_(helperc_STOREV8); 5089 hname = "MC_(helperc_STOREV8)"; 5090 break; 5091 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)"); 5092 } 5093 } else { 5094 switch (ty) { 5095 case Ity_V128: /* we'll use the helper twice */ 5096 case Ity_I64: helper = &MC_(helperc_STOREV64be); 5097 hname = "MC_(helperc_STOREV64be)"; 5098 break; 5099 case Ity_I32: helper = &MC_(helperc_STOREV32be); 5100 hname = "MC_(helperc_STOREV32be)"; 5101 break; 5102 case Ity_I16: helper = &MC_(helperc_STOREV16be); 5103 hname = "MC_(helperc_STOREV16be)"; 5104 break; 5105 case Ity_I8: helper = &MC_(helperc_STOREV8); 5106 hname = "MC_(helperc_STOREV8)"; 5107 break; 5108 /* Note, no V256 case here, because no big-endian target that 5109 we support, has 256 vectors. */ 5110 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)"); 5111 } 5112 } 5113 5114 if (UNLIKELY(ty == Ity_V256)) { 5115 5116 /* V256-bit case -- phrased in terms of 64 bit units (Qs), with 5117 Q3 being the most significant lane. */ 5118 /* These are the offsets of the Qs in memory. */ 5119 Int offQ0, offQ1, offQ2, offQ3; 5120 5121 /* Various bits for constructing the 4 lane helper calls */ 5122 IRDirty *diQ0, *diQ1, *diQ2, *diQ3; 5123 IRAtom *addrQ0, *addrQ1, *addrQ2, *addrQ3; 5124 IRAtom *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3; 5125 IRAtom *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3; 5126 5127 if (end == Iend_LE) { 5128 offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24; 5129 } else { 5130 offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24; 5131 } 5132 5133 eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0); 5134 addrQ0 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) ); 5135 vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata)); 5136 diQ0 = unsafeIRDirty_0_N( 5137 1/*regparms*/, 5138 hname, VG_(fnptr_to_fnentry)( helper ), 5139 mkIRExprVec_2( addrQ0, vdataQ0 ) 5140 ); 5141 5142 eBiasQ1 = tyAddr==Ity_I32 ? 
mkU32(bias+offQ1) : mkU64(bias+offQ1); 5143 addrQ1 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) ); 5144 vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata)); 5145 diQ1 = unsafeIRDirty_0_N( 5146 1/*regparms*/, 5147 hname, VG_(fnptr_to_fnentry)( helper ), 5148 mkIRExprVec_2( addrQ1, vdataQ1 ) 5149 ); 5150 5151 eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2); 5152 addrQ2 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) ); 5153 vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata)); 5154 diQ2 = unsafeIRDirty_0_N( 5155 1/*regparms*/, 5156 hname, VG_(fnptr_to_fnentry)( helper ), 5157 mkIRExprVec_2( addrQ2, vdataQ2 ) 5158 ); 5159 5160 eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3); 5161 addrQ3 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) ); 5162 vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata)); 5163 diQ3 = unsafeIRDirty_0_N( 5164 1/*regparms*/, 5165 hname, VG_(fnptr_to_fnentry)( helper ), 5166 mkIRExprVec_2( addrQ3, vdataQ3 ) 5167 ); 5168 5169 if (guard) 5170 diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard; 5171 5172 setHelperAnns( mce, diQ0 ); 5173 setHelperAnns( mce, diQ1 ); 5174 setHelperAnns( mce, diQ2 ); 5175 setHelperAnns( mce, diQ3 ); 5176 stmt( 'V', mce, IRStmt_Dirty(diQ0) ); 5177 stmt( 'V', mce, IRStmt_Dirty(diQ1) ); 5178 stmt( 'V', mce, IRStmt_Dirty(diQ2) ); 5179 stmt( 'V', mce, IRStmt_Dirty(diQ3) ); 5180 5181 } 5182 else if (UNLIKELY(ty == Ity_V128)) { 5183 5184 /* V128-bit case */ 5185 /* See comment in next clause re 64-bit regparms */ 5186 /* also, need to be careful about endianness */ 5187 5188 Int offLo64, offHi64; 5189 IRDirty *diLo64, *diHi64; 5190 IRAtom *addrLo64, *addrHi64; 5191 IRAtom *vdataLo64, *vdataHi64; 5192 IRAtom *eBiasLo64, *eBiasHi64; 5193 5194 if (end == Iend_LE) { 5195 offLo64 = 0; 5196 offHi64 = 8; 5197 } else { 5198 offLo64 = 8; 5199 offHi64 = 0; 5200 } 5201 5202 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64); 5203 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) ); 5204 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata)); 5205 diLo64 = unsafeIRDirty_0_N( 5206 1/*regparms*/, 5207 hname, VG_(fnptr_to_fnentry)( helper ), 5208 mkIRExprVec_2( addrLo64, vdataLo64 ) 5209 ); 5210 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64); 5211 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) ); 5212 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata)); 5213 diHi64 = unsafeIRDirty_0_N( 5214 1/*regparms*/, 5215 hname, VG_(fnptr_to_fnentry)( helper ), 5216 mkIRExprVec_2( addrHi64, vdataHi64 ) 5217 ); 5218 if (guard) diLo64->guard = guard; 5219 if (guard) diHi64->guard = guard; 5220 setHelperAnns( mce, diLo64 ); 5221 setHelperAnns( mce, diHi64 ); 5222 stmt( 'V', mce, IRStmt_Dirty(diLo64) ); 5223 stmt( 'V', mce, IRStmt_Dirty(diHi64) ); 5224 5225 } else { 5226 5227 IRDirty *di; 5228 IRAtom *addrAct; 5229 5230 /* 8/16/32/64-bit cases */ 5231 /* Generate the actual address into addrAct. */ 5232 if (bias == 0) { 5233 addrAct = addr; 5234 } else { 5235 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias); 5236 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias)); 5237 } 5238 5239 if (ty == Ity_I64) { 5240 /* We can't do this with regparm 2 on 32-bit platforms, since 5241 the back ends aren't clever enough to handle 64-bit 5242 regparm args. Therefore be different. 
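            So the Ity_I64 case below passes the 64-bit vbits as-is with
            regparms=1, whereas the smaller (8/16/32-bit) cases zero-widen
            the vbits to the host word via zwidenToHostWord and can then
            use regparms=2.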
*/ 5243 di = unsafeIRDirty_0_N( 5244 1/*regparms*/, 5245 hname, VG_(fnptr_to_fnentry)( helper ), 5246 mkIRExprVec_2( addrAct, vdata ) 5247 ); 5248 } else { 5249 di = unsafeIRDirty_0_N( 5250 2/*regparms*/, 5251 hname, VG_(fnptr_to_fnentry)( helper ), 5252 mkIRExprVec_2( addrAct, 5253 zwidenToHostWord( mce, vdata )) 5254 ); 5255 } 5256 if (guard) di->guard = guard; 5257 setHelperAnns( mce, di ); 5258 stmt( 'V', mce, IRStmt_Dirty(di) ); 5259 } 5260 5261 } 5262 5263 5264 /* Do lazy pessimistic propagation through a dirty helper call, by 5265 looking at the annotations on it. This is the most complex part of 5266 Memcheck. */ 5267 5268 static IRType szToITy ( Int n ) 5269 { 5270 switch (n) { 5271 case 1: return Ity_I8; 5272 case 2: return Ity_I16; 5273 case 4: return Ity_I32; 5274 case 8: return Ity_I64; 5275 default: VG_(tool_panic)("szToITy(memcheck)"); 5276 } 5277 } 5278 5279 static 5280 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d ) 5281 { 5282 Int i, k, n, toDo, gSz, gOff; 5283 IRAtom *src, *here, *curr; 5284 IRType tySrc, tyDst; 5285 IRTemp dst; 5286 IREndness end; 5287 5288 /* What's the native endianness? We need to know this. */ 5289 # if defined(VG_BIGENDIAN) 5290 end = Iend_BE; 5291 # elif defined(VG_LITTLEENDIAN) 5292 end = Iend_LE; 5293 # else 5294 # error "Unknown endianness" 5295 # endif 5296 5297 /* First check the guard. */ 5298 complainIfUndefined(mce, d->guard, NULL); 5299 5300 /* Now round up all inputs and PCast over them. */ 5301 curr = definedOfType(Ity_I32); 5302 5303 /* Inputs: unmasked args 5304 Note: arguments are evaluated REGARDLESS of the guard expression */ 5305 for (i = 0; d->args[i]; i++) { 5306 IRAtom* arg = d->args[i]; 5307 if ( (d->cee->mcx_mask & (1<<i)) 5308 || UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)) ) { 5309 /* ignore this arg */ 5310 } else { 5311 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, arg) ); 5312 curr = mkUifU32(mce, here, curr); 5313 } 5314 } 5315 5316 /* Inputs: guest state that we read. */ 5317 for (i = 0; i < d->nFxState; i++) { 5318 tl_assert(d->fxState[i].fx != Ifx_None); 5319 if (d->fxState[i].fx == Ifx_Write) 5320 continue; 5321 5322 /* Enumerate the described state segments */ 5323 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 5324 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 5325 gSz = d->fxState[i].size; 5326 5327 /* Ignore any sections marked as 'always defined'. */ 5328 if (isAlwaysDefd(mce, gOff, gSz)) { 5329 if (0) 5330 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n", 5331 gOff, gSz); 5332 continue; 5333 } 5334 5335 /* This state element is read or modified. So we need to 5336 consider it. If larger than 8 bytes, deal with it in 5337 8-byte chunks. */ 5338 while (True) { 5339 tl_assert(gSz >= 0); 5340 if (gSz == 0) break; 5341 n = gSz <= 8 ? gSz : 8; 5342 /* update 'curr' with UifU of the state slice 5343 gOff .. gOff+n-1 */ 5344 tySrc = szToITy( n ); 5345 5346 /* Observe the guard expression. If it is false use an 5347 all-bits-defined bit pattern */ 5348 IRAtom *cond, *iffalse, *iftrue; 5349 5350 cond = assignNew('V', mce, Ity_I1, d->guard); 5351 iftrue = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc)); 5352 iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc)); 5353 src = assignNew('V', mce, tySrc, 5354 IRExpr_ITE(cond, iftrue, iffalse)); 5355 5356 here = mkPCastTo( mce, Ity_I32, src ); 5357 curr = mkUifU32(mce, here, curr); 5358 gSz -= n; 5359 gOff += n; 5360 } 5361 } 5362 } 5363 5364 /* Inputs: memory. 
First set up some info needed regardless of 5365 whether we're doing reads or writes. */ 5366 5367 if (d->mFx != Ifx_None) { 5368 /* Because we may do multiple shadow loads/stores from the same 5369 base address, it's best to do a single test of its 5370 definedness right now. Post-instrumentation optimisation 5371 should remove all but this test. */ 5372 IRType tyAddr; 5373 tl_assert(d->mAddr); 5374 complainIfUndefined(mce, d->mAddr, d->guard); 5375 5376 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr); 5377 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64); 5378 tl_assert(tyAddr == mce->hWordTy); /* not really right */ 5379 } 5380 5381 /* Deal with memory inputs (reads or modifies) */ 5382 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) { 5383 toDo = d->mSize; 5384 /* chew off 32-bit chunks. We don't care about the endianness 5385 since it's all going to be condensed down to a single bit, 5386 but nevertheless choose an endianness which is hopefully 5387 native to the platform. */ 5388 while (toDo >= 4) { 5389 here = mkPCastTo( 5390 mce, Ity_I32, 5391 expr2vbits_Load_guarded_Simple( 5392 mce, end, Ity_I32, d->mAddr, d->mSize - toDo, d->guard ) 5393 ); 5394 curr = mkUifU32(mce, here, curr); 5395 toDo -= 4; 5396 } 5397 /* chew off 16-bit chunks */ 5398 while (toDo >= 2) { 5399 here = mkPCastTo( 5400 mce, Ity_I32, 5401 expr2vbits_Load_guarded_Simple( 5402 mce, end, Ity_I16, d->mAddr, d->mSize - toDo, d->guard ) 5403 ); 5404 curr = mkUifU32(mce, here, curr); 5405 toDo -= 2; 5406 } 5407 /* chew off the remaining 8-bit chunk, if any */ 5408 if (toDo == 1) { 5409 here = mkPCastTo( 5410 mce, Ity_I32, 5411 expr2vbits_Load_guarded_Simple( 5412 mce, end, Ity_I8, d->mAddr, d->mSize - toDo, d->guard ) 5413 ); 5414 curr = mkUifU32(mce, here, curr); 5415 toDo -= 1; 5416 } 5417 tl_assert(toDo == 0); 5418 } 5419 5420 /* Whew! So curr is a 32-bit V-value summarising pessimistically 5421 all the inputs to the helper. Now we need to re-distribute the 5422 results to all destinations. */ 5423 5424 /* Outputs: the destination temporary, if there is one. */ 5425 if (d->tmp != IRTemp_INVALID) { 5426 dst = findShadowTmpV(mce, d->tmp); 5427 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp); 5428 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) ); 5429 } 5430 5431 /* Outputs: guest state that we write or modify. */ 5432 for (i = 0; i < d->nFxState; i++) { 5433 tl_assert(d->fxState[i].fx != Ifx_None); 5434 if (d->fxState[i].fx == Ifx_Read) 5435 continue; 5436 5437 /* Enumerate the described state segments */ 5438 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) { 5439 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen; 5440 gSz = d->fxState[i].size; 5441 5442 /* Ignore any sections marked as 'always defined'. */ 5443 if (isAlwaysDefd(mce, gOff, gSz)) 5444 continue; 5445 5446 /* This state element is written or modified. So we need to 5447 consider it. If larger than 8 bytes, deal with it in 5448 8-byte chunks. */ 5449 while (True) { 5450 tl_assert(gSz >= 0); 5451 if (gSz == 0) break; 5452 n = gSz <= 8 ? gSz : 8; 5453 /* Write suitably-casted 'curr' to the state slice 5454 gOff .. gOff+n-1 */ 5455 tyDst = szToITy( n ); 5456 do_shadow_PUT( mce, gOff, 5457 NULL, /* original atom */ 5458 mkPCastTo( mce, tyDst, curr ), d->guard ); 5459 gSz -= n; 5460 gOff += n; 5461 } 5462 } 5463 } 5464 5465 /* Outputs: memory that we write or modify. Same comments about 5466 endianness as above apply. 
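      A worked example of the chunking below (assuming, hypothetically,
      that d->mSize == 7): the code emits shadow stores of 4, 2 and 1
      bytes at offsets 0, 4 and 6 from d->mAddr, storing 'curr' PCast to
      Ity_I32, Ity_I16 and Ity_I8 respectively, each gated by d->guard.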
*/ 5467 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) { 5468 toDo = d->mSize; 5469 /* chew off 32-bit chunks */ 5470 while (toDo >= 4) { 5471 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 5472 NULL, /* original data */ 5473 mkPCastTo( mce, Ity_I32, curr ), 5474 d->guard ); 5475 toDo -= 4; 5476 } 5477 /* chew off 16-bit chunks */ 5478 while (toDo >= 2) { 5479 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 5480 NULL, /* original data */ 5481 mkPCastTo( mce, Ity_I16, curr ), 5482 d->guard ); 5483 toDo -= 2; 5484 } 5485 /* chew off the remaining 8-bit chunk, if any */ 5486 if (toDo == 1) { 5487 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo, 5488 NULL, /* original data */ 5489 mkPCastTo( mce, Ity_I8, curr ), 5490 d->guard ); 5491 toDo -= 1; 5492 } 5493 tl_assert(toDo == 0); 5494 } 5495 5496 } 5497 5498 5499 /* We have an ABI hint telling us that [base .. base+len-1] is to 5500 become undefined ("writable"). Generate code to call a helper to 5501 notify the A/V bit machinery of this fact. 5502 5503 We call 5504 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, 5505 Addr nia ); 5506 */ 5507 static 5508 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia ) 5509 { 5510 IRDirty* di; 5511 /* Minor optimisation: if not doing origin tracking, ignore the 5512 supplied nia and pass zero instead. This is on the basis that 5513 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can 5514 almost always generate a shorter instruction to put zero into a 5515 register than any other value. */ 5516 if (MC_(clo_mc_level) < 3) 5517 nia = mkIRExpr_HWord(0); 5518 5519 di = unsafeIRDirty_0_N( 5520 0/*regparms*/, 5521 "MC_(helperc_MAKE_STACK_UNINIT)", 5522 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ), 5523 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia ) 5524 ); 5525 stmt( 'V', mce, IRStmt_Dirty(di) ); 5526 } 5527 5528 5529 /* ------ Dealing with IRCAS (big and complex) ------ */ 5530 5531 /* FWDS */ 5532 static IRAtom* gen_load_b ( MCEnv* mce, Int szB, 5533 IRAtom* baseaddr, Int offset ); 5534 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 ); 5535 static void gen_store_b ( MCEnv* mce, Int szB, 5536 IRAtom* baseaddr, Int offset, IRAtom* dataB, 5537 IRAtom* guard ); 5538 5539 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas ); 5540 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ); 5541 5542 5543 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both 5544 IRExpr.Consts, else this asserts. If they are both Consts, it 5545 doesn't do anything. So that just leaves the RdTmp case. 5546 5547 In which case: this assigns the shadow value SHADOW to the IR 5548 shadow temporary associated with ORIG. That is, ORIG, being an 5549 original temporary, will have a shadow temporary associated with 5550 it. However, in the case envisaged here, there will so far have 5551 been no IR emitted to actually write a shadow value into that 5552 temporary. What this routine does is to (emit IR to) copy the 5553 value in SHADOW into said temporary, so that after this call, 5554 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the 5555 value in SHADOW. 5556 5557 Point is to allow callers to compute "by hand" a shadow value for 5558 ORIG, and force it to be associated with ORIG. 5559 5560 How do we know that that shadow associated with ORIG has not so far 5561 been assigned to? Well, we don't per se know that, but supposing 5562 it had. 
Then this routine would create a second assignment to it, 5563 and later the IR sanity checker would barf. But that never 5564 happens. QED. 5565 */ 5566 static void bind_shadow_tmp_to_orig ( UChar how, 5567 MCEnv* mce, 5568 IRAtom* orig, IRAtom* shadow ) 5569 { 5570 tl_assert(isOriginalAtom(mce, orig)); 5571 tl_assert(isShadowAtom(mce, shadow)); 5572 switch (orig->tag) { 5573 case Iex_Const: 5574 tl_assert(shadow->tag == Iex_Const); 5575 break; 5576 case Iex_RdTmp: 5577 tl_assert(shadow->tag == Iex_RdTmp); 5578 if (how == 'V') { 5579 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp), 5580 shadow); 5581 } else { 5582 tl_assert(how == 'B'); 5583 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp), 5584 shadow); 5585 } 5586 break; 5587 default: 5588 tl_assert(0); 5589 } 5590 } 5591 5592 5593 static 5594 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas ) 5595 { 5596 /* Scheme is (both single- and double- cases): 5597 5598 1. fetch data#,dataB (the proposed new value) 5599 5600 2. fetch expd#,expdB (what we expect to see at the address) 5601 5602 3. check definedness of address 5603 5604 4. load old#,oldB from shadow memory; this also checks 5605 addressibility of the address 5606 5607 5. the CAS itself 5608 5609 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below. 5610 5611 7. if "expected == old" (as computed by (6)) 5612 store data#,dataB to shadow memory 5613 5614 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores 5615 'data' but 7 stores 'data#'. Hence it is possible for the 5616 shadow data to be incorrectly checked and/or updated: 5617 5618 * 7 is at least gated correctly, since the 'expected == old' 5619 condition is derived from outputs of 5. However, the shadow 5620 write could happen too late: imagine after 5 we are 5621 descheduled, a different thread runs, writes a different 5622 (shadow) value at the address, and then we resume, hence 5623 overwriting the shadow value written by the other thread. 5624 5625 Because the original memory access is atomic, there's no way to 5626 make both the original and shadow accesses into a single atomic 5627 thing, hence this is unavoidable. 5628 5629 At least as Valgrind stands, I don't think it's a problem, since 5630 we're single threaded *and* we guarantee that there are no 5631 context switches during the execution of any specific superblock 5632 -- context switches can only happen at superblock boundaries. 5633 5634 If Valgrind ever becomes MT in the future, then it might be more 5635 of a problem. A possible kludge would be to artificially 5636 associate with the location, a lock, which we must acquire and 5637 release around the transaction as a whole. Hmm, that probably 5638 would't work properly since it only guards us against other 5639 threads doing CASs on the same location, not against other 5640 threads doing normal reads and writes. 5641 5642 ------------------------------------------------------------ 5643 5644 COMMENT_ON_CasCmpEQ: 5645 5646 Note two things. Firstly, in the sequence above, we compute 5647 "expected == old", but we don't check definedness of it. Why 5648 not? Also, the x86 and amd64 front ends use 5649 Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent 5650 determination (expected == old ?) for themselves, and we also 5651 don't check definedness for those primops; we just say that the 5652 result is defined. Why? Details follow. 
5653 5654 x86/amd64 contains various forms of locked insns: 5655 * lock prefix before all basic arithmetic insn; 5656 eg lock xorl %reg1,(%reg2) 5657 * atomic exchange reg-mem 5658 * compare-and-swaps 5659 5660 Rather than attempt to represent them all, which would be a 5661 royal PITA, I used a result from Maurice Herlihy 5662 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he 5663 demonstrates that compare-and-swap is a primitive more general 5664 than the other two, and so can be used to represent all of them. 5665 So the translation scheme for (eg) lock incl (%reg) is as 5666 follows: 5667 5668 again: 5669 old = * %reg 5670 new = old + 1 5671 atomically { if (* %reg == old) { * %reg = new } else { goto again } } 5672 5673 The "atomically" is the CAS bit. The scheme is always the same: 5674 get old value from memory, compute new value, atomically stuff 5675 new value back in memory iff the old value has not changed (iow, 5676 no other thread modified it in the meantime). If it has changed 5677 then we've been out-raced and we have to start over. 5678 5679 Now that's all very neat, but it has the bad side effect of 5680 introducing an explicit equality test into the translation. 5681 Consider the behaviour of said code on a memory location which 5682 is uninitialised. We will wind up doing a comparison on 5683 uninitialised data, and mc duly complains. 5684 5685 What's difficult about this is, the common case is that the 5686 location is uncontended, and so we're usually comparing the same 5687 value (* %reg) with itself. So we shouldn't complain even if it 5688 is undefined. But mc doesn't know that. 5689 5690 My solution is to mark the == in the IR specially, so as to tell 5691 mc that it almost certainly compares a value with itself, and we 5692 should just regard the result as always defined. Rather than 5693 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into 5694 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else. 5695 5696 So there's always the question of, can this give a false 5697 negative? eg, imagine that initially, * %reg is defined; and we 5698 read that; but then in the gap between the read and the CAS, a 5699 different thread writes an undefined (and different) value at 5700 the location. Then the CAS in this thread will fail and we will 5701 go back to "again:", but without knowing that the trip back 5702 there was based on an undefined comparison. No matter; at least 5703 the other thread won the race and the location is correctly 5704 marked as undefined. What if it wrote an uninitialised version 5705 of the same value that was there originally, though? 5706 5707 etc etc. Seems like there's a small corner case in which we 5708 might lose the fact that something's defined -- we're out-raced 5709 in between the "old = * reg" and the "atomically {", _and_ the 5710 other thread is writing in an undefined version of what's 5711 already there. Well, that seems pretty unlikely. 5712 5713 --- 5714 5715 If we ever need to reinstate it .. code which generates a 5716 definedness test for "expected == old" was removed at r10432 of 5717 this file. 
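      To recap the scheme concretely, here is a rough sketch (not the
      exact IR of any particular front end; temp names are made up) of how
      'lock incl (%reg)' can look by the time it reaches this
      instrumenter:

         t_addr = GET:I32(offset_of_reg)      (the address, i.e. %reg)
         t_old  = LDle:I32(t_addr)
         t_new  = Add32(t_old, 0x1:I32)
         t_res  = CASle(t_addr :: t_old -> t_new)   (t_res gets what was
                                                      really in memory)
         if (CasCmpNE32(t_res, t_old)) retry the insn

      The CasCmpNE32 is the specially-marked comparison described above:
      its result is simply treated as defined, and schemeE below likewise
      gives it a zero (no-origin) B value.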
5718 */ 5719 if (cas->oldHi == IRTemp_INVALID) { 5720 do_shadow_CAS_single( mce, cas ); 5721 } else { 5722 do_shadow_CAS_double( mce, cas ); 5723 } 5724 } 5725 5726 5727 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas ) 5728 { 5729 IRAtom *vdataLo = NULL, *bdataLo = NULL; 5730 IRAtom *vexpdLo = NULL, *bexpdLo = NULL; 5731 IRAtom *voldLo = NULL, *boldLo = NULL; 5732 IRAtom *expd_eq_old = NULL; 5733 IROp opCasCmpEQ; 5734 Int elemSzB; 5735 IRType elemTy; 5736 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */ 5737 5738 /* single CAS */ 5739 tl_assert(cas->oldHi == IRTemp_INVALID); 5740 tl_assert(cas->expdHi == NULL); 5741 tl_assert(cas->dataHi == NULL); 5742 5743 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); 5744 switch (elemTy) { 5745 case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break; 5746 case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break; 5747 case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break; 5748 case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break; 5749 default: tl_assert(0); /* IR defn disallows any other types */ 5750 } 5751 5752 /* 1. fetch data# (the proposed new value) */ 5753 tl_assert(isOriginalAtom(mce, cas->dataLo)); 5754 vdataLo 5755 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo)); 5756 tl_assert(isShadowAtom(mce, vdataLo)); 5757 if (otrak) { 5758 bdataLo 5759 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo)); 5760 tl_assert(isShadowAtom(mce, bdataLo)); 5761 } 5762 5763 /* 2. fetch expected# (what we expect to see at the address) */ 5764 tl_assert(isOriginalAtom(mce, cas->expdLo)); 5765 vexpdLo 5766 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo)); 5767 tl_assert(isShadowAtom(mce, vexpdLo)); 5768 if (otrak) { 5769 bexpdLo 5770 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo)); 5771 tl_assert(isShadowAtom(mce, bexpdLo)); 5772 } 5773 5774 /* 3. check definedness of address */ 5775 /* 4. fetch old# from shadow memory; this also checks 5776 addressibility of the address */ 5777 voldLo 5778 = assignNew( 5779 'V', mce, elemTy, 5780 expr2vbits_Load( 5781 mce, 5782 cas->end, elemTy, cas->addr, 0/*Addr bias*/, 5783 NULL/*always happens*/ 5784 )); 5785 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo); 5786 if (otrak) { 5787 boldLo 5788 = assignNew('B', mce, Ity_I32, 5789 gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/)); 5790 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo); 5791 } 5792 5793 /* 5. the CAS itself */ 5794 stmt( 'C', mce, IRStmt_CAS(cas) ); 5795 5796 /* 6. compute "expected == old" */ 5797 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */ 5798 /* Note that 'C' is kinda faking it; it is indeed a non-shadow 5799 tree, but it's not copied from the input block. */ 5800 expd_eq_old 5801 = assignNew('C', mce, Ity_I1, 5802 binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo))); 5803 5804 /* 7. 
if "expected == old" 5805 store data# to shadow memory */ 5806 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/, 5807 NULL/*data*/, vdataLo/*vdata*/, 5808 expd_eq_old/*guard for store*/ ); 5809 if (otrak) { 5810 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/, 5811 bdataLo/*bdata*/, 5812 expd_eq_old/*guard for store*/ ); 5813 } 5814 } 5815 5816 5817 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ) 5818 { 5819 IRAtom *vdataHi = NULL, *bdataHi = NULL; 5820 IRAtom *vdataLo = NULL, *bdataLo = NULL; 5821 IRAtom *vexpdHi = NULL, *bexpdHi = NULL; 5822 IRAtom *vexpdLo = NULL, *bexpdLo = NULL; 5823 IRAtom *voldHi = NULL, *boldHi = NULL; 5824 IRAtom *voldLo = NULL, *boldLo = NULL; 5825 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL; 5826 IRAtom *expd_eq_old = NULL, *zero = NULL; 5827 IROp opCasCmpEQ, opOr, opXor; 5828 Int elemSzB, memOffsLo, memOffsHi; 5829 IRType elemTy; 5830 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */ 5831 5832 /* double CAS */ 5833 tl_assert(cas->oldHi != IRTemp_INVALID); 5834 tl_assert(cas->expdHi != NULL); 5835 tl_assert(cas->dataHi != NULL); 5836 5837 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); 5838 switch (elemTy) { 5839 case Ity_I8: 5840 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8; 5841 elemSzB = 1; zero = mkU8(0); 5842 break; 5843 case Ity_I16: 5844 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16; 5845 elemSzB = 2; zero = mkU16(0); 5846 break; 5847 case Ity_I32: 5848 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32; 5849 elemSzB = 4; zero = mkU32(0); 5850 break; 5851 case Ity_I64: 5852 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64; 5853 elemSzB = 8; zero = mkU64(0); 5854 break; 5855 default: 5856 tl_assert(0); /* IR defn disallows any other types */ 5857 } 5858 5859 /* 1. fetch data# (the proposed new value) */ 5860 tl_assert(isOriginalAtom(mce, cas->dataHi)); 5861 tl_assert(isOriginalAtom(mce, cas->dataLo)); 5862 vdataHi 5863 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi)); 5864 vdataLo 5865 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo)); 5866 tl_assert(isShadowAtom(mce, vdataHi)); 5867 tl_assert(isShadowAtom(mce, vdataLo)); 5868 if (otrak) { 5869 bdataHi 5870 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi)); 5871 bdataLo 5872 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo)); 5873 tl_assert(isShadowAtom(mce, bdataHi)); 5874 tl_assert(isShadowAtom(mce, bdataLo)); 5875 } 5876 5877 /* 2. fetch expected# (what we expect to see at the address) */ 5878 tl_assert(isOriginalAtom(mce, cas->expdHi)); 5879 tl_assert(isOriginalAtom(mce, cas->expdLo)); 5880 vexpdHi 5881 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi)); 5882 vexpdLo 5883 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo)); 5884 tl_assert(isShadowAtom(mce, vexpdHi)); 5885 tl_assert(isShadowAtom(mce, vexpdLo)); 5886 if (otrak) { 5887 bexpdHi 5888 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi)); 5889 bexpdLo 5890 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo)); 5891 tl_assert(isShadowAtom(mce, bexpdHi)); 5892 tl_assert(isShadowAtom(mce, bexpdLo)); 5893 } 5894 5895 /* 3. check definedness of address */ 5896 /* 4. 
fetch old# from shadow memory; this also checks 5897 addressibility of the address */ 5898 if (cas->end == Iend_LE) { 5899 memOffsLo = 0; 5900 memOffsHi = elemSzB; 5901 } else { 5902 tl_assert(cas->end == Iend_BE); 5903 memOffsLo = elemSzB; 5904 memOffsHi = 0; 5905 } 5906 voldHi 5907 = assignNew( 5908 'V', mce, elemTy, 5909 expr2vbits_Load( 5910 mce, 5911 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/, 5912 NULL/*always happens*/ 5913 )); 5914 voldLo 5915 = assignNew( 5916 'V', mce, elemTy, 5917 expr2vbits_Load( 5918 mce, 5919 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/, 5920 NULL/*always happens*/ 5921 )); 5922 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi); 5923 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo); 5924 if (otrak) { 5925 boldHi 5926 = assignNew('B', mce, Ity_I32, 5927 gen_load_b(mce, elemSzB, cas->addr, 5928 memOffsHi/*addr bias*/)); 5929 boldLo 5930 = assignNew('B', mce, Ity_I32, 5931 gen_load_b(mce, elemSzB, cas->addr, 5932 memOffsLo/*addr bias*/)); 5933 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi); 5934 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo); 5935 } 5936 5937 /* 5. the CAS itself */ 5938 stmt( 'C', mce, IRStmt_CAS(cas) ); 5939 5940 /* 6. compute "expected == old" */ 5941 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */ 5942 /* Note that 'C' is kinda faking it; it is indeed a non-shadow 5943 tree, but it's not copied from the input block. */ 5944 /* 5945 xHi = oldHi ^ expdHi; 5946 xLo = oldLo ^ expdLo; 5947 xHL = xHi | xLo; 5948 expd_eq_old = xHL == 0; 5949 */ 5950 xHi = assignNew('C', mce, elemTy, 5951 binop(opXor, cas->expdHi, mkexpr(cas->oldHi))); 5952 xLo = assignNew('C', mce, elemTy, 5953 binop(opXor, cas->expdLo, mkexpr(cas->oldLo))); 5954 xHL = assignNew('C', mce, elemTy, 5955 binop(opOr, xHi, xLo)); 5956 expd_eq_old 5957 = assignNew('C', mce, Ity_I1, 5958 binop(opCasCmpEQ, xHL, zero)); 5959 5960 /* 7. if "expected == old" 5961 store data# to shadow memory */ 5962 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/, 5963 NULL/*data*/, vdataHi/*vdata*/, 5964 expd_eq_old/*guard for store*/ ); 5965 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/, 5966 NULL/*data*/, vdataLo/*vdata*/, 5967 expd_eq_old/*guard for store*/ ); 5968 if (otrak) { 5969 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/, 5970 bdataHi/*bdata*/, 5971 expd_eq_old/*guard for store*/ ); 5972 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/, 5973 bdataLo/*bdata*/, 5974 expd_eq_old/*guard for store*/ ); 5975 } 5976 } 5977 5978 5979 /* ------ Dealing with LL/SC (not difficult) ------ */ 5980 5981 static void do_shadow_LLSC ( MCEnv* mce, 5982 IREndness stEnd, 5983 IRTemp stResult, 5984 IRExpr* stAddr, 5985 IRExpr* stStoredata ) 5986 { 5987 /* In short: treat a load-linked like a normal load followed by an 5988 assignment of the loaded (shadow) data to the result temporary. 5989 Treat a store-conditional like a normal store, and mark the 5990 result temporary as defined. */ 5991 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult); 5992 IRTemp resTmp = findShadowTmpV(mce, stResult); 5993 5994 tl_assert(isIRAtom(stAddr)); 5995 if (stStoredata) 5996 tl_assert(isIRAtom(stStoredata)); 5997 5998 if (stStoredata == NULL) { 5999 /* Load Linked */ 6000 /* Just treat this as a normal load, followed by an assignment of 6001 the value to .result. 
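      Informal sketch (for a hypothetical 32-bit LL into temp t_res): the
      code below gives t_res's V shadow exactly what a plain
      't_res = LDle:I32(addr)' would get, namely
      expr2vbits_Load(mce, stEnd, Ity_I32, stAddr, 0, NULL), and nothing
      further is special-cased for the load-linked flavour.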
*/ 6002 /* Stay sane */ 6003 tl_assert(resTy == Ity_I64 || resTy == Ity_I32 6004 || resTy == Ity_I16 || resTy == Ity_I8); 6005 assign( 'V', mce, resTmp, 6006 expr2vbits_Load( 6007 mce, stEnd, resTy, stAddr, 0/*addr bias*/, 6008 NULL/*always happens*/) ); 6009 } else { 6010 /* Store Conditional */ 6011 /* Stay sane */ 6012 IRType dataTy = typeOfIRExpr(mce->sb->tyenv, 6013 stStoredata); 6014 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32 6015 || dataTy == Ity_I16 || dataTy == Ity_I8); 6016 do_shadow_Store( mce, stEnd, 6017 stAddr, 0/* addr bias */, 6018 stStoredata, 6019 NULL /* shadow data */, 6020 NULL/*guard*/ ); 6021 /* This is a store conditional, so it writes to .result a value 6022 indicating whether or not the store succeeded. Just claim 6023 this value is always defined. In the PowerPC interpretation 6024 of store-conditional, definedness of the success indication 6025 depends on whether the address of the store matches the 6026 reservation address. But we can't tell that here (and 6027 anyway, we're not being PowerPC-specific). At least we are 6028 guaranteed that the definedness of the store address, and its 6029 addressibility, will be checked as per normal. So it seems 6030 pretty safe to just say that the success indication is always 6031 defined. 6032 6033 In schemeS, for origin tracking, we must correspondingly set 6034 a no-origin value for the origin shadow of .result. 6035 */ 6036 tl_assert(resTy == Ity_I1); 6037 assign( 'V', mce, resTmp, definedOfType(resTy) ); 6038 } 6039 } 6040 6041 6042 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */ 6043 6044 static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg ) 6045 { 6046 complainIfUndefined(mce, sg->guard, NULL); 6047 /* do_shadow_Store will generate code to check the definedness and 6048 validity of sg->addr, in the case where sg->guard evaluates to 6049 True at run-time. */ 6050 do_shadow_Store( mce, sg->end, 6051 sg->addr, 0/* addr bias */, 6052 sg->data, 6053 NULL /* shadow data */, 6054 sg->guard ); 6055 } 6056 6057 static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg ) 6058 { 6059 complainIfUndefined(mce, lg->guard, NULL); 6060 /* expr2vbits_Load_guarded_General will generate code to check the 6061 definedness and validity of lg->addr, in the case where 6062 lg->guard evaluates to True at run-time. */ 6063 6064 /* Look at the LoadG's built-in conversion operation, to determine 6065 the source (actual loaded data) type, and the equivalent IROp. 6066 NOTE that implicitly we are taking a widening operation to be 6067 applied to original atoms and producing one that applies to V 6068 bits. Since signed and unsigned widening are self-shadowing, 6069 this is a straight copy of the op (modulo swapping from the 6070 IRLoadGOp form to the IROp form). Note also therefore that this 6071 implicitly duplicates the logic to do with said widening ops in 6072 expr2vbits_Unop. See comment at the start of expr2vbits_Unop. 
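      For example (a sketch of the intended effect, not literal emitted
      IR): for a LoadG with cvt == ILGop_16Uto32, loadedTy below is
      Ity_I16 and vwiden is Iop_16Uto32, so the final V bits are roughly

         vbits_final = ITE(guard, 16Uto32(V bits of the 16-bit load),
                                  V bits of lg->alt)

      i.e. the V bits get the same zero-widening that the LoadG applies
      to the data itself.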
*/ 6073 IROp vwiden = Iop_INVALID; 6074 IRType loadedTy = Ity_INVALID; 6075 switch (lg->cvt) { 6076 case ILGop_Ident64: loadedTy = Ity_I64; vwiden = Iop_INVALID; break; 6077 case ILGop_Ident32: loadedTy = Ity_I32; vwiden = Iop_INVALID; break; 6078 case ILGop_16Uto32: loadedTy = Ity_I16; vwiden = Iop_16Uto32; break; 6079 case ILGop_16Sto32: loadedTy = Ity_I16; vwiden = Iop_16Sto32; break; 6080 case ILGop_8Uto32: loadedTy = Ity_I8; vwiden = Iop_8Uto32; break; 6081 case ILGop_8Sto32: loadedTy = Ity_I8; vwiden = Iop_8Sto32; break; 6082 default: VG_(tool_panic)("do_shadow_LoadG"); 6083 } 6084 6085 IRAtom* vbits_alt 6086 = expr2vbits( mce, lg->alt ); 6087 IRAtom* vbits_final 6088 = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy, 6089 lg->addr, 0/*addr bias*/, 6090 lg->guard, vwiden, vbits_alt ); 6091 /* And finally, bind the V bits to the destination temporary. */ 6092 assign( 'V', mce, findShadowTmpV(mce, lg->dst), vbits_final ); 6093 } 6094 6095 6096 /*------------------------------------------------------------*/ 6097 /*--- Memcheck main ---*/ 6098 /*------------------------------------------------------------*/ 6099 6100 static void schemeS ( MCEnv* mce, IRStmt* st ); 6101 6102 static Bool isBogusAtom ( IRAtom* at ) 6103 { 6104 ULong n = 0; 6105 IRConst* con; 6106 tl_assert(isIRAtom(at)); 6107 if (at->tag == Iex_RdTmp) 6108 return False; 6109 tl_assert(at->tag == Iex_Const); 6110 con = at->Iex.Const.con; 6111 switch (con->tag) { 6112 case Ico_U1: return False; 6113 case Ico_U8: n = (ULong)con->Ico.U8; break; 6114 case Ico_U16: n = (ULong)con->Ico.U16; break; 6115 case Ico_U32: n = (ULong)con->Ico.U32; break; 6116 case Ico_U64: n = (ULong)con->Ico.U64; break; 6117 case Ico_F32: return False; 6118 case Ico_F64: return False; 6119 case Ico_F32i: return False; 6120 case Ico_F64i: return False; 6121 case Ico_V128: return False; 6122 case Ico_V256: return False; 6123 default: ppIRExpr(at); tl_assert(0); 6124 } 6125 /* VG_(printf)("%llx\n", n); */ 6126 return (/*32*/ n == 0xFEFEFEFFULL 6127 /*32*/ || n == 0x80808080ULL 6128 /*32*/ || n == 0x7F7F7F7FULL 6129 /*32*/ || n == 0x7EFEFEFFULL 6130 /*32*/ || n == 0x81010100ULL 6131 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL 6132 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL 6133 /*64*/ || n == 0x0000000000008080ULL 6134 /*64*/ || n == 0x8080808080808080ULL 6135 /*64*/ || n == 0x0101010101010101ULL 6136 ); 6137 } 6138 6139 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st ) 6140 { 6141 Int i; 6142 IRExpr* e; 6143 IRDirty* d; 6144 IRCAS* cas; 6145 switch (st->tag) { 6146 case Ist_WrTmp: 6147 e = st->Ist.WrTmp.data; 6148 switch (e->tag) { 6149 case Iex_Get: 6150 case Iex_RdTmp: 6151 return False; 6152 case Iex_Const: 6153 return isBogusAtom(e); 6154 case Iex_Unop: 6155 return isBogusAtom(e->Iex.Unop.arg) 6156 || e->Iex.Unop.op == Iop_GetMSBs8x16; 6157 case Iex_GetI: 6158 return isBogusAtom(e->Iex.GetI.ix); 6159 case Iex_Binop: 6160 return isBogusAtom(e->Iex.Binop.arg1) 6161 || isBogusAtom(e->Iex.Binop.arg2); 6162 case Iex_Triop: 6163 return isBogusAtom(e->Iex.Triop.details->arg1) 6164 || isBogusAtom(e->Iex.Triop.details->arg2) 6165 || isBogusAtom(e->Iex.Triop.details->arg3); 6166 case Iex_Qop: 6167 return isBogusAtom(e->Iex.Qop.details->arg1) 6168 || isBogusAtom(e->Iex.Qop.details->arg2) 6169 || isBogusAtom(e->Iex.Qop.details->arg3) 6170 || isBogusAtom(e->Iex.Qop.details->arg4); 6171 case Iex_ITE: 6172 return isBogusAtom(e->Iex.ITE.cond) 6173 || isBogusAtom(e->Iex.ITE.iftrue) 6174 || isBogusAtom(e->Iex.ITE.iffalse); 6175 case Iex_Load: 6176 return 
isBogusAtom(e->Iex.Load.addr); 6177 case Iex_CCall: 6178 for (i = 0; e->Iex.CCall.args[i]; i++) 6179 if (isBogusAtom(e->Iex.CCall.args[i])) 6180 return True; 6181 return False; 6182 default: 6183 goto unhandled; 6184 } 6185 case Ist_Dirty: 6186 d = st->Ist.Dirty.details; 6187 for (i = 0; d->args[i]; i++) { 6188 IRAtom* atom = d->args[i]; 6189 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(atom))) { 6190 if (isBogusAtom(atom)) 6191 return True; 6192 } 6193 } 6194 if (isBogusAtom(d->guard)) 6195 return True; 6196 if (d->mAddr && isBogusAtom(d->mAddr)) 6197 return True; 6198 return False; 6199 case Ist_Put: 6200 return isBogusAtom(st->Ist.Put.data); 6201 case Ist_PutI: 6202 return isBogusAtom(st->Ist.PutI.details->ix) 6203 || isBogusAtom(st->Ist.PutI.details->data); 6204 case Ist_Store: 6205 return isBogusAtom(st->Ist.Store.addr) 6206 || isBogusAtom(st->Ist.Store.data); 6207 case Ist_StoreG: { 6208 IRStoreG* sg = st->Ist.StoreG.details; 6209 return isBogusAtom(sg->addr) || isBogusAtom(sg->data) 6210 || isBogusAtom(sg->guard); 6211 } 6212 case Ist_LoadG: { 6213 IRLoadG* lg = st->Ist.LoadG.details; 6214 return isBogusAtom(lg->addr) || isBogusAtom(lg->alt) 6215 || isBogusAtom(lg->guard); 6216 } 6217 case Ist_Exit: 6218 return isBogusAtom(st->Ist.Exit.guard); 6219 case Ist_AbiHint: 6220 return isBogusAtom(st->Ist.AbiHint.base) 6221 || isBogusAtom(st->Ist.AbiHint.nia); 6222 case Ist_NoOp: 6223 case Ist_IMark: 6224 case Ist_MBE: 6225 return False; 6226 case Ist_CAS: 6227 cas = st->Ist.CAS.details; 6228 return isBogusAtom(cas->addr) 6229 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False) 6230 || isBogusAtom(cas->expdLo) 6231 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False) 6232 || isBogusAtom(cas->dataLo); 6233 case Ist_LLSC: 6234 return isBogusAtom(st->Ist.LLSC.addr) 6235 || (st->Ist.LLSC.storedata 6236 ? isBogusAtom(st->Ist.LLSC.storedata) 6237 : False); 6238 default: 6239 unhandled: 6240 ppIRStmt(st); 6241 VG_(tool_panic)("hasBogusLiterals"); 6242 } 6243 } 6244 6245 6246 IRSB* MC_(instrument) ( VgCallbackClosure* closure, 6247 IRSB* sb_in, 6248 const VexGuestLayout* layout, 6249 const VexGuestExtents* vge, 6250 const VexArchInfo* archinfo_host, 6251 IRType gWordTy, IRType hWordTy ) 6252 { 6253 Bool verboze = 0||False; 6254 Bool bogus; 6255 Int i, j, first_stmt; 6256 IRStmt* st; 6257 MCEnv mce; 6258 IRSB* sb_out; 6259 6260 if (gWordTy != hWordTy) { 6261 /* We don't currently support this case. */ 6262 VG_(tool_panic)("host/guest word size mismatch"); 6263 } 6264 6265 /* Check we're not completely nuts */ 6266 tl_assert(sizeof(UWord) == sizeof(void*)); 6267 tl_assert(sizeof(Word) == sizeof(void*)); 6268 tl_assert(sizeof(Addr) == sizeof(void*)); 6269 tl_assert(sizeof(ULong) == 8); 6270 tl_assert(sizeof(Long) == 8); 6271 tl_assert(sizeof(UInt) == 4); 6272 tl_assert(sizeof(Int) == 4); 6273 6274 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3); 6275 6276 /* Set up SB */ 6277 sb_out = deepCopyIRSBExceptStmts(sb_in); 6278 6279 /* Set up the running environment. Both .sb and .tmpMap are 6280 modified as we go along. Note that tmps are added to both 6281 .sb->tyenv and .tmpMap together, so the valid index-set for 6282 those two arrays should always be identical. */ 6283 VG_(memset)(&mce, 0, sizeof(mce)); 6284 mce.sb = sb_out; 6285 mce.trace = verboze; 6286 mce.layout = layout; 6287 mce.hWordTy = hWordTy; 6288 mce.bogusLiterals = False; 6289 6290 /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on 6291 Darwin. 
10.7 is mostly built with LLVM, which uses these for 6292 bitfield inserts, and we get a lot of false errors if the cheap 6293 interpretation is used, alas. Could solve this much better if 6294 we knew which of such adds came from x86/amd64 LEA instructions, 6295 since these are the only ones really needing the expensive 6296 interpretation, but that would require some way to tag them in 6297 the _toIR.c front ends, which is a lot of faffing around. So 6298 for now just use the slow and blunt-instrument solution. */ 6299 mce.useLLVMworkarounds = False; 6300 # if defined(VGO_darwin) 6301 mce.useLLVMworkarounds = True; 6302 # endif 6303 6304 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free), 6305 sizeof(TempMapEnt)); 6306 VG_(hintSizeXA) (mce.tmpMap, sb_in->tyenv->types_used); 6307 for (i = 0; i < sb_in->tyenv->types_used; i++) { 6308 TempMapEnt ent; 6309 ent.kind = Orig; 6310 ent.shadowV = IRTemp_INVALID; 6311 ent.shadowB = IRTemp_INVALID; 6312 VG_(addToXA)( mce.tmpMap, &ent ); 6313 } 6314 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used ); 6315 6316 /* Make a preliminary inspection of the statements, to see if there 6317 are any dodgy-looking literals. If there are, we generate 6318 extra-detailed (hence extra-expensive) instrumentation in 6319 places. Scan the whole bb even if dodgyness is found earlier, 6320 so that the flatness assertion is applied to all stmts. */ 6321 6322 bogus = False; 6323 6324 for (i = 0; i < sb_in->stmts_used; i++) { 6325 6326 st = sb_in->stmts[i]; 6327 tl_assert(st); 6328 tl_assert(isFlatIRStmt(st)); 6329 6330 if (!bogus) { 6331 bogus = checkForBogusLiterals(st); 6332 if (0 && bogus) { 6333 VG_(printf)("bogus: "); 6334 ppIRStmt(st); 6335 VG_(printf)("\n"); 6336 } 6337 } 6338 6339 } 6340 6341 mce.bogusLiterals = bogus; 6342 6343 /* Copy verbatim any IR preamble preceding the first IMark */ 6344 6345 tl_assert(mce.sb == sb_out); 6346 tl_assert(mce.sb != sb_in); 6347 6348 i = 0; 6349 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) { 6350 6351 st = sb_in->stmts[i]; 6352 tl_assert(st); 6353 tl_assert(isFlatIRStmt(st)); 6354 6355 stmt( 'C', &mce, sb_in->stmts[i] ); 6356 i++; 6357 } 6358 6359 /* Nasty problem. IR optimisation of the pre-instrumented IR may 6360 cause the IR following the preamble to contain references to IR 6361 temporaries defined in the preamble. Because the preamble isn't 6362 instrumented, these temporaries don't have any shadows. 6363 Nevertheless uses of them following the preamble will cause 6364 memcheck to generate references to their shadows. End effect is 6365 to cause IR sanity check failures, due to references to 6366 non-existent shadows. This is only evident for the complex 6367 preambles used for function wrapping on TOC-afflicted platforms 6368 (ppc64-linux). 6369 6370 The following loop therefore scans the preamble looking for 6371 assignments to temporaries. For each one found it creates an 6372 assignment to the corresponding (V) shadow temp, marking it as 6373 'defined'. This is the same resulting IR as if the main 6374 instrumentation loop before had been applied to the statement 6375 'tmp = CONSTANT'. 6376 6377 Similarly, if origin tracking is enabled, we must generate an 6378 assignment for the corresponding origin (B) shadow, claiming 6379 no-origin, as appropriate for a defined value. 6380 */ 6381 for (j = 0; j < i; j++) { 6382 if (sb_in->stmts[j]->tag == Ist_WrTmp) { 6383 /* findShadowTmpV checks its arg is an original tmp; 6384 no need to assert that here. 
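      For concreteness (hypothetical temp numbers): if the preamble
      contains 't5 = GET:I64(...)', the statements emitted just below are
      't5# = 0x0:I64' (all bits defined) and, when origin tracking is
      enabled (MC_(clo_mc_level) == 3), 't5B = 0x0:I32' (no origin).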
*/ 6385 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp; 6386 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o); 6387 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v); 6388 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) ); 6389 if (MC_(clo_mc_level) == 3) { 6390 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o); 6391 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32); 6392 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */); 6393 } 6394 if (0) { 6395 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j); 6396 ppIRType( ty_v ); 6397 VG_(printf)("\n"); 6398 } 6399 } 6400 } 6401 6402 /* Iterate over the remaining stmts to generate instrumentation. */ 6403 6404 tl_assert(sb_in->stmts_used > 0); 6405 tl_assert(i >= 0); 6406 tl_assert(i < sb_in->stmts_used); 6407 tl_assert(sb_in->stmts[i]->tag == Ist_IMark); 6408 6409 for (/* use current i*/; i < sb_in->stmts_used; i++) { 6410 6411 st = sb_in->stmts[i]; 6412 first_stmt = sb_out->stmts_used; 6413 6414 if (verboze) { 6415 VG_(printf)("\n"); 6416 ppIRStmt(st); 6417 VG_(printf)("\n"); 6418 } 6419 6420 if (MC_(clo_mc_level) == 3) { 6421 /* See comments on case Ist_CAS below. */ 6422 if (st->tag != Ist_CAS) 6423 schemeS( &mce, st ); 6424 } 6425 6426 /* Generate instrumentation code for each stmt ... */ 6427 6428 switch (st->tag) { 6429 6430 case Ist_WrTmp: 6431 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp), 6432 expr2vbits( &mce, st->Ist.WrTmp.data) ); 6433 break; 6434 6435 case Ist_Put: 6436 do_shadow_PUT( &mce, 6437 st->Ist.Put.offset, 6438 st->Ist.Put.data, 6439 NULL /* shadow atom */, NULL /* guard */ ); 6440 break; 6441 6442 case Ist_PutI: 6443 do_shadow_PUTI( &mce, st->Ist.PutI.details); 6444 break; 6445 6446 case Ist_Store: 6447 do_shadow_Store( &mce, st->Ist.Store.end, 6448 st->Ist.Store.addr, 0/* addr bias */, 6449 st->Ist.Store.data, 6450 NULL /* shadow data */, 6451 NULL/*guard*/ ); 6452 break; 6453 6454 case Ist_StoreG: 6455 do_shadow_StoreG( &mce, st->Ist.StoreG.details ); 6456 break; 6457 6458 case Ist_LoadG: 6459 do_shadow_LoadG( &mce, st->Ist.LoadG.details ); 6460 break; 6461 6462 case Ist_Exit: 6463 complainIfUndefined( &mce, st->Ist.Exit.guard, NULL ); 6464 break; 6465 6466 case Ist_IMark: 6467 break; 6468 6469 case Ist_NoOp: 6470 case Ist_MBE: 6471 break; 6472 6473 case Ist_Dirty: 6474 do_shadow_Dirty( &mce, st->Ist.Dirty.details ); 6475 break; 6476 6477 case Ist_AbiHint: 6478 do_AbiHint( &mce, st->Ist.AbiHint.base, 6479 st->Ist.AbiHint.len, 6480 st->Ist.AbiHint.nia ); 6481 break; 6482 6483 case Ist_CAS: 6484 do_shadow_CAS( &mce, st->Ist.CAS.details ); 6485 /* Note, do_shadow_CAS copies the CAS itself to the output 6486 block, because it needs to add instrumentation both 6487 before and after it. Hence skip the copy below. Also 6488 skip the origin-tracking stuff (call to schemeS) above, 6489 since that's all tangled up with it too; do_shadow_CAS 6490 does it all. */ 6491 break; 6492 6493 case Ist_LLSC: 6494 do_shadow_LLSC( &mce, 6495 st->Ist.LLSC.end, 6496 st->Ist.LLSC.result, 6497 st->Ist.LLSC.addr, 6498 st->Ist.LLSC.storedata ); 6499 break; 6500 6501 default: 6502 VG_(printf)("\n"); 6503 ppIRStmt(st); 6504 VG_(printf)("\n"); 6505 VG_(tool_panic)("memcheck: unhandled IRStmt"); 6506 6507 } /* switch (st->tag) */ 6508 6509 if (0 && verboze) { 6510 for (j = first_stmt; j < sb_out->stmts_used; j++) { 6511 VG_(printf)(" "); 6512 ppIRStmt(sb_out->stmts[j]); 6513 VG_(printf)("\n"); 6514 } 6515 VG_(printf)("\n"); 6516 } 6517 6518 /* ... and finally copy the stmt itself to the output. 
Except, 6519 skip the copy of IRCASs; see comments on case Ist_CAS 6520 above. */ 6521 if (st->tag != Ist_CAS) 6522 stmt('C', &mce, st); 6523 } 6524 6525 /* Now we need to complain if the jump target is undefined. */ 6526 first_stmt = sb_out->stmts_used; 6527 6528 if (verboze) { 6529 VG_(printf)("sb_in->next = "); 6530 ppIRExpr(sb_in->next); 6531 VG_(printf)("\n\n"); 6532 } 6533 6534 complainIfUndefined( &mce, sb_in->next, NULL ); 6535 6536 if (0 && verboze) { 6537 for (j = first_stmt; j < sb_out->stmts_used; j++) { 6538 VG_(printf)(" "); 6539 ppIRStmt(sb_out->stmts[j]); 6540 VG_(printf)("\n"); 6541 } 6542 VG_(printf)("\n"); 6543 } 6544 6545 /* If this fails, there's been some serious snafu with tmp management, 6546 that should be investigated. */ 6547 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used ); 6548 VG_(deleteXA)( mce.tmpMap ); 6549 6550 tl_assert(mce.sb == sb_out); 6551 return sb_out; 6552 } 6553 6554 /*------------------------------------------------------------*/ 6555 /*--- Post-tree-build final tidying ---*/ 6556 /*------------------------------------------------------------*/ 6557 6558 /* This exploits the observation that Memcheck often produces 6559 repeated conditional calls of the form 6560 6561 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag) 6562 6563 with the same guard expression G guarding the same helper call. 6564 The second and subsequent calls are redundant. This usually 6565 results from instrumentation of guest code containing multiple 6566 memory references at different constant offsets from the same base 6567 register. After optimisation of the instrumentation, you get a 6568 test for the definedness of the base register for each memory 6569 reference, which is kinda pointless. MC_(final_tidy) therefore 6570 looks for such repeated calls and removes all but the first. */ 6571 6572 /* A struct for recording which (helper, guard) pairs we have already 6573 seen. */ 6574 typedef 6575 struct { void* entry; IRExpr* guard; } 6576 Pair; 6577 6578 /* Return True if e1 and e2 definitely denote the same value (used to 6579 compare guards). Return False if unknown; False is the safe 6580 answer. Since guest registers and guest memory do not have the 6581 SSA property we must return False if any Gets or Loads appear in 6582 the expression. */ 6583 6584 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 ) 6585 { 6586 if (e1->tag != e2->tag) 6587 return False; 6588 switch (e1->tag) { 6589 case Iex_Const: 6590 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con ); 6591 case Iex_Binop: 6592 return e1->Iex.Binop.op == e2->Iex.Binop.op 6593 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1) 6594 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2); 6595 case Iex_Unop: 6596 return e1->Iex.Unop.op == e2->Iex.Unop.op 6597 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg); 6598 case Iex_RdTmp: 6599 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp; 6600 case Iex_ITE: 6601 return sameIRValue( e1->Iex.ITE.cond, e2->Iex.ITE.cond ) 6602 && sameIRValue( e1->Iex.ITE.iftrue, e2->Iex.ITE.iftrue ) 6603 && sameIRValue( e1->Iex.ITE.iffalse, e2->Iex.ITE.iffalse ); 6604 case Iex_Qop: 6605 case Iex_Triop: 6606 case Iex_CCall: 6607 /* be lazy. Could define equality for these, but they never 6608 appear to be used. 
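      (Returning False here is in any case safe for MC_(final_tidy)'s
      purposes: treating two guards as possibly-different merely means a
      redundant helperc_value_check*_fail call is kept, never that a
      needed one is removed.)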
*/ 6609 return False; 6610 case Iex_Get: 6611 case Iex_GetI: 6612 case Iex_Load: 6613 /* be conservative - these may not give the same value each 6614 time */ 6615 return False; 6616 case Iex_Binder: 6617 /* should never see this */ 6618 /* fallthrough */ 6619 default: 6620 VG_(printf)("mc_translate.c: sameIRValue: unhandled: "); 6621 ppIRExpr(e1); 6622 VG_(tool_panic)("memcheck:sameIRValue"); 6623 return False; 6624 } 6625 } 6626 6627 /* See if 'pairs' already has an entry for (entry, guard). Return 6628 True if so. If not, add an entry. */ 6629 6630 static 6631 Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry ) 6632 { 6633 Pair p; 6634 Pair* pp; 6635 Int i, n = VG_(sizeXA)( pairs ); 6636 for (i = 0; i < n; i++) { 6637 pp = VG_(indexXA)( pairs, i ); 6638 if (pp->entry == entry && sameIRValue(pp->guard, guard)) 6639 return True; 6640 } 6641 p.guard = guard; 6642 p.entry = entry; 6643 VG_(addToXA)( pairs, &p ); 6644 return False; 6645 } 6646 6647 static Bool is_helperc_value_checkN_fail ( const HChar* name ) 6648 { 6649 return 6650 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)") 6651 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)") 6652 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)") 6653 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)") 6654 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)") 6655 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)") 6656 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)") 6657 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)"); 6658 } 6659 6660 IRSB* MC_(final_tidy) ( IRSB* sb_in ) 6661 { 6662 Int i; 6663 IRStmt* st; 6664 IRDirty* di; 6665 IRExpr* guard; 6666 IRCallee* cee; 6667 Bool alreadyPresent; 6668 XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1", 6669 VG_(free), sizeof(Pair) ); 6670 /* Scan forwards through the statements. Each time a call to one 6671 of the relevant helpers is seen, check if we have made a 6672 previous call to the same helper using the same guard 6673 expression, and if so, delete the call. */ 6674 for (i = 0; i < sb_in->stmts_used; i++) { 6675 st = sb_in->stmts[i]; 6676 tl_assert(st); 6677 if (st->tag != Ist_Dirty) 6678 continue; 6679 di = st->Ist.Dirty.details; 6680 guard = di->guard; 6681 tl_assert(guard); 6682 if (0) { ppIRExpr(guard); VG_(printf)("\n"); } 6683 cee = di->cee; 6684 if (!is_helperc_value_checkN_fail( cee->name )) 6685 continue; 6686 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with 6687 guard 'guard'. Check if we have already seen a call to this 6688 function with the same guard. If so, delete it. If not, 6689 add it to the set of calls we do know about. */ 6690 alreadyPresent = check_or_add( pairs, guard, cee->addr ); 6691 if (alreadyPresent) { 6692 sb_in->stmts[i] = IRStmt_NoOp(); 6693 if (0) VG_(printf)("XX\n"); 6694 } 6695 } 6696 VG_(deleteXA)( pairs ); 6697 return sb_in; 6698 } 6699 6700 6701 /*------------------------------------------------------------*/ 6702 /*--- Origin tracking stuff ---*/ 6703 /*------------------------------------------------------------*/ 6704 6705 /* Almost identical to findShadowTmpV. */ 6706 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig ) 6707 { 6708 TempMapEnt* ent; 6709 /* VG_(indexXA) range-checks 'orig', hence no need to check 6710 here. 
*/ 6711 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 6712 tl_assert(ent->kind == Orig); 6713 if (ent->shadowB == IRTemp_INVALID) { 6714 IRTemp tmpB 6715 = newTemp( mce, Ity_I32, BSh ); 6716 /* newTemp may cause mce->tmpMap to resize, hence previous results 6717 from VG_(indexXA) are invalid. */ 6718 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); 6719 tl_assert(ent->kind == Orig); 6720 tl_assert(ent->shadowB == IRTemp_INVALID); 6721 ent->shadowB = tmpB; 6722 } 6723 return ent->shadowB; 6724 } 6725 6726 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 ) 6727 { 6728 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) ); 6729 } 6730 6731 6732 /* Make a guarded origin load, with no special handling in the 6733 didn't-happen case. A GUARD of NULL is assumed to mean "always 6734 True". 6735 6736 Generate IR to do a shadow origins load from BASEADDR+OFFSET and 6737 return the otag. The loaded size is SZB. If GUARD evaluates to 6738 False at run time then the returned otag is zero. 6739 */ 6740 static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, 6741 IRAtom* baseaddr, 6742 Int offset, IRExpr* guard ) 6743 { 6744 void* hFun; 6745 const HChar* hName; 6746 IRTemp bTmp; 6747 IRDirty* di; 6748 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); 6749 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64; 6750 IRAtom* ea = baseaddr; 6751 if (offset != 0) { 6752 IRAtom* off = aTy == Ity_I32 ? mkU32( offset ) 6753 : mkU64( (Long)(Int)offset ); 6754 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off)); 6755 } 6756 bTmp = newTemp(mce, mce->hWordTy, BSh); 6757 6758 switch (szB) { 6759 case 1: hFun = (void*)&MC_(helperc_b_load1); 6760 hName = "MC_(helperc_b_load1)"; 6761 break; 6762 case 2: hFun = (void*)&MC_(helperc_b_load2); 6763 hName = "MC_(helperc_b_load2)"; 6764 break; 6765 case 4: hFun = (void*)&MC_(helperc_b_load4); 6766 hName = "MC_(helperc_b_load4)"; 6767 break; 6768 case 8: hFun = (void*)&MC_(helperc_b_load8); 6769 hName = "MC_(helperc_b_load8)"; 6770 break; 6771 case 16: hFun = (void*)&MC_(helperc_b_load16); 6772 hName = "MC_(helperc_b_load16)"; 6773 break; 6774 case 32: hFun = (void*)&MC_(helperc_b_load32); 6775 hName = "MC_(helperc_b_load32)"; 6776 break; 6777 default: 6778 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB); 6779 tl_assert(0); 6780 } 6781 di = unsafeIRDirty_1_N( 6782 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ), 6783 mkIRExprVec_1( ea ) 6784 ); 6785 if (guard) { 6786 di->guard = guard; 6787 /* Ideally the didn't-happen return value here would be 6788 all-zeroes (unknown-origin), so it'd be harmless if it got 6789 used inadvertantly. We slum it out with the IR-mandated 6790 default value (0b01 repeating, 0x55 etc) as that'll probably 6791 trump all legitimate otags via Max32, and it's pretty 6792 obviously bogus. */ 6793 } 6794 /* no need to mess with any annotations. This call accesses 6795 neither guest state nor guest memory. */ 6796 stmt( 'B', mce, IRStmt_Dirty(di) ); 6797 if (mce->hWordTy == Ity_I64) { 6798 /* 64-bit host */ 6799 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh); 6800 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) ); 6801 return mkexpr(bTmp32); 6802 } else { 6803 /* 32-bit host */ 6804 return mkexpr(bTmp); 6805 } 6806 } 6807 6808 6809 /* Generate IR to do a shadow origins load from BASEADDR+OFFSET. The 6810 loaded size is SZB. The load is regarded as unconditional (always 6811 happens). 
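      Rough sketch of what this amounts to on a 64-bit host, for an
      8-byte load from BASEADDR+16 (made-up temp names, not literal IR):

         t_ea = Add64(baseaddr, 0x10:I64)
         bTmp = DIRTY 1:I1 ::: MC_(helperc_b_load8)(t_ea)
         otag = 64to32(bTmp)

      exactly as in gen_guarded_load_b above, but with a NULL (hence
      always-true) guard.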
6812 */ 6813 static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr, 6814 Int offset ) 6815 { 6816 return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/); 6817 } 6818 6819 6820 /* The most general handler for guarded origin loads. A GUARD of NULL 6821 is assumed to mean "always True". 6822 6823 Generate IR to do a shadow origin load from ADDR+BIAS and return 6824 the B bits. The loaded type is TY. If GUARD evaluates to False at 6825 run time then the returned B bits are simply BALT instead. 6826 */ 6827 static 6828 IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce, 6829 IRType ty, 6830 IRAtom* addr, UInt bias, 6831 IRAtom* guard, IRAtom* balt ) 6832 { 6833 /* If the guard evaluates to True, this will hold the loaded 6834 origin. If the guard evaluates to False, this will be zero, 6835 meaning "unknown origin", in which case we will have to replace 6836 it using an ITE below. */ 6837 IRAtom* iftrue 6838 = assignNew('B', mce, Ity_I32, 6839 gen_guarded_load_b(mce, sizeofIRType(ty), 6840 addr, bias, guard)); 6841 /* These are the bits we will return if the load doesn't take 6842 place. */ 6843 IRAtom* iffalse 6844 = balt; 6845 /* Prepare the cond for the ITE. Convert a NULL cond into 6846 something that iropt knows how to fold out later. */ 6847 IRAtom* cond 6848 = guard == NULL ? mkU1(1) : guard; 6849 /* And assemble the final result. */ 6850 return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse)); 6851 } 6852 6853 6854 /* Generate a shadow origins store. guard :: Ity_I1 controls whether 6855 the store really happens; NULL means it unconditionally does. */ 6856 static void gen_store_b ( MCEnv* mce, Int szB, 6857 IRAtom* baseaddr, Int offset, IRAtom* dataB, 6858 IRAtom* guard ) 6859 { 6860 void* hFun; 6861 const HChar* hName; 6862 IRDirty* di; 6863 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); 6864 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64; 6865 IRAtom* ea = baseaddr; 6866 if (guard) { 6867 tl_assert(isOriginalAtom(mce, guard)); 6868 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); 6869 } 6870 if (offset != 0) { 6871 IRAtom* off = aTy == Ity_I32 ? mkU32( offset ) 6872 : mkU64( (Long)(Int)offset ); 6873 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off)); 6874 } 6875 if (mce->hWordTy == Ity_I64) 6876 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB)); 6877 6878 switch (szB) { 6879 case 1: hFun = (void*)&MC_(helperc_b_store1); 6880 hName = "MC_(helperc_b_store1)"; 6881 break; 6882 case 2: hFun = (void*)&MC_(helperc_b_store2); 6883 hName = "MC_(helperc_b_store2)"; 6884 break; 6885 case 4: hFun = (void*)&MC_(helperc_b_store4); 6886 hName = "MC_(helperc_b_store4)"; 6887 break; 6888 case 8: hFun = (void*)&MC_(helperc_b_store8); 6889 hName = "MC_(helperc_b_store8)"; 6890 break; 6891 case 16: hFun = (void*)&MC_(helperc_b_store16); 6892 hName = "MC_(helperc_b_store16)"; 6893 break; 6894 case 32: hFun = (void*)&MC_(helperc_b_store32); 6895 hName = "MC_(helperc_b_store32)"; 6896 break; 6897 default: 6898 tl_assert(0); 6899 } 6900 di = unsafeIRDirty_0_N( 2/*regparms*/, 6901 hName, VG_(fnptr_to_fnentry)( hFun ), 6902 mkIRExprVec_2( ea, dataB ) 6903 ); 6904 /* no need to mess with any annotations. This call accesses 6905 neither guest state nor guest memory. 
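      (Sketch: on a 64-bit host a guarded 4-byte origin store becomes,
      roughly, 'DIRTY guard ::: MC_(helperc_b_store4)(ea, 32Uto64(dataB))';
      dataB is zero-widened to the host word before the call, mirroring
      the narrowing of the result in gen_guarded_load_b.)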
*/ 6906 if (guard) di->guard = guard; 6907 stmt( 'B', mce, IRStmt_Dirty(di) ); 6908 } 6909 6910 static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) { 6911 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e); 6912 if (eTy == Ity_I64) 6913 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) ); 6914 if (eTy == Ity_I32) 6915 return e; 6916 tl_assert(0); 6917 } 6918 6919 static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) { 6920 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e); 6921 tl_assert(eTy == Ity_I32); 6922 if (dstTy == Ity_I64) 6923 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) ); 6924 tl_assert(0); 6925 } 6926 6927 6928 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ) 6929 { 6930 tl_assert(MC_(clo_mc_level) == 3); 6931 6932 switch (e->tag) { 6933 6934 case Iex_GetI: { 6935 IRRegArray* descr_b; 6936 IRAtom *t1, *t2, *t3, *t4; 6937 IRRegArray* descr = e->Iex.GetI.descr; 6938 IRType equivIntTy 6939 = MC_(get_otrack_reg_array_equiv_int_type)(descr); 6940 /* If this array is unshadowable for whatever reason, use the 6941 usual approximation. */ 6942 if (equivIntTy == Ity_INVALID) 6943 return mkU32(0); 6944 tl_assert(sizeofIRType(equivIntTy) >= 4); 6945 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy)); 6946 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB, 6947 equivIntTy, descr->nElems ); 6948 /* Do a shadow indexed get of the same size, giving t1. Take 6949 the bottom 32 bits of it, giving t2. Compute into t3 the 6950 origin for the index (almost certainly zero, but there's 6951 no harm in being completely general here, since iropt will 6952 remove any useless code), and fold it in, giving a final 6953 value t4. */ 6954 t1 = assignNew( 'B', mce, equivIntTy, 6955 IRExpr_GetI( descr_b, e->Iex.GetI.ix, 6956 e->Iex.GetI.bias )); 6957 t2 = narrowTo32( mce, t1 ); 6958 t3 = schemeE( mce, e->Iex.GetI.ix ); 6959 t4 = gen_maxU32( mce, t2, t3 ); 6960 return t4; 6961 } 6962 case Iex_CCall: { 6963 Int i; 6964 IRAtom* here; 6965 IRExpr** args = e->Iex.CCall.args; 6966 IRAtom* curr = mkU32(0); 6967 for (i = 0; args[i]; i++) { 6968 tl_assert(i < 32); 6969 tl_assert(isOriginalAtom(mce, args[i])); 6970 /* Only take notice of this arg if the callee's 6971 mc-exclusion mask does not say it is to be excluded. */ 6972 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) { 6973 /* the arg is to be excluded from definedness checking. 6974 Do nothing. */ 6975 if (0) VG_(printf)("excluding %s(%d)\n", 6976 e->Iex.CCall.cee->name, i); 6977 } else { 6978 /* calculate the arg's definedness, and pessimistically 6979 merge it in. 
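      Worked example (hypothetical callee with two args a1, a2, where bit
      1 of the callee's mcx_mask is set and so a2 is excluded): the loop
      leaves curr = gen_maxU32(mkU32(0), schemeE(a1)), so a2 contributes
      nothing and a1's origin tag is returned whenever it is nonzero.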
*/ 6980 here = schemeE( mce, args[i] ); 6981 curr = gen_maxU32( mce, curr, here ); 6982 } 6983 } 6984 return curr; 6985 } 6986 case Iex_Load: { 6987 Int dszB; 6988 dszB = sizeofIRType(e->Iex.Load.ty); 6989 /* assert that the B value for the address is already 6990 available (somewhere) */ 6991 tl_assert(isIRAtom(e->Iex.Load.addr)); 6992 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64); 6993 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 ); 6994 } 6995 case Iex_ITE: { 6996 IRAtom* b1 = schemeE( mce, e->Iex.ITE.cond ); 6997 IRAtom* b3 = schemeE( mce, e->Iex.ITE.iftrue ); 6998 IRAtom* b2 = schemeE( mce, e->Iex.ITE.iffalse ); 6999 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 )); 7000 } 7001 case Iex_Qop: { 7002 IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 ); 7003 IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 ); 7004 IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 ); 7005 IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 ); 7006 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ), 7007 gen_maxU32( mce, b3, b4 ) ); 7008 } 7009 case Iex_Triop: { 7010 IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 ); 7011 IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 ); 7012 IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 ); 7013 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) ); 7014 } 7015 case Iex_Binop: { 7016 switch (e->Iex.Binop.op) { 7017 case Iop_CasCmpEQ8: case Iop_CasCmpNE8: 7018 case Iop_CasCmpEQ16: case Iop_CasCmpNE16: 7019 case Iop_CasCmpEQ32: case Iop_CasCmpNE32: 7020 case Iop_CasCmpEQ64: case Iop_CasCmpNE64: 7021 /* Just say these all produce a defined result, 7022 regardless of their arguments. See 7023 COMMENT_ON_CasCmpEQ in this file. */ 7024 return mkU32(0); 7025 default: { 7026 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 ); 7027 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 ); 7028 return gen_maxU32( mce, b1, b2 ); 7029 } 7030 } 7031 tl_assert(0); 7032 /*NOTREACHED*/ 7033 } 7034 case Iex_Unop: { 7035 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg ); 7036 return b1; 7037 } 7038 case Iex_Const: 7039 return mkU32(0); 7040 case Iex_RdTmp: 7041 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp )); 7042 case Iex_Get: { 7043 Int b_offset = MC_(get_otrack_shadow_offset)( 7044 e->Iex.Get.offset, 7045 sizeofIRType(e->Iex.Get.ty) 7046 ); 7047 tl_assert(b_offset >= -1 7048 && b_offset <= mce->layout->total_sizeB -4); 7049 if (b_offset >= 0) { 7050 /* FIXME: this isn't an atom! */ 7051 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB, 7052 Ity_I32 ); 7053 } 7054 return mkU32(0); 7055 } 7056 default: 7057 VG_(printf)("mc_translate.c: schemeE: unhandled: "); 7058 ppIRExpr(e); 7059 VG_(tool_panic)("memcheck:schemeE"); 7060 } 7061 } 7062 7063 7064 static void do_origins_Dirty ( MCEnv* mce, IRDirty* d ) 7065 { 7066 // This is a hacked version of do_shadow_Dirty 7067 Int i, k, n, toDo, gSz, gOff; 7068 IRAtom *here, *curr; 7069 IRTemp dst; 7070 7071 /* First check the guard. */ 7072 curr = schemeE( mce, d->guard ); 7073 7074 /* Now round up all inputs and maxU32 over them. */ 7075 7076 /* Inputs: unmasked args 7077 Note: arguments are evaluated REGARDLESS of the guard expression */ 7078 for (i = 0; d->args[i]; i++) { 7079 IRAtom* arg = d->args[i]; 7080 if ( (d->cee->mcx_mask & (1<<i)) 7081 || UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)) ) { 7082 /* ignore this arg */ 7083 } else { 7084 here = schemeE( mce, arg ); 7085 curr = gen_maxU32( mce, curr, here ); 7086 } 7087 } 7088 7089 /* Inputs: guest state that we read. 
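      Worked example (hypothetical fxState entry: Ifx_Read, offset 100,
      size 6, no repeats, not marked always-defined): the loop below
      visits two slices, 100..103 and 104..105. For each it asks
      MC_(get_otrack_shadow_offset) for a B-shadow offset; a slice whose
      offset comes back as -1 is skipped, otherwise
      ITE(guard, GET:I32(b_offset + 2*total_sizeB), 0x0:I32) is maxU32'd
      into curr.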
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* update 'curr' with maxU32 of the state slice
               gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {
               /* Observe the guard expression. If it is false use 0, i.e.
                  nothing is known about the origin */
               IRAtom *cond, *iffalse, *iftrue;

               cond    = assignNew( 'B', mce, Ity_I1, d->guard);
               iffalse = mkU32(0);
               iftrue  = assignNew( 'B', mce, Ity_I32,
                                    IRExpr_Get(b_offset
                                                  + 2*mce->layout->total_sizeB,
                                               Ity_I32));
               here = assignNew( 'B', mce, Ity_I32,
                                 IRExpr_ITE(cond, iftrue, iffalse));
               curr = gen_maxU32( mce, curr, here );
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      here = schemeE( mce, d->mAddr );
      curr = gen_maxU32( mce, curr, here );
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit B-value which should give an origin
      of some use if any of the inputs to the helper are undefined.
      Now we need to re-distribute the results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst = findShadowTmpB(mce, d->tmp);
      assign( 'V', mce, dst, curr );
   }

   /* Outputs: guest state that we write or modify. */
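   /* (Note: this loop mirrors the input loop above, but in the other
      direction: 'curr' is Put into every 4-byte shadow slot the helper
      may write, and the ITE below keeps the slot's existing value
      whenever the guard evaluates to false.) */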
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* Write 'curr' to the state slice gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {

               /* If the guard expression evaluates to false we simply Put
                  the value that is already stored in the guest state slot */
               IRAtom *cond, *iffalse;

               cond    = assignNew('B', mce, Ity_I1,
                                   d->guard);
               iffalse = assignNew('B', mce, Ity_I32,
                                   IRExpr_Get(b_offset +
                                                 2*mce->layout->total_sizeB,
                                              Ity_I32));
               curr = assignNew('V', mce, Ity_I32,
                                IRExpr_ITE(cond, curr, iffalse));

               stmt( 'B', mce, IRStmt_Put(b_offset
                                             + 2*mce->layout->total_sizeB,
                                          curr ));
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }
}


/* Generate IR for origin shadowing for a general guarded store. */
static void do_origins_Store_guarded ( MCEnv* mce,
                                       IREndness stEnd,
                                       IRExpr* stAddr,
                                       IRExpr* stData,
                                       IRExpr* guard )
{
   Int     dszB;
   IRAtom* dataB;
   /* assert that the B value for the address is already available
      (somewhere), since the call to schemeE will want to see it.
      XXXX how does this actually ensure that?? */
   tl_assert(isIRAtom(stAddr));
   tl_assert(isIRAtom(stData));
   dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
   dataB = schemeE( mce, stData );
   gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard );
}


/* Generate IR for origin shadowing for a plain store. */
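/* (Note: a plain store is handled as the guarded case with guard ==
   NULL, i.e. an unconditional store; the wrapper below simply passes
   NULL through to do_origins_Store_guarded.) */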
static void do_origins_Store_plain ( MCEnv* mce,
                                     IREndness stEnd,
                                     IRExpr* stAddr,
                                     IRExpr* stData )
{
   do_origins_Store_guarded ( mce, stEnd, stAddr, stData,
                              NULL/*guard*/ );
}


/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */

static void do_origins_StoreG ( MCEnv* mce, IRStoreG* sg )
{
   do_origins_Store_guarded( mce, sg->end, sg->addr,
                             sg->data, sg->guard );
}

static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
{
   IRType loadedTy = Ity_INVALID;
   switch (lg->cvt) {
      case ILGop_Ident64: loadedTy = Ity_I64; break;
      case ILGop_Ident32: loadedTy = Ity_I32; break;
      case ILGop_16Uto32: loadedTy = Ity_I16; break;
      case ILGop_16Sto32: loadedTy = Ity_I16; break;
      case ILGop_8Uto32:  loadedTy = Ity_I8;  break;
      case ILGop_8Sto32:  loadedTy = Ity_I8;  break;
      default: VG_(tool_panic)("schemeS.IRLoadG");
   }
   IRAtom* ori_alt
      = schemeE( mce, lg->alt );
   IRAtom* ori_final
      = expr2ori_Load_guarded_General(mce, loadedTy,
                                      lg->addr, 0/*addr bias*/,
                                      lg->guard, ori_alt );
   /* And finally, bind the origin to the destination temporary. */
   assign( 'B', mce, findShadowTmpB(mce, lg->dst), ori_final );
}


static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRPutI *puti = st->Ist.PutI.details;
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = puti->descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, puti->data );
         t2 = schemeE( mce, puti->ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
                                               puti->bias, t4) ));
         break;
      }

      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store_plain( mce, st->Ist.Store.end,
                                 st->Ist.Store.addr,
                                 st->Ist.Store.data );
         break;

      case Ist_StoreG:
         do_origins_StoreG( mce, st->Ist.StoreG.details );
         break;

      case Ist_LoadG:
         do_origins_LoadG( mce, st->Ist.LoadG.details );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data to the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
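         /* (Sketch: for "result = LL(addr)" the code below sets
            B(result) = schemeE( Load(addr) ), so the result inherits
            the origin recorded for the loaded location; for a
            store-conditional, the store's origin is shadowed as for a
            plain store and B(result) is forced to 0, i.e. "defined".) */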
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store_plain( mce, st->Ist.LLSC.end,
                                    st->Ist.LLSC.addr,
                                    st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}


/*--------------------------------------------------------------------*/
/*--- end                                           mc_translate.c ---*/
/*--------------------------------------------------------------------*/