1 2 /*---------------------------------------------------------------*/ 3 /*--- begin test_main.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2015 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <assert.h> 39 #include <string.h> 40 41 #include "libvex_basictypes.h" 42 #include "libvex.h" 43 44 #include "test_main.h" 45 46 47 /*---------------------------------------------------------------*/ 48 /*--- Test ---*/ 49 /*---------------------------------------------------------------*/ 50 51 52 __attribute__ ((noreturn)) 53 static 54 void failure_exit ( void ) 55 { 56 fprintf(stdout, "VEX did failure_exit. 
Bye.\n"); 57 exit(1); 58 } 59 60 static 61 void log_bytes ( const HChar* bytes, SizeT nbytes ) 62 { 63 fwrite ( bytes, 1, nbytes, stdout ); 64 } 65 66 #define N_LINEBUF 10000 67 static HChar linebuf[N_LINEBUF]; 68 69 #define N_ORIGBUF 10000 70 #define N_TRANSBUF 5000 71 72 static UChar origbuf[N_ORIGBUF]; 73 static UChar transbuf[N_TRANSBUF]; 74 75 static Bool verbose = True; 76 77 /* Forwards */ 78 #if 1 /* UNUSED */ 79 //static IRSB* ac_instrument ( IRSB*, VexGuestLayout*, IRType ); 80 static 81 IRSB* mc_instrument ( void* closureV, 82 IRSB* bb_in, VexGuestLayout* layout, 83 VexGuestExtents* vge, 84 IRType gWordTy, IRType hWordTy ); 85 #endif 86 87 static Bool chase_into_not_ok ( void* opaque, Addr dst ) { 88 return False; 89 } 90 static UInt needs_self_check ( void *closureV, VexRegisterUpdates *pxControl, 91 const VexGuestExtents *vge ) { 92 return 0; 93 } 94 95 int main ( int argc, char** argv ) 96 { 97 FILE* f; 98 Int i; 99 UInt u, sum; 100 Addr32 orig_addr; 101 Int bb_number, n_bbs_done = 0; 102 Int orig_nbytes, trans_used; 103 VexTranslateResult tres; 104 VexControl vcon; 105 VexGuestExtents vge; 106 VexArchInfo vai_x86, vai_amd64, vai_ppc32, vai_arm, vai_mips32, vai_mips64; 107 VexAbiInfo vbi; 108 VexTranslateArgs vta; 109 110 if (argc != 2) { 111 fprintf(stderr, "usage: vex file.orig\n"); 112 exit(1); 113 } 114 f = fopen(argv[1], "r"); 115 if (!f) { 116 fprintf(stderr, "can't open `%s'\n", argv[1]); 117 exit(1); 118 } 119 120 /* Run with default params. However, we can't allow bb chasing 121 since that causes the front end to get segfaults when it tries 122 to read code outside the initial BB we hand it. So when calling 123 LibVEX_Translate, send in a chase-into predicate that always 124 returns False. 
*/ 125 LibVEX_default_VexControl ( &vcon ); 126 vcon.iropt_level = 2; 127 vcon.guest_max_insns = 60; 128 129 LibVEX_Init ( &failure_exit, &log_bytes, 130 1, /* debug_paranoia */ 131 &vcon ); 132 133 134 while (!feof(f)) { 135 136 __attribute__((unused)) 137 char* unused1 = fgets(linebuf, N_LINEBUF,f); 138 if (linebuf[0] == 0) continue; 139 if (linebuf[0] != '.') continue; 140 141 if (n_bbs_done == TEST_N_BBS) break; 142 n_bbs_done++; 143 144 /* first line is: . bb-number bb-addr n-bytes */ 145 assert(3 == sscanf(&linebuf[1], " %d %x %d\n", 146 & bb_number, 147 & orig_addr, & orig_nbytes )); 148 assert(orig_nbytes >= 1); 149 assert(!feof(f)); 150 __attribute__((unused)) 151 char* unused2 = fgets(linebuf, N_LINEBUF,f); 152 assert(linebuf[0] == '.'); 153 154 /* second line is: . byte byte byte etc */ 155 if (verbose) 156 printf("============ Basic Block %d, Done %d, " 157 "Start %x, nbytes %2d ============", 158 bb_number, n_bbs_done-1, orig_addr, orig_nbytes); 159 160 /* thumb ITstate analysis needs to examine the 18 bytes 161 preceding the first instruction. So let's leave the first 18 162 zeroed out. 
*/ 163 memset(origbuf, 0, sizeof(origbuf)); 164 165 assert(orig_nbytes >= 1 && orig_nbytes <= N_ORIGBUF); 166 for (i = 0; i < orig_nbytes; i++) { 167 assert(1 == sscanf(&linebuf[2 + 3*i], "%x", &u)); 168 origbuf[18+ i] = (UChar)u; 169 } 170 171 /* FIXME: put sensible values into the .hwcaps fields */ 172 LibVEX_default_VexArchInfo(&vai_x86); 173 vai_x86.hwcaps = VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 174 | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3; 175 vai_x86.endness = VexEndnessLE; 176 177 LibVEX_default_VexArchInfo(&vai_amd64); 178 vai_amd64.hwcaps = 0; 179 vai_amd64.endness = VexEndnessLE; 180 181 LibVEX_default_VexArchInfo(&vai_ppc32); 182 vai_ppc32.hwcaps = 0; 183 vai_ppc32.ppc_icache_line_szB = 128; 184 185 LibVEX_default_VexArchInfo(&vai_arm); 186 vai_arm.hwcaps = VEX_HWCAPS_ARM_VFP3 | VEX_HWCAPS_ARM_NEON | 7; 187 188 LibVEX_default_VexArchInfo(&vai_mips32); 189 vai_mips32.endness = VexEndnessLE; 190 vai_mips32.hwcaps = VEX_PRID_COMP_MIPS; 191 192 LibVEX_default_VexArchInfo(&vai_mips64); 193 vai_mips64.endness = VexEndnessLE; 194 195 LibVEX_default_VexAbiInfo(&vbi); 196 vbi.guest_stack_redzone_size = 128; 197 198 /* ----- Set up args for LibVEX_Translate ----- */ 199 200 vta.abiinfo_both = vbi; 201 vta.guest_bytes = &origbuf[18]; 202 vta.guest_bytes_addr = orig_addr; 203 vta.callback_opaque = NULL; 204 vta.chase_into_ok = chase_into_not_ok; 205 vta.guest_extents = &vge; 206 vta.host_bytes = transbuf; 207 vta.host_bytes_size = N_TRANSBUF; 208 vta.host_bytes_used = &trans_used; 209 210 #if 0 /* ppc32 -> ppc32 */ 211 vta.arch_guest = VexArchPPC32; 212 vta.archinfo_guest = vai_ppc32; 213 vta.arch_host = VexArchPPC32; 214 vta.archinfo_host = vai_ppc32; 215 #endif 216 #if 0 /* amd64 -> amd64 */ 217 vta.arch_guest = VexArchAMD64; 218 vta.archinfo_guest = vai_amd64; 219 vta.arch_host = VexArchAMD64; 220 vta.archinfo_host = vai_amd64; 221 #endif 222 #if 0 /* x86 -> x86 */ 223 vta.arch_guest = VexArchX86; 224 vta.archinfo_guest = vai_x86; 225 vta.arch_host = 
VexArchX86; 226 vta.archinfo_host = vai_x86; 227 #endif 228 #if 1 /* x86 -> mips32 */ 229 vta.arch_guest = VexArchX86; 230 vta.archinfo_guest = vai_x86; 231 vta.arch_host = VexArchMIPS32; 232 vta.archinfo_host = vai_mips32; 233 #endif 234 #if 0 /* amd64 -> mips64 */ 235 vta.arch_guest = VexArchAMD64; 236 vta.archinfo_guest = vai_amd64; 237 vta.arch_host = VexArchMIPS64; 238 vta.archinfo_host = vai_mips64; 239 #endif 240 #if 0 /* arm -> arm */ 241 vta.arch_guest = VexArchARM; 242 vta.archinfo_guest = vai_arm; 243 vta.arch_host = VexArchARM; 244 vta.archinfo_host = vai_arm; 245 /* ARM/Thumb only hacks, that are needed to keep the ITstate 246 analyser in the front end happy. */ 247 vta.guest_bytes = &origbuf[18 +1]; 248 vta.guest_bytes_addr = (Addr) &origbuf[18 +1]; 249 #endif 250 251 #if 1 /* no instrumentation */ 252 vta.instrument1 = NULL; 253 vta.instrument2 = NULL; 254 #endif 255 #if 0 /* addrcheck */ 256 vta.instrument1 = ac_instrument; 257 vta.instrument2 = NULL; 258 #endif 259 #if 0 /* memcheck */ 260 vta.instrument1 = mc_instrument; 261 vta.instrument2 = NULL; 262 #endif 263 vta.needs_self_check = needs_self_check; 264 vta.preamble_function = NULL; 265 vta.traceflags = TEST_FLAGS; 266 vta.addProfInc = False; 267 vta.sigill_diag = True; 268 269 vta.disp_cp_chain_me_to_slowEP = (void*)0x12345678; 270 vta.disp_cp_chain_me_to_fastEP = (void*)0x12345679; 271 vta.disp_cp_xindir = (void*)0x1234567A; 272 vta.disp_cp_xassisted = (void*)0x1234567B; 273 274 vta.finaltidy = NULL; 275 276 for (i = 0; i < TEST_N_ITERS; i++) 277 tres = LibVEX_Translate ( &vta ); 278 279 if (tres.status != VexTransOK) 280 printf("\ntres = %d\n", (Int)tres.status); 281 assert(tres.status == VexTransOK); 282 assert(tres.n_sc_extents == 0); 283 assert(vge.n_used == 1); 284 assert((UInt)(vge.len[0]) == orig_nbytes); 285 286 sum = 0; 287 for (i = 0; i < trans_used; i++) 288 sum += (UInt)transbuf[i]; 289 printf ( " %6.2f ... 
%u\n", 290 (double)trans_used / (double)vge.len[0], sum ); 291 } 292 293 fclose(f); 294 printf("\n"); 295 LibVEX_ShowAllocStats(); 296 297 return 0; 298 } 299 300 ////////////////////////////////////////////////////////////////////// 301 ////////////////////////////////////////////////////////////////////// 302 ////////////////////////////////////////////////////////////////////// 303 ////////////////////////////////////////////////////////////////////// 304 ////////////////////////////////////////////////////////////////////// 305 ////////////////////////////////////////////////////////////////////// 306 ////////////////////////////////////////////////////////////////////// 307 ////////////////////////////////////////////////////////////////////// 308 309 #if 0 /* UNUSED */ 310 311 static 312 __attribute((noreturn)) 313 void panic ( HChar* s ) 314 { 315 printf("\npanic: %s\n", s); 316 failure_exit(); 317 } 318 319 static 320 IRSB* ac_instrument (IRSB* bb_in, VexGuestLayout* layout, IRType hWordTy ) 321 { 322 /* Use this rather than eg. -1 because it's a UInt. */ 323 #define INVALID_DATA_SIZE 999999 324 325 Int i; 326 Int sz; 327 IRCallee* helper; 328 IRStmt* st; 329 IRExpr* data; 330 IRExpr* addr; 331 Bool needSz; 332 333 /* Set up BB */ 334 IRSB* bb = emptyIRSB(); 335 bb->tyenv = dopyIRTypeEnv(bb_in->tyenv); 336 bb->next = dopyIRExpr(bb_in->next); 337 bb->jumpkind = bb_in->jumpkind; 338 339 /* No loads to consider in ->next. 
 */
   assert(isIRAtom(bb_in->next));

   /* Walk the input statements: for each load or store, emit a call
      to the matching addrcheck helper, then copy the statement
      across unchanged. */
   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      if (!st) continue;

      switch (st->tag) {

         case Ist_Tmp:
            data = st->Ist.Tmp.data;
            if (data->tag == Iex_LDle) {
               addr = data->Iex.LDle.addr;
               sz = sizeofIRType(data->Iex.LDle.ty);
               needSz = False;
               /* Sizes 1/2/4 get dedicated helpers; any other size
                  uses the generic LOADN helper with the size passed
                  as an extra argument.  Helper addresses are dummy
                  constants -- this code is never executed. */
               switch (sz) {
                  case 4: helper = mkIRCallee(1, "ac_helperc_LOAD4",
                                              (void*)0x12345601); break;
                  case 2: helper = mkIRCallee(0, "ac_helperc_LOAD2",
                                              (void*)0x12345602); break;
                  case 1: helper = mkIRCallee(1, "ac_helperc_LOAD1",
                                              (void*)0x12345603); break;
                  default: helper = mkIRCallee(0, "ac_helperc_LOADN",
                                               (void*)0x12345604);
                           needSz = True; break;
               }
               if (needSz) {
                  addStmtToIRSB(
                     bb,
                     IRStmt_Dirty(
                        unsafeIRDirty_0_N( helper->regparms,
                                           helper->name, helper->addr,
                                           mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
                  ));
               } else {
                  addStmtToIRSB(
                     bb,
                     IRStmt_Dirty(
                        unsafeIRDirty_0_N( helper->regparms,
                                           helper->name, helper->addr,
                                           mkIRExprVec_1(addr) )
                  ));
               }
            }
            break;

         case Ist_STle:
            data = st->Ist.STle.data;
            addr = st->Ist.STle.addr;
            assert(isIRAtom(data));
            assert(isIRAtom(addr));
            sz = sizeofIRType(typeOfIRExpr(bb_in->tyenv, data));
            needSz = False;
            /* Same size-based helper selection as for loads. */
            switch (sz) {
               case 4: helper = mkIRCallee(1, "ac_helperc_STORE4",
                                           (void*)0x12345605); break;
               case 2: helper = mkIRCallee(0, "ac_helperc_STORE2",
                                           (void*)0x12345606); break;
               case 1: helper = mkIRCallee(1, "ac_helperc_STORE1",
                                           (void*)0x12345607); break;
               default: helper = mkIRCallee(0, "ac_helperc_STOREN",
                                            (void*)0x12345608);
                        needSz = True; break;
            }
            if (needSz) {
               addStmtToIRSB(
                  bb,
                  IRStmt_Dirty(
                     unsafeIRDirty_0_N( helper->regparms,
                                        helper->name, helper->addr,
                                        mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
               ));
            } else {
               addStmtToIRSB(
                  bb,
                  IRStmt_Dirty(
                     unsafeIRDirty_0_N( helper->regparms,
                                        helper->name, helper->addr,
                                        mkIRExprVec_1(addr) )
               ));
            }
            break;

         case Ist_Put:
            assert(isIRAtom(st->Ist.Put.data));
            break;

         case Ist_PutI:
            assert(isIRAtom(st->Ist.PutI.ix));
            assert(isIRAtom(st->Ist.PutI.data));
            break;

         case Ist_Exit:
            assert(isIRAtom(st->Ist.Exit.guard));
            break;

         case Ist_Dirty:
            /* If the call doesn't interact with memory, we ain't
               interested. */
            if (st->Ist.Dirty.details->mFx == Ifx_None)
               break;
            goto unhandled;

         default:
         unhandled:
            printf("\n");
            ppIRStmt(st);
            printf("\n");
            panic("addrcheck: unhandled IRStmt");
      }

      /* Finally, copy the original statement itself across. */
      addStmtToIRSB( bb, dopyIRStmt(st));
   }

   return bb;
}
#endif /* UNUSED */

//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

#if 1 /* UNUSED */

/* Abort with a message; used via the tool_panic macro below. */
static
__attribute((noreturn))
void panic ( HChar* s )
{
   printf("\npanic: %s\n", s);
   failure_exit();
}

/* Minimal stand-ins for the Valgrind-core facilities that the
   memcheck instrumenter pasted in below expects to find. */
#define tl_assert(xxx) assert(xxx)
#define VG_(xxxx) xxxx
#define tool_panic(zzz) panic(zzz)
#define MC_(zzzz) MC_##zzzz
#define TL_(zzzz) SK_##zzzz


/* Stubs standing in for the real memcheck runtime helpers: only
   their addresses are baked into the generated IR; the code is
   never actually run by this harness. */
static void MC_helperc_complain_undef ( void );
static void MC_helperc_LOADV8 ( void );
static void MC_helperc_LOADV4 ( void );
static void MC_helperc_LOADV2 ( void );
static void MC_helperc_LOADV1 ( void );
static void MC_helperc_STOREV8( void );
static void MC_helperc_STOREV4( void );
static void MC_helperc_STOREV2( void );
static void MC_helperc_STOREV1( void );
static void MC_helperc_value_check0_fail( void );
static void MC_helperc_value_check1_fail( void );
static void MC_helperc_value_check4_fail( void );

/* Do-nothing bodies: the translated code is never executed by this
   harness, so only the symbol addresses matter. */
static void MC_helperc_complain_undef ( void ) { }
static void MC_helperc_LOADV8 ( void ) { }
static void MC_helperc_LOADV4 ( void ) { }
static void MC_helperc_LOADV2 ( void ) { }
static void MC_helperc_LOADV1 ( void ) { }
static void MC_helperc_STOREV8( void ) { }
static void MC_helperc_STOREV4( void ) { }
static void MC_helperc_STOREV2( void ) { }
static void MC_helperc_STOREV1( void ) { }
static void MC_helperc_value_check0_fail( void ) { }
static void MC_helperc_value_check1_fail( void ) { }
static void MC_helperc_value_check4_fail( void ) { }


/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2015 Julian Seward
      jseward (at) acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

//#include "mc_include.h"


/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowType ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );


/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the bb being constructed.  IRStmts are added. */
      IRSB* bb;

      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
         original temps to their current shadow temp.
         Initially all entries are IRTemp_INVALID.  Entries are added
         lazily since many original temps are not used due to
         optimisation prior to instrumentation.  Note that floating
         point original tmps are shadowed by integer tmps of the same
         size, and Bit-typed original tmps are shadowed by the type
         Ity_I8.  See comment below. */
      IRTemp* tmpMap;
      Int     n_originalTmps; /* for range checking */

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;
      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.
Shadow tmps are allocated lazily (on 585 demand), as they are encountered. This is for two reasons. 586 587 (1) (less important reason): Many original tmps are unused due to 588 initial IR optimisation, and we do not want to spaces in tables 589 tracking them. 590 591 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a 592 table indexed [0 .. n_types-1], which gives the current shadow for 593 each original tmp, or INVALID_IRTEMP if none is so far assigned. 594 It is necessary to support making multiple assignments to a shadow 595 -- specifically, after testing a shadow for definedness, it needs 596 to be made defined. But IR's SSA property disallows this. 597 598 (2) (more important reason): Therefore, when a shadow needs to get 599 a new value, a new temporary is created, the value is assigned to 600 that, and the tmpMap is updated to reflect the new binding. 601 602 A corollary is that if the tmpMap maps a given tmp to 603 INVALID_IRTEMP and we are hoping to read that shadow tmp, it means 604 there's a read-before-write error in the original tmps. The IR 605 sanity checker should catch all such anomalies, however. 606 */ 607 608 /* Find the tmp currently shadowing the given original tmp. If none 609 so far exists, allocate one. */ 610 static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig ) 611 { 612 tl_assert(orig < mce->n_originalTmps); 613 if (mce->tmpMap[orig] == IRTemp_INVALID) { 614 mce->tmpMap[orig] 615 = newIRTemp(mce->bb->tyenv, 616 shadowType(mce->bb->tyenv->types[orig])); 617 } 618 return mce->tmpMap[orig]; 619 } 620 621 /* Allocate a new shadow for the given original tmp. This means any 622 previous shadow is abandoned. This is needed because it is 623 necessary to give a new value to a shadow once it has been tested 624 for undefinedness, but unfortunately IR's SSA property disallows 625 this. Instead we must abandon the old shadow, allocate a new one 626 and use that instead. 
*/ 627 static void newShadowTmp ( MCEnv* mce, IRTemp orig ) 628 { 629 tl_assert(orig < mce->n_originalTmps); 630 mce->tmpMap[orig] 631 = newIRTemp(mce->bb->tyenv, 632 shadowType(mce->bb->tyenv->types[orig])); 633 } 634 635 636 /*------------------------------------------------------------*/ 637 /*--- IRAtoms -- a subset of IRExprs ---*/ 638 /*------------------------------------------------------------*/ 639 640 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by 641 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat 642 input, most of this code deals in atoms. Usefully, a value atom 643 always has a V-value which is also an atom: constants are shadowed 644 by constants, and temps are shadowed by the corresponding shadow 645 temporary. */ 646 647 typedef IRExpr IRAtom; 648 649 /* (used for sanity checks only): is this an atom which looks 650 like it's from original code? */ 651 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 ) 652 { 653 if (a1->tag == Iex_Const) 654 return True; 655 if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps) 656 return True; 657 return False; 658 } 659 660 /* (used for sanity checks only): is this an atom which looks 661 like it's from shadow code? */ 662 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 ) 663 { 664 if (a1->tag == Iex_Const) 665 return True; 666 if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps) 667 return True; 668 return False; 669 } 670 671 /* (used for sanity checks only): check that both args are atoms and 672 are identically-kinded. 
*/ 673 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 ) 674 { 675 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp) 676 return True; 677 if (a1->tag == Iex_Const && a2->tag == Iex_Const) 678 return True; 679 return False; 680 } 681 682 683 /*------------------------------------------------------------*/ 684 /*--- Type management ---*/ 685 /*------------------------------------------------------------*/ 686 687 /* Shadow state is always accessed using integer types. This returns 688 an integer type with the same size (as per sizeofIRType) as the 689 given type. The only valid shadow types are Bit, I8, I16, I32, 690 I64, V128. */ 691 692 static IRType shadowType ( IRType ty ) 693 { 694 switch (ty) { 695 case Ity_I1: 696 case Ity_I8: 697 case Ity_I16: 698 case Ity_I32: 699 case Ity_I64: return ty; 700 case Ity_F32: return Ity_I32; 701 case Ity_F64: return Ity_I64; 702 case Ity_V128: return Ity_V128; 703 default: ppIRType(ty); 704 VG_(tool_panic)("memcheck:shadowType"); 705 } 706 } 707 708 /* Produce a 'defined' value of the given shadow type. Should only be 709 supplied shadow types (Bit/I8/I16/I32/UI64). 
*/ 710 static IRExpr* definedOfType ( IRType ty ) { 711 switch (ty) { 712 case Ity_I1: return IRExpr_Const(IRConst_U1(False)); 713 case Ity_I8: return IRExpr_Const(IRConst_U8(0)); 714 case Ity_I16: return IRExpr_Const(IRConst_U16(0)); 715 case Ity_I32: return IRExpr_Const(IRConst_U32(0)); 716 case Ity_I64: return IRExpr_Const(IRConst_U64(0)); 717 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000)); 718 default: VG_(tool_panic)("memcheck:definedOfType"); 719 } 720 } 721 722 723 /*------------------------------------------------------------*/ 724 /*--- Constructing IR fragments ---*/ 725 /*------------------------------------------------------------*/ 726 727 /* assign value to tmp */ 728 #define assign(_bb,_tmp,_expr) \ 729 addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr))) 730 731 /* add stmt to a bb */ 732 #define stmt(_bb,_stmt) \ 733 addStmtToIRSB((_bb), (_stmt)) 734 735 /* build various kinds of expressions */ 736 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2)) 737 #define unop(_op, _arg) IRExpr_Unop((_op),(_arg)) 738 #define mkU8(_n) IRExpr_Const(IRConst_U8(_n)) 739 #define mkU16(_n) IRExpr_Const(IRConst_U16(_n)) 740 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n)) 741 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n)) 742 #define mkV128(_n) IRExpr_Const(IRConst_V128(_n)) 743 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp)) 744 745 /* bind the given expression to a new temporary, and return the 746 temporary. This effectively converts an arbitrary expression into 747 an atom. 
*/ 748 static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) { 749 IRTemp t = newIRTemp(mce->bb->tyenv, ty); 750 assign(mce->bb, t, e); 751 return mkexpr(t); 752 } 753 754 755 /*------------------------------------------------------------*/ 756 /*--- Constructing definedness primitive ops ---*/ 757 /*------------------------------------------------------------*/ 758 759 /* --------- Defined-if-either-defined --------- */ 760 761 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 762 tl_assert(isShadowAtom(mce,a1)); 763 tl_assert(isShadowAtom(mce,a2)); 764 return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2)); 765 } 766 767 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 768 tl_assert(isShadowAtom(mce,a1)); 769 tl_assert(isShadowAtom(mce,a2)); 770 return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2)); 771 } 772 773 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 774 tl_assert(isShadowAtom(mce,a1)); 775 tl_assert(isShadowAtom(mce,a2)); 776 return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2)); 777 } 778 779 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 780 tl_assert(isShadowAtom(mce,a1)); 781 tl_assert(isShadowAtom(mce,a2)); 782 return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2)); 783 } 784 785 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 786 tl_assert(isShadowAtom(mce,a1)); 787 tl_assert(isShadowAtom(mce,a2)); 788 return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2)); 789 } 790 791 /* --------- Undefined-if-either-undefined --------- */ 792 793 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 794 tl_assert(isShadowAtom(mce,a1)); 795 tl_assert(isShadowAtom(mce,a2)); 796 return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2)); 797 } 798 799 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 800 tl_assert(isShadowAtom(mce,a1)); 801 tl_assert(isShadowAtom(mce,a2)); 802 return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2)); 803 } 
804 805 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 806 tl_assert(isShadowAtom(mce,a1)); 807 tl_assert(isShadowAtom(mce,a2)); 808 return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2)); 809 } 810 811 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 812 tl_assert(isShadowAtom(mce,a1)); 813 tl_assert(isShadowAtom(mce,a2)); 814 return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2)); 815 } 816 817 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) { 818 tl_assert(isShadowAtom(mce,a1)); 819 tl_assert(isShadowAtom(mce,a2)); 820 return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2)); 821 } 822 823 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) { 824 switch (vty) { 825 case Ity_I8: return mkUifU8(mce, a1, a2); 826 case Ity_I16: return mkUifU16(mce, a1, a2); 827 case Ity_I32: return mkUifU32(mce, a1, a2); 828 case Ity_I64: return mkUifU64(mce, a1, a2); 829 case Ity_V128: return mkUifUV128(mce, a1, a2); 830 default: 831 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n"); 832 VG_(tool_panic)("memcheck:mkUifU"); 833 } 834 } 835 836 /* --------- The Left-family of operations. 
--------- */ 837 838 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) { 839 tl_assert(isShadowAtom(mce,a1)); 840 /* It's safe to duplicate a1 since it's only an atom */ 841 return assignNew(mce, Ity_I8, 842 binop(Iop_Or8, a1, 843 assignNew(mce, Ity_I8, 844 /* unop(Iop_Neg8, a1)))); */ 845 binop(Iop_Sub8, mkU8(0), a1) ))); 846 } 847 848 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) { 849 tl_assert(isShadowAtom(mce,a1)); 850 /* It's safe to duplicate a1 since it's only an atom */ 851 return assignNew(mce, Ity_I16, 852 binop(Iop_Or16, a1, 853 assignNew(mce, Ity_I16, 854 /* unop(Iop_Neg16, a1)))); */ 855 binop(Iop_Sub16, mkU16(0), a1) ))); 856 } 857 858 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) { 859 tl_assert(isShadowAtom(mce,a1)); 860 /* It's safe to duplicate a1 since it's only an atom */ 861 return assignNew(mce, Ity_I32, 862 binop(Iop_Or32, a1, 863 assignNew(mce, Ity_I32, 864 /* unop(Iop_Neg32, a1)))); */ 865 binop(Iop_Sub32, mkU32(0), a1) ))); 866 } 867 868 /* --------- 'Improvement' functions for AND/OR. --------- */ 869 870 /* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give 871 defined (0); all other -> undefined (1). 
872 */ 873 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 874 { 875 tl_assert(isOriginalAtom(mce, data)); 876 tl_assert(isShadowAtom(mce, vbits)); 877 tl_assert(sameKindedAtoms(data, vbits)); 878 return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits)); 879 } 880 881 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 882 { 883 tl_assert(isOriginalAtom(mce, data)); 884 tl_assert(isShadowAtom(mce, vbits)); 885 tl_assert(sameKindedAtoms(data, vbits)); 886 return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits)); 887 } 888 889 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 890 { 891 tl_assert(isOriginalAtom(mce, data)); 892 tl_assert(isShadowAtom(mce, vbits)); 893 tl_assert(sameKindedAtoms(data, vbits)); 894 return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits)); 895 } 896 897 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 898 { 899 tl_assert(isOriginalAtom(mce, data)); 900 tl_assert(isShadowAtom(mce, vbits)); 901 tl_assert(sameKindedAtoms(data, vbits)); 902 return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits)); 903 } 904 905 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 906 { 907 tl_assert(isOriginalAtom(mce, data)); 908 tl_assert(isShadowAtom(mce, vbits)); 909 tl_assert(sameKindedAtoms(data, vbits)); 910 return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits)); 911 } 912 913 /* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give 914 defined (0); all other -> undefined (1). 
915 */ 916 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 917 { 918 tl_assert(isOriginalAtom(mce, data)); 919 tl_assert(isShadowAtom(mce, vbits)); 920 tl_assert(sameKindedAtoms(data, vbits)); 921 return assignNew( 922 mce, Ity_I8, 923 binop(Iop_Or8, 924 assignNew(mce, Ity_I8, unop(Iop_Not8, data)), 925 vbits) ); 926 } 927 928 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 929 { 930 tl_assert(isOriginalAtom(mce, data)); 931 tl_assert(isShadowAtom(mce, vbits)); 932 tl_assert(sameKindedAtoms(data, vbits)); 933 return assignNew( 934 mce, Ity_I16, 935 binop(Iop_Or16, 936 assignNew(mce, Ity_I16, unop(Iop_Not16, data)), 937 vbits) ); 938 } 939 940 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 941 { 942 tl_assert(isOriginalAtom(mce, data)); 943 tl_assert(isShadowAtom(mce, vbits)); 944 tl_assert(sameKindedAtoms(data, vbits)); 945 return assignNew( 946 mce, Ity_I32, 947 binop(Iop_Or32, 948 assignNew(mce, Ity_I32, unop(Iop_Not32, data)), 949 vbits) ); 950 } 951 952 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 953 { 954 tl_assert(isOriginalAtom(mce, data)); 955 tl_assert(isShadowAtom(mce, vbits)); 956 tl_assert(sameKindedAtoms(data, vbits)); 957 return assignNew( 958 mce, Ity_I64, 959 binop(Iop_Or64, 960 assignNew(mce, Ity_I64, unop(Iop_Not64, data)), 961 vbits) ); 962 } 963 964 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits ) 965 { 966 tl_assert(isOriginalAtom(mce, data)); 967 tl_assert(isShadowAtom(mce, vbits)); 968 tl_assert(sameKindedAtoms(data, vbits)); 969 return assignNew( 970 mce, Ity_V128, 971 binop(Iop_OrV128, 972 assignNew(mce, Ity_V128, unop(Iop_NotV128, data)), 973 vbits) ); 974 } 975 976 /* --------- Pessimising casts. --------- */ 977 978 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits ) 979 { 980 IRType ty; 981 IRAtom* tmp1; 982 /* Note, dst_ty is a shadow type, not an original type. 
*/ 983 /* First of all, collapse vbits down to a single bit. */ 984 tl_assert(isShadowAtom(mce,vbits)); 985 ty = typeOfIRExpr(mce->bb->tyenv, vbits); 986 tmp1 = NULL; 987 switch (ty) { 988 case Ity_I1: 989 tmp1 = vbits; 990 break; 991 case Ity_I8: 992 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE8, vbits, mkU8(0))); 993 break; 994 case Ity_I16: 995 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE16, vbits, mkU16(0))); 996 break; 997 case Ity_I32: 998 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE32, vbits, mkU32(0))); 999 break; 1000 case Ity_I64: 1001 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE64, vbits, mkU64(0))); 1002 break; 1003 default: 1004 VG_(tool_panic)("mkPCastTo(1)"); 1005 } 1006 tl_assert(tmp1); 1007 /* Now widen up to the dst type. */ 1008 switch (dst_ty) { 1009 case Ity_I1: 1010 return tmp1; 1011 case Ity_I8: 1012 return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1)); 1013 case Ity_I16: 1014 return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1)); 1015 case Ity_I32: 1016 return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1)); 1017 case Ity_I64: 1018 return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1)); 1019 case Ity_V128: 1020 tmp1 = assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1)); 1021 tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1)); 1022 return tmp1; 1023 default: 1024 ppIRType(dst_ty); 1025 VG_(tool_panic)("mkPCastTo(2)"); 1026 } 1027 } 1028 1029 1030 /*------------------------------------------------------------*/ 1031 /*--- Emit a test and complaint if something is undefined. ---*/ 1032 /*------------------------------------------------------------*/ 1033 1034 /* Set the annotations on a dirty helper to indicate that the stack 1035 pointer and instruction pointers might be read. This is the 1036 behaviour of all 'emit-a-complaint' style functions we might 1037 call. 
   */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   /* Declare both the stack pointer and the instruction pointer as
      read, at the offsets/sizes the guest layout specifies. */
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in
   disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretion for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Pick a complaint helper specialised on the operand size where
      one exists (0, 1, 4 bytes); otherwise use the generic helper,
      which takes the size as an argument. */
   switch (sz) {
      case 0:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check0_fail)",
                                 &MC_(helperc_value_check0_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 1:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check1_fail)",
                                 &MC_(helperc_value_check1_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 4:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check4_fail)",
                                 &MC_(helperc_value_check4_fail),
                                 mkIRExprVec_0()
                               );
         break;
      default:
         di = unsafeIRDirty_0_N( 1/*regparms*/,
                                 "MC_(helperc_complain_undef)",
                                 &MC_(helperc_complain_undef),
                                 mkIRExprVec_1( mkIRExpr_HWord( sz ))
                               );
         break;
   }
   /* The helper fires only when cond (the undefinedness bit) holds. */
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmp(mce, atom->Iex.RdTmp.tmp);
      assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp),
                      definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
1141 */ 1142 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size ) 1143 { 1144 Int minoffD, maxoffD, i; 1145 Int minoff = offset; 1146 Int maxoff = minoff + size - 1; 1147 tl_assert((minoff & ~0xFFFF) == 0); 1148 tl_assert((maxoff & ~0xFFFF) == 0); 1149 1150 for (i = 0; i < mce->layout->n_alwaysDefd; i++) { 1151 minoffD = mce->layout->alwaysDefd[i].offset; 1152 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1; 1153 tl_assert((minoffD & ~0xFFFF) == 0); 1154 tl_assert((maxoffD & ~0xFFFF) == 0); 1155 1156 if (maxoff < minoffD || maxoffD < minoff) 1157 continue; /* no overlap */ 1158 if (minoff >= minoffD && maxoff <= maxoffD) 1159 return True; /* completely contained in an always-defd section */ 1160 1161 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap"); 1162 } 1163 return False; /* could not find any containing section */ 1164 } 1165 1166 1167 /* Generate into bb suitable actions to shadow this Put. If the state 1168 slice is marked 'always defined', do nothing. Otherwise, write the 1169 supplied V bits to the shadow state. We can pass in either an 1170 original atom or a V-atom, but not both. In the former case the 1171 relevant V-bits are then generated from the original. 1172 */ 1173 static 1174 void do_shadow_PUT ( MCEnv* mce, Int offset, 1175 IRAtom* atom, IRAtom* vatom ) 1176 { 1177 IRType ty; 1178 if (atom) { 1179 tl_assert(!vatom); 1180 tl_assert(isOriginalAtom(mce, atom)); 1181 vatom = expr2vbits( mce, atom ); 1182 } else { 1183 tl_assert(vatom); 1184 tl_assert(isShadowAtom(mce, vatom)); 1185 } 1186 1187 ty = typeOfIRExpr(mce->bb->tyenv, vatom); 1188 tl_assert(ty != Ity_I1); 1189 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) { 1190 /* later: no ... */ 1191 /* emit code to emit a complaint if any of the vbits are 1. */ 1192 /* complainIfUndefined(mce, atom); */ 1193 } else { 1194 /* Do a plain shadow Put. 
*/ 1195 stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) ); 1196 } 1197 } 1198 1199 1200 /* Return an expression which contains the V bits corresponding to the 1201 given GETI (passed in in pieces). 1202 */ 1203 static 1204 void do_shadow_PUTI ( MCEnv* mce, 1205 IRRegArray* descr, IRAtom* ix, Int bias, IRAtom* atom ) 1206 { 1207 IRAtom* vatom; 1208 IRType ty, tyS; 1209 Int arrSize;; 1210 1211 tl_assert(isOriginalAtom(mce,atom)); 1212 vatom = expr2vbits( mce, atom ); 1213 tl_assert(sameKindedAtoms(atom, vatom)); 1214 ty = descr->elemTy; 1215 tyS = shadowType(ty); 1216 arrSize = descr->nElems * sizeofIRType(ty); 1217 tl_assert(ty != Ity_I1); 1218 tl_assert(isOriginalAtom(mce,ix)); 1219 complainIfUndefined(mce,ix); 1220 if (isAlwaysDefd(mce, descr->base, arrSize)) { 1221 /* later: no ... */ 1222 /* emit code to emit a complaint if any of the vbits are 1. */ 1223 /* complainIfUndefined(mce, atom); */ 1224 } else { 1225 /* Do a cloned version of the Put that refers to the shadow 1226 area. */ 1227 IRRegArray* new_descr 1228 = mkIRRegArray( descr->base + mce->layout->total_sizeB, 1229 tyS, descr->nElems); 1230 stmt( mce->bb, IRStmt_PutI( mkIRPutI( new_descr, ix, bias, vatom ) )); 1231 } 1232 } 1233 1234 1235 /* Return an expression which contains the V bits corresponding to the 1236 given GET (passed in in pieces). 1237 */ 1238 static 1239 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty ) 1240 { 1241 IRType tyS = shadowType(ty); 1242 tl_assert(ty != Ity_I1); 1243 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) { 1244 /* Always defined, return all zeroes of the relevant type */ 1245 return definedOfType(tyS); 1246 } else { 1247 /* return a cloned version of the Get that refers to the shadow 1248 area. */ 1249 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS ); 1250 } 1251 } 1252 1253 1254 /* Return an expression which contains the V bits corresponding to the 1255 given GETI (passed in in pieces). 
   */
static
IRExpr* shadow_GETI ( MCEnv* mce, IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowType(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   /* The array index comes from the original program; complain if it
      is not fully defined. */
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   /* force everything via 32-bit intermediaries. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int i;
   IRAtom* here;
   /* Start from 'all defined' and pessimistically merge each arg in. */
   IRAtom* curr = definedOfType(Ity_I32);
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}


/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.
        ---*/
/*------------------------------------------------------------*/

/* Bit-accurate shadow for 32-bit addition: the result is
   (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max)), i.e. undefined
   operand bits, plus any result bit that can change between the
   smallest and largest values the operands could take. */
static
__attribute__((unused))
IRAtom* expensiveAdd32 ( MCEnv* mce, IRAtom* qaa, IRAtom* qbb,
                         IRAtom* aa, IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IRType  ty;
   IROp    opAND, opOR, opXOR, opNOT, opADD;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   ty    = Ity_I32;
   opAND = Iop_And32;
   opOR  = Iop_Or32;
   opXOR = Iop_Xor32;
   opNOT = Iop_Not32;
   opADD = Iop_Add32;

   // a_min = aa & ~qaa
   a_min = assignNew(mce,ty,
                     binop(opAND, aa,
                                  assignNew(mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew(mce,ty,
                     binop(opAND, bb,
                                  assignNew(mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew(mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew(mce,ty, binop(opOR, bb, qbb));

   // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
   return
   assignNew(mce,ty,
             binop( opOR,
                    assignNew(mce,ty, binop(opOR, qaa, qbb)),
                    assignNew(mce,ty,
                              binop(opXOR,
                                    assignNew(mce,ty,
                                              binop(opADD, a_min, b_min)),
                                    assignNew(mce,ty,
                                              binop(opADD, a_max, b_max))
                              )
                    )
             )
   );
}


/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.             ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually.
   Each lane becomes all-0s if it was all-defined, all-1s otherwise
   (via the per-lane CmpNEZ ops). */

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}


/* Here's a simple scheme capable of handling ops derived from SSE1
   code and while only generating ops that can be efficiently
   implemented in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))

   unary32Fx4(x,y)    ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(UifUV128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a V128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU.  Basically since UifU is fast and
   chopping lanes off vector values is slow.
   Finally:

   unary32F0x4(x)     ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(x#))
                          )

   Where:

   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   PCast32x4(v#) = CmpNEZ32x4(v#)
*/

/* All-lanes binary 32-bit FP: UifU the shadows, then pessimise each
   32-bit lane. */
static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
   return at;
}

static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
   return at;
}

/* Lowest-lane-only binary 32-bit FP: pessimise only the bottom 32
   bits; the remaining lanes keep vatomX's shadow. */
static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

/* --- ... and ... 64Fx2 versions of the same ... --- */

static
IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
   return at;
}

static
IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
   return at;
}

static
IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

static
IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

/* --- --- Vector saturated narrowing --- --- */

/* This is quite subtle.  What to do is simple:

   Let the original narrowing op be QNarrowW{S,U}xN.  Produce:

      the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))

   Why this is right is not so simple.  Consider a lane in the args,
   vatom1 or 2, doesn't matter.

   After the PCast, that lane is all 0s (defined) or all
   1s(undefined).

   Both signed and unsigned saturating narrowing of all 0s produces
   all 0s, which is what we want.

   The all-1s case is more complex.
   Unsigned narrowing interprets an
   all-1s input as the largest unsigned integer, and so produces all
   1s as a result since that is the largest unsigned value at the
   smaller width.

   Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
   to -1, so we still wind up with all 1s at the smaller width.

   So:  In short, pessimise the args, then apply the original
   narrowing op.
*/
static
IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
                           IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   /* Select the per-lane pessimiser matching the op's source lane
      width. */
   switch (narrow_op) {
      case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
   return at3;
}


/* --- --- Vector integer arithmetic --- --- */

/* Simple ... UifU the args and per-lane pessimise the results. */
static
IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast8x16(mce, at);
   return at;
}

static
IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast16x8(mce, at);
   return at;
}

static
IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast32x4(mce, at);
   return at;
}

static
IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast64x2(mce, at);
   return at;
}


/*------------------------------------------------------------*/
/*--- Generate shadow values from all kinds of IRExprs.    ---*/
/*------------------------------------------------------------*/

/* Compute the shadow (V bits) of binary op 'op' applied to original
   atoms 'atom1' and 'atom2'.  Dispatches to the exact or
   approximating scheme appropriate for each op. */
static
IRAtom* expr2vbits_Binop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2 )
{
   IRType  and_or_ty;
   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);

   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   switch (op) {

      /* V128-bit SIMD (SSE2-esque) */

      case Iop_ShrN16x8:
      case Iop_ShrN32x4:
      case Iop_ShrN64x2:
      case Iop_SarN16x8:
      case Iop_SarN32x4:
      case Iop_ShlN16x8:
      case Iop_ShlN32x4:
      case Iop_ShlN64x2:
         /* Same scheme as with all other shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));

      case Iop_QSub8Ux16:
      case Iop_QSub8Sx16:
      case Iop_Sub8x16:
      case Iop_Min8Ux16:
      case Iop_Max8Ux16:
      case Iop_CmpGT8Sx16:
      case Iop_CmpEQ8x16:
      case Iop_Avg8Ux16:
      case Iop_QAdd8Ux16:
      case Iop_QAdd8Sx16:
      case Iop_Add8x16:
         return binary8Ix16(mce, vatom1, vatom2);

      case Iop_QSub16Ux8:
      case Iop_QSub16Sx8:
      case Iop_Sub16x8:
      case Iop_Mul16x8:
      case Iop_MulHi16Sx8:
      case Iop_MulHi16Ux8:
      case Iop_Min16Sx8:
      case Iop_Max16Sx8:
      case Iop_CmpGT16Sx8:
      case Iop_CmpEQ16x8:
      case Iop_Avg16Ux8:
      case Iop_QAdd16Ux8:
      case Iop_QAdd16Sx8:
      case Iop_Add16x8:
         return binary16Ix8(mce, vatom1, vatom2);

      case Iop_Sub32x4:
      case Iop_QSub32Sx4:
      case Iop_QSub32Ux4:
      case Iop_CmpGT32Sx4:
      case Iop_CmpEQ32x4:
      case Iop_Add32x4:
      case Iop_QAdd32Ux4:
      case Iop_QAdd32Sx4:
         return binary32Ix4(mce, vatom1, vatom2);

      case Iop_Sub64x2:
      case Iop_QSub64Ux2:
      case Iop_QSub64Sx2:
      case Iop_Add64x2:
      case Iop_QAdd64Ux2:
      case Iop_QAdd64Sx2:
         return binary64Ix2(mce, vatom1, vatom2);

      case Iop_QNarrowBin32Sto16Sx8:
      case Iop_QNarrowBin16Sto8Sx16:
      case Iop_QNarrowBin16Sto8Ux16:
         return vectorNarrowV128(mce, op, vatom1, vatom2);

      case Iop_Sub64Fx2:
      case Iop_Mul64Fx2:
      case Iop_Min64Fx2:
      case Iop_Max64Fx2:
      case Iop_Div64Fx2:
      case Iop_CmpLT64Fx2:
      case Iop_CmpLE64Fx2:
      case Iop_CmpEQ64Fx2:
      case Iop_Add64Fx2:
         return binary64Fx2(mce, vatom1, vatom2);

      case Iop_Sub64F0x2:
      case Iop_Mul64F0x2:
      case Iop_Min64F0x2:
      case Iop_Max64F0x2:
      case Iop_Div64F0x2:
      case Iop_CmpLT64F0x2:
      case Iop_CmpLE64F0x2:
      case Iop_CmpEQ64F0x2:
      case Iop_Add64F0x2:
         return binary64F0x2(mce, vatom1, vatom2);

      /* V128-bit SIMD (SSE1-esque) */

      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
      case Iop_Min32Fx4:
      case Iop_Max32Fx4:
      case Iop_Div32Fx4:
      case Iop_CmpLT32Fx4:
      case Iop_CmpLE32Fx4:
      case Iop_CmpEQ32Fx4:
      case Iop_Add32Fx4:
         return binary32Fx4(mce, vatom1, vatom2);

      case Iop_Sub32F0x4:
      case Iop_Mul32F0x4:
      case Iop_Min32F0x4:
      case Iop_Max32F0x4:
      case Iop_Div32F0x4:
      case Iop_CmpLT32F0x4:
      case Iop_CmpLE32F0x4:
      case Iop_CmpEQ32F0x4:
      case Iop_Add32F0x4:
         return binary32F0x4(mce, vatom1, vatom2);

      /* V128-bit data-steering */
      case Iop_SetV128lo32:
      case Iop_SetV128lo64:
      case Iop_64HLtoV128:
      case Iop_InterleaveLO64x2:
      case Iop_InterleaveLO32x4:
      case Iop_InterleaveLO16x8:
      case Iop_InterleaveLO8x16:
      case Iop_InterleaveHI64x2:
      case Iop_InterleaveHI32x4:
      case Iop_InterleaveHI16x8:
      case Iop_InterleaveHI8x16:
         /* Pure data movement: apply the same op to the shadows. */
         return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));

      /* Scalar floating point */

      //      case Iop_RoundF64:
      case Iop_F64toI64S:
      case Iop_I64StoF64:
         /* First arg is I32 (rounding mode), second is F64 or I64
            (data). */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
         /* Takes two F64 args. */
      case Iop_F64toI32S:
      case Iop_F64toF32:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_F64toI16S:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I16, vatom1, vatom2);

      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_PRemF64:
      case Iop_AtanF64:
      case Iop_AddF64:
      case Iop_DivF64:
      case Iop_SubF64:
      case Iop_MulF64:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_CmpF64:
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      /* non-FP after here */

      case Iop_DivModU64to32:
      case Iop_DivModS64to32:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_16HLto32:
         return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
      case Iop_32HLto64:
         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));

      case Iop_MullS32:
      case Iop_MullU32: {
         /* Low half: left-smear of the UifU'd shadows; high half:
            pessimised cast of the low half. */
         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
         return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
      }

      case Iop_MullS16:
      case Iop_MullU16: {
         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
         return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
      }

      case Iop_MullS8:
      case Iop_MullU8: {
         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
         return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
      }

      case Iop_Add32:
#        if 0
         return expensiveAdd32(mce, vatom1,vatom2, atom1,atom2);
#        endif
      case Iop_Sub32:
      case Iop_Mul32:
         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));

      case Iop_Mul16:
      case Iop_Add16:
      case Iop_Sub16:
         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));

      case Iop_Sub8:
      case Iop_Add8:
         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));

      case Iop_CmpLE32S: case Iop_CmpLE32U:
      case Iop_CmpLT32U: case Iop_CmpLT32S:
      case Iop_CmpEQ32: case Iop_CmpNE32:
         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));

      case Iop_CmpEQ16: case Iop_CmpNE16:
         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));

      case Iop_CmpEQ8: case Iop_CmpNE8:
         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));

      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
         /* Complain if the shift amount is undefined.  Then simply
            shift the first arg's V bits by the real shift amount. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));

      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));

      case Iop_Shl8: case Iop_Shr8:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));

      case Iop_Shl64: case Iop_Shr64:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));

      case Iop_AndV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
      case Iop_And64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
      case Iop_And32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
      case Iop_And16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
      case Iop_And8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;

      case Iop_OrV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
      case Iop_Or64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
      case Iop_Or32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
      case Iop_Or16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
      case Iop_Or8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;

      do_And_Or:
         /* UifU merges undefinedness, then DifD with the 'improve'
            terms re-marks as defined those result bits forced by a
            defined absorbing operand bit. */
         return
         assignNew(
            mce,
            and_or_ty,
            difd(mce, uifu(mce, vatom1, vatom2),
                      difd(mce, improve(mce, atom1, vatom1),
                                improve(mce, atom2, vatom2) ) ) );

      case Iop_Xor8:
         return mkUifU8(mce, vatom1, vatom2);
      case Iop_Xor16:
         return mkUifU16(mce, vatom1, vatom2);
      case Iop_Xor32:
         return mkUifU32(mce, vatom1, vatom2);
      case Iop_Xor64:
         return mkUifU64(mce, vatom1, vatom2);
      case Iop_XorV128:
         return mkUifUV128(mce, vatom1, vatom2);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Binop");
   }
}


/* Compute the shadow (V bits) of unary op 'op' applied to original
   atom 'atom'. */
static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_Sqrt32Fx4:
      case Iop_RecipEst32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrtEst32F0x4:
      case Iop_RecipEst32F0x4:
         return unary32F0x4(mce, vatom);

      case Iop_32UtoV128:
      case Iop_64UtoV128:
         return assignNew(mce, Ity_V128, unop(op, vatom));

      case Iop_F32toF64:
      case Iop_I32StoF64:
      case Iop_NegF64:
      case Iop_SinF64:
      case Iop_CosF64:
      case Iop_TanF64:
      case Iop_SqrtF64:
      case Iop_AbsF64:
      case Iop_2xm1F64:
         /* Lazy: any undefinedness in the input taints the whole
            64-bit result. */
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_Clz32:
      case Iop_Ctz32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
         /* Widening/extraction: apply the same op to the shadow. */
         return assignNew(mce, Ity_I64, unop(op, vatom));

      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
         return assignNew(mce, Ity_I32, unop(op, vatom));

      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
         return assignNew(mce, Ity_I16, unop(op, vatom));

      case Iop_1Uto8:
      case Iop_16to8:
      case Iop_32to8:
         return assignNew(mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         /* Bit-for-bit reinterpretations and Not leave definedness
            unchanged. */
         return vatom;

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}


/* Worker function; do not call directly. */
static
IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   void*    helper;
   HChar*   hname;
   IRDirty* di;
   IRTemp   datavbits;
   IRAtom*  addrAct;

   tl_assert(isOriginalAtom(mce,addr));

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowType(ty);
   switch (ty) {
      case Ity_I64: helper = &MC_(helperc_LOADV8);
                    hname = "MC_(helperc_LOADV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_LOADV4);
                    hname = "MC_(helperc_LOADV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_LOADV2);
                    hname = "MC_(helperc_LOADV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_LOADV1);
                    hname = "MC_(helperc_LOADV1)";
                    break;
      default:      ppIRType(ty);
                    VG_(tool_panic)("memcheck:do_shadow_LDle");
   }

   /* Generate the actual address into addrAct. */
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read.
*/ 2099 datavbits = newIRTemp(mce->bb->tyenv, ty); 2100 di = unsafeIRDirty_1_N( datavbits, 2101 1/*regparms*/, hname, helper, 2102 mkIRExprVec_1( addrAct )); 2103 setHelperAnns( mce, di ); 2104 stmt( mce->bb, IRStmt_Dirty(di) ); 2105 2106 return mkexpr(datavbits); 2107 } 2108 2109 2110 static 2111 IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias ) 2112 { 2113 IRAtom *v64hi, *v64lo; 2114 switch (shadowType(ty)) { 2115 case Ity_I8: 2116 case Ity_I16: 2117 case Ity_I32: 2118 case Ity_I64: 2119 return expr2vbits_LDle_WRK(mce, ty, addr, bias); 2120 case Ity_V128: 2121 v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias); 2122 v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8); 2123 return assignNew( mce, 2124 Ity_V128, 2125 binop(Iop_64HLtoV128, v64hi, v64lo)); 2126 default: 2127 VG_(tool_panic)("expr2vbits_LDle"); 2128 } 2129 } 2130 2131 2132 static 2133 IRAtom* expr2vbits_ITE ( MCEnv* mce, 2134 IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse ) 2135 { 2136 IRAtom *vbitsC, *vbits0, *vbits1; 2137 IRType ty; 2138 /* Given ITE(cond,iftrue,iffalse), generate 2139 ITE(cond,iftrue#,iffalse#) `UifU` PCast(cond#) 2140 That is, steer the V bits like the originals, but trash the 2141 result if the steering value is undefined. This gives 2142 lazy propagation. */ 2143 tl_assert(isOriginalAtom(mce, cond)); 2144 tl_assert(isOriginalAtom(mce, iftrue)); 2145 tl_assert(isOriginalAtom(mce, iffalse)); 2146 2147 vbitsC = expr2vbits(mce, cond); 2148 vbits0 = expr2vbits(mce, iffalse); 2149 vbits1 = expr2vbits(mce, iftrue); 2150 ty = typeOfIRExpr(mce->bb->tyenv, vbits0); 2151 2152 return 2153 mkUifU(mce, ty, assignNew(mce, ty, IRExpr_ITE(cond, vbits1, vbits0)), 2154 mkPCastTo(mce, ty, vbitsC) ); 2155 } 2156 2157 /* --------- This is the main expression-handling function. 
--------- */ 2158 2159 static 2160 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e ) 2161 { 2162 switch (e->tag) { 2163 2164 case Iex_Get: 2165 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty ); 2166 2167 case Iex_GetI: 2168 return shadow_GETI( mce, e->Iex.GetI.descr, 2169 e->Iex.GetI.ix, e->Iex.GetI.bias ); 2170 2171 case Iex_RdTmp: 2172 return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) ); 2173 2174 case Iex_Const: 2175 return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e))); 2176 2177 case Iex_Binop: 2178 return expr2vbits_Binop( 2179 mce, 2180 e->Iex.Binop.op, 2181 e->Iex.Binop.arg1, e->Iex.Binop.arg2 2182 ); 2183 2184 case Iex_Unop: 2185 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg ); 2186 2187 case Iex_Load: 2188 return expr2vbits_LDle( mce, e->Iex.Load.ty, 2189 e->Iex.Load.addr, 0/*addr bias*/ ); 2190 2191 case Iex_CCall: 2192 return mkLazyN( mce, e->Iex.CCall.args, 2193 e->Iex.CCall.retty, 2194 e->Iex.CCall.cee ); 2195 2196 case Iex_ITE: 2197 return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue, 2198 e->Iex.ITE.iffalse); 2199 2200 default: 2201 VG_(printf)("\n"); 2202 ppIRExpr(e); 2203 VG_(printf)("\n"); 2204 VG_(tool_panic)("memcheck: expr2vbits"); 2205 } 2206 } 2207 2208 /*------------------------------------------------------------*/ 2209 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/ 2210 /*------------------------------------------------------------*/ 2211 2212 /* Widen a value to the host word size. */ 2213 2214 static 2215 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom ) 2216 { 2217 IRType ty, tyH; 2218 2219 /* vatom is vbits-value and as such can only have a shadow type. 
*/ 2220 tl_assert(isShadowAtom(mce,vatom)); 2221 2222 ty = typeOfIRExpr(mce->bb->tyenv, vatom); 2223 tyH = mce->hWordTy; 2224 2225 if (tyH == Ity_I32) { 2226 switch (ty) { 2227 case Ity_I32: return vatom; 2228 case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom)); 2229 case Ity_I8: return assignNew(mce, tyH, unop(Iop_8Uto32, vatom)); 2230 default: goto unhandled; 2231 } 2232 } else { 2233 goto unhandled; 2234 } 2235 unhandled: 2236 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n"); 2237 VG_(tool_panic)("zwidenToHostWord"); 2238 } 2239 2240 2241 /* Generate a shadow store. addr is always the original address atom. 2242 You can pass in either originals or V-bits for the data atom, but 2243 obviously not both. */ 2244 2245 static 2246 void do_shadow_STle ( MCEnv* mce, 2247 IRAtom* addr, UInt bias, 2248 IRAtom* data, IRAtom* vdata ) 2249 { 2250 IROp mkAdd; 2251 IRType ty, tyAddr; 2252 IRDirty *di, *diLo64, *diHi64; 2253 IRAtom *addrAct, *addrLo64, *addrHi64; 2254 IRAtom *vdataLo64, *vdataHi64; 2255 IRAtom *eBias, *eBias0, *eBias8; 2256 void* helper = NULL; 2257 HChar* hname = NULL; 2258 2259 tyAddr = mce->hWordTy; 2260 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; 2261 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); 2262 2263 di = diLo64 = diHi64 = NULL; 2264 eBias = eBias0 = eBias8 = NULL; 2265 addrAct = addrLo64 = addrHi64 = NULL; 2266 vdataLo64 = vdataHi64 = NULL; 2267 2268 if (data) { 2269 tl_assert(!vdata); 2270 tl_assert(isOriginalAtom(mce, data)); 2271 tl_assert(bias == 0); 2272 vdata = expr2vbits( mce, data ); 2273 } else { 2274 tl_assert(vdata); 2275 } 2276 2277 tl_assert(isOriginalAtom(mce,addr)); 2278 tl_assert(isShadowAtom(mce,vdata)); 2279 2280 ty = typeOfIRExpr(mce->bb->tyenv, vdata); 2281 2282 /* First, emit a definedness test for the address. This also sets 2283 the address (shadow) to 'defined' following the test. 
*/ 2284 complainIfUndefined( mce, addr ); 2285 2286 /* Now decide which helper function to call to write the data V 2287 bits into shadow memory. */ 2288 switch (ty) { 2289 case Ity_V128: /* we'll use the helper twice */ 2290 case Ity_I64: helper = &MC_(helperc_STOREV8); 2291 hname = "MC_(helperc_STOREV8)"; 2292 break; 2293 case Ity_I32: helper = &MC_(helperc_STOREV4); 2294 hname = "MC_(helperc_STOREV4)"; 2295 break; 2296 case Ity_I16: helper = &MC_(helperc_STOREV2); 2297 hname = "MC_(helperc_STOREV2)"; 2298 break; 2299 case Ity_I8: helper = &MC_(helperc_STOREV1); 2300 hname = "MC_(helperc_STOREV1)"; 2301 break; 2302 default: VG_(tool_panic)("memcheck:do_shadow_STle"); 2303 } 2304 2305 if (ty == Ity_V128) { 2306 2307 /* V128-bit case */ 2308 /* See comment in next clause re 64-bit regparms */ 2309 eBias0 = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias); 2310 addrLo64 = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) ); 2311 vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata)); 2312 diLo64 = unsafeIRDirty_0_N( 2313 1/*regparms*/, hname, helper, 2314 mkIRExprVec_2( addrLo64, vdataLo64 )); 2315 2316 eBias8 = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8); 2317 addrHi64 = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) ); 2318 vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata)); 2319 diHi64 = unsafeIRDirty_0_N( 2320 1/*regparms*/, hname, helper, 2321 mkIRExprVec_2( addrHi64, vdataHi64 )); 2322 2323 setHelperAnns( mce, diLo64 ); 2324 setHelperAnns( mce, diHi64 ); 2325 stmt( mce->bb, IRStmt_Dirty(diLo64) ); 2326 stmt( mce->bb, IRStmt_Dirty(diHi64) ); 2327 2328 } else { 2329 2330 /* 8/16/32/64-bit cases */ 2331 /* Generate the actual address into addrAct. */ 2332 if (bias == 0) { 2333 addrAct = addr; 2334 } else { 2335 eBias = tyAddr==Ity_I32 ? 
mkU32(bias) : mkU64(bias); 2336 addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) ); 2337 } 2338 2339 if (ty == Ity_I64) { 2340 /* We can't do this with regparm 2 on 32-bit platforms, since 2341 the back ends aren't clever enough to handle 64-bit 2342 regparm args. Therefore be different. */ 2343 di = unsafeIRDirty_0_N( 2344 1/*regparms*/, hname, helper, 2345 mkIRExprVec_2( addrAct, vdata )); 2346 } else { 2347 di = unsafeIRDirty_0_N( 2348 2/*regparms*/, hname, helper, 2349 mkIRExprVec_2( addrAct, 2350 zwidenToHostWord( mce, vdata ))); 2351 } 2352 setHelperAnns( mce, di ); 2353 stmt( mce->bb, IRStmt_Dirty(di) ); 2354 } 2355 2356 } 2357 2358 2359 /* Do lazy pessimistic propagation through a dirty helper call, by 2360 looking at the annotations on it. This is the most complex part of 2361 Memcheck. */ 2362 2363 static IRType szToITy ( Int n ) 2364 { 2365 switch (n) { 2366 case 1: return Ity_I8; 2367 case 2: return Ity_I16; 2368 case 4: return Ity_I32; 2369 case 8: return Ity_I64; 2370 default: VG_(tool_panic)("szToITy(memcheck)"); 2371 } 2372 } 2373 2374 static 2375 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d ) 2376 { 2377 Int i, n, offset, toDo, gSz, gOff; 2378 IRAtom *src, *here, *curr; 2379 IRType tyAddr, tySrc, tyDst; 2380 IRTemp dst; 2381 2382 /* First check the guard. */ 2383 complainIfUndefined(mce, d->guard); 2384 2385 /* Now round up all inputs and PCast over them. */ 2386 curr = definedOfType(Ity_I32); 2387 2388 /* Inputs: unmasked args */ 2389 for (i = 0; d->args[i]; i++) { 2390 if (d->cee->mcx_mask & (1<<i)) { 2391 /* ignore this arg */ 2392 } else { 2393 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) ); 2394 curr = mkUifU32(mce, here, curr); 2395 } 2396 } 2397 2398 /* Inputs: guest state that we read. */ 2399 for (i = 0; i < d->nFxState; i++) { 2400 tl_assert(d->fxState[i].fx != Ifx_None); 2401 if (d->fxState[i].fx == Ifx_Write) 2402 continue; 2403 2404 /* Ignore any sections marked as 'always defined'. 
*/ 2405 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) { 2406 if (0) 2407 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n", 2408 d->fxState[i].offset, d->fxState[i].size ); 2409 continue; 2410 } 2411 2412 /* This state element is read or modified. So we need to 2413 consider it. If larger than 8 bytes, deal with it in 8-byte 2414 chunks. */ 2415 gSz = d->fxState[i].size; 2416 gOff = d->fxState[i].offset; 2417 tl_assert(gSz > 0); 2418 while (True) { 2419 if (gSz == 0) break; 2420 n = gSz <= 8 ? gSz : 8; 2421 /* update 'curr' with UifU of the state slice 2422 gOff .. gOff+n-1 */ 2423 tySrc = szToITy( n ); 2424 src = assignNew( mce, tySrc, 2425 shadow_GET(mce, gOff, tySrc ) ); 2426 here = mkPCastTo( mce, Ity_I32, src ); 2427 curr = mkUifU32(mce, here, curr); 2428 gSz -= n; 2429 gOff += n; 2430 } 2431 2432 } 2433 2434 /* Inputs: memory. First set up some info needed regardless of 2435 whether we're doing reads or writes. */ 2436 tyAddr = Ity_INVALID; 2437 2438 if (d->mFx != Ifx_None) { 2439 /* Because we may do multiple shadow loads/stores from the same 2440 base address, it's best to do a single test of its 2441 definedness right now. Post-instrumentation optimisation 2442 should remove all but this test. 
*/ 2443 tl_assert(d->mAddr); 2444 complainIfUndefined(mce, d->mAddr); 2445 2446 tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr); 2447 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64); 2448 tl_assert(tyAddr == mce->hWordTy); /* not really right */ 2449 } 2450 2451 /* Deal with memory inputs (reads or modifies) */ 2452 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) { 2453 offset = 0; 2454 toDo = d->mSize; 2455 /* chew off 32-bit chunks */ 2456 while (toDo >= 4) { 2457 here = mkPCastTo( 2458 mce, Ity_I32, 2459 expr2vbits_LDle ( mce, Ity_I32, 2460 d->mAddr, d->mSize - toDo ) 2461 ); 2462 curr = mkUifU32(mce, here, curr); 2463 toDo -= 4; 2464 } 2465 /* chew off 16-bit chunks */ 2466 while (toDo >= 2) { 2467 here = mkPCastTo( 2468 mce, Ity_I32, 2469 expr2vbits_LDle ( mce, Ity_I16, 2470 d->mAddr, d->mSize - toDo ) 2471 ); 2472 curr = mkUifU32(mce, here, curr); 2473 toDo -= 2; 2474 } 2475 tl_assert(toDo == 0); /* also need to handle 1-byte excess */ 2476 } 2477 2478 /* Whew! So curr is a 32-bit V-value summarising pessimistically 2479 all the inputs to the helper. Now we need to re-distribute the 2480 results to all destinations. */ 2481 2482 /* Outputs: the destination temporary, if there is one. */ 2483 if (d->tmp != IRTemp_INVALID) { 2484 dst = findShadowTmp(mce, d->tmp); 2485 tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp); 2486 assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) ); 2487 } 2488 2489 /* Outputs: guest state that we write or modify. */ 2490 for (i = 0; i < d->nFxState; i++) { 2491 tl_assert(d->fxState[i].fx != Ifx_None); 2492 if (d->fxState[i].fx == Ifx_Read) 2493 continue; 2494 /* Ignore any sections marked as 'always defined'. */ 2495 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) 2496 continue; 2497 /* This state element is written or modified. So we need to 2498 consider it. If larger than 8 bytes, deal with it in 8-byte 2499 chunks. 
*/ 2500 gSz = d->fxState[i].size; 2501 gOff = d->fxState[i].offset; 2502 tl_assert(gSz > 0); 2503 while (True) { 2504 if (gSz == 0) break; 2505 n = gSz <= 8 ? gSz : 8; 2506 /* Write suitably-casted 'curr' to the state slice 2507 gOff .. gOff+n-1 */ 2508 tyDst = szToITy( n ); 2509 do_shadow_PUT( mce, gOff, 2510 NULL, /* original atom */ 2511 mkPCastTo( mce, tyDst, curr ) ); 2512 gSz -= n; 2513 gOff += n; 2514 } 2515 } 2516 2517 /* Outputs: memory that we write or modify. */ 2518 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) { 2519 offset = 0; 2520 toDo = d->mSize; 2521 /* chew off 32-bit chunks */ 2522 while (toDo >= 4) { 2523 do_shadow_STle( mce, d->mAddr, d->mSize - toDo, 2524 NULL, /* original data */ 2525 mkPCastTo( mce, Ity_I32, curr ) ); 2526 toDo -= 4; 2527 } 2528 /* chew off 16-bit chunks */ 2529 while (toDo >= 2) { 2530 do_shadow_STle( mce, d->mAddr, d->mSize - toDo, 2531 NULL, /* original data */ 2532 mkPCastTo( mce, Ity_I16, curr ) ); 2533 toDo -= 2; 2534 } 2535 tl_assert(toDo == 0); /* also need to handle 1-byte excess */ 2536 } 2537 2538 } 2539 2540 2541 /*------------------------------------------------------------*/ 2542 /*--- Memcheck main ---*/ 2543 /*------------------------------------------------------------*/ 2544 2545 static Bool isBogusAtom ( IRAtom* at ) 2546 { 2547 ULong n = 0; 2548 IRConst* con; 2549 tl_assert(isIRAtom(at)); 2550 if (at->tag == Iex_RdTmp) 2551 return False; 2552 tl_assert(at->tag == Iex_Const); 2553 con = at->Iex.Const.con; 2554 switch (con->tag) { 2555 case Ico_U8: n = (ULong)con->Ico.U8; break; 2556 case Ico_U16: n = (ULong)con->Ico.U16; break; 2557 case Ico_U32: n = (ULong)con->Ico.U32; break; 2558 case Ico_U64: n = (ULong)con->Ico.U64; break; 2559 default: ppIRExpr(at); tl_assert(0); 2560 } 2561 /* VG_(printf)("%llx\n", n); */ 2562 return (n == 0xFEFEFEFF 2563 || n == 0x80808080 2564 || n == 0x1010101 2565 || n == 1010100); 2566 } 2567 2568 __attribute__((unused)) 2569 static Bool checkForBogusLiterals ( /*FLAT*/ 
IRStmt* st ) 2570 { 2571 Int i; 2572 IRExpr* e; 2573 switch (st->tag) { 2574 case Ist_WrTmp: 2575 e = st->Ist.WrTmp.data; 2576 switch (e->tag) { 2577 case Iex_Get: 2578 case Iex_RdTmp: 2579 return False; 2580 case Iex_Unop: 2581 return isBogusAtom(e->Iex.Unop.arg); 2582 case Iex_Binop: 2583 return isBogusAtom(e->Iex.Binop.arg1) 2584 || isBogusAtom(e->Iex.Binop.arg2); 2585 case Iex_ITE: 2586 return isBogusAtom(e->Iex.ITE.cond) 2587 || isBogusAtom(e->Iex.ITE.iftrue) 2588 || isBogusAtom(e->Iex.ITE.iffalse); 2589 case Iex_Load: 2590 return isBogusAtom(e->Iex.Load.addr); 2591 case Iex_CCall: 2592 for (i = 0; e->Iex.CCall.args[i]; i++) 2593 if (isBogusAtom(e->Iex.CCall.args[i])) 2594 return True; 2595 return False; 2596 default: 2597 goto unhandled; 2598 } 2599 case Ist_Put: 2600 return isBogusAtom(st->Ist.Put.data); 2601 case Ist_Store: 2602 return isBogusAtom(st->Ist.Store.addr) 2603 || isBogusAtom(st->Ist.Store.data); 2604 case Ist_Exit: 2605 return isBogusAtom(st->Ist.Exit.guard); 2606 default: 2607 unhandled: 2608 ppIRStmt(st); 2609 VG_(tool_panic)("hasBogusLiterals"); 2610 } 2611 } 2612 2613 IRSB* mc_instrument ( void* closureV, 2614 IRSB* bb_in, VexGuestLayout* layout, 2615 VexGuestExtents* vge, 2616 IRType gWordTy, IRType hWordTy ) 2617 { 2618 Bool verboze = False; //True; 2619 2620 /* Bool hasBogusLiterals = False; */ 2621 2622 Int i, j, first_stmt; 2623 IRStmt* st; 2624 MCEnv mce; 2625 2626 /* Set up BB */ 2627 IRSB* bb = emptyIRSB(); 2628 bb->tyenv = deepCopyIRTypeEnv(bb_in->tyenv); 2629 bb->next = deepCopyIRExpr(bb_in->next); 2630 bb->jumpkind = bb_in->jumpkind; 2631 2632 /* Set up the running environment. Only .bb is modified as we go 2633 along. */ 2634 mce.bb = bb; 2635 mce.layout = layout; 2636 mce.n_originalTmps = bb->tyenv->types_used; 2637 mce.hWordTy = hWordTy; 2638 mce.tmpMap = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp)); 2639 for (i = 0; i < mce.n_originalTmps; i++) 2640 mce.tmpMap[i] = IRTemp_INVALID; 2641 2642 /* Iterate over the stmts. 
*/ 2643 2644 for (i = 0; i < bb_in->stmts_used; i++) { 2645 st = bb_in->stmts[i]; 2646 if (!st) continue; 2647 2648 tl_assert(isFlatIRStmt(st)); 2649 2650 /* 2651 if (!hasBogusLiterals) { 2652 hasBogusLiterals = checkForBogusLiterals(st); 2653 if (hasBogusLiterals) { 2654 VG_(printf)("bogus: "); 2655 ppIRStmt(st); 2656 VG_(printf)("\n"); 2657 } 2658 } 2659 */ 2660 first_stmt = bb->stmts_used; 2661 2662 if (verboze) { 2663 ppIRStmt(st); 2664 VG_(printf)("\n\n"); 2665 } 2666 2667 switch (st->tag) { 2668 2669 case Ist_WrTmp: 2670 assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp), 2671 expr2vbits( &mce, st->Ist.WrTmp.data) ); 2672 break; 2673 2674 case Ist_Put: 2675 do_shadow_PUT( &mce, 2676 st->Ist.Put.offset, 2677 st->Ist.Put.data, 2678 NULL /* shadow atom */ ); 2679 break; 2680 2681 case Ist_PutI: 2682 do_shadow_PUTI( &mce, 2683 st->Ist.PutI.details->descr, 2684 st->Ist.PutI.details->ix, 2685 st->Ist.PutI.details->bias, 2686 st->Ist.PutI.details->data ); 2687 break; 2688 2689 case Ist_Store: 2690 do_shadow_STle( &mce, st->Ist.Store.addr, 0/* addr bias */, 2691 st->Ist.Store.data, 2692 NULL /* shadow data */ ); 2693 break; 2694 2695 case Ist_Exit: 2696 /* if (!hasBogusLiterals) */ 2697 complainIfUndefined( &mce, st->Ist.Exit.guard ); 2698 break; 2699 2700 case Ist_Dirty: 2701 do_shadow_Dirty( &mce, st->Ist.Dirty.details ); 2702 break; 2703 2704 case Ist_IMark: 2705 case Ist_NoOp: 2706 break; 2707 2708 default: 2709 VG_(printf)("\n"); 2710 ppIRStmt(st); 2711 VG_(printf)("\n"); 2712 VG_(tool_panic)("memcheck: unhandled IRStmt"); 2713 2714 } /* switch (st->tag) */ 2715 2716 if (verboze) { 2717 for (j = first_stmt; j < bb->stmts_used; j++) { 2718 VG_(printf)(" "); 2719 ppIRStmt(bb->stmts[j]); 2720 VG_(printf)("\n"); 2721 } 2722 VG_(printf)("\n"); 2723 } 2724 2725 addStmtToIRSB(bb, st); 2726 2727 } 2728 2729 /* Now we need to complain if the jump target is undefined. 
*/ 2730 first_stmt = bb->stmts_used; 2731 2732 if (verboze) { 2733 VG_(printf)("bb->next = "); 2734 ppIRExpr(bb->next); 2735 VG_(printf)("\n\n"); 2736 } 2737 2738 complainIfUndefined( &mce, bb->next ); 2739 2740 if (verboze) { 2741 for (j = first_stmt; j < bb->stmts_used; j++) { 2742 VG_(printf)(" "); 2743 ppIRStmt(bb->stmts[j]); 2744 VG_(printf)("\n"); 2745 } 2746 VG_(printf)("\n"); 2747 } 2748 2749 return bb; 2750 } 2751 #endif /* UNUSED */ 2752 2753 /*--------------------------------------------------------------------*/ 2754 /*--- end test_main.c ---*/ 2755 /*--------------------------------------------------------------------*/ 2756