
/*--------------------------------------------------------------------*/
/*--- Ptrcheck: a pointer-use checker.                             ---*/
/*--- This file checks stack and global array accesses.           ---*/
/*---                                                    sg_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Ptrcheck, a Valgrind tool for checking pointer
   use in programs.

   Copyright (C) 2008-2010 OpenWorks Ltd
      info (at) open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_xarray.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_machine.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_options.h"

#include "pc_common.h"

#include "sg_main.h"      // self


static
void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/


//////////////////////////////////////////////////////////////
//                                                          //
// Basic Stuff                                              //
//                                                          //
//////////////////////////////////////////////////////////////

static inline Bool is_sane_TId ( ThreadId tid )
{
   return tid >= 0 && tid < VG_N_THREADS
          && tid != VG_INVALID_THREADID;
}

static void* sg_malloc ( HChar* cc, SizeT n ) {
   void* p;
   tl_assert(n > 0);
   p = VG_(malloc)( cc, n );
   tl_assert(p);
   return p;
}

static void sg_free ( void* p ) {
   tl_assert(p);
   VG_(free)(p);
}


/* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
   first interval is lower, 1 if the first interval is higher, and 0
   if there is any overlap.  Redundant paranoia with casting is there
   following what looked distinctly like a bug in gcc-4.1.2, in which
   some of the comparisons were done signedly instead of
   unsignedly. */
inline
static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
                                     Addr a2, SizeT n2 ) {
   UWord a1w = (UWord)a1;
   UWord n1w = (UWord)n1;
   UWord a2w = (UWord)a2;
   UWord n2w = (UWord)n2;
   tl_assert(n1w > 0 && n2w > 0);
   if (a1w + n1w <= a2w) return -1L;
   if (a2w + n2w <= a1w) return 1L;
   return 0;
}
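
/* For exposition only (not used by the tool): the three-way result
   above treats any overlap as equality, which is what later makes
   this shape of comparator usable as a WordFM key-compare for
   interval "stabbing" queries.  Concretely, on hypothetical ranges:

      cmp_nonempty_intervals(0x1000, 8, 0x1008, 8) == -1   (disjoint, below)
      cmp_nonempty_intervals(0x1008, 8, 0x1000, 8) ==  1   (disjoint, above)
      cmp_nonempty_intervals(0x1000, 8, 0x1004, 8) ==  0   (they overlap)
*/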
/* Return true iff [aSmall,aSmall+nSmall) is entirely contained
   within [aBig,aBig+nBig). */
inline
static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
                                Addr aSmall, SizeT nSmall ) {
   tl_assert(nBig > 0 && nSmall > 0);
   return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
}

inline
static Addr Addr__min ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a1 : a2;
}

inline
static Addr Addr__max ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a2 : a1;
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackBlocks Persistent Cache                             //
//                                                          //
//////////////////////////////////////////////////////////////

/* We maintain a set of XArray* of StackBlocks.  These are never
   freed.  When a new StackBlock vector is acquired from
   VG_(di_get_local_blocks_at_ip), we compare it to the existing set.
   If not present, it is added.  If present, the just-acquired one is
   freed and the copy used.

   This simplifies storage management elsewhere.  It allows us to
   assume that a pointer to an XArray* of StackBlock is valid forever.
   It also means there are no duplicates anywhere, which could be
   important from a space point of view for programs that generate a
   lot of translations, or where translations are frequently discarded
   and re-made.

   Note that we normalise the arrays by sorting the elements according
   to an arbitrary total order, so as to avoid the situation that two
   vectors describe the same set of variables but are not structurally
   identical. */

static inline Bool StackBlock__sane ( StackBlock* fb )
{
   if (fb->name[ sizeof(fb->name)-1 ] != 0)
      return False;
   if (fb->spRel != False && fb->spRel != True)
      return False;
   if (fb->isVec != False && fb->isVec != True)
      return False;
   return True;
}

/* Generate an arbitrary total ordering on StackBlocks. */
static Word StackBlock__cmp ( StackBlock* fb1, StackBlock* fb2 )
{
   Word r;
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   /* Hopefully the .base test hits most of the time.  For the blocks
      associated with any particular instruction, if the .base values
      are the same then probably it doesn't make sense for the other
      fields to be different.  But this is supposed to be a completely
      general structural total order, so we have to compare everything
      anyway. */
   if (fb1->base < fb2->base) return -1;
   if (fb1->base > fb2->base) return 1;
   /* compare sizes */
   if (fb1->szB < fb2->szB) return -1;
   if (fb1->szB > fb2->szB) return 1;
   /* compare sp/fp flag */
   if (fb1->spRel < fb2->spRel) return -1;
   if (fb1->spRel > fb2->spRel) return 1;
   /* compare is/is-not array-typed flag */
   if (fb1->isVec < fb2->isVec) return -1;
   if (fb1->isVec > fb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(fb1->name, fb2->name);
   return r;
}

/* Returns True if all fields except .szB are the same.  szBs may or
   may not be the same; they are simply not consulted. */
static Bool StackBlock__all_fields_except_szB_are_equal (
               StackBlock* fb1,
               StackBlock* fb2
            )
{
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   return fb1->base == fb2->base
          && fb1->spRel == fb2->spRel
          && fb1->isVec == fb2->isVec
          && 0 == VG_(strcmp)(fb1->name, fb2->name);
}
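
/* For exposition: a minimal, disabled sketch of the interning
   pattern which StackBlocks__find_and_dealloc__or_add below follows.
   The name 'intern_example' is hypothetical; the WordFM is assumed
   to have been created with a structural comparator such as
   StackBlocks__cmp. */
#if 0
static XArray* intern_example ( WordFM* set, XArray* cand )
{
   UWord key, val;
   if (VG_(lookupFM)( set, &key, &val, (UWord)cand )) {
      VG_(deleteXA)( cand );   /* a structurally equal copy exists */
      return (XArray*)key;     /* hand back the canonical copy */
   }
   VG_(addToFM)( set, (UWord)cand, 0 );
   return cand;                /* cand itself becomes canonical */
}
#endif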

/* Generate an arbitrary total ordering on vectors of StackBlocks. */
static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
{
   Word i, r, n1, n2;
   n1 = VG_(sizeXA)( fb1s );
   n2 = VG_(sizeXA)( fb2s );
   if (n1 < n2) return -1;
   if (n1 > n2) return 1;
   for (i = 0; i < n1; i++) {
      StackBlock *fb1, *fb2;
      fb1 = VG_(indexXA)( fb1s, i );
      fb2 = VG_(indexXA)( fb2s, i );
      r = StackBlock__cmp( fb1, fb2 );
      if (r != 0) return r;
   }
   tl_assert(i == n1 && i == n2);
   return 0;
}

static void pp_StackBlocks ( XArray* sbs )
{
   Word i, n = VG_(sizeXA)( sbs );
   VG_(message)(Vg_DebugMsg, "<<< STACKBLOCKS\n" );
   for (i = 0; i < n; i++) {
      StackBlock* sb = (StackBlock*)VG_(indexXA)( sbs, i );
      VG_(message)(Vg_DebugMsg,
         "   StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n",
         sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
         sb->isVec ? 'Y' : 'N', &sb->name[0]
      );
   }
   VG_(message)(Vg_DebugMsg, ">>> STACKBLOCKS\n" );
}


/* ---------- The StackBlock vector cache ---------- */

static WordFM* /* XArray* of StackBlock -> nothing */
       frameBlocks_set = NULL;

static void init_StackBlocks_set ( void )
{
   tl_assert(!frameBlocks_set);
   frameBlocks_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
                    (Word(*)(UWord,UWord))StackBlocks__cmp );
   tl_assert(frameBlocks_set);
}

/* Find the given StackBlock-vector in our collection thereof.  If
   found, deallocate the supplied one, and return the address of the
   copy.  If not found, add the supplied one to our collection and
   return its address. */
static XArray* /* of StackBlock */
       StackBlocks__find_and_dealloc__or_add
          ( XArray* /* of StackBlock */ orig )
{
   UWord key, val;

   /* First, normalise, as per comments above. */
   VG_(setCmpFnXA)( orig, (Int(*)(void*,void*))StackBlock__cmp );
   VG_(sortXA)( orig );

   /* Now get rid of any exact duplicates. */
  nuke_dups:
   { Word r, w, nEQ, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        w = 0;
        nEQ = 0;
        for (r = 0; r < n; r++) {
           if (r+1 < n) {
              StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
              StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
              Word c = StackBlock__cmp(pR0,pR1);
              tl_assert(c == -1 || c == 0);
              if (c == 0) { nEQ++; continue; }
           }
           if (w != r) {
              StackBlock* pW = VG_(indexXA)( orig, w );
              StackBlock* pR = VG_(indexXA)( orig, r );
              *pW = *pR;
           }
           w++;
        }
        tl_assert(r == n);
        tl_assert(w + nEQ == n);
        if (w < n) {
           VG_(dropTailXA)( orig, n-w );
        }
        if (0) VG_(printf)("delta %ld\n", n-w);
     }
   }

   /* Deal with the following strangeness, where two otherwise
      identical blocks are claimed to have different sizes.  In that
      case we use the larger size. */
   /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
      StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
      StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
   */
   { Word i, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        for (i = 0; i < n-1; i++) {
           StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
           StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
           if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
              /* They can't be identical because the previous tidying
                 pass would have removed the duplicates.  And they
                 can't be > because the earlier sorting pass would
                 have ordered otherwise-identical descriptors
                 according to < on .szB fields.
                 Hence: */
              tl_assert(sb0->szB < sb1->szB);
              sb0->szB = sb1->szB;
              /* This makes the blocks identical, at the size of the
                 larger one.  Rather than go to all the hassle of
                 sliding the rest down, simply go back to the
                 remove-duplicates stage.  The assertion guarantees
                 that we eventually make progress, since the rm-dups
                 stage will get rid of one of the blocks.  This is
                 expected to happen only exceedingly rarely. */
              tl_assert(StackBlock__cmp(sb0,sb1) == 0);
              goto nuke_dups;
           }
        }
     }
   }

   /* If there are any blocks which overlap and have the same
      fpRel-ness, junk the whole descriptor; it's obviously bogus.
      Icc11 certainly generates bogus info from time to time.

      This check is pretty weak; really we ought to have a stronger
      sanity check. */
   { Word i, n = VG_(sizeXA)( orig );
     static Int moans = 3;
     for (i = 0; i < n-1; i++) {
        StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
        StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
        if (sb1->spRel == sb2->spRel
            && (sb1->base >= sb2->base
                || sb1->base + sb1->szB > sb2->base)) {
           if (moans > 0 && !VG_(clo_xml)) {
              moans--;
              VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                                       "overlapping stack blocks\n");
              if (VG_(clo_verbosity) >= 2)
                 pp_StackBlocks(orig);
              if (moans == 0)
                 VG_(message)(Vg_UserMsg, "Further instances of this "
                                          "message will not be shown\n" );
           }
           VG_(dropTailXA)( orig, VG_(sizeXA)( orig ));
           break;
        }
     }
   }

   /* Now, do we have it already? */
   if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
      /* yes */
      XArray* res;
      tl_assert(val == 0);
      tl_assert(key != (UWord)orig);
      VG_(deleteXA)(orig);
      res = (XArray*)key;
      return res;
   } else {
      /* no */
      VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
      return orig;
   }
}

/* Top level function for getting the StackBlock vector for a given
   instruction.  It is guaranteed that the returned pointer will be
   valid for the entire rest of the run, and also that the addresses
   of the individual elements of the array will not change. */

static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
{
   XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
   tl_assert(blocks);
   return StackBlocks__find_and_dealloc__or_add( blocks );
}
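
/* Exposition: because the result is interned, callers may cache and
   compare the returned pointer directly, with no reference counting.
   A hypothetical check of that property:

      XArray* b1 = get_StackBlocks_for_IP(ip);
      XArray* b2 = get_StackBlocks_for_IP(ip);
      tl_assert(b1 == b2);   // same canonical vector both times
*/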

//////////////////////////////////////////////////////////////
//                                                          //
// GlobalBlocks Persistent Cache                            //
//                                                          //
//////////////////////////////////////////////////////////////

/* Generate an arbitrary total ordering on GlobalBlocks. */
static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
{
   Word r;
   /* compare addrs */
   if (gb1->addr < gb2->addr) return -1;
   if (gb1->addr > gb2->addr) return 1;
   /* compare sizes */
   if (gb1->szB < gb2->szB) return -1;
   if (gb1->szB > gb2->szB) return 1;
   /* compare is/is-not array-typed flag */
   if (gb1->isVec < gb2->isVec) return -1;
   if (gb1->isVec > gb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(gb1->name, gb2->name);
   if (r != 0) return r;
   /* compare the soname */
   r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
   return r;
}

static WordFM* /* GlobalBlock* -> nothing */
       globalBlock_set = NULL;

static void init_GlobalBlock_set ( void )
{
   tl_assert(!globalBlock_set);
   globalBlock_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
                    (Word(*)(UWord,UWord))GlobalBlock__cmp );
   tl_assert(globalBlock_set);
}


/* Top level function for making GlobalBlocks persistent.  Call here
   with a non-persistent version, and the returned one is guaranteed
   to be valid for the entire rest of the run.  The supplied one is
   copied, not stored, so can be freed after the call. */

static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
{
   UWord key, val;
   /* Now, do we have it already? */
   if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
      /* yes, return the copy */
      GlobalBlock* res;
      tl_assert(val == 0);
      res = (GlobalBlock*)key;
      tl_assert(res != orig);
      return res;
   } else {
      /* no.  clone it, store the clone and return the clone's
         address. */
      GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
                                      sizeof(GlobalBlock) );
      tl_assert(clone);
      *clone = *orig;
      VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
      return clone;
   }
}


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of StackTreeBlock                          //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a stack interval tree.  Zero length intervals (.szB == 0)
   are not allowed.

   A stack interval tree is a (WordFM StackTreeNode* void).  There is
   one stack interval tree for each thread.
*/
typedef
   struct {
      Addr        addr;
      SizeT       szB;   /* copied from .descr->szB */
      StackBlock* descr; /* it's an instance of this block */
      UWord       depth; /* depth of stack at time block was pushed */
   }
   StackTreeNode;

static void pp_StackTree ( WordFM* sitree, HChar* who )
{
   UWord keyW, valW;
   VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
   VG_(initIterFM)( sitree );
   while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
      StackTreeNode* nd = (StackTreeNode*)keyW;
      VG_(printf)("  [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
                  nd->descr, nd->descr->name, nd->descr->szB);
   }
   VG_(printf)(">>> END   pp_StackTree %s\n", who );
}

/* Interval comparison function for StackTreeNode */
static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
                                          StackTreeNode* sn2 )
{
   return cmp_nonempty_intervals(sn1->addr, sn1->szB,
                                 sn2->addr, sn2->szB);
}
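
/* Exposition: since the comparator above reports 0 for any overlap,
   a point query can be phrased as an FM lookup of the degenerate
   interval [a,a+1); the tree's ordinary binary search then lands on
   whichever stored interval contains 'a'.  find_StackTreeNode below
   relies on exactly this trick. */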
/* Find the node holding 'a', if any. */
static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
{
   UWord keyW, valW;
   StackTreeNode key;
   tl_assert(sitree);
   key.addr = a;
   key.szB  = 1;
   if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
      StackTreeNode* res = (StackTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(res != &key);
      return res;
   } else {
      return NULL;
   }
}

/* Note that the supplied XArray of StackBlock must have been
   made persistent already. */
__attribute__((noinline))
static void add_blocks_to_StackTree (
               /*MOD*/WordFM* sitree,
               XArray* /* StackBlock */ descrs,
               XArray* /* Addr */ bases,
               UWord depth
            )
{
   Bool debug = (Bool)0;
   Word i, nDescrs, nBases;

   nDescrs = VG_(sizeXA)( descrs );
   nBases  = VG_(sizeXA)( bases );
   tl_assert(nDescrs == nBases);

   if (nDescrs == 0) return;

   tl_assert(sitree);
   if (debug) {
      VG_(printf)("\ndepth = %lu\n", depth);
      pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
      pp_StackBlocks(descrs);
   }

   for (i = 0; i < nDescrs; i++) {
      Bool already_present;
      StackTreeNode* nyu;
      Addr        addr  = *(Addr*)VG_(indexXA)( bases, i );
      StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
      tl_assert(descr->szB > 0);
      nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
      nyu->addr  = addr;
      nyu->szB   = descr->szB;
      nyu->descr = descr;
      nyu->depth = depth;
      if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
      already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
      /* The interval can't already be there; else we have
         overlapping stack blocks. */
      tl_assert(!already_present);
      if (debug) {
         pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
      }
   }
   if (debug) {
      pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
      VG_(printf)("\n");
   }
}

static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
                                        XArray* /* Addr */ bases )
{
   UWord oldK, oldV;
   Word i, nBases = VG_(sizeXA)( bases );
   for (i = 0; i < nBases; i++) {
      Bool b;
      Addr addr = *(Addr*)VG_(indexXA)( bases, i );
      StackTreeNode* nd = find_StackTreeNode(sitree, addr);
      /* The interval must be there; we added it earlier when
         the associated frame was created. */
      tl_assert(nd);
      b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
      /* we just found the block! */
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(nd == (StackTreeNode*)oldK);
      sg_free(nd);
   }
}


static void delete_StackTree__kFin ( UWord keyW ) {
   StackTreeNode* nd = (StackTreeNode*)keyW;
   tl_assert(nd);
   sg_free(nd);
}
static void delete_StackTree__vFin ( UWord valW ) {
   tl_assert(valW == 0);
}
static void delete_StackTree ( WordFM* sitree )
{
   VG_(deleteFM)( sitree,
                  delete_StackTree__kFin, delete_StackTree__vFin );
}

static WordFM* new_StackTree ( void ) {
   return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
                      (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
}


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of GlobalTreeBlock                         //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a global interval tree.  Zero length intervals
   (.szB == 0) are not allowed.

   A global interval tree is a (WordFM GlobalTreeNode* void).  There
There 623 is one global interval tree for the entire process. 624 */ 625 typedef 626 struct { 627 Addr addr; /* copied from .descr->addr */ 628 SizeT szB; /* copied from .descr->szB */ 629 GlobalBlock* descr; /* it's this block */ 630 } 631 GlobalTreeNode; 632 633 static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) { 634 tl_assert(nd->descr); 635 VG_(printf)("GTNode [%#lx,+%ld) %s", 636 nd->addr, nd->szB, nd->descr->name); 637 } 638 639 static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree, 640 HChar* who ) 641 { 642 UWord keyW, valW; 643 GlobalTreeNode* nd; 644 VG_(printf)("<<< GlobalBlockTree (%s)\n", who); 645 VG_(initIterFM)( gitree ); 646 while (VG_(nextIterFM)( gitree, &keyW, &valW )) { 647 tl_assert(valW == 0); 648 nd = (GlobalTreeNode*)keyW; 649 VG_(printf)(" "); 650 GlobalTreeNode__pp(nd); 651 VG_(printf)("\n"); 652 } 653 VG_(doneIterFM)( gitree ); 654 VG_(printf)(">>>\n"); 655 } 656 657 /* Interval comparison function for GlobalTreeNode */ 658 static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1, 659 GlobalTreeNode* gn2 ) 660 { 661 return cmp_nonempty_intervals( gn1->addr, gn1->szB, 662 gn2->addr, gn2->szB ); 663 } 664 665 /* Find the node holding 'a', if any. */ 666 static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a ) 667 { 668 UWord keyW, valW; 669 GlobalTreeNode key; 670 key.addr = a; 671 key.szB = 1; 672 if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) { 673 GlobalTreeNode* res = (GlobalTreeNode*)keyW; 674 tl_assert(valW == 0); 675 tl_assert(res != &key); 676 return res; 677 } else { 678 return NULL; 679 } 680 } 681 682 /* Note that the supplied GlobalBlock must have been made persistent 683 already. */ 684 static void add_block_to_GlobalTree ( 685 /*MOD*/WordFM* gitree, 686 GlobalBlock* descr 687 ) 688 { 689 Bool already_present; 690 GlobalTreeNode *nyu, *nd; 691 UWord keyW, valW; 692 static Int moans = 3; 693 694 tl_assert(descr->szB > 0); 695 nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) ); 696 nyu->addr = descr->addr; 697 nyu->szB = descr->szB; 698 nyu->descr = descr; 699 700 /* Basically it's an error to add a global block to the tree that 701 is already in the tree. However, detect and ignore attempts to 702 insert exact duplicates; they do appear for some reason 703 (possible a bug in m_debuginfo?) */ 704 already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu ); 705 if (already_present) { 706 tl_assert(valW == 0); 707 nd = (GlobalTreeNode*)keyW; 708 tl_assert(nd); 709 tl_assert(nd != nyu); 710 tl_assert(nd->descr); 711 tl_assert(nyu->descr); 712 if (nd->addr == nyu->addr && nd->szB == nyu->szB 713 /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */ 714 /* Although it seems reasonable to demand that duplicate 715 blocks have identical names, that is too strict. For 716 example, reading debuginfo from glibc produces two 717 otherwise identical blocks with names "tzname" and 718 "__tzname". A constraint of the form "must be identical, 719 or one must be a substring of the other" would fix that. 720 However, such trickery is scuppered by the fact that we 721 truncate all variable names to 15 characters to make 722 storage management simpler, hence giving pairs like 723 "__EI___pthread_" (truncated) vs "__pthread_keys". So 724 it's simplest just to skip the name comparison 725 completely. 
          && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
         /* exact duplicate; ignore it */
         sg_free(nyu);
         return;
      }
      /* else fall through; the assertion below will catch it */
   }

   already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
   /* The interval can't already be there; else we have
      overlapping global blocks. */
   /* Unfortunately (25 Jan 09) at least icc11 has been seen to
      generate overlapping block descriptions in the Dwarf3; clearly
      bogus. */
   if (already_present && moans > 0 && !VG_(clo_xml)) {
      moans--;
      VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                               "overlapping global blocks\n");
      if (VG_(clo_verbosity) >= 2) {
         GlobalTree__pp( gitree,
                         "add_block_to_GlobalTree: non-exact duplicate" );
         VG_(printf)("Overlapping block: ");
         GlobalTreeNode__pp(nyu);
         VG_(printf)("\n");
      }
      if (moans == 0)
         VG_(message)(Vg_UserMsg, "Further instances of this "
                                  "message will not be shown\n" );
   }
   /* tl_assert(!already_present); */
}

static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
                                   Addr a, SizeT szB )
{
   /* One easy way to do this: look up [a,a+szB) in the tree.  That
      will either succeed, producing a block which intersects that
      range, in which case we delete it and repeat; or it will fail,
      in which case there are no blocks intersecting the range, and we
      can bring the process to a halt. */
   UWord keyW, valW, oldK, oldV;
   GlobalTreeNode key, *nd;
   Bool b, anyFound;

   tl_assert(szB > 0);

   anyFound = False;

   key.addr = a;
   key.szB  = szB;

   while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      anyFound = True;
      nd = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(nd != &key);
      tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);

      b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(oldK == keyW); /* check we deleted the node we just found */
   }

   return anyFound;
}


//////////////////////////////////////////////////////////////
//                                                          //
// Invar                                                    //
//                                                          //
//////////////////////////////////////////////////////////////

/* An invariant, as resulting from watching the destination of a
   memory referencing instruction.  Initially is Inv_Unset until the
   instruction makes a first access. */

typedef
   enum {
      Inv_Unset=1,  /* not established yet */
      Inv_Unknown,  /* unknown location */
      Inv_Stack0,   /* array-typed stack block in innermost frame */
      Inv_StackN,   /* array-typed stack block in non-innermost frame */
      Inv_Global,   /* array-typed global block */
   }
   InvarTag;

typedef
   struct {
      InvarTag tag;
      union {
         struct {
         } Unset;
         struct {
         } Unknown;
         struct {
            Addr  addr;
            SizeT szB;
            StackBlock* descr;
         } Stack0; /* innermost stack frame */
         struct {
            /* Pointer to a node in the interval tree for
               this thread. */
            StackTreeNode* nd;
         } StackN; /* non-innermost stack frame */
         struct {
            /* Pointer to a GlobalBlock in the interval tree of
               global blocks. */
            GlobalTreeNode* nd;
         } Global;
      }
      Inv;
   }
   Invar;
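
/* A minimal, disabled usage sketch (hypothetical helper name) of the
   tagged union above: set .tag first, then touch only the matching
   arm of .Inv, as all code below does. */
#if 0
static void example_set_Global ( Invar* inv, GlobalTreeNode* nd )
{
   inv->tag = Inv_Global;
   inv->Inv.Global.nd = nd;  /* only the .Global arm is valid now */
}
#endif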

/* Partial debugging printing for an Invar. */
static void pp_Invar ( Invar* i )
{
   switch (i->tag) {
      case Inv_Unset:
         VG_(printf)("Unset");
         break;
      case Inv_Unknown:
         VG_(printf)("Unknown");
         break;
      case Inv_Stack0:
         VG_(printf)("Stack0 [%#lx,+%lu)",
                     i->Inv.Stack0.addr, i->Inv.Stack0.szB);
         break;
      case Inv_StackN:
         VG_(printf)("StackN [%#lx,+%lu)",
                     i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
         break;
      case Inv_Global:
         VG_(printf)("Global [%#lx,+%lu)",
                     i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
         break;
      default:
         tl_assert(0);
   }
}

/* Compare two Invars for equality. */
static Bool eq_Invar ( Invar* i1, Invar* i2 )
{
   tl_assert(i1->tag != Inv_Unset);
   tl_assert(i2->tag != Inv_Unset);
   if (i1->tag != i2->tag)
      return False;
   switch (i1->tag) {
      case Inv_Unknown:
         return True;
      case Inv_Stack0:
         return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
                && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
      case Inv_StackN:
         return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
      case Inv_Global:
         return i1->Inv.Global.nd == i2->Inv.Global.nd;
      default:
         tl_assert(0);
   }
   /*NOTREACHED*/
   tl_assert(0);
}

/* Print selected parts of an Invar, suitable for use in error
   messages. */
static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
{
   HChar* str;
   tl_assert(nBuf >= 96);
   buf[0] = 0;
   switch (inv->tag) {
      case Inv_Unknown:
         VG_(sprintf)(buf, "%s", "unknown");
         break;
      case Inv_Stack0:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" in this frame",
                      str, inv->Inv.Stack0.descr->name );
         break;
      case Inv_StackN:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" in frame %lu back from here",
                      str, inv->Inv.StackN.nd->descr->name,
                           depth - inv->Inv.StackN.nd->depth );
         break;
      case Inv_Global:
         str = "array";
         VG_(sprintf)(buf, "global %s \"%s\" in object with soname \"%s\"",
                      str, inv->Inv.Global.nd->descr->name,
                           inv->Inv.Global.nd->descr->soname );
         break;
      case Inv_Unset:
         VG_(sprintf)(buf, "%s", "Unset!");
         break;
      default:
         tl_assert(0);
   }
}


//////////////////////////////////////////////////////////////
//                                                          //
// our globals                                              //
//                                                          //
//////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////
///

#define N_QCACHE 16

/* Powers of two only, else the result will be chaos */
#define QCACHE_ADVANCE_EVERY 16
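
/* Exposition: the power-of-two requirement exists because
   classify_address() below uses
      (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0
   as a cheap "every Nth time" test when nudging a hit entry towards
   the front of the cache.  With the value 16 the mask is 0xF, so the
   nudge happens on every 16th such hit; a non-power-of-two value
   would make the mask test meaningless. */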

/* Per-thread query cache.  Note that the invar can only be Inv_StackN
   (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
typedef
   struct {
      Addr  addr;
      SizeT szB;
      Invar inv;
   }
   QCElem;

typedef
   struct {
      Word   nInUse;
      QCElem elems[N_QCACHE];
   }
   QCache;

static void QCache__invalidate ( QCache* qc ) {
   tl_assert(qc->nInUse >= 0);
   qc->nInUse = 0;
}

static void QCache__pp ( QCache* qc, HChar* who )
{
   Word i;
   VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
   for (i = 0; i < qc->nInUse; i++) {
      VG_(printf)("  [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
      pp_Invar(&qc->elems[i].inv);
      VG_(printf)("\n");
   }
   VG_(printf)(">>>\n");
}

static ULong stats__qcache_queries = 0;
static ULong stats__qcache_misses  = 0;
static ULong stats__qcache_probes  = 0;

///
//////////////////////////////////////////////////////////////

/* Each thread has:
   * a shadow stack of StackFrames, which is a double-linked list
   * a stack block interval tree
*/
static struct _StackFrame* shadowStacks[VG_N_THREADS];

static WordFM* /* StackTreeNode */ siTrees[VG_N_THREADS];

static QCache qcaches[VG_N_THREADS];


/* Additionally, there is one global variable interval tree
   for the entire process.
*/
static WordFM* /* GlobalTreeNode */ giTree;


static void invalidate_all_QCaches ( void )
{
   Word i;
   for (i = 0; i < VG_N_THREADS; i++) {
      QCache__invalidate( &qcaches[i] );
   }
}

static void ourGlobals_init ( void )
{
   Word i;
   for (i = 0; i < VG_N_THREADS; i++) {
      shadowStacks[i] = NULL;
      siTrees[i] = NULL;
   }
   invalidate_all_QCaches();
   giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
                        (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
}


//////////////////////////////////////////////////////////////
//                                                          //
// Handle global variable load/unload events                //
//                                                          //
//////////////////////////////////////////////////////////////

static void acquire_globals ( ULong di_handle )
{
   Word n, i;
   XArray* /* of GlobalBlock */ gbs;
   if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
   gbs = VG_(di_get_global_blocks_from_dihandle)
            (di_handle, True/*arrays only*/);
   if (0) VG_(printf)("   GOT %ld globals\n", VG_(sizeXA)( gbs ));

   n = VG_(sizeXA)( gbs );
   for (i = 0; i < n; i++) {
      GlobalBlock* gbp;
      GlobalBlock* gb = VG_(indexXA)( gbs, i );
      if (0) VG_(printf)("   new Global size %2lu at %#lx:  %s %s\n",
                         gb->szB, gb->addr, gb->soname, gb->name );
      tl_assert(gb->szB > 0);
      /* Make a persistent copy of each GlobalBlock, and add it
         to the tree. */
      gbp = get_persistent_GlobalBlock( gb );
      add_block_to_GlobalTree( giTree, gbp );
   }

   VG_(deleteXA)( gbs );
}

/* We only intercept these two because we need to see any di_handles
   that might arise from the mappings/allocations. */
void sg_new_mem_mmap( Addr a, SizeT len,
                      Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_die_mem_munmap ( Addr a, SizeT len )
{
   Bool debug = (Bool)0;
   Bool overlap = False;

   if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );

   if (len == 0)
      return;

   overlap = del_GlobalTree_range(giTree, a, len);

   { /* redundant sanity check */
     UWord keyW, valW;
     VG_(initIterFM)( giTree );
     while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
       GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
       tl_assert(valW == 0);
       tl_assert(nd->szB > 0);
       tl_assert(nd->addr + nd->szB <= a
                 || a + len <= nd->addr);
     }
     VG_(doneIterFM)( giTree );
   }

   if (!overlap)
      return;

   /* Ok, the range contained some blocks.  Therefore we'll need to
      visit all the Invars in all the thread shadow stacks, and
      convert all Inv_Global entries that intersect [a,a+len) to
      Inv_Unknown. */
   tl_assert(len > 0);
   preen_global_Invars( a, len );
   invalidate_all_QCaches();
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackFrame                                               //
//                                                          //
//////////////////////////////////////////////////////////////

static ULong stats__total_accesses   = 0;
static ULong stats__classify_Stack0  = 0;
static ULong stats__classify_StackN  = 0;
static ULong stats__classify_Global  = 0;
static ULong stats__classify_Unknown = 0;
static ULong stats__Invars_preened   = 0;
static ULong stats__Invars_changed   = 0;
static ULong stats__t_i_b_empty      = 0;
static ULong stats__htab_fast        = 0;
static ULong stats__htab_searches    = 0;
static ULong stats__htab_probes      = 0;
static ULong stats__htab_resizes     = 0;


/* A dynamic instance of an instruction */
typedef
   struct {
      /* IMMUTABLE */
      Addr    insn_addr; /* NB! zero means 'not in use' */
      XArray* blocks;    /* XArray* of StackBlock, or NULL if none */
      /* MUTABLE */
      Invar invar;
   }
   IInstance;


#define N_HTAB_FIXED 64

typedef
   struct _StackFrame {
      /* The sp when the frame was created, so we know when to get rid
         of it. */
      Addr creation_sp;
      /* The stack frames for a thread are arranged as a doubly linked
         list.  Obviously the outermost frame in the stack has .outer
         as NULL and the innermost in theory has .inner as NULL.
         However, when a function returns, we don't delete the
         just-vacated StackFrame.  Instead, it is retained in the list
         and will be re-used when the next call happens.  This is so
         as to avoid constantly having to dynamically allocate and
         deallocate frames. */
      struct _StackFrame* inner;
      struct _StackFrame* outer;
      Word depth; /* 0 for outermost; increases inwards */
      /* Information for each memory referencing instruction, for this
         instantiation of the function.  The iinstances array is
         operated as a simple linear-probe hash table, which is
         dynamically expanded as necessary.  One critical thing is
         that an IInstance with a .insn_addr of zero is interpreted to
         mean that hash table slot is unused.  This means we can't
         store an IInstance for address zero. */
      /* Note that htab initially points to htab_fixed.  If htab_fixed
         turns out not to be big enough then htab is made to point to
         dynamically allocated memory.  But it's often the case that
         htab_fixed is big enough, so this optimisation saves a huge
         number of sg_malloc/sg_free call pairs. */
      IInstance* htab;
      UWord      htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
      UWord      htab_used; /* number of hash table slots currently in use */
      /* If this frame is currently making a call, then the following
         are relevant. */
      Addr sp_at_call;
      Addr fp_at_call;
      XArray* /* of Addr */ blocks_added_by_call;
      /* See comment just above */
      IInstance htab_fixed[N_HTAB_FIXED];
   }
   StackFrame;





/* Move this somewhere else? */
/* Visit all Invars in the entire system.  Change any Inv_Global
   Invars that intersect [a,a+len) to Inv_Unknown. */

__attribute__((noinline))
static void preen_global_Invar ( Invar* inv, Addr a, SizeT len )
{
   stats__Invars_preened++;
   tl_assert(len > 0);
   tl_assert(inv);
   switch (inv->tag) {
      case Inv_Global:
         tl_assert(inv->Inv.Global.nd);
         tl_assert(inv->Inv.Global.nd->szB > 0);
         if (0) VG_(printf)("preen_Invar Global %#lx %lu\n",
                            inv->Inv.Global.nd->addr,
                            inv->Inv.Global.nd->szB);
         if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr,
                                         inv->Inv.Global.nd->szB)) {
            inv->tag = Inv_Unknown;
            stats__Invars_changed++;
         }
         break;
      case Inv_Stack0:
      case Inv_StackN:
      case Inv_Unknown:
         break;
      default:
         tl_assert(0);
   }
}

__attribute__((noinline))
static void preen_global_Invars ( Addr a, SizeT len )
{
   Int         i;
   UWord       u;
   StackFrame* frame;
   tl_assert(len > 0);
   for (i = 0; i < VG_N_THREADS; i++) {
      frame = shadowStacks[i];
      if (!frame)
         continue; /* no frames for this thread */
      /* start from the innermost frame */
      while (frame->inner)
         frame = frame->inner;
      tl_assert(frame->outer);
      /* work through the frames from innermost to outermost.  The
         order isn't important; we just need to ensure we visit each
         frame once (including those which are not actually active,
         more 'inner' than the 'innermost active frame', viz, just
         hanging around waiting to be used, when the current innermost
         active frame makes more calls.  See comments on definition of
         struct _StackFrame. */
      for (; frame; frame = frame->outer) {
         UWord xx = 0; /* sanity check only; count of used htab entries */
         if (!frame->htab)
            continue; /* frame not in use.  See shadowStack_unwind(). */
         for (u = 0; u < frame->htab_size; u++) {
            IInstance* ii = &frame->htab[u];
            if (ii->insn_addr == 0)
               continue; /* not in use */
            if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); }
            preen_global_Invar( &ii->invar, a, len );
            xx++;
         }
         tl_assert(xx == frame->htab_used);
      }
   }
}


/* XXX this should be >> 2 on ppc32/64 since the bottom two bits
   of the ip are guaranteed to be zero */
inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
   return (ip >> 0) & (htab_size - 1);
}

__attribute__((noinline))
static void initialise_II_hash_table ( StackFrame* sf )
{
   UWord i;
   sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
   sf->htab = &sf->htab_fixed[0];
   tl_assert(sf->htab);
   sf->htab_used = 0;
   for (i = 0; i < sf->htab_size; i++)
      sf->htab[i].insn_addr = 0; /* NOT IN USE */
}


__attribute__((noinline))
static void resize_II_hash_table ( StackFrame* sf )
{
   UWord     i, j, ix, old_size, new_size;
   IInstance *old_htab, *new_htab, *old;

   tl_assert(sf && sf->htab);
   old_size = sf->htab_size;
   new_size = 2 * old_size;
   old_htab = sf->htab;
   new_htab = sg_malloc( "di.sg_main.rIht.1",
                         new_size * sizeof(IInstance) );
   for (i = 0; i < new_size; i++) {
      new_htab[i].insn_addr = 0; /* NOT IN USE */
   }
   for (i = 0; i < old_size; i++) {
      old = &old_htab[i];
      if (old->insn_addr == 0 /* NOT IN USE */)
         continue;
      ix = compute_II_hash(old->insn_addr, new_size);
      /* find out where to put this, in the new table */
      j = new_size;
      while (1) {
         if (new_htab[ix].insn_addr == 0)
            break;
         /* This can't ever happen, because it would mean the new
            table is full; that isn't allowed -- even the old table is
            only allowed to become half full. */
         tl_assert(j > 0);
         j--;
         ix++; if (ix == new_size) ix = 0;
      }
      /* copy the old entry to this location */
      tl_assert(ix < new_size);
      tl_assert(new_htab[ix].insn_addr == 0);
      new_htab[ix] = *old;
      tl_assert(new_htab[ix].insn_addr != 0);
   }
   /* all entries copied; free old table. */
   if (old_htab != &sf->htab_fixed[0])
      sg_free(old_htab);
   sf->htab = new_htab;
   sf->htab_size = new_size;
   /* check sf->htab_used is correct.  Optional and a bit expensive
      but anyway: */
   j = 0;
   for (i = 0; i < new_size; i++) {
      if (new_htab[i].insn_addr != 0) {
         j++;
      }
   }
   tl_assert(j == sf->htab_used);
   if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
}
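
/* Exposition: keeping the table at most half full (see the resize
   trigger in find_or_create_IInstance_SLOW below) guarantees that
   linear probing terminates, since every probe sequence must reach
   an empty slot.  A hypothetical sketch of the probe loop's shape,
   relying on the power-of-2 table size:

      ix = compute_II_hash(ip, size);
      while (tab[ix].insn_addr != 0 && tab[ix].insn_addr != ip)
         ix = (ix + 1) & (size - 1);
*/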

__attribute__((noinline))
static IInstance* find_or_create_IInstance_SLOW (
                     StackFrame* sf,
                     Addr ip,
                     XArray* /* StackBlock */ ip_frameblocks
                  )
{
   UWord i, ix;

   stats__htab_searches++;

   tl_assert(sf);
   tl_assert(sf->htab);

   /* Make sure the table load factor doesn't get too high. */
   if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
      stats__htab_resizes++;
      resize_II_hash_table(sf);
   }
   tl_assert(2 * sf->htab_used <= sf->htab_size);

   ix = compute_II_hash(ip, sf->htab_size);
   i = sf->htab_size;
   while (1) {
      stats__htab_probes++;
      /* Note that because of the way the fast-case handler works,
         these two tests are actually redundant in the first iteration
         of this loop.  (Except they aren't redundant if the code just
         above resized the table first. :-) */
      if (sf->htab[ix].insn_addr == ip)
         return &sf->htab[ix];
      if (sf->htab[ix].insn_addr == 0)
         break;
      /* If i ever gets to zero and we have found neither what we're
         looking for nor an empty slot, the table must be full.  Which
         isn't possible -- we monitor the load factor to ensure it
         doesn't get above say 50%; if that ever does happen the table
         is resized. */
      tl_assert(i > 0);
      i--;
      ix++;
      if (ix == sf->htab_size) ix = 0;
   }

   /* So now we've found a free slot at ix, and we can use that. */
   tl_assert(sf->htab[ix].insn_addr == 0);

   /* Add a new record in this slot. */
   tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
   sf->htab[ix].insn_addr = ip;
   sf->htab[ix].blocks = ip_frameblocks;
   sf->htab[ix].invar.tag = Inv_Unset;
   sf->htab_used++;
   return &sf->htab[ix];
}


inline
static IInstance* find_or_create_IInstance (
                     StackFrame* sf,
                     Addr ip,
                     XArray* /* StackBlock */ ip_frameblocks
                  )
{
   UWord ix = compute_II_hash(ip, sf->htab_size);
   /* Is it in the first slot we come to? */
   if (LIKELY(sf->htab[ix].insn_addr == ip)) {
      stats__htab_fast++;
      return &sf->htab[ix];
   }
   /* If the first slot we come to is empty, bag it. */
   if (LIKELY(sf->htab[ix].insn_addr == 0)) {
      stats__htab_fast++;
      tl_assert(ip != 0);
      sf->htab[ix].insn_addr = ip;
      sf->htab[ix].blocks = ip_frameblocks;
      sf->htab[ix].invar.tag = Inv_Unset;
      sf->htab_used++;
      return &sf->htab[ix];
   }
   /* Otherwise we hand off to the slow case, which searches other
      slots, and optionally resizes the table if necessary. */
   return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
}


__attribute__((noinline))
static Addr calculate_StackBlock_EA ( StackBlock* descr,
                                      Addr sp, Addr fp ) {
   UWord w1 = (UWord)descr->base;
   UWord w2 = (UWord)(descr->spRel ? sp : fp);
   UWord ea = w1 + w2;
   return ea;
}

/* Given an array of StackBlocks, return an array of Addrs, holding
   their effective addresses.  Caller deallocates result array. */
__attribute__((noinline))
static XArray* /* Addr */ calculate_StackBlock_EAs (
                             XArray* /* StackBlock */ blocks,
                             Addr sp, Addr fp
                          )
{
   XArray* res;
   Word i, n = VG_(sizeXA)( blocks );
   tl_assert(n > 0);
   res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
   for (i = 0; i < n; i++) {
      StackBlock* blk = VG_(indexXA)( blocks, i );
      Addr ea = calculate_StackBlock_EA( blk, sp, fp );
      VG_(addToXA)( res, &ea );
   }
   return res;
}


/* Try to classify the block into which a memory access falls, and
   write the result in 'inv'.  This writes all relevant fields of
   'inv'. */
__attribute__((noinline))
static void classify_address ( /*OUT*/Invar* inv,
                               ThreadId tid,
                               Addr ea, Addr sp, Addr fp,
                               UWord szB,
                               XArray* /* of StackBlock */ thisInstrBlocks )
{
   tl_assert(szB > 0);
   /* First, look in the stack blocks accessible in this instruction's
      frame. */
   {
     Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
     if (nBlocks == 0) stats__t_i_b_empty++;
     for (i = 0; i < nBlocks; i++) {
        StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
        Addr bea = calculate_StackBlock_EA( descr, sp, fp );
        if (bea <= ea && ea + szB <= bea + descr->szB) {
           /* found it */
           inv->tag = Inv_Stack0;
           inv->Inv.Stack0.addr  = bea;
           inv->Inv.Stack0.szB   = descr->szB;
           inv->Inv.Stack0.descr = descr;
           stats__classify_Stack0++;
           return;
        }
     }
   }
   /* Look in this thread's query cache */
   { Word i;
     QCache* cache = &qcaches[tid];
     static UWord ctr = 0;
     stats__qcache_queries++;
     for (i = 0; i < cache->nInUse; i++) {
        if (0) /* expensive in a loop like this */
           tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
        stats__qcache_probes++;
        if (is_subinterval_of(cache->elems[i].addr,
                              cache->elems[i].szB, ea, szB)) {
           if (i > 0
               && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
              QCElem tmp;
              tmp = cache->elems[i-1];
              cache->elems[i-1] = cache->elems[i];
              cache->elems[i] = tmp;
              i--;
           }
           *inv = cache->elems[i].inv;
           return;
        }
     }
     stats__qcache_misses++;
   }
   /* Ok, so it's not a block in the top frame.  Perhaps it's a block
      in some calling frame?  Consult this thread's stack-block
      interval tree to find out. */
   { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
     /* We know that [ea,ea+1) is in the block, but we need to
        restrict to the case where the whole access falls within
        it. */
     if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
        nd = NULL;
     }
     if (nd) {
        /* found it */
        inv->tag = Inv_StackN;
        inv->Inv.StackN.nd = nd;
        stats__classify_StackN++;
        goto out;
     }
   }
   /* Not in a stack block.  Try the global pool. */
   { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
     /* We know that [ea,ea+1) is in the block, but we need to
        restrict to the case where the whole access falls within
        it. */
     if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
        nd = NULL;
     }
     if (nd) {
        /* found it */
        inv->tag = Inv_Global;
        inv->Inv.Global.nd = nd;
        stats__classify_Global++;
        goto out;
     }
   }
   /* No idea - give up. */
   inv->tag = Inv_Unknown;
   stats__classify_Unknown++;

   /* Update the cache */
  out:
   { Addr  toadd_addr = 0;
     SizeT toadd_szB  = 0;
     QCache* cache = &qcaches[tid];

     static UWord ctr = 0;
     Bool show = False;
     if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;

     if (show) QCache__pp(cache, "before upd");

     switch (inv->tag) {
        case Inv_Global:
           toadd_addr = inv->Inv.Global.nd->addr;
           toadd_szB  = inv->Inv.Global.nd->szB;
           break;
        case Inv_StackN:
           toadd_addr = inv->Inv.StackN.nd->addr;
           toadd_szB  = inv->Inv.StackN.nd->szB;
           break;
        case Inv_Unknown: {
           /* This is more complex.  We need to figure out the
              intersection of the "holes" in the global and stack
              interval trees into which [ea,ea+szB) falls.  This is
              further complicated by the fact that [ea,ea+szB) might
              not fall cleanly into a hole; it may instead fall across
              the boundary of a stack or global block.  In that case
              we just ignore it and don't update the cache, since we
              have no way to represent this situation precisely. */
           StackTreeNode  sNegInf, sPosInf, sKey, *sLB, *sUB;
           GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
           Addr gMin, gMax, sMin, sMax, uMin, uMax;
           Bool sOK, gOK;
           sNegInf.addr = 0;
           sNegInf.szB  = 1;
           sPosInf.addr = ~(UWord)0;
           sPosInf.szB  = 1;
           gNegInf.addr = 0;
           gNegInf.szB  = 1;
           gPosInf.addr = ~(UWord)0;
           gPosInf.szB  = 1;
           sKey.addr = ea;
           sKey.szB  = szB;
           gKey.addr = ea;
           gKey.szB  = szB;
           if (0) VG_(printf)("Tree sizes %ld %ld\n",
                              VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
           sOK = VG_(findBoundsFM)( siTrees[tid],
                                    (UWord*)&sLB,    NULL/*unused*/,
                                    (UWord*)&sUB,    NULL/*unused*/,
                                    (UWord)&sNegInf, 0/*unused*/,
                                    (UWord)&sPosInf, 0/*unused*/,
                                    (UWord)&sKey );
           gOK = VG_(findBoundsFM)( giTree,
                                    (UWord*)&gLB,    NULL/*unused*/,
                                    (UWord*)&gUB,    NULL/*unused*/,
                                    (UWord)&gNegInf, 0/*unused*/,
                                    (UWord)&gPosInf, 0/*unused*/,
                                    (UWord)&gKey );
           if (!(sOK && gOK)) {
              /* If this happens, then [ea,ea+szB) partially overlaps
                 a heap or stack block.  We can't represent that, so
                 just forget it (should be very rare).  However, do
                 maximum sanity checks first.  In such a
                 partial overlap case, it can't be the case that both
                 [ea] and [ea+szB-1] overlap the same block, since if
                 that were indeed the case then it wouldn't be a
                 partial overlap; rather it would simply fall inside
                 that block entirely and we shouldn't be inside this
                 conditional at all. */
              if (!sOK) {
                 StackTreeNode *ndFirst, *ndLast;
                 ndFirst = find_StackTreeNode( siTrees[tid], ea );
                 ndLast  = find_StackTreeNode( siTrees[tid], ea+szB-1 );
                 /* if both ends of the range fall inside a block,
                    they can't be in the same block. */
                 if (ndFirst && ndLast)
                    tl_assert(ndFirst != ndLast);
                 /* for each end of the range, if it is in a block,
                    the range as a whole can't be entirely within the
                    block. */
                 if (ndFirst)
                    tl_assert(!is_subinterval_of(ndFirst->addr,
                                                 ndFirst->szB, ea, szB));
                 if (ndLast)
                    tl_assert(!is_subinterval_of(ndLast->addr,
                                                 ndLast->szB, ea, szB));
              }
              if (!gOK) {
                 GlobalTreeNode *ndFirst, *ndLast;
                 ndFirst = find_GlobalTreeNode( giTree, ea );
                 ndLast  = find_GlobalTreeNode( giTree, ea+szB-1 );
                 /* if both ends of the range fall inside a block,
                    they can't be in the same block. */
                 if (ndFirst && ndLast)
                    tl_assert(ndFirst != ndLast);
                 /* for each end of the range, if it is in a block,
                    the range as a whole can't be entirely within the
                    block. */
                 if (ndFirst)
                    tl_assert(!is_subinterval_of(ndFirst->addr,
                                                 ndFirst->szB, ea, szB));
                 if (ndLast)
                    tl_assert(!is_subinterval_of(ndLast->addr,
                                                 ndLast->szB, ea, szB));
              }
              if (0) VG_(printf)("overlapping blocks in cache\n");
              return;
           }
           sMin = sLB == &sNegInf ? 0         : (sLB->addr + sLB->szB);
           sMax = sUB == &sPosInf ? ~(UWord)0 : (sUB->addr - 1);
           gMin = gLB == &gNegInf ? 0         : (gLB->addr + gLB->szB);
           gMax = gUB == &gPosInf ? ~(UWord)0 : (gUB->addr - 1);
           if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
                              sMin, sMax, gMin, gMax);
           /* [sMin,sMax] and [gMin,gMax] must both contain
              [ea,ea+szB) (right?)  That implies they must overlap
              at least over [ea,ea+szB). */
           tl_assert(sMin <= ea && ea+szB-1 <= sMax);
           tl_assert(gMin <= ea && ea+szB-1 <= gMax);
           /* So now compute their intersection. */
           uMin = Addr__max( sMin, gMin );
           uMax = Addr__min( sMax, gMax );
           if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
           tl_assert(uMin <= uMax);
           tl_assert(uMin <= ea && ea+szB-1 <= uMax);
           /* Finally, we can park [uMin,uMax] in the cache.  However,
              if uMax is ~0, we can't represent the difference; hence
              fudge uMax. */
           if (uMin < uMax && uMax == ~(UWord)0)
              uMax--;
           toadd_addr = uMin;
           toadd_szB  = uMax - uMin + 1;
           break;
        }
        default:
           /* We should only be caching info for the above 3 cases */
           tl_assert(0);
     } /* switch (inv->tag) */

     { /* and actually add this to the cache, finally */
       Word i;
       Word ip = cache->nInUse / 2; /* doesn't seem critical */

       if (cache->nInUse < N_QCACHE)
          cache->nInUse++;
       for (i = cache->nInUse-1; i > ip; i--) {
          cache->elems[i] = cache->elems[i-1];
       }

       tl_assert(toadd_szB > 0);
       cache->elems[ip].addr = toadd_addr;
       cache->elems[ip].szB  = toadd_szB;
       cache->elems[ip].inv  = *inv;
     }

     if (show) QCache__pp(cache, "after upd");

   }
}


/* CALLED FROM GENERATED CODE */
static
VG_REGPARM(3)
void helperc__mem_access ( /* Known only at run time: */
                           Addr ea, Addr sp, Addr fp,
                           /* Known at translation time: */
                           Word sszB, Addr ip, XArray* ip_frameBlocks )
{
   UWord szB;
   IInstance* iinstance;
   Invar* inv;
   Invar new_inv;
   ThreadId tid = VG_(get_running_tid)();
   StackFrame* frame;
   HChar bufE[128], bufA[128];

   stats__total_accesses++;

   tl_assert(is_sane_TId(tid));
   frame = shadowStacks[tid];
   tl_assert(frame);

   /* Find the instance info for this instruction. */
   tl_assert(ip_frameBlocks);
   iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
   tl_assert(iinstance);
   tl_assert(iinstance->blocks == ip_frameBlocks);

   szB = (sszB < 0) ? (-sszB) : sszB;
   tl_assert(szB > 0);

   inv = &iinstance->invar;

   /* Deal with first uses of instruction instances. */
   if (inv->tag == Inv_Unset) {
      /* This is the first use of this instance of the instruction, so
         we can't make any check; we merely record what we saw, so we
         can compare it against what happens for 2nd and subsequent
         accesses. */
      classify_address( inv,
                        tid, ea, sp, fp, szB, iinstance->blocks );
      tl_assert(inv->tag != Inv_Unset);
      return;
   }

   /* So generate an Invar and see if it's different from what
      we had before. */
   classify_address( &new_inv,
                     tid, ea, sp, fp, szB, iinstance->blocks );
   tl_assert(new_inv.tag != Inv_Unset);

   /* Did we see something different from before?  If no, then there's
      no error. */
   if (eq_Invar(&new_inv, inv))
      return;

   tl_assert(inv->tag != Inv_Unset);

   VG_(memset)(bufE, 0, sizeof(bufE));
   show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );

   VG_(memset)(bufA, 0, sizeof(bufA));
   show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );

   sg_record_error_SorG( tid, ea, sszB, bufE, bufA );

   /* And now install the new observation as "standard", so as to
      make future error messages make more sense. */
   *inv = new_inv;
}
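
/* Exposition: the protocol above is "the first access defines the
   invariant; later accesses must agree".  For a hypothetical loop

      for (i = 0; i < n; i++) a[i] = 0;

   the store's first execution classifies its target (say Inv_Stack0
   on block "a").  If a later iteration runs off the end of "a" into
   a neighbouring block, classify_address produces a different Invar,
   eq_Invar fails, the error is reported, and the new observation is
   installed as the expected one. */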


////////////////////////////////////////
/* Primary push-a-new-frame routine.  Called indirectly from
   generated code. */

static UWord stats__max_sitree_size = 0;
static UWord stats__max_gitree_size = 0;

static
void shadowStack_new_frame ( ThreadId tid,
                             Addr     sp_at_call_insn,
                             Addr     sp_post_call_insn,
                             Addr     fp_at_call_insn,
                             Addr     ip_post_call_insn,
                             XArray*  descrs_at_call_insn )
{
   StackFrame *callee, *caller;
   tl_assert(is_sane_TId(tid));

   caller = shadowStacks[tid];
   tl_assert(caller);

   if (caller->outer) { /* "this is not the outermost frame" */
      tl_assert(caller->outer->inner == caller);
      tl_assert(caller->outer->depth >= 0);
      tl_assert(1 + caller->outer->depth == caller->depth);
   } else {
      tl_assert(caller->depth == 0);
   }

   caller->sp_at_call = sp_at_call_insn;
   caller->fp_at_call = fp_at_call_insn;

   if (descrs_at_call_insn) {
      tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
      caller->blocks_added_by_call
         = calculate_StackBlock_EAs( descrs_at_call_insn,
                                     sp_at_call_insn, fp_at_call_insn );
      if (caller->blocks_added_by_call)
         add_blocks_to_StackTree( siTrees[tid],
                                  descrs_at_call_insn,
                                  caller->blocks_added_by_call,
                                  caller->depth /* stack depth at which
                                                   these blocks are
                                                   considered to exist*/ );
      if (1) {
         UWord s  = VG_(sizeFM)( siTrees[tid] );
         UWord g  = VG_(sizeFM)( giTree );
         Bool  sb = s > stats__max_sitree_size;
         Bool  gb = g > stats__max_gitree_size;
         if (sb) stats__max_sitree_size = s;
         if (gb) stats__max_gitree_size = g;
         if (0 && (sb || gb))
            VG_(message)(Vg_DebugMsg,
                         "exp-sgcheck: new max tree sizes: "
                         "StackTree %ld, GlobalTree %ld\n",
                         stats__max_sitree_size, stats__max_gitree_size );
      }
   } else {
      caller->blocks_added_by_call = NULL;
   }

   /* caller->blocks_added_by_call is used again (and then freed) when
      this frame is removed from the stack. */

   if (caller->inner) {
      callee = caller->inner;
   } else {
      callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
      VG_(memset)(callee, 0, sizeof(StackFrame));
      callee->outer = caller;
      caller->inner = callee;
      callee->depth = 1 + caller->depth;
      tl_assert(callee->inner == NULL);
   }

   /* This sets up .htab, .htab_size and .htab_used */
   initialise_II_hash_table( callee );

   callee->creation_sp = sp_post_call_insn;
   callee->sp_at_call = 0; // not actually required ..
   callee->fp_at_call = 0; // .. these 3 initialisations are ..
   callee->blocks_added_by_call = NULL; // .. just for cleanness

   /* record the new running stack frame */
   shadowStacks[tid] = callee;

   /* and this thread's query cache is now invalid */
   QCache__invalidate( &qcaches[tid] );

   if (0)
   { Word d = callee->depth;
     HChar fnname[80];
     Bool ok;
     Addr ip = ip_post_call_insn;
     ok = VG_(get_fnname_w_offset)( ip, fnname, sizeof(fnname) );
     while (d > 0) {
        VG_(printf)(" ");
        d--;
     }
     VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);

/* CALLED FROM GENERATED CODE */
static
VG_REGPARM(3)
void helperc__new_frame ( Addr sp_post_call_insn,
                          Addr fp_at_call_insn,
                          Addr ip_post_call_insn,
                          XArray* blocks_at_call_insn,
                          Word sp_adjust )
{
   ThreadId tid = VG_(get_running_tid)();
   Addr     sp_at_call_insn = sp_post_call_insn + sp_adjust;
   shadowStack_new_frame( tid,
                          sp_at_call_insn,
                          sp_post_call_insn,
                          fp_at_call_insn,
                          ip_post_call_insn,
                          blocks_at_call_insn );
}


////////////////////////////////////////
/* Primary remove-frame(s) routine.  Called indirectly from
   generated code. */

__attribute__((noinline))
static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
{
   StackFrame *innermost, *innermostOrig;
   tl_assert(is_sane_TId(tid));
   innermost = shadowStacks[tid];
   tl_assert(innermost);
   innermostOrig = innermost;
   //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
   while (1) {
      if (!innermost->outer)
         break;
      if (innermost->inner)
         tl_assert(innermost->inner->outer == innermost);
      tl_assert(innermost->outer->inner == innermost);
      tl_assert(innermost->blocks_added_by_call == NULL);
      if (sp_now <= innermost->creation_sp) break;
      //VG_(printf)("UNWIND dump %p\n", innermost->creation_sp);
      tl_assert(innermost->htab);
      if (innermost->htab != &innermost->htab_fixed[0])
         sg_free(innermost->htab);
      /* be on the safe side */
      innermost->creation_sp = 0;
      innermost->htab        = NULL;
      innermost->htab_size   = 0;
      innermost->htab_used   = 0;
      innermost->sp_at_call  = 0;
      innermost->fp_at_call  = 0;
      innermost->blocks_added_by_call = NULL;
      innermost = innermost->outer;

      /* So now we're "back" in the calling frame.  Remove from this
         thread's stack-interval-tree the blocks added at the time of
         the call. */

      if (innermost->outer) { /* not at the outermost frame */
         if (innermost->blocks_added_by_call != NULL) {
            del_blocks_from_StackTree( siTrees[tid],
                                       innermost->blocks_added_by_call );
            VG_(deleteXA)( innermost->blocks_added_by_call );
            innermost->blocks_added_by_call = NULL;
         }
      }
      /* That completes the required tidying of the interval tree
         associated with the frame we just removed. */

      if (0) {
         Word d = innermost->depth;
         while (d > 0) {
            VG_(printf)(" ");
            d--;
         }
         VG_(printf)("X\n");
      }

   }

   tl_assert(innermost);

   if (innermost != innermostOrig) {
      shadowStacks[tid] = innermost;
      /* this thread's query cache is now invalid */
      QCache__invalidate( &qcaches[tid] );
   }
}
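
/* Worked example of the unwind rule above (values made up; assumes the
   usual downwards-growing stack, so deeper frames have smaller
   creation_sp values).  Suppose the frames carry

      root: creation_sp == ~0,   f1: 0xEF00,   f2: 0xEE00

   with f2 innermost.  An SP change to sp_now == 0xEEF0 pops f2 only,
   since 0xEEF0 > 0xEE00 but 0xEEF0 <= 0xEF00.  The root frame can
   never be popped, because no sp_now exceeds ~0. */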


//////////////////////////////////////////////////////////////
//                                                          //
//  Instrumentation                                         //
//                                                          //
//////////////////////////////////////////////////////////////

/* What does instrumentation need to do?

   - at each Call transfer, generate a call to shadowStack_new_frame
     do this by manually inspecting the IR

   - at each sp change, if the sp change is negative,
     call shadowStack_unwind
     do this by asking for SP-change analysis

   - for each memory referencing instruction,
     call helperc__mem_access
*/

/* A complication: sg_ instrumentation and h_ instrumentation need to
   be interleaved.  Since the latter is a lot more complex than the
   former, we split the sg_ instrumentation here into four functions
   and let the h_ instrumenter call the four functions as it goes.
   Hence the h_ instrumenter drives the sg_ instrumenter.

   To make this viable, the sg_ instrumenter carries what running
   state it needs in 'struct _SGEnv'.  This is exported only
   abstractly from this file.
*/

struct _SGEnv {
   /* the current insn's IP */
   Addr64 curr_IP;
   /* whether the above is actually known */
   Bool curr_IP_known;
   /* if we find a mem ref, is it the first for this insn?  Used for
      detecting insns which make more than one memory ref, a situation
      we can't handle properly; so we ignore all but the first
      ref. */
   Bool firstRef;
   /* READONLY */
   IRTemp (*newIRTemp_cb)(IRType,void*);
   void* newIRTemp_opaque;
};


/* --- Helper fns for instrumentation --- */

static IRTemp gen_Get_SP ( struct _SGEnv*  sge,
                           IRSB*           bbOut,
                           VexGuestLayout* layout,
                           Int             hWordTy_szB )
{
   IRExpr* sp_expr;
   IRTemp  sp_temp;
   IRType  sp_type;
   /* This in effect forces the host and guest word sizes to be the
      same. */
   tl_assert(hWordTy_szB == layout->sizeof_SP);
   sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
   sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
   sp_temp = sge->newIRTemp_cb( sp_type, sge->newIRTemp_opaque );
   addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
   return sp_temp;
}

static IRTemp gen_Get_FP ( struct _SGEnv*  sge,
                           IRSB*           bbOut,
                           VexGuestLayout* layout,
                           Int             hWordTy_szB )
{
   IRExpr* fp_expr;
   IRTemp  fp_temp;
   IRType  fp_type;
   /* This in effect forces the host and guest word sizes to be the
      same. */
   tl_assert(hWordTy_szB == layout->sizeof_SP);
   fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
   fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
   fp_temp = sge->newIRTemp_cb( fp_type, sge->newIRTemp_opaque );
   addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
   return fp_temp;
}
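
/* For example (illustrative only; temp numbering is arbitrary), on a
   64-bit guest a call to gen_Get_SP appends to bbOut an IR statement
   along the lines of

      t5 = GET:I64(offset_SP)

   and returns t5, so the dirty call generated below can pass the
   run-time SP as IRExpr_RdTmp(t5).  gen_Get_FP does the same with
   offset_FP. */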

static void instrument_mem_access ( struct _SGEnv* sge,
                                    IRSB*   bbOut,
                                    IRExpr* addr,
                                    Int     szB,
                                    Bool    isStore,
                                    Int     hWordTy_szB,
                                    Addr    curr_IP,
                                    VexGuestLayout* layout )
{
   IRType  tyAddr      = Ity_INVALID;
   XArray* frameBlocks = NULL;

   tl_assert(isIRAtom(addr));
   tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);

   tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);

#if defined(VGA_x86)
   /* Skip accesses made by common x86 prologue/epilogue idioms,
      identified by peeking at the instruction bytes at (and just
      before) curr_IP. */
   { UChar* p = (UChar*)curr_IP;
     // pop %ebp ; ret
     if (p[-1] == 0x5d && p[0] == 0xc3) return;
     // pop %ebp ; ret $imm16
     if (p[-1] == 0x5d && p[0] == 0xc2) return;
     // push %ebp ; mov %esp,%ebp
     if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
   }
#endif

   /* First off, find or create the StackBlocks for this instruction. */
   frameBlocks = get_StackBlocks_for_IP( curr_IP );
   tl_assert(frameBlocks);
   //if (VG_(sizeXA)( frameBlocks ) == 0)
   //   frameBlocks = NULL;

   /* Generate a call to "helperc__mem_access", passing:
         addr current_SP current_FP szB curr_IP frameBlocks
   */
   { IRTemp t_SP = gen_Get_SP( sge, bbOut, layout, hWordTy_szB );
     IRTemp t_FP = gen_Get_FP( sge, bbOut, layout, hWordTy_szB );
     IRExpr** args
        = mkIRExprVec_6( addr,
                         IRExpr_RdTmp(t_SP),
                         IRExpr_RdTmp(t_FP),
                         mkIRExpr_HWord( isStore ? (-szB) : szB ),
                         mkIRExpr_HWord( curr_IP ),
                         mkIRExpr_HWord( (HWord)frameBlocks ) );
     IRDirty* di
        = unsafeIRDirty_0_N( 3/*regparms*/,
                             "helperc__mem_access",
                             VG_(fnptr_to_fnentry)( &helperc__mem_access ),
                             args );

     addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
   }
}


/* --- Instrumentation main (4 fns) --- */

struct _SGEnv * sg_instrument_init ( IRTemp (*newIRTemp_cb)(IRType,void*),
                                     void* newIRTemp_opaque )
{
   struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
                                   sizeof(struct _SGEnv));
   tl_assert(env);
   env->curr_IP          = 0;
   env->curr_IP_known    = False;
   env->firstRef         = True;
   env->newIRTemp_cb     = newIRTemp_cb;
   env->newIRTemp_opaque = newIRTemp_opaque;
   return env;
}

void sg_instrument_fini ( struct _SGEnv * env )
{
   sg_free(env);
}
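
/* A sketch of how a driver (in this tool, the h_ instrumenter) is
   expected to use the four functions; 'sbIn' and the loop variable
   are hypothetical names, not part of this file.  Note that each
   original statement is added to sbOut only after its
   instrumentation, as the comment on sg_instrument_IRStmt requires:

      struct _SGEnv* env = sg_instrument_init( my_newIRTemp_cb, opaque );
      for (i = 0; i < sbIn->stmts_used; i++) {
         sg_instrument_IRStmt( env, sbOut, sbIn->stmts[i],
                               layout, gWordTy, hWordTy );
         addStmtToIRSB( sbOut, sbIn->stmts[i] );
      }
      sg_instrument_final_jump( env, sbOut, sbIn->next, sbIn->jumpkind,
                                layout, gWordTy, hWordTy );
      sg_instrument_fini( env );
*/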

/* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
   as required.  This must be called before 'st' itself is added to
   'sbOut'. */
void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
                            /*MOD*/IRSB* sbOut,
                            IRStmt* st,
                            VexGuestLayout* layout,
                            IRType gWordTy, IRType hWordTy )
{
   if (!sg_clo_enable_sg_checks)
      return;

   tl_assert(st);
   tl_assert(isFlatIRStmt(st));
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_AbiHint:
      case Ist_Put:
      case Ist_PutI:
      case Ist_MBE:
         /* None of these can contain any memory references. */
         break;

      case Ist_Exit:
         tl_assert(st->Ist.Exit.jk != Ijk_Call);
         /* else we must deal with a conditional call */
         break;

      case Ist_IMark:
         env->curr_IP_known = True;
         env->curr_IP       = (Addr)st->Ist.IMark.addr;
         env->firstRef      = True;
         break;

      case Ist_Store:
         tl_assert(env->curr_IP_known);
         if (env->firstRef) {
            instrument_mem_access(
               env, sbOut,
               st->Ist.Store.addr,
               sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
               True/*isStore*/,
               sizeofIRType(hWordTy),
               env->curr_IP, layout
            );
            env->firstRef = False;
         }
         break;

      case Ist_WrTmp: {
         IRExpr* data = st->Ist.WrTmp.data;
         if (data->tag == Iex_Load) {
            tl_assert(env->curr_IP_known);
            if (env->firstRef) {
               instrument_mem_access(
                  env, sbOut,
                  data->Iex.Load.addr,
                  sizeofIRType(data->Iex.Load.ty),
                  False/*!isStore*/,
                  sizeofIRType(hWordTy),
                  env->curr_IP, layout
               );
               env->firstRef = False;
            }
         }
         break;
      }

      case Ist_Dirty: {
         Int      dataSize;
         IRDirty* d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None) {
            /* This dirty helper accesses memory.  Collect the
               details. */
            tl_assert(env->curr_IP_known);
            if (env->firstRef) {
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
                  instrument_mem_access(
                     env, sbOut, d->mAddr, dataSize, False/*!isStore*/,
                     sizeofIRType(hWordTy), env->curr_IP, layout
                  );
               }
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
                  instrument_mem_access(
                     env, sbOut, d->mAddr, dataSize, True/*isStore*/,
                     sizeofIRType(hWordTy), env->curr_IP, layout
                  );
               }
               env->firstRef = False;
            }
         } else {
            tl_assert(d->mAddr == NULL);
            tl_assert(d->mSize == 0);
         }
         break;
      }

      case Ist_CAS: {
         /* We treat it as a read and a write of the location.  I
            think that is the same behaviour as it was before IRCAS
            was introduced, since prior to that point, the Vex front
            ends would translate a lock-prefixed instruction into a
            (normal) read followed by a (normal) write. */
         if (env->firstRef) {
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(sbOut->tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            instrument_mem_access(
               env, sbOut, cas->addr, dataSize, False/*!isStore*/,
               sizeofIRType(hWordTy), env->curr_IP, layout
            );
            instrument_mem_access(
               env, sbOut, cas->addr, dataSize, True/*isStore*/,
               sizeofIRType(hWordTy), env->curr_IP, layout
            );
            env->firstRef = False;
         }
         break;
      }

      default:
         tl_assert(0);

   } /* switch (st->tag) */
}
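
/* Worked example for the Ist_CAS case above (illustrative): for a
   doubleword CAS on a 32-bit guest (e.g. x86 cmpxchg8b), dataLo has
   type Ity_I32 and dataHi is non-NULL, so dataSize is 4 * 2 == 8 and
   the location is reported to helperc__mem_access twice, once as an
   8-byte read and once as an 8-byte store. */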

/* Add instrumentation for the final jump of an IRSB 'sbOut', and
   possibly modify 'env' as required.  This must be the last
   instrumentation statement in the block. */
void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
                                /*MOD*/IRSB* sbOut,
                                IRExpr* next,
                                IRJumpKind jumpkind,
                                VexGuestLayout* layout,
                                IRType gWordTy, IRType hWordTy )
{
   if (!sg_clo_enable_sg_checks)
      return;

   if (jumpkind == Ijk_Call) {
      // Assumes x86 or amd64
      IRTemp   sp_post_call_insn, fp_post_call_insn;
      XArray*  frameBlocks;
      IRExpr** args;
      IRDirty* di;
      sp_post_call_insn
         = gen_Get_SP( env, sbOut, layout, sizeofIRType(hWordTy) );
      fp_post_call_insn
         = gen_Get_FP( env, sbOut, layout, sizeofIRType(hWordTy) );
      tl_assert(env->curr_IP_known);
      frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
      tl_assert(frameBlocks);
      if (VG_(sizeXA)(frameBlocks) == 0)
         frameBlocks = NULL;
      args
         = mkIRExprVec_5(
              IRExpr_RdTmp(sp_post_call_insn),
              IRExpr_RdTmp(fp_post_call_insn),
              /* assume the call doesn't change FP */
              next,
              mkIRExpr_HWord( (HWord)frameBlocks ),
              mkIRExpr_HWord( sizeofIRType(gWordTy) )
           );
      di = unsafeIRDirty_0_N(
              3/*regparms*/,
              "helperc__new_frame",
              VG_(fnptr_to_fnentry)( &helperc__new_frame ),
              args );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }
}
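
/* Illustrative result (temp numbering arbitrary): for a call on an
   amd64 guest, the end of the block acquires roughly

      t7 = GET:I64(offset_SP)
      t8 = GET:I64(offset_FP)
      DIRTY helperc__new_frame(t7, t8, <next>, frameBlocks, 8)

   where the final argument, sizeofIRType(gWordTy) == 8, is the
   sp_adjust that helperc__new_frame adds back to recover the SP at
   the call instruction itself, i.e. it undoes the push of the return
   address. */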


//////////////////////////////////////////////////////////////
//                                                          //
//  end Instrumentation                                     //
//                                                          //
//////////////////////////////////////////////////////////////


//////////////////////////////////////////////////////////////
//                                                          //
//  misc                                                    //
//                                                          //
//////////////////////////////////////////////////////////////

/* Make a new empty stack frame that is suitable for being the
   outermost frame in a stack.  It has a creation_sp of effectively
   infinity, so it can never be removed. */
static StackFrame* new_root_StackFrame ( void )
{
   StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
   VG_(memset)( sframe, 0, sizeof(*sframe) );
   sframe->creation_sp = ~0UL;

   /* This sets up .htab, .htab_size and .htab_used */
   initialise_II_hash_table( sframe );

   /* ->depth, ->outer, ->inner are 0, NULL, NULL */

   return sframe;
}

/* Primary routine for setting up the shadow stack for a new thread.
   Note that this is used to create not only child thread stacks, but
   the root thread's stack too.  We create a new stack with
   .creation_sp set to infinity, so that the outermost frame can never
   be removed (by shadowStack_unwind).  The core calls this function
   as soon as a thread is created.  We cannot yet get its SP value,
   since that may not yet be set. */
static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
{
   tl_assert(is_sane_TId(child));
   if (parent == VG_INVALID_THREADID) {
      /* creating the main thread's stack */
   } else {
      tl_assert(is_sane_TId(parent));
      tl_assert(parent != child);
      tl_assert(shadowStacks[parent] != NULL);
      tl_assert(siTrees[parent] != NULL);
   }

   /* Create the child's stack.  Bear in mind we may be re-using
      it. */
   if (shadowStacks[child] == NULL) {
      /* First use of this stack.  Just allocate an initial frame. */
      tl_assert(siTrees[child] == NULL);
   } else {
      StackFrame *frame, *frame2;
      /* re-using a stack. */
      /* get rid of the interval tree */
      tl_assert(siTrees[child] != NULL);
      delete_StackTree( siTrees[child] );
      siTrees[child] = NULL;
      /* Throw away all existing frames. */
      frame = shadowStacks[child];
      while (frame->outer)
         frame = frame->outer;
      tl_assert(frame->depth == 0);
      while (frame) {
         frame2 = frame->inner;
         if (frame2) tl_assert(1 + frame->depth == frame2->depth);
         sg_free(frame);
         frame = frame2;
      }
      shadowStacks[child] = NULL;
   }

   tl_assert(shadowStacks[child] == NULL);
   tl_assert(siTrees[child] == NULL);

   /* Set up the initial stack frame. */
   shadowStacks[child] = new_root_StackFrame();

   /* and set up the child's stack block interval tree. */
   siTrees[child] = new_StackTree();
}

/* Once a thread is ready to go, the core calls here.  We take the
   opportunity to push a second frame on its stack, with the
   presumably valid SP value that is going to be used for the thread's
   startup.  Hence we should always wind up with a valid outermost
   frame for the thread. */
static void shadowStack_set_initial_SP ( ThreadId tid )
{
   StackFrame* sf;
   tl_assert(is_sane_TId(tid));
   sf = shadowStacks[tid];
   tl_assert(sf != NULL);
   tl_assert(sf->outer == NULL);
   tl_assert(sf->inner == NULL);
   tl_assert(sf->creation_sp == ~0UL);
   shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
                          0, VG_(get_IP)(tid), NULL );
}
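
/* So, per thread, the core-driven setup sequence (see the two hooks
   below) is, as a sketch:

      sg_pre_thread_ll_create(parent, child)
         -> shadowStack_thread_create: root frame + empty StackTree
      sg_pre_thread_first_insn(child)
         -> shadowStack_set_initial_SP: push a frame with the real SP

   after which shadowStacks[child] always has a valid innermost frame
   for helperc__mem_access to consult. */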


//////////////////////////////////////////////////////////////
//                                                          //
//  main-ish                                                //
//                                                          //
//////////////////////////////////////////////////////////////

/* CALLED indirectly FROM GENERATED CODE.  Calls here are created by
   sp-change analysis, as requested in pc_pre_clo_init(). */
void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
   ThreadId tid = VG_(get_running_tid)();
   shadowStack_unwind( tid, old_SP+len );
}

void sg_pre_clo_init ( void ) {
   ourGlobals_init();
   init_StackBlocks_set();
   init_GlobalBlock_set();
}

void sg_post_clo_init ( void ) {
}

void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
   shadowStack_thread_create(parent, child);
}

void sg_pre_thread_first_insn ( ThreadId tid ) {
   shadowStack_set_initial_SP(tid);
}

void sg_fini(Int exitcode)
{
   if (VG_(clo_stats)) {
      VG_(message)(Vg_DebugMsg,
         " sg_: %'llu total accesses, of which:\n", stats__total_accesses);
      VG_(message)(Vg_DebugMsg,
         " sg_:    stack0: %'12llu classify\n",
         stats__classify_Stack0);
      VG_(message)(Vg_DebugMsg,
         " sg_:    stackN: %'12llu classify\n",
         stats__classify_StackN);
      VG_(message)(Vg_DebugMsg,
         " sg_:    global: %'12llu classify\n",
         stats__classify_Global);
      VG_(message)(Vg_DebugMsg,
         " sg_:   unknown: %'12llu classify\n",
         stats__classify_Unknown);
      VG_(message)(Vg_DebugMsg,
         " sg_:  %'llu Invars preened, of which %'llu changed\n",
         stats__Invars_preened, stats__Invars_changed);
      VG_(message)(Vg_DebugMsg,
         " sg_:   t_i_b_MT: %'12llu\n", stats__t_i_b_empty);
      VG_(message)(Vg_DebugMsg,
         " sg_:   qcache: %'llu searches, %'llu probes, %'llu misses\n",
         stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
      VG_(message)(Vg_DebugMsg,
         " sg_:  htab-fast: %'llu hits\n",
         stats__htab_fast);
      VG_(message)(Vg_DebugMsg,
         " sg_:  htab-slow: %'llu searches, %'llu probes, %'llu resizes\n",
         stats__htab_searches, stats__htab_probes, stats__htab_resizes);
   }
}


/*--------------------------------------------------------------------*/
/*--- end                                                sg_main.c ---*/
/*--------------------------------------------------------------------*/