1 2 /*--------------------------------------------------------------------*/ 3 /*--- LibHB: a library for implementing and checking ---*/ 4 /*--- the happens-before relationship in concurrent programs. ---*/ 5 /*--- libhb_main.c ---*/ 6 /*--------------------------------------------------------------------*/ 7 8 /* 9 This file is part of LibHB, a library for implementing and checking 10 the happens-before relationship in concurrent programs. 11 12 Copyright (C) 2008-2013 OpenWorks Ltd 13 info (at) open-works.co.uk 14 15 This program is free software; you can redistribute it and/or 16 modify it under the terms of the GNU General Public License as 17 published by the Free Software Foundation; either version 2 of the 18 License, or (at your option) any later version. 19 20 This program is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of 22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 General Public License for more details. 24 25 You should have received a copy of the GNU General Public License 26 along with this program; if not, write to the Free Software 27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 28 02111-1307, USA. 29 30 The GNU General Public License is contained in the file COPYING. 31 */ 32 33 #include "pub_tool_basics.h" 34 #include "pub_tool_poolalloc.h" 35 #include "pub_tool_libcassert.h" 36 #include "pub_tool_libcbase.h" 37 #include "pub_tool_libcprint.h" 38 #include "pub_tool_mallocfree.h" 39 #include "pub_tool_wordfm.h" 40 #include "pub_tool_sparsewa.h" 41 #include "pub_tool_xarray.h" 42 #include "pub_tool_oset.h" 43 #include "pub_tool_threadstate.h" 44 #include "pub_tool_aspacemgr.h" 45 #include "pub_tool_execontext.h" 46 #include "pub_tool_errormgr.h" 47 #include "pub_tool_options.h" // VG_(clo_stats) 48 #include "hg_basics.h" 49 #include "hg_wordset.h" 50 #include "hg_lock_n_thread.h" 51 #include "hg_errors.h" 52 53 #include "libhb.h" 54 55 56 ///////////////////////////////////////////////////////////////// 57 ///////////////////////////////////////////////////////////////// 58 // // 59 // Debugging #defines // 60 // // 61 ///////////////////////////////////////////////////////////////// 62 ///////////////////////////////////////////////////////////////// 63 64 /* Check the sanity of shadow values in the core memory state 65 machine. Change #if 0 to #if 1 to enable this. */ 66 #if 0 67 # define CHECK_MSM 1 68 #else 69 # define CHECK_MSM 0 70 #endif 71 72 73 /* Check sanity (reference counts, etc) in the conflicting access 74 machinery. Change #if 0 to #if 1 to enable this. */ 75 #if 0 76 # define CHECK_CEM 1 77 #else 78 # define CHECK_CEM 0 79 #endif 80 81 82 /* Check sanity in the compressed shadow memory machinery, 83 particularly in its caching innards. Unfortunately there's no 84 almost-zero-cost way to make them selectable at run time. Hence 85 set the #if 0 to #if 1 and rebuild if you want them. 
*/
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: VtsID                                           //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SVal                                            //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-defineable. */
#define SVal_NOACCESS (2ULL << 62)



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: ScalarTS                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46 bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is
   7.06e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 29 or lower.  The obvious limit is
   32 since a ThrID is a UInt.  29 comes from the fact that
   'Thr_n_RCEC', which records information about old accesses, packs
   not only a ThrID but also 2+1 other bits (access size and
   writeness) in a UInt, hence limiting size to 32-(2+1) == 29.

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).  See also NB5.
   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.

   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
   must never be a valid ThrID.  Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 29 inclusive */

#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Filter                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )


/* In the lines, each 8 bytes are treated individually, and are mapped
   to a UShort.  Regardless of endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher numbered bit is set if an R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/

/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Thr, ULong_n_EC                                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// Records stacks for H1 history mechanism (DRD-style)
typedef
   struct { ULong ull; ExeContext* ec; }
   ULong_n_EC;


/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.
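   (As a quick check of that arithmetic: 62,500 records x 16 bytes
   each = 1,000,000 bytes, i.e. roughly 1MB.)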
When more 271 than N_KWs_N_STACKs_PER_THREAD are present, the older half are 272 deleted to make space. Hence in the worst case we will be able to 273 produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2 274 Kw transitions (segments in this thread). For the current setting 275 that gives a guaranteed stack for at least the last 31.25k 276 segments. */ 277 #define N_KWs_N_STACKs_PER_THREAD 62500 278 279 280 struct _Thr { 281 /* Current VTSs for this thread. They change as we go along. viR 282 is the VTS to be used for reads, viW for writes. Usually they 283 are the same, but can differ when we deal with reader-writer 284 locks. It is always the case that 285 VtsID__cmpLEQ(viW,viR) == True 286 that is, viW must be the same, or lagging behind, viR. */ 287 VtsID viR; 288 VtsID viW; 289 290 /* Is initially False, and is set to True after the thread really 291 has done a low-level exit. When True, we expect to never see 292 any more memory references done by this thread. */ 293 Bool llexit_done; 294 295 /* Is initially False, and is set to True after the thread has been 296 joined with (reaped by some other thread). After this point, we 297 do not expect to see any uses of .viR or .viW, so it is safe to 298 set them to VtsID_INVALID. */ 299 Bool joinedwith_done; 300 301 /* A small integer giving a unique identity to this Thr. See 302 comments on the definition of ScalarTS for details. */ 303 ThrID thrid : SCALARTS_N_THRBITS; 304 305 /* A filter that removes references for which we believe that 306 msmcread/msmcwrite will not change the state, nor report a 307 race. */ 308 Filter* filter; 309 310 /* A pointer back to the top level Thread structure. There is a 311 1-1 mapping between Thread and Thr structures -- each Thr points 312 at its corresponding Thread, and vice versa. Really, Thr and 313 Thread should be merged into a single structure. */ 314 Thread* hgthread; 315 316 /* The ULongs (scalar Kws) in this accumulate in strictly 317 increasing order, without duplicates. This is important because 318 we need to be able to find a given scalar Kw in this array 319 later, by binary search. */ 320 XArray* /* ULong_n_EC */ local_Kws_n_stacks; 321 }; 322 323 324 325 ///////////////////////////////////////////////////////////////// 326 ///////////////////////////////////////////////////////////////// 327 // // 328 // data decls: SO // 329 // // 330 ///////////////////////////////////////////////////////////////// 331 ///////////////////////////////////////////////////////////////// 332 333 // (UInt) `echo "Synchronisation object" | md5sum` 334 #define SO_MAGIC 0x56b3c5b0U 335 336 struct _SO { 337 struct _SO* admin_prev; 338 struct _SO* admin_next; 339 VtsID viR; /* r-clock of sender */ 340 VtsID viW; /* w-clock of sender */ 341 UInt magic; 342 }; 343 344 345 346 ///////////////////////////////////////////////////////////////// 347 ///////////////////////////////////////////////////////////////// 348 // // 349 // Forward declarations // 350 // // 351 ///////////////////////////////////////////////////////////////// 352 ///////////////////////////////////////////////////////////////// 353 354 /* fwds for 355 Globals needed by other parts of the library. These are set 356 once at startup and then never changed. 
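   (They are filled in from the function pointers the caller passes
   to libhb_init.)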
*/
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;

/* misc function and data forward declarations */
static void VtsID__rcinc ( VtsID ii );
static void VtsID__rcdec ( VtsID ii );

static inline Bool  SVal__isC      ( SVal s );
static inline VtsID SVal__unC_Rmin ( SVal s );
static inline VtsID SVal__unC_Wmin ( SVal s );
static inline SVal  SVal__mkC      ( VtsID rmini, VtsID wmini );

/* A double linked list of all the SO's. */
SO* admin_SO;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN compressed shadow memory                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   either above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))
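/* For example, with N == 64: ROUNDUP(0x1003, 64) == 0x1040 and
   ROUNDDN(0x1003, 64) == 0x1000. */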


/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;


/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
   a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it marks .dict[0] as SVal_INVALID and holds (in
   .dict[1]) the index of the LineF in .linesF that holds the full
   representation.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.
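   For example, a line whose 64 SVals contain more than four distinct
   values cannot be represented by a LineZ, whose dictionary has only
   four entries, and so has to be spilled to a LineF.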
496 497 RC obligations: the RCs presented to the user include exactly 498 the values in: 499 * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID 500 * F reps that are in use (.inUse == True) 501 502 Hence the following actions at the following transitions are required: 503 504 F rep: .inUse==True -> .inUse==False -- rcdec_LineF 505 F rep: .inUse==False -> .inUse==True -- rcinc_LineF 506 Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ 507 Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ 508 */ 509 typedef 510 struct { 511 UInt magic; 512 LineZ linesZ[N_SECMAP_ZLINES]; 513 LineF* linesF; 514 UInt linesF_size; 515 } 516 SecMap; 517 518 #define SecMap_MAGIC 0x571e58cbU 519 520 static inline Bool is_sane_SecMap ( SecMap* sm ) { 521 return sm != NULL && sm->magic == SecMap_MAGIC; 522 } 523 524 /* ------ Cache ------ */ 525 526 #define N_WAY_BITS 16 527 #define N_WAY_NENT (1 << N_WAY_BITS) 528 529 /* Each tag is the address of the associated CacheLine, rounded down 530 to a CacheLine address boundary. A CacheLine size must be a power 531 of 2 and must be 8 or more. Hence an easy way to initialise the 532 cache so it is empty is to set all the tag values to any value % 8 533 != 0, eg 1. This means all queries in the cache initially miss. 534 It does however require us to detect and not writeback, any line 535 with a bogus tag. */ 536 typedef 537 struct { 538 CacheLine lyns0[N_WAY_NENT]; 539 Addr tags0[N_WAY_NENT]; 540 } 541 Cache; 542 543 static inline Bool is_valid_scache_tag ( Addr tag ) { 544 /* a valid tag should be naturally aligned to the start of 545 a CacheLine. */ 546 return 0 == (tag & (N_LINE_ARANGE - 1)); 547 } 548 549 550 /* --------- Primary data structures --------- */ 551 552 /* Shadow memory primary map */ 553 static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */ 554 static Cache cache_shmem; 555 556 557 static UWord stats__secmaps_search = 0; // # SM finds 558 static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs 559 static UWord stats__secmaps_allocd = 0; // # SecMaps issued 560 static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered 561 static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued 562 static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage 563 static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued 564 static UWord stats__secmap_linesF_bytes = 0; // .. 
                                           // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches       = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks        = 0; // # Z lines written back
static UWord stats__cache_F_fetches       = 0; // # F lines fetched
static UWord stats__cache_F_wbacks        = 0; // # F lines written back
static UWord stats__cache_invals          = 0; // # cache invals
static UWord stats__cache_flushes         = 0; // # cache flushes
static UWord stats__cache_totrefs         = 0; // # total accesses
static UWord stats__cache_totmisses       = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises      = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s        = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s        = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s        = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s        = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s       = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s       = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s       = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s       = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s        = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s       = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s       = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s       = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s       = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s        = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits    = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits    = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits     = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown  = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown  = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown   = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick             = 0; // # calls to VTS__tick
static UWord stats__vts__join             = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ           = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural   = 0; // # calls to VTS__cmp_structural

// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;

// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;

// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;

// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static
HChar* shmem__bigchunk_next = NULL; 632 static HChar* shmem__bigchunk_end1 = NULL; 633 634 static void* shmem__bigchunk_alloc ( SizeT n ) 635 { 636 const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4; 637 tl_assert(n > 0); 638 n = VG_ROUNDUP(n, 16); 639 tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1); 640 tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next 641 <= (SSizeT)sHMEM__BIGCHUNK_SIZE); 642 if (shmem__bigchunk_next + n > shmem__bigchunk_end1) { 643 if (0) 644 VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n", 645 (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next)); 646 shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE ); 647 if (shmem__bigchunk_next == NULL) 648 VG_(out_of_memory_NORETURN)( 649 "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE ); 650 shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE; 651 } 652 tl_assert(shmem__bigchunk_next); 653 tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) ); 654 tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1); 655 shmem__bigchunk_next += n; 656 return shmem__bigchunk_next - n; 657 } 658 659 static SecMap* shmem__alloc_SecMap ( void ) 660 { 661 Word i, j; 662 SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) ); 663 if (0) VG_(printf)("alloc_SecMap %p\n",sm); 664 tl_assert(sm); 665 sm->magic = SecMap_MAGIC; 666 for (i = 0; i < N_SECMAP_ZLINES; i++) { 667 sm->linesZ[i].dict[0] = SVal_NOACCESS; 668 sm->linesZ[i].dict[1] = SVal_INVALID; 669 sm->linesZ[i].dict[2] = SVal_INVALID; 670 sm->linesZ[i].dict[3] = SVal_INVALID; 671 for (j = 0; j < N_LINE_ARANGE/4; j++) 672 sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */ 673 } 674 sm->linesF = NULL; 675 sm->linesF_size = 0; 676 stats__secmaps_allocd++; 677 stats__secmap_ga_space_covered += N_SECMAP_ARANGE; 678 stats__secmap_linesZ_allocd += N_SECMAP_ZLINES; 679 stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ); 680 return sm; 681 } 682 683 typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt; 684 static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} }; 685 686 static SecMap* shmem__find_SecMap ( Addr ga ) 687 { 688 SecMap* sm = NULL; 689 Addr gaKey = shmem__round_to_SecMap_base(ga); 690 // Cache 691 stats__secmaps_search++; 692 if (LIKELY(gaKey == smCache[0].gaKey)) 693 return smCache[0].sm; 694 if (LIKELY(gaKey == smCache[1].gaKey)) { 695 SMCacheEnt tmp = smCache[0]; 696 smCache[0] = smCache[1]; 697 smCache[1] = tmp; 698 return smCache[0].sm; 699 } 700 if (gaKey == smCache[2].gaKey) { 701 SMCacheEnt tmp = smCache[1]; 702 smCache[1] = smCache[2]; 703 smCache[2] = tmp; 704 return smCache[1].sm; 705 } 706 // end Cache 707 stats__secmaps_search_slow++; 708 if (VG_(lookupFM)( map_shmem, 709 NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) { 710 tl_assert(sm != NULL); 711 smCache[2] = smCache[1]; 712 smCache[1] = smCache[0]; 713 smCache[0].gaKey = gaKey; 714 smCache[0].sm = sm; 715 } else { 716 tl_assert(sm == NULL); 717 } 718 return sm; 719 } 720 721 static SecMap* shmem__find_or_alloc_SecMap ( Addr ga ) 722 { 723 SecMap* sm = shmem__find_SecMap ( ga ); 724 if (LIKELY(sm)) { 725 return sm; 726 } else { 727 /* create a new one */ 728 Addr gaKey = shmem__round_to_SecMap_base(ga); 729 sm = shmem__alloc_SecMap(); 730 tl_assert(sm); 731 VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm ); 732 return sm; 733 } 734 } 735 736 737 /* ------------ LineF and LineZ related ------------ */ 738 739 static void rcinc_LineF ( LineF* lineF ) { 740 UWord i; 741 tl_assert(lineF->inUse); 742 for (i = 0; i < N_LINE_ARANGE; i++) 743 
rcinc(lineF->w64s[i]); 744 } 745 746 static void rcdec_LineF ( LineF* lineF ) { 747 UWord i; 748 tl_assert(lineF->inUse); 749 for (i = 0; i < N_LINE_ARANGE; i++) 750 rcdec(lineF->w64s[i]); 751 } 752 753 static void rcinc_LineZ ( LineZ* lineZ ) { 754 tl_assert(lineZ->dict[0] != SVal_INVALID); 755 rcinc(lineZ->dict[0]); 756 if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]); 757 if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]); 758 if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]); 759 } 760 761 static void rcdec_LineZ ( LineZ* lineZ ) { 762 tl_assert(lineZ->dict[0] != SVal_INVALID); 763 rcdec(lineZ->dict[0]); 764 if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]); 765 if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]); 766 if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]); 767 } 768 769 inline 770 static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) { 771 Word bix, shft, mask, prep; 772 tl_assert(ix >= 0); 773 bix = ix >> 2; 774 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */ 775 mask = 3 << shft; 776 prep = b2 << shft; 777 arr[bix] = (arr[bix] & ~mask) | prep; 778 } 779 780 inline 781 static UWord read_twobit_array ( UChar* arr, UWord ix ) { 782 Word bix, shft; 783 tl_assert(ix >= 0); 784 bix = ix >> 2; 785 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */ 786 return (arr[bix] >> shft) & 3; 787 } 788 789 /* Given address 'tag', find either the Z or F line containing relevant 790 data, so it can be read into the cache. 791 */ 792 static void find_ZF_for_reading ( /*OUT*/LineZ** zp, 793 /*OUT*/LineF** fp, Addr tag ) { 794 LineZ* lineZ; 795 LineF* lineF; 796 UWord zix; 797 SecMap* sm = shmem__find_or_alloc_SecMap(tag); 798 UWord smoff = shmem__get_SecMap_offset(tag); 799 /* since smoff is derived from a valid tag, it should be 800 cacheline-aligned. */ 801 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1))); 802 zix = smoff >> N_LINE_BITS; 803 tl_assert(zix < N_SECMAP_ZLINES); 804 lineZ = &sm->linesZ[zix]; 805 lineF = NULL; 806 if (lineZ->dict[0] == SVal_INVALID) { 807 UInt fix = (UInt)lineZ->dict[1]; 808 tl_assert(sm->linesF); 809 tl_assert(sm->linesF_size > 0); 810 tl_assert(fix >= 0 && fix < sm->linesF_size); 811 lineF = &sm->linesF[fix]; 812 tl_assert(lineF->inUse); 813 lineZ = NULL; 814 } 815 *zp = lineZ; 816 *fp = lineF; 817 } 818 819 /* Given address 'tag', return the relevant SecMap and the index of 820 the LineZ within it, in the expectation that the line is to be 821 overwritten. Regardless of whether 'tag' is currently associated 822 with a Z or F representation, to rcdec on the current 823 representation, in recognition of the fact that the contents are 824 just about to be overwritten. */ 825 static __attribute__((noinline)) 826 void find_Z_for_writing ( /*OUT*/SecMap** smp, 827 /*OUT*/Word* zixp, 828 Addr tag ) { 829 LineZ* lineZ; 830 LineF* lineF; 831 UWord zix; 832 SecMap* sm = shmem__find_or_alloc_SecMap(tag); 833 UWord smoff = shmem__get_SecMap_offset(tag); 834 /* since smoff is derived from a valid tag, it should be 835 cacheline-aligned. */ 836 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1))); 837 zix = smoff >> N_LINE_BITS; 838 tl_assert(zix < N_SECMAP_ZLINES); 839 lineZ = &sm->linesZ[zix]; 840 lineF = NULL; 841 /* re RCs, we are freeing up this LineZ/LineF so that new data can 842 be parked in it. Hence have to rcdec it accordingly. */ 843 /* If lineZ has an associated lineF, free it up. 
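      That is, rcdec its contents and mark it no longer in use, since
      whatever is stored here is about to be overwritten.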
*/ 844 if (lineZ->dict[0] == SVal_INVALID) { 845 UInt fix = (UInt)lineZ->dict[1]; 846 tl_assert(sm->linesF); 847 tl_assert(sm->linesF_size > 0); 848 tl_assert(fix >= 0 && fix < sm->linesF_size); 849 lineF = &sm->linesF[fix]; 850 tl_assert(lineF->inUse); 851 rcdec_LineF(lineF); 852 lineF->inUse = False; 853 } else { 854 rcdec_LineZ(lineZ); 855 } 856 *smp = sm; 857 *zixp = zix; 858 } 859 860 static __attribute__((noinline)) 861 void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) { 862 UInt i, new_size; 863 LineF* nyu; 864 865 if (sm->linesF) { 866 tl_assert(sm->linesF_size > 0); 867 } else { 868 tl_assert(sm->linesF_size == 0); 869 } 870 871 if (sm->linesF) { 872 for (i = 0; i < sm->linesF_size; i++) { 873 if (!sm->linesF[i].inUse) { 874 *fixp = (Word)i; 875 return; 876 } 877 } 878 } 879 880 /* No free F line found. Expand existing array and try again. */ 881 new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size; 882 nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)", 883 new_size * sizeof(LineF) ); 884 tl_assert(nyu); 885 886 stats__secmap_linesF_allocd += (new_size - sm->linesF_size); 887 stats__secmap_linesF_bytes += (new_size - sm->linesF_size) 888 * sizeof(LineF); 889 890 if (0) 891 VG_(printf)("SM %p: expand F array from %d to %d\n", 892 sm, (Int)sm->linesF_size, new_size); 893 894 for (i = 0; i < new_size; i++) 895 nyu[i].inUse = False; 896 897 if (sm->linesF) { 898 for (i = 0; i < sm->linesF_size; i++) { 899 tl_assert(sm->linesF[i].inUse); 900 nyu[i] = sm->linesF[i]; 901 } 902 VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) ); 903 HG_(free)(sm->linesF); 904 } 905 906 sm->linesF = nyu; 907 sm->linesF_size = new_size; 908 909 for (i = 0; i < sm->linesF_size; i++) { 910 if (!sm->linesF[i].inUse) { 911 *fixp = (Word)i; 912 return; 913 } 914 } 915 916 /*NOTREACHED*/ 917 tl_assert(0); 918 } 919 920 921 /* ------------ CacheLine and implicit-tree related ------------ */ 922 923 __attribute__((unused)) 924 static void pp_CacheLine ( CacheLine* cl ) { 925 Word i; 926 if (!cl) { 927 VG_(printf)("%s","pp_CacheLine(NULL)\n"); 928 return; 929 } 930 for (i = 0; i < N_LINE_TREES; i++) 931 VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]); 932 for (i = 0; i < N_LINE_ARANGE; i++) 933 VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]); 934 } 935 936 static UChar descr_to_validbits ( UShort descr ) 937 { 938 /* a.k.a Party Time for gcc's constant folder */ 939 # define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \ 940 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \ 941 ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \ 942 ( (b8_5) << 12) | ( (b8_4) << 11) | \ 943 ( (b8_3) << 10) | ( (b8_2) << 9) | \ 944 ( (b8_1) << 8) | ( (b8_0) << 7) | \ 945 ( (b16_3) << 6) | ( (b32_1) << 5) | \ 946 ( (b16_2) << 4) | ( (b64) << 3) | \ 947 ( (b16_1) << 2) | ( (b32_0) << 1) | \ 948 ( (b16_0) << 0) ) ) 949 950 # define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \ 951 ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \ 952 ( (bit5) << 5) | ( (bit4) << 4) | \ 953 ( (bit3) << 3) | ( (bit2) << 2) | \ 954 ( (bit1) << 1) | ( (bit0) << 0) ) ) 955 956 /* these should all get folded out at compile time */ 957 tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7); 958 tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0); 959 tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3); 960 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1); 961 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2); 962 
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64); 963 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1); 964 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0); 965 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0); 966 967 switch (descr) { 968 /* 969 +--------------------------------- TREE_DESCR_8_7 970 | +------------------- TREE_DESCR_8_0 971 | | +---------------- TREE_DESCR_16_3 972 | | | +-------------- TREE_DESCR_32_1 973 | | | | +------------ TREE_DESCR_16_2 974 | | | | | +--------- TREE_DESCR_64 975 | | | | | | +------ TREE_DESCR_16_1 976 | | | | | | | +---- TREE_DESCR_32_0 977 | | | | | | | | +-- TREE_DESCR_16_0 978 | | | | | | | | | 979 | | | | | | | | | GRANULARITY, 7 -> 0 */ 980 case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */ 981 return BYTE(1,1,1,1,1,1,1,1); 982 case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */ 983 return BYTE(1,1,0,1,1,1,1,1); 984 case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */ 985 return BYTE(0,1,1,1,1,1,1,1); 986 case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */ 987 return BYTE(0,1,0,1,1,1,1,1); 988 989 case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */ 990 return BYTE(1,1,1,1,1,1,0,1); 991 case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */ 992 return BYTE(1,1,0,1,1,1,0,1); 993 case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */ 994 return BYTE(0,1,1,1,1,1,0,1); 995 case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */ 996 return BYTE(0,1,0,1,1,1,0,1); 997 998 case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */ 999 return BYTE(1,1,1,1,0,1,1,1); 1000 case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */ 1001 return BYTE(1,1,0,1,0,1,1,1); 1002 case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */ 1003 return BYTE(0,1,1,1,0,1,1,1); 1004 case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */ 1005 return BYTE(0,1,0,1,0,1,1,1); 1006 1007 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */ 1008 return BYTE(1,1,1,1,0,1,0,1); 1009 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */ 1010 return BYTE(1,1,0,1,0,1,0,1); 1011 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */ 1012 return BYTE(0,1,1,1,0,1,0,1); 1013 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */ 1014 return BYTE(0,1,0,1,0,1,0,1); 1015 1016 case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */ 1017 return BYTE(0,0,0,1,1,1,1,1); 1018 case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */ 1019 return BYTE(0,0,0,1,1,1,0,1); 1020 case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */ 1021 return BYTE(0,0,0,1,0,1,1,1); 1022 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */ 1023 return BYTE(0,0,0,1,0,1,0,1); 1024 1025 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */ 1026 return BYTE(1,1,1,1,0,0,0,1); 1027 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */ 1028 return BYTE(1,1,0,1,0,0,0,1); 1029 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */ 1030 return BYTE(0,1,1,1,0,0,0,1); 1031 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */ 1032 return BYTE(0,1,0,1,0,0,0,1); 1033 1034 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */ 1035 return BYTE(0,0,0,1,0,0,0,1); 1036 1037 case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */ 1038 return BYTE(0,0,0,0,0,0,0,1); 1039 1040 default: return BYTE(0,0,0,0,0,0,0,0); 1041 /* 
INVALID - any valid descr produces at least one 1042 valid bit in tree[0..7]*/ 1043 } 1044 /* NOTREACHED*/ 1045 tl_assert(0); 1046 1047 # undef DESCR 1048 # undef BYTE 1049 } 1050 1051 __attribute__((unused)) 1052 static Bool is_sane_Descr ( UShort descr ) { 1053 return descr_to_validbits(descr) != 0; 1054 } 1055 1056 static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) { 1057 VG_(sprintf)(dst, 1058 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d", 1059 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0), 1060 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0), 1061 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0), 1062 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0), 1063 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0), 1064 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0), 1065 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0), 1066 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0), 1067 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0), 1068 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0), 1069 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0), 1070 (Int)((descr & TREE_DESCR_64) ? 1 : 0), 1071 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0), 1072 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0), 1073 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0) 1074 ); 1075 } 1076 static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) { 1077 VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d", 1078 (Int)((byte & 128) ? 1 : 0), 1079 (Int)((byte & 64) ? 1 : 0), 1080 (Int)((byte & 32) ? 1 : 0), 1081 (Int)((byte & 16) ? 1 : 0), 1082 (Int)((byte & 8) ? 1 : 0), 1083 (Int)((byte & 4) ? 1 : 0), 1084 (Int)((byte & 2) ? 1 : 0), 1085 (Int)((byte & 1) ? 1 : 0) 1086 ); 1087 } 1088 1089 static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) { 1090 Word i; 1091 UChar validbits = descr_to_validbits(descr); 1092 HChar buf[128], buf2[128]; 1093 if (validbits == 0) 1094 goto bad; 1095 for (i = 0; i < 8; i++) { 1096 if (validbits & (1<<i)) { 1097 if (tree[i] == SVal_INVALID) 1098 goto bad; 1099 } else { 1100 if (tree[i] != SVal_INVALID) 1101 goto bad; 1102 } 1103 } 1104 return True; 1105 bad: 1106 sprintf_Descr( buf, descr ); 1107 sprintf_Byte( buf2, validbits ); 1108 VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n"); 1109 VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2); 1110 VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf); 1111 for (i = 0; i < 8; i++) 1112 VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]); 1113 VG_(printf)("%s","}\n"); 1114 return 0; 1115 } 1116 1117 static Bool is_sane_CacheLine ( CacheLine* cl ) 1118 { 1119 Word tno, cloff; 1120 1121 if (!cl) goto bad; 1122 1123 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) { 1124 UShort descr = cl->descrs[tno]; 1125 SVal* tree = &cl->svals[cloff]; 1126 if (!is_sane_Descr_and_Tree(descr, tree)) 1127 goto bad; 1128 } 1129 tl_assert(cloff == N_LINE_ARANGE); 1130 return True; 1131 bad: 1132 pp_CacheLine(cl); 1133 return False; 1134 } 1135 1136 static UShort normalise_tree ( /*MOD*/SVal* tree ) 1137 { 1138 UShort descr; 1139 /* pre: incoming tree[0..7] does not have any invalid shvals, in 1140 particular no zeroes. 
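      Adjacent equal values are then merged upwards (pairs of 8-bit
      nodes into a 16-bit node, and so on up to 64 bits), and the
      returned descriptor records which nodes of the implicit tree
      still hold the line's values.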
*/ 1141 if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID 1142 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID 1143 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID 1144 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID)) 1145 tl_assert(0); 1146 1147 descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5 1148 | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2 1149 | TREE_DESCR_8_1 | TREE_DESCR_8_0; 1150 /* build 16-bit layer */ 1151 if (tree[1] == tree[0]) { 1152 tree[1] = SVal_INVALID; 1153 descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0); 1154 descr |= TREE_DESCR_16_0; 1155 } 1156 if (tree[3] == tree[2]) { 1157 tree[3] = SVal_INVALID; 1158 descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2); 1159 descr |= TREE_DESCR_16_1; 1160 } 1161 if (tree[5] == tree[4]) { 1162 tree[5] = SVal_INVALID; 1163 descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4); 1164 descr |= TREE_DESCR_16_2; 1165 } 1166 if (tree[7] == tree[6]) { 1167 tree[7] = SVal_INVALID; 1168 descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6); 1169 descr |= TREE_DESCR_16_3; 1170 } 1171 /* build 32-bit layer */ 1172 if (tree[2] == tree[0] 1173 && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) { 1174 tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */ 1175 descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0); 1176 descr |= TREE_DESCR_32_0; 1177 } 1178 if (tree[6] == tree[4] 1179 && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) { 1180 tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */ 1181 descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2); 1182 descr |= TREE_DESCR_32_1; 1183 } 1184 /* build 64-bit layer */ 1185 if (tree[4] == tree[0] 1186 && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) { 1187 tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */ 1188 descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0); 1189 descr |= TREE_DESCR_64; 1190 } 1191 return descr; 1192 } 1193 1194 /* This takes a cacheline where all the data is at the leaves 1195 (w8[..]) and builds a correctly normalised tree. */ 1196 static void normalise_CacheLine ( /*MOD*/CacheLine* cl ) 1197 { 1198 Word tno, cloff; 1199 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) { 1200 SVal* tree = &cl->svals[cloff]; 1201 cl->descrs[tno] = normalise_tree( tree ); 1202 } 1203 tl_assert(cloff == N_LINE_ARANGE); 1204 if (CHECK_ZSM) 1205 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 1206 stats__cline_normalises++; 1207 } 1208 1209 1210 typedef struct { UChar count; SVal sval; } CountedSVal; 1211 1212 static 1213 void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst, 1214 /*OUT*/Word* dstUsedP, 1215 Word nDst, CacheLine* src ) 1216 { 1217 Word tno, cloff, dstUsed; 1218 1219 tl_assert(nDst == N_LINE_ARANGE); 1220 dstUsed = 0; 1221 1222 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) { 1223 UShort descr = src->descrs[tno]; 1224 SVal* tree = &src->svals[cloff]; 1225 1226 /* sequentialise the tree described by (descr,tree). 
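         That is, emit one (count, sval) pair per valid node, in
         address order, where count is 8, 4, 2 or 1 bytes.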
*/ 1227 # define PUT(_n,_v) \ 1228 do { dst[dstUsed ].count = (_n); \ 1229 dst[dstUsed++].sval = (_v); \ 1230 } while (0) 1231 1232 /* byte 0 */ 1233 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else 1234 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else 1235 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else 1236 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]); 1237 /* byte 1 */ 1238 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]); 1239 /* byte 2 */ 1240 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else 1241 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]); 1242 /* byte 3 */ 1243 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]); 1244 /* byte 4 */ 1245 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else 1246 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else 1247 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]); 1248 /* byte 5 */ 1249 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]); 1250 /* byte 6 */ 1251 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else 1252 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]); 1253 /* byte 7 */ 1254 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]); 1255 1256 # undef PUT 1257 /* END sequentialise the tree described by (descr,tree). */ 1258 1259 } 1260 tl_assert(cloff == N_LINE_ARANGE); 1261 tl_assert(dstUsed <= nDst); 1262 1263 *dstUsedP = dstUsed; 1264 } 1265 1266 /* Write the cacheline 'wix' to backing store. Where it ends up 1267 is determined by its tag field. */ 1268 static __attribute__((noinline)) void cacheline_wback ( UWord wix ) 1269 { 1270 Word i, j, k, m; 1271 Addr tag; 1272 SecMap* sm; 1273 CacheLine* cl; 1274 LineZ* lineZ; 1275 LineF* lineF; 1276 Word zix, fix, csvalsUsed; 1277 CountedSVal csvals[N_LINE_ARANGE]; 1278 SVal sv; 1279 1280 if (0) 1281 VG_(printf)("scache wback line %d\n", (Int)wix); 1282 1283 tl_assert(wix >= 0 && wix < N_WAY_NENT); 1284 1285 tag = cache_shmem.tags0[wix]; 1286 cl = &cache_shmem.lyns0[wix]; 1287 1288 /* The cache line may have been invalidated; if so, ignore it. */ 1289 if (!is_valid_scache_tag(tag)) 1290 return; 1291 1292 /* Where are we going to put it? */ 1293 sm = NULL; 1294 lineZ = NULL; 1295 lineF = NULL; 1296 zix = fix = -1; 1297 1298 /* find the Z line to write in and rcdec it or the associated F 1299 line. */ 1300 find_Z_for_writing( &sm, &zix, tag ); 1301 1302 tl_assert(sm); 1303 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES); 1304 lineZ = &sm->linesZ[zix]; 1305 1306 /* Generate the data to be stored */ 1307 if (CHECK_ZSM) 1308 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 1309 1310 csvalsUsed = -1; 1311 sequentialise_CacheLine( csvals, &csvalsUsed, 1312 N_LINE_ARANGE, cl ); 1313 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE); 1314 if (0) VG_(printf)("%lu ", csvalsUsed); 1315 1316 lineZ->dict[0] = lineZ->dict[1] 1317 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID; 1318 1319 /* i indexes actual shadow values, k is cursor in csvals */ 1320 i = 0; 1321 for (k = 0; k < csvalsUsed; k++) { 1322 1323 sv = csvals[k].sval; 1324 if (CHECK_ZSM) 1325 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8); 1326 /* do we already have it? */ 1327 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; } 1328 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; } 1329 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; } 1330 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; } 1331 /* no. look for a free slot. 
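         If all four dictionary slots are already taken, give up on
         the compressed representation and fall back to the full
         (LineF) rep instead.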
*/ 1332 if (CHECK_ZSM) 1333 tl_assert(sv != SVal_INVALID); 1334 if (lineZ->dict[0] 1335 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; } 1336 if (lineZ->dict[1] 1337 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; } 1338 if (lineZ->dict[2] 1339 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; } 1340 if (lineZ->dict[3] 1341 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; } 1342 break; /* we'll have to use the f rep */ 1343 dict_ok: 1344 m = csvals[k].count; 1345 if (m == 8) { 1346 write_twobit_array( lineZ->ix2s, i+0, j ); 1347 write_twobit_array( lineZ->ix2s, i+1, j ); 1348 write_twobit_array( lineZ->ix2s, i+2, j ); 1349 write_twobit_array( lineZ->ix2s, i+3, j ); 1350 write_twobit_array( lineZ->ix2s, i+4, j ); 1351 write_twobit_array( lineZ->ix2s, i+5, j ); 1352 write_twobit_array( lineZ->ix2s, i+6, j ); 1353 write_twobit_array( lineZ->ix2s, i+7, j ); 1354 i += 8; 1355 } 1356 else if (m == 4) { 1357 write_twobit_array( lineZ->ix2s, i+0, j ); 1358 write_twobit_array( lineZ->ix2s, i+1, j ); 1359 write_twobit_array( lineZ->ix2s, i+2, j ); 1360 write_twobit_array( lineZ->ix2s, i+3, j ); 1361 i += 4; 1362 } 1363 else if (m == 1) { 1364 write_twobit_array( lineZ->ix2s, i+0, j ); 1365 i += 1; 1366 } 1367 else if (m == 2) { 1368 write_twobit_array( lineZ->ix2s, i+0, j ); 1369 write_twobit_array( lineZ->ix2s, i+1, j ); 1370 i += 2; 1371 } 1372 else { 1373 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */ 1374 } 1375 1376 } 1377 1378 if (LIKELY(i == N_LINE_ARANGE)) { 1379 /* Construction of the compressed representation was 1380 successful. */ 1381 rcinc_LineZ(lineZ); 1382 stats__cache_Z_wbacks++; 1383 } else { 1384 /* Cannot use the compressed(z) representation. Use the full(f) 1385 rep instead. */ 1386 tl_assert(i >= 0 && i < N_LINE_ARANGE); 1387 alloc_F_for_writing( sm, &fix ); 1388 tl_assert(sm->linesF); 1389 tl_assert(sm->linesF_size > 0); 1390 tl_assert(fix >= 0 && fix < (Word)sm->linesF_size); 1391 lineF = &sm->linesF[fix]; 1392 tl_assert(!lineF->inUse); 1393 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID; 1394 lineZ->dict[1] = (SVal)fix; 1395 lineF->inUse = True; 1396 i = 0; 1397 for (k = 0; k < csvalsUsed; k++) { 1398 if (CHECK_ZSM) 1399 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8); 1400 sv = csvals[k].sval; 1401 if (CHECK_ZSM) 1402 tl_assert(sv != SVal_INVALID); 1403 for (m = csvals[k].count; m > 0; m--) { 1404 lineF->w64s[i] = sv; 1405 i++; 1406 } 1407 } 1408 tl_assert(i == N_LINE_ARANGE); 1409 rcinc_LineF(lineF); 1410 stats__cache_F_wbacks++; 1411 } 1412 } 1413 1414 /* Fetch the cacheline 'wix' from the backing store. The tag 1415 associated with 'wix' is assumed to have already been filled in; 1416 hence that is used to determine where in the backing store to read 1417 from. */ 1418 static __attribute__((noinline)) void cacheline_fetch ( UWord wix ) 1419 { 1420 Word i; 1421 Addr tag; 1422 CacheLine* cl; 1423 LineZ* lineZ; 1424 LineF* lineF; 1425 1426 if (0) 1427 VG_(printf)("scache fetch line %d\n", (Int)wix); 1428 1429 tl_assert(wix >= 0 && wix < N_WAY_NENT); 1430 1431 tag = cache_shmem.tags0[wix]; 1432 cl = &cache_shmem.lyns0[wix]; 1433 1434 /* reject nonsense requests */ 1435 tl_assert(is_valid_scache_tag(tag)); 1436 1437 lineZ = NULL; 1438 lineF = NULL; 1439 find_ZF_for_reading( &lineZ, &lineF, tag ); 1440 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) ); 1441 1442 /* expand the data into the bottom layer of the tree, then get 1443 cacheline_normalise to build the descriptor array. 
*/ 1444 if (lineF) { 1445 tl_assert(lineF->inUse); 1446 for (i = 0; i < N_LINE_ARANGE; i++) { 1447 cl->svals[i] = lineF->w64s[i]; 1448 } 1449 stats__cache_F_fetches++; 1450 } else { 1451 for (i = 0; i < N_LINE_ARANGE; i++) { 1452 SVal sv; 1453 UWord ix = read_twobit_array( lineZ->ix2s, i ); 1454 /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */ 1455 sv = lineZ->dict[ix]; 1456 tl_assert(sv != SVal_INVALID); 1457 cl->svals[i] = sv; 1458 } 1459 stats__cache_Z_fetches++; 1460 } 1461 normalise_CacheLine( cl ); 1462 } 1463 1464 static void shmem__invalidate_scache ( void ) { 1465 Word wix; 1466 if (0) VG_(printf)("%s","scache inval\n"); 1467 tl_assert(!is_valid_scache_tag(1)); 1468 for (wix = 0; wix < N_WAY_NENT; wix++) { 1469 cache_shmem.tags0[wix] = 1/*INVALID*/; 1470 } 1471 stats__cache_invals++; 1472 } 1473 1474 static void shmem__flush_and_invalidate_scache ( void ) { 1475 Word wix; 1476 Addr tag; 1477 if (0) VG_(printf)("%s","scache flush and invalidate\n"); 1478 tl_assert(!is_valid_scache_tag(1)); 1479 for (wix = 0; wix < N_WAY_NENT; wix++) { 1480 tag = cache_shmem.tags0[wix]; 1481 if (tag == 1/*INVALID*/) { 1482 /* already invalid; nothing to do */ 1483 } else { 1484 tl_assert(is_valid_scache_tag(tag)); 1485 cacheline_wback( wix ); 1486 } 1487 cache_shmem.tags0[wix] = 1/*INVALID*/; 1488 } 1489 stats__cache_flushes++; 1490 stats__cache_invals++; 1491 } 1492 1493 1494 static inline Bool aligned16 ( Addr a ) { 1495 return 0 == (a & 1); 1496 } 1497 static inline Bool aligned32 ( Addr a ) { 1498 return 0 == (a & 3); 1499 } 1500 static inline Bool aligned64 ( Addr a ) { 1501 return 0 == (a & 7); 1502 } 1503 static inline UWord get_cacheline_offset ( Addr a ) { 1504 return (UWord)(a & (N_LINE_ARANGE - 1)); 1505 } 1506 static inline Addr cacheline_ROUNDUP ( Addr a ) { 1507 return ROUNDUP(a, N_LINE_ARANGE); 1508 } 1509 static inline Addr cacheline_ROUNDDN ( Addr a ) { 1510 return ROUNDDN(a, N_LINE_ARANGE); 1511 } 1512 static inline UWord get_treeno ( Addr a ) { 1513 return get_cacheline_offset(a) >> 3; 1514 } 1515 static inline UWord get_tree_offset ( Addr a ) { 1516 return a & 7; 1517 } 1518 1519 static __attribute__((noinline)) 1520 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */ 1521 static inline CacheLine* get_cacheline ( Addr a ) 1522 { 1523 /* tag is 'a' with the in-line offset masked out, 1524 eg a[31]..a[4] 0000 */ 1525 Addr tag = a & ~(N_LINE_ARANGE - 1); 1526 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); 1527 stats__cache_totrefs++; 1528 if (LIKELY(tag == cache_shmem.tags0[wix])) { 1529 return &cache_shmem.lyns0[wix]; 1530 } else { 1531 return get_cacheline_MISS( a ); 1532 } 1533 } 1534 1535 static __attribute__((noinline)) 1536 CacheLine* get_cacheline_MISS ( Addr a ) 1537 { 1538 /* tag is 'a' with the in-line offset masked out, 1539 eg a[31]..a[4] 0000 */ 1540 1541 CacheLine* cl; 1542 Addr* tag_old_p; 1543 Addr tag = a & ~(N_LINE_ARANGE - 1); 1544 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); 1545 1546 tl_assert(tag != cache_shmem.tags0[wix]); 1547 1548 /* Dump the old line into the backing store. 
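      But only write it back if its tag is valid; an invalid tag means
      the slot has never held real data.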
*/ 1549 stats__cache_totmisses++; 1550 1551 cl = &cache_shmem.lyns0[wix]; 1552 tag_old_p = &cache_shmem.tags0[wix]; 1553 1554 if (is_valid_scache_tag( *tag_old_p )) { 1555 /* EXPENSIVE and REDUNDANT: callee does it */ 1556 if (CHECK_ZSM) 1557 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 1558 cacheline_wback( wix ); 1559 } 1560 /* and reload the new one */ 1561 *tag_old_p = tag; 1562 cacheline_fetch( wix ); 1563 if (CHECK_ZSM) 1564 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 1565 return cl; 1566 } 1567 1568 static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) { 1569 stats__cline_64to32pulldown++; 1570 switch (toff) { 1571 case 0: case 4: 1572 tl_assert(descr & TREE_DESCR_64); 1573 tree[4] = tree[0]; 1574 descr &= ~TREE_DESCR_64; 1575 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0); 1576 break; 1577 default: 1578 tl_assert(0); 1579 } 1580 return descr; 1581 } 1582 1583 static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) { 1584 stats__cline_32to16pulldown++; 1585 switch (toff) { 1586 case 0: case 2: 1587 if (!(descr & TREE_DESCR_32_0)) { 1588 descr = pulldown_to_32(tree, 0, descr); 1589 } 1590 tl_assert(descr & TREE_DESCR_32_0); 1591 tree[2] = tree[0]; 1592 descr &= ~TREE_DESCR_32_0; 1593 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0); 1594 break; 1595 case 4: case 6: 1596 if (!(descr & TREE_DESCR_32_1)) { 1597 descr = pulldown_to_32(tree, 4, descr); 1598 } 1599 tl_assert(descr & TREE_DESCR_32_1); 1600 tree[6] = tree[4]; 1601 descr &= ~TREE_DESCR_32_1; 1602 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2); 1603 break; 1604 default: 1605 tl_assert(0); 1606 } 1607 return descr; 1608 } 1609 1610 static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) { 1611 stats__cline_16to8pulldown++; 1612 switch (toff) { 1613 case 0: case 1: 1614 if (!(descr & TREE_DESCR_16_0)) { 1615 descr = pulldown_to_16(tree, 0, descr); 1616 } 1617 tl_assert(descr & TREE_DESCR_16_0); 1618 tree[1] = tree[0]; 1619 descr &= ~TREE_DESCR_16_0; 1620 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0); 1621 break; 1622 case 2: case 3: 1623 if (!(descr & TREE_DESCR_16_1)) { 1624 descr = pulldown_to_16(tree, 2, descr); 1625 } 1626 tl_assert(descr & TREE_DESCR_16_1); 1627 tree[3] = tree[2]; 1628 descr &= ~TREE_DESCR_16_1; 1629 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2); 1630 break; 1631 case 4: case 5: 1632 if (!(descr & TREE_DESCR_16_2)) { 1633 descr = pulldown_to_16(tree, 4, descr); 1634 } 1635 tl_assert(descr & TREE_DESCR_16_2); 1636 tree[5] = tree[4]; 1637 descr &= ~TREE_DESCR_16_2; 1638 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4); 1639 break; 1640 case 6: case 7: 1641 if (!(descr & TREE_DESCR_16_3)) { 1642 descr = pulldown_to_16(tree, 6, descr); 1643 } 1644 tl_assert(descr & TREE_DESCR_16_3); 1645 tree[7] = tree[6]; 1646 descr &= ~TREE_DESCR_16_3; 1647 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6); 1648 break; 1649 default: 1650 tl_assert(0); 1651 } 1652 return descr; 1653 } 1654 1655 1656 static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) { 1657 UShort mask; 1658 switch (toff) { 1659 case 0: 1660 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0; 1661 tl_assert( (descr & mask) == mask ); 1662 descr &= ~mask; 1663 descr |= TREE_DESCR_16_0; 1664 break; 1665 case 2: 1666 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2; 1667 tl_assert( (descr & mask) == mask ); 1668 descr &= ~mask; 1669 descr |= TREE_DESCR_16_1; 1670 break; 1671 case 4: 1672 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4; 1673 tl_assert( (descr & mask) == mask ); 1674 descr &= ~mask; 1675 descr |= TREE_DESCR_16_2; 1676 
break; 1677 case 6: 1678 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6; 1679 tl_assert( (descr & mask) == mask ); 1680 descr &= ~mask; 1681 descr |= TREE_DESCR_16_3; 1682 break; 1683 default: 1684 tl_assert(0); 1685 } 1686 return descr; 1687 } 1688 1689 static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) { 1690 UShort mask; 1691 switch (toff) { 1692 case 0: 1693 if (!(descr & TREE_DESCR_16_0)) 1694 descr = pullup_descr_to_16(descr, 0); 1695 if (!(descr & TREE_DESCR_16_1)) 1696 descr = pullup_descr_to_16(descr, 2); 1697 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0; 1698 tl_assert( (descr & mask) == mask ); 1699 descr &= ~mask; 1700 descr |= TREE_DESCR_32_0; 1701 break; 1702 case 4: 1703 if (!(descr & TREE_DESCR_16_2)) 1704 descr = pullup_descr_to_16(descr, 4); 1705 if (!(descr & TREE_DESCR_16_3)) 1706 descr = pullup_descr_to_16(descr, 6); 1707 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2; 1708 tl_assert( (descr & mask) == mask ); 1709 descr &= ~mask; 1710 descr |= TREE_DESCR_32_1; 1711 break; 1712 default: 1713 tl_assert(0); 1714 } 1715 return descr; 1716 } 1717 1718 static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) { 1719 switch (toff) { 1720 case 0: case 4: 1721 return 0 != (descr & TREE_DESCR_64); 1722 default: 1723 tl_assert(0); 1724 } 1725 } 1726 1727 static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) { 1728 switch (toff) { 1729 case 0: 1730 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0)); 1731 case 2: 1732 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2)); 1733 case 4: 1734 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4)); 1735 case 6: 1736 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6)); 1737 default: 1738 tl_assert(0); 1739 } 1740 } 1741 1742 /* ------------ Cache management ------------ */ 1743 1744 static void zsm_flush_cache ( void ) 1745 { 1746 shmem__flush_and_invalidate_scache(); 1747 } 1748 1749 1750 static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) ) 1751 { 1752 tl_assert( sizeof(UWord) == sizeof(Addr) ); 1753 1754 rcinc = p_rcinc; 1755 rcdec = p_rcdec; 1756 1757 tl_assert(map_shmem == NULL); 1758 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)", 1759 HG_(free), 1760 NULL/*unboxed UWord cmp*/); 1761 tl_assert(map_shmem != NULL); 1762 shmem__invalidate_scache(); 1763 1764 /* a SecMap must contain an integral number of CacheLines */ 1765 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE)); 1766 /* also ... a CacheLine holds an integral number of trees */ 1767 tl_assert(0 == (N_LINE_ARANGE % 8)); 1768 } 1769 1770 ///////////////////////////////////////////////////////////////// 1771 ///////////////////////////////////////////////////////////////// 1772 // // 1773 // SECTION END compressed shadow memory // 1774 // // 1775 ///////////////////////////////////////////////////////////////// 1776 ///////////////////////////////////////////////////////////////// 1777 1778 1779 1780 ///////////////////////////////////////////////////////////////// 1781 ///////////////////////////////////////////////////////////////// 1782 // // 1783 // SECTION BEGIN vts primitives // 1784 // // 1785 ///////////////////////////////////////////////////////////////// 1786 ///////////////////////////////////////////////////////////////// 1787 1788 1789 /* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely 1790 being compact stand-ins for Thr*'s. Use these functions to map 1791 between them. 
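(Both directions are cheap: Thr__to_ThrID simply reads thr->thrid, and Thr__from_ThrID, defined in the Threads section further down, indexes thrid_to_thr_map at thrid - 1024.)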
*/ 1792 static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */ 1793 static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */ 1794 1795 __attribute__((noreturn)) 1796 static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs ) 1797 { 1798 if (due_to_nThrs) { 1799 const HChar* s = 1800 "\n" 1801 "Helgrind: cannot continue, run aborted: too many threads.\n" 1802 "Sorry. Helgrind can only handle programs that create\n" 1803 "%'llu or fewer threads over their entire lifetime.\n" 1804 "\n"; 1805 VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024)); 1806 } else { 1807 const HChar* s = 1808 "\n" 1809 "Helgrind: cannot continue, run aborted: too many\n" 1810 "synchronisation events. Sorry. Helgrind can only handle\n" 1811 "programs which perform %'llu or fewer\n" 1812 "inter-thread synchronisation events (locks, unlocks, etc).\n" 1813 "\n"; 1814 VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1); 1815 } 1816 VG_(exit)(1); 1817 /*NOTREACHED*/ 1818 tl_assert(0); /*wtf?!*/ 1819 } 1820 1821 1822 /* The dead thread (ThrID, actually) table. A thread may only be 1823 listed here if we have been notified thereof by libhb_async_exit. 1824 New entries are added at the end. The order isn't important, but 1825 the ThrID values must be unique. This table lists the identity of 1826 all threads that have ever died -- none are ever removed. We keep 1827 this table so as to be able to prune entries from VTSs. We don't 1828 actually need to keep the set of threads that have ever died -- 1829 only the threads that have died since the previous round of 1830 pruning. But it's useful for sanity check purposes to keep the 1831 entire set, so we do. */ 1832 static XArray* /* of ThrID */ verydead_thread_table = NULL; 1833 1834 /* Arbitrary total ordering on ThrIDs. */ 1835 static Int cmp__ThrID ( const void* v1, const void* v2 ) { 1836 ThrID id1 = *(const ThrID*)v1; 1837 ThrID id2 = *(const ThrID*)v2; 1838 if (id1 < id2) return -1; 1839 if (id1 > id2) return 1; 1840 return 0; 1841 } 1842 1843 static void verydead_thread_table_init ( void ) 1844 { 1845 tl_assert(!verydead_thread_table); 1846 verydead_thread_table 1847 = VG_(newXA)( HG_(zalloc), 1848 "libhb.verydead_thread_table_init.1", 1849 HG_(free), sizeof(ThrID) ); 1850 tl_assert(verydead_thread_table); 1851 VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID); 1852 } 1853 1854 1855 /* A VTS contains .ts, its vector clock, and also .id, a field to hold 1856 a backlink for the caller's convenience. Since we have no idea 1857 what to set that to in the library, it always gets set to 1858 VtsID_INVALID. */ 1859 typedef 1860 struct { 1861 VtsID id; 1862 UInt usedTS; 1863 UInt sizeTS; 1864 ScalarTS ts[0]; 1865 } 1866 VTS; 1867 1868 /* Allocate a VTS capable of storing 'sizeTS' entries. */ 1869 static VTS* VTS__new ( const HChar* who, UInt sizeTS ); 1870 1871 /* Make a clone of 'vts', sizing the new array to exactly match the 1872 number of ScalarTSs present. */ 1873 static VTS* VTS__clone ( const HChar* who, VTS* vts ); 1874 1875 /* Make a clone of 'vts' with the thrids in 'thrids' removed. The new 1876 array is sized exactly to hold the number of required elements. 1877 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and 1878 must be in strictly increasing order. */ 1879 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel ); 1880 1881 /* Delete this VTS in its entirety. */ 1882 static void VTS__delete ( VTS* vts ); 1883 1884 /* Create a new singleton VTS in 'out'. 
Caller must have 1885 pre-allocated 'out' sufficiently big to hold the result in all 1886 possible cases. */ 1887 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym ); 1888 1889 /* Create in 'out' a VTS which is the same as 'vts' except with 1890 vts[me]++, so to speak. Caller must have pre-allocated 'out' 1891 sufficiently big to hold the result in all possible cases. */ 1892 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts ); 1893 1894 /* Create in 'out' a VTS which is the join (max) of 'a' and 1895 'b'. Caller must have pre-allocated 'out' sufficiently big to hold 1896 the result in all possible cases. */ 1897 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b ); 1898 1899 /* Compute the partial ordering relation of the two args. Although we 1900 could be completely general and return an enumeration value (EQ, 1901 LT, GT, UN), in fact we only need LEQ, and so we may as well 1902 hardwire that fact. 1903 1904 Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an 1905 invalid ThrID). In the latter case, the returned ThrID indicates 1906 the discovered point for which they are not. There may be more 1907 than one such point, but we only care about seeing one of them, not 1908 all of them. This rather strange convention is used because 1909 sometimes we want to know the actual index at which they first 1910 differ. */ 1911 static UInt VTS__cmpLEQ ( VTS* a, VTS* b ); 1912 1913 /* Compute an arbitrary structural (total) ordering on the two args, 1914 based on their VCs, so they can be looked up in a table, tree, etc. 1915 Returns -1, 0 or 1. */ 1916 static Word VTS__cmp_structural ( VTS* a, VTS* b ); 1917 1918 /* Debugging only. Display the given VTS in the buffer. */ 1919 static void VTS__show ( HChar* buf, Int nBuf, VTS* vts ); 1920 1921 /* Debugging only. Return vts[index], so to speak. */ 1922 static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ); 1923 1924 /* Notify the VTS machinery that a thread has been declared 1925 comprehensively dead: that is, it has done an async exit AND it has 1926 been joined with. This should ensure that its local clocks (.viR 1927 and .viW) will never again change, and so all mentions of this 1928 thread from all VTSs in the system may be removed. */ 1929 static void VTS__declare_thread_very_dead ( Thr* idx ); 1930 1931 /*--------------- to do with Vector Timestamps ---------------*/ 1932 1933 static Bool is_sane_VTS ( VTS* vts ) 1934 { 1935 UWord i, n; 1936 ScalarTS *st1, *st2; 1937 if (!vts) return False; 1938 if (vts->usedTS > vts->sizeTS) return False; 1939 n = vts->usedTS; 1940 if (n == 1) { 1941 st1 = &vts->ts[0]; 1942 if (st1->tym == 0) 1943 return False; 1944 } 1945 else 1946 if (n >= 2) { 1947 for (i = 0; i < n-1; i++) { 1948 st1 = &vts->ts[i]; 1949 st2 = &vts->ts[i+1]; 1950 if (st1->thrid >= st2->thrid) 1951 return False; 1952 if (st1->tym == 0 || st2->tym == 0) 1953 return False; 1954 } 1955 } 1956 return True; 1957 } 1958 1959 1960 /* Create a new, empty VTS. 1961 */ 1962 static VTS* VTS__new ( const HChar* who, UInt sizeTS ) 1963 { 1964 VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS)); 1965 tl_assert(vts->usedTS == 0); 1966 vts->sizeTS = sizeTS; 1967 *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL; 1968 return vts; 1969 } 1970 1971 /* Clone this VTS.
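(Note the magic word 0x0ddC0ffeeBadF00dULL which VTS__new writes just past the last usable entry; the asserts in the functions below treat it as a redzone, catching overruns of the .ts array.)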
1972 */ 1973 static VTS* VTS__clone ( const HChar* who, VTS* vts ) 1974 { 1975 tl_assert(vts); 1976 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL); 1977 UInt nTS = vts->usedTS; 1978 VTS* clone = VTS__new(who, nTS); 1979 clone->id = vts->id; 1980 clone->sizeTS = nTS; 1981 clone->usedTS = nTS; 1982 UInt i; 1983 for (i = 0; i < nTS; i++) { 1984 clone->ts[i] = vts->ts[i]; 1985 } 1986 tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL); 1987 return clone; 1988 } 1989 1990 1991 /* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel' 1992 must be in strictly increasing order. We could obviously do this 1993 much more efficiently (in linear time) if necessary. 1994 */ 1995 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel ) 1996 { 1997 UInt i, j; 1998 tl_assert(vts); 1999 tl_assert(thridsToDel); 2000 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL); 2001 UInt nTS = vts->usedTS; 2002 /* Figure out how many ScalarTSs will remain in the output. */ 2003 UInt nReq = nTS; 2004 for (i = 0; i < nTS; i++) { 2005 ThrID thrid = vts->ts[i].thrid; 2006 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL)) 2007 nReq--; 2008 } 2009 tl_assert(nReq <= nTS); 2010 /* Copy the ones that will remain. */ 2011 VTS* res = VTS__new(who, nReq); 2012 j = 0; 2013 for (i = 0; i < nTS; i++) { 2014 ThrID thrid = vts->ts[i].thrid; 2015 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL)) 2016 continue; 2017 res->ts[j++] = vts->ts[i]; 2018 } 2019 tl_assert(j == nReq); 2020 tl_assert(j == res->sizeTS); 2021 res->usedTS = j; 2022 tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL); 2023 return res; 2024 } 2025 2026 2027 /* Delete this VTS in its entirety. 2028 */ 2029 static void VTS__delete ( VTS* vts ) 2030 { 2031 tl_assert(vts); 2032 tl_assert(vts->usedTS <= vts->sizeTS); 2033 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL); 2034 HG_(free)(vts); 2035 } 2036 2037 2038 /* Create a new singleton VTS. 2039 */ 2040 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym ) 2041 { 2042 tl_assert(thr); 2043 tl_assert(tym >= 1); 2044 tl_assert(out); 2045 tl_assert(out->usedTS == 0); 2046 tl_assert(out->sizeTS >= 1); 2047 UInt hi = out->usedTS++; 2048 out->ts[hi].thrid = Thr__to_ThrID(thr); 2049 out->ts[hi].tym = tym; 2050 } 2051 2052 2053 /* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is 2054 not modified. 2055 */ 2056 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts ) 2057 { 2058 UInt i, n; 2059 ThrID me_thrid; 2060 Bool found = False; 2061 2062 stats__vts__tick++; 2063 2064 tl_assert(out); 2065 tl_assert(out->usedTS == 0); 2066 if (vts->usedTS >= ThrID_MAX_VALID) 2067 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ ); 2068 tl_assert(out->sizeTS >= 1 + vts->usedTS); 2069 2070 tl_assert(me); 2071 me_thrid = Thr__to_ThrID(me); 2072 tl_assert(is_sane_VTS(vts)); 2073 n = vts->usedTS; 2074 2075 /* Copy all entries which precede 'me'. */ 2076 for (i = 0; i < n; i++) { 2077 ScalarTS* here = &vts->ts[i]; 2078 if (UNLIKELY(here->thrid >= me_thrid)) 2079 break; 2080 UInt hi = out->usedTS++; 2081 out->ts[hi] = *here; 2082 } 2083 2084 /* 'i' now indicates the next entry to copy, if any. 
2085 There are 3 possibilities: 2086 (a) there is no next entry (we used them all up already): 2087 add (me_thrid,1) to the output, and quit 2088 (b) there is a next entry, and its thrid > me_thrid: 2089 add (me_thrid,1) to the output, then copy the remaining entries 2090 (c) there is a next entry, and its thrid == me_thrid: 2091 copy it to the output but increment its timestamp value. 2092 Then copy the remaining entries. (c) is the common case. 2093 */ 2094 tl_assert(i >= 0 && i <= n); 2095 if (i == n) { /* case (a) */ 2096 UInt hi = out->usedTS++; 2097 out->ts[hi].thrid = me_thrid; 2098 out->ts[hi].tym = 1; 2099 } else { 2100 /* cases (b) and (c) */ 2101 ScalarTS* here = &vts->ts[i]; 2102 if (me_thrid == here->thrid) { /* case (c) */ 2103 if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) { 2104 /* We're hosed. We have to stop. */ 2105 scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ ); 2106 } 2107 UInt hi = out->usedTS++; 2108 out->ts[hi].thrid = here->thrid; 2109 out->ts[hi].tym = here->tym + 1; 2110 i++; 2111 found = True; 2112 } else { /* case (b) */ 2113 UInt hi = out->usedTS++; 2114 out->ts[hi].thrid = me_thrid; 2115 out->ts[hi].tym = 1; 2116 } 2117 /* And copy any remaining entries. */ 2118 for (/*keepgoing*/; i < n; i++) { 2119 ScalarTS* here2 = &vts->ts[i]; 2120 UInt hi = out->usedTS++; 2121 out->ts[hi] = *here2; 2122 } 2123 } 2124 2125 tl_assert(is_sane_VTS(out)); 2126 tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1)); 2127 tl_assert(out->usedTS <= out->sizeTS); 2128 } 2129 2130 2131 /* Return a new VTS constructed as the join (max) of the 2 args. 2132 Neither arg is modified. 2133 */ 2134 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b ) 2135 { 2136 UInt ia, ib, useda, usedb; 2137 ULong tyma, tymb, tymMax; 2138 ThrID thrid; 2139 UInt ncommon = 0; 2140 2141 stats__vts__join++; 2142 2143 tl_assert(a); 2144 tl_assert(b); 2145 useda = a->usedTS; 2146 usedb = b->usedTS; 2147 2148 tl_assert(out); 2149 tl_assert(out->usedTS == 0); 2150 /* overly conservative test, but doing better involves comparing 2151 the two VTSs, which we don't want to do at this point. */ 2152 if (useda + usedb >= ThrID_MAX_VALID) 2153 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ ); 2154 tl_assert(out->sizeTS >= useda + usedb); 2155 2156 ia = ib = 0; 2157 2158 while (1) { 2159 2160 /* This logic is to enumerate triples (thrid, tyma, tymb) drawn 2161 from a and b in order, where thrid is the next ThrID 2162 occurring in either a or b, and tyma/b are the relevant 2163 scalar timestamps, taking into account implicit zeroes. 
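For example, with a = [t1:3 t2:5] and b = [t2:4 t3:7] (in the thrid:tym notation used by VTS__show), the triples enumerated are (t1,3,0), (t2,5,4) and (t3,0,7), giving the join [t1:3 t2:5 t3:7].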
*/ 2164 tl_assert(ia >= 0 && ia <= useda); 2165 tl_assert(ib >= 0 && ib <= usedb); 2166 2167 if (ia == useda && ib == usedb) { 2168 /* both empty - done */ 2169 break; 2170 2171 } else if (ia == useda && ib != usedb) { 2172 /* a empty, use up b */ 2173 ScalarTS* tmpb = &b->ts[ib]; 2174 thrid = tmpb->thrid; 2175 tyma = 0; 2176 tymb = tmpb->tym; 2177 ib++; 2178 2179 } else if (ia != useda && ib == usedb) { 2180 /* b empty, use up a */ 2181 ScalarTS* tmpa = &a->ts[ia]; 2182 thrid = tmpa->thrid; 2183 tyma = tmpa->tym; 2184 tymb = 0; 2185 ia++; 2186 2187 } else { 2188 /* both not empty; extract lowest-ThrID'd triple */ 2189 ScalarTS* tmpa = &a->ts[ia]; 2190 ScalarTS* tmpb = &b->ts[ib]; 2191 if (tmpa->thrid < tmpb->thrid) { 2192 /* a has the lowest unconsidered ThrID */ 2193 thrid = tmpa->thrid; 2194 tyma = tmpa->tym; 2195 tymb = 0; 2196 ia++; 2197 } else if (tmpa->thrid > tmpb->thrid) { 2198 /* b has the lowest unconsidered ThrID */ 2199 thrid = tmpb->thrid; 2200 tyma = 0; 2201 tymb = tmpb->tym; 2202 ib++; 2203 } else { 2204 /* they both next mention the same ThrID */ 2205 tl_assert(tmpa->thrid == tmpb->thrid); 2206 thrid = tmpa->thrid; /* == tmpb->thrid */ 2207 tyma = tmpa->tym; 2208 tymb = tmpb->tym; 2209 ia++; 2210 ib++; 2211 ncommon++; 2212 } 2213 } 2214 2215 /* having laboriously determined (thr, tyma, tymb), do something 2216 useful with it. */ 2217 tymMax = tyma > tymb ? tyma : tymb; 2218 if (tymMax > 0) { 2219 UInt hi = out->usedTS++; 2220 out->ts[hi].thrid = thrid; 2221 out->ts[hi].tym = tymMax; 2222 } 2223 2224 } 2225 2226 tl_assert(is_sane_VTS(out)); 2227 tl_assert(out->usedTS <= out->sizeTS); 2228 tl_assert(out->usedTS == useda + usedb - ncommon); 2229 } 2230 2231 2232 /* Determine if 'a' <= 'b', in the partial ordering. Returns zero if 2233 they are, or the first ThrID for which they are not (no valid ThrID 2234 has the value zero). This rather strange convention is used 2235 because sometimes we want to know the actual index at which they 2236 first differ. */ 2237 static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b ) 2238 { 2239 Word ia, ib, useda, usedb; 2240 ULong tyma, tymb; 2241 2242 stats__vts__cmpLEQ++; 2243 2244 tl_assert(a); 2245 tl_assert(b); 2246 useda = a->usedTS; 2247 usedb = b->usedTS; 2248 2249 ia = ib = 0; 2250 2251 while (1) { 2252 2253 /* This logic is to enumerate doubles (tyma, tymb) drawn 2254 from a and b in order, and tyma/b are the relevant 2255 scalar timestamps, taking into account implicit zeroes. 
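For example, comparing a = [t1:2] against b = [t1:3 t2:1] produces the pairs (2,3) and (0,1); tyma never exceeds tymb, so a is LEQ b and 0 is returned. Comparing them the other way round fails at the first pair (3 > 2) and t1's ThrID is returned.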
*/ 2256 ThrID thrid; 2257 2258 tl_assert(ia >= 0 && ia <= useda); 2259 tl_assert(ib >= 0 && ib <= usedb); 2260 2261 if (ia == useda && ib == usedb) { 2262 /* both empty - done */ 2263 break; 2264 2265 } else if (ia == useda && ib != usedb) { 2266 /* a empty, use up b */ 2267 ScalarTS* tmpb = &b->ts[ib]; 2268 tyma = 0; 2269 tymb = tmpb->tym; 2270 thrid = tmpb->thrid; 2271 ib++; 2272 2273 } else if (ia != useda && ib == usedb) { 2274 /* b empty, use up a */ 2275 ScalarTS* tmpa = &a->ts[ia]; 2276 tyma = tmpa->tym; 2277 thrid = tmpa->thrid; 2278 tymb = 0; 2279 ia++; 2280 2281 } else { 2282 /* both not empty; extract lowest-ThrID'd triple */ 2283 ScalarTS* tmpa = &a->ts[ia]; 2284 ScalarTS* tmpb = &b->ts[ib]; 2285 if (tmpa->thrid < tmpb->thrid) { 2286 /* a has the lowest unconsidered ThrID */ 2287 tyma = tmpa->tym; 2288 thrid = tmpa->thrid; 2289 tymb = 0; 2290 ia++; 2291 } 2292 else 2293 if (tmpa->thrid > tmpb->thrid) { 2294 /* b has the lowest unconsidered ThrID */ 2295 tyma = 0; 2296 tymb = tmpb->tym; 2297 thrid = tmpb->thrid; 2298 ib++; 2299 } else { 2300 /* they both next mention the same ThrID */ 2301 tl_assert(tmpa->thrid == tmpb->thrid); 2302 tyma = tmpa->tym; 2303 thrid = tmpa->thrid; 2304 tymb = tmpb->tym; 2305 ia++; 2306 ib++; 2307 } 2308 } 2309 2310 /* having laboriously determined (tyma, tymb), do something 2311 useful with it. */ 2312 if (tyma > tymb) { 2313 /* not LEQ at this index. Quit, since the answer is 2314 determined already. */ 2315 tl_assert(thrid >= 1024); 2316 return thrid; 2317 } 2318 } 2319 2320 return 0; /* all points are LEQ => return an invalid ThrID */ 2321 } 2322 2323 2324 /* Compute an arbitrary structural (total) ordering on the two args, 2325 based on their VCs, so they can be looked up in a table, tree, etc. 2326 Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be 2327 performance critical so there is some effort expended to make it as 2328 fast as possible. 2329 */ 2330 Word VTS__cmp_structural ( VTS* a, VTS* b ) 2331 { 2332 /* We just need to generate an arbitrary total ordering based on 2333 a->ts and b->ts. Preferably do it in a way which comes across likely 2334 differences relatively quickly. */ 2335 Word i; 2336 Word useda = 0, usedb = 0; 2337 ScalarTS *ctsa = NULL, *ctsb = NULL; 2338 2339 stats__vts__cmp_structural++; 2340 2341 tl_assert(a); 2342 tl_assert(b); 2343 2344 ctsa = &a->ts[0]; useda = a->usedTS; 2345 ctsb = &b->ts[0]; usedb = b->usedTS; 2346 2347 if (LIKELY(useda == usedb)) { 2348 ScalarTS *tmpa = NULL, *tmpb = NULL; 2349 stats__vts__cmp_structural_slow++; 2350 /* Same length vectors. Find the first difference, if any, as 2351 fast as possible. */ 2352 for (i = 0; i < useda; i++) { 2353 tmpa = &ctsa[i]; 2354 tmpb = &ctsb[i]; 2355 if (LIKELY(tmpa->tym == tmpb->tym 2356 && tmpa->thrid == tmpb->thrid)) 2357 continue; 2358 else 2359 break; 2360 } 2361 if (UNLIKELY(i == useda)) { 2362 /* They're identical. */ 2363 return 0; 2364 } else { 2365 tl_assert(i >= 0 && i < useda); 2366 if (tmpa->tym < tmpb->tym) return -1; 2367 if (tmpa->tym > tmpb->tym) return 1; 2368 if (tmpa->thrid < tmpb->thrid) return -1; 2369 if (tmpa->thrid > tmpb->thrid) return 1; 2370 /* we just established them as non-identical, hence: */ 2371 } 2372 /*NOTREACHED*/ 2373 tl_assert(0); 2374 } 2375 2376 if (useda < usedb) return -1; 2377 if (useda > usedb) return 1; 2378 /*NOTREACHED*/ 2379 tl_assert(0); 2380 } 2381 2382 2383 /* Debugging only. Display the given VTS in the buffer.
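The output has the form "[1024:1 1025:3]"; if the buffer is too small to hold the whole thing, it is truncated and terminated with " ...]".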
2384 */ 2385 void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) 2386 { 2387 ScalarTS* st; 2388 HChar unit[64]; 2389 Word i, n; 2390 Int avail = nBuf; 2391 tl_assert(vts && vts->ts); 2392 tl_assert(nBuf > 16); 2393 buf[0] = '['; 2394 buf[1] = 0; 2395 n = vts->usedTS; 2396 for (i = 0; i < n; i++) { 2397 tl_assert(avail >= 40); 2398 st = &vts->ts[i]; 2399 VG_(memset)(unit, 0, sizeof(unit)); 2400 VG_(sprintf)(unit, i < n-1 ? "%u:%llu " : "%u:%llu", 2401 st->thrid, (ULong)st->tym); 2402 if (avail < VG_(strlen)(unit) + 40/*let's say*/) { 2403 VG_(strcat)(buf, " ...]"); 2404 buf[nBuf-1] = 0; 2405 return; 2406 } 2407 VG_(strcat)(buf, unit); 2408 avail -= VG_(strlen)(unit); 2409 } 2410 VG_(strcat)(buf, "]"); 2411 buf[nBuf-1] = 0; 2412 } 2413 2414 2415 /* Debugging only. Return vts[index], so to speak. 2416 */ 2417 ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) 2418 { 2419 UWord i, n; 2420 ThrID idx_thrid = Thr__to_ThrID(idx); 2421 stats__vts__indexat_slow++; 2422 tl_assert(vts && vts->ts); 2423 n = vts->usedTS; 2424 for (i = 0; i < n; i++) { 2425 ScalarTS* st = &vts->ts[i]; 2426 if (st->thrid == idx_thrid) 2427 return st->tym; 2428 } 2429 return 0; 2430 } 2431 2432 2433 /* See comment on prototype above. 2434 */ 2435 static void VTS__declare_thread_very_dead ( Thr* thr ) 2436 { 2437 if (0) VG_(printf)("VTQ: tae %p\n", thr); 2438 2439 tl_assert(thr->llexit_done); 2440 tl_assert(thr->joinedwith_done); 2441 2442 ThrID nyu; 2443 nyu = Thr__to_ThrID(thr); 2444 VG_(addToXA)( verydead_thread_table, &nyu ); 2445 2446 /* We can only get here if we're assured that we'll never again 2447 need to look at this thread's ::viR or ::viW. Set them to 2448 VtsID_INVALID, partly so as to avoid holding on to the VTSs, but 2449 mostly so that we don't wind up pruning them (as that would be 2450 nonsensical: the only interesting ScalarTS entry for a dead 2451 thread is its own index, and the pruning will remove that.). */ 2452 VtsID__rcdec(thr->viR); 2453 VtsID__rcdec(thr->viW); 2454 thr->viR = VtsID_INVALID; 2455 thr->viW = VtsID_INVALID; 2456 } 2457 2458 2459 ///////////////////////////////////////////////////////////////// 2460 ///////////////////////////////////////////////////////////////// 2461 // // 2462 // SECTION END vts primitives // 2463 // // 2464 ///////////////////////////////////////////////////////////////// 2465 ///////////////////////////////////////////////////////////////// 2466 2467 2468 2469 ///////////////////////////////////////////////////////////////// 2470 ///////////////////////////////////////////////////////////////// 2471 // // 2472 // SECTION BEGIN main library // 2473 // // 2474 ///////////////////////////////////////////////////////////////// 2475 ///////////////////////////////////////////////////////////////// 2476 2477 2478 ///////////////////////////////////////////////////////// 2479 // // 2480 // VTS set // 2481 // // 2482 ///////////////////////////////////////////////////////// 2483 2484 static WordFM* /* WordFM VTS* void */ vts_set = NULL; 2485 2486 static void vts_set_init ( void ) 2487 { 2488 tl_assert(!vts_set); 2489 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1", 2490 HG_(free), 2491 (Word(*)(UWord,UWord))VTS__cmp_structural ); 2492 tl_assert(vts_set); 2493 } 2494 2495 /* Given a VTS, look in vts_set to see if we already have a 2496 structurally identical one. If yes, return the pair (True, pointer 2497 to the existing one). If no, clone this one, add the clone to the 2498 set, and return (False, pointer to the clone). 
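In effect this interns VTSs: each structurally-distinct VTS is stored exactly once in vts_set, and all callers that present an equal VTS get back the same shared copy.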
*/ 2499 static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand ) 2500 { 2501 UWord keyW, valW; 2502 stats__vts_set__focaa++; 2503 tl_assert(cand->id == VtsID_INVALID); 2504 /* lookup cand (by value) */ 2505 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) { 2506 /* found it */ 2507 tl_assert(valW == 0); 2508 /* if this fails, cand (by ref) was already present (!) */ 2509 tl_assert(keyW != (UWord)cand); 2510 *res = (VTS*)keyW; 2511 return True; 2512 } else { 2513 /* not present. Clone, add and return address of clone. */ 2514 stats__vts_set__focaa_a++; 2515 VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand ); 2516 tl_assert(clone != cand); 2517 VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ ); 2518 *res = clone; 2519 return False; 2520 } 2521 } 2522 2523 2524 ///////////////////////////////////////////////////////// 2525 // // 2526 // VTS table // 2527 // // 2528 ///////////////////////////////////////////////////////// 2529 2530 static void VtsID__invalidate_caches ( void ); /* fwds */ 2531 2532 /* A type to hold VTS table entries. Invariants: 2533 If .vts == NULL, then this entry is not in use, so: 2534 - .rc == 0 2535 - this entry is on the freelist (unfortunately, does not imply 2536 any constraints on value for .freelink) 2537 If .vts != NULL, then this entry is in use: 2538 - .vts is findable in vts_set 2539 - .vts->id == this entry number 2540 - no specific value for .rc (even 0 is OK) 2541 - this entry is not on freelist, so .freelink == VtsID_INVALID 2542 */ 2543 typedef 2544 struct { 2545 VTS* vts; /* vts, in vts_set */ 2546 UWord rc; /* reference count - enough for entire aspace */ 2547 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */ 2548 VtsID remap; /* used only during pruning */ 2549 } 2550 VtsTE; 2551 2552 /* The VTS table. */ 2553 static XArray* /* of VtsTE */ vts_tab = NULL; 2554 2555 /* An index into the VTS table, indicating the start of the list of 2556 free (available for use) entries. If the list is empty, this is 2557 VtsID_INVALID. */ 2558 static VtsID vts_tab_freelist = VtsID_INVALID; 2559 2560 /* Do a GC of vts_tab when the freelist becomes empty AND the size of 2561 vts_tab equals or exceeds this size. After GC, the value here is 2562 set appropriately so as to check for the next GC point. */ 2563 static Word vts_next_GC_at = 1000; 2564 2565 static void vts_tab_init ( void ) 2566 { 2567 vts_tab 2568 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1", 2569 HG_(free), sizeof(VtsTE) ); 2570 vts_tab_freelist 2571 = VtsID_INVALID; 2572 tl_assert(vts_tab); 2573 } 2574 2575 /* Add ii to the free list, checking that it looks out-of-use. */ 2576 static void add_to_free_list ( VtsID ii ) 2577 { 2578 VtsTE* ie = VG_(indexXA)( vts_tab, ii ); 2579 tl_assert(ie->vts == NULL); 2580 tl_assert(ie->rc == 0); 2581 tl_assert(ie->freelink == VtsID_INVALID); 2582 ie->freelink = vts_tab_freelist; 2583 vts_tab_freelist = ii; 2584 } 2585 2586 /* Get an entry from the free list. This will return VtsID_INVALID if 2587 the free list is empty. */ 2588 static VtsID get_from_free_list ( void ) 2589 { 2590 VtsID ii; 2591 VtsTE* ie; 2592 if (vts_tab_freelist == VtsID_INVALID) 2593 return VtsID_INVALID; 2594 ii = vts_tab_freelist; 2595 ie = VG_(indexXA)( vts_tab, ii ); 2596 tl_assert(ie->vts == NULL); 2597 tl_assert(ie->rc == 0); 2598 vts_tab_freelist = ie->freelink; 2599 return ii; 2600 } 2601 2602 /* Produce a new VtsID that can be used, either by getting it from 2603 the freelist, or, if that is empty, by expanding vts_tab. 
*/ 2604 static VtsID get_new_VtsID ( void ) 2605 { 2606 VtsID ii; 2607 VtsTE te; 2608 ii = get_from_free_list(); 2609 if (ii != VtsID_INVALID) 2610 return ii; 2611 te.vts = NULL; 2612 te.rc = 0; 2613 te.freelink = VtsID_INVALID; 2614 te.remap = VtsID_INVALID; 2615 ii = (VtsID)VG_(addToXA)( vts_tab, &te ); 2616 return ii; 2617 } 2618 2619 2620 /* Indirect callback from lib_zsm. */ 2621 static void VtsID__rcinc ( VtsID ii ) 2622 { 2623 VtsTE* ie; 2624 /* VG_(indexXA) does a range check for us */ 2625 ie = VG_(indexXA)( vts_tab, ii ); 2626 tl_assert(ie->vts); /* else it's not in use */ 2627 tl_assert(ie->rc < ~0UL); /* else we can't continue */ 2628 tl_assert(ie->vts->id == ii); 2629 ie->rc++; 2630 } 2631 2632 /* Indirect callback from lib_zsm. */ 2633 static void VtsID__rcdec ( VtsID ii ) 2634 { 2635 VtsTE* ie; 2636 /* VG_(indexXA) does a range check for us */ 2637 ie = VG_(indexXA)( vts_tab, ii ); 2638 tl_assert(ie->vts); /* else it's not in use */ 2639 tl_assert(ie->rc > 0); /* else RC snafu */ 2640 tl_assert(ie->vts->id == ii); 2641 ie->rc--; 2642 } 2643 2644 2645 /* Look up 'cand' in our collection of VTSs. If present, return the 2646 VtsID for the pre-existing version. If not present, clone it, add 2647 the clone to both vts_tab and vts_set, allocate a fresh VtsID for 2648 it, and return that. */ 2649 static VtsID vts_tab__find__or__clone_and_add ( VTS* cand ) 2650 { 2651 VTS* in_tab = NULL; 2652 tl_assert(cand->id == VtsID_INVALID); 2653 Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand ); 2654 tl_assert(in_tab); 2655 if (already_have) { 2656 /* We already have a copy of 'cand'. Use that. */ 2657 VtsTE* ie; 2658 tl_assert(in_tab->id != VtsID_INVALID); 2659 ie = VG_(indexXA)( vts_tab, in_tab->id ); 2660 tl_assert(ie->vts == in_tab); 2661 return in_tab->id; 2662 } else { 2663 VtsID ii = get_new_VtsID(); 2664 VtsTE* ie = VG_(indexXA)( vts_tab, ii ); 2665 ie->vts = in_tab; 2666 ie->rc = 0; 2667 ie->freelink = VtsID_INVALID; 2668 in_tab->id = ii; 2669 return ii; 2670 } 2671 } 2672 2673 2674 static void show_vts_stats ( const HChar* caller ) 2675 { 2676 UWord nSet, nTab, nLive; 2677 ULong totrc; 2678 UWord n, i; 2679 nSet = VG_(sizeFM)( vts_set ); 2680 nTab = VG_(sizeXA)( vts_tab ); 2681 totrc = 0; 2682 nLive = 0; 2683 n = VG_(sizeXA)( vts_tab ); 2684 for (i = 0; i < n; i++) { 2685 VtsTE* ie = VG_(indexXA)( vts_tab, i ); 2686 if (ie->vts) { 2687 nLive++; 2688 totrc += (ULong)ie->rc; 2689 } else { 2690 tl_assert(ie->rc == 0); 2691 } 2692 } 2693 VG_(printf)(" show_vts_stats %s\n", caller); 2694 VG_(printf)(" vts_tab size %4lu\n", nTab); 2695 VG_(printf)(" vts_tab live %4lu\n", nLive); 2696 VG_(printf)(" vts_set size %4lu\n", nSet); 2697 VG_(printf)(" total rc %4llu\n", totrc); 2698 } 2699 2700 2701 /* --- Helpers for VtsID pruning --- */ 2702 2703 static 2704 void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab, 2705 /*MOD*/XArray* /* of VtsTE */ new_tab, 2706 VtsID* ii ) 2707 { 2708 VtsTE *old_te, *new_te; 2709 VtsID old_id, new_id; 2710 /* We're relying here on VG_(indexXA)'s range checking to assert on 2711 any stupid values, in particular *ii == VtsID_INVALID. 
*/ 2712 old_id = *ii; 2713 old_te = VG_(indexXA)( old_tab, old_id ); 2714 old_te->rc--; 2715 new_id = old_te->remap; 2716 new_te = VG_(indexXA)( new_tab, new_id ); 2717 new_te->rc++; 2718 *ii = new_id; 2719 } 2720 2721 static 2722 void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab, 2723 /*MOD*/XArray* /* of VtsTE */ new_tab, 2724 SVal* s ) 2725 { 2726 SVal old_sv, new_sv; 2727 old_sv = *s; 2728 if (SVal__isC(old_sv)) { 2729 VtsID rMin, wMin; 2730 rMin = SVal__unC_Rmin(old_sv); 2731 wMin = SVal__unC_Wmin(old_sv); 2732 remap_VtsID( old_tab, new_tab, &rMin ); 2733 remap_VtsID( old_tab, new_tab, &wMin ); 2734 new_sv = SVal__mkC( rMin, wMin ); 2735 *s = new_sv; 2736 } 2737 } 2738 2739 2740 /* NOT TO BE CALLED FROM WITHIN libzsm. */ 2741 __attribute__((noinline)) 2742 static void vts_tab__do_GC ( Bool show_stats ) 2743 { 2744 UWord i, nTab, nLive, nFreed; 2745 2746 /* ---------- BEGIN VTS GC ---------- */ 2747 /* check this is actually necessary. */ 2748 tl_assert(vts_tab_freelist == VtsID_INVALID); 2749 2750 /* empty the caches for partial order checks and binary joins. We 2751 could do better and prune out the entries to be deleted, but it 2752 ain't worth the hassle. */ 2753 VtsID__invalidate_caches(); 2754 2755 /* First, make the reference counts up to date. */ 2756 zsm_flush_cache(); 2757 2758 nTab = VG_(sizeXA)( vts_tab ); 2759 2760 if (show_stats) { 2761 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab); 2762 show_vts_stats("before GC"); 2763 } 2764 2765 /* Now we can inspect the entire vts_tab. Any entries with zero 2766 .rc fields are now no longer in use and can be put back on the 2767 free list, removed from vts_set, and deleted. */ 2768 nFreed = 0; 2769 for (i = 0; i < nTab; i++) { 2770 Bool present; 2771 UWord oldK = 0, oldV = 12345; 2772 VtsTE* te = VG_(indexXA)( vts_tab, i ); 2773 if (te->vts == NULL) { 2774 tl_assert(te->rc == 0); 2775 continue; /* already on the free list (presumably) */ 2776 } 2777 if (te->rc > 0) 2778 continue; /* in use */ 2779 /* Ok, we got one we can free. */ 2780 tl_assert(te->vts->id == i); 2781 /* first, remove it from vts_set. */ 2782 present = VG_(delFromFM)( vts_set, 2783 &oldK, &oldV, (UWord)te->vts ); 2784 tl_assert(present); /* else it isn't in vts_set ?! */ 2785 tl_assert(oldV == 0); /* no info stored in vts_set val fields */ 2786 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */ 2787 /* now free the VTS itself */ 2788 VTS__delete(te->vts); 2789 te->vts = NULL; 2790 /* and finally put this entry on the free list */ 2791 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */ 2792 add_to_free_list( i ); 2793 nFreed++; 2794 } 2795 2796 /* Now figure out when the next GC should be. We'll allow the 2797 number of VTSs to double before GCing again. Except of course 2798 that since we can't (or, at least, don't) shrink vts_tab, we 2799 can't set the threshold value smaller than it.
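For example, if the table holds 1000 entries and 600 survive the GC, the next GC is scheduled for when the table reaches 1200 entries; if only 100 survive, the threshold stays at 1000.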
*/ 2800 tl_assert(nFreed <= nTab); 2801 nLive = nTab - nFreed; 2802 tl_assert(nLive >= 0 && nLive <= nTab); 2803 vts_next_GC_at = 2 * nLive; 2804 if (vts_next_GC_at < nTab) 2805 vts_next_GC_at = nTab; 2806 2807 if (show_stats) { 2808 show_vts_stats("after GC"); 2809 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at); 2810 } 2811 2812 if (VG_(clo_stats)) { 2813 static UInt ctr = 1; 2814 tl_assert(nTab > 0); 2815 VG_(message)(Vg_DebugMsg, 2816 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n", 2817 ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab); 2818 } 2819 /* ---------- END VTS GC ---------- */ 2820 2821 /* Decide whether to do VTS pruning. We have one of three 2822 settings. */ 2823 static UInt pruning_auto_ctr = 0; /* do not make non-static */ 2824 2825 Bool do_pruning = False; 2826 switch (HG_(clo_vts_pruning)) { 2827 case 0: /* never */ 2828 break; 2829 case 1: /* auto */ 2830 do_pruning = (++pruning_auto_ctr % 5) == 0; 2831 break; 2832 case 2: /* always */ 2833 do_pruning = True; 2834 break; 2835 default: 2836 tl_assert(0); 2837 } 2838 2839 /* The rest of this routine only handles pruning, so we can 2840 quit at this point if it is not to be done. */ 2841 if (!do_pruning) 2842 return; 2843 2844 /* ---------- BEGIN VTS PRUNING ---------- */ 2845 /* We begin by sorting the backing table on its .thr values, so as 2846 to (1) check they are unique [else something has gone wrong, 2847 since it means we must have seen some Thr* exiting more than 2848 once, which can't happen], and (2) so that we can quickly look 2849 up the dead-thread entries as we work through the VTSs. */ 2850 VG_(sortXA)( verydead_thread_table ); 2851 /* Sanity check: check for unique .sts.thr values. */ 2852 UWord nBT = VG_(sizeXA)( verydead_thread_table ); 2853 if (nBT > 0) { 2854 ThrID thrid1, thrid2; 2855 thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, 0 ); 2856 for (i = 1; i < nBT; i++) { 2857 thrid1 = thrid2; 2858 thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, i ); 2859 tl_assert(thrid1 < thrid2); 2860 } 2861 } 2862 /* Ok, so the dead thread table has unique and in-order keys. */ 2863 2864 /* We will run through the old table, and create a new table and 2865 set, at the same time setting the .remap entries in the old 2866 table to point to the new entries. Then, visit every VtsID in 2867 the system, and replace all of them with new ones, using the 2868 .remap entries in the old table. Finally, we can delete the old 2869 table and set. */ 2870 2871 XArray* /* of VtsTE */ new_tab 2872 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab", 2873 HG_(free), sizeof(VtsTE) ); 2874 2875 /* WordFM VTS* void */ 2876 WordFM* new_set 2877 = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set", 2878 HG_(free), 2879 (Word(*)(UWord,UWord))VTS__cmp_structural ); 2880 2881 /* Visit each old VTS. For each one: 2882 2883 * make a pruned version 2884 2885 * search new_set for the pruned version, yielding either 2886 Nothing (not present) or the new VtsID for it. 2887 2888 * if not present, allocate a new VtsID for it, insert (pruned 2889 VTS, new VtsID) in the tree, and set 2890 remap_table[old VtsID] = new VtsID. 2891 2892 * if present, set remap_table[old VtsID] = new VtsID, where 2893 new VtsID was determined by the tree lookup. Then free up 2894 the clone. 2895 */ 2896 2897 UWord nBeforePruning = 0, nAfterPruning = 0; 2898 UWord nSTSsBefore = 0, nSTSsAfter = 0; 2899 VtsID new_VtsID_ctr = 0; 2900 2901 for (i = 0; i < nTab; i++) { 2902 2903 /* For each old VTS .. 
*/ 2904 VtsTE* old_te = VG_(indexXA)( vts_tab, i ); 2905 VTS* old_vts = old_te->vts; 2906 tl_assert(old_te->remap == VtsID_INVALID); 2907 2908 /* Skip it if not in use */ 2909 if (old_te->rc == 0) { 2910 tl_assert(old_vts == NULL); 2911 continue; 2912 } 2913 tl_assert(old_vts != NULL); 2914 tl_assert(old_vts->id == i); 2915 tl_assert(old_vts->ts != NULL); 2916 2917 /* It is in use. Make a pruned version. */ 2918 nBeforePruning++; 2919 nSTSsBefore += old_vts->usedTS; 2920 VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts", 2921 old_vts, verydead_thread_table); 2922 tl_assert(new_vts->sizeTS == new_vts->usedTS); 2923 tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS]) 2924 == 0x0ddC0ffeeBadF00dULL); 2925 2926 /* Get rid of the old VTS and the tree entry. It's a bit more 2927 complex to incrementally delete the VTSs now than to nuke 2928 them all after we're done, but the upside is that we don't 2929 wind up temporarily storing potentially two complete copies 2930 of each VTS and hence spiking memory use. */ 2931 UWord oldK = 0, oldV = 12345; 2932 Bool present = VG_(delFromFM)( vts_set, 2933 &oldK, &oldV, (UWord)old_vts ); 2934 tl_assert(present); /* else it isn't in vts_set ?! */ 2935 tl_assert(oldV == 0); /* no info stored in vts_set val fields */ 2936 tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */ 2937 /* now free the VTS itself */ 2938 VTS__delete(old_vts); 2939 old_te->vts = NULL; 2940 old_vts = NULL; 2941 2942 /* NO MENTIONS of old_vts allowed beyond this point. */ 2943 2944 /* Ok, we have the pruned copy in new_vts. See if a 2945 structurally identical version is already present in new_set. 2946 If so, delete the one we just made and move on; if not, add 2947 it. */ 2948 VTS* identical_version = NULL; 2949 UWord valW = 12345; 2950 if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW, 2951 (UWord)new_vts)) { 2952 // already have it 2953 tl_assert(valW == 0); 2954 tl_assert(identical_version != NULL); 2955 tl_assert(identical_version != new_vts); 2956 VTS__delete(new_vts); 2957 new_vts = identical_version; 2958 tl_assert(new_vts->id != VtsID_INVALID); 2959 } else { 2960 tl_assert(valW == 12345); 2961 tl_assert(identical_version == NULL); 2962 new_vts->id = new_VtsID_ctr++; 2963 Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0); 2964 tl_assert(!b); 2965 VtsTE new_te; 2966 new_te.vts = new_vts; 2967 new_te.rc = 0; 2968 new_te.freelink = VtsID_INVALID; 2969 new_te.remap = VtsID_INVALID; 2970 Word j = VG_(addToXA)( new_tab, &new_te ); 2971 tl_assert(j <= i); 2972 tl_assert(j == new_VtsID_ctr - 1); 2973 // stats 2974 nAfterPruning++; 2975 nSTSsAfter += new_vts->usedTS; 2976 } 2977 old_te->remap = new_vts->id; 2978 2979 } /* for (i = 0; i < nTab; i++) */ 2980 2981 /* At this point, we have: 2982 * the old VTS table, with its .remap entries set, 2983 and with all .vts == NULL. 2984 * the old VTS tree should be empty, since it and the old VTSs 2985 it contained have been incrementally deleted as we worked 2986 through the old table. 2987 * the new VTS table, with all .rc == 0, all .freelink and .remap 2988 == VtsID_INVALID. 2989 * the new VTS tree. 2990 */ 2991 tl_assert( VG_(sizeFM)(vts_set) == 0 ); 2992 2993 /* Now actually apply the mapping. */ 2994 /* Visit all the VtsIDs in the entire system. Where do we expect 2995 to find them? 2996 (a) in shadow memory -- the LineZs and LineFs 2997 (b) in our collection of struct _Thrs. 2998 (c) in our collection of struct _SOs. 2999 Nowhere else, AFAICS.
Not in the zsm cache, because that just 3000 got invalidated. 3001 3002 Using the .remap fields in vts_tab, map each old VtsID to a new 3003 VtsID. For each old VtsID, dec its rc; and for each new one, 3004 inc it. This sets up the new refcounts, and it also gives a 3005 cheap sanity check of the old ones: all old refcounts should be 3006 zero after this operation. 3007 */ 3008 3009 /* Do the mappings for (a) above: iterate over the Primary shadow 3010 mem map (WordFM Addr SecMap*). */ 3011 UWord secmapW = 0; 3012 VG_(initIterFM)( map_shmem ); 3013 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) { 3014 UWord j; 3015 SecMap* sm = (SecMap*)secmapW; 3016 tl_assert(sm->magic == SecMap_MAGIC); 3017 /* Deal with the LineZs */ 3018 for (i = 0; i < N_SECMAP_ZLINES; i++) { 3019 LineZ* lineZ = &sm->linesZ[i]; 3020 if (lineZ->dict[0] == SVal_INVALID) 3021 continue; /* not in use -- data is in F rep instead */ 3022 for (j = 0; j < 4; j++) 3023 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]); 3024 } 3025 /* Deal with the LineFs */ 3026 for (i = 0; i < sm->linesF_size; i++) { 3027 LineF* lineF = &sm->linesF[i]; 3028 if (!lineF->inUse) 3029 continue; 3030 for (j = 0; j < N_LINE_ARANGE; j++) 3031 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]); 3032 } 3033 } 3034 VG_(doneIterFM)( map_shmem ); 3035 3036 /* Do the mappings for (b) above: visit our collection of struct 3037 _Thrs. */ 3038 Thread* hgthread = get_admin_threads(); 3039 tl_assert(hgthread); 3040 while (hgthread) { 3041 Thr* hbthr = hgthread->hbthr; 3042 tl_assert(hbthr); 3043 /* Threads that are listed in the prunable set have their viR 3044 and viW set to VtsID_INVALID, so we can't mess with them. */ 3045 if (hbthr->llexit_done && hbthr->joinedwith_done) { 3046 tl_assert(hbthr->viR == VtsID_INVALID); 3047 tl_assert(hbthr->viW == VtsID_INVALID); 3048 hgthread = hgthread->admin; 3049 continue; 3050 } 3051 remap_VtsID( vts_tab, new_tab, &hbthr->viR ); 3052 remap_VtsID( vts_tab, new_tab, &hbthr->viW ); 3053 hgthread = hgthread->admin; 3054 } 3055 3056 /* Do the mappings for (c) above: visit the struct _SOs. */ 3057 SO* so = admin_SO; 3058 while (so) { 3059 if (so->viR != VtsID_INVALID) 3060 remap_VtsID( vts_tab, new_tab, &so->viR ); 3061 if (so->viW != VtsID_INVALID) 3062 remap_VtsID( vts_tab, new_tab, &so->viW ); 3063 so = so->admin_next; 3064 } 3065 3066 /* So, we're nearly done (with this incredibly complex operation). 3067 Check the refcounts for the old VtsIDs all fell to zero, as 3068 expected. Any failure is serious. */ 3069 for (i = 0; i < nTab; i++) { 3070 VtsTE* te = VG_(indexXA)( vts_tab, i ); 3071 tl_assert(te->vts == NULL); 3072 /* This is the assert proper. Note we're also asserting 3073 zeroness for old entries which are unmapped (hence have 3074 .remap == VtsID_INVALID). That's OK. */ 3075 tl_assert(te->rc == 0); 3076 } 3077 3078 /* Install the new table and set. */ 3079 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/); 3080 vts_set = new_set; 3081 VG_(deleteXA)( vts_tab ); 3082 vts_tab = new_tab; 3083 3084 /* The freelist of vts_tab entries is empty now, because we've 3085 compacted all of the live entries at the low end of the 3086 table. */ 3087 vts_tab_freelist = VtsID_INVALID; 3088 3089 /* Sanity check vts_set and vts_tab. 
*/ 3090 3091 /* Because all the live entries got slid down to the bottom of vts_tab: */ 3092 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set )); 3093 3094 /* Assert that the vts_tab and vts_set entries point at each other 3095 in the required way */ 3096 UWord wordK = 0, wordV = 0; 3097 VG_(initIterFM)( vts_set ); 3098 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) { 3099 tl_assert(wordK != 0); 3100 tl_assert(wordV == 0); 3101 VTS* vts = (VTS*)wordK; 3102 tl_assert(vts->id != VtsID_INVALID); 3103 VtsTE* te = VG_(indexXA)( vts_tab, vts->id ); 3104 tl_assert(te->vts == vts); 3105 } 3106 VG_(doneIterFM)( vts_set ); 3107 3108 /* Also iterate over the table, and check each entry is 3109 plausible. */ 3110 nTab = VG_(sizeXA)( vts_tab ); 3111 for (i = 0; i < nTab; i++) { 3112 VtsTE* te = VG_(indexXA)( vts_tab, i ); 3113 tl_assert(te->vts); 3114 tl_assert(te->vts->id == i); 3115 tl_assert(te->rc > 0); /* 'cos we just GC'd */ 3116 tl_assert(te->freelink == VtsID_INVALID); /* in use */ 3117 tl_assert(te->remap == VtsID_INVALID); /* not relevant */ 3118 } 3119 3120 /* And we're done. Bwahahaha. Ha. Ha. Ha. */ 3121 if (VG_(clo_stats)) { 3122 static UInt ctr = 1; 3123 tl_assert(nTab > 0); 3124 VG_(message)( 3125 Vg_DebugMsg, 3126 "libhb: VTS PR: #%u before %lu (avg sz %lu) " 3127 "after %lu (avg sz %lu)\n", 3128 ctr++, 3129 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1), 3130 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1) 3131 ); 3132 } 3133 if (0) 3134 VG_(printf)("VTQ: before pruning %lu (avg sz %lu), " 3135 "after pruning %lu (avg sz %lu)\n", 3136 nBeforePruning, nSTSsBefore / nBeforePruning, 3137 nAfterPruning, nSTSsAfter / nAfterPruning); 3138 /* ---------- END VTS PRUNING ---------- */ 3139 } 3140 3141 3142 ///////////////////////////////////////////////////////// 3143 // // 3144 // Vts IDs // 3145 // // 3146 ///////////////////////////////////////////////////////// 3147 3148 ////////////////////////// 3149 /* A temporary, max-sized VTS which is used as a temporary (the first 3150 argument) in VTS__singleton, VTS__tick and VTS__join operations. 
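Being max-sized, it has room for an entry for every possible ThrID, so the 'out must be pre-allocated sufficiently big' preconditions of those three functions are always met when it is used as the destination.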
*/ 3151 static VTS* temp_max_sized_VTS = NULL; 3152 3153 ////////////////////////// 3154 static ULong stats__cmpLEQ_queries = 0; 3155 static ULong stats__cmpLEQ_misses = 0; 3156 static ULong stats__join2_queries = 0; 3157 static ULong stats__join2_misses = 0; 3158 3159 static inline UInt ROL32 ( UInt w, Int n ) { 3160 w = (w << n) | (w >> (32-n)); 3161 return w; 3162 } 3163 static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) { 3164 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13); 3165 return hash % nTab; 3166 } 3167 3168 #define N_CMPLEQ_CACHE 1023 3169 static 3170 struct { VtsID vi1; VtsID vi2; Bool leq; } 3171 cmpLEQ_cache[N_CMPLEQ_CACHE]; 3172 3173 #define N_JOIN2_CACHE 1023 3174 static 3175 struct { VtsID vi1; VtsID vi2; VtsID res; } 3176 join2_cache[N_JOIN2_CACHE]; 3177 3178 static void VtsID__invalidate_caches ( void ) { 3179 Int i; 3180 for (i = 0; i < N_CMPLEQ_CACHE; i++) { 3181 cmpLEQ_cache[i].vi1 = VtsID_INVALID; 3182 cmpLEQ_cache[i].vi2 = VtsID_INVALID; 3183 cmpLEQ_cache[i].leq = False; 3184 } 3185 for (i = 0; i < N_JOIN2_CACHE; i++) { 3186 join2_cache[i].vi1 = VtsID_INVALID; 3187 join2_cache[i].vi2 = VtsID_INVALID; 3188 join2_cache[i].res = VtsID_INVALID; 3189 } 3190 } 3191 ////////////////////////// 3192 3193 //static Bool VtsID__is_valid ( VtsID vi ) { 3194 // VtsTE* ve; 3195 // if (vi >= (VtsID)VG_(sizeXA)( vts_tab )) 3196 // return False; 3197 // ve = VG_(indexXA)( vts_tab, vi ); 3198 // if (!ve->vts) 3199 // return False; 3200 // tl_assert(ve->vts->id == vi); 3201 // return True; 3202 //} 3203 3204 static VTS* VtsID__to_VTS ( VtsID vi ) { 3205 VtsTE* te = VG_(indexXA)( vts_tab, vi ); 3206 tl_assert(te->vts); 3207 return te->vts; 3208 } 3209 3210 static void VtsID__pp ( VtsID vi ) { 3211 HChar buf[100]; 3212 VTS* vts = VtsID__to_VTS(vi); 3213 VTS__show( buf, sizeof(buf)-1, vts ); 3214 buf[sizeof(buf)-1] = 0; 3215 VG_(printf)("%s", buf); 3216 } 3217 3218 /* compute partial ordering relation of vi1 and vi2. */ 3219 __attribute__((noinline)) 3220 static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) { 3221 UInt hash; 3222 Bool leq; 3223 VTS *v1, *v2; 3224 //if (vi1 == vi2) return True; 3225 tl_assert(vi1 != vi2); 3226 ////++ 3227 stats__cmpLEQ_queries++; 3228 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE); 3229 if (cmpLEQ_cache[hash].vi1 == vi1 3230 && cmpLEQ_cache[hash].vi2 == vi2) 3231 return cmpLEQ_cache[hash].leq; 3232 stats__cmpLEQ_misses++; 3233 ////-- 3234 v1 = VtsID__to_VTS(vi1); 3235 v2 = VtsID__to_VTS(vi2); 3236 leq = VTS__cmpLEQ( v1, v2 ) == 0; 3237 ////++ 3238 cmpLEQ_cache[hash].vi1 = vi1; 3239 cmpLEQ_cache[hash].vi2 = vi2; 3240 cmpLEQ_cache[hash].leq = leq; 3241 ////-- 3242 return leq; 3243 } 3244 static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) { 3245 return LIKELY(vi1 == vi2) ? 
True : VtsID__cmpLEQ_WRK(vi1, vi2); 3246 } 3247 3248 /* compute binary join */ 3249 __attribute__((noinline)) 3250 static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) { 3251 UInt hash; 3252 VtsID res; 3253 VTS *vts1, *vts2; 3254 //if (vi1 == vi2) return vi1; 3255 tl_assert(vi1 != vi2); 3256 ////++ 3257 stats__join2_queries++; 3258 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE); 3259 if (join2_cache[hash].vi1 == vi1 3260 && join2_cache[hash].vi2 == vi2) 3261 return join2_cache[hash].res; 3262 stats__join2_misses++; 3263 ////-- 3264 vts1 = VtsID__to_VTS(vi1); 3265 vts2 = VtsID__to_VTS(vi2); 3266 temp_max_sized_VTS->usedTS = 0; 3267 VTS__join(temp_max_sized_VTS, vts1,vts2); 3268 res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS); 3269 ////++ 3270 join2_cache[hash].vi1 = vi1; 3271 join2_cache[hash].vi2 = vi2; 3272 join2_cache[hash].res = res; 3273 ////-- 3274 return res; 3275 } 3276 static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) { 3277 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2); 3278 } 3279 3280 /* create a singleton VTS, namely [thr:1] */ 3281 static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) { 3282 temp_max_sized_VTS->usedTS = 0; 3283 VTS__singleton(temp_max_sized_VTS, thr,tym); 3284 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS); 3285 } 3286 3287 /* tick operation, creates value 1 if specified index is absent */ 3288 static VtsID VtsID__tick ( VtsID vi, Thr* idx ) { 3289 VTS* vts = VtsID__to_VTS(vi); 3290 temp_max_sized_VTS->usedTS = 0; 3291 VTS__tick(temp_max_sized_VTS, idx,vts); 3292 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS); 3293 } 3294 3295 /* index into a VTS (only for assertions) */ 3296 static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) { 3297 VTS* vts = VtsID__to_VTS(vi); 3298 return VTS__indexAt_SLOW( vts, idx ); 3299 } 3300 3301 /* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or 3302 any, really) element in vi1 which is pointwise greater-than the 3303 corresponding element in vi2. If no such element exists, return 3304 NULL. This needs to be fairly quick since it is called every time 3305 a race is detected. */ 3306 static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 ) 3307 { 3308 VTS *vts1, *vts2; 3309 Thr* diffthr; 3310 ThrID diffthrid; 3311 tl_assert(vi1 != vi2); 3312 vts1 = VtsID__to_VTS(vi1); 3313 vts2 = VtsID__to_VTS(vi2); 3314 tl_assert(vts1 != vts2); 3315 diffthrid = VTS__cmpLEQ(vts1, vts2); 3316 diffthr = Thr__from_ThrID(diffthrid); 3317 tl_assert(diffthr); /* else they are LEQ ! */ 3318 return diffthr; 3319 } 3320 3321 3322 ///////////////////////////////////////////////////////// 3323 // // 3324 // Filters // 3325 // // 3326 ///////////////////////////////////////////////////////// 3327 3328 /* Forget everything we know -- clear the filter and let everything 3329 through. This needs to be as fast as possible, since it is called 3330 every time the running thread changes, and every time a thread's 3331 vector clocks change, which can be quite frequent. The obvious 3332 fast way to do this is simply to stuff in tags which we know are 3333 not going to match anything, since they're not aligned to the start 3334 of a line. 
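(The value 1 stuffed in below has exactly that property: genuine tags produced by FI_GET_TAG are line-aligned and so can never equal 1.)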
*/ 3335 static void Filter__clear ( Filter* fi, const HChar* who ) 3336 { 3337 UWord i; 3338 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who); 3339 for (i = 0; i < FI_NUM_LINES; i += 8) { 3340 fi->tags[i+0] = 1; /* impossible value -- cannot match */ 3341 fi->tags[i+1] = 1; 3342 fi->tags[i+2] = 1; 3343 fi->tags[i+3] = 1; 3344 fi->tags[i+4] = 1; 3345 fi->tags[i+5] = 1; 3346 fi->tags[i+6] = 1; 3347 fi->tags[i+7] = 1; 3348 } 3349 tl_assert(i == FI_NUM_LINES); 3350 } 3351 3352 /* Clearing an arbitrary range in the filter. Unfortunately 3353 we have to do this due to core-supplied new/die-mem events. */ 3354 3355 static void Filter__clear_1byte ( Filter* fi, Addr a ) 3356 { 3357 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3358 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3359 FiLine* line = &fi->lines[lineno]; 3360 UWord loff = (a - atag) / 8; 3361 UShort mask = 0x3 << (2 * (a & 7)); 3362 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */ 3363 if (LIKELY( fi->tags[lineno] == atag )) { 3364 /* hit. clear the bits. */ 3365 UShort u16 = line->u16s[loff]; 3366 line->u16s[loff] = u16 & ~mask; /* clear them */ 3367 } else { 3368 /* miss. The filter doesn't hold this address, so ignore. */ 3369 } 3370 } 3371 3372 static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a ) 3373 { 3374 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3375 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3376 FiLine* line = &fi->lines[lineno]; 3377 UWord loff = (a - atag) / 8; 3378 if (LIKELY( fi->tags[lineno] == atag )) { 3379 line->u16s[loff] = 0; 3380 } else { 3381 /* miss. The filter doesn't hold this address, so ignore. */ 3382 } 3383 } 3384 3385 static void Filter__clear_range ( Filter* fi, Addr a, UWord len ) 3386 { 3387 //VG_(printf)("%lu ", len); 3388 /* slowly do part preceding 8-alignment */ 3389 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) { 3390 Filter__clear_1byte( fi, a ); 3391 a++; 3392 len--; 3393 } 3394 /* vector loop */ 3395 while (len >= 8) { 3396 Filter__clear_8bytes_aligned( fi, a ); 3397 a += 8; 3398 len -= 8; 3399 } 3400 /* slowly do tail */ 3401 while (UNLIKELY(len > 0)) { 3402 Filter__clear_1byte( fi, a ); 3403 a++; 3404 len--; 3405 } 3406 } 3407 3408 3409 /* ------ Read handlers for the filter. ------ */ 3410 3411 static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a ) 3412 { 3413 if (UNLIKELY( !VG_IS_8_ALIGNED(a) )) 3414 return False; 3415 { 3416 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3417 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3418 FiLine* line = &fi->lines[lineno]; 3419 UWord loff = (a - atag) / 8; 3420 UShort mask = 0xAAAA; 3421 if (LIKELY( fi->tags[lineno] == atag )) { 3422 /* hit. check line and update. */ 3423 UShort u16 = line->u16s[loff]; 3424 Bool ok = (u16 & mask) == mask; /* all R bits set? */ 3425 line->u16s[loff] = u16 | mask; /* set them */ 3426 return ok; 3427 } else { 3428 /* miss. nuke existing line and re-use it. 
*/ 3429 UWord i; 3430 fi->tags[lineno] = atag; 3431 for (i = 0; i < FI_LINE_SZB / 8; i++) 3432 line->u16s[i] = 0; 3433 line->u16s[loff] = mask; 3434 return False; 3435 } 3436 } 3437 } 3438 3439 static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a ) 3440 { 3441 if (UNLIKELY( !VG_IS_4_ALIGNED(a) )) 3442 return False; 3443 { 3444 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3445 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3446 FiLine* line = &fi->lines[lineno]; 3447 UWord loff = (a - atag) / 8; 3448 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */ 3449 if (LIKELY( fi->tags[lineno] == atag )) { 3450 /* hit. check line and update. */ 3451 UShort u16 = line->u16s[loff]; 3452 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */ 3453 line->u16s[loff] = u16 | mask; /* set them */ 3454 return ok; 3455 } else { 3456 /* miss. nuke existing line and re-use it. */ 3457 UWord i; 3458 fi->tags[lineno] = atag; 3459 for (i = 0; i < FI_LINE_SZB / 8; i++) 3460 line->u16s[i] = 0; 3461 line->u16s[loff] = mask; 3462 return False; 3463 } 3464 } 3465 } 3466 3467 static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a ) 3468 { 3469 if (UNLIKELY( !VG_IS_2_ALIGNED(a) )) 3470 return False; 3471 { 3472 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3473 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3474 FiLine* line = &fi->lines[lineno]; 3475 UWord loff = (a - atag) / 8; 3476 UShort mask = 0xA << (2 * (a & 6)); 3477 /* mask is A000, 0A00, 00A0 or 000A */ 3478 if (LIKELY( fi->tags[lineno] == atag )) { 3479 /* hit. check line and update. */ 3480 UShort u16 = line->u16s[loff]; 3481 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */ 3482 line->u16s[loff] = u16 | mask; /* set them */ 3483 return ok; 3484 } else { 3485 /* miss. nuke existing line and re-use it. */ 3486 UWord i; 3487 fi->tags[lineno] = atag; 3488 for (i = 0; i < FI_LINE_SZB / 8; i++) 3489 line->u16s[i] = 0; 3490 line->u16s[loff] = mask; 3491 return False; 3492 } 3493 } 3494 } 3495 3496 static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a ) 3497 { 3498 { 3499 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3500 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3501 FiLine* line = &fi->lines[lineno]; 3502 UWord loff = (a - atag) / 8; 3503 UShort mask = 0x2 << (2 * (a & 7)); 3504 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */ 3505 if (LIKELY( fi->tags[lineno] == atag )) { 3506 /* hit. check line and update. */ 3507 UShort u16 = line->u16s[loff]; 3508 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */ 3509 line->u16s[loff] = u16 | mask; /* set them */ 3510 return ok; 3511 } else { 3512 /* miss. nuke existing line and re-use it. */ 3513 UWord i; 3514 fi->tags[lineno] = atag; 3515 for (i = 0; i < FI_LINE_SZB / 8; i++) 3516 line->u16s[i] = 0; 3517 line->u16s[loff] = mask; 3518 return False; 3519 } 3520 } 3521 } 3522 3523 3524 /* ------ Write handlers for the filter. ------ */ 3525 3526 static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a ) 3527 { 3528 if (UNLIKELY( !VG_IS_8_ALIGNED(a) )) 3529 return False; 3530 { 3531 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3532 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3533 FiLine* line = &fi->lines[lineno]; 3534 UWord loff = (a - atag) / 8; 3535 UShort mask = 0xFFFF; 3536 if (LIKELY( fi->tags[lineno] == atag )) { 3537 /* hit. check line and update. */ 3538 UShort u16 = line->u16s[loff]; 3539 Bool ok = (u16 & mask) == mask; /* all R & W bits set? 
*/ 3540 line->u16s[loff] = u16 | mask; /* set them */ 3541 return ok; 3542 } else { 3543 /* miss. nuke existing line and re-use it. */ 3544 UWord i; 3545 fi->tags[lineno] = atag; 3546 for (i = 0; i < FI_LINE_SZB / 8; i++) 3547 line->u16s[i] = 0; 3548 line->u16s[loff] = mask; 3549 return False; 3550 } 3551 } 3552 } 3553 3554 static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a ) 3555 { 3556 if (UNLIKELY( !VG_IS_4_ALIGNED(a) )) 3557 return False; 3558 { 3559 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3560 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3561 FiLine* line = &fi->lines[lineno]; 3562 UWord loff = (a - atag) / 8; 3563 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */ 3564 if (LIKELY( fi->tags[lineno] == atag )) { 3565 /* hit. check line and update. */ 3566 UShort u16 = line->u16s[loff]; 3567 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */ 3568 line->u16s[loff] = u16 | mask; /* set them */ 3569 return ok; 3570 } else { 3571 /* miss. nuke existing line and re-use it. */ 3572 UWord i; 3573 fi->tags[lineno] = atag; 3574 for (i = 0; i < FI_LINE_SZB / 8; i++) 3575 line->u16s[i] = 0; 3576 line->u16s[loff] = mask; 3577 return False; 3578 } 3579 } 3580 } 3581 3582 static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a ) 3583 { 3584 if (UNLIKELY( !VG_IS_2_ALIGNED(a) )) 3585 return False; 3586 { 3587 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3588 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3589 FiLine* line = &fi->lines[lineno]; 3590 UWord loff = (a - atag) / 8; 3591 UShort mask = 0xF << (2 * (a & 6)); 3592 /* mask is F000, 0F00, 00F0 or 000F */ 3593 if (LIKELY( fi->tags[lineno] == atag )) { 3594 /* hit. check line and update. */ 3595 UShort u16 = line->u16s[loff]; 3596 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */ 3597 line->u16s[loff] = u16 | mask; /* set them */ 3598 return ok; 3599 } else { 3600 /* miss. nuke existing line and re-use it. */ 3601 UWord i; 3602 fi->tags[lineno] = atag; 3603 for (i = 0; i < FI_LINE_SZB / 8; i++) 3604 line->u16s[i] = 0; 3605 line->u16s[loff] = mask; 3606 return False; 3607 } 3608 } 3609 } 3610 3611 static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a ) 3612 { 3613 { 3614 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3615 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3616 FiLine* line = &fi->lines[lineno]; 3617 UWord loff = (a - atag) / 8; 3618 UShort mask = 0x3 << (2 * (a & 7)); 3619 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */ 3620 if (LIKELY( fi->tags[lineno] == atag )) { 3621 /* hit. check line and update. */ 3622 UShort u16 = line->u16s[loff]; 3623 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */ 3624 line->u16s[loff] = u16 | mask; /* set them */ 3625 return ok; 3626 } else { 3627 /* miss. nuke existing line and re-use it. */ 3628 UWord i; 3629 fi->tags[lineno] = atag; 3630 for (i = 0; i < FI_LINE_SZB / 8; i++) 3631 line->u16s[i] = 0; 3632 line->u16s[loff] = mask; 3633 return False; 3634 } 3635 } 3636 } 3637 3638 3639 ///////////////////////////////////////////////////////// 3640 // // 3641 // Threads // 3642 // // 3643 ///////////////////////////////////////////////////////// 3644 3645 /* Maps ThrID values to their Thr*s (which contain ThrID values that 3646 should point back to the relevant slot in the array. Lowest 3647 numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */ 3648 static XArray* /* of Thr* */ thrid_to_thr_map = NULL; 3649 3650 /* And a counter to dole out ThrID values. 
For rationale/background, 3651 see comments on definition of ScalarTS (far) above. */ 3652 static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */ 3653 3654 static ThrID Thr__to_ThrID ( Thr* thr ) { 3655 return thr->thrid; 3656 } 3657 static Thr* Thr__from_ThrID ( UInt thrid ) { 3658 Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 ); 3659 tl_assert(thr->thrid == thrid); 3660 return thr; 3661 } 3662 3663 static Thr* Thr__new ( void ) 3664 { 3665 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) ); 3666 thr->viR = VtsID_INVALID; 3667 thr->viW = VtsID_INVALID; 3668 thr->llexit_done = False; 3669 thr->joinedwith_done = False; 3670 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) ); 3671 if (HG_(clo_history_level) == 1) 3672 thr->local_Kws_n_stacks 3673 = VG_(newXA)( HG_(zalloc), 3674 "libhb.Thr__new.3 (local_Kws_and_stacks)", 3675 HG_(free), sizeof(ULong_n_EC) ); 3676 3677 /* Add this Thr* <-> ThrID binding to the mapping, and 3678 cross-check */ 3679 if (!thrid_to_thr_map) { 3680 thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4", 3681 HG_(free), sizeof(Thr*) ); 3682 tl_assert(thrid_to_thr_map); 3683 } 3684 3685 if (thrid_counter >= ThrID_MAX_VALID) { 3686 /* We're hosed. We have to stop. */ 3687 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ ); 3688 } 3689 3690 thr->thrid = thrid_counter++; 3691 Word ix = VG_(addToXA)( thrid_to_thr_map, &thr ); 3692 tl_assert(ix + 1024 == thr->thrid); 3693 3694 return thr; 3695 } 3696 3697 static void note_local_Kw_n_stack_for ( Thr* thr ) 3698 { 3699 Word nPresent; 3700 ULong_n_EC pair; 3701 tl_assert(thr); 3702 3703 // We only collect this info at history level 1 (approx) 3704 if (HG_(clo_history_level) != 1) 3705 return; 3706 3707 /* This is the scalar Kw for thr. */ 3708 pair.ull = VtsID__indexAt( thr->viW, thr ); 3709 pair.ec = main_get_EC( thr ); 3710 tl_assert(pair.ec); 3711 tl_assert(thr->local_Kws_n_stacks); 3712 3713 /* check that we're not adding duplicates */ 3714 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks ); 3715 3716 /* Throw away old stacks, if necessary. We can't accumulate stuff 3717 indefinitely. */ 3718 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) { 3719 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 ); 3720 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks ); 3721 if (0) 3722 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n", 3723 thr, pair.ull, pair.ec ); 3724 } 3725 3726 if (nPresent > 0) { 3727 ULong_n_EC* prevPair 3728 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 ); 3729 tl_assert( prevPair->ull <= pair.ull ); 3730 } 3731 3732 if (nPresent == 0) 3733 pair.ec = NULL; 3734 3735 VG_(addToXA)( thr->local_Kws_n_stacks, &pair ); 3736 3737 if (0) 3738 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n", 3739 thr, pair.ull, pair.ec ); 3740 if (0) 3741 VG_(pp_ExeContext)(pair.ec); 3742 } 3743 3744 static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1, 3745 const ULong_n_EC* pair2 ) 3746 { 3747 if (pair1->ull < pair2->ull) return -1; 3748 if (pair1->ull > pair2->ull) return 1; 3749 return 0; 3750 } 3751 3752 3753 ///////////////////////////////////////////////////////// 3754 // // 3755 // Shadow Values // 3756 // // 3757 ///////////////////////////////////////////////////////// 3758 3759 // type SVal, SVal_INVALID and SVal_NOACCESS are defined by 3760 // hb_zsm.h. We have to do everything else here. 3761 3762 /* SVal is 64 bit unsigned int. 
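The top two bits act as a tag: 00 marks an ordinary constraint pair C(Rmin,Wmin) carrying two VtsIDs, 10 marks SVal_NOACCESS and 11 marks SVal_INVALID, as laid out below.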
3763 3764 <---------30---------> <---------30---------> 3765 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin) 3766 10 X--------------------X XX X--------------------X A: SVal_NOACCESS 3767 11 0--------------------0 00 0--------------------0 A: SVal_INVALID 3768 3769 */ 3770 #define SVAL_TAGMASK (3ULL << 62) 3771 3772 static inline Bool SVal__isC ( SVal s ) { 3773 return (0ULL << 62) == (s & SVAL_TAGMASK); 3774 } 3775 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) { 3776 //tl_assert(VtsID__is_valid(rmini)); 3777 //tl_assert(VtsID__is_valid(wmini)); 3778 return (((ULong)rmini) << 32) | ((ULong)wmini); 3779 } 3780 static inline VtsID SVal__unC_Rmin ( SVal s ) { 3781 tl_assert(SVal__isC(s)); 3782 return (VtsID)(s >> 32); 3783 } 3784 static inline VtsID SVal__unC_Wmin ( SVal s ) { 3785 tl_assert(SVal__isC(s)); 3786 return (VtsID)(s & 0xFFFFFFFFULL); 3787 } 3788 3789 static inline Bool SVal__isA ( SVal s ) { 3790 return (2ULL << 62) == (s & SVAL_TAGMASK); 3791 } 3792 static inline SVal SVal__mkA ( void ) { 3793 return 2ULL << 62; 3794 } 3795 3796 /* Direct callback from lib_zsm. */ 3797 static void SVal__rcinc ( SVal s ) { 3798 if (SVal__isC(s)) { 3799 VtsID__rcinc( SVal__unC_Rmin(s) ); 3800 VtsID__rcinc( SVal__unC_Wmin(s) ); 3801 } 3802 } 3803 3804 /* Direct callback from lib_zsm. */ 3805 static void SVal__rcdec ( SVal s ) { 3806 if (SVal__isC(s)) { 3807 VtsID__rcdec( SVal__unC_Rmin(s) ); 3808 VtsID__rcdec( SVal__unC_Wmin(s) ); 3809 } 3810 } 3811 3812 3813 ///////////////////////////////////////////////////////// 3814 // // 3815 // Change-event map2 // 3816 // // 3817 ///////////////////////////////////////////////////////// 3818 3819 #define EVENT_MAP_GC_DISCARD_FRACTION 0.5 3820 3821 /* This is in two parts: 3822 3823 1. A hash table of RCECs. This is a set of reference-counted stack 3824 traces. When the reference count of a stack trace becomes zero, 3825 it is removed from the set and freed up. The intent is to have 3826 a set of stack traces which can be referred to from (2), but to 3827 only represent each one once. The set is indexed/searched by 3828 ordering on the stack trace vectors. 3829 3830 2. A SparseWA of OldRefs. These store information about each old 3831 ref that we need to record. It is indexed by address of the 3832 location for which the information is recorded. For LRU 3833 purposes, each OldRef also contains a generation number, 3834 indicating when it was most recently accessed. 3835 3836 The important part of an OldRef is, however, its accs[] array. 3837 This is an array of N_OLDREF_ACCS which binds (thread, R/W, 3838 size) triples to RCECs. This allows us to collect the last 3839 access-traceback by up to N_OLDREF_ACCS different triples for 3840 this location. The accs[] array is a MTF-array. If a binding 3841 falls off the end, that's too bad -- we will lose info about 3842 that triple's access to this location. 3843 3844 When the SparseWA becomes too big, we can throw away the OldRefs 3845 whose generation numbers are below some threshold; hence doing 3846 approximate LRU discarding. For each discarded OldRef we must 3847 of course decrement the reference count on the all RCECs it 3848 refers to, in order that entries from (1) eventually get 3849 discarded too. 3850 3851 A major improvement in reliability of this mechanism would be to 3852 have a dynamically sized OldRef.accs[] array, so no entries ever 3853 fall off the end. 
In investigations (Dec 08) it appears that a
3854 major cause of the non-availability of conflicting-access traces
3855 in race reports is the fixed size of this array. I
3856 suspect for most OldRefs, only a few entries are used, but for a
3857 minority of cases there is an overflow, leading to info lossage.
3858 Investigations also suggest this is very workload and scheduling
3859 sensitive. Therefore a dynamic sizing would be better.
3860
3861 However, dynamic sizing would defeat the use of a PoolAllocator
3862 for OldRef structures. And that's important for performance. So
3863 it's not straightforward to do.
3864 */
3865
3866
3867 static UWord stats__ctxt_rcdec1 = 0;
3868 static UWord stats__ctxt_rcdec2 = 0;
3869 static UWord stats__ctxt_rcdec3 = 0;
3870 static UWord stats__ctxt_rcdec_calls = 0;
3871 static UWord stats__ctxt_rcdec_discards = 0;
3872 static UWord stats__ctxt_rcdec1_eq = 0;
3873
3874 static UWord stats__ctxt_tab_curr = 0;
3875 static UWord stats__ctxt_tab_max = 0;
3876
3877 static UWord stats__ctxt_tab_qs = 0;
3878 static UWord stats__ctxt_tab_cmps = 0;
3879
3880
3881 ///////////////////////////////////////////////////////
3882 //// Part (1): A hash table of RCECs
3883 ///
3884
3885 #define N_FRAMES 8
3886
3887 // (UInt) `echo "Reference Counted Execution Context" | md5sum`
3888 #define RCEC_MAGIC 0xab88abb2UL
3889
3890 //#define N_RCEC_TAB 98317 /* prime */
3891 #define N_RCEC_TAB 196613 /* prime */
3892
3893 typedef
3894    struct _RCEC {
3895       UWord magic; /* sanity check only */
3896       struct _RCEC* next;
3897       UWord rc;
3898       UWord rcX; /* used for crosschecking */
3899       UWord frames_hash; /* hash of all the frames */
3900       UWord frames[N_FRAMES];
3901    }
3902    RCEC;
3903
3904 static RCEC** contextTab = NULL; /* hash table of RCEC*s */
3905
3906
3907 /* Gives an arbitrary total order on RCEC .frames fields */
3908 static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
3909    Word i;
3910    tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
3911    tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
3912    if (ec1->frames_hash < ec2->frames_hash) return -1;
3913    if (ec1->frames_hash > ec2->frames_hash) return 1;
3914    for (i = 0; i < N_FRAMES; i++) {
3915       if (ec1->frames[i] < ec2->frames[i]) return -1;
3916       if (ec1->frames[i] > ec2->frames[i]) return 1;
3917    }
3918    return 0;
3919 }
3920
3921
3922 /* Dec the ref of this RCEC. */
3923 static void ctxt__rcdec ( RCEC* ec )
3924 {
3925    stats__ctxt_rcdec_calls++;
3926    tl_assert(ec && ec->magic == RCEC_MAGIC);
3927    tl_assert(ec->rc > 0);
3928    ec->rc--;
3929 }
3930
3931 static void ctxt__rcinc ( RCEC* ec )
3932 {
3933    tl_assert(ec && ec->magic == RCEC_MAGIC);
3934    ec->rc++;
3935 }
3936
3937
3938 //////////// BEGIN RCEC pool allocator
3939 static PoolAlloc* rcec_pool_allocator;
3940
3941 static RCEC* alloc_RCEC ( void ) {
3942    return VG_(allocEltPA) ( rcec_pool_allocator );
3943 }
3944
3945 static void free_RCEC ( RCEC* rcec ) {
3946    tl_assert(rcec->magic == RCEC_MAGIC);
3947    VG_(freeEltPA)( rcec_pool_allocator, rcec );
3948 }
3949 //////////// END RCEC pool allocator
3950
3951
3952 /* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
3953    move it one step closer to the front of the list, so as to make
3954    subsequent searches for it cheaper.
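The move is a single swap with the predecessor rather than a full move-to-front, so RCECs that are looked up repeatedly migrate gradually towards the head of their hash chain.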
*/
3955 static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
3956 {
3957    RCEC *ec0, *ec1, *ec2;
3958    if (ec == *headp)
3959       tl_assert(0); /* already at head of list */
3960    tl_assert(ec != NULL);
3961    ec0 = *headp;
3962    ec1 = NULL;
3963    ec2 = NULL;
3964    while (True) {
3965       if (ec0 == NULL || ec0 == ec) break;
3966       ec2 = ec1;
3967       ec1 = ec0;
3968       ec0 = ec0->next;
3969    }
3970    tl_assert(ec0 == ec);
3971    if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
3972       RCEC* tmp;
3973       /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
3974          predecessor. Swap ec0 and ec1, that is, move ec0 one step
3975          closer to the start of the list. */
3976       tl_assert(ec2->next == ec1);
3977       tl_assert(ec1->next == ec0);
3978       tmp = ec0->next;
3979       ec2->next = ec0;
3980       ec0->next = ec1;
3981       ec1->next = tmp;
3982    }
3983    else
3984    if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
3985       /* it's second in the list. */
3986       tl_assert(*headp == ec1);
3987       tl_assert(ec1->next == ec0);
3988       ec1->next = ec0->next;
3989       ec0->next = ec1;
3990       *headp = ec0;
3991    }
3992 }
3993
3994
3995 /* Find the given RCEC in the table, and return a pointer to it. Or,
3996    if not present, add the given one to the table (by making a copy of
3997    it, so the caller can immediately deallocate the original) and
3998    return a pointer to the copy. The caller can safely have 'example'
3999    on its stack, since we will always return a pointer to a copy of
4000    it, not to the original. Note that the inserted node will have .rc
4001    of zero and so the caller must immediately increment it. */
4002 __attribute__((noinline))
4003 static RCEC* ctxt__find_or_add ( RCEC* example )
4004 {
4005    UWord hent;
4006    RCEC* copy;
4007    tl_assert(example && example->magic == RCEC_MAGIC);
4008    tl_assert(example->rc == 0);
4009
4010    /* Search the hash table to see if we already have it.
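The chain searched is the one at contextTab[frames_hash % N_RCEC_TAB]; RCEC__cmp_by_frames rejects most non-matches cheaply on the hash comparison before falling back to frame-by-frame comparison.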
*/ 4011 stats__ctxt_tab_qs++; 4012 hent = example->frames_hash % N_RCEC_TAB; 4013 copy = contextTab[hent]; 4014 while (1) { 4015 if (!copy) break; 4016 tl_assert(copy->magic == RCEC_MAGIC); 4017 stats__ctxt_tab_cmps++; 4018 if (0 == RCEC__cmp_by_frames(copy, example)) break; 4019 copy = copy->next; 4020 } 4021 4022 if (copy) { 4023 tl_assert(copy != example); 4024 /* optimisation: if it's not at the head of its list, move 1 4025 step fwds, to make future searches cheaper */ 4026 if (copy != contextTab[hent]) { 4027 move_RCEC_one_step_forward( &contextTab[hent], copy ); 4028 } 4029 } else { 4030 copy = alloc_RCEC(); 4031 tl_assert(copy != example); 4032 *copy = *example; 4033 copy->next = contextTab[hent]; 4034 contextTab[hent] = copy; 4035 stats__ctxt_tab_curr++; 4036 if (stats__ctxt_tab_curr > stats__ctxt_tab_max) 4037 stats__ctxt_tab_max = stats__ctxt_tab_curr; 4038 } 4039 return copy; 4040 } 4041 4042 static inline UWord ROLW ( UWord w, Int n ) 4043 { 4044 Int bpw = 8 * sizeof(UWord); 4045 w = (w << n) | (w >> (bpw-n)); 4046 return w; 4047 } 4048 4049 __attribute__((noinline)) 4050 static RCEC* get_RCEC ( Thr* thr ) 4051 { 4052 UWord hash, i; 4053 RCEC example; 4054 example.magic = RCEC_MAGIC; 4055 example.rc = 0; 4056 example.rcX = 0; 4057 example.next = NULL; 4058 main_get_stacktrace( thr, &example.frames[0], N_FRAMES ); 4059 hash = 0; 4060 for (i = 0; i < N_FRAMES; i++) { 4061 hash ^= example.frames[i]; 4062 hash = ROLW(hash, 19); 4063 } 4064 example.frames_hash = hash; 4065 return ctxt__find_or_add( &example ); 4066 } 4067 4068 /////////////////////////////////////////////////////// 4069 //// Part (2): 4070 /// A SparseWA guest-addr -> OldRef, that refers to (1) 4071 /// 4072 4073 // (UInt) `echo "Old Reference Information" | md5sum` 4074 #define OldRef_MAGIC 0x30b1f075UL 4075 4076 /* Records an access: a thread, a context (size & writeness) and the 4077 number of held locks. The size (1,2,4,8) is encoded as 00 = 1, 01 = 4078 2, 10 = 4, 11 = 8. 4079 */ 4080 typedef 4081 struct { 4082 RCEC* rcec; 4083 WordSetID locksHeldW; 4084 UInt thrid : SCALARTS_N_THRBITS; 4085 UInt szLg2B : 2; 4086 UInt isW : 1; 4087 } 4088 Thr_n_RCEC; 4089 4090 #define N_OLDREF_ACCS 5 4091 4092 typedef 4093 struct { 4094 UWord magic; /* sanity check only */ 4095 UWord gen; /* when most recently accessed */ 4096 /* or free list when not in use */ 4097 /* unused slots in this array have .thrid == 0, which is invalid */ 4098 Thr_n_RCEC accs[N_OLDREF_ACCS]; 4099 } 4100 OldRef; 4101 4102 4103 //////////// BEGIN OldRef pool allocator 4104 static PoolAlloc* oldref_pool_allocator; 4105 4106 static OldRef* alloc_OldRef ( void ) { 4107 return VG_(allocEltPA) ( oldref_pool_allocator ); 4108 } 4109 4110 static void free_OldRef ( OldRef* r ) { 4111 tl_assert(r->magic == OldRef_MAGIC); 4112 VG_(freeEltPA)( oldref_pool_allocator, r ); 4113 } 4114 //////////// END OldRef pool allocator 4115 4116 4117 static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */ 4118 static UWord oldrefGen = 0; /* current LRU generation # */ 4119 static UWord oldrefTreeN = 0; /* # elems in oldrefTree */ 4120 static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */ 4121 4122 inline static UInt min_UInt ( UInt a, UInt b ) { 4123 return a < b ? a : b; 4124 } 4125 4126 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the 4127 first interval is lower, 1 if the first interval is higher, and 0 4128 if there is any overlap. 
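For example, [100,104) against [104,112) gives -1, whereas [100,104) against [102,110) gives 0, since those two ranges overlap.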
Redundant paranoia with casting is there 4129 following what looked distinctly like a bug in gcc-4.1.2, in which 4130 some of the comparisons were done signedly instead of 4131 unsignedly. */ 4132 /* Copied from exp-ptrcheck/sg_main.c */ 4133 static Word cmp_nonempty_intervals ( Addr a1, SizeT n1, 4134 Addr a2, SizeT n2 ) { 4135 UWord a1w = (UWord)a1; 4136 UWord n1w = (UWord)n1; 4137 UWord a2w = (UWord)a2; 4138 UWord n2w = (UWord)n2; 4139 tl_assert(n1w > 0 && n2w > 0); 4140 if (a1w + n1w <= a2w) return -1L; 4141 if (a2w + n2w <= a1w) return 1L; 4142 return 0; 4143 } 4144 4145 static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr ) 4146 { 4147 OldRef* ref; 4148 RCEC* rcec; 4149 Word i, j; 4150 UWord keyW, valW; 4151 Bool b; 4152 4153 tl_assert(thr); 4154 ThrID thrid = thr->thrid; 4155 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */ 4156 4157 WordSetID locksHeldW = thr->hgthread->locksetW; 4158 4159 rcec = get_RCEC( thr ); 4160 ctxt__rcinc(rcec); 4161 4162 UInt szLg2B = 0; 4163 switch (szB) { 4164 /* This doesn't look particularly branch-predictor friendly. */ 4165 case 1: szLg2B = 0; break; 4166 case 2: szLg2B = 1; break; 4167 case 4: szLg2B = 2; break; 4168 case 8: szLg2B = 3; break; 4169 default: tl_assert(0); 4170 } 4171 4172 /* Look in the map to see if we already have a record for this 4173 address. */ 4174 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a ); 4175 4176 if (b) { 4177 4178 /* We already have a record for this address. We now need to 4179 see if we have a stack trace pertaining to this (thrid, R/W, 4180 size) triple. */ 4181 tl_assert(keyW == a); 4182 ref = (OldRef*)valW; 4183 tl_assert(ref->magic == OldRef_MAGIC); 4184 4185 for (i = 0; i < N_OLDREF_ACCS; i++) { 4186 if (ref->accs[i].thrid != thrid) 4187 continue; 4188 if (ref->accs[i].szLg2B != szLg2B) 4189 continue; 4190 if (ref->accs[i].isW != (UInt)(isW & 1)) 4191 continue; 4192 /* else we have a match, so stop looking. */ 4193 break; 4194 } 4195 4196 if (i < N_OLDREF_ACCS) { 4197 /* thread 'thr' has an entry at index 'i'. Update its RCEC. */ 4198 if (i > 0) { 4199 Thr_n_RCEC tmp = ref->accs[i-1]; 4200 ref->accs[i-1] = ref->accs[i]; 4201 ref->accs[i] = tmp; 4202 i--; 4203 } 4204 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++; 4205 stats__ctxt_rcdec1++; 4206 ctxt__rcdec( ref->accs[i].rcec ); 4207 tl_assert(ref->accs[i].thrid == thrid); 4208 /* Update the RCEC and the W-held lockset. */ 4209 ref->accs[i].rcec = rcec; 4210 ref->accs[i].locksHeldW = locksHeldW; 4211 } else { 4212 /* No entry for this (thread, R/W, size, nWHeld) quad. 4213 Shuffle all of them down one slot, and put the new entry 4214 at the start of the array. */ 4215 if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) { 4216 /* the last slot is in use. We must dec the rc on the 4217 associated rcec. */ 4218 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec); 4219 stats__ctxt_rcdec2++; 4220 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF)) 4221 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2); 4222 ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec ); 4223 } else { 4224 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec); 4225 } 4226 for (j = N_OLDREF_ACCS-1; j >= 1; j--) 4227 ref->accs[j] = ref->accs[j-1]; 4228 ref->accs[0].thrid = thrid; 4229 ref->accs[0].szLg2B = szLg2B; 4230 ref->accs[0].isW = (UInt)(isW & 1); 4231 ref->accs[0].locksHeldW = locksHeldW; 4232 ref->accs[0].rcec = rcec; 4233 /* thrid==0 is used to signify an empty slot, so we can't 4234 add zero thrid (such a ThrID is invalid anyway). 
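Note that the new triple always lands in accs[0]; existing triples age towards the end of the array and the entry in the last slot is pushed out, which is what makes accs[] behave as the MTF-array described earlier.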
*/ 4235 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */ 4236 } 4237 4238 ref->gen = oldrefGen; 4239 4240 } else { 4241 4242 /* We don't have a record for this address. Create a new one. */ 4243 if (oldrefTreeN >= oldrefGenIncAt) { 4244 oldrefGen++; 4245 oldrefGenIncAt = oldrefTreeN + 50000; 4246 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n", 4247 oldrefGen, oldrefTreeN ); 4248 } 4249 4250 ref = alloc_OldRef(); 4251 ref->magic = OldRef_MAGIC; 4252 ref->gen = oldrefGen; 4253 ref->accs[0].thrid = thrid; 4254 ref->accs[0].szLg2B = szLg2B; 4255 ref->accs[0].isW = (UInt)(isW & 1); 4256 ref->accs[0].locksHeldW = locksHeldW; 4257 ref->accs[0].rcec = rcec; 4258 4259 /* thrid==0 is used to signify an empty slot, so we can't 4260 add zero thrid (such a ThrID is invalid anyway). */ 4261 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */ 4262 4263 /* Clear out the rest of the entries */ 4264 for (j = 1; j < N_OLDREF_ACCS; j++) { 4265 ref->accs[j].rcec = NULL; 4266 ref->accs[j].thrid = 0; 4267 ref->accs[j].szLg2B = 0; 4268 ref->accs[j].isW = 0; 4269 ref->accs[j].locksHeldW = 0; 4270 } 4271 VG_(addToSWA)( oldrefTree, a, (UWord)ref ); 4272 oldrefTreeN++; 4273 4274 } 4275 } 4276 4277 4278 /* Extract info from the conflicting-access machinery. */ 4279 Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC, 4280 /*OUT*/Thr** resThr, 4281 /*OUT*/SizeT* resSzB, 4282 /*OUT*/Bool* resIsW, 4283 /*OUT*/WordSetID* locksHeldW, 4284 Thr* thr, Addr a, SizeT szB, Bool isW ) 4285 { 4286 Word i, j; 4287 OldRef* ref; 4288 UWord keyW, valW; 4289 Bool b; 4290 4291 ThrID cand_thrid; 4292 RCEC* cand_rcec; 4293 Bool cand_isW; 4294 SizeT cand_szB; 4295 WordSetID cand_locksHeldW; 4296 Addr cand_a; 4297 4298 Addr toCheck[15]; 4299 Int nToCheck = 0; 4300 4301 tl_assert(thr); 4302 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1); 4303 4304 ThrID thrid = thr->thrid; 4305 4306 toCheck[nToCheck++] = a; 4307 for (i = -7; i < (Word)szB; i++) { 4308 if (i != 0) 4309 toCheck[nToCheck++] = a + i; 4310 } 4311 tl_assert(nToCheck <= 15); 4312 4313 /* Now see if we can find a suitable matching event for 4314 any of the addresses in toCheck[0 .. nToCheck-1]. */ 4315 for (j = 0; j < nToCheck; j++) { 4316 4317 cand_a = toCheck[j]; 4318 // VG_(printf)("test %ld %p\n", j, cand_a); 4319 4320 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a ); 4321 if (!b) 4322 continue; 4323 4324 ref = (OldRef*)valW; 4325 tl_assert(keyW == cand_a); 4326 tl_assert(ref->magic == OldRef_MAGIC); 4327 tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */ 4328 4329 cand_thrid = 0; /* invalid; see comments in event_map_bind */ 4330 cand_rcec = NULL; 4331 cand_isW = False; 4332 cand_szB = 0; 4333 cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */ 4334 4335 for (i = 0; i < N_OLDREF_ACCS; i++) { 4336 Thr_n_RCEC* cand = &ref->accs[i]; 4337 cand_rcec = cand->rcec; 4338 cand_thrid = cand->thrid; 4339 cand_isW = (Bool)cand->isW; 4340 cand_szB = 1 << cand->szLg2B; 4341 cand_locksHeldW = cand->locksHeldW; 4342 4343 if (cand_thrid == 0) 4344 /* This slot isn't in use. Ignore it. */ 4345 continue; 4346 4347 if (cand_thrid == thrid) 4348 /* This is an access by the same thread, but we're only 4349 interested in accesses from other threads. Ignore. */ 4350 continue; 4351 4352 if ((!cand_isW) && (!isW)) 4353 /* We don't want to report a read racing against another 4354 read; that's stupid. So in this case move on. 
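(A race needs at least one write, so the candidate pairs that survive this test are write/write, write/read and read/write.)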
*/ 4355 continue; 4356 4357 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0) 4358 /* No overlap with the access we're asking about. Ignore. */ 4359 continue; 4360 4361 /* We have a match. Stop searching. */ 4362 break; 4363 } 4364 4365 tl_assert(i >= 0 && i <= N_OLDREF_ACCS); 4366 4367 if (i < N_OLDREF_ACCS) { 4368 Int n, maxNFrames; 4369 /* return with success */ 4370 tl_assert(cand_thrid); 4371 tl_assert(cand_rcec); 4372 tl_assert(cand_rcec->magic == RCEC_MAGIC); 4373 tl_assert(cand_szB >= 1); 4374 /* Count how many non-zero frames we have. */ 4375 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size)); 4376 for (n = 0; n < maxNFrames; n++) { 4377 if (0 == cand_rcec->frames[n]) break; 4378 } 4379 *resEC = VG_(make_ExeContext_from_StackTrace) 4380 (cand_rcec->frames, n); 4381 *resThr = Thr__from_ThrID(cand_thrid); 4382 *resSzB = cand_szB; 4383 *resIsW = cand_isW; 4384 *locksHeldW = cand_locksHeldW; 4385 return True; 4386 } 4387 4388 /* consider next address in toCheck[] */ 4389 } /* for (j = 0; j < nToCheck; j++) */ 4390 4391 /* really didn't find anything. */ 4392 return False; 4393 } 4394 4395 static void event_map_init ( void ) 4396 { 4397 Word i; 4398 4399 /* Context (RCEC) pool allocator */ 4400 rcec_pool_allocator = VG_(newPA) ( 4401 sizeof(RCEC), 4402 1000 /* RCECs per pool */, 4403 HG_(zalloc), 4404 "libhb.event_map_init.1 (RCEC pools)", 4405 HG_(free) 4406 ); 4407 4408 /* Context table */ 4409 tl_assert(!contextTab); 4410 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)", 4411 N_RCEC_TAB * sizeof(RCEC*) ); 4412 tl_assert(contextTab); 4413 for (i = 0; i < N_RCEC_TAB; i++) 4414 contextTab[i] = NULL; 4415 4416 /* Oldref pool allocator */ 4417 oldref_pool_allocator = VG_(newPA)( 4418 sizeof(OldRef), 4419 1000 /* OldRefs per pool */, 4420 HG_(zalloc), 4421 "libhb.event_map_init.3 (OldRef pools)", 4422 HG_(free) 4423 ); 4424 4425 /* Oldref tree */ 4426 tl_assert(!oldrefTree); 4427 oldrefTree = VG_(newSWA)( 4428 HG_(zalloc), 4429 "libhb.event_map_init.4 (oldref tree)", 4430 HG_(free) 4431 ); 4432 tl_assert(oldrefTree); 4433 4434 oldrefGen = 0; 4435 oldrefGenIncAt = 0; 4436 oldrefTreeN = 0; 4437 } 4438 4439 static void event_map__check_reference_counts ( Bool before ) 4440 { 4441 RCEC* rcec; 4442 OldRef* oldref; 4443 Word i; 4444 UWord nEnts = 0; 4445 UWord keyW, valW; 4446 4447 /* Set the 'check' reference counts to zero. Also, optionally 4448 check that the real reference counts are non-zero. We allow 4449 these to fall to zero before a GC, but the GC must get rid of 4450 all those that are zero, hence none should be zero after a 4451 GC. 
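The recomputed counts are accumulated into each RCEC's rcX field by visiting every OldRef in oldrefTree; the final loop below then checks that rc == rcX for every entry in contextTab.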
*/ 4452 for (i = 0; i < N_RCEC_TAB; i++) { 4453 for (rcec = contextTab[i]; rcec; rcec = rcec->next) { 4454 nEnts++; 4455 tl_assert(rcec); 4456 tl_assert(rcec->magic == RCEC_MAGIC); 4457 if (!before) 4458 tl_assert(rcec->rc > 0); 4459 rcec->rcX = 0; 4460 } 4461 } 4462 4463 /* check that the stats are sane */ 4464 tl_assert(nEnts == stats__ctxt_tab_curr); 4465 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max); 4466 4467 /* visit all the referencing points, inc check ref counts */ 4468 VG_(initIterSWA)( oldrefTree ); 4469 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) { 4470 oldref = (OldRef*)valW; 4471 tl_assert(oldref->magic == OldRef_MAGIC); 4472 for (i = 0; i < N_OLDREF_ACCS; i++) { 4473 ThrID aThrID = oldref->accs[i].thrid; 4474 RCEC* aRef = oldref->accs[i].rcec; 4475 if (aThrID != 0) { 4476 tl_assert(aRef); 4477 tl_assert(aRef->magic == RCEC_MAGIC); 4478 aRef->rcX++; 4479 } else { 4480 tl_assert(!aRef); 4481 } 4482 } 4483 } 4484 4485 /* compare check ref counts with actual */ 4486 for (i = 0; i < N_RCEC_TAB; i++) { 4487 for (rcec = contextTab[i]; rcec; rcec = rcec->next) { 4488 tl_assert(rcec->rc == rcec->rcX); 4489 } 4490 } 4491 } 4492 4493 __attribute__((noinline)) 4494 static void event_map_maybe_GC ( void ) 4495 { 4496 OldRef* oldref; 4497 UWord keyW, valW, retained, maxGen; 4498 XArray* refs2del; 4499 Word i, j, n2del; 4500 4501 UWord* genMap = NULL; 4502 UWord genMap_min = 0; 4503 UWord genMap_size = 0; 4504 4505 if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size))) 4506 return; 4507 4508 if (0) 4509 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN); 4510 4511 /* Check for sane command line params. Limit values must match 4512 those in hg_process_cmd_line_option. */ 4513 tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 ); 4514 tl_assert( HG_(clo_conflict_cache_size) <= 30*1000*1000 ); 4515 4516 /* Check our counting is sane (expensive) */ 4517 if (CHECK_CEM) 4518 tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree )); 4519 4520 /* Check the reference counts (expensive) */ 4521 if (CHECK_CEM) 4522 event_map__check_reference_counts( True/*before*/ ); 4523 4524 /* Compute the distribution of generation values in the ref tree. 4525 There are likely only to be a few different generation numbers 4526 in the whole tree, but we don't know what they are. Hence use a 4527 dynamically resized array of counters. The array is genMap[0 4528 .. genMap_size-1], where genMap[0] is the count for the 4529 generation number genMap_min, genMap[1] is the count for 4530 genMap_min+1, etc. If a new number is seen outside the range 4531 [genMap_min .. genMap_min + genMap_size - 1] then the array is 4532 copied into a larger array, and genMap_min and genMap_size are 4533 adjusted accordingly. */ 4534 4535 /* genMap :: generation-number -> count-of-nodes-with-that-number */ 4536 4537 VG_(initIterSWA)( oldrefTree ); 4538 while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) { 4539 4540 UWord ea, key; 4541 oldref = (OldRef*)valW; 4542 key = oldref->gen; 4543 4544 /* BEGIN find 'ea', which is the index in genMap holding the 4545 count for generation number 'key'. */ 4546 if (UNLIKELY(genMap == NULL)) { 4547 /* deal with the first key to be seen, so that the following 4548 cases don't need to handle the complexity of a NULL count 4549 array. */ 4550 genMap_min = key; 4551 genMap_size = 1; 4552 genMap = HG_(zalloc)( "libhb.emmG.1a", 4553 genMap_size * sizeof(UWord) ); 4554 ea = 0; 4555 if (0) VG_(printf)("(%lu) case 1 [%lu .. 
%lu]\n", 4556 key, genMap_min, genMap_min+genMap_size- 1 ); 4557 } 4558 else 4559 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) { 4560 /* this is the expected (almost-always-happens) case: 'key' 4561 is already mapped in the array. */ 4562 ea = key - genMap_min; 4563 } 4564 else 4565 if (key < genMap_min) { 4566 /* 'key' appears before the start of the current array. 4567 Extend the current array by allocating a larger one and 4568 copying the current one to the upper end of it. */ 4569 Word more; 4570 UWord* map2; 4571 more = genMap_min - key; 4572 tl_assert(more > 0); 4573 map2 = HG_(zalloc)( "libhb.emmG.1b", 4574 (genMap_size + more) * sizeof(UWord) ); 4575 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) ); 4576 HG_(free)( genMap ); 4577 genMap = map2; 4578 genMap_size += more; 4579 genMap_min -= more; 4580 ea = 0; 4581 tl_assert(genMap_min == key); 4582 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n", 4583 key, genMap_min, genMap_min+genMap_size- 1 ); 4584 } 4585 else { 4586 /* 'key' appears after the end of the current array. Extend 4587 the current array by allocating a larger one and copying 4588 the current one to the lower end of it. */ 4589 Word more; 4590 UWord* map2; 4591 tl_assert(key >= genMap_min + genMap_size); 4592 more = key - (genMap_min + genMap_size) + 1; 4593 tl_assert(more > 0); 4594 map2 = HG_(zalloc)( "libhb.emmG.1c", 4595 (genMap_size + more) * sizeof(UWord) ); 4596 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) ); 4597 HG_(free)( genMap ); 4598 genMap = map2; 4599 genMap_size += more; 4600 ea = genMap_size - 1;; 4601 tl_assert(genMap_min + genMap_size - 1 == key); 4602 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n", 4603 key, genMap_min, genMap_min+genMap_size- 1 ); 4604 } 4605 /* END find 'ea' from 'key' */ 4606 4607 tl_assert(ea >= 0 && ea < genMap_size); 4608 /* and the whole point of this elaborate computation of 'ea' is .. */ 4609 genMap[ea]++; 4610 } 4611 4612 tl_assert(genMap); 4613 tl_assert(genMap_size > 0); 4614 4615 /* Sanity check what we just computed */ 4616 { UWord sum = 0; 4617 for (i = 0; i < genMap_size; i++) { 4618 if (0) VG_(printf)(" xxx: gen %ld has %lu\n", 4619 i + genMap_min, genMap[i] ); 4620 sum += genMap[i]; 4621 } 4622 tl_assert(sum == oldrefTreeN); 4623 } 4624 4625 /* Figure out how many generations to throw away */ 4626 retained = oldrefTreeN; 4627 maxGen = 0; 4628 4629 for (i = 0; i < genMap_size; i++) { 4630 keyW = i + genMap_min; 4631 valW = genMap[i]; 4632 tl_assert(keyW > 0); /* can't allow a generation # 0 */ 4633 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW ); 4634 tl_assert(keyW >= maxGen); 4635 tl_assert(retained >= valW); 4636 if (retained - valW 4637 > (UWord)(HG_(clo_conflict_cache_size) 4638 * EVENT_MAP_GC_DISCARD_FRACTION)) { 4639 retained -= valW; 4640 maxGen = keyW; 4641 } else { 4642 break; 4643 } 4644 } 4645 4646 HG_(free)(genMap); 4647 4648 tl_assert(retained >= 0 && retained <= oldrefTreeN); 4649 4650 /* Now make up a big list of the oldrefTree entries we want to 4651 delete. We can't simultaneously traverse the tree and delete 4652 stuff from it, so first we need to copy them off somewhere 4653 else. (sigh) */ 4654 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2", 4655 HG_(free), sizeof(Addr) ); 4656 4657 if (retained < oldrefTreeN) { 4658 4659 /* This is the normal (expected) case. We discard any ref whose 4660 generation number <= maxGen. 
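maxGen was computed above by walking the generation counts oldest-first and stopping as soon as a further deletion would drop the retained count to EVENT_MAP_GC_DISCARD_FRACTION of HG_(clo_conflict_cache_size) or below; hence the oldest entries are evicted first and roughly half of the configured cache survives.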
*/ 4661 VG_(initIterSWA)( oldrefTree ); 4662 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) { 4663 oldref = (OldRef*)valW; 4664 tl_assert(oldref->magic == OldRef_MAGIC); 4665 if (oldref->gen <= maxGen) { 4666 VG_(addToXA)( refs2del, &keyW ); 4667 } 4668 } 4669 if (VG_(clo_stats)) { 4670 VG_(message)(Vg_DebugMsg, 4671 "libhb: EvM GC: delete generations %lu and below, " 4672 "retaining %lu entries\n", 4673 maxGen, retained ); 4674 } 4675 4676 } else { 4677 4678 static UInt rand_seed = 0; /* leave as static */ 4679 4680 /* Degenerate case: there's only one generation in the entire 4681 tree, so we need to have some other way of deciding which 4682 refs to throw away. Just throw out half of them randomly. */ 4683 tl_assert(retained == oldrefTreeN); 4684 VG_(initIterSWA)( oldrefTree ); 4685 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) { 4686 UInt n; 4687 oldref = (OldRef*)valW; 4688 tl_assert(oldref->magic == OldRef_MAGIC); 4689 n = VG_(random)( &rand_seed ); 4690 if ((n & 0xFFF) < 0x800) { 4691 VG_(addToXA)( refs2del, &keyW ); 4692 retained--; 4693 } 4694 } 4695 if (VG_(clo_stats)) { 4696 VG_(message)(Vg_DebugMsg, 4697 "libhb: EvM GC: randomly delete half the entries, " 4698 "retaining %lu entries\n", 4699 retained ); 4700 } 4701 4702 } 4703 4704 n2del = VG_(sizeXA)( refs2del ); 4705 tl_assert(n2del == (Word)(oldrefTreeN - retained)); 4706 4707 if (0) VG_(printf)("%s","deleting entries\n"); 4708 for (i = 0; i < n2del; i++) { 4709 Bool b; 4710 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i ); 4711 b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del ); 4712 tl_assert(b); 4713 tl_assert(keyW == ga2del); 4714 oldref = (OldRef*)valW; 4715 for (j = 0; j < N_OLDREF_ACCS; j++) { 4716 ThrID aThrID = oldref->accs[j].thrid; 4717 RCEC* aRef = oldref->accs[j].rcec; 4718 if (aRef) { 4719 tl_assert(aThrID != 0); 4720 stats__ctxt_rcdec3++; 4721 ctxt__rcdec( aRef ); 4722 } else { 4723 tl_assert(aThrID == 0); 4724 } 4725 } 4726 4727 free_OldRef( oldref ); 4728 } 4729 4730 VG_(deleteXA)( refs2del ); 4731 4732 tl_assert( VG_(sizeSWA)( oldrefTree ) == retained ); 4733 4734 oldrefTreeN = retained; 4735 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */ 4736 4737 /* Throw away all RCECs with zero reference counts */ 4738 for (i = 0; i < N_RCEC_TAB; i++) { 4739 RCEC** pp = &contextTab[i]; 4740 RCEC* p = *pp; 4741 while (p) { 4742 if (p->rc == 0) { 4743 *pp = p->next; 4744 free_RCEC(p); 4745 p = *pp; 4746 tl_assert(stats__ctxt_tab_curr > 0); 4747 stats__ctxt_tab_curr--; 4748 } else { 4749 pp = &p->next; 4750 p = p->next; 4751 } 4752 } 4753 } 4754 4755 /* Check the reference counts (expensive) */ 4756 if (CHECK_CEM) 4757 event_map__check_reference_counts( False/*after*/ ); 4758 4759 //if (0) 4760 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n", 4761 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree)); 4762 4763 } 4764 4765 4766 ///////////////////////////////////////////////////////// 4767 // // 4768 // Core MSM // 4769 // // 4770 ///////////////////////////////////////////////////////// 4771 4772 /* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19 4773 Nov 08, and again after [...], 4774 June 09. 
*/
4775
4776 static ULong stats__msmcread = 0;
4777 static ULong stats__msmcread_change = 0;
4778 static ULong stats__msmcwrite = 0;
4779 static ULong stats__msmcwrite_change = 0;
4780
4781 /* Some notes on the H1 history mechanism:
4782
4783    Transition rules are:
4784
4785    read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
4786    write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4787
4788    After any access by a thread T to a location L, L's constraint pair
4789    (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4790
4791    After a race by thread T conflicting with some previous access by
4792    some other thread U, for a location with constraint (Cr,Cw) (as it
4793    was before processing the later access), Cw[U] is the segment in
4794    which the previous access lies.
4795
4796    Hence in record_race_info, we pass in Cfailed and Kfailed, which
4797    are compared so as to find out which thread(s) this access
4798    conflicts with. Once that is established, we also require the
4799    pre-update Cw for the location, so we can index into it for those
4800    threads, to get the scalar clock values for the point at which the
4801    former accesses were made. (In fact we only bother to do any of
4802    this for an arbitrarily chosen one of the conflicting threads, as
4803    that's simpler and avoids flooding the user with vast amounts of
4804    mostly useless information; and because the program is wrong if it
4805    contains any races at all -- so we don't really need to show all
4806    conflicting access pairs initially, so long as we only show none if
4807    none exist).
4808
4809    ---
4810
4811    That requires the auxiliary proof that
4812
4813    (Cr `join` Kw)[T] == Kw[T]
4814
4815    Why should that be true? Because for any thread T, Kw[T] >= the
4816    scalar clock value for T known by any other thread. In other
4817    words, because T's value for its own scalar clock is at least as up
4818    to date as the value for it known by any other thread (that is true
4819    for both the R- and W- scalar clocks). Hence no other thread will
4820    be able to feed in a value for that element (indirectly via a
4821    constraint) which will exceed Kw[T], and hence the join cannot
4822    cause that particular element to advance.
4823 */
4824
4825 __attribute__((noinline))
4826 static void record_race_info ( Thr* acc_thr,
4827                                Addr acc_addr, SizeT szB, Bool isWrite,
4828                                VtsID Cfailed,
4829                                VtsID Kfailed,
4830                                VtsID Cw )
4831 {
4832    /* Call here to report a race. We just hand it onwards to
4833       HG_(record_error_Race). If that in turn discovers that the
4834       error is going to be collected, then, at history_level 2, that
4835       queries the conflicting-event map. The alternative would be to
4836       query it right here. But that causes a lot of pointless queries
4837       for errors which will shortly be discarded as duplicates, and
4838       can become a performance overhead; so we defer the query until
4839       we know the error is not a duplicate. */
4840
4841    /* Stacks for the bounds of the (or one of the) conflicting
4842       segment(s). These are only set at history_level 1.
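At history levels 0 and 2 they remain NULL and HG_(record_error_Race) is given no segment bounds.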
*/ 4843 ExeContext* hist1_seg_start = NULL; 4844 ExeContext* hist1_seg_end = NULL; 4845 Thread* hist1_conf_thr = NULL; 4846 4847 tl_assert(acc_thr); 4848 tl_assert(acc_thr->hgthread); 4849 tl_assert(acc_thr->hgthread->hbthr == acc_thr); 4850 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2); 4851 4852 if (HG_(clo_history_level) == 1) { 4853 Bool found; 4854 Word firstIx, lastIx; 4855 ULong_n_EC key; 4856 4857 /* At history_level 1, we must round up the relevant stack-pair 4858 for the conflicting segment right now. This is because 4859 deferring it is complex; we can't (easily) put Kfailed and 4860 Cfailed into the XError and wait for later without 4861 getting tied up in difficulties with VtsID reference 4862 counting. So just do it now. */ 4863 Thr* confThr; 4864 ULong confTym = 0; 4865 /* Which thread are we in conflict with? There may be more than 4866 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily 4867 (in fact it's the one with the lowest Thr* value). */ 4868 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed ); 4869 /* This must exist! since if it was NULL then there's no 4870 conflict (semantics of return value of 4871 VtsID__findFirst_notLEQ), and msmc{read,write}, which has 4872 called us, just checked exactly this -- that there was in 4873 fact a race. */ 4874 tl_assert(confThr); 4875 4876 /* Get the scalar clock value that the conflicting thread 4877 introduced into the constraint. A careful examination of the 4878 base machine rules shows that this must be the same as the 4879 conflicting thread's scalar clock when it created this 4880 constraint. Hence we know the scalar clock of the 4881 conflicting thread when the conflicting access was made. */ 4882 confTym = VtsID__indexAt( Cfailed, confThr ); 4883 4884 /* Using this scalar clock, index into the conflicting thread's 4885 collection of stack traces made each time its vector clock 4886 (hence its scalar clock) changed. This gives the stack 4887 traces at the start and end of the conflicting segment (well, 4888 as per comment just above, of one of the conflicting 4889 segments, if there are more than one). */ 4890 key.ull = confTym; 4891 key.ec = NULL; 4892 /* tl_assert(confThr); -- asserted just above */ 4893 tl_assert(confThr->local_Kws_n_stacks); 4894 firstIx = lastIx = 0; 4895 found = VG_(lookupXA_UNSAFE)( 4896 confThr->local_Kws_n_stacks, 4897 &key, &firstIx, &lastIx, 4898 (XACmpFn_t)cmp__ULong_n_EC__by_ULong 4899 ); 4900 if (0) VG_(printf)("record_race_info %u %u %u confThr %p " 4901 "confTym %llu found %d (%lu,%lu)\n", 4902 Cfailed, Kfailed, Cw, 4903 confThr, confTym, found, firstIx, lastIx); 4904 /* We can't indefinitely collect stack traces at VTS 4905 transitions, since we'd eventually run out of memory. Hence 4906 note_local_Kw_n_stack_for will eventually throw away old 4907 ones, which in turn means we might fail to find index value 4908 confTym in the array. */ 4909 if (found) { 4910 ULong_n_EC *pair_start, *pair_end; 4911 pair_start 4912 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx ); 4913 hist1_seg_start = pair_start->ec; 4914 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) { 4915 pair_end 4916 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, 4917 lastIx+1 ); 4918 /* from properties of VG_(lookupXA) and the comparison fn used: */ 4919 tl_assert(pair_start->ull < pair_end->ull); 4920 hist1_seg_end = pair_end->ec; 4921 /* Could do a bit better here. 
It may be that pair_end 4922 doesn't have a stack, but the following entries in the 4923 array have the same scalar Kw and to have a stack. So 4924 we should search a bit further along the array than 4925 lastIx+1 if hist1_seg_end is NULL. */ 4926 } else { 4927 if (!confThr->llexit_done) 4928 hist1_seg_end = main_get_EC( confThr ); 4929 } 4930 // seg_start could be NULL iff this is the first stack in the thread 4931 //if (seg_start) VG_(pp_ExeContext)(seg_start); 4932 //if (seg_end) VG_(pp_ExeContext)(seg_end); 4933 hist1_conf_thr = confThr->hgthread; 4934 } 4935 } 4936 4937 HG_(record_error_Race)( acc_thr->hgthread, acc_addr, 4938 szB, isWrite, 4939 hist1_conf_thr, hist1_seg_start, hist1_seg_end ); 4940 } 4941 4942 static Bool is_sane_SVal_C ( SVal sv ) { 4943 Bool leq; 4944 if (!SVal__isC(sv)) return True; 4945 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) ); 4946 return leq; 4947 } 4948 4949 4950 /* Compute new state following a read */ 4951 static inline SVal msmcread ( SVal svOld, 4952 /* The following are only needed for 4953 creating error reports. */ 4954 Thr* acc_thr, 4955 Addr acc_addr, SizeT szB ) 4956 { 4957 SVal svNew = SVal_INVALID; 4958 stats__msmcread++; 4959 4960 /* Redundant sanity check on the constraints */ 4961 if (CHECK_MSM) { 4962 tl_assert(is_sane_SVal_C(svOld)); 4963 } 4964 4965 if (LIKELY(SVal__isC(svOld))) { 4966 VtsID tviR = acc_thr->viR; 4967 VtsID tviW = acc_thr->viW; 4968 VtsID rmini = SVal__unC_Rmin(svOld); 4969 VtsID wmini = SVal__unC_Wmin(svOld); 4970 Bool leq = VtsID__cmpLEQ(rmini,tviR); 4971 if (LIKELY(leq)) { 4972 /* no race */ 4973 /* Note: RWLOCK subtlety: use tviW, not tviR */ 4974 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) ); 4975 goto out; 4976 } else { 4977 /* assert on sanity of constraints. */ 4978 Bool leqxx = VtsID__cmpLEQ(rmini,wmini); 4979 tl_assert(leqxx); 4980 // same as in non-race case 4981 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) ); 4982 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/, 4983 rmini, /* Cfailed */ 4984 tviR, /* Kfailed */ 4985 wmini /* Cw */ ); 4986 goto out; 4987 } 4988 } 4989 if (SVal__isA(svOld)) { 4990 /* reading no-access memory (sigh); leave unchanged */ 4991 /* check for no pollution */ 4992 tl_assert(svOld == SVal_NOACCESS); 4993 svNew = SVal_NOACCESS; 4994 goto out; 4995 } 4996 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld); 4997 tl_assert(0); 4998 4999 out: 5000 if (CHECK_MSM) { 5001 tl_assert(is_sane_SVal_C(svNew)); 5002 } 5003 if (UNLIKELY(svNew != svOld)) { 5004 tl_assert(svNew != SVal_INVALID); 5005 if (HG_(clo_history_level) >= 2 5006 && SVal__isC(svOld) && SVal__isC(svNew)) { 5007 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr ); 5008 stats__msmcread_change++; 5009 } 5010 } 5011 return svNew; 5012 } 5013 5014 5015 /* Compute new state following a write */ 5016 static inline SVal msmcwrite ( SVal svOld, 5017 /* The following are only needed for 5018 creating error reports. 
*/ 5019 Thr* acc_thr, 5020 Addr acc_addr, SizeT szB ) 5021 { 5022 SVal svNew = SVal_INVALID; 5023 stats__msmcwrite++; 5024 5025 /* Redundant sanity check on the constraints */ 5026 if (CHECK_MSM) { 5027 tl_assert(is_sane_SVal_C(svOld)); 5028 } 5029 5030 if (LIKELY(SVal__isC(svOld))) { 5031 VtsID tviW = acc_thr->viW; 5032 VtsID wmini = SVal__unC_Wmin(svOld); 5033 Bool leq = VtsID__cmpLEQ(wmini,tviW); 5034 if (LIKELY(leq)) { 5035 /* no race */ 5036 svNew = SVal__mkC( tviW, tviW ); 5037 goto out; 5038 } else { 5039 VtsID rmini = SVal__unC_Rmin(svOld); 5040 /* assert on sanity of constraints. */ 5041 Bool leqxx = VtsID__cmpLEQ(rmini,wmini); 5042 tl_assert(leqxx); 5043 // same as in non-race case 5044 // proof: in the non-race case, we have 5045 // rmini <= wmini (invar on constraints) 5046 // tviW <= tviR (invar on thread clocks) 5047 // wmini <= tviW (from run-time check) 5048 // hence from transitivity of <= we have 5049 // rmini <= wmini <= tviW 5050 // and so join(rmini,tviW) == tviW 5051 // and join(wmini,tviW) == tviW 5052 // qed. 5053 svNew = SVal__mkC( VtsID__join2(rmini, tviW), 5054 VtsID__join2(wmini, tviW) ); 5055 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/, 5056 wmini, /* Cfailed */ 5057 tviW, /* Kfailed */ 5058 wmini /* Cw */ ); 5059 goto out; 5060 } 5061 } 5062 if (SVal__isA(svOld)) { 5063 /* writing no-access memory (sigh); leave unchanged */ 5064 /* check for no pollution */ 5065 tl_assert(svOld == SVal_NOACCESS); 5066 svNew = SVal_NOACCESS; 5067 goto out; 5068 } 5069 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld); 5070 tl_assert(0); 5071 5072 out: 5073 if (CHECK_MSM) { 5074 tl_assert(is_sane_SVal_C(svNew)); 5075 } 5076 if (UNLIKELY(svNew != svOld)) { 5077 tl_assert(svNew != SVal_INVALID); 5078 if (HG_(clo_history_level) >= 2 5079 && SVal__isC(svOld) && SVal__isC(svNew)) { 5080 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr ); 5081 stats__msmcwrite_change++; 5082 } 5083 } 5084 return svNew; 5085 } 5086 5087 5088 ///////////////////////////////////////////////////////// 5089 // // 5090 // Apply core MSM to specific memory locations // 5091 // // 5092 ///////////////////////////////////////////////////////// 5093 5094 /*------------- ZSM accesses: 8 bit sapply ------------- */ 5095 5096 static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) { 5097 CacheLine* cl; 5098 UWord cloff, tno, toff; 5099 SVal svOld, svNew; 5100 UShort descr; 5101 stats__cline_cread08s++; 5102 cl = get_cacheline(a); 5103 cloff = get_cacheline_offset(a); 5104 tno = get_treeno(a); 5105 toff = get_tree_offset(a); /* == 0 .. 7 */ 5106 descr = cl->descrs[tno]; 5107 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) { 5108 SVal* tree = &cl->svals[tno << 3]; 5109 cl->descrs[tno] = pulldown_to_8(tree, toff, descr); 5110 if (CHECK_ZSM) 5111 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5112 } 5113 svOld = cl->svals[cloff]; 5114 svNew = msmcread( svOld, thr,a,1 ); 5115 if (CHECK_ZSM) 5116 tl_assert(svNew != SVal_INVALID); 5117 cl->svals[cloff] = svNew; 5118 } 5119 5120 static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) { 5121 CacheLine* cl; 5122 UWord cloff, tno, toff; 5123 SVal svOld, svNew; 5124 UShort descr; 5125 stats__cline_cwrite08s++; 5126 cl = get_cacheline(a); 5127 cloff = get_cacheline_offset(a); 5128 tno = get_treeno(a); 5129 toff = get_tree_offset(a); /* == 0 .. 
7 */ 5130 descr = cl->descrs[tno]; 5131 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) { 5132 SVal* tree = &cl->svals[tno << 3]; 5133 cl->descrs[tno] = pulldown_to_8(tree, toff, descr); 5134 if (CHECK_ZSM) 5135 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5136 } 5137 svOld = cl->svals[cloff]; 5138 svNew = msmcwrite( svOld, thr,a,1 ); 5139 if (CHECK_ZSM) 5140 tl_assert(svNew != SVal_INVALID); 5141 cl->svals[cloff] = svNew; 5142 } 5143 5144 /*------------- ZSM accesses: 16 bit sapply ------------- */ 5145 5146 static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) { 5147 CacheLine* cl; 5148 UWord cloff, tno, toff; 5149 SVal svOld, svNew; 5150 UShort descr; 5151 stats__cline_cread16s++; 5152 if (UNLIKELY(!aligned16(a))) goto slowcase; 5153 cl = get_cacheline(a); 5154 cloff = get_cacheline_offset(a); 5155 tno = get_treeno(a); 5156 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */ 5157 descr = cl->descrs[tno]; 5158 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) { 5159 if (valid_value_is_below_me_16(descr, toff)) { 5160 goto slowcase; 5161 } else { 5162 SVal* tree = &cl->svals[tno << 3]; 5163 cl->descrs[tno] = pulldown_to_16(tree, toff, descr); 5164 } 5165 if (CHECK_ZSM) 5166 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5167 } 5168 svOld = cl->svals[cloff]; 5169 svNew = msmcread( svOld, thr,a,2 ); 5170 if (CHECK_ZSM) 5171 tl_assert(svNew != SVal_INVALID); 5172 cl->svals[cloff] = svNew; 5173 return; 5174 slowcase: /* misaligned, or must go further down the tree */ 5175 stats__cline_16to8splits++; 5176 zsm_sapply08__msmcread( thr, a + 0 ); 5177 zsm_sapply08__msmcread( thr, a + 1 ); 5178 } 5179 5180 static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) { 5181 CacheLine* cl; 5182 UWord cloff, tno, toff; 5183 SVal svOld, svNew; 5184 UShort descr; 5185 stats__cline_cwrite16s++; 5186 if (UNLIKELY(!aligned16(a))) goto slowcase; 5187 cl = get_cacheline(a); 5188 cloff = get_cacheline_offset(a); 5189 tno = get_treeno(a); 5190 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */ 5191 descr = cl->descrs[tno]; 5192 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) { 5193 if (valid_value_is_below_me_16(descr, toff)) { 5194 goto slowcase; 5195 } else { 5196 SVal* tree = &cl->svals[tno << 3]; 5197 cl->descrs[tno] = pulldown_to_16(tree, toff, descr); 5198 } 5199 if (CHECK_ZSM) 5200 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5201 } 5202 svOld = cl->svals[cloff]; 5203 svNew = msmcwrite( svOld, thr,a,2 ); 5204 if (CHECK_ZSM) 5205 tl_assert(svNew != SVal_INVALID); 5206 cl->svals[cloff] = svNew; 5207 return; 5208 slowcase: /* misaligned, or must go further down the tree */ 5209 stats__cline_16to8splits++; 5210 zsm_sapply08__msmcwrite( thr, a + 0 ); 5211 zsm_sapply08__msmcwrite( thr, a + 1 ); 5212 } 5213 5214 /*------------- ZSM accesses: 32 bit sapply ------------- */ 5215 5216 static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) { 5217 CacheLine* cl; 5218 UWord cloff, tno, toff; 5219 SVal svOld, svNew; 5220 UShort descr; 5221 stats__cline_cread32s++; 5222 if (UNLIKELY(!aligned32(a))) goto slowcase; 5223 cl = get_cacheline(a); 5224 cloff = get_cacheline_offset(a); 5225 tno = get_treeno(a); 5226 toff = get_tree_offset(a); /* == 0 or 4 */ 5227 descr = cl->descrs[tno]; 5228 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) { 5229 if (valid_value_is_above_me_32(descr, toff)) { 5230 SVal* tree = &cl->svals[tno << 3]; 5231 cl->descrs[tno] = pulldown_to_32(tree, toff, descr); 5232 } else { 5233 goto slowcase; 5234 } 5235 if (CHECK_ZSM) 5236 tl_assert(is_sane_CacheLine(cl)); /* 
EXPENSIVE */ 5237 } 5238 svOld = cl->svals[cloff]; 5239 svNew = msmcread( svOld, thr,a,4 ); 5240 if (CHECK_ZSM) 5241 tl_assert(svNew != SVal_INVALID); 5242 cl->svals[cloff] = svNew; 5243 return; 5244 slowcase: /* misaligned, or must go further down the tree */ 5245 stats__cline_32to16splits++; 5246 zsm_sapply16__msmcread( thr, a + 0 ); 5247 zsm_sapply16__msmcread( thr, a + 2 ); 5248 } 5249 5250 static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) { 5251 CacheLine* cl; 5252 UWord cloff, tno, toff; 5253 SVal svOld, svNew; 5254 UShort descr; 5255 stats__cline_cwrite32s++; 5256 if (UNLIKELY(!aligned32(a))) goto slowcase; 5257 cl = get_cacheline(a); 5258 cloff = get_cacheline_offset(a); 5259 tno = get_treeno(a); 5260 toff = get_tree_offset(a); /* == 0 or 4 */ 5261 descr = cl->descrs[tno]; 5262 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) { 5263 if (valid_value_is_above_me_32(descr, toff)) { 5264 SVal* tree = &cl->svals[tno << 3]; 5265 cl->descrs[tno] = pulldown_to_32(tree, toff, descr); 5266 } else { 5267 goto slowcase; 5268 } 5269 if (CHECK_ZSM) 5270 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5271 } 5272 svOld = cl->svals[cloff]; 5273 svNew = msmcwrite( svOld, thr,a,4 ); 5274 if (CHECK_ZSM) 5275 tl_assert(svNew != SVal_INVALID); 5276 cl->svals[cloff] = svNew; 5277 return; 5278 slowcase: /* misaligned, or must go further down the tree */ 5279 stats__cline_32to16splits++; 5280 zsm_sapply16__msmcwrite( thr, a + 0 ); 5281 zsm_sapply16__msmcwrite( thr, a + 2 ); 5282 } 5283 5284 /*------------- ZSM accesses: 64 bit sapply ------------- */ 5285 5286 static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) { 5287 CacheLine* cl; 5288 UWord cloff, tno; 5289 //UWord toff; 5290 SVal svOld, svNew; 5291 UShort descr; 5292 stats__cline_cread64s++; 5293 if (UNLIKELY(!aligned64(a))) goto slowcase; 5294 cl = get_cacheline(a); 5295 cloff = get_cacheline_offset(a); 5296 tno = get_treeno(a); 5297 //toff = get_tree_offset(a); /* == 0, unused */ 5298 descr = cl->descrs[tno]; 5299 if (UNLIKELY( !(descr & TREE_DESCR_64) )) { 5300 goto slowcase; 5301 } 5302 svOld = cl->svals[cloff]; 5303 svNew = msmcread( svOld, thr,a,8 ); 5304 if (CHECK_ZSM) 5305 tl_assert(svNew != SVal_INVALID); 5306 cl->svals[cloff] = svNew; 5307 return; 5308 slowcase: /* misaligned, or must go further down the tree */ 5309 stats__cline_64to32splits++; 5310 zsm_sapply32__msmcread( thr, a + 0 ); 5311 zsm_sapply32__msmcread( thr, a + 4 ); 5312 } 5313 5314 static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) { 5315 CacheLine* cl; 5316 UWord cloff, tno; 5317 //UWord toff; 5318 SVal svOld, svNew; 5319 UShort descr; 5320 stats__cline_cwrite64s++; 5321 if (UNLIKELY(!aligned64(a))) goto slowcase; 5322 cl = get_cacheline(a); 5323 cloff = get_cacheline_offset(a); 5324 tno = get_treeno(a); 5325 //toff = get_tree_offset(a); /* == 0, unused */ 5326 descr = cl->descrs[tno]; 5327 if (UNLIKELY( !(descr & TREE_DESCR_64) )) { 5328 goto slowcase; 5329 } 5330 svOld = cl->svals[cloff]; 5331 svNew = msmcwrite( svOld, thr,a,8 ); 5332 if (CHECK_ZSM) 5333 tl_assert(svNew != SVal_INVALID); 5334 cl->svals[cloff] = svNew; 5335 return; 5336 slowcase: /* misaligned, or must go further down the tree */ 5337 stats__cline_64to32splits++; 5338 zsm_sapply32__msmcwrite( thr, a + 0 ); 5339 zsm_sapply32__msmcwrite( thr, a + 4 ); 5340 } 5341 5342 /*--------------- ZSM accesses: 8 bit swrite --------------- */ 5343 5344 static 5345 void zsm_swrite08 ( Addr a, SVal svNew ) { 5346 CacheLine* cl; 5347 UWord cloff, tno, toff; 5348 UShort descr; 5349 
stats__cline_swrite08s++; 5350 cl = get_cacheline(a); 5351 cloff = get_cacheline_offset(a); 5352 tno = get_treeno(a); 5353 toff = get_tree_offset(a); /* == 0 .. 7 */ 5354 descr = cl->descrs[tno]; 5355 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) { 5356 SVal* tree = &cl->svals[tno << 3]; 5357 cl->descrs[tno] = pulldown_to_8(tree, toff, descr); 5358 if (CHECK_ZSM) 5359 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5360 } 5361 tl_assert(svNew != SVal_INVALID); 5362 cl->svals[cloff] = svNew; 5363 } 5364 5365 /*--------------- ZSM accesses: 16 bit swrite --------------- */ 5366 5367 static 5368 void zsm_swrite16 ( Addr a, SVal svNew ) { 5369 CacheLine* cl; 5370 UWord cloff, tno, toff; 5371 UShort descr; 5372 stats__cline_swrite16s++; 5373 if (UNLIKELY(!aligned16(a))) goto slowcase; 5374 cl = get_cacheline(a); 5375 cloff = get_cacheline_offset(a); 5376 tno = get_treeno(a); 5377 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */ 5378 descr = cl->descrs[tno]; 5379 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) { 5380 if (valid_value_is_below_me_16(descr, toff)) { 5381 /* Writing at this level. Need to fix up 'descr'. */ 5382 cl->descrs[tno] = pullup_descr_to_16(descr, toff); 5383 /* At this point, the tree does not match cl->descr[tno] any 5384 more. The assignments below will fix it up. */ 5385 } else { 5386 /* We can't indiscriminately write on the w16 node as in the 5387 w64 case, as that might make the node inconsistent with 5388 its parent. So first, pull down to this level. */ 5389 SVal* tree = &cl->svals[tno << 3]; 5390 cl->descrs[tno] = pulldown_to_16(tree, toff, descr); 5391 if (CHECK_ZSM) 5392 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5393 } 5394 } 5395 tl_assert(svNew != SVal_INVALID); 5396 cl->svals[cloff + 0] = svNew; 5397 cl->svals[cloff + 1] = SVal_INVALID; 5398 return; 5399 slowcase: /* misaligned */ 5400 stats__cline_16to8splits++; 5401 zsm_swrite08( a + 0, svNew ); 5402 zsm_swrite08( a + 1, svNew ); 5403 } 5404 5405 /*--------------- ZSM accesses: 32 bit swrite --------------- */ 5406 5407 static 5408 void zsm_swrite32 ( Addr a, SVal svNew ) { 5409 CacheLine* cl; 5410 UWord cloff, tno, toff; 5411 UShort descr; 5412 stats__cline_swrite32s++; 5413 if (UNLIKELY(!aligned32(a))) goto slowcase; 5414 cl = get_cacheline(a); 5415 cloff = get_cacheline_offset(a); 5416 tno = get_treeno(a); 5417 toff = get_tree_offset(a); /* == 0 or 4 */ 5418 descr = cl->descrs[tno]; 5419 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) { 5420 if (valid_value_is_above_me_32(descr, toff)) { 5421 /* We can't indiscriminately write on the w32 node as in the 5422 w64 case, as that might make the node inconsistent with 5423 its parent. So first, pull down to this level. */ 5424 SVal* tree = &cl->svals[tno << 3]; 5425 cl->descrs[tno] = pulldown_to_32(tree, toff, descr); 5426 if (CHECK_ZSM) 5427 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5428 } else { 5429 /* Writing at this level. Need to fix up 'descr'. */ 5430 cl->descrs[tno] = pullup_descr_to_32(descr, toff); 5431 /* At this point, the tree does not match cl->descr[tno] any 5432 more. The assignments below will fix it up. 
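         (Concretely: pullup_descr_to_32 adjusts only the descriptor,
         marking this 32-bit region as valid before its svals have
         been written; storing svNew at cloff+0 and SVal_INVALID at
         cloff+1..3 below restores the invariant.)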
*/ 5433 } 5434 } 5435 tl_assert(svNew != SVal_INVALID); 5436 cl->svals[cloff + 0] = svNew; 5437 cl->svals[cloff + 1] = SVal_INVALID; 5438 cl->svals[cloff + 2] = SVal_INVALID; 5439 cl->svals[cloff + 3] = SVal_INVALID; 5440 return; 5441 slowcase: /* misaligned */ 5442 stats__cline_32to16splits++; 5443 zsm_swrite16( a + 0, svNew ); 5444 zsm_swrite16( a + 2, svNew ); 5445 } 5446 5447 /*--------------- ZSM accesses: 64 bit swrite --------------- */ 5448 5449 static 5450 void zsm_swrite64 ( Addr a, SVal svNew ) { 5451 CacheLine* cl; 5452 UWord cloff, tno; 5453 //UWord toff; 5454 stats__cline_swrite64s++; 5455 if (UNLIKELY(!aligned64(a))) goto slowcase; 5456 cl = get_cacheline(a); 5457 cloff = get_cacheline_offset(a); 5458 tno = get_treeno(a); 5459 //toff = get_tree_offset(a); /* == 0, unused */ 5460 cl->descrs[tno] = TREE_DESCR_64; 5461 tl_assert(svNew != SVal_INVALID); 5462 cl->svals[cloff + 0] = svNew; 5463 cl->svals[cloff + 1] = SVal_INVALID; 5464 cl->svals[cloff + 2] = SVal_INVALID; 5465 cl->svals[cloff + 3] = SVal_INVALID; 5466 cl->svals[cloff + 4] = SVal_INVALID; 5467 cl->svals[cloff + 5] = SVal_INVALID; 5468 cl->svals[cloff + 6] = SVal_INVALID; 5469 cl->svals[cloff + 7] = SVal_INVALID; 5470 return; 5471 slowcase: /* misaligned */ 5472 stats__cline_64to32splits++; 5473 zsm_swrite32( a + 0, svNew ); 5474 zsm_swrite32( a + 4, svNew ); 5475 } 5476 5477 /*------------- ZSM accesses: 8 bit sread/scopy ------------- */ 5478 5479 static 5480 SVal zsm_sread08 ( Addr a ) { 5481 CacheLine* cl; 5482 UWord cloff, tno, toff; 5483 UShort descr; 5484 stats__cline_sread08s++; 5485 cl = get_cacheline(a); 5486 cloff = get_cacheline_offset(a); 5487 tno = get_treeno(a); 5488 toff = get_tree_offset(a); /* == 0 .. 7 */ 5489 descr = cl->descrs[tno]; 5490 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) { 5491 SVal* tree = &cl->svals[tno << 3]; 5492 cl->descrs[tno] = pulldown_to_8(tree, toff, descr); 5493 } 5494 return cl->svals[cloff]; 5495 } 5496 5497 static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) { 5498 SVal sv; 5499 stats__cline_scopy08s++; 5500 sv = zsm_sread08( src ); 5501 zsm_swrite08( dst, sv ); 5502 } 5503 5504 5505 /* Block-copy states (needed for implementing realloc()). Note this 5506 doesn't change the filtering arrangements. The caller of 5507 zsm_scopy_range needs to attend to that. */ 5508 5509 static void zsm_scopy_range ( Addr src, Addr dst, SizeT len ) 5510 { 5511 SizeT i; 5512 if (len == 0) 5513 return; 5514 5515 /* assert for non-overlappingness */ 5516 tl_assert(src+len <= dst || dst+len <= src); 5517 5518 /* To be simple, just copy byte by byte. But so as not to wreck 5519 performance for later accesses to dst[0 .. len-1], normalise 5520 destination lines as we finish with them, and also normalise the 5521 line containing the first and last address. */ 5522 for (i = 0; i < len; i++) { 5523 Bool normalise 5524 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */ 5525 || i == 0 /* first in range */ 5526 || i == len-1; /* last in range */ 5527 zsm_scopy08( src+i, dst+i, normalise ); 5528 } 5529 } 5530 5531 5532 /* For setting address ranges to a given value. Has considerable 5533 sophistication so as to avoid generating large numbers of pointless 5534 cache loads/writebacks for large ranges. */ 5535 5536 /* Do small ranges in-cache, in the obvious way. 
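      The range is carved into naturally aligned pieces, using the
      widest write possible at each step.  As an illustration (made-up
      numbers), a 7-byte range whose start address is 1 mod 8 is
      handled as an 8-bit write, then a 16-bit write, then a 32-bit
      write; each piece keeps the tree descriptors consistent.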
*/ 5537 static 5538 void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew ) 5539 { 5540 /* fast track a couple of common cases */ 5541 if (len == 4 && aligned32(a)) { 5542 zsm_swrite32( a, svNew ); 5543 return; 5544 } 5545 if (len == 8 && aligned64(a)) { 5546 zsm_swrite64( a, svNew ); 5547 return; 5548 } 5549 5550 /* be completely general (but as efficient as possible) */ 5551 if (len == 0) return; 5552 5553 if (!aligned16(a) && len >= 1) { 5554 zsm_swrite08( a, svNew ); 5555 a += 1; 5556 len -= 1; 5557 tl_assert(aligned16(a)); 5558 } 5559 if (len == 0) return; 5560 5561 if (!aligned32(a) && len >= 2) { 5562 zsm_swrite16( a, svNew ); 5563 a += 2; 5564 len -= 2; 5565 tl_assert(aligned32(a)); 5566 } 5567 if (len == 0) return; 5568 5569 if (!aligned64(a) && len >= 4) { 5570 zsm_swrite32( a, svNew ); 5571 a += 4; 5572 len -= 4; 5573 tl_assert(aligned64(a)); 5574 } 5575 if (len == 0) return; 5576 5577 if (len >= 8) { 5578 tl_assert(aligned64(a)); 5579 while (len >= 8) { 5580 zsm_swrite64( a, svNew ); 5581 a += 8; 5582 len -= 8; 5583 } 5584 tl_assert(aligned64(a)); 5585 } 5586 if (len == 0) return; 5587 5588 if (len >= 4) 5589 tl_assert(aligned32(a)); 5590 if (len >= 4) { 5591 zsm_swrite32( a, svNew ); 5592 a += 4; 5593 len -= 4; 5594 } 5595 if (len == 0) return; 5596 5597 if (len >= 2) 5598 tl_assert(aligned16(a)); 5599 if (len >= 2) { 5600 zsm_swrite16( a, svNew ); 5601 a += 2; 5602 len -= 2; 5603 } 5604 if (len == 0) return; 5605 5606 if (len >= 1) { 5607 zsm_swrite08( a, svNew ); 5608 //a += 1; 5609 len -= 1; 5610 } 5611 tl_assert(len == 0); 5612 } 5613 5614 5615 /* If we're doing a small range, hand off to zsm_sset_range_SMALL. But 5616 for larger ranges, try to operate directly on the out-of-cache 5617 representation, rather than dragging lines into the cache, 5618 overwriting them, and forcing them out. This turns out to be an 5619 important performance optimisation. 5620 5621 Note that this doesn't change the filtering arrangements. The 5622 caller of zsm_sset_range needs to attend to that. 
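   The approach for a large range is: split it into an unaligned
   prefix, a run of whole cache lines, and an unaligned suffix,

      before_start   aligned_start              after_start
           |--prefix--|====== whole lines ======|--suffix--|

   handle the prefix and suffix with zsm_sset_range_SMALL, and write
   each whole line in the middle either in-cache (if its tag is
   already resident) or directly into its compressed Z representation.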
*/ 5623 5624 static void zsm_sset_range ( Addr a, SizeT len, SVal svNew ) 5625 { 5626 tl_assert(svNew != SVal_INVALID); 5627 stats__cache_make_New_arange += (ULong)len; 5628 5629 if (0 && len > 500) 5630 VG_(printf)("make New ( %#lx, %ld )\n", a, len ); 5631 5632 if (0) { 5633 static UWord n_New_in_cache = 0; 5634 static UWord n_New_not_in_cache = 0; 5635 /* tag is 'a' with the in-line offset masked out, 5636 eg a[31]..a[4] 0000 */ 5637 Addr tag = a & ~(N_LINE_ARANGE - 1); 5638 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); 5639 if (LIKELY(tag == cache_shmem.tags0[wix])) { 5640 n_New_in_cache++; 5641 } else { 5642 n_New_not_in_cache++; 5643 } 5644 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000)) 5645 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n", 5646 n_New_in_cache, n_New_not_in_cache ); 5647 } 5648 5649 if (LIKELY(len < 2 * N_LINE_ARANGE)) { 5650 zsm_sset_range_SMALL( a, len, svNew ); 5651 } else { 5652 Addr before_start = a; 5653 Addr aligned_start = cacheline_ROUNDUP(a); 5654 Addr after_start = cacheline_ROUNDDN(a + len); 5655 UWord before_len = aligned_start - before_start; 5656 UWord aligned_len = after_start - aligned_start; 5657 UWord after_len = a + len - after_start; 5658 tl_assert(before_start <= aligned_start); 5659 tl_assert(aligned_start <= after_start); 5660 tl_assert(before_len < N_LINE_ARANGE); 5661 tl_assert(after_len < N_LINE_ARANGE); 5662 tl_assert(get_cacheline_offset(aligned_start) == 0); 5663 if (get_cacheline_offset(a) == 0) { 5664 tl_assert(before_len == 0); 5665 tl_assert(a == aligned_start); 5666 } 5667 if (get_cacheline_offset(a+len) == 0) { 5668 tl_assert(after_len == 0); 5669 tl_assert(after_start == a+len); 5670 } 5671 if (before_len > 0) { 5672 zsm_sset_range_SMALL( before_start, before_len, svNew ); 5673 } 5674 if (after_len > 0) { 5675 zsm_sset_range_SMALL( after_start, after_len, svNew ); 5676 } 5677 stats__cache_make_New_inZrep += (ULong)aligned_len; 5678 5679 while (1) { 5680 Addr tag; 5681 UWord wix; 5682 if (aligned_start >= after_start) 5683 break; 5684 tl_assert(get_cacheline_offset(aligned_start) == 0); 5685 tag = aligned_start & ~(N_LINE_ARANGE - 1); 5686 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1); 5687 if (tag == cache_shmem.tags0[wix]) { 5688 UWord i; 5689 for (i = 0; i < N_LINE_ARANGE / 8; i++) 5690 zsm_swrite64( aligned_start + i * 8, svNew ); 5691 } else { 5692 UWord i; 5693 Word zix; 5694 SecMap* sm; 5695 LineZ* lineZ; 5696 /* This line is not in the cache. Do not force it in; instead 5697 modify it in-place. */ 5698 /* find the Z line to write in and rcdec it or the 5699 associated F line. 
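         Then install svNew as the line's only dictionary entry
         (dict[0]), point every ix2s[] index at it, and rcinc the
         freshly written line.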
*/ 5700 find_Z_for_writing( &sm, &zix, tag ); 5701 tl_assert(sm); 5702 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES); 5703 lineZ = &sm->linesZ[zix]; 5704 lineZ->dict[0] = svNew; 5705 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID; 5706 for (i = 0; i < N_LINE_ARANGE/4; i++) 5707 lineZ->ix2s[i] = 0; /* all refer to dict[0] */ 5708 rcinc_LineZ(lineZ); 5709 } 5710 aligned_start += N_LINE_ARANGE; 5711 aligned_len -= N_LINE_ARANGE; 5712 } 5713 tl_assert(aligned_start == after_start); 5714 tl_assert(aligned_len == 0); 5715 } 5716 } 5717 5718 5719 ///////////////////////////////////////////////////////// 5720 // // 5721 // Front-filtering accesses // 5722 // // 5723 ///////////////////////////////////////////////////////// 5724 5725 static UWord stats__f_ac = 0; 5726 static UWord stats__f_sk = 0; 5727 5728 #if 0 5729 # define STATS__F_SHOW \ 5730 do { \ 5731 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \ 5732 VG_(printf)("filters: ac %lu sk %lu\n", \ 5733 stats__f_ac, stats__f_sk); \ 5734 } while (0) 5735 #else 5736 # define STATS__F_SHOW /* */ 5737 #endif 5738 5739 void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) { 5740 stats__f_ac++; 5741 STATS__F_SHOW; 5742 if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) { 5743 stats__f_sk++; 5744 return; 5745 } 5746 zsm_sapply08__msmcwrite(thr, a); 5747 } 5748 5749 void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) { 5750 stats__f_ac++; 5751 STATS__F_SHOW; 5752 if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) { 5753 stats__f_sk++; 5754 return; 5755 } 5756 zsm_sapply16__msmcwrite(thr, a); 5757 } 5758 5759 void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) { 5760 stats__f_ac++; 5761 STATS__F_SHOW; 5762 if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) { 5763 stats__f_sk++; 5764 return; 5765 } 5766 zsm_sapply32__msmcwrite(thr, a); 5767 } 5768 5769 void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) { 5770 stats__f_ac++; 5771 STATS__F_SHOW; 5772 if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) { 5773 stats__f_sk++; 5774 return; 5775 } 5776 zsm_sapply64__msmcwrite(thr, a); 5777 } 5778 5779 void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len ) 5780 { 5781 /* fast track a couple of common cases */ 5782 if (len == 4 && aligned32(a)) { 5783 zsm_sapply32_f__msmcwrite( thr, a ); 5784 return; 5785 } 5786 if (len == 8 && aligned64(a)) { 5787 zsm_sapply64_f__msmcwrite( thr, a ); 5788 return; 5789 } 5790 5791 /* be completely general (but as efficient as possible) */ 5792 if (len == 0) return; 5793 5794 if (!aligned16(a) && len >= 1) { 5795 zsm_sapply08_f__msmcwrite( thr, a ); 5796 a += 1; 5797 len -= 1; 5798 tl_assert(aligned16(a)); 5799 } 5800 if (len == 0) return; 5801 5802 if (!aligned32(a) && len >= 2) { 5803 zsm_sapply16_f__msmcwrite( thr, a ); 5804 a += 2; 5805 len -= 2; 5806 tl_assert(aligned32(a)); 5807 } 5808 if (len == 0) return; 5809 5810 if (!aligned64(a) && len >= 4) { 5811 zsm_sapply32_f__msmcwrite( thr, a ); 5812 a += 4; 5813 len -= 4; 5814 tl_assert(aligned64(a)); 5815 } 5816 if (len == 0) return; 5817 5818 if (len >= 8) { 5819 tl_assert(aligned64(a)); 5820 while (len >= 8) { 5821 zsm_sapply64_f__msmcwrite( thr, a ); 5822 a += 8; 5823 len -= 8; 5824 } 5825 tl_assert(aligned64(a)); 5826 } 5827 if (len == 0) return; 5828 5829 if (len >= 4) 5830 tl_assert(aligned32(a)); 5831 if (len >= 4) { 5832 zsm_sapply32_f__msmcwrite( thr, a ); 5833 a += 4; 5834 len -= 4; 5835 } 5836 if (len == 0) return; 5837 5838 if (len >= 2) 5839 tl_assert(aligned16(a)); 5840 if (len >= 2) { 5841 zsm_sapply16_f__msmcwrite( thr, a ); 5842 
a += 2; 5843 len -= 2; 5844 } 5845 if (len == 0) return; 5846 5847 if (len >= 1) { 5848 zsm_sapply08_f__msmcwrite( thr, a ); 5849 //a += 1; 5850 len -= 1; 5851 } 5852 tl_assert(len == 0); 5853 } 5854 5855 void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) { 5856 stats__f_ac++; 5857 STATS__F_SHOW; 5858 if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) { 5859 stats__f_sk++; 5860 return; 5861 } 5862 zsm_sapply08__msmcread(thr, a); 5863 } 5864 5865 void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) { 5866 stats__f_ac++; 5867 STATS__F_SHOW; 5868 if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) { 5869 stats__f_sk++; 5870 return; 5871 } 5872 zsm_sapply16__msmcread(thr, a); 5873 } 5874 5875 void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) { 5876 stats__f_ac++; 5877 STATS__F_SHOW; 5878 if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) { 5879 stats__f_sk++; 5880 return; 5881 } 5882 zsm_sapply32__msmcread(thr, a); 5883 } 5884 5885 void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) { 5886 stats__f_ac++; 5887 STATS__F_SHOW; 5888 if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) { 5889 stats__f_sk++; 5890 return; 5891 } 5892 zsm_sapply64__msmcread(thr, a); 5893 } 5894 5895 void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len ) 5896 { 5897 /* fast track a couple of common cases */ 5898 if (len == 4 && aligned32(a)) { 5899 zsm_sapply32_f__msmcread( thr, a ); 5900 return; 5901 } 5902 if (len == 8 && aligned64(a)) { 5903 zsm_sapply64_f__msmcread( thr, a ); 5904 return; 5905 } 5906 5907 /* be completely general (but as efficient as possible) */ 5908 if (len == 0) return; 5909 5910 if (!aligned16(a) && len >= 1) { 5911 zsm_sapply08_f__msmcread( thr, a ); 5912 a += 1; 5913 len -= 1; 5914 tl_assert(aligned16(a)); 5915 } 5916 if (len == 0) return; 5917 5918 if (!aligned32(a) && len >= 2) { 5919 zsm_sapply16_f__msmcread( thr, a ); 5920 a += 2; 5921 len -= 2; 5922 tl_assert(aligned32(a)); 5923 } 5924 if (len == 0) return; 5925 5926 if (!aligned64(a) && len >= 4) { 5927 zsm_sapply32_f__msmcread( thr, a ); 5928 a += 4; 5929 len -= 4; 5930 tl_assert(aligned64(a)); 5931 } 5932 if (len == 0) return; 5933 5934 if (len >= 8) { 5935 tl_assert(aligned64(a)); 5936 while (len >= 8) { 5937 zsm_sapply64_f__msmcread( thr, a ); 5938 a += 8; 5939 len -= 8; 5940 } 5941 tl_assert(aligned64(a)); 5942 } 5943 if (len == 0) return; 5944 5945 if (len >= 4) 5946 tl_assert(aligned32(a)); 5947 if (len >= 4) { 5948 zsm_sapply32_f__msmcread( thr, a ); 5949 a += 4; 5950 len -= 4; 5951 } 5952 if (len == 0) return; 5953 5954 if (len >= 2) 5955 tl_assert(aligned16(a)); 5956 if (len >= 2) { 5957 zsm_sapply16_f__msmcread( thr, a ); 5958 a += 2; 5959 len -= 2; 5960 } 5961 if (len == 0) return; 5962 5963 if (len >= 1) { 5964 zsm_sapply08_f__msmcread( thr, a ); 5965 //a += 1; 5966 len -= 1; 5967 } 5968 tl_assert(len == 0); 5969 } 5970 5971 void libhb_Thr_resumes ( Thr* thr ) 5972 { 5973 if (0) VG_(printf)("resume %p\n", thr); 5974 tl_assert(thr); 5975 tl_assert(!thr->llexit_done); 5976 Filter__clear(thr->filter, "libhb_Thr_resumes"); 5977 /* A kludge, but .. if this thread doesn't have any marker stacks 5978 at all, get one right now. This is easier than figuring out 5979 exactly when at thread startup we can and can't take a stack 5980 snapshot. 
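      (The guard below restricts this to clo_history_level == 1, which
      is presumably the only history mode that needs the eager
      snapshot.)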
*/ 5981 if (HG_(clo_history_level) == 1) { 5982 tl_assert(thr->local_Kws_n_stacks); 5983 if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0) 5984 note_local_Kw_n_stack_for(thr); 5985 } 5986 } 5987 5988 5989 ///////////////////////////////////////////////////////// 5990 // // 5991 // Synchronisation objects // 5992 // // 5993 ///////////////////////////////////////////////////////// 5994 5995 /* A double linked list of all the SO's. */ 5996 SO* admin_SO = NULL; 5997 5998 static SO* SO__Alloc ( void ) 5999 { 6000 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) ); 6001 so->viR = VtsID_INVALID; 6002 so->viW = VtsID_INVALID; 6003 so->magic = SO_MAGIC; 6004 /* Add to double linked list */ 6005 if (admin_SO) { 6006 tl_assert(admin_SO->admin_prev == NULL); 6007 admin_SO->admin_prev = so; 6008 so->admin_next = admin_SO; 6009 } else { 6010 so->admin_next = NULL; 6011 } 6012 so->admin_prev = NULL; 6013 admin_SO = so; 6014 /* */ 6015 return so; 6016 } 6017 6018 static void SO__Dealloc ( SO* so ) 6019 { 6020 tl_assert(so); 6021 tl_assert(so->magic == SO_MAGIC); 6022 if (so->viR == VtsID_INVALID) { 6023 tl_assert(so->viW == VtsID_INVALID); 6024 } else { 6025 tl_assert(so->viW != VtsID_INVALID); 6026 VtsID__rcdec(so->viR); 6027 VtsID__rcdec(so->viW); 6028 } 6029 so->magic = 0; 6030 /* Del from double linked list */ 6031 if (so->admin_prev) 6032 so->admin_prev->admin_next = so->admin_next; 6033 if (so->admin_next) 6034 so->admin_next->admin_prev = so->admin_prev; 6035 if (so == admin_SO) 6036 admin_SO = so->admin_next; 6037 /* */ 6038 HG_(free)( so ); 6039 } 6040 6041 6042 ///////////////////////////////////////////////////////// 6043 // // 6044 // Top Level API // 6045 // // 6046 ///////////////////////////////////////////////////////// 6047 6048 static void show_thread_state ( const HChar* str, Thr* t ) 6049 { 6050 if (1) return; 6051 if (t->viR == t->viW) { 6052 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR ); 6053 VtsID__pp( t->viR ); 6054 VG_(printf)("%s","\n"); 6055 } else { 6056 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR ); 6057 VtsID__pp( t->viR ); 6058 VG_(printf)(" viW %u==", t->viW); 6059 VtsID__pp( t->viW ); 6060 VG_(printf)("%s","\n"); 6061 } 6062 } 6063 6064 6065 Thr* libhb_init ( 6066 void (*get_stacktrace)( Thr*, Addr*, UWord ), 6067 ExeContext* (*get_EC)( Thr* ) 6068 ) 6069 { 6070 Thr* thr; 6071 VtsID vi; 6072 6073 // We will have to have to store a large number of these, 6074 // so make sure they're the size we expect them to be. 6075 tl_assert(sizeof(ScalarTS) == 8); 6076 6077 /* because first 1024 unusable */ 6078 tl_assert(SCALARTS_N_THRBITS >= 11); 6079 /* so as to fit in a UInt w/ 3 bits to spare (see defn of 6080 Thr_n_RCEC). */ 6081 tl_assert(SCALARTS_N_THRBITS <= 29); 6082 6083 /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words 6084 (32-bit). It's not correctness-critical, but there are a lot of 6085 them, so it's important from a space viewpoint. Unfortunately 6086 we simply can't pack it into 2 words on a 32-bit target. */ 6087 if (sizeof(UWord) == 8) { 6088 tl_assert(sizeof(Thr_n_RCEC) == 16); 6089 } else { 6090 tl_assert(sizeof(Thr_n_RCEC) == 12); 6091 } 6092 6093 /* Word sets really are 32 bits. Even on a 64 bit target. */ 6094 tl_assert(sizeof(WordSetID) == 4); 6095 tl_assert(sizeof(WordSet) == sizeof(WordSetID)); 6096 6097 tl_assert(get_stacktrace); 6098 tl_assert(get_EC); 6099 main_get_stacktrace = get_stacktrace; 6100 main_get_EC = get_EC; 6101 6102 // No need to initialise hg_wordfm. 6103 // No need to initialise hg_wordset. 
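   /* The rest of initialisation proceeds in order: VTS machinery
      (scratch VTS, very-dead-thread table, vts_set, vts_tab), then
      the event map and the shadow-memory cache, and finally the root
      Thr, whose read and write clocks both start out as the singleton
      VTS in which this thread has count 1. */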
6104 
6105    /* Allocated once and never deallocated.  Used as a temporary in
6106       VTS singleton, tick and join operations. */
6107    temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
6108    temp_max_sized_VTS->id = VtsID_INVALID;
6109    verydead_thread_table_init();
6110    vts_set_init();
6111    vts_tab_init();
6112    event_map_init();
6113    VtsID__invalidate_caches();
6114 
6115    // initialise shadow memory
6116    zsm_init( SVal__rcinc, SVal__rcdec );
6117 
6118    thr = Thr__new();
6119    vi = VtsID__mk_Singleton( thr, 1 );
6120    thr->viR = vi;
6121    thr->viW = vi;
6122    VtsID__rcinc(thr->viR);
6123    VtsID__rcinc(thr->viW);
6124 
6125    show_thread_state(" root", thr);
6126    return thr;
6127 }
6128 
6129 
6130 Thr* libhb_create ( Thr* parent )
6131 {
6132    /* The child's VTSs are copies of the parent's VTSs, but ticked at
6133       the child's index.  Since the child's index is guaranteed
6134       unique, it has never been seen before, so the implicit value
6135       before the tick is zero and after that is one. */
6136    Thr* child = Thr__new();
6137 
6138    child->viR = VtsID__tick( parent->viR, child );
6139    child->viW = VtsID__tick( parent->viW, child );
6140    Filter__clear(child->filter, "libhb_create(child)");
6141    VtsID__rcinc(child->viR);
6142    VtsID__rcinc(child->viW);
6143    /* We need to do note_local_Kw_n_stack_for( child ), but it's too
6144       early for that - it may not have a valid TId yet.  So, let
6145       libhb_Thr_resumes pick it up the first time the thread runs. */
6146 
6147    tl_assert(VtsID__indexAt( child->viR, child ) == 1);
6148    tl_assert(VtsID__indexAt( child->viW, child ) == 1);
6149 
6150    /* and the parent has to move along too */
6151    VtsID__rcdec(parent->viR);
6152    VtsID__rcdec(parent->viW);
6153    parent->viR = VtsID__tick( parent->viR, parent );
6154    parent->viW = VtsID__tick( parent->viW, parent );
6155    Filter__clear(parent->filter, "libhb_create(parent)");
6156    VtsID__rcinc(parent->viR);
6157    VtsID__rcinc(parent->viW);
6158    note_local_Kw_n_stack_for( parent );
6159 
6160    show_thread_state(" child", child);
6161    show_thread_state("parent", parent);
6162 
6163    return child;
6164 }
6165 
6166 /* Shut down the library, and print stats (in fact that's _all_
6167    this is for).
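   Nothing is deallocated here; when show_stats is False the call is a
   no-op.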
*/ 6168 void libhb_shutdown ( Bool show_stats ) 6169 { 6170 if (show_stats) { 6171 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n"); 6172 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n", 6173 stats__secmaps_allocd, 6174 stats__secmap_ga_space_covered); 6175 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n", 6176 stats__secmap_linesZ_allocd, 6177 stats__secmap_linesZ_bytes); 6178 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n", 6179 stats__secmap_linesF_allocd, 6180 stats__secmap_linesF_bytes); 6181 VG_(printf)(" secmaps: %'10lu iterator steppings\n", 6182 stats__secmap_iterator_steppings); 6183 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n", 6184 stats__secmaps_search, stats__secmaps_search_slow); 6185 6186 VG_(printf)("%s","\n"); 6187 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n", 6188 stats__cache_totrefs, stats__cache_totmisses ); 6189 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n", 6190 stats__cache_Z_fetches, stats__cache_F_fetches ); 6191 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n", 6192 stats__cache_Z_wbacks, stats__cache_F_wbacks ); 6193 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n", 6194 stats__cache_invals, stats__cache_flushes ); 6195 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n", 6196 stats__cache_make_New_arange, 6197 stats__cache_make_New_inZrep); 6198 6199 VG_(printf)("%s","\n"); 6200 VG_(printf)(" cline: %'10lu normalises\n", 6201 stats__cline_normalises ); 6202 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n", 6203 stats__cline_cread64s, 6204 stats__cline_cread32s, 6205 stats__cline_cread16s, 6206 stats__cline_cread08s ); 6207 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n", 6208 stats__cline_cwrite64s, 6209 stats__cline_cwrite32s, 6210 stats__cline_cwrite16s, 6211 stats__cline_cwrite08s ); 6212 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n", 6213 stats__cline_swrite64s, 6214 stats__cline_swrite32s, 6215 stats__cline_swrite16s, 6216 stats__cline_swrite08s ); 6217 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n", 6218 stats__cline_sread08s, stats__cline_scopy08s ); 6219 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n", 6220 stats__cline_64to32splits, 6221 stats__cline_32to16splits, 6222 stats__cline_16to8splits ); 6223 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n", 6224 stats__cline_64to32pulldown, 6225 stats__cline_32to16pulldown, 6226 stats__cline_16to8pulldown ); 6227 if (0) 6228 VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n", 6229 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE); 6230 6231 VG_(printf)("%s","\n"); 6232 6233 VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n", 6234 stats__msmcread, stats__msmcread_change); 6235 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n", 6236 stats__msmcwrite, stats__msmcwrite_change); 6237 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n", 6238 stats__cmpLEQ_queries, stats__cmpLEQ_misses); 6239 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n", 6240 stats__join2_queries, stats__join2_misses); 6241 6242 VG_(printf)("%s","\n"); 6243 VG_(printf)( " libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n", 6244 stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ ); 6245 VG_(printf)( " libhb: VTSops: cmp_structural %'lu (%'lu slow)\n", 6246 stats__vts__cmp_structural, stats__vts__cmp_structural_slow ); 6247 VG_(printf)( " libhb: VTSset: find__or__clone_and_add %'lu (%'lu 
allocd)\n", 6248 stats__vts_set__focaa, stats__vts_set__focaa_a ); 6249 VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n", 6250 stats__vts__indexat_slow ); 6251 6252 VG_(printf)("%s","\n"); 6253 VG_(printf)( 6254 " libhb: %ld entries in vts_table (approximately %lu bytes)\n", 6255 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE) 6256 ); 6257 VG_(printf)( " libhb: %lu entries in vts_set\n", 6258 VG_(sizeFM)( vts_set ) ); 6259 6260 VG_(printf)("%s","\n"); 6261 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n", 6262 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq, 6263 stats__ctxt_rcdec2, 6264 stats__ctxt_rcdec3 ); 6265 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n", 6266 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards); 6267 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n", 6268 (UWord)N_RCEC_TAB, 6269 stats__ctxt_tab_curr ); 6270 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n", 6271 stats__ctxt_tab_qs, 6272 stats__ctxt_tab_cmps ); 6273 #if 0 6274 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode)); 6275 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag)); 6276 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord)); 6277 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine)); 6278 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ)); 6279 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF)); 6280 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap)); 6281 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache)); 6282 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt)); 6283 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal)); 6284 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS)); 6285 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS)); 6286 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE)); 6287 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo)); 6288 6289 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray)); 6290 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM)); 6291 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr)); 6292 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO)); 6293 #endif 6294 6295 VG_(printf)("%s","<<< END libhb stats >>>\n"); 6296 VG_(printf)("%s","\n"); 6297 6298 } 6299 } 6300 6301 /* Receive notification that a thread has low level exited. The 6302 significance here is that we do not expect to see any more memory 6303 references from it. */ 6304 void libhb_async_exit ( Thr* thr ) 6305 { 6306 tl_assert(thr); 6307 tl_assert(!thr->llexit_done); 6308 thr->llexit_done = True; 6309 6310 /* free up Filter and local_Kws_n_stacks (well, actually not the 6311 latter ..) */ 6312 tl_assert(thr->filter); 6313 HG_(free)(thr->filter); 6314 thr->filter = NULL; 6315 6316 /* Tell the VTS mechanism this thread has exited, so it can 6317 participate in VTS pruning. Note this can only happen if the 6318 thread has both ll_exited and has been joined with. */ 6319 if (thr->joinedwith_done) 6320 VTS__declare_thread_very_dead(thr); 6321 6322 /* Another space-accuracy tradeoff. Do we want to be able to show 6323 H1 history for conflicts in threads which have since exited? If 6324 yes, then we better not free up thr->local_Kws_n_stacks. The 6325 downside is a potential per-thread leak of up to 6326 N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the 6327 XArray average overcommit factor is (1.5 I'd guess). 
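      For now the stacks are kept, so the deallocation below stays
      commented out.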
*/ 6328 // hence: 6329 // VG_(deleteXA)(thr->local_Kws_n_stacks); 6330 // thr->local_Kws_n_stacks = NULL; 6331 } 6332 6333 /* Receive notification that a thread has been joined with. The 6334 significance here is that we do not expect to see any further 6335 references to its vector clocks (Thr::viR and Thr::viW). */ 6336 void libhb_joinedwith_done ( Thr* thr ) 6337 { 6338 tl_assert(thr); 6339 /* Caller must ensure that this is only ever called once per Thr. */ 6340 tl_assert(!thr->joinedwith_done); 6341 thr->joinedwith_done = True; 6342 if (thr->llexit_done) 6343 VTS__declare_thread_very_dead(thr); 6344 } 6345 6346 6347 /* Both Segs and SOs point to VTSs. However, there is no sharing, so 6348 a Seg that points at a VTS is its one-and-only owner, and ditto for 6349 a SO that points at a VTS. */ 6350 6351 SO* libhb_so_alloc ( void ) 6352 { 6353 return SO__Alloc(); 6354 } 6355 6356 void libhb_so_dealloc ( SO* so ) 6357 { 6358 tl_assert(so); 6359 tl_assert(so->magic == SO_MAGIC); 6360 SO__Dealloc(so); 6361 } 6362 6363 /* See comments in libhb.h for details on the meaning of 6364 strong vs weak sends and strong vs weak receives. */ 6365 void libhb_so_send ( Thr* thr, SO* so, Bool strong_send ) 6366 { 6367 /* Copy the VTSs from 'thr' into the sync object, and then move 6368 the thread along one step. */ 6369 6370 tl_assert(so); 6371 tl_assert(so->magic == SO_MAGIC); 6372 6373 /* stay sane .. a thread's read-clock must always lead or be the 6374 same as its write-clock */ 6375 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR); 6376 tl_assert(leq); 6377 } 6378 6379 /* since we're overwriting the VtsIDs in the SO, we need to drop 6380 any references made by the previous contents thereof */ 6381 if (so->viR == VtsID_INVALID) { 6382 tl_assert(so->viW == VtsID_INVALID); 6383 so->viR = thr->viR; 6384 so->viW = thr->viW; 6385 VtsID__rcinc(so->viR); 6386 VtsID__rcinc(so->viW); 6387 } else { 6388 /* In a strong send, we dump any previous VC in the SO and 6389 install the sending thread's VC instead. For a weak send we 6390 must join2 with what's already there. */ 6391 tl_assert(so->viW != VtsID_INVALID); 6392 VtsID__rcdec(so->viR); 6393 VtsID__rcdec(so->viW); 6394 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR ); 6395 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW ); 6396 VtsID__rcinc(so->viR); 6397 VtsID__rcinc(so->viW); 6398 } 6399 6400 /* move both parent clocks along */ 6401 VtsID__rcdec(thr->viR); 6402 VtsID__rcdec(thr->viW); 6403 thr->viR = VtsID__tick( thr->viR, thr ); 6404 thr->viW = VtsID__tick( thr->viW, thr ); 6405 if (!thr->llexit_done) { 6406 Filter__clear(thr->filter, "libhb_so_send"); 6407 note_local_Kw_n_stack_for(thr); 6408 } 6409 VtsID__rcinc(thr->viR); 6410 VtsID__rcinc(thr->viW); 6411 6412 if (strong_send) 6413 show_thread_state("s-send", thr); 6414 else 6415 show_thread_state("w-send", thr); 6416 } 6417 6418 void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv ) 6419 { 6420 tl_assert(so); 6421 tl_assert(so->magic == SO_MAGIC); 6422 6423 if (so->viR != VtsID_INVALID) { 6424 tl_assert(so->viW != VtsID_INVALID); 6425 6426 /* Weak receive (basically, an R-acquisition of a R-W lock). 6427 This advances the read-clock of the receiver, but not the 6428 write-clock. */ 6429 VtsID__rcdec(thr->viR); 6430 thr->viR = VtsID__join2( thr->viR, so->viR ); 6431 VtsID__rcinc(thr->viR); 6432 6433 /* At one point (r10589) it seemed safest to tick the clocks for 6434 the receiving thread after the join. 
But on reflection, I 6435 wonder if that might cause it to 'overtake' constraints, 6436 which could lead to missing races. So, back out that part of 6437 r10589. */ 6438 //VtsID__rcdec(thr->viR); 6439 //thr->viR = VtsID__tick( thr->viR, thr ); 6440 //VtsID__rcinc(thr->viR); 6441 6442 /* For a strong receive, we also advance the receiver's write 6443 clock, which means the receive as a whole is essentially 6444 equivalent to a W-acquisition of a R-W lock. */ 6445 if (strong_recv) { 6446 VtsID__rcdec(thr->viW); 6447 thr->viW = VtsID__join2( thr->viW, so->viW ); 6448 VtsID__rcinc(thr->viW); 6449 6450 /* See comment just above, re r10589. */ 6451 //VtsID__rcdec(thr->viW); 6452 //thr->viW = VtsID__tick( thr->viW, thr ); 6453 //VtsID__rcinc(thr->viW); 6454 } 6455 6456 if (thr->filter) 6457 Filter__clear(thr->filter, "libhb_so_recv"); 6458 note_local_Kw_n_stack_for(thr); 6459 6460 if (strong_recv) 6461 show_thread_state("s-recv", thr); 6462 else 6463 show_thread_state("w-recv", thr); 6464 6465 } else { 6466 tl_assert(so->viW == VtsID_INVALID); 6467 /* Deal with degenerate case: 'so' has no vts, so there has been 6468 no message posted to it. Just ignore this case. */ 6469 show_thread_state("d-recv", thr); 6470 } 6471 } 6472 6473 Bool libhb_so_everSent ( SO* so ) 6474 { 6475 if (so->viR == VtsID_INVALID) { 6476 tl_assert(so->viW == VtsID_INVALID); 6477 return False; 6478 } else { 6479 tl_assert(so->viW != VtsID_INVALID); 6480 return True; 6481 } 6482 } 6483 6484 #define XXX1 0 // 0x67a106c 6485 #define XXX2 0 6486 6487 static inline Bool TRACEME(Addr a, SizeT szB) { 6488 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True; 6489 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True; 6490 return False; 6491 } 6492 static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s ) 6493 { 6494 SVal sv = zsm_sread08(a); 6495 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv); 6496 show_thread_state("", thr); 6497 VG_(printf)("%s","\n"); 6498 } 6499 6500 void libhb_srange_new ( Thr* thr, Addr a, SizeT szB ) 6501 { 6502 SVal sv = SVal__mkC(thr->viW, thr->viW); 6503 tl_assert(is_sane_SVal_C(sv)); 6504 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before"); 6505 zsm_sset_range( a, szB, sv ); 6506 Filter__clear_range( thr->filter, a, szB ); 6507 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after "); 6508 } 6509 6510 void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB ) 6511 { 6512 /* do nothing */ 6513 } 6514 6515 void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB ) 6516 { 6517 /* This really does put the requested range in NoAccess. It's 6518 expensive though. 
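      Unlike libhb_srange_noaccess_NoFX just above, which is a no-op,
      this variant really does sweep the whole range to SVal_NOACCESS
      via zsm_sset_range and then clears the matching range in the
      thread's filter.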
*/
6519    SVal sv = SVal_NOACCESS;
6520    tl_assert(is_sane_SVal_C(sv));
6521    zsm_sset_range( a, szB, sv );
6522    Filter__clear_range( thr->filter, a, szB );
6523 }
6524 
6525 void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
6526 {
6527    SVal sv = SVal_NOACCESS;
6528    tl_assert(is_sane_SVal_C(sv));
6529    if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
6530    zsm_sset_range( a, szB, sv );
6531    Filter__clear_range( thr->filter, a, szB );
6532    if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
6533 }
6534 
6535 Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
6536    tl_assert(thr);
6537    return thr->hgthread;
6538 }
6539 
6540 void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
6541    tl_assert(thr);
6542    thr->hgthread = hgthread;
6543 }
6544 
6545 void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
6546 {
6547    zsm_scopy_range(src, dst, len);
6548    Filter__clear_range( thr->filter, dst, len );
6549 }
6550 
6551 void libhb_maybe_GC ( void )
6552 {
6553    event_map_maybe_GC();
6554    /* If there are still freelist entries available, no need for a
6555       GC. */
6556    if (vts_tab_freelist != VtsID_INVALID)
6557       return;
6558    /* So all the table entries are full, and we're having to expand
6559       the table.  But did we hit the threshold point yet? */
6560    if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
6561       return;
6562    vts_tab__do_GC( False/*don't show stats*/ );
6563 }
6564 
6565 
6566 /////////////////////////////////////////////////////////////////
6567 /////////////////////////////////////////////////////////////////
6568 //                                                             //
6569 // SECTION END main library                                    //
6570 //                                                             //
6571 /////////////////////////////////////////////////////////////////
6572 /////////////////////////////////////////////////////////////////
6573 
6574 /*--------------------------------------------------------------------*/
6575 /*--- end libhb_main.c                                            ---*/
6576 /*--------------------------------------------------------------------*/
6577 
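
#if 0
/* Illustrative sketch only -- not compiled, and not part of the
   library.  It shows one plausible way a client could drive the
   top-level API above to model a lock hand-over between two threads.
   'my_get_stacktrace', 'my_get_EC' and the address 0x1000 are
   placeholders. */
static void        my_get_stacktrace ( Thr* thr, Addr* frames, UWord nRequest );
static ExeContext* my_get_EC ( Thr* thr );

static void example_client ( void )
{
   Thr* t1 = libhb_init( my_get_stacktrace, my_get_EC ); /* root thread */
   Thr* t2 = libhb_create( t1 );                         /* child thread */
   SO*  so = libhb_so_alloc();             /* models, say, a mutex */

   libhb_srange_new( t1, 0x1000, 4 );          /* t1 allocates 4 bytes */
   zsm_sapplyNN_f__msmcwrite( t1, 0x1000, 4 ); /* t1 writes them */

   libhb_so_send( t1, so, True/*strong*/ );    /* t1 unlocks: publish clocks */
   libhb_so_recv( t2, so, True/*strong*/ );    /* t2 locks: acquire clocks */

   /* t2's read now happens-after t1's write, so no race is flagged. */
   zsm_sapplyNN_f__msmcread( t2, 0x1000, 4 );

   libhb_so_dealloc( so );
   libhb_shutdown( False/*show_stats*/ );
}
#endif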