1 2 /*--------------------------------------------------------------------*/ 3 /*--- LibHB: a library for implementing and checking ---*/ 4 /*--- the happens-before relationship in concurrent programs. ---*/ 5 /*--- libhb_main.c ---*/ 6 /*--------------------------------------------------------------------*/ 7 8 /* 9 This file is part of LibHB, a library for implementing and checking 10 the happens-before relationship in concurrent programs. 11 12 Copyright (C) 2008-2012 OpenWorks Ltd 13 info (at) open-works.co.uk 14 15 This program is free software; you can redistribute it and/or 16 modify it under the terms of the GNU General Public License as 17 published by the Free Software Foundation; either version 2 of the 18 License, or (at your option) any later version. 19 20 This program is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of 22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 General Public License for more details. 24 25 You should have received a copy of the GNU General Public License 26 along with this program; if not, write to the Free Software 27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 28 02111-1307, USA. 29 30 The GNU General Public License is contained in the file COPYING. 31 */ 32 33 #include "pub_tool_basics.h" 34 #include "pub_tool_poolalloc.h" 35 #include "pub_tool_libcassert.h" 36 #include "pub_tool_libcbase.h" 37 #include "pub_tool_libcprint.h" 38 #include "pub_tool_mallocfree.h" 39 #include "pub_tool_wordfm.h" 40 #include "pub_tool_sparsewa.h" 41 #include "pub_tool_xarray.h" 42 #include "pub_tool_oset.h" 43 #include "pub_tool_threadstate.h" 44 #include "pub_tool_aspacemgr.h" 45 #include "pub_tool_execontext.h" 46 #include "pub_tool_errormgr.h" 47 #include "pub_tool_options.h" // VG_(clo_stats) 48 #include "hg_basics.h" 49 #include "hg_wordset.h" 50 #include "hg_lock_n_thread.h" 51 #include "hg_errors.h" 52 53 #include "libhb.h" 54 55 56 ///////////////////////////////////////////////////////////////// 57 ///////////////////////////////////////////////////////////////// 58 // // 59 // Debugging #defines // 60 // // 61 ///////////////////////////////////////////////////////////////// 62 ///////////////////////////////////////////////////////////////// 63 64 /* Check the sanity of shadow values in the core memory state 65 machine. Change #if 0 to #if 1 to enable this. */ 66 #if 0 67 # define CHECK_MSM 1 68 #else 69 # define CHECK_MSM 0 70 #endif 71 72 73 /* Check sanity (reference counts, etc) in the conflicting access 74 machinery. Change #if 0 to #if 1 to enable this. */ 75 #if 0 76 # define CHECK_CEM 1 77 #else 78 # define CHECK_CEM 0 79 #endif 80 81 82 /* Check sanity in the compressed shadow memory machinery, 83 particularly in its caching innards. Unfortunately there's no 84 almost-zero-cost way to make them selectable at run time. Hence 85 set the #if 0 to #if 1 and rebuild if you want them. */ 86 #if 0 87 # define CHECK_ZSM 1 /* do sanity-check CacheLine stuff */ 88 # define inline __attribute__((noinline)) 89 /* probably want to ditch -fomit-frame-pointer too */ 90 #else 91 # define CHECK_ZSM 0 /* don't sanity-check CacheLine stuff */ 92 #endif 93 94 95 ///////////////////////////////////////////////////////////////// 96 ///////////////////////////////////////////////////////////////// 97 // // 98 // data decls: VtsID // 99 // // 100 ///////////////////////////////////////////////////////////////// 101 ///////////////////////////////////////////////////////////////// 102 103 /* VtsIDs: Unique small-integer IDs for VTSs. VtsIDs can't exceed 30 104 bits, since they have to be packed into the lowest 30 bits of an 105 SVal. */ 106 typedef UInt VtsID; 107 #define VtsID_INVALID 0xFFFFFFFF 108 109 110 111 ///////////////////////////////////////////////////////////////// 112 ///////////////////////////////////////////////////////////////// 113 // // 114 // data decls: SVal // 115 // // 116 ///////////////////////////////////////////////////////////////// 117 ///////////////////////////////////////////////////////////////// 118 119 typedef ULong SVal; 120 121 /* This value has special significance to the implementation, and callers 122 may not store it in the shadow memory. */ 123 #define SVal_INVALID (3ULL << 62) 124 125 /* This is the default value for shadow memory. Initially the shadow 126 memory contains no accessible areas and so all reads produce this 127 value. TODO: make this caller-defineable. */ 128 #define SVal_NOACCESS (2ULL << 62) 129 130 131 132 ///////////////////////////////////////////////////////////////// 133 ///////////////////////////////////////////////////////////////// 134 // // 135 // data decls: ScalarTS // 136 // // 137 ///////////////////////////////////////////////////////////////// 138 ///////////////////////////////////////////////////////////////// 139 140 /* Scalar Timestamp. We have to store a lot of these, so there is 141 some effort to make them as small as possible. Logically they are 142 a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target. 143 We pack it into 64 bits by representing the Thr* using a ThrID, a 144 small integer (18 bits), and a 46 bit integer for the timestamp 145 number. The 46/18 split is arbitary, but has the effect that 146 Helgrind can only handle programs that create 2^18 or fewer threads 147 over their entire lifetime, and have no more than 2^46 timestamp 148 ticks (synchronisation operations on the same thread). 149 150 This doesn't seem like much of a limitation. 2^46 ticks is 151 7.06e+13, and if each tick (optimistically) takes the machine 1000 152 cycles to process, then the minimum time to process that many ticks 153 at a clock rate of 5 GHz is 162.9 days. And that's doing nothing 154 but VTS ticks, which isn't realistic. 155 156 NB1: SCALARTS_N_THRBITS must be 29 or lower. The obvious limit is 157 32 since a ThrID is a UInt. 29 comes from the fact that 158 'Thr_n_RCEC', which records information about old accesses, packs 159 not only a ThrID but also 2+1 other bits (access size and 160 writeness) in a UInt, hence limiting size to 32-(2+1) == 29. 161 162 NB2: thrid values are issued upwards from 1024, and values less 163 than that aren't valid. This isn't per se necessary (any order 164 will do, so long as they are unique), but it does help ensure they 165 are less likely to get confused with the various other kinds of 166 small-integer thread ids drifting around (eg, TId). See also NB5. 167 168 NB3: this probably also relies on the fact that Thr's are never 169 deallocated -- they exist forever. Hence the 1-1 mapping from 170 Thr's to thrid values (set up in Thr__new) persists forever. 171 172 NB4: temp_max_sized_VTS is allocated at startup and never freed. 173 It is a maximum sized VTS, so has (1 << SCALARTS_N_TYMBITS) 174 ScalarTSs. So we can't make SCALARTS_N_THRBITS too large without 175 making the memory use for this go sky-high. With 176 SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems 177 like an OK tradeoff. If more than 256k threads need to be 178 supported, we could change SCALARTS_N_THRBITS to 20, which would 179 facilitate supporting 1 million threads at the cost of 8MB storage 180 for temp_max_sized_VTS. 181 182 NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses 183 ThrID == 0 to denote an empty Thr_n_RCEC record. So ThrID == 0 184 must never be a valid ThrID. Given NB2 that's OK. 185 */ 186 #define SCALARTS_N_THRBITS 18 /* valid range: 11 to 29 inclusive */ 187 188 #define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS) 189 typedef 190 struct { 191 ThrID thrid : SCALARTS_N_THRBITS; 192 ULong tym : SCALARTS_N_TYMBITS; 193 } 194 ScalarTS; 195 196 #define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1) 197 198 199 200 ///////////////////////////////////////////////////////////////// 201 ///////////////////////////////////////////////////////////////// 202 // // 203 // data decls: Filter // 204 // // 205 ///////////////////////////////////////////////////////////////// 206 ///////////////////////////////////////////////////////////////// 207 208 // baseline: 5, 9 209 #define FI_LINE_SZB_LOG2 5 210 #define FI_NUM_LINES_LOG2 10 211 212 #define FI_LINE_SZB (1 << FI_LINE_SZB_LOG2) 213 #define FI_NUM_LINES (1 << FI_NUM_LINES_LOG2) 214 215 #define FI_TAG_MASK (~(Addr)(FI_LINE_SZB - 1)) 216 #define FI_GET_TAG(_a) ((_a) & FI_TAG_MASK) 217 218 #define FI_GET_LINENO(_a) ( ((_a) >> FI_LINE_SZB_LOG2) \ 219 & (Addr)(FI_NUM_LINES-1) ) 220 221 222 /* In the lines, each 8 bytes are treated individually, and are mapped 223 to a UShort. Regardless of endianness of the underlying machine, 224 bits 1 and 0 pertain to the lowest address and bits 15 and 14 to 225 the highest address. 226 227 Of each bit pair, the higher numbered bit is set if a R has been 228 seen, so the actual layout is: 229 230 15 14 ... 01 00 231 232 R W for addr+7 ... R W for addr+0 233 234 So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555. 235 */ 236 237 /* tags are separated from lines. tags are Addrs and are 238 the base address of the line. */ 239 typedef 240 struct { 241 UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */ 242 } 243 FiLine; 244 245 typedef 246 struct { 247 Addr tags[FI_NUM_LINES]; 248 FiLine lines[FI_NUM_LINES]; 249 } 250 Filter; 251 252 253 254 ///////////////////////////////////////////////////////////////// 255 ///////////////////////////////////////////////////////////////// 256 // // 257 // data decls: Thr, ULong_n_EC // 258 // // 259 ///////////////////////////////////////////////////////////////// 260 ///////////////////////////////////////////////////////////////// 261 262 // Records stacks for H1 history mechanism (DRD-style) 263 typedef 264 struct { ULong ull; ExeContext* ec; } 265 ULong_n_EC; 266 267 268 /* How many of the above records to collect for each thread? Older 269 ones are dumped when we run out of space. 62.5k requires 1MB per 270 thread, since each ULong_n_EC record is 16 bytes long. When more 271 than N_KWs_N_STACKs_PER_THREAD are present, the older half are 272 deleted to make space. Hence in the worst case we will be able to 273 produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2 274 Kw transitions (segments in this thread). For the current setting 275 that gives a guaranteed stack for at least the last 31.25k 276 segments. */ 277 #define N_KWs_N_STACKs_PER_THREAD 62500 278 279 280 struct _Thr { 281 /* Current VTSs for this thread. They change as we go along. viR 282 is the VTS to be used for reads, viW for writes. Usually they 283 are the same, but can differ when we deal with reader-writer 284 locks. It is always the case that 285 VtsID__cmpLEQ(viW,viR) == True 286 that is, viW must be the same, or lagging behind, viR. */ 287 VtsID viR; 288 VtsID viW; 289 290 /* Is initially False, and is set to True after the thread really 291 has done a low-level exit. When True, we expect to never see 292 any more memory references done by this thread. */ 293 Bool llexit_done; 294 295 /* Is initially False, and is set to True after the thread has been 296 joined with (reaped by some other thread). After this point, we 297 do not expect to see any uses of .viR or .viW, so it is safe to 298 set them to VtsID_INVALID. */ 299 Bool joinedwith_done; 300 301 /* A small integer giving a unique identity to this Thr. See 302 comments on the definition of ScalarTS for details. */ 303 ThrID thrid : SCALARTS_N_THRBITS; 304 305 /* A filter that removes references for which we believe that 306 msmcread/msmcwrite will not change the state, nor report a 307 race. */ 308 Filter* filter; 309 310 /* A pointer back to the top level Thread structure. There is a 311 1-1 mapping between Thread and Thr structures -- each Thr points 312 at its corresponding Thread, and vice versa. Really, Thr and 313 Thread should be merged into a single structure. */ 314 Thread* hgthread; 315 316 /* The ULongs (scalar Kws) in this accumulate in strictly 317 increasing order, without duplicates. This is important because 318 we need to be able to find a given scalar Kw in this array 319 later, by binary search. */ 320 XArray* /* ULong_n_EC */ local_Kws_n_stacks; 321 }; 322 323 324 325 ///////////////////////////////////////////////////////////////// 326 ///////////////////////////////////////////////////////////////// 327 // // 328 // data decls: SO // 329 // // 330 ///////////////////////////////////////////////////////////////// 331 ///////////////////////////////////////////////////////////////// 332 333 // (UInt) `echo "Synchronisation object" | md5sum` 334 #define SO_MAGIC 0x56b3c5b0U 335 336 struct _SO { 337 struct _SO* admin_prev; 338 struct _SO* admin_next; 339 VtsID viR; /* r-clock of sender */ 340 VtsID viW; /* w-clock of sender */ 341 UInt magic; 342 }; 343 344 345 346 ///////////////////////////////////////////////////////////////// 347 ///////////////////////////////////////////////////////////////// 348 // // 349 // Forward declarations // 350 // // 351 ///////////////////////////////////////////////////////////////// 352 ///////////////////////////////////////////////////////////////// 353 354 /* fwds for 355 Globals needed by other parts of the library. These are set 356 once at startup and then never changed. */ 357 static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL; 358 static ExeContext* (*main_get_EC)( Thr* ) = NULL; 359 360 /* misc fn and data fwdses */ 361 static void VtsID__rcinc ( VtsID ii ); 362 static void VtsID__rcdec ( VtsID ii ); 363 364 static inline Bool SVal__isC ( SVal s ); 365 static inline VtsID SVal__unC_Rmin ( SVal s ); 366 static inline VtsID SVal__unC_Wmin ( SVal s ); 367 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ); 368 369 /* A double linked list of all the SO's. */ 370 SO* admin_SO; 371 372 373 374 ///////////////////////////////////////////////////////////////// 375 ///////////////////////////////////////////////////////////////// 376 // // 377 // SECTION BEGIN compressed shadow memory // 378 // // 379 ///////////////////////////////////////////////////////////////// 380 ///////////////////////////////////////////////////////////////// 381 382 #ifndef __HB_ZSM_H 383 #define __HB_ZSM_H 384 385 /* Initialise the library. Once initialised, it will (or may) call 386 rcinc and rcdec in response to all the calls below, in order to 387 allow the user to do reference counting on the SVals stored herein. 388 It is important to understand, however, that due to internal 389 caching, the reference counts are in general inaccurate, and can be 390 both above or below the true reference count for an item. In 391 particular, the library may indicate that the reference count for 392 an item is zero, when in fact it is not. 393 394 To make the reference counting exact and therefore non-pointless, 395 call zsm_flush_cache. Immediately after it returns, the reference 396 counts for all items, as deduced by the caller by observing calls 397 to rcinc and rcdec, will be correct, and so any items with a zero 398 reference count may be freed (or at least considered to be 399 unreferenced by this library). 400 */ 401 static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) ); 402 403 static void zsm_sset_range ( Addr, SizeT, SVal ); 404 static void zsm_scopy_range ( Addr, Addr, SizeT ); 405 static void zsm_flush_cache ( void ); 406 407 #endif /* ! __HB_ZSM_H */ 408 409 410 /* Round a up to the next multiple of N. N must be a power of 2 */ 411 #define ROUNDUP(a, N) ((a + N - 1) & ~(N-1)) 412 /* Round a down to the next multiple of N. N must be a power of 2 */ 413 #define ROUNDDN(a, N) ((a) & ~(N-1)) 414 415 416 417 /* ------ User-supplied RC functions ------ */ 418 static void(*rcinc)(SVal) = NULL; 419 static void(*rcdec)(SVal) = NULL; 420 421 422 /* ------ CacheLine ------ */ 423 424 #define N_LINE_BITS 6 /* must be >= 3 */ 425 #define N_LINE_ARANGE (1 << N_LINE_BITS) 426 #define N_LINE_TREES (N_LINE_ARANGE >> 3) 427 428 typedef 429 struct { 430 UShort descrs[N_LINE_TREES]; 431 SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8 432 } 433 CacheLine; 434 435 #define TREE_DESCR_16_0 (1<<0) 436 #define TREE_DESCR_32_0 (1<<1) 437 #define TREE_DESCR_16_1 (1<<2) 438 #define TREE_DESCR_64 (1<<3) 439 #define TREE_DESCR_16_2 (1<<4) 440 #define TREE_DESCR_32_1 (1<<5) 441 #define TREE_DESCR_16_3 (1<<6) 442 #define TREE_DESCR_8_0 (1<<7) 443 #define TREE_DESCR_8_1 (1<<8) 444 #define TREE_DESCR_8_2 (1<<9) 445 #define TREE_DESCR_8_3 (1<<10) 446 #define TREE_DESCR_8_4 (1<<11) 447 #define TREE_DESCR_8_5 (1<<12) 448 #define TREE_DESCR_8_6 (1<<13) 449 #define TREE_DESCR_8_7 (1<<14) 450 #define TREE_DESCR_DTY (1<<15) 451 452 typedef 453 struct { 454 SVal dict[4]; /* can represent up to 4 diff values in the line */ 455 UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit 456 dict indexes */ 457 /* if dict[0] == SVal_INVALID then dict[1] is the index of the 458 LineF to use, and dict[2..] are also SVal_INVALID. */ 459 } 460 LineZ; /* compressed rep for a cache line */ 461 462 typedef 463 struct { 464 Bool inUse; 465 SVal w64s[N_LINE_ARANGE]; 466 } 467 LineF; /* full rep for a cache line */ 468 469 /* Shadow memory. 470 Primary map is a WordFM Addr SecMap*. 471 SecMaps cover some page-size-ish section of address space and hold 472 a compressed representation. 473 CacheLine-sized chunks of SecMaps are copied into a Cache, being 474 decompressed when moved into the cache and recompressed on the 475 way out. Because of this, the cache must operate as a writeback 476 cache, not a writethrough one. 477 478 Each SecMap must hold a power-of-2 number of CacheLines. Hence 479 N_SECMAP_BITS must >= N_LINE_BITS. 480 */ 481 #define N_SECMAP_BITS 13 482 #define N_SECMAP_ARANGE (1 << N_SECMAP_BITS) 483 484 // # CacheLines held by a SecMap 485 #define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE) 486 487 /* The data in the SecMap is held in the array of LineZs. Each LineZ 488 either carries the required data directly, in a compressed 489 representation, or it holds (in .dict[0]) an index to the LineF in 490 .linesF that holds the full representation. 491 492 Currently-unused LineF's have their .inUse bit set to zero. 493 Since each in-use LineF is referred to be exactly one LineZ, 494 the number of .linesZ[] that refer to .linesF should equal 495 the number of .linesF[] that have .inUse == True. 496 497 RC obligations: the RCs presented to the user include exactly 498 the values in: 499 * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID 500 * F reps that are in use (.inUse == True) 501 502 Hence the following actions at the following transitions are required: 503 504 F rep: .inUse==True -> .inUse==False -- rcdec_LineF 505 F rep: .inUse==False -> .inUse==True -- rcinc_LineF 506 Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ 507 Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ 508 */ 509 typedef 510 struct { 511 UInt magic; 512 LineZ linesZ[N_SECMAP_ZLINES]; 513 LineF* linesF; 514 UInt linesF_size; 515 } 516 SecMap; 517 518 #define SecMap_MAGIC 0x571e58cbU 519 520 static inline Bool is_sane_SecMap ( SecMap* sm ) { 521 return sm != NULL && sm->magic == SecMap_MAGIC; 522 } 523 524 /* ------ Cache ------ */ 525 526 #define N_WAY_BITS 16 527 #define N_WAY_NENT (1 << N_WAY_BITS) 528 529 /* Each tag is the address of the associated CacheLine, rounded down 530 to a CacheLine address boundary. A CacheLine size must be a power 531 of 2 and must be 8 or more. Hence an easy way to initialise the 532 cache so it is empty is to set all the tag values to any value % 8 533 != 0, eg 1. This means all queries in the cache initially miss. 534 It does however require us to detect and not writeback, any line 535 with a bogus tag. */ 536 typedef 537 struct { 538 CacheLine lyns0[N_WAY_NENT]; 539 Addr tags0[N_WAY_NENT]; 540 } 541 Cache; 542 543 static inline Bool is_valid_scache_tag ( Addr tag ) { 544 /* a valid tag should be naturally aligned to the start of 545 a CacheLine. */ 546 return 0 == (tag & (N_LINE_ARANGE - 1)); 547 } 548 549 550 /* --------- Primary data structures --------- */ 551 552 /* Shadow memory primary map */ 553 static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */ 554 static Cache cache_shmem; 555 556 557 static UWord stats__secmaps_search = 0; // # SM finds 558 static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs 559 static UWord stats__secmaps_allocd = 0; // # SecMaps issued 560 static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered 561 static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued 562 static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage 563 static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued 564 static UWord stats__secmap_linesF_bytes = 0; // .. using this much storage 565 static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter 566 static UWord stats__cache_Z_fetches = 0; // # Z lines fetched 567 static UWord stats__cache_Z_wbacks = 0; // # Z lines written back 568 static UWord stats__cache_F_fetches = 0; // # F lines fetched 569 static UWord stats__cache_F_wbacks = 0; // # F lines written back 570 static UWord stats__cache_invals = 0; // # cache invals 571 static UWord stats__cache_flushes = 0; // # cache flushes 572 static UWord stats__cache_totrefs = 0; // # total accesses 573 static UWord stats__cache_totmisses = 0; // # misses 574 static ULong stats__cache_make_New_arange = 0; // total arange made New 575 static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps 576 static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise 577 static UWord stats__cline_cread64s = 0; // # calls to s_m_read64 578 static UWord stats__cline_cread32s = 0; // # calls to s_m_read32 579 static UWord stats__cline_cread16s = 0; // # calls to s_m_read16 580 static UWord stats__cline_cread08s = 0; // # calls to s_m_read8 581 static UWord stats__cline_cwrite64s = 0; // # calls to s_m_write64 582 static UWord stats__cline_cwrite32s = 0; // # calls to s_m_write32 583 static UWord stats__cline_cwrite16s = 0; // # calls to s_m_write16 584 static UWord stats__cline_cwrite08s = 0; // # calls to s_m_write8 585 static UWord stats__cline_sread08s = 0; // # calls to s_m_set8 586 static UWord stats__cline_swrite08s = 0; // # calls to s_m_get8 587 static UWord stats__cline_swrite16s = 0; // # calls to s_m_get8 588 static UWord stats__cline_swrite32s = 0; // # calls to s_m_get8 589 static UWord stats__cline_swrite64s = 0; // # calls to s_m_get8 590 static UWord stats__cline_scopy08s = 0; // # calls to s_m_copy8 591 static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split 592 static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split 593 static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split 594 static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32 595 static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16 596 static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8 597 static UWord stats__vts__tick = 0; // # calls to VTS__tick 598 static UWord stats__vts__join = 0; // # calls to VTS__join 599 static UWord stats__vts__cmpLEQ = 0; // # calls to VTS__cmpLEQ 600 static UWord stats__vts__cmp_structural = 0; // # calls to VTS__cmp_structural 601 602 // # calls to VTS__cmp_structural w/ slow case 603 static UWord stats__vts__cmp_structural_slow = 0; 604 605 // # calls to VTS__indexAt_SLOW 606 static UWord stats__vts__indexat_slow = 0; 607 608 // # calls to vts_set__find__or__clone_and_add 609 static UWord stats__vts_set__focaa = 0; 610 611 // # calls to vts_set__find__or__clone_and_add that lead to an 612 // allocation 613 static UWord stats__vts_set__focaa_a = 0; 614 615 616 static inline Addr shmem__round_to_SecMap_base ( Addr a ) { 617 return a & ~(N_SECMAP_ARANGE - 1); 618 } 619 static inline UWord shmem__get_SecMap_offset ( Addr a ) { 620 return a & (N_SECMAP_ARANGE - 1); 621 } 622 623 624 /*----------------------------------------------------------------*/ 625 /*--- map_shmem :: WordFM Addr SecMap ---*/ 626 /*--- shadow memory (low level handlers) (shmem__* fns) ---*/ 627 /*----------------------------------------------------------------*/ 628 629 /*--------------- SecMap allocation --------------- */ 630 631 static HChar* shmem__bigchunk_next = NULL; 632 static HChar* shmem__bigchunk_end1 = NULL; 633 634 static void* shmem__bigchunk_alloc ( SizeT n ) 635 { 636 const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4; 637 tl_assert(n > 0); 638 n = VG_ROUNDUP(n, 16); 639 tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1); 640 tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next 641 <= (SSizeT)sHMEM__BIGCHUNK_SIZE); 642 if (shmem__bigchunk_next + n > shmem__bigchunk_end1) { 643 if (0) 644 VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n", 645 (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next)); 646 shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE ); 647 if (shmem__bigchunk_next == NULL) 648 VG_(out_of_memory_NORETURN)( 649 "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE ); 650 shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE; 651 } 652 tl_assert(shmem__bigchunk_next); 653 tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) ); 654 tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1); 655 shmem__bigchunk_next += n; 656 return shmem__bigchunk_next - n; 657 } 658 659 static SecMap* shmem__alloc_SecMap ( void ) 660 { 661 Word i, j; 662 SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) ); 663 if (0) VG_(printf)("alloc_SecMap %p\n",sm); 664 tl_assert(sm); 665 sm->magic = SecMap_MAGIC; 666 for (i = 0; i < N_SECMAP_ZLINES; i++) { 667 sm->linesZ[i].dict[0] = SVal_NOACCESS; 668 sm->linesZ[i].dict[1] = SVal_INVALID; 669 sm->linesZ[i].dict[2] = SVal_INVALID; 670 sm->linesZ[i].dict[3] = SVal_INVALID; 671 for (j = 0; j < N_LINE_ARANGE/4; j++) 672 sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */ 673 } 674 sm->linesF = NULL; 675 sm->linesF_size = 0; 676 stats__secmaps_allocd++; 677 stats__secmap_ga_space_covered += N_SECMAP_ARANGE; 678 stats__secmap_linesZ_allocd += N_SECMAP_ZLINES; 679 stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ); 680 return sm; 681 } 682 683 typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt; 684 static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} }; 685 686 static SecMap* shmem__find_SecMap ( Addr ga ) 687 { 688 SecMap* sm = NULL; 689 Addr gaKey = shmem__round_to_SecMap_base(ga); 690 // Cache 691 stats__secmaps_search++; 692 if (LIKELY(gaKey == smCache[0].gaKey)) 693 return smCache[0].sm; 694 if (LIKELY(gaKey == smCache[1].gaKey)) { 695 SMCacheEnt tmp = smCache[0]; 696 smCache[0] = smCache[1]; 697 smCache[1] = tmp; 698 return smCache[0].sm; 699 } 700 if (gaKey == smCache[2].gaKey) { 701 SMCacheEnt tmp = smCache[1]; 702 smCache[1] = smCache[2]; 703 smCache[2] = tmp; 704 return smCache[1].sm; 705 } 706 // end Cache 707 stats__secmaps_search_slow++; 708 if (VG_(lookupFM)( map_shmem, 709 NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) { 710 tl_assert(sm != NULL); 711 smCache[2] = smCache[1]; 712 smCache[1] = smCache[0]; 713 smCache[0].gaKey = gaKey; 714 smCache[0].sm = sm; 715 } else { 716 tl_assert(sm == NULL); 717 } 718 return sm; 719 } 720 721 static SecMap* shmem__find_or_alloc_SecMap ( Addr ga ) 722 { 723 SecMap* sm = shmem__find_SecMap ( ga ); 724 if (LIKELY(sm)) { 725 return sm; 726 } else { 727 /* create a new one */ 728 Addr gaKey = shmem__round_to_SecMap_base(ga); 729 sm = shmem__alloc_SecMap(); 730 tl_assert(sm); 731 VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm ); 732 return sm; 733 } 734 } 735 736 737 /* ------------ LineF and LineZ related ------------ */ 738 739 static void rcinc_LineF ( LineF* lineF ) { 740 UWord i; 741 tl_assert(lineF->inUse); 742 for (i = 0; i < N_LINE_ARANGE; i++) 743 rcinc(lineF->w64s[i]); 744 } 745 746 static void rcdec_LineF ( LineF* lineF ) { 747 UWord i; 748 tl_assert(lineF->inUse); 749 for (i = 0; i < N_LINE_ARANGE; i++) 750 rcdec(lineF->w64s[i]); 751 } 752 753 static void rcinc_LineZ ( LineZ* lineZ ) { 754 tl_assert(lineZ->dict[0] != SVal_INVALID); 755 rcinc(lineZ->dict[0]); 756 if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]); 757 if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]); 758 if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]); 759 } 760 761 static void rcdec_LineZ ( LineZ* lineZ ) { 762 tl_assert(lineZ->dict[0] != SVal_INVALID); 763 rcdec(lineZ->dict[0]); 764 if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]); 765 if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]); 766 if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]); 767 } 768 769 inline 770 static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) { 771 Word bix, shft, mask, prep; 772 tl_assert(ix >= 0); 773 bix = ix >> 2; 774 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */ 775 mask = 3 << shft; 776 prep = b2 << shft; 777 arr[bix] = (arr[bix] & ~mask) | prep; 778 } 779 780 inline 781 static UWord read_twobit_array ( UChar* arr, UWord ix ) { 782 Word bix, shft; 783 tl_assert(ix >= 0); 784 bix = ix >> 2; 785 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */ 786 return (arr[bix] >> shft) & 3; 787 } 788 789 /* Given address 'tag', find either the Z or F line containing relevant 790 data, so it can be read into the cache. 791 */ 792 static void find_ZF_for_reading ( /*OUT*/LineZ** zp, 793 /*OUT*/LineF** fp, Addr tag ) { 794 LineZ* lineZ; 795 LineF* lineF; 796 UWord zix; 797 SecMap* sm = shmem__find_or_alloc_SecMap(tag); 798 UWord smoff = shmem__get_SecMap_offset(tag); 799 /* since smoff is derived from a valid tag, it should be 800 cacheline-aligned. */ 801 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1))); 802 zix = smoff >> N_LINE_BITS; 803 tl_assert(zix < N_SECMAP_ZLINES); 804 lineZ = &sm->linesZ[zix]; 805 lineF = NULL; 806 if (lineZ->dict[0] == SVal_INVALID) { 807 UInt fix = (UInt)lineZ->dict[1]; 808 tl_assert(sm->linesF); 809 tl_assert(sm->linesF_size > 0); 810 tl_assert(fix >= 0 && fix < sm->linesF_size); 811 lineF = &sm->linesF[fix]; 812 tl_assert(lineF->inUse); 813 lineZ = NULL; 814 } 815 *zp = lineZ; 816 *fp = lineF; 817 } 818 819 /* Given address 'tag', return the relevant SecMap and the index of 820 the LineZ within it, in the expectation that the line is to be 821 overwritten. Regardless of whether 'tag' is currently associated 822 with a Z or F representation, to rcdec on the current 823 representation, in recognition of the fact that the contents are 824 just about to be overwritten. */ 825 static __attribute__((noinline)) 826 void find_Z_for_writing ( /*OUT*/SecMap** smp, 827 /*OUT*/Word* zixp, 828 Addr tag ) { 829 LineZ* lineZ; 830 LineF* lineF; 831 UWord zix; 832 SecMap* sm = shmem__find_or_alloc_SecMap(tag); 833 UWord smoff = shmem__get_SecMap_offset(tag); 834 /* since smoff is derived from a valid tag, it should be 835 cacheline-aligned. */ 836 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1))); 837 zix = smoff >> N_LINE_BITS; 838 tl_assert(zix < N_SECMAP_ZLINES); 839 lineZ = &sm->linesZ[zix]; 840 lineF = NULL; 841 /* re RCs, we are freeing up this LineZ/LineF so that new data can 842 be parked in it. Hence have to rcdec it accordingly. */ 843 /* If lineZ has an associated lineF, free it up. */ 844 if (lineZ->dict[0] == SVal_INVALID) { 845 UInt fix = (UInt)lineZ->dict[1]; 846 tl_assert(sm->linesF); 847 tl_assert(sm->linesF_size > 0); 848 tl_assert(fix >= 0 && fix < sm->linesF_size); 849 lineF = &sm->linesF[fix]; 850 tl_assert(lineF->inUse); 851 rcdec_LineF(lineF); 852 lineF->inUse = False; 853 } else { 854 rcdec_LineZ(lineZ); 855 } 856 *smp = sm; 857 *zixp = zix; 858 } 859 860 static __attribute__((noinline)) 861 void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) { 862 UInt i, new_size; 863 LineF* nyu; 864 865 if (sm->linesF) { 866 tl_assert(sm->linesF_size > 0); 867 } else { 868 tl_assert(sm->linesF_size == 0); 869 } 870 871 if (sm->linesF) { 872 for (i = 0; i < sm->linesF_size; i++) { 873 if (!sm->linesF[i].inUse) { 874 *fixp = (Word)i; 875 return; 876 } 877 } 878 } 879 880 /* No free F line found. Expand existing array and try again. */ 881 new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size; 882 nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)", 883 new_size * sizeof(LineF) ); 884 tl_assert(nyu); 885 886 stats__secmap_linesF_allocd += (new_size - sm->linesF_size); 887 stats__secmap_linesF_bytes += (new_size - sm->linesF_size) 888 * sizeof(LineF); 889 890 if (0) 891 VG_(printf)("SM %p: expand F array from %d to %d\n", 892 sm, (Int)sm->linesF_size, new_size); 893 894 for (i = 0; i < new_size; i++) 895 nyu[i].inUse = False; 896 897 if (sm->linesF) { 898 for (i = 0; i < sm->linesF_size; i++) { 899 tl_assert(sm->linesF[i].inUse); 900 nyu[i] = sm->linesF[i]; 901 } 902 VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) ); 903 HG_(free)(sm->linesF); 904 } 905 906 sm->linesF = nyu; 907 sm->linesF_size = new_size; 908 909 for (i = 0; i < sm->linesF_size; i++) { 910 if (!sm->linesF[i].inUse) { 911 *fixp = (Word)i; 912 return; 913 } 914 } 915 916 /*NOTREACHED*/ 917 tl_assert(0); 918 } 919 920 921 /* ------------ CacheLine and implicit-tree related ------------ */ 922 923 __attribute__((unused)) 924 static void pp_CacheLine ( CacheLine* cl ) { 925 Word i; 926 if (!cl) { 927 VG_(printf)("%s","pp_CacheLine(NULL)\n"); 928 return; 929 } 930 for (i = 0; i < N_LINE_TREES; i++) 931 VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]); 932 for (i = 0; i < N_LINE_ARANGE; i++) 933 VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]); 934 } 935 936 static UChar descr_to_validbits ( UShort descr ) 937 { 938 /* a.k.a Party Time for gcc's constant folder */ 939 # define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \ 940 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \ 941 ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \ 942 ( (b8_5) << 12) | ( (b8_4) << 11) | \ 943 ( (b8_3) << 10) | ( (b8_2) << 9) | \ 944 ( (b8_1) << 8) | ( (b8_0) << 7) | \ 945 ( (b16_3) << 6) | ( (b32_1) << 5) | \ 946 ( (b16_2) << 4) | ( (b64) << 3) | \ 947 ( (b16_1) << 2) | ( (b32_0) << 1) | \ 948 ( (b16_0) << 0) ) ) 949 950 # define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \ 951 ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \ 952 ( (bit5) << 5) | ( (bit4) << 4) | \ 953 ( (bit3) << 3) | ( (bit2) << 2) | \ 954 ( (bit1) << 1) | ( (bit0) << 0) ) ) 955 956 /* these should all get folded out at compile time */ 957 tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7); 958 tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0); 959 tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3); 960 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1); 961 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2); 962 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64); 963 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1); 964 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0); 965 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0); 966 967 switch (descr) { 968 /* 969 +--------------------------------- TREE_DESCR_8_7 970 | +------------------- TREE_DESCR_8_0 971 | | +---------------- TREE_DESCR_16_3 972 | | | +-------------- TREE_DESCR_32_1 973 | | | | +------------ TREE_DESCR_16_2 974 | | | | | +--------- TREE_DESCR_64 975 | | | | | | +------ TREE_DESCR_16_1 976 | | | | | | | +---- TREE_DESCR_32_0 977 | | | | | | | | +-- TREE_DESCR_16_0 978 | | | | | | | | | 979 | | | | | | | | | GRANULARITY, 7 -> 0 */ 980 case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */ 981 return BYTE(1,1,1,1,1,1,1,1); 982 case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */ 983 return BYTE(1,1,0,1,1,1,1,1); 984 case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */ 985 return BYTE(0,1,1,1,1,1,1,1); 986 case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */ 987 return BYTE(0,1,0,1,1,1,1,1); 988 989 case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */ 990 return BYTE(1,1,1,1,1,1,0,1); 991 case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */ 992 return BYTE(1,1,0,1,1,1,0,1); 993 case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */ 994 return BYTE(0,1,1,1,1,1,0,1); 995 case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */ 996 return BYTE(0,1,0,1,1,1,0,1); 997 998 case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */ 999 return BYTE(1,1,1,1,0,1,1,1); 1000 case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */ 1001 return BYTE(1,1,0,1,0,1,1,1); 1002 case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */ 1003 return BYTE(0,1,1,1,0,1,1,1); 1004 case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */ 1005 return BYTE(0,1,0,1,0,1,1,1); 1006 1007 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */ 1008 return BYTE(1,1,1,1,0,1,0,1); 1009 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */ 1010 return BYTE(1,1,0,1,0,1,0,1); 1011 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */ 1012 return BYTE(0,1,1,1,0,1,0,1); 1013 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */ 1014 return BYTE(0,1,0,1,0,1,0,1); 1015 1016 case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */ 1017 return BYTE(0,0,0,1,1,1,1,1); 1018 case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */ 1019 return BYTE(0,0,0,1,1,1,0,1); 1020 case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */ 1021 return BYTE(0,0,0,1,0,1,1,1); 1022 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */ 1023 return BYTE(0,0,0,1,0,1,0,1); 1024 1025 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */ 1026 return BYTE(1,1,1,1,0,0,0,1); 1027 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */ 1028 return BYTE(1,1,0,1,0,0,0,1); 1029 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */ 1030 return BYTE(0,1,1,1,0,0,0,1); 1031 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */ 1032 return BYTE(0,1,0,1,0,0,0,1); 1033 1034 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */ 1035 return BYTE(0,0,0,1,0,0,0,1); 1036 1037 case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */ 1038 return BYTE(0,0,0,0,0,0,0,1); 1039 1040 default: return BYTE(0,0,0,0,0,0,0,0); 1041 /* INVALID - any valid descr produces at least one 1042 valid bit in tree[0..7]*/ 1043 } 1044 /* NOTREACHED*/ 1045 tl_assert(0); 1046 1047 # undef DESCR 1048 # undef BYTE 1049 } 1050 1051 __attribute__((unused)) 1052 static Bool is_sane_Descr ( UShort descr ) { 1053 return descr_to_validbits(descr) != 0; 1054 } 1055 1056 static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) { 1057 VG_(sprintf)(dst, 1058 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d", 1059 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0), 1060 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0), 1061 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0), 1062 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0), 1063 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0), 1064 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0), 1065 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0), 1066 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0), 1067 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0), 1068 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0), 1069 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0), 1070 (Int)((descr & TREE_DESCR_64) ? 1 : 0), 1071 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0), 1072 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0), 1073 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0) 1074 ); 1075 } 1076 static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) { 1077 VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d", 1078 (Int)((byte & 128) ? 1 : 0), 1079 (Int)((byte & 64) ? 1 : 0), 1080 (Int)((byte & 32) ? 1 : 0), 1081 (Int)((byte & 16) ? 1 : 0), 1082 (Int)((byte & 8) ? 1 : 0), 1083 (Int)((byte & 4) ? 1 : 0), 1084 (Int)((byte & 2) ? 1 : 0), 1085 (Int)((byte & 1) ? 1 : 0) 1086 ); 1087 } 1088 1089 static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) { 1090 Word i; 1091 UChar validbits = descr_to_validbits(descr); 1092 HChar buf[128], buf2[128]; 1093 if (validbits == 0) 1094 goto bad; 1095 for (i = 0; i < 8; i++) { 1096 if (validbits & (1<<i)) { 1097 if (tree[i] == SVal_INVALID) 1098 goto bad; 1099 } else { 1100 if (tree[i] != SVal_INVALID) 1101 goto bad; 1102 } 1103 } 1104 return True; 1105 bad: 1106 sprintf_Descr( buf, descr ); 1107 sprintf_Byte( buf2, validbits ); 1108 VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n"); 1109 VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2); 1110 VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf); 1111 for (i = 0; i < 8; i++) 1112 VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]); 1113 VG_(printf)("%s","}\n"); 1114 return 0; 1115 } 1116 1117 static Bool is_sane_CacheLine ( CacheLine* cl ) 1118 { 1119 Word tno, cloff; 1120 1121 if (!cl) goto bad; 1122 1123 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) { 1124 UShort descr = cl->descrs[tno]; 1125 SVal* tree = &cl->svals[cloff]; 1126 if (!is_sane_Descr_and_Tree(descr, tree)) 1127 goto bad; 1128 } 1129 tl_assert(cloff == N_LINE_ARANGE); 1130 return True; 1131 bad: 1132 pp_CacheLine(cl); 1133 return False; 1134 } 1135 1136 static UShort normalise_tree ( /*MOD*/SVal* tree ) 1137 { 1138 UShort descr; 1139 /* pre: incoming tree[0..7] does not have any invalid shvals, in 1140 particular no zeroes. */ 1141 if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID 1142 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID 1143 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID 1144 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID)) 1145 tl_assert(0); 1146 1147 descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5 1148 | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2 1149 | TREE_DESCR_8_1 | TREE_DESCR_8_0; 1150 /* build 16-bit layer */ 1151 if (tree[1] == tree[0]) { 1152 tree[1] = SVal_INVALID; 1153 descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0); 1154 descr |= TREE_DESCR_16_0; 1155 } 1156 if (tree[3] == tree[2]) { 1157 tree[3] = SVal_INVALID; 1158 descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2); 1159 descr |= TREE_DESCR_16_1; 1160 } 1161 if (tree[5] == tree[4]) { 1162 tree[5] = SVal_INVALID; 1163 descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4); 1164 descr |= TREE_DESCR_16_2; 1165 } 1166 if (tree[7] == tree[6]) { 1167 tree[7] = SVal_INVALID; 1168 descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6); 1169 descr |= TREE_DESCR_16_3; 1170 } 1171 /* build 32-bit layer */ 1172 if (tree[2] == tree[0] 1173 && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) { 1174 tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */ 1175 descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0); 1176 descr |= TREE_DESCR_32_0; 1177 } 1178 if (tree[6] == tree[4] 1179 && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) { 1180 tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */ 1181 descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2); 1182 descr |= TREE_DESCR_32_1; 1183 } 1184 /* build 64-bit layer */ 1185 if (tree[4] == tree[0] 1186 && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) { 1187 tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */ 1188 descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0); 1189 descr |= TREE_DESCR_64; 1190 } 1191 return descr; 1192 } 1193 1194 /* This takes a cacheline where all the data is at the leaves 1195 (w8[..]) and builds a correctly normalised tree. */ 1196 static void normalise_CacheLine ( /*MOD*/CacheLine* cl ) 1197 { 1198 Word tno, cloff; 1199 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) { 1200 SVal* tree = &cl->svals[cloff]; 1201 cl->descrs[tno] = normalise_tree( tree ); 1202 } 1203 tl_assert(cloff == N_LINE_ARANGE); 1204 if (CHECK_ZSM) 1205 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 1206 stats__cline_normalises++; 1207 } 1208 1209 1210 typedef struct { UChar count; SVal sval; } CountedSVal; 1211 1212 static 1213 void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst, 1214 /*OUT*/Word* dstUsedP, 1215 Word nDst, CacheLine* src ) 1216 { 1217 Word tno, cloff, dstUsed; 1218 1219 tl_assert(nDst == N_LINE_ARANGE); 1220 dstUsed = 0; 1221 1222 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) { 1223 UShort descr = src->descrs[tno]; 1224 SVal* tree = &src->svals[cloff]; 1225 1226 /* sequentialise the tree described by (descr,tree). */ 1227 # define PUT(_n,_v) \ 1228 do { dst[dstUsed ].count = (_n); \ 1229 dst[dstUsed++].sval = (_v); \ 1230 } while (0) 1231 1232 /* byte 0 */ 1233 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else 1234 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else 1235 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else 1236 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]); 1237 /* byte 1 */ 1238 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]); 1239 /* byte 2 */ 1240 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else 1241 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]); 1242 /* byte 3 */ 1243 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]); 1244 /* byte 4 */ 1245 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else 1246 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else 1247 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]); 1248 /* byte 5 */ 1249 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]); 1250 /* byte 6 */ 1251 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else 1252 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]); 1253 /* byte 7 */ 1254 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]); 1255 1256 # undef PUT 1257 /* END sequentialise the tree described by (descr,tree). */ 1258 1259 } 1260 tl_assert(cloff == N_LINE_ARANGE); 1261 tl_assert(dstUsed <= nDst); 1262 1263 *dstUsedP = dstUsed; 1264 } 1265 1266 /* Write the cacheline 'wix' to backing store. Where it ends up 1267 is determined by its tag field. */ 1268 static __attribute__((noinline)) void cacheline_wback ( UWord wix ) 1269 { 1270 Word i, j, k, m; 1271 Addr tag; 1272 SecMap* sm; 1273 CacheLine* cl; 1274 LineZ* lineZ; 1275 LineF* lineF; 1276 Word zix, fix, csvalsUsed; 1277 CountedSVal csvals[N_LINE_ARANGE]; 1278 SVal sv; 1279 1280 if (0) 1281 VG_(printf)("scache wback line %d\n", (Int)wix); 1282 1283 tl_assert(wix >= 0 && wix < N_WAY_NENT); 1284 1285 tag = cache_shmem.tags0[wix]; 1286 cl = &cache_shmem.lyns0[wix]; 1287 1288 /* The cache line may have been invalidated; if so, ignore it. */ 1289 if (!is_valid_scache_tag(tag)) 1290 return; 1291 1292 /* Where are we going to put it? */ 1293 sm = NULL; 1294 lineZ = NULL; 1295 lineF = NULL; 1296 zix = fix = -1; 1297 1298 /* find the Z line to write in and rcdec it or the associated F 1299 line. */ 1300 find_Z_for_writing( &sm, &zix, tag ); 1301 1302 tl_assert(sm); 1303 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES); 1304 lineZ = &sm->linesZ[zix]; 1305 1306 /* Generate the data to be stored */ 1307 if (CHECK_ZSM) 1308 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 1309 1310 csvalsUsed = -1; 1311 sequentialise_CacheLine( csvals, &csvalsUsed, 1312 N_LINE_ARANGE, cl ); 1313 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE); 1314 if (0) VG_(printf)("%lu ", csvalsUsed); 1315 1316 lineZ->dict[0] = lineZ->dict[1] 1317 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID; 1318 1319 /* i indexes actual shadow values, k is cursor in csvals */ 1320 i = 0; 1321 for (k = 0; k < csvalsUsed; k++) { 1322 1323 sv = csvals[k].sval; 1324 if (CHECK_ZSM) 1325 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8); 1326 /* do we already have it? */ 1327 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; } 1328 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; } 1329 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; } 1330 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; } 1331 /* no. look for a free slot. */ 1332 if (CHECK_ZSM) 1333 tl_assert(sv != SVal_INVALID); 1334 if (lineZ->dict[0] 1335 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; } 1336 if (lineZ->dict[1] 1337 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; } 1338 if (lineZ->dict[2] 1339 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; } 1340 if (lineZ->dict[3] 1341 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; } 1342 break; /* we'll have to use the f rep */ 1343 dict_ok: 1344 m = csvals[k].count; 1345 if (m == 8) { 1346 write_twobit_array( lineZ->ix2s, i+0, j ); 1347 write_twobit_array( lineZ->ix2s, i+1, j ); 1348 write_twobit_array( lineZ->ix2s, i+2, j ); 1349 write_twobit_array( lineZ->ix2s, i+3, j ); 1350 write_twobit_array( lineZ->ix2s, i+4, j ); 1351 write_twobit_array( lineZ->ix2s, i+5, j ); 1352 write_twobit_array( lineZ->ix2s, i+6, j ); 1353 write_twobit_array( lineZ->ix2s, i+7, j ); 1354 i += 8; 1355 } 1356 else if (m == 4) { 1357 write_twobit_array( lineZ->ix2s, i+0, j ); 1358 write_twobit_array( lineZ->ix2s, i+1, j ); 1359 write_twobit_array( lineZ->ix2s, i+2, j ); 1360 write_twobit_array( lineZ->ix2s, i+3, j ); 1361 i += 4; 1362 } 1363 else if (m == 1) { 1364 write_twobit_array( lineZ->ix2s, i+0, j ); 1365 i += 1; 1366 } 1367 else if (m == 2) { 1368 write_twobit_array( lineZ->ix2s, i+0, j ); 1369 write_twobit_array( lineZ->ix2s, i+1, j ); 1370 i += 2; 1371 } 1372 else { 1373 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */ 1374 } 1375 1376 } 1377 1378 if (LIKELY(i == N_LINE_ARANGE)) { 1379 /* Construction of the compressed representation was 1380 successful. */ 1381 rcinc_LineZ(lineZ); 1382 stats__cache_Z_wbacks++; 1383 } else { 1384 /* Cannot use the compressed(z) representation. Use the full(f) 1385 rep instead. */ 1386 tl_assert(i >= 0 && i < N_LINE_ARANGE); 1387 alloc_F_for_writing( sm, &fix ); 1388 tl_assert(sm->linesF); 1389 tl_assert(sm->linesF_size > 0); 1390 tl_assert(fix >= 0 && fix < (Word)sm->linesF_size); 1391 lineF = &sm->linesF[fix]; 1392 tl_assert(!lineF->inUse); 1393 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID; 1394 lineZ->dict[1] = (SVal)fix; 1395 lineF->inUse = True; 1396 i = 0; 1397 for (k = 0; k < csvalsUsed; k++) { 1398 if (CHECK_ZSM) 1399 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8); 1400 sv = csvals[k].sval; 1401 if (CHECK_ZSM) 1402 tl_assert(sv != SVal_INVALID); 1403 for (m = csvals[k].count; m > 0; m--) { 1404 lineF->w64s[i] = sv; 1405 i++; 1406 } 1407 } 1408 tl_assert(i == N_LINE_ARANGE); 1409 rcinc_LineF(lineF); 1410 stats__cache_F_wbacks++; 1411 } 1412 } 1413 1414 /* Fetch the cacheline 'wix' from the backing store. The tag 1415 associated with 'wix' is assumed to have already been filled in; 1416 hence that is used to determine where in the backing store to read 1417 from. */ 1418 static __attribute__((noinline)) void cacheline_fetch ( UWord wix ) 1419 { 1420 Word i; 1421 Addr tag; 1422 CacheLine* cl; 1423 LineZ* lineZ; 1424 LineF* lineF; 1425 1426 if (0) 1427 VG_(printf)("scache fetch line %d\n", (Int)wix); 1428 1429 tl_assert(wix >= 0 && wix < N_WAY_NENT); 1430 1431 tag = cache_shmem.tags0[wix]; 1432 cl = &cache_shmem.lyns0[wix]; 1433 1434 /* reject nonsense requests */ 1435 tl_assert(is_valid_scache_tag(tag)); 1436 1437 lineZ = NULL; 1438 lineF = NULL; 1439 find_ZF_for_reading( &lineZ, &lineF, tag ); 1440 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) ); 1441 1442 /* expand the data into the bottom layer of the tree, then get 1443 cacheline_normalise to build the descriptor array. */ 1444 if (lineF) { 1445 tl_assert(lineF->inUse); 1446 for (i = 0; i < N_LINE_ARANGE; i++) { 1447 cl->svals[i] = lineF->w64s[i]; 1448 } 1449 stats__cache_F_fetches++; 1450 } else { 1451 for (i = 0; i < N_LINE_ARANGE; i++) { 1452 SVal sv; 1453 UWord ix = read_twobit_array( lineZ->ix2s, i ); 1454 /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */ 1455 sv = lineZ->dict[ix]; 1456 tl_assert(sv != SVal_INVALID); 1457 cl->svals[i] = sv; 1458 } 1459 stats__cache_Z_fetches++; 1460 } 1461 normalise_CacheLine( cl ); 1462 } 1463 1464 static void shmem__invalidate_scache ( void ) { 1465 Word wix; 1466 if (0) VG_(printf)("%s","scache inval\n"); 1467 tl_assert(!is_valid_scache_tag(1)); 1468 for (wix = 0; wix < N_WAY_NENT; wix++) { 1469 cache_shmem.tags0[wix] = 1/*INVALID*/; 1470 } 1471 stats__cache_invals++; 1472 } 1473 1474 static void shmem__flush_and_invalidate_scache ( void ) { 1475 Word wix; 1476 Addr tag; 1477 if (0) VG_(printf)("%s","scache flush and invalidate\n"); 1478 tl_assert(!is_valid_scache_tag(1)); 1479 for (wix = 0; wix < N_WAY_NENT; wix++) { 1480 tag = cache_shmem.tags0[wix]; 1481 if (tag == 1/*INVALID*/) { 1482 /* already invalid; nothing to do */ 1483 } else { 1484 tl_assert(is_valid_scache_tag(tag)); 1485 cacheline_wback( wix ); 1486 } 1487 cache_shmem.tags0[wix] = 1/*INVALID*/; 1488 } 1489 stats__cache_flushes++; 1490 stats__cache_invals++; 1491 } 1492 1493 1494 static inline Bool aligned16 ( Addr a ) { 1495 return 0 == (a & 1); 1496 } 1497 static inline Bool aligned32 ( Addr a ) { 1498 return 0 == (a & 3); 1499 } 1500 static inline Bool aligned64 ( Addr a ) { 1501 return 0 == (a & 7); 1502 } 1503 static inline UWord get_cacheline_offset ( Addr a ) { 1504 return (UWord)(a & (N_LINE_ARANGE - 1)); 1505 } 1506 static inline Addr cacheline_ROUNDUP ( Addr a ) { 1507 return ROUNDUP(a, N_LINE_ARANGE); 1508 } 1509 static inline Addr cacheline_ROUNDDN ( Addr a ) { 1510 return ROUNDDN(a, N_LINE_ARANGE); 1511 } 1512 static inline UWord get_treeno ( Addr a ) { 1513 return get_cacheline_offset(a) >> 3; 1514 } 1515 static inline UWord get_tree_offset ( Addr a ) { 1516 return a & 7; 1517 } 1518 1519 static __attribute__((noinline)) 1520 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */ 1521 static inline CacheLine* get_cacheline ( Addr a ) 1522 { 1523 /* tag is 'a' with the in-line offset masked out, 1524 eg a[31]..a[4] 0000 */ 1525 Addr tag = a & ~(N_LINE_ARANGE - 1); 1526 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); 1527 stats__cache_totrefs++; 1528 if (LIKELY(tag == cache_shmem.tags0[wix])) { 1529 return &cache_shmem.lyns0[wix]; 1530 } else { 1531 return get_cacheline_MISS( a ); 1532 } 1533 } 1534 1535 static __attribute__((noinline)) 1536 CacheLine* get_cacheline_MISS ( Addr a ) 1537 { 1538 /* tag is 'a' with the in-line offset masked out, 1539 eg a[31]..a[4] 0000 */ 1540 1541 CacheLine* cl; 1542 Addr* tag_old_p; 1543 Addr tag = a & ~(N_LINE_ARANGE - 1); 1544 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); 1545 1546 tl_assert(tag != cache_shmem.tags0[wix]); 1547 1548 /* Dump the old line into the backing store. */ 1549 stats__cache_totmisses++; 1550 1551 cl = &cache_shmem.lyns0[wix]; 1552 tag_old_p = &cache_shmem.tags0[wix]; 1553 1554 if (is_valid_scache_tag( *tag_old_p )) { 1555 /* EXPENSIVE and REDUNDANT: callee does it */ 1556 if (CHECK_ZSM) 1557 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 1558 cacheline_wback( wix ); 1559 } 1560 /* and reload the new one */ 1561 *tag_old_p = tag; 1562 cacheline_fetch( wix ); 1563 if (CHECK_ZSM) 1564 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 1565 return cl; 1566 } 1567 1568 static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) { 1569 stats__cline_64to32pulldown++; 1570 switch (toff) { 1571 case 0: case 4: 1572 tl_assert(descr & TREE_DESCR_64); 1573 tree[4] = tree[0]; 1574 descr &= ~TREE_DESCR_64; 1575 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0); 1576 break; 1577 default: 1578 tl_assert(0); 1579 } 1580 return descr; 1581 } 1582 1583 static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) { 1584 stats__cline_32to16pulldown++; 1585 switch (toff) { 1586 case 0: case 2: 1587 if (!(descr & TREE_DESCR_32_0)) { 1588 descr = pulldown_to_32(tree, 0, descr); 1589 } 1590 tl_assert(descr & TREE_DESCR_32_0); 1591 tree[2] = tree[0]; 1592 descr &= ~TREE_DESCR_32_0; 1593 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0); 1594 break; 1595 case 4: case 6: 1596 if (!(descr & TREE_DESCR_32_1)) { 1597 descr = pulldown_to_32(tree, 4, descr); 1598 } 1599 tl_assert(descr & TREE_DESCR_32_1); 1600 tree[6] = tree[4]; 1601 descr &= ~TREE_DESCR_32_1; 1602 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2); 1603 break; 1604 default: 1605 tl_assert(0); 1606 } 1607 return descr; 1608 } 1609 1610 static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) { 1611 stats__cline_16to8pulldown++; 1612 switch (toff) { 1613 case 0: case 1: 1614 if (!(descr & TREE_DESCR_16_0)) { 1615 descr = pulldown_to_16(tree, 0, descr); 1616 } 1617 tl_assert(descr & TREE_DESCR_16_0); 1618 tree[1] = tree[0]; 1619 descr &= ~TREE_DESCR_16_0; 1620 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0); 1621 break; 1622 case 2: case 3: 1623 if (!(descr & TREE_DESCR_16_1)) { 1624 descr = pulldown_to_16(tree, 2, descr); 1625 } 1626 tl_assert(descr & TREE_DESCR_16_1); 1627 tree[3] = tree[2]; 1628 descr &= ~TREE_DESCR_16_1; 1629 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2); 1630 break; 1631 case 4: case 5: 1632 if (!(descr & TREE_DESCR_16_2)) { 1633 descr = pulldown_to_16(tree, 4, descr); 1634 } 1635 tl_assert(descr & TREE_DESCR_16_2); 1636 tree[5] = tree[4]; 1637 descr &= ~TREE_DESCR_16_2; 1638 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4); 1639 break; 1640 case 6: case 7: 1641 if (!(descr & TREE_DESCR_16_3)) { 1642 descr = pulldown_to_16(tree, 6, descr); 1643 } 1644 tl_assert(descr & TREE_DESCR_16_3); 1645 tree[7] = tree[6]; 1646 descr &= ~TREE_DESCR_16_3; 1647 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6); 1648 break; 1649 default: 1650 tl_assert(0); 1651 } 1652 return descr; 1653 } 1654 1655 1656 static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) { 1657 UShort mask; 1658 switch (toff) { 1659 case 0: 1660 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0; 1661 tl_assert( (descr & mask) == mask ); 1662 descr &= ~mask; 1663 descr |= TREE_DESCR_16_0; 1664 break; 1665 case 2: 1666 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2; 1667 tl_assert( (descr & mask) == mask ); 1668 descr &= ~mask; 1669 descr |= TREE_DESCR_16_1; 1670 break; 1671 case 4: 1672 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4; 1673 tl_assert( (descr & mask) == mask ); 1674 descr &= ~mask; 1675 descr |= TREE_DESCR_16_2; 1676 break; 1677 case 6: 1678 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6; 1679 tl_assert( (descr & mask) == mask ); 1680 descr &= ~mask; 1681 descr |= TREE_DESCR_16_3; 1682 break; 1683 default: 1684 tl_assert(0); 1685 } 1686 return descr; 1687 } 1688 1689 static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) { 1690 UShort mask; 1691 switch (toff) { 1692 case 0: 1693 if (!(descr & TREE_DESCR_16_0)) 1694 descr = pullup_descr_to_16(descr, 0); 1695 if (!(descr & TREE_DESCR_16_1)) 1696 descr = pullup_descr_to_16(descr, 2); 1697 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0; 1698 tl_assert( (descr & mask) == mask ); 1699 descr &= ~mask; 1700 descr |= TREE_DESCR_32_0; 1701 break; 1702 case 4: 1703 if (!(descr & TREE_DESCR_16_2)) 1704 descr = pullup_descr_to_16(descr, 4); 1705 if (!(descr & TREE_DESCR_16_3)) 1706 descr = pullup_descr_to_16(descr, 6); 1707 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2; 1708 tl_assert( (descr & mask) == mask ); 1709 descr &= ~mask; 1710 descr |= TREE_DESCR_32_1; 1711 break; 1712 default: 1713 tl_assert(0); 1714 } 1715 return descr; 1716 } 1717 1718 static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) { 1719 switch (toff) { 1720 case 0: case 4: 1721 return 0 != (descr & TREE_DESCR_64); 1722 default: 1723 tl_assert(0); 1724 } 1725 } 1726 1727 static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) { 1728 switch (toff) { 1729 case 0: 1730 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0)); 1731 case 2: 1732 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2)); 1733 case 4: 1734 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4)); 1735 case 6: 1736 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6)); 1737 default: 1738 tl_assert(0); 1739 } 1740 } 1741 1742 /* ------------ Cache management ------------ */ 1743 1744 static void zsm_flush_cache ( void ) 1745 { 1746 shmem__flush_and_invalidate_scache(); 1747 } 1748 1749 1750 static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) ) 1751 { 1752 tl_assert( sizeof(UWord) == sizeof(Addr) ); 1753 1754 rcinc = p_rcinc; 1755 rcdec = p_rcdec; 1756 1757 tl_assert(map_shmem == NULL); 1758 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)", 1759 HG_(free), 1760 NULL/*unboxed UWord cmp*/); 1761 tl_assert(map_shmem != NULL); 1762 shmem__invalidate_scache(); 1763 1764 /* a SecMap must contain an integral number of CacheLines */ 1765 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE)); 1766 /* also ... a CacheLine holds an integral number of trees */ 1767 tl_assert(0 == (N_LINE_ARANGE % 8)); 1768 } 1769 1770 ///////////////////////////////////////////////////////////////// 1771 ///////////////////////////////////////////////////////////////// 1772 // // 1773 // SECTION END compressed shadow memory // 1774 // // 1775 ///////////////////////////////////////////////////////////////// 1776 ///////////////////////////////////////////////////////////////// 1777 1778 1779 1780 ///////////////////////////////////////////////////////////////// 1781 ///////////////////////////////////////////////////////////////// 1782 // // 1783 // SECTION BEGIN vts primitives // 1784 // // 1785 ///////////////////////////////////////////////////////////////// 1786 ///////////////////////////////////////////////////////////////// 1787 1788 1789 /* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely 1790 being compact stand-ins for Thr*'s. Use these functions to map 1791 between them. */ 1792 static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */ 1793 static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */ 1794 1795 __attribute__((noreturn)) 1796 static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs ) 1797 { 1798 if (due_to_nThrs) { 1799 HChar* s = 1800 "\n" 1801 "Helgrind: cannot continue, run aborted: too many threads.\n" 1802 "Sorry. Helgrind can only handle programs that create\n" 1803 "%'llu or fewer threads over their entire lifetime.\n" 1804 "\n"; 1805 VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024)); 1806 } else { 1807 HChar* s = 1808 "\n" 1809 "Helgrind: cannot continue, run aborted: too many\n" 1810 "synchronisation events. Sorry. Helgrind can only handle\n" 1811 "programs which perform %'llu or fewer\n" 1812 "inter-thread synchronisation events (locks, unlocks, etc).\n" 1813 "\n"; 1814 VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1); 1815 } 1816 VG_(exit)(1); 1817 /*NOTREACHED*/ 1818 tl_assert(0); /*wtf?!*/ 1819 } 1820 1821 1822 /* The dead thread (ThrID, actually) table. A thread may only be 1823 listed here if we have been notified thereof by libhb_async_exit. 1824 New entries are added at the end. The order isn't important, but 1825 the ThrID values must be unique. This table lists the identity of 1826 all threads that have ever died -- none are ever removed. We keep 1827 this table so as to be able to prune entries from VTSs. We don't 1828 actually need to keep the set of threads that have ever died -- 1829 only the threads that have died since the previous round of 1830 pruning. But it's useful for sanity check purposes to keep the 1831 entire set, so we do. */ 1832 static XArray* /* of ThrID */ verydead_thread_table = NULL; 1833 1834 /* Arbitrary total ordering on ThrIDs. */ 1835 static Int cmp__ThrID ( void* v1, void* v2 ) { 1836 ThrID id1 = *(ThrID*)v1; 1837 ThrID id2 = *(ThrID*)v2; 1838 if (id1 < id2) return -1; 1839 if (id1 > id2) return 1; 1840 return 0; 1841 } 1842 1843 static void verydead_thread_table_init ( void ) 1844 { 1845 tl_assert(!verydead_thread_table); 1846 verydead_thread_table 1847 = VG_(newXA)( HG_(zalloc), 1848 "libhb.verydead_thread_table_init.1", 1849 HG_(free), sizeof(ThrID) ); 1850 tl_assert(verydead_thread_table); 1851 VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID); 1852 } 1853 1854 1855 /* A VTS contains .ts, its vector clock, and also .id, a field to hold 1856 a backlink for the caller's convenience. Since we have no idea 1857 what to set that to in the library, it always gets set to 1858 VtsID_INVALID. */ 1859 typedef 1860 struct { 1861 VtsID id; 1862 UInt usedTS; 1863 UInt sizeTS; 1864 ScalarTS ts[0]; 1865 } 1866 VTS; 1867 1868 /* Allocate a VTS capable of storing 'sizeTS' entries. */ 1869 static VTS* VTS__new ( HChar* who, UInt sizeTS ); 1870 1871 /* Make a clone of 'vts', sizing the new array to exactly match the 1872 number of ScalarTSs present. */ 1873 static VTS* VTS__clone ( HChar* who, VTS* vts ); 1874 1875 /* Make a clone of 'vts' with the thrids in 'thrids' removed. The new 1876 array is sized exactly to hold the number of required elements. 1877 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and 1878 must be in strictly increasing order. */ 1879 static VTS* VTS__subtract ( HChar* who, VTS* vts, XArray* thridsToDel ); 1880 1881 /* Delete this VTS in its entirety. */ 1882 static void VTS__delete ( VTS* vts ); 1883 1884 /* Create a new singleton VTS in 'out'. Caller must have 1885 pre-allocated 'out' sufficiently big to hold the result in all 1886 possible cases. */ 1887 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym ); 1888 1889 /* Create in 'out' a VTS which is the same as 'vts' except with 1890 vts[me]++, so to speak. Caller must have pre-allocated 'out' 1891 sufficiently big to hold the result in all possible cases. */ 1892 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts ); 1893 1894 /* Create in 'out' a VTS which is the join (max) of 'a' and 1895 'b'. Caller must have pre-allocated 'out' sufficiently big to hold 1896 the result in all possible cases. */ 1897 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b ); 1898 1899 /* Compute the partial ordering relation of the two args. Although we 1900 could be completely general and return an enumeration value (EQ, 1901 LT, GT, UN), in fact we only need LEQ, and so we may as well 1902 hardwire that fact. 1903 1904 Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an 1905 invald ThrID). In the latter case, the returned ThrID indicates 1906 the discovered point for which they are not. There may be more 1907 than one such point, but we only care about seeing one of them, not 1908 all of them. This rather strange convention is used because 1909 sometimes we want to know the actual index at which they first 1910 differ. */ 1911 static UInt VTS__cmpLEQ ( VTS* a, VTS* b ); 1912 1913 /* Compute an arbitrary structural (total) ordering on the two args, 1914 based on their VCs, so they can be looked up in a table, tree, etc. 1915 Returns -1, 0 or 1. */ 1916 static Word VTS__cmp_structural ( VTS* a, VTS* b ); 1917 1918 /* Debugging only. Display the given VTS in the buffer. */ 1919 static void VTS__show ( HChar* buf, Int nBuf, VTS* vts ); 1920 1921 /* Debugging only. Return vts[index], so to speak. */ 1922 static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ); 1923 1924 /* Notify the VTS machinery that a thread has been declared 1925 comprehensively dead: that is, it has done an async exit AND it has 1926 been joined with. This should ensure that its local clocks (.viR 1927 and .viW) will never again change, and so all mentions of this 1928 thread from all VTSs in the system may be removed. */ 1929 static void VTS__declare_thread_very_dead ( Thr* idx ); 1930 1931 /*--------------- to do with Vector Timestamps ---------------*/ 1932 1933 static Bool is_sane_VTS ( VTS* vts ) 1934 { 1935 UWord i, n; 1936 ScalarTS *st1, *st2; 1937 if (!vts) return False; 1938 if (!vts->ts) return False; 1939 if (vts->usedTS > vts->sizeTS) return False; 1940 n = vts->usedTS; 1941 if (n == 1) { 1942 st1 = &vts->ts[0]; 1943 if (st1->tym == 0) 1944 return False; 1945 } 1946 else 1947 if (n >= 2) { 1948 for (i = 0; i < n-1; i++) { 1949 st1 = &vts->ts[i]; 1950 st2 = &vts->ts[i+1]; 1951 if (st1->thrid >= st2->thrid) 1952 return False; 1953 if (st1->tym == 0 || st2->tym == 0) 1954 return False; 1955 } 1956 } 1957 return True; 1958 } 1959 1960 1961 /* Create a new, empty VTS. 1962 */ 1963 static VTS* VTS__new ( HChar* who, UInt sizeTS ) 1964 { 1965 VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS)); 1966 tl_assert(vts->usedTS == 0); 1967 vts->sizeTS = sizeTS; 1968 *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL; 1969 return vts; 1970 } 1971 1972 /* Clone this VTS. 1973 */ 1974 static VTS* VTS__clone ( HChar* who, VTS* vts ) 1975 { 1976 tl_assert(vts); 1977 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL); 1978 UInt nTS = vts->usedTS; 1979 VTS* clone = VTS__new(who, nTS); 1980 clone->id = vts->id; 1981 clone->sizeTS = nTS; 1982 clone->usedTS = nTS; 1983 UInt i; 1984 for (i = 0; i < nTS; i++) { 1985 clone->ts[i] = vts->ts[i]; 1986 } 1987 tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL); 1988 return clone; 1989 } 1990 1991 1992 /* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel' 1993 must be in strictly increasing order. We could obviously do this 1994 much more efficiently (in linear time) if necessary. 1995 */ 1996 static VTS* VTS__subtract ( HChar* who, VTS* vts, XArray* thridsToDel ) 1997 { 1998 UInt i, j; 1999 tl_assert(vts); 2000 tl_assert(thridsToDel); 2001 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL); 2002 UInt nTS = vts->usedTS; 2003 /* Figure out how many ScalarTSs will remain in the output. */ 2004 UInt nReq = nTS; 2005 for (i = 0; i < nTS; i++) { 2006 ThrID thrid = vts->ts[i].thrid; 2007 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL)) 2008 nReq--; 2009 } 2010 tl_assert(nReq <= nTS); 2011 /* Copy the ones that will remain. */ 2012 VTS* res = VTS__new(who, nReq); 2013 j = 0; 2014 for (i = 0; i < nTS; i++) { 2015 ThrID thrid = vts->ts[i].thrid; 2016 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL)) 2017 continue; 2018 res->ts[j++] = vts->ts[i]; 2019 } 2020 tl_assert(j == nReq); 2021 tl_assert(j == res->sizeTS); 2022 res->usedTS = j; 2023 tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL); 2024 return res; 2025 } 2026 2027 2028 /* Delete this VTS in its entirety. 2029 */ 2030 static void VTS__delete ( VTS* vts ) 2031 { 2032 tl_assert(vts); 2033 tl_assert(vts->usedTS <= vts->sizeTS); 2034 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL); 2035 HG_(free)(vts); 2036 } 2037 2038 2039 /* Create a new singleton VTS. 2040 */ 2041 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym ) 2042 { 2043 tl_assert(thr); 2044 tl_assert(tym >= 1); 2045 tl_assert(out); 2046 tl_assert(out->usedTS == 0); 2047 tl_assert(out->sizeTS >= 1); 2048 UInt hi = out->usedTS++; 2049 out->ts[hi].thrid = Thr__to_ThrID(thr); 2050 out->ts[hi].tym = tym; 2051 } 2052 2053 2054 /* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is 2055 not modified. 2056 */ 2057 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts ) 2058 { 2059 UInt i, n; 2060 ThrID me_thrid; 2061 Bool found = False; 2062 2063 stats__vts__tick++; 2064 2065 tl_assert(out); 2066 tl_assert(out->usedTS == 0); 2067 if (vts->usedTS >= ThrID_MAX_VALID) 2068 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ ); 2069 tl_assert(out->sizeTS >= 1 + vts->usedTS); 2070 2071 tl_assert(me); 2072 me_thrid = Thr__to_ThrID(me); 2073 tl_assert(is_sane_VTS(vts)); 2074 n = vts->usedTS; 2075 2076 /* Copy all entries which precede 'me'. */ 2077 for (i = 0; i < n; i++) { 2078 ScalarTS* here = &vts->ts[i]; 2079 if (UNLIKELY(here->thrid >= me_thrid)) 2080 break; 2081 UInt hi = out->usedTS++; 2082 out->ts[hi] = *here; 2083 } 2084 2085 /* 'i' now indicates the next entry to copy, if any. 2086 There are 3 possibilities: 2087 (a) there is no next entry (we used them all up already): 2088 add (me_thrid,1) to the output, and quit 2089 (b) there is a next entry, and its thrid > me_thrid: 2090 add (me_thrid,1) to the output, then copy the remaining entries 2091 (c) there is a next entry, and its thrid == me_thrid: 2092 copy it to the output but increment its timestamp value. 2093 Then copy the remaining entries. (c) is the common case. 2094 */ 2095 tl_assert(i >= 0 && i <= n); 2096 if (i == n) { /* case (a) */ 2097 UInt hi = out->usedTS++; 2098 out->ts[hi].thrid = me_thrid; 2099 out->ts[hi].tym = 1; 2100 } else { 2101 /* cases (b) and (c) */ 2102 ScalarTS* here = &vts->ts[i]; 2103 if (me_thrid == here->thrid) { /* case (c) */ 2104 if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) { 2105 /* We're hosed. We have to stop. */ 2106 scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ ); 2107 } 2108 UInt hi = out->usedTS++; 2109 out->ts[hi].thrid = here->thrid; 2110 out->ts[hi].tym = here->tym + 1; 2111 i++; 2112 found = True; 2113 } else { /* case (b) */ 2114 UInt hi = out->usedTS++; 2115 out->ts[hi].thrid = me_thrid; 2116 out->ts[hi].tym = 1; 2117 } 2118 /* And copy any remaining entries. */ 2119 for (/*keepgoing*/; i < n; i++) { 2120 ScalarTS* here2 = &vts->ts[i]; 2121 UInt hi = out->usedTS++; 2122 out->ts[hi] = *here2; 2123 } 2124 } 2125 2126 tl_assert(is_sane_VTS(out)); 2127 tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1)); 2128 tl_assert(out->usedTS <= out->sizeTS); 2129 } 2130 2131 2132 /* Return a new VTS constructed as the join (max) of the 2 args. 2133 Neither arg is modified. 2134 */ 2135 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b ) 2136 { 2137 UInt ia, ib, useda, usedb; 2138 ULong tyma, tymb, tymMax; 2139 ThrID thrid; 2140 UInt ncommon = 0; 2141 2142 stats__vts__join++; 2143 2144 tl_assert(a); 2145 tl_assert(b); 2146 useda = a->usedTS; 2147 usedb = b->usedTS; 2148 2149 tl_assert(out); 2150 tl_assert(out->usedTS == 0); 2151 /* overly conservative test, but doing better involves comparing 2152 the two VTSs, which we don't want to do at this point. */ 2153 if (useda + usedb >= ThrID_MAX_VALID) 2154 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ ); 2155 tl_assert(out->sizeTS >= useda + usedb); 2156 2157 ia = ib = 0; 2158 2159 while (1) { 2160 2161 /* This logic is to enumerate triples (thrid, tyma, tymb) drawn 2162 from a and b in order, where thrid is the next ThrID 2163 occurring in either a or b, and tyma/b are the relevant 2164 scalar timestamps, taking into account implicit zeroes. */ 2165 tl_assert(ia >= 0 && ia <= useda); 2166 tl_assert(ib >= 0 && ib <= usedb); 2167 2168 if (ia == useda && ib == usedb) { 2169 /* both empty - done */ 2170 break; 2171 2172 } else if (ia == useda && ib != usedb) { 2173 /* a empty, use up b */ 2174 ScalarTS* tmpb = &b->ts[ib]; 2175 thrid = tmpb->thrid; 2176 tyma = 0; 2177 tymb = tmpb->tym; 2178 ib++; 2179 2180 } else if (ia != useda && ib == usedb) { 2181 /* b empty, use up a */ 2182 ScalarTS* tmpa = &a->ts[ia]; 2183 thrid = tmpa->thrid; 2184 tyma = tmpa->tym; 2185 tymb = 0; 2186 ia++; 2187 2188 } else { 2189 /* both not empty; extract lowest-ThrID'd triple */ 2190 ScalarTS* tmpa = &a->ts[ia]; 2191 ScalarTS* tmpb = &b->ts[ib]; 2192 if (tmpa->thrid < tmpb->thrid) { 2193 /* a has the lowest unconsidered ThrID */ 2194 thrid = tmpa->thrid; 2195 tyma = tmpa->tym; 2196 tymb = 0; 2197 ia++; 2198 } else if (tmpa->thrid > tmpb->thrid) { 2199 /* b has the lowest unconsidered ThrID */ 2200 thrid = tmpb->thrid; 2201 tyma = 0; 2202 tymb = tmpb->tym; 2203 ib++; 2204 } else { 2205 /* they both next mention the same ThrID */ 2206 tl_assert(tmpa->thrid == tmpb->thrid); 2207 thrid = tmpa->thrid; /* == tmpb->thrid */ 2208 tyma = tmpa->tym; 2209 tymb = tmpb->tym; 2210 ia++; 2211 ib++; 2212 ncommon++; 2213 } 2214 } 2215 2216 /* having laboriously determined (thr, tyma, tymb), do something 2217 useful with it. */ 2218 tymMax = tyma > tymb ? tyma : tymb; 2219 if (tymMax > 0) { 2220 UInt hi = out->usedTS++; 2221 out->ts[hi].thrid = thrid; 2222 out->ts[hi].tym = tymMax; 2223 } 2224 2225 } 2226 2227 tl_assert(is_sane_VTS(out)); 2228 tl_assert(out->usedTS <= out->sizeTS); 2229 tl_assert(out->usedTS == useda + usedb - ncommon); 2230 } 2231 2232 2233 /* Determine if 'a' <= 'b', in the partial ordering. Returns zero if 2234 they are, or the first ThrID for which they are not (no valid ThrID 2235 has the value zero). This rather strange convention is used 2236 because sometimes we want to know the actual index at which they 2237 first differ. */ 2238 static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b ) 2239 { 2240 Word ia, ib, useda, usedb; 2241 ULong tyma, tymb; 2242 2243 stats__vts__cmpLEQ++; 2244 2245 tl_assert(a); 2246 tl_assert(b); 2247 useda = a->usedTS; 2248 usedb = b->usedTS; 2249 2250 ia = ib = 0; 2251 2252 while (1) { 2253 2254 /* This logic is to enumerate doubles (tyma, tymb) drawn 2255 from a and b in order, and tyma/b are the relevant 2256 scalar timestamps, taking into account implicit zeroes. */ 2257 ThrID thrid; 2258 2259 tl_assert(ia >= 0 && ia <= useda); 2260 tl_assert(ib >= 0 && ib <= usedb); 2261 2262 if (ia == useda && ib == usedb) { 2263 /* both empty - done */ 2264 break; 2265 2266 } else if (ia == useda && ib != usedb) { 2267 /* a empty, use up b */ 2268 ScalarTS* tmpb = &b->ts[ib]; 2269 tyma = 0; 2270 tymb = tmpb->tym; 2271 thrid = tmpb->thrid; 2272 ib++; 2273 2274 } else if (ia != useda && ib == usedb) { 2275 /* b empty, use up a */ 2276 ScalarTS* tmpa = &a->ts[ia]; 2277 tyma = tmpa->tym; 2278 thrid = tmpa->thrid; 2279 tymb = 0; 2280 ia++; 2281 2282 } else { 2283 /* both not empty; extract lowest-ThrID'd triple */ 2284 ScalarTS* tmpa = &a->ts[ia]; 2285 ScalarTS* tmpb = &b->ts[ib]; 2286 if (tmpa->thrid < tmpb->thrid) { 2287 /* a has the lowest unconsidered ThrID */ 2288 tyma = tmpa->tym; 2289 thrid = tmpa->thrid; 2290 tymb = 0; 2291 ia++; 2292 } 2293 else 2294 if (tmpa->thrid > tmpb->thrid) { 2295 /* b has the lowest unconsidered ThrID */ 2296 tyma = 0; 2297 tymb = tmpb->tym; 2298 thrid = tmpb->thrid; 2299 ib++; 2300 } else { 2301 /* they both next mention the same ThrID */ 2302 tl_assert(tmpa->thrid == tmpb->thrid); 2303 tyma = tmpa->tym; 2304 thrid = tmpa->thrid; 2305 tymb = tmpb->tym; 2306 ia++; 2307 ib++; 2308 } 2309 } 2310 2311 /* having laboriously determined (tyma, tymb), do something 2312 useful with it. */ 2313 if (tyma > tymb) { 2314 /* not LEQ at this index. Quit, since the answer is 2315 determined already. */ 2316 tl_assert(thrid >= 1024); 2317 return thrid; 2318 } 2319 } 2320 2321 return 0; /* all points are LEQ => return an invalid ThrID */ 2322 } 2323 2324 2325 /* Compute an arbitrary structural (total) ordering on the two args, 2326 based on their VCs, so they can be looked up in a table, tree, etc. 2327 Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be 2328 performance critical so there is some effort expended to make it sa 2329 fast as possible. 2330 */ 2331 Word VTS__cmp_structural ( VTS* a, VTS* b ) 2332 { 2333 /* We just need to generate an arbitrary total ordering based on 2334 a->ts and b->ts. Preferably do it in a way which comes across likely 2335 differences relatively quickly. */ 2336 Word i; 2337 Word useda = 0, usedb = 0; 2338 ScalarTS *ctsa = NULL, *ctsb = NULL; 2339 2340 stats__vts__cmp_structural++; 2341 2342 tl_assert(a); 2343 tl_assert(b); 2344 2345 ctsa = &a->ts[0]; useda = a->usedTS; 2346 ctsb = &b->ts[0]; usedb = b->usedTS; 2347 2348 if (LIKELY(useda == usedb)) { 2349 ScalarTS *tmpa = NULL, *tmpb = NULL; 2350 stats__vts__cmp_structural_slow++; 2351 /* Same length vectors. Find the first difference, if any, as 2352 fast as possible. */ 2353 for (i = 0; i < useda; i++) { 2354 tmpa = &ctsa[i]; 2355 tmpb = &ctsb[i]; 2356 if (LIKELY(tmpa->tym == tmpb->tym 2357 && tmpa->thrid == tmpb->thrid)) 2358 continue; 2359 else 2360 break; 2361 } 2362 if (UNLIKELY(i == useda)) { 2363 /* They're identical. */ 2364 return 0; 2365 } else { 2366 tl_assert(i >= 0 && i < useda); 2367 if (tmpa->tym < tmpb->tym) return -1; 2368 if (tmpa->tym > tmpb->tym) return 1; 2369 if (tmpa->thrid < tmpb->thrid) return -1; 2370 if (tmpa->thrid > tmpb->thrid) return 1; 2371 /* we just established them as non-identical, hence: */ 2372 } 2373 /*NOTREACHED*/ 2374 tl_assert(0); 2375 } 2376 2377 if (useda < usedb) return -1; 2378 if (useda > usedb) return 1; 2379 /*NOTREACHED*/ 2380 tl_assert(0); 2381 } 2382 2383 2384 /* Debugging only. Display the given VTS in the buffer. 2385 */ 2386 void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) 2387 { 2388 ScalarTS* st; 2389 HChar unit[64]; 2390 Word i, n; 2391 Int avail = nBuf; 2392 tl_assert(vts && vts->ts); 2393 tl_assert(nBuf > 16); 2394 buf[0] = '['; 2395 buf[1] = 0; 2396 n = vts->usedTS; 2397 for (i = 0; i < n; i++) { 2398 tl_assert(avail >= 40); 2399 st = &vts->ts[i]; 2400 VG_(memset)(unit, 0, sizeof(unit)); 2401 VG_(sprintf)(unit, i < n-1 ? "%u:%llu " : "%u:%llu", 2402 st->thrid, (ULong)st->tym); 2403 if (avail < VG_(strlen)(unit) + 40/*let's say*/) { 2404 VG_(strcat)(buf, " ...]"); 2405 buf[nBuf-1] = 0; 2406 return; 2407 } 2408 VG_(strcat)(buf, unit); 2409 avail -= VG_(strlen)(unit); 2410 } 2411 VG_(strcat)(buf, "]"); 2412 buf[nBuf-1] = 0; 2413 } 2414 2415 2416 /* Debugging only. Return vts[index], so to speak. 2417 */ 2418 ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) 2419 { 2420 UWord i, n; 2421 ThrID idx_thrid = Thr__to_ThrID(idx); 2422 stats__vts__indexat_slow++; 2423 tl_assert(vts && vts->ts); 2424 n = vts->usedTS; 2425 for (i = 0; i < n; i++) { 2426 ScalarTS* st = &vts->ts[i]; 2427 if (st->thrid == idx_thrid) 2428 return st->tym; 2429 } 2430 return 0; 2431 } 2432 2433 2434 /* See comment on prototype above. 2435 */ 2436 static void VTS__declare_thread_very_dead ( Thr* thr ) 2437 { 2438 if (0) VG_(printf)("VTQ: tae %p\n", thr); 2439 2440 tl_assert(thr->llexit_done); 2441 tl_assert(thr->joinedwith_done); 2442 2443 ThrID nyu; 2444 nyu = Thr__to_ThrID(thr); 2445 VG_(addToXA)( verydead_thread_table, &nyu ); 2446 2447 /* We can only get here if we're assured that we'll never again 2448 need to look at this thread's ::viR or ::viW. Set them to 2449 VtsID_INVALID, partly so as to avoid holding on to the VTSs, but 2450 mostly so that we don't wind up pruning them (as that would be 2451 nonsensical: the only interesting ScalarTS entry for a dead 2452 thread is its own index, and the pruning will remove that.). */ 2453 VtsID__rcdec(thr->viR); 2454 VtsID__rcdec(thr->viW); 2455 thr->viR = VtsID_INVALID; 2456 thr->viW = VtsID_INVALID; 2457 } 2458 2459 2460 ///////////////////////////////////////////////////////////////// 2461 ///////////////////////////////////////////////////////////////// 2462 // // 2463 // SECTION END vts primitives // 2464 // // 2465 ///////////////////////////////////////////////////////////////// 2466 ///////////////////////////////////////////////////////////////// 2467 2468 2469 2470 ///////////////////////////////////////////////////////////////// 2471 ///////////////////////////////////////////////////////////////// 2472 // // 2473 // SECTION BEGIN main library // 2474 // // 2475 ///////////////////////////////////////////////////////////////// 2476 ///////////////////////////////////////////////////////////////// 2477 2478 2479 ///////////////////////////////////////////////////////// 2480 // // 2481 // VTS set // 2482 // // 2483 ///////////////////////////////////////////////////////// 2484 2485 static WordFM* /* WordFM VTS* void */ vts_set = NULL; 2486 2487 static void vts_set_init ( void ) 2488 { 2489 tl_assert(!vts_set); 2490 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1", 2491 HG_(free), 2492 (Word(*)(UWord,UWord))VTS__cmp_structural ); 2493 tl_assert(vts_set); 2494 } 2495 2496 /* Given a VTS, look in vts_set to see if we already have a 2497 structurally identical one. If yes, return the pair (True, pointer 2498 to the existing one). If no, clone this one, add the clone to the 2499 set, and return (False, pointer to the clone). */ 2500 static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand ) 2501 { 2502 UWord keyW, valW; 2503 stats__vts_set__focaa++; 2504 tl_assert(cand->id == VtsID_INVALID); 2505 /* lookup cand (by value) */ 2506 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) { 2507 /* found it */ 2508 tl_assert(valW == 0); 2509 /* if this fails, cand (by ref) was already present (!) */ 2510 tl_assert(keyW != (UWord)cand); 2511 *res = (VTS*)keyW; 2512 return True; 2513 } else { 2514 /* not present. Clone, add and return address of clone. */ 2515 stats__vts_set__focaa_a++; 2516 VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand ); 2517 tl_assert(clone != cand); 2518 VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ ); 2519 *res = clone; 2520 return False; 2521 } 2522 } 2523 2524 2525 ///////////////////////////////////////////////////////// 2526 // // 2527 // VTS table // 2528 // // 2529 ///////////////////////////////////////////////////////// 2530 2531 static void VtsID__invalidate_caches ( void ); /* fwds */ 2532 2533 /* A type to hold VTS table entries. Invariants: 2534 If .vts == NULL, then this entry is not in use, so: 2535 - .rc == 0 2536 - this entry is on the freelist (unfortunately, does not imply 2537 any constraints on value for .freelink) 2538 If .vts != NULL, then this entry is in use: 2539 - .vts is findable in vts_set 2540 - .vts->id == this entry number 2541 - no specific value for .rc (even 0 is OK) 2542 - this entry is not on freelist, so .freelink == VtsID_INVALID 2543 */ 2544 typedef 2545 struct { 2546 VTS* vts; /* vts, in vts_set */ 2547 UWord rc; /* reference count - enough for entire aspace */ 2548 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */ 2549 VtsID remap; /* used only during pruning */ 2550 } 2551 VtsTE; 2552 2553 /* The VTS table. */ 2554 static XArray* /* of VtsTE */ vts_tab = NULL; 2555 2556 /* An index into the VTS table, indicating the start of the list of 2557 free (available for use) entries. If the list is empty, this is 2558 VtsID_INVALID. */ 2559 static VtsID vts_tab_freelist = VtsID_INVALID; 2560 2561 /* Do a GC of vts_tab when the freelist becomes empty AND the size of 2562 vts_tab equals or exceeds this size. After GC, the value here is 2563 set appropriately so as to check for the next GC point. */ 2564 static Word vts_next_GC_at = 1000; 2565 2566 static void vts_tab_init ( void ) 2567 { 2568 vts_tab 2569 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1", 2570 HG_(free), sizeof(VtsTE) ); 2571 vts_tab_freelist 2572 = VtsID_INVALID; 2573 tl_assert(vts_tab); 2574 } 2575 2576 /* Add ii to the free list, checking that it looks out-of-use. */ 2577 static void add_to_free_list ( VtsID ii ) 2578 { 2579 VtsTE* ie = VG_(indexXA)( vts_tab, ii ); 2580 tl_assert(ie->vts == NULL); 2581 tl_assert(ie->rc == 0); 2582 tl_assert(ie->freelink == VtsID_INVALID); 2583 ie->freelink = vts_tab_freelist; 2584 vts_tab_freelist = ii; 2585 } 2586 2587 /* Get an entry from the free list. This will return VtsID_INVALID if 2588 the free list is empty. */ 2589 static VtsID get_from_free_list ( void ) 2590 { 2591 VtsID ii; 2592 VtsTE* ie; 2593 if (vts_tab_freelist == VtsID_INVALID) 2594 return VtsID_INVALID; 2595 ii = vts_tab_freelist; 2596 ie = VG_(indexXA)( vts_tab, ii ); 2597 tl_assert(ie->vts == NULL); 2598 tl_assert(ie->rc == 0); 2599 vts_tab_freelist = ie->freelink; 2600 return ii; 2601 } 2602 2603 /* Produce a new VtsID that can be used, either by getting it from 2604 the freelist, or, if that is empty, by expanding vts_tab. */ 2605 static VtsID get_new_VtsID ( void ) 2606 { 2607 VtsID ii; 2608 VtsTE te; 2609 ii = get_from_free_list(); 2610 if (ii != VtsID_INVALID) 2611 return ii; 2612 te.vts = NULL; 2613 te.rc = 0; 2614 te.freelink = VtsID_INVALID; 2615 te.remap = VtsID_INVALID; 2616 ii = (VtsID)VG_(addToXA)( vts_tab, &te ); 2617 return ii; 2618 } 2619 2620 2621 /* Indirect callback from lib_zsm. */ 2622 static void VtsID__rcinc ( VtsID ii ) 2623 { 2624 VtsTE* ie; 2625 /* VG_(indexXA) does a range check for us */ 2626 ie = VG_(indexXA)( vts_tab, ii ); 2627 tl_assert(ie->vts); /* else it's not in use */ 2628 tl_assert(ie->rc < ~0UL); /* else we can't continue */ 2629 tl_assert(ie->vts->id == ii); 2630 ie->rc++; 2631 } 2632 2633 /* Indirect callback from lib_zsm. */ 2634 static void VtsID__rcdec ( VtsID ii ) 2635 { 2636 VtsTE* ie; 2637 /* VG_(indexXA) does a range check for us */ 2638 ie = VG_(indexXA)( vts_tab, ii ); 2639 tl_assert(ie->vts); /* else it's not in use */ 2640 tl_assert(ie->rc > 0); /* else RC snafu */ 2641 tl_assert(ie->vts->id == ii); 2642 ie->rc--; 2643 } 2644 2645 2646 /* Look up 'cand' in our collection of VTSs. If present, return the 2647 VtsID for the pre-existing version. If not present, clone it, add 2648 the clone to both vts_tab and vts_set, allocate a fresh VtsID for 2649 it, and return that. */ 2650 static VtsID vts_tab__find__or__clone_and_add ( VTS* cand ) 2651 { 2652 VTS* in_tab = NULL; 2653 tl_assert(cand->id == VtsID_INVALID); 2654 Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand ); 2655 tl_assert(in_tab); 2656 if (already_have) { 2657 /* We already have a copy of 'cand'. Use that. */ 2658 VtsTE* ie; 2659 tl_assert(in_tab->id != VtsID_INVALID); 2660 ie = VG_(indexXA)( vts_tab, in_tab->id ); 2661 tl_assert(ie->vts == in_tab); 2662 return in_tab->id; 2663 } else { 2664 VtsID ii = get_new_VtsID(); 2665 VtsTE* ie = VG_(indexXA)( vts_tab, ii ); 2666 ie->vts = in_tab; 2667 ie->rc = 0; 2668 ie->freelink = VtsID_INVALID; 2669 in_tab->id = ii; 2670 return ii; 2671 } 2672 } 2673 2674 2675 static void show_vts_stats ( HChar* caller ) 2676 { 2677 UWord nSet, nTab, nLive; 2678 ULong totrc; 2679 UWord n, i; 2680 nSet = VG_(sizeFM)( vts_set ); 2681 nTab = VG_(sizeXA)( vts_tab ); 2682 totrc = 0; 2683 nLive = 0; 2684 n = VG_(sizeXA)( vts_tab ); 2685 for (i = 0; i < n; i++) { 2686 VtsTE* ie = VG_(indexXA)( vts_tab, i ); 2687 if (ie->vts) { 2688 nLive++; 2689 totrc += (ULong)ie->rc; 2690 } else { 2691 tl_assert(ie->rc == 0); 2692 } 2693 } 2694 VG_(printf)(" show_vts_stats %s\n", caller); 2695 VG_(printf)(" vts_tab size %4lu\n", nTab); 2696 VG_(printf)(" vts_tab live %4lu\n", nLive); 2697 VG_(printf)(" vts_set size %4lu\n", nSet); 2698 VG_(printf)(" total rc %4llu\n", totrc); 2699 } 2700 2701 2702 /* --- Helpers for VtsID pruning --- */ 2703 2704 static 2705 void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab, 2706 /*MOD*/XArray* /* of VtsTE */ new_tab, 2707 VtsID* ii ) 2708 { 2709 VtsTE *old_te, *new_te; 2710 VtsID old_id, new_id; 2711 /* We're relying here on VG_(indexXA)'s range checking to assert on 2712 any stupid values, in particular *ii == VtsID_INVALID. */ 2713 old_id = *ii; 2714 old_te = VG_(indexXA)( old_tab, old_id ); 2715 old_te->rc--; 2716 new_id = old_te->remap; 2717 new_te = VG_(indexXA)( new_tab, new_id ); 2718 new_te->rc++; 2719 *ii = new_id; 2720 } 2721 2722 static 2723 void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab, 2724 /*MOD*/XArray* /* of VtsTE */ new_tab, 2725 SVal* s ) 2726 { 2727 SVal old_sv, new_sv; 2728 old_sv = *s; 2729 if (SVal__isC(old_sv)) { 2730 VtsID rMin, wMin; 2731 rMin = SVal__unC_Rmin(old_sv); 2732 wMin = SVal__unC_Wmin(old_sv); 2733 remap_VtsID( old_tab, new_tab, &rMin ); 2734 remap_VtsID( old_tab, new_tab, &wMin ); 2735 new_sv = SVal__mkC( rMin, wMin ); 2736 *s = new_sv; 2737 } 2738 } 2739 2740 2741 /* NOT TO BE CALLED FROM WITHIN libzsm. */ 2742 __attribute__((noinline)) 2743 static void vts_tab__do_GC ( Bool show_stats ) 2744 { 2745 UWord i, nTab, nLive, nFreed; 2746 2747 /* ---------- BEGIN VTS GC ---------- */ 2748 /* check this is actually necessary. */ 2749 tl_assert(vts_tab_freelist == VtsID_INVALID); 2750 2751 /* empty the caches for partial order checks and binary joins. We 2752 could do better and prune out the entries to be deleted, but it 2753 ain't worth the hassle. */ 2754 VtsID__invalidate_caches(); 2755 2756 /* First, make the reference counts up to date. */ 2757 zsm_flush_cache(); 2758 2759 nTab = VG_(sizeXA)( vts_tab ); 2760 2761 if (show_stats) { 2762 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab); 2763 show_vts_stats("before GC"); 2764 } 2765 2766 /* Now we can inspect the entire vts_tab. Any entries with zero 2767 .rc fields are now no longer in use and can be put back on the 2768 free list, removed from vts_set, and deleted. */ 2769 nFreed = 0; 2770 for (i = 0; i < nTab; i++) { 2771 Bool present; 2772 UWord oldK = 0, oldV = 12345; 2773 VtsTE* te = VG_(indexXA)( vts_tab, i ); 2774 if (te->vts == NULL) { 2775 tl_assert(te->rc == 0); 2776 continue; /* already on the free list (presumably) */ 2777 } 2778 if (te->rc > 0) 2779 continue; /* in use */ 2780 /* Ok, we got one we can free. */ 2781 tl_assert(te->vts->id == i); 2782 /* first, remove it from vts_set. */ 2783 present = VG_(delFromFM)( vts_set, 2784 &oldK, &oldV, (UWord)te->vts ); 2785 tl_assert(present); /* else it isn't in vts_set ?! */ 2786 tl_assert(oldV == 0); /* no info stored in vts_set val fields */ 2787 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */ 2788 /* now free the VTS itself */ 2789 VTS__delete(te->vts); 2790 te->vts = NULL; 2791 /* and finally put this entry on the free list */ 2792 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */ 2793 add_to_free_list( i ); 2794 nFreed++; 2795 } 2796 2797 /* Now figure out when the next GC should be. We'll allow the 2798 number of VTSs to double before GCing again. Except of course 2799 that since we can't (or, at least, don't) shrink vts_tab, we 2800 can't set the threshhold value smaller than it. */ 2801 tl_assert(nFreed <= nTab); 2802 nLive = nTab - nFreed; 2803 tl_assert(nLive >= 0 && nLive <= nTab); 2804 vts_next_GC_at = 2 * nLive; 2805 if (vts_next_GC_at < nTab) 2806 vts_next_GC_at = nTab; 2807 2808 if (show_stats) { 2809 show_vts_stats("after GC"); 2810 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at); 2811 } 2812 2813 if (VG_(clo_stats)) { 2814 static UInt ctr = 1; 2815 tl_assert(nTab > 0); 2816 VG_(message)(Vg_DebugMsg, 2817 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n", 2818 ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab); 2819 } 2820 /* ---------- END VTS GC ---------- */ 2821 2822 /* Decide whether to do VTS pruning. We have one of three 2823 settings. */ 2824 static UInt pruning_auto_ctr = 0; /* do not make non-static */ 2825 2826 Bool do_pruning = False; 2827 switch (HG_(clo_vts_pruning)) { 2828 case 0: /* never */ 2829 break; 2830 case 1: /* auto */ 2831 do_pruning = (++pruning_auto_ctr % 5) == 0; 2832 break; 2833 case 2: /* always */ 2834 do_pruning = True; 2835 break; 2836 default: 2837 tl_assert(0); 2838 } 2839 2840 /* The rest of this routine only handles pruning, so we can 2841 quit at this point if it is not to be done. */ 2842 if (!do_pruning) 2843 return; 2844 2845 /* ---------- BEGIN VTS PRUNING ---------- */ 2846 /* We begin by sorting the backing table on its .thr values, so as 2847 to (1) check they are unique [else something has gone wrong, 2848 since it means we must have seen some Thr* exiting more than 2849 once, which can't happen], and (2) so that we can quickly look 2850 up the dead-thread entries as we work through the VTSs. */ 2851 VG_(sortXA)( verydead_thread_table ); 2852 /* Sanity check: check for unique .sts.thr values. */ 2853 UWord nBT = VG_(sizeXA)( verydead_thread_table ); 2854 if (nBT > 0) { 2855 ThrID thrid1, thrid2; 2856 thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, 0 ); 2857 for (i = 1; i < nBT; i++) { 2858 thrid1 = thrid2; 2859 thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, i ); 2860 tl_assert(thrid1 < thrid2); 2861 } 2862 } 2863 /* Ok, so the dead thread table has unique and in-order keys. */ 2864 2865 /* We will run through the old table, and create a new table and 2866 set, at the same time setting the .remap entries in the old 2867 table to point to the new entries. Then, visit every VtsID in 2868 the system, and replace all of them with new ones, using the 2869 .remap entries in the old table. Finally, we can delete the old 2870 table and set. */ 2871 2872 XArray* /* of VtsTE */ new_tab 2873 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab", 2874 HG_(free), sizeof(VtsTE) ); 2875 2876 /* WordFM VTS* void */ 2877 WordFM* new_set 2878 = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set", 2879 HG_(free), 2880 (Word(*)(UWord,UWord))VTS__cmp_structural ); 2881 2882 /* Visit each old VTS. For each one: 2883 2884 * make a pruned version 2885 2886 * search new_set for the pruned version, yielding either 2887 Nothing (not present) or the new VtsID for it. 2888 2889 * if not present, allocate a new VtsID for it, insert (pruned 2890 VTS, new VtsID) in the tree, and set 2891 remap_table[old VtsID] = new VtsID. 2892 2893 * if present, set remap_table[old VtsID] = new VtsID, where 2894 new VtsID was determined by the tree lookup. Then free up 2895 the clone. 2896 */ 2897 2898 UWord nBeforePruning = 0, nAfterPruning = 0; 2899 UWord nSTSsBefore = 0, nSTSsAfter = 0; 2900 VtsID new_VtsID_ctr = 0; 2901 2902 for (i = 0; i < nTab; i++) { 2903 2904 /* For each old VTS .. */ 2905 VtsTE* old_te = VG_(indexXA)( vts_tab, i ); 2906 VTS* old_vts = old_te->vts; 2907 tl_assert(old_te->remap == VtsID_INVALID); 2908 2909 /* Skip it if not in use */ 2910 if (old_te->rc == 0) { 2911 tl_assert(old_vts == NULL); 2912 continue; 2913 } 2914 tl_assert(old_vts != NULL); 2915 tl_assert(old_vts->id == i); 2916 tl_assert(old_vts->ts != NULL); 2917 2918 /* It is in use. Make a pruned version. */ 2919 nBeforePruning++; 2920 nSTSsBefore += old_vts->usedTS; 2921 VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts", 2922 old_vts, verydead_thread_table); 2923 tl_assert(new_vts->sizeTS == new_vts->usedTS); 2924 tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS]) 2925 == 0x0ddC0ffeeBadF00dULL); 2926 2927 /* Get rid of the old VTS and the tree entry. It's a bit more 2928 complex to incrementally delete the VTSs now than to nuke 2929 them all after we're done, but the upside is that we don't 2930 wind up temporarily storing potentially two complete copies 2931 of each VTS and hence spiking memory use. */ 2932 UWord oldK = 0, oldV = 12345; 2933 Bool present = VG_(delFromFM)( vts_set, 2934 &oldK, &oldV, (UWord)old_vts ); 2935 tl_assert(present); /* else it isn't in vts_set ?! */ 2936 tl_assert(oldV == 0); /* no info stored in vts_set val fields */ 2937 tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */ 2938 /* now free the VTS itself */ 2939 VTS__delete(old_vts); 2940 old_te->vts = NULL; 2941 old_vts = NULL; 2942 2943 /* NO MENTIONS of old_vts allowed beyond this point. */ 2944 2945 /* Ok, we have the pruned copy in new_vts. See if a 2946 structurally identical version is already present in new_set. 2947 If so, delete the one we just made and move on; if not, add 2948 it. */ 2949 VTS* identical_version = NULL; 2950 UWord valW = 12345; 2951 if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW, 2952 (UWord)new_vts)) { 2953 // already have it 2954 tl_assert(valW == 0); 2955 tl_assert(identical_version != NULL); 2956 tl_assert(identical_version != new_vts); 2957 VTS__delete(new_vts); 2958 new_vts = identical_version; 2959 tl_assert(new_vts->id != VtsID_INVALID); 2960 } else { 2961 tl_assert(valW == 12345); 2962 tl_assert(identical_version == NULL); 2963 new_vts->id = new_VtsID_ctr++; 2964 Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0); 2965 tl_assert(!b); 2966 VtsTE new_te; 2967 new_te.vts = new_vts; 2968 new_te.rc = 0; 2969 new_te.freelink = VtsID_INVALID; 2970 new_te.remap = VtsID_INVALID; 2971 Word j = VG_(addToXA)( new_tab, &new_te ); 2972 tl_assert(j <= i); 2973 tl_assert(j == new_VtsID_ctr - 1); 2974 // stats 2975 nAfterPruning++; 2976 nSTSsAfter += new_vts->usedTS; 2977 } 2978 old_te->remap = new_vts->id; 2979 2980 } /* for (i = 0; i < nTab; i++) */ 2981 2982 /* At this point, we have: 2983 * the old VTS table, with its .remap entries set, 2984 and with all .vts == NULL. 2985 * the old VTS tree should be empty, since it and the old VTSs 2986 it contained have been incrementally deleted was we worked 2987 through the old table. 2988 * the new VTS table, with all .rc == 0, all .freelink and .remap 2989 == VtsID_INVALID. 2990 * the new VTS tree. 2991 */ 2992 tl_assert( VG_(sizeFM)(vts_set) == 0 ); 2993 2994 /* Now actually apply the mapping. */ 2995 /* Visit all the VtsIDs in the entire system. Where do we expect 2996 to find them? 2997 (a) in shadow memory -- the LineZs and LineFs 2998 (b) in our collection of struct _Thrs. 2999 (c) in our collection of struct _SOs. 3000 Nowhere else, AFAICS. Not in the zsm cache, because that just 3001 got invalidated. 3002 3003 Using the .remap fields in vts_tab, map each old VtsID to a new 3004 VtsID. For each old VtsID, dec its rc; and for each new one, 3005 inc it. This sets up the new refcounts, and it also gives a 3006 cheap sanity check of the old ones: all old refcounts should be 3007 zero after this operation. 3008 */ 3009 3010 /* Do the mappings for (a) above: iterate over the Primary shadow 3011 mem map (WordFM Addr SecMap*). */ 3012 UWord secmapW = 0; 3013 VG_(initIterFM)( map_shmem ); 3014 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) { 3015 UWord j; 3016 SecMap* sm = (SecMap*)secmapW; 3017 tl_assert(sm->magic == SecMap_MAGIC); 3018 /* Deal with the LineZs */ 3019 for (i = 0; i < N_SECMAP_ZLINES; i++) { 3020 LineZ* lineZ = &sm->linesZ[i]; 3021 if (lineZ->dict[0] == SVal_INVALID) 3022 continue; /* not in use -- data is in F rep instead */ 3023 for (j = 0; j < 4; j++) 3024 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]); 3025 } 3026 /* Deal with the LineFs */ 3027 for (i = 0; i < sm->linesF_size; i++) { 3028 LineF* lineF = &sm->linesF[i]; 3029 if (!lineF->inUse) 3030 continue; 3031 for (j = 0; j < N_LINE_ARANGE; j++) 3032 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]); 3033 } 3034 } 3035 VG_(doneIterFM)( map_shmem ); 3036 3037 /* Do the mappings for (b) above: visit our collection of struct 3038 _Thrs. */ 3039 Thread* hgthread = get_admin_threads(); 3040 tl_assert(hgthread); 3041 while (hgthread) { 3042 Thr* hbthr = hgthread->hbthr; 3043 tl_assert(hbthr); 3044 /* Threads that are listed in the prunable set have their viR 3045 and viW set to VtsID_INVALID, so we can't mess with them. */ 3046 if (hbthr->llexit_done && hbthr->joinedwith_done) { 3047 tl_assert(hbthr->viR == VtsID_INVALID); 3048 tl_assert(hbthr->viW == VtsID_INVALID); 3049 hgthread = hgthread->admin; 3050 continue; 3051 } 3052 remap_VtsID( vts_tab, new_tab, &hbthr->viR ); 3053 remap_VtsID( vts_tab, new_tab, &hbthr->viW ); 3054 hgthread = hgthread->admin; 3055 } 3056 3057 /* Do the mappings for (c) above: visit the struct _SOs. */ 3058 SO* so = admin_SO; 3059 while (so) { 3060 if (so->viR != VtsID_INVALID) 3061 remap_VtsID( vts_tab, new_tab, &so->viR ); 3062 if (so->viW != VtsID_INVALID) 3063 remap_VtsID( vts_tab, new_tab, &so->viW ); 3064 so = so->admin_next; 3065 } 3066 3067 /* So, we're nearly done (with this incredibly complex operation). 3068 Check the refcounts for the old VtsIDs all fell to zero, as 3069 expected. Any failure is serious. */ 3070 for (i = 0; i < nTab; i++) { 3071 VtsTE* te = VG_(indexXA)( vts_tab, i ); 3072 tl_assert(te->vts == NULL); 3073 /* This is the assert proper. Note we're also asserting 3074 zeroness for old entries which are unmapped (hence have 3075 .remap == VtsID_INVALID). That's OK. */ 3076 tl_assert(te->rc == 0); 3077 } 3078 3079 /* Install the new table and set. */ 3080 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/); 3081 vts_set = new_set; 3082 VG_(deleteXA)( vts_tab ); 3083 vts_tab = new_tab; 3084 3085 /* The freelist of vts_tab entries is empty now, because we've 3086 compacted all of the live entries at the low end of the 3087 table. */ 3088 vts_tab_freelist = VtsID_INVALID; 3089 3090 /* Sanity check vts_set and vts_tab. */ 3091 3092 /* Because all the live entries got slid down to the bottom of vts_tab: */ 3093 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set )); 3094 3095 /* Assert that the vts_tab and vts_set entries point at each other 3096 in the required way */ 3097 UWord wordK = 0, wordV = 0; 3098 VG_(initIterFM)( vts_set ); 3099 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) { 3100 tl_assert(wordK != 0); 3101 tl_assert(wordV == 0); 3102 VTS* vts = (VTS*)wordK; 3103 tl_assert(vts->id != VtsID_INVALID); 3104 VtsTE* te = VG_(indexXA)( vts_tab, vts->id ); 3105 tl_assert(te->vts == vts); 3106 } 3107 VG_(doneIterFM)( vts_set ); 3108 3109 /* Also iterate over the table, and check each entry is 3110 plausible. */ 3111 nTab = VG_(sizeXA)( vts_tab ); 3112 for (i = 0; i < nTab; i++) { 3113 VtsTE* te = VG_(indexXA)( vts_tab, i ); 3114 tl_assert(te->vts); 3115 tl_assert(te->vts->id == i); 3116 tl_assert(te->rc > 0); /* 'cos we just GC'd */ 3117 tl_assert(te->freelink == VtsID_INVALID); /* in use */ 3118 tl_assert(te->remap == VtsID_INVALID); /* not relevant */ 3119 } 3120 3121 /* And we're done. Bwahahaha. Ha. Ha. Ha. */ 3122 if (VG_(clo_stats)) { 3123 static UInt ctr = 1; 3124 tl_assert(nTab > 0); 3125 VG_(message)( 3126 Vg_DebugMsg, 3127 "libhb: VTS PR: #%u before %lu (avg sz %lu) " 3128 "after %lu (avg sz %lu)\n", 3129 ctr++, 3130 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1), 3131 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1) 3132 ); 3133 } 3134 if (0) 3135 VG_(printf)("VTQ: before pruning %lu (avg sz %lu), " 3136 "after pruning %lu (avg sz %lu)\n", 3137 nBeforePruning, nSTSsBefore / nBeforePruning, 3138 nAfterPruning, nSTSsAfter / nAfterPruning); 3139 /* ---------- END VTS PRUNING ---------- */ 3140 } 3141 3142 3143 ///////////////////////////////////////////////////////// 3144 // // 3145 // Vts IDs // 3146 // // 3147 ///////////////////////////////////////////////////////// 3148 3149 ////////////////////////// 3150 /* A temporary, max-sized VTS which is used as a temporary (the first 3151 argument) in VTS__singleton, VTS__tick and VTS__join operations. */ 3152 static VTS* temp_max_sized_VTS = NULL; 3153 3154 ////////////////////////// 3155 static ULong stats__cmpLEQ_queries = 0; 3156 static ULong stats__cmpLEQ_misses = 0; 3157 static ULong stats__join2_queries = 0; 3158 static ULong stats__join2_misses = 0; 3159 3160 static inline UInt ROL32 ( UInt w, Int n ) { 3161 w = (w << n) | (w >> (32-n)); 3162 return w; 3163 } 3164 static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) { 3165 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13); 3166 return hash % nTab; 3167 } 3168 3169 #define N_CMPLEQ_CACHE 1023 3170 static 3171 struct { VtsID vi1; VtsID vi2; Bool leq; } 3172 cmpLEQ_cache[N_CMPLEQ_CACHE]; 3173 3174 #define N_JOIN2_CACHE 1023 3175 static 3176 struct { VtsID vi1; VtsID vi2; VtsID res; } 3177 join2_cache[N_JOIN2_CACHE]; 3178 3179 static void VtsID__invalidate_caches ( void ) { 3180 Int i; 3181 for (i = 0; i < N_CMPLEQ_CACHE; i++) { 3182 cmpLEQ_cache[i].vi1 = VtsID_INVALID; 3183 cmpLEQ_cache[i].vi2 = VtsID_INVALID; 3184 cmpLEQ_cache[i].leq = False; 3185 } 3186 for (i = 0; i < N_JOIN2_CACHE; i++) { 3187 join2_cache[i].vi1 = VtsID_INVALID; 3188 join2_cache[i].vi2 = VtsID_INVALID; 3189 join2_cache[i].res = VtsID_INVALID; 3190 } 3191 } 3192 ////////////////////////// 3193 3194 //static Bool VtsID__is_valid ( VtsID vi ) { 3195 // VtsTE* ve; 3196 // if (vi >= (VtsID)VG_(sizeXA)( vts_tab )) 3197 // return False; 3198 // ve = VG_(indexXA)( vts_tab, vi ); 3199 // if (!ve->vts) 3200 // return False; 3201 // tl_assert(ve->vts->id == vi); 3202 // return True; 3203 //} 3204 3205 static VTS* VtsID__to_VTS ( VtsID vi ) { 3206 VtsTE* te = VG_(indexXA)( vts_tab, vi ); 3207 tl_assert(te->vts); 3208 return te->vts; 3209 } 3210 3211 static void VtsID__pp ( VtsID vi ) { 3212 HChar buf[100]; 3213 VTS* vts = VtsID__to_VTS(vi); 3214 VTS__show( buf, sizeof(buf)-1, vts ); 3215 buf[sizeof(buf)-1] = 0; 3216 VG_(printf)("%s", buf); 3217 } 3218 3219 /* compute partial ordering relation of vi1 and vi2. */ 3220 __attribute__((noinline)) 3221 static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) { 3222 UInt hash; 3223 Bool leq; 3224 VTS *v1, *v2; 3225 //if (vi1 == vi2) return True; 3226 tl_assert(vi1 != vi2); 3227 ////++ 3228 stats__cmpLEQ_queries++; 3229 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE); 3230 if (cmpLEQ_cache[hash].vi1 == vi1 3231 && cmpLEQ_cache[hash].vi2 == vi2) 3232 return cmpLEQ_cache[hash].leq; 3233 stats__cmpLEQ_misses++; 3234 ////-- 3235 v1 = VtsID__to_VTS(vi1); 3236 v2 = VtsID__to_VTS(vi2); 3237 leq = VTS__cmpLEQ( v1, v2 ) == 0; 3238 ////++ 3239 cmpLEQ_cache[hash].vi1 = vi1; 3240 cmpLEQ_cache[hash].vi2 = vi2; 3241 cmpLEQ_cache[hash].leq = leq; 3242 ////-- 3243 return leq; 3244 } 3245 static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) { 3246 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2); 3247 } 3248 3249 /* compute binary join */ 3250 __attribute__((noinline)) 3251 static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) { 3252 UInt hash; 3253 VtsID res; 3254 VTS *vts1, *vts2; 3255 //if (vi1 == vi2) return vi1; 3256 tl_assert(vi1 != vi2); 3257 ////++ 3258 stats__join2_queries++; 3259 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE); 3260 if (join2_cache[hash].vi1 == vi1 3261 && join2_cache[hash].vi2 == vi2) 3262 return join2_cache[hash].res; 3263 stats__join2_misses++; 3264 ////-- 3265 vts1 = VtsID__to_VTS(vi1); 3266 vts2 = VtsID__to_VTS(vi2); 3267 temp_max_sized_VTS->usedTS = 0; 3268 VTS__join(temp_max_sized_VTS, vts1,vts2); 3269 res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS); 3270 ////++ 3271 join2_cache[hash].vi1 = vi1; 3272 join2_cache[hash].vi2 = vi2; 3273 join2_cache[hash].res = res; 3274 ////-- 3275 return res; 3276 } 3277 static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) { 3278 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2); 3279 } 3280 3281 /* create a singleton VTS, namely [thr:1] */ 3282 static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) { 3283 temp_max_sized_VTS->usedTS = 0; 3284 VTS__singleton(temp_max_sized_VTS, thr,tym); 3285 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS); 3286 } 3287 3288 /* tick operation, creates value 1 if specified index is absent */ 3289 static VtsID VtsID__tick ( VtsID vi, Thr* idx ) { 3290 VTS* vts = VtsID__to_VTS(vi); 3291 temp_max_sized_VTS->usedTS = 0; 3292 VTS__tick(temp_max_sized_VTS, idx,vts); 3293 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS); 3294 } 3295 3296 /* index into a VTS (only for assertions) */ 3297 static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) { 3298 VTS* vts = VtsID__to_VTS(vi); 3299 return VTS__indexAt_SLOW( vts, idx ); 3300 } 3301 3302 /* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or 3303 any, really) element in vi1 which is pointwise greater-than the 3304 corresponding element in vi2. If no such element exists, return 3305 NULL. This needs to be fairly quick since it is called every time 3306 a race is detected. */ 3307 static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 ) 3308 { 3309 VTS *vts1, *vts2; 3310 Thr* diffthr; 3311 ThrID diffthrid; 3312 tl_assert(vi1 != vi2); 3313 vts1 = VtsID__to_VTS(vi1); 3314 vts2 = VtsID__to_VTS(vi2); 3315 tl_assert(vts1 != vts2); 3316 diffthrid = VTS__cmpLEQ(vts1, vts2); 3317 diffthr = Thr__from_ThrID(diffthrid); 3318 tl_assert(diffthr); /* else they are LEQ ! */ 3319 return diffthr; 3320 } 3321 3322 3323 ///////////////////////////////////////////////////////// 3324 // // 3325 // Filters // 3326 // // 3327 ///////////////////////////////////////////////////////// 3328 3329 /* Forget everything we know -- clear the filter and let everything 3330 through. This needs to be as fast as possible, since it is called 3331 every time the running thread changes, and every time a thread's 3332 vector clocks change, which can be quite frequent. The obvious 3333 fast way to do this is simply to stuff in tags which we know are 3334 not going to match anything, since they're not aligned to the start 3335 of a line. */ 3336 static void Filter__clear ( Filter* fi, HChar* who ) 3337 { 3338 UWord i; 3339 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who); 3340 for (i = 0; i < FI_NUM_LINES; i += 8) { 3341 fi->tags[i+0] = 1; /* impossible value -- cannot match */ 3342 fi->tags[i+1] = 1; 3343 fi->tags[i+2] = 1; 3344 fi->tags[i+3] = 1; 3345 fi->tags[i+4] = 1; 3346 fi->tags[i+5] = 1; 3347 fi->tags[i+6] = 1; 3348 fi->tags[i+7] = 1; 3349 } 3350 tl_assert(i == FI_NUM_LINES); 3351 } 3352 3353 /* Clearing an arbitrary range in the filter. Unfortunately 3354 we have to do this due to core-supplied new/die-mem events. */ 3355 3356 static void Filter__clear_1byte ( Filter* fi, Addr a ) 3357 { 3358 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3359 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3360 FiLine* line = &fi->lines[lineno]; 3361 UWord loff = (a - atag) / 8; 3362 UShort mask = 0x3 << (2 * (a & 7)); 3363 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */ 3364 if (LIKELY( fi->tags[lineno] == atag )) { 3365 /* hit. clear the bits. */ 3366 UShort u16 = line->u16s[loff]; 3367 line->u16s[loff] = u16 & ~mask; /* clear them */ 3368 } else { 3369 /* miss. The filter doesn't hold this address, so ignore. */ 3370 } 3371 } 3372 3373 static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a ) 3374 { 3375 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3376 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3377 FiLine* line = &fi->lines[lineno]; 3378 UWord loff = (a - atag) / 8; 3379 if (LIKELY( fi->tags[lineno] == atag )) { 3380 line->u16s[loff] = 0; 3381 } else { 3382 /* miss. The filter doesn't hold this address, so ignore. */ 3383 } 3384 } 3385 3386 static void Filter__clear_range ( Filter* fi, Addr a, UWord len ) 3387 { 3388 //VG_(printf)("%lu ", len); 3389 /* slowly do part preceding 8-alignment */ 3390 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) { 3391 Filter__clear_1byte( fi, a ); 3392 a++; 3393 len--; 3394 } 3395 /* vector loop */ 3396 while (len >= 8) { 3397 Filter__clear_8bytes_aligned( fi, a ); 3398 a += 8; 3399 len -= 8; 3400 } 3401 /* slowly do tail */ 3402 while (UNLIKELY(len > 0)) { 3403 Filter__clear_1byte( fi, a ); 3404 a++; 3405 len--; 3406 } 3407 } 3408 3409 3410 /* ------ Read handlers for the filter. ------ */ 3411 3412 static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a ) 3413 { 3414 if (UNLIKELY( !VG_IS_8_ALIGNED(a) )) 3415 return False; 3416 { 3417 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3418 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3419 FiLine* line = &fi->lines[lineno]; 3420 UWord loff = (a - atag) / 8; 3421 UShort mask = 0xAAAA; 3422 if (LIKELY( fi->tags[lineno] == atag )) { 3423 /* hit. check line and update. */ 3424 UShort u16 = line->u16s[loff]; 3425 Bool ok = (u16 & mask) == mask; /* all R bits set? */ 3426 line->u16s[loff] = u16 | mask; /* set them */ 3427 return ok; 3428 } else { 3429 /* miss. nuke existing line and re-use it. */ 3430 UWord i; 3431 fi->tags[lineno] = atag; 3432 for (i = 0; i < FI_LINE_SZB / 8; i++) 3433 line->u16s[i] = 0; 3434 line->u16s[loff] = mask; 3435 return False; 3436 } 3437 } 3438 } 3439 3440 static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a ) 3441 { 3442 if (UNLIKELY( !VG_IS_4_ALIGNED(a) )) 3443 return False; 3444 { 3445 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3446 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3447 FiLine* line = &fi->lines[lineno]; 3448 UWord loff = (a - atag) / 8; 3449 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */ 3450 if (LIKELY( fi->tags[lineno] == atag )) { 3451 /* hit. check line and update. */ 3452 UShort u16 = line->u16s[loff]; 3453 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */ 3454 line->u16s[loff] = u16 | mask; /* set them */ 3455 return ok; 3456 } else { 3457 /* miss. nuke existing line and re-use it. */ 3458 UWord i; 3459 fi->tags[lineno] = atag; 3460 for (i = 0; i < FI_LINE_SZB / 8; i++) 3461 line->u16s[i] = 0; 3462 line->u16s[loff] = mask; 3463 return False; 3464 } 3465 } 3466 } 3467 3468 static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a ) 3469 { 3470 if (UNLIKELY( !VG_IS_2_ALIGNED(a) )) 3471 return False; 3472 { 3473 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3474 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3475 FiLine* line = &fi->lines[lineno]; 3476 UWord loff = (a - atag) / 8; 3477 UShort mask = 0xA << (2 * (a & 6)); 3478 /* mask is A000, 0A00, 00A0 or 000A */ 3479 if (LIKELY( fi->tags[lineno] == atag )) { 3480 /* hit. check line and update. */ 3481 UShort u16 = line->u16s[loff]; 3482 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */ 3483 line->u16s[loff] = u16 | mask; /* set them */ 3484 return ok; 3485 } else { 3486 /* miss. nuke existing line and re-use it. */ 3487 UWord i; 3488 fi->tags[lineno] = atag; 3489 for (i = 0; i < FI_LINE_SZB / 8; i++) 3490 line->u16s[i] = 0; 3491 line->u16s[loff] = mask; 3492 return False; 3493 } 3494 } 3495 } 3496 3497 static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a ) 3498 { 3499 { 3500 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3501 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3502 FiLine* line = &fi->lines[lineno]; 3503 UWord loff = (a - atag) / 8; 3504 UShort mask = 0x2 << (2 * (a & 7)); 3505 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */ 3506 if (LIKELY( fi->tags[lineno] == atag )) { 3507 /* hit. check line and update. */ 3508 UShort u16 = line->u16s[loff]; 3509 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */ 3510 line->u16s[loff] = u16 | mask; /* set them */ 3511 return ok; 3512 } else { 3513 /* miss. nuke existing line and re-use it. */ 3514 UWord i; 3515 fi->tags[lineno] = atag; 3516 for (i = 0; i < FI_LINE_SZB / 8; i++) 3517 line->u16s[i] = 0; 3518 line->u16s[loff] = mask; 3519 return False; 3520 } 3521 } 3522 } 3523 3524 3525 /* ------ Write handlers for the filter. ------ */ 3526 3527 static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a ) 3528 { 3529 if (UNLIKELY( !VG_IS_8_ALIGNED(a) )) 3530 return False; 3531 { 3532 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3533 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3534 FiLine* line = &fi->lines[lineno]; 3535 UWord loff = (a - atag) / 8; 3536 UShort mask = 0xFFFF; 3537 if (LIKELY( fi->tags[lineno] == atag )) { 3538 /* hit. check line and update. */ 3539 UShort u16 = line->u16s[loff]; 3540 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */ 3541 line->u16s[loff] = u16 | mask; /* set them */ 3542 return ok; 3543 } else { 3544 /* miss. nuke existing line and re-use it. */ 3545 UWord i; 3546 fi->tags[lineno] = atag; 3547 for (i = 0; i < FI_LINE_SZB / 8; i++) 3548 line->u16s[i] = 0; 3549 line->u16s[loff] = mask; 3550 return False; 3551 } 3552 } 3553 } 3554 3555 static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a ) 3556 { 3557 if (UNLIKELY( !VG_IS_4_ALIGNED(a) )) 3558 return False; 3559 { 3560 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3561 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3562 FiLine* line = &fi->lines[lineno]; 3563 UWord loff = (a - atag) / 8; 3564 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */ 3565 if (LIKELY( fi->tags[lineno] == atag )) { 3566 /* hit. check line and update. */ 3567 UShort u16 = line->u16s[loff]; 3568 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */ 3569 line->u16s[loff] = u16 | mask; /* set them */ 3570 return ok; 3571 } else { 3572 /* miss. nuke existing line and re-use it. */ 3573 UWord i; 3574 fi->tags[lineno] = atag; 3575 for (i = 0; i < FI_LINE_SZB / 8; i++) 3576 line->u16s[i] = 0; 3577 line->u16s[loff] = mask; 3578 return False; 3579 } 3580 } 3581 } 3582 3583 static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a ) 3584 { 3585 if (UNLIKELY( !VG_IS_2_ALIGNED(a) )) 3586 return False; 3587 { 3588 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3589 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3590 FiLine* line = &fi->lines[lineno]; 3591 UWord loff = (a - atag) / 8; 3592 UShort mask = 0xF << (2 * (a & 6)); 3593 /* mask is F000, 0F00, 00F0 or 000F */ 3594 if (LIKELY( fi->tags[lineno] == atag )) { 3595 /* hit. check line and update. */ 3596 UShort u16 = line->u16s[loff]; 3597 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */ 3598 line->u16s[loff] = u16 | mask; /* set them */ 3599 return ok; 3600 } else { 3601 /* miss. nuke existing line and re-use it. */ 3602 UWord i; 3603 fi->tags[lineno] = atag; 3604 for (i = 0; i < FI_LINE_SZB / 8; i++) 3605 line->u16s[i] = 0; 3606 line->u16s[loff] = mask; 3607 return False; 3608 } 3609 } 3610 } 3611 3612 static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a ) 3613 { 3614 { 3615 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3616 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3617 FiLine* line = &fi->lines[lineno]; 3618 UWord loff = (a - atag) / 8; 3619 UShort mask = 0x3 << (2 * (a & 7)); 3620 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */ 3621 if (LIKELY( fi->tags[lineno] == atag )) { 3622 /* hit. check line and update. */ 3623 UShort u16 = line->u16s[loff]; 3624 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */ 3625 line->u16s[loff] = u16 | mask; /* set them */ 3626 return ok; 3627 } else { 3628 /* miss. nuke existing line and re-use it. */ 3629 UWord i; 3630 fi->tags[lineno] = atag; 3631 for (i = 0; i < FI_LINE_SZB / 8; i++) 3632 line->u16s[i] = 0; 3633 line->u16s[loff] = mask; 3634 return False; 3635 } 3636 } 3637 } 3638 3639 3640 ///////////////////////////////////////////////////////// 3641 // // 3642 // Threads // 3643 // // 3644 ///////////////////////////////////////////////////////// 3645 3646 /* Maps ThrID values to their Thr*s (which contain ThrID values that 3647 should point back to the relevant slot in the array. Lowest 3648 numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */ 3649 static XArray* /* of Thr* */ thrid_to_thr_map = NULL; 3650 3651 /* And a counter to dole out ThrID values. For rationale/background, 3652 see comments on definition of ScalarTS (far) above. */ 3653 static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */ 3654 3655 static ThrID Thr__to_ThrID ( Thr* thr ) { 3656 return thr->thrid; 3657 } 3658 static Thr* Thr__from_ThrID ( UInt thrid ) { 3659 Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 ); 3660 tl_assert(thr->thrid == thrid); 3661 return thr; 3662 } 3663 3664 static Thr* Thr__new ( void ) 3665 { 3666 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) ); 3667 thr->viR = VtsID_INVALID; 3668 thr->viW = VtsID_INVALID; 3669 thr->llexit_done = False; 3670 thr->joinedwith_done = False; 3671 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) ); 3672 /* We only really need this at history level 1, but unfortunately 3673 this routine is called before the command line processing is 3674 done (sigh), so we can't rely on HG_(clo_history_level) at this 3675 point. Hence always allocate it. Bah. */ 3676 thr->local_Kws_n_stacks 3677 = VG_(newXA)( HG_(zalloc), 3678 "libhb.Thr__new.3 (local_Kws_and_stacks)", 3679 HG_(free), sizeof(ULong_n_EC) ); 3680 3681 /* Add this Thr* <-> ThrID binding to the mapping, and 3682 cross-check */ 3683 if (!thrid_to_thr_map) { 3684 thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4", 3685 HG_(free), sizeof(Thr*) ); 3686 tl_assert(thrid_to_thr_map); 3687 } 3688 3689 if (thrid_counter >= ThrID_MAX_VALID) { 3690 /* We're hosed. We have to stop. */ 3691 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ ); 3692 } 3693 3694 thr->thrid = thrid_counter++; 3695 Word ix = VG_(addToXA)( thrid_to_thr_map, &thr ); 3696 tl_assert(ix + 1024 == thr->thrid); 3697 3698 return thr; 3699 } 3700 3701 static void note_local_Kw_n_stack_for ( Thr* thr ) 3702 { 3703 Word nPresent; 3704 ULong_n_EC pair; 3705 tl_assert(thr); 3706 3707 // We only collect this info at history level 1 (approx) 3708 if (HG_(clo_history_level) != 1) 3709 return; 3710 3711 /* This is the scalar Kw for thr. */ 3712 pair.ull = VtsID__indexAt( thr->viW, thr ); 3713 pair.ec = main_get_EC( thr ); 3714 tl_assert(pair.ec); 3715 tl_assert(thr->local_Kws_n_stacks); 3716 3717 /* check that we're not adding duplicates */ 3718 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks ); 3719 3720 /* Throw away old stacks, if necessary. We can't accumulate stuff 3721 indefinitely. */ 3722 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) { 3723 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 ); 3724 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks ); 3725 if (0) 3726 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n", 3727 thr, pair.ull, pair.ec ); 3728 } 3729 3730 if (nPresent > 0) { 3731 ULong_n_EC* prevPair 3732 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 ); 3733 tl_assert( prevPair->ull <= pair.ull ); 3734 } 3735 3736 if (nPresent == 0) 3737 pair.ec = NULL; 3738 3739 VG_(addToXA)( thr->local_Kws_n_stacks, &pair ); 3740 3741 if (0) 3742 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n", 3743 thr, pair.ull, pair.ec ); 3744 if (0) 3745 VG_(pp_ExeContext)(pair.ec); 3746 } 3747 3748 static Int cmp__ULong_n_EC__by_ULong ( ULong_n_EC* pair1, ULong_n_EC* pair2 ) 3749 { 3750 if (pair1->ull < pair2->ull) return -1; 3751 if (pair1->ull > pair2->ull) return 1; 3752 return 0; 3753 } 3754 3755 3756 ///////////////////////////////////////////////////////// 3757 // // 3758 // Shadow Values // 3759 // // 3760 ///////////////////////////////////////////////////////// 3761 3762 // type SVal, SVal_INVALID and SVal_NOACCESS are defined by 3763 // hb_zsm.h. We have to do everything else here. 3764 3765 /* SVal is 64 bit unsigned int. 3766 3767 <---------30---------> <---------30---------> 3768 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin) 3769 10 X--------------------X XX X--------------------X A: SVal_NOACCESS 3770 11 0--------------------0 00 0--------------------0 A: SVal_INVALID 3771 3772 */ 3773 #define SVAL_TAGMASK (3ULL << 62) 3774 3775 static inline Bool SVal__isC ( SVal s ) { 3776 return (0ULL << 62) == (s & SVAL_TAGMASK); 3777 } 3778 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) { 3779 //tl_assert(VtsID__is_valid(rmini)); 3780 //tl_assert(VtsID__is_valid(wmini)); 3781 return (((ULong)rmini) << 32) | ((ULong)wmini); 3782 } 3783 static inline VtsID SVal__unC_Rmin ( SVal s ) { 3784 tl_assert(SVal__isC(s)); 3785 return (VtsID)(s >> 32); 3786 } 3787 static inline VtsID SVal__unC_Wmin ( SVal s ) { 3788 tl_assert(SVal__isC(s)); 3789 return (VtsID)(s & 0xFFFFFFFFULL); 3790 } 3791 3792 static inline Bool SVal__isA ( SVal s ) { 3793 return (2ULL << 62) == (s & SVAL_TAGMASK); 3794 } 3795 static inline SVal SVal__mkA ( void ) { 3796 return 2ULL << 62; 3797 } 3798 3799 /* Direct callback from lib_zsm. */ 3800 static void SVal__rcinc ( SVal s ) { 3801 if (SVal__isC(s)) { 3802 VtsID__rcinc( SVal__unC_Rmin(s) ); 3803 VtsID__rcinc( SVal__unC_Wmin(s) ); 3804 } 3805 } 3806 3807 /* Direct callback from lib_zsm. */ 3808 static void SVal__rcdec ( SVal s ) { 3809 if (SVal__isC(s)) { 3810 VtsID__rcdec( SVal__unC_Rmin(s) ); 3811 VtsID__rcdec( SVal__unC_Wmin(s) ); 3812 } 3813 } 3814 3815 3816 ///////////////////////////////////////////////////////// 3817 // // 3818 // Change-event map2 // 3819 // // 3820 ///////////////////////////////////////////////////////// 3821 3822 #define EVENT_MAP_GC_DISCARD_FRACTION 0.5 3823 3824 /* This is in two parts: 3825 3826 1. A hash table of RCECs. This is a set of reference-counted stack 3827 traces. When the reference count of a stack trace becomes zero, 3828 it is removed from the set and freed up. The intent is to have 3829 a set of stack traces which can be referred to from (2), but to 3830 only represent each one once. The set is indexed/searched by 3831 ordering on the stack trace vectors. 3832 3833 2. A SparseWA of OldRefs. These store information about each old 3834 ref that we need to record. It is indexed by address of the 3835 location for which the information is recorded. For LRU 3836 purposes, each OldRef also contains a generation number, 3837 indicating when it was most recently accessed. 3838 3839 The important part of an OldRef is, however, its accs[] array. 3840 This is an array of N_OLDREF_ACCS which binds (thread, R/W, 3841 size) triples to RCECs. This allows us to collect the last 3842 access-traceback by up to N_OLDREF_ACCS different triples for 3843 this location. The accs[] array is a MTF-array. If a binding 3844 falls off the end, that's too bad -- we will lose info about 3845 that triple's access to this location. 3846 3847 When the SparseWA becomes too big, we can throw away the OldRefs 3848 whose generation numbers are below some threshold; hence doing 3849 approximate LRU discarding. For each discarded OldRef we must 3850 of course decrement the reference count on the all RCECs it 3851 refers to, in order that entries from (1) eventually get 3852 discarded too. 3853 3854 A major improvement in reliability of this mechanism would be to 3855 have a dynamically sized OldRef.accs[] array, so no entries ever 3856 fall off the end. In investigations (Dec 08) it appears that a 3857 major cause for the non-availability of conflicting-access traces 3858 in race reports is caused by the fixed size of this array. I 3859 suspect for most OldRefs, only a few entries are used, but for a 3860 minority of cases there is an overflow, leading to info lossage. 3861 Investigations also suggest this is very workload and scheduling 3862 sensitive. Therefore a dynamic sizing would be better. 3863 3864 However, dynamic sizing would defeat the use of a PoolAllocator 3865 for OldRef structures. And that's important for performance. So 3866 it's not straightforward to do. 3867 */ 3868 3869 3870 static UWord stats__ctxt_rcdec1 = 0; 3871 static UWord stats__ctxt_rcdec2 = 0; 3872 static UWord stats__ctxt_rcdec3 = 0; 3873 static UWord stats__ctxt_rcdec_calls = 0; 3874 static UWord stats__ctxt_rcdec_discards = 0; 3875 static UWord stats__ctxt_rcdec1_eq = 0; 3876 3877 static UWord stats__ctxt_tab_curr = 0; 3878 static UWord stats__ctxt_tab_max = 0; 3879 3880 static UWord stats__ctxt_tab_qs = 0; 3881 static UWord stats__ctxt_tab_cmps = 0; 3882 3883 3884 /////////////////////////////////////////////////////// 3885 //// Part (1): A hash table of RCECs 3886 /// 3887 3888 #define N_FRAMES 8 3889 3890 // (UInt) `echo "Reference Counted Execution Context" | md5sum` 3891 #define RCEC_MAGIC 0xab88abb2UL 3892 3893 //#define N_RCEC_TAB 98317 /* prime */ 3894 #define N_RCEC_TAB 196613 /* prime */ 3895 3896 typedef 3897 struct _RCEC { 3898 UWord magic; /* sanity check only */ 3899 struct _RCEC* next; 3900 UWord rc; 3901 UWord rcX; /* used for crosschecking */ 3902 UWord frames_hash; /* hash of all the frames */ 3903 UWord frames[N_FRAMES]; 3904 } 3905 RCEC; 3906 3907 static RCEC** contextTab = NULL; /* hash table of RCEC*s */ 3908 3909 3910 /* Gives an arbitrary total order on RCEC .frames fields */ 3911 static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) { 3912 Word i; 3913 tl_assert(ec1 && ec1->magic == RCEC_MAGIC); 3914 tl_assert(ec2 && ec2->magic == RCEC_MAGIC); 3915 if (ec1->frames_hash < ec2->frames_hash) return -1; 3916 if (ec1->frames_hash > ec2->frames_hash) return 1; 3917 for (i = 0; i < N_FRAMES; i++) { 3918 if (ec1->frames[i] < ec2->frames[i]) return -1; 3919 if (ec1->frames[i] > ec2->frames[i]) return 1; 3920 } 3921 return 0; 3922 } 3923 3924 3925 /* Dec the ref of this RCEC. */ 3926 static void ctxt__rcdec ( RCEC* ec ) 3927 { 3928 stats__ctxt_rcdec_calls++; 3929 tl_assert(ec && ec->magic == RCEC_MAGIC); 3930 tl_assert(ec->rc > 0); 3931 ec->rc--; 3932 } 3933 3934 static void ctxt__rcinc ( RCEC* ec ) 3935 { 3936 tl_assert(ec && ec->magic == RCEC_MAGIC); 3937 ec->rc++; 3938 } 3939 3940 3941 //////////// BEGIN RCEC pool allocator 3942 static PoolAlloc* rcec_pool_allocator; 3943 3944 static RCEC* alloc_RCEC ( void ) { 3945 return VG_(allocEltPA) ( rcec_pool_allocator ); 3946 } 3947 3948 static void free_RCEC ( RCEC* rcec ) { 3949 tl_assert(rcec->magic == RCEC_MAGIC); 3950 VG_(freeEltPA)( rcec_pool_allocator, rcec ); 3951 } 3952 //////////// END RCEC pool allocator 3953 3954 3955 /* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and 3956 move it one step closer the the front of the list, so as to make 3957 subsequent searches for it cheaper. */ 3958 static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec ) 3959 { 3960 RCEC *ec0, *ec1, *ec2; 3961 if (ec == *headp) 3962 tl_assert(0); /* already at head of list */ 3963 tl_assert(ec != NULL); 3964 ec0 = *headp; 3965 ec1 = NULL; 3966 ec2 = NULL; 3967 while (True) { 3968 if (ec0 == NULL || ec0 == ec) break; 3969 ec2 = ec1; 3970 ec1 = ec0; 3971 ec0 = ec0->next; 3972 } 3973 tl_assert(ec0 == ec); 3974 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) { 3975 RCEC* tmp; 3976 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's 3977 predecessor. Swap ec0 and ec1, that is, move ec0 one step 3978 closer to the start of the list. */ 3979 tl_assert(ec2->next == ec1); 3980 tl_assert(ec1->next == ec0); 3981 tmp = ec0->next; 3982 ec2->next = ec0; 3983 ec0->next = ec1; 3984 ec1->next = tmp; 3985 } 3986 else 3987 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) { 3988 /* it's second in the list. */ 3989 tl_assert(*headp == ec1); 3990 tl_assert(ec1->next == ec0); 3991 ec1->next = ec0->next; 3992 ec0->next = ec1; 3993 *headp = ec0; 3994 } 3995 } 3996 3997 3998 /* Find the given RCEC in the tree, and return a pointer to it. Or, 3999 if not present, add the given one to the tree (by making a copy of 4000 it, so the caller can immediately deallocate the original) and 4001 return a pointer to the copy. The caller can safely have 'example' 4002 on its stack, since we will always return a pointer to a copy of 4003 it, not to the original. Note that the inserted node will have .rc 4004 of zero and so the caller must immediatly increment it. */ 4005 __attribute__((noinline)) 4006 static RCEC* ctxt__find_or_add ( RCEC* example ) 4007 { 4008 UWord hent; 4009 RCEC* copy; 4010 tl_assert(example && example->magic == RCEC_MAGIC); 4011 tl_assert(example->rc == 0); 4012 4013 /* Search the hash table to see if we already have it. */ 4014 stats__ctxt_tab_qs++; 4015 hent = example->frames_hash % N_RCEC_TAB; 4016 copy = contextTab[hent]; 4017 while (1) { 4018 if (!copy) break; 4019 tl_assert(copy->magic == RCEC_MAGIC); 4020 stats__ctxt_tab_cmps++; 4021 if (0 == RCEC__cmp_by_frames(copy, example)) break; 4022 copy = copy->next; 4023 } 4024 4025 if (copy) { 4026 tl_assert(copy != example); 4027 /* optimisation: if it's not at the head of its list, move 1 4028 step fwds, to make future searches cheaper */ 4029 if (copy != contextTab[hent]) { 4030 move_RCEC_one_step_forward( &contextTab[hent], copy ); 4031 } 4032 } else { 4033 copy = alloc_RCEC(); 4034 tl_assert(copy != example); 4035 *copy = *example; 4036 copy->next = contextTab[hent]; 4037 contextTab[hent] = copy; 4038 stats__ctxt_tab_curr++; 4039 if (stats__ctxt_tab_curr > stats__ctxt_tab_max) 4040 stats__ctxt_tab_max = stats__ctxt_tab_curr; 4041 } 4042 return copy; 4043 } 4044 4045 static inline UWord ROLW ( UWord w, Int n ) 4046 { 4047 Int bpw = 8 * sizeof(UWord); 4048 w = (w << n) | (w >> (bpw-n)); 4049 return w; 4050 } 4051 4052 __attribute__((noinline)) 4053 static RCEC* get_RCEC ( Thr* thr ) 4054 { 4055 UWord hash, i; 4056 RCEC example; 4057 example.magic = RCEC_MAGIC; 4058 example.rc = 0; 4059 example.rcX = 0; 4060 main_get_stacktrace( thr, &example.frames[0], N_FRAMES ); 4061 hash = 0; 4062 for (i = 0; i < N_FRAMES; i++) { 4063 hash ^= example.frames[i]; 4064 hash = ROLW(hash, 19); 4065 } 4066 example.frames_hash = hash; 4067 return ctxt__find_or_add( &example ); 4068 } 4069 4070 /////////////////////////////////////////////////////// 4071 //// Part (2): 4072 /// A SparseWA guest-addr -> OldRef, that refers to (1) 4073 /// 4074 4075 // (UInt) `echo "Old Reference Information" | md5sum` 4076 #define OldRef_MAGIC 0x30b1f075UL 4077 4078 /* Records an access: a thread, a context (size & writeness) and the 4079 number of held locks. The size (1,2,4,8) is encoded as 00 = 1, 01 = 4080 2, 10 = 4, 11 = 8. 4081 */ 4082 typedef 4083 struct { 4084 RCEC* rcec; 4085 WordSetID locksHeldW; 4086 UInt thrid : SCALARTS_N_THRBITS; 4087 UInt szLg2B : 2; 4088 UInt isW : 1; 4089 } 4090 Thr_n_RCEC; 4091 4092 #define N_OLDREF_ACCS 5 4093 4094 typedef 4095 struct { 4096 UWord magic; /* sanity check only */ 4097 UWord gen; /* when most recently accessed */ 4098 /* or free list when not in use */ 4099 /* unused slots in this array have .thrid == 0, which is invalid */ 4100 Thr_n_RCEC accs[N_OLDREF_ACCS]; 4101 } 4102 OldRef; 4103 4104 4105 //////////// BEGIN OldRef pool allocator 4106 static PoolAlloc* oldref_pool_allocator; 4107 4108 static OldRef* alloc_OldRef ( void ) { 4109 return VG_(allocEltPA) ( oldref_pool_allocator ); 4110 } 4111 4112 static void free_OldRef ( OldRef* r ) { 4113 tl_assert(r->magic == OldRef_MAGIC); 4114 VG_(freeEltPA)( oldref_pool_allocator, r ); 4115 } 4116 //////////// END OldRef pool allocator 4117 4118 4119 static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */ 4120 static UWord oldrefGen = 0; /* current LRU generation # */ 4121 static UWord oldrefTreeN = 0; /* # elems in oldrefTree */ 4122 static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */ 4123 4124 inline static UInt min_UInt ( UInt a, UInt b ) { 4125 return a < b ? a : b; 4126 } 4127 4128 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the 4129 first interval is lower, 1 if the first interval is higher, and 0 4130 if there is any overlap. Redundant paranoia with casting is there 4131 following what looked distinctly like a bug in gcc-4.1.2, in which 4132 some of the comparisons were done signedly instead of 4133 unsignedly. */ 4134 /* Copied from exp-ptrcheck/sg_main.c */ 4135 static Word cmp_nonempty_intervals ( Addr a1, SizeT n1, 4136 Addr a2, SizeT n2 ) { 4137 UWord a1w = (UWord)a1; 4138 UWord n1w = (UWord)n1; 4139 UWord a2w = (UWord)a2; 4140 UWord n2w = (UWord)n2; 4141 tl_assert(n1w > 0 && n2w > 0); 4142 if (a1w + n1w <= a2w) return -1L; 4143 if (a2w + n2w <= a1w) return 1L; 4144 return 0; 4145 } 4146 4147 static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr ) 4148 { 4149 OldRef* ref; 4150 RCEC* rcec; 4151 Word i, j; 4152 UWord keyW, valW; 4153 Bool b; 4154 4155 tl_assert(thr); 4156 ThrID thrid = thr->thrid; 4157 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */ 4158 4159 WordSetID locksHeldW = thr->hgthread->locksetW; 4160 4161 rcec = get_RCEC( thr ); 4162 ctxt__rcinc(rcec); 4163 4164 UInt szLg2B = 0; 4165 switch (szB) { 4166 /* This doesn't look particularly branch-predictor friendly. */ 4167 case 1: szLg2B = 0; break; 4168 case 2: szLg2B = 1; break; 4169 case 4: szLg2B = 2; break; 4170 case 8: szLg2B = 3; break; 4171 default: tl_assert(0); 4172 } 4173 4174 /* Look in the map to see if we already have a record for this 4175 address. */ 4176 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a ); 4177 4178 if (b) { 4179 4180 /* We already have a record for this address. We now need to 4181 see if we have a stack trace pertaining to this (thrid, R/W, 4182 size) triple. */ 4183 tl_assert(keyW == a); 4184 ref = (OldRef*)valW; 4185 tl_assert(ref->magic == OldRef_MAGIC); 4186 4187 for (i = 0; i < N_OLDREF_ACCS; i++) { 4188 if (ref->accs[i].thrid != thrid) 4189 continue; 4190 if (ref->accs[i].szLg2B != szLg2B) 4191 continue; 4192 if (ref->accs[i].isW != (UInt)(isW & 1)) 4193 continue; 4194 /* else we have a match, so stop looking. */ 4195 break; 4196 } 4197 4198 if (i < N_OLDREF_ACCS) { 4199 /* thread 'thr' has an entry at index 'i'. Update its RCEC. */ 4200 if (i > 0) { 4201 Thr_n_RCEC tmp = ref->accs[i-1]; 4202 ref->accs[i-1] = ref->accs[i]; 4203 ref->accs[i] = tmp; 4204 i--; 4205 } 4206 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++; 4207 stats__ctxt_rcdec1++; 4208 ctxt__rcdec( ref->accs[i].rcec ); 4209 tl_assert(ref->accs[i].thrid == thrid); 4210 /* Update the RCEC and the W-held lockset. */ 4211 ref->accs[i].rcec = rcec; 4212 ref->accs[i].locksHeldW = locksHeldW; 4213 } else { 4214 /* No entry for this (thread, R/W, size, nWHeld) quad. 4215 Shuffle all of them down one slot, and put the new entry 4216 at the start of the array. */ 4217 if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) { 4218 /* the last slot is in use. We must dec the rc on the 4219 associated rcec. */ 4220 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec); 4221 stats__ctxt_rcdec2++; 4222 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF)) 4223 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2); 4224 ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec ); 4225 } else { 4226 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec); 4227 } 4228 for (j = N_OLDREF_ACCS-1; j >= 1; j--) 4229 ref->accs[j] = ref->accs[j-1]; 4230 ref->accs[0].thrid = thrid; 4231 ref->accs[0].szLg2B = szLg2B; 4232 ref->accs[0].isW = (UInt)(isW & 1); 4233 ref->accs[0].locksHeldW = locksHeldW; 4234 ref->accs[0].rcec = rcec; 4235 /* thrid==0 is used to signify an empty slot, so we can't 4236 add zero thrid (such a ThrID is invalid anyway). */ 4237 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */ 4238 } 4239 4240 ref->gen = oldrefGen; 4241 4242 } else { 4243 4244 /* We don't have a record for this address. Create a new one. */ 4245 if (oldrefTreeN >= oldrefGenIncAt) { 4246 oldrefGen++; 4247 oldrefGenIncAt = oldrefTreeN + 50000; 4248 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n", 4249 oldrefGen, oldrefTreeN ); 4250 } 4251 4252 ref = alloc_OldRef(); 4253 ref->magic = OldRef_MAGIC; 4254 ref->gen = oldrefGen; 4255 ref->accs[0].thrid = thrid; 4256 ref->accs[0].szLg2B = szLg2B; 4257 ref->accs[0].isW = (UInt)(isW & 1); 4258 ref->accs[0].locksHeldW = locksHeldW; 4259 ref->accs[0].rcec = rcec; 4260 4261 /* thrid==0 is used to signify an empty slot, so we can't 4262 add zero thrid (such a ThrID is invalid anyway). */ 4263 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */ 4264 4265 /* Clear out the rest of the entries */ 4266 for (j = 1; j < N_OLDREF_ACCS; j++) { 4267 ref->accs[j].rcec = NULL; 4268 ref->accs[j].thrid = 0; 4269 ref->accs[j].szLg2B = 0; 4270 ref->accs[j].isW = 0; 4271 ref->accs[j].locksHeldW = 0; 4272 } 4273 VG_(addToSWA)( oldrefTree, a, (UWord)ref ); 4274 oldrefTreeN++; 4275 4276 } 4277 } 4278 4279 4280 /* Extract info from the conflicting-access machinery. */ 4281 Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC, 4282 /*OUT*/Thr** resThr, 4283 /*OUT*/SizeT* resSzB, 4284 /*OUT*/Bool* resIsW, 4285 /*OUT*/WordSetID* locksHeldW, 4286 Thr* thr, Addr a, SizeT szB, Bool isW ) 4287 { 4288 Word i, j; 4289 OldRef* ref; 4290 UWord keyW, valW; 4291 Bool b; 4292 4293 ThrID cand_thrid; 4294 RCEC* cand_rcec; 4295 Bool cand_isW; 4296 SizeT cand_szB; 4297 WordSetID cand_locksHeldW; 4298 Addr cand_a; 4299 4300 Addr toCheck[15]; 4301 Int nToCheck = 0; 4302 4303 tl_assert(thr); 4304 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1); 4305 4306 ThrID thrid = thr->thrid; 4307 4308 toCheck[nToCheck++] = a; 4309 for (i = -7; i < (Word)szB; i++) { 4310 if (i != 0) 4311 toCheck[nToCheck++] = a + i; 4312 } 4313 tl_assert(nToCheck <= 15); 4314 4315 /* Now see if we can find a suitable matching event for 4316 any of the addresses in toCheck[0 .. nToCheck-1]. */ 4317 for (j = 0; j < nToCheck; j++) { 4318 4319 cand_a = toCheck[j]; 4320 // VG_(printf)("test %ld %p\n", j, cand_a); 4321 4322 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a ); 4323 if (!b) 4324 continue; 4325 4326 ref = (OldRef*)valW; 4327 tl_assert(keyW == cand_a); 4328 tl_assert(ref->magic == OldRef_MAGIC); 4329 tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */ 4330 4331 cand_thrid = 0; /* invalid; see comments in event_map_bind */ 4332 cand_rcec = NULL; 4333 cand_isW = False; 4334 cand_szB = 0; 4335 cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */ 4336 4337 for (i = 0; i < N_OLDREF_ACCS; i++) { 4338 Thr_n_RCEC* cand = &ref->accs[i]; 4339 cand_rcec = cand->rcec; 4340 cand_thrid = cand->thrid; 4341 cand_isW = (Bool)cand->isW; 4342 cand_szB = 1 << cand->szLg2B; 4343 cand_locksHeldW = cand->locksHeldW; 4344 4345 if (cand_thrid == 0) 4346 /* This slot isn't in use. Ignore it. */ 4347 continue; 4348 4349 if (cand_thrid == thrid) 4350 /* This is an access by the same thread, but we're only 4351 interested in accesses from other threads. Ignore. */ 4352 continue; 4353 4354 if ((!cand_isW) && (!isW)) 4355 /* We don't want to report a read racing against another 4356 read; that's stupid. So in this case move on. */ 4357 continue; 4358 4359 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0) 4360 /* No overlap with the access we're asking about. Ignore. */ 4361 continue; 4362 4363 /* We have a match. Stop searching. */ 4364 break; 4365 } 4366 4367 tl_assert(i >= 0 && i <= N_OLDREF_ACCS); 4368 4369 if (i < N_OLDREF_ACCS) { 4370 Int n, maxNFrames; 4371 /* return with success */ 4372 tl_assert(cand_thrid); 4373 tl_assert(cand_rcec); 4374 tl_assert(cand_rcec->magic == RCEC_MAGIC); 4375 tl_assert(cand_szB >= 1); 4376 /* Count how many non-zero frames we have. */ 4377 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size)); 4378 for (n = 0; n < maxNFrames; n++) { 4379 if (0 == cand_rcec->frames[n]) break; 4380 } 4381 *resEC = VG_(make_ExeContext_from_StackTrace) 4382 (cand_rcec->frames, n); 4383 *resThr = Thr__from_ThrID(cand_thrid); 4384 *resSzB = cand_szB; 4385 *resIsW = cand_isW; 4386 *locksHeldW = cand_locksHeldW; 4387 return True; 4388 } 4389 4390 /* consider next address in toCheck[] */ 4391 } /* for (j = 0; j < nToCheck; j++) */ 4392 4393 /* really didn't find anything. */ 4394 return False; 4395 } 4396 4397 static void event_map_init ( void ) 4398 { 4399 Word i; 4400 4401 /* Context (RCEC) pool allocator */ 4402 rcec_pool_allocator = VG_(newPA) ( 4403 sizeof(RCEC), 4404 1000 /* RCECs per pool */, 4405 HG_(zalloc), 4406 "libhb.event_map_init.1 (RCEC pools)", 4407 HG_(free) 4408 ); 4409 4410 /* Context table */ 4411 tl_assert(!contextTab); 4412 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)", 4413 N_RCEC_TAB * sizeof(RCEC*) ); 4414 tl_assert(contextTab); 4415 for (i = 0; i < N_RCEC_TAB; i++) 4416 contextTab[i] = NULL; 4417 4418 /* Oldref pool allocator */ 4419 oldref_pool_allocator = VG_(newPA)( 4420 sizeof(OldRef), 4421 1000 /* OldRefs per pool */, 4422 HG_(zalloc), 4423 "libhb.event_map_init.3 (OldRef pools)", 4424 HG_(free) 4425 ); 4426 4427 /* Oldref tree */ 4428 tl_assert(!oldrefTree); 4429 oldrefTree = VG_(newSWA)( 4430 HG_(zalloc), 4431 "libhb.event_map_init.4 (oldref tree)", 4432 HG_(free) 4433 ); 4434 tl_assert(oldrefTree); 4435 4436 oldrefGen = 0; 4437 oldrefGenIncAt = 0; 4438 oldrefTreeN = 0; 4439 } 4440 4441 static void event_map__check_reference_counts ( Bool before ) 4442 { 4443 RCEC* rcec; 4444 OldRef* oldref; 4445 Word i; 4446 UWord nEnts = 0; 4447 UWord keyW, valW; 4448 4449 /* Set the 'check' reference counts to zero. Also, optionally 4450 check that the real reference counts are non-zero. We allow 4451 these to fall to zero before a GC, but the GC must get rid of 4452 all those that are zero, hence none should be zero after a 4453 GC. */ 4454 for (i = 0; i < N_RCEC_TAB; i++) { 4455 for (rcec = contextTab[i]; rcec; rcec = rcec->next) { 4456 nEnts++; 4457 tl_assert(rcec); 4458 tl_assert(rcec->magic == RCEC_MAGIC); 4459 if (!before) 4460 tl_assert(rcec->rc > 0); 4461 rcec->rcX = 0; 4462 } 4463 } 4464 4465 /* check that the stats are sane */ 4466 tl_assert(nEnts == stats__ctxt_tab_curr); 4467 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max); 4468 4469 /* visit all the referencing points, inc check ref counts */ 4470 VG_(initIterSWA)( oldrefTree ); 4471 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) { 4472 oldref = (OldRef*)valW; 4473 tl_assert(oldref->magic == OldRef_MAGIC); 4474 for (i = 0; i < N_OLDREF_ACCS; i++) { 4475 ThrID aThrID = oldref->accs[i].thrid; 4476 RCEC* aRef = oldref->accs[i].rcec; 4477 if (aThrID != 0) { 4478 tl_assert(aRef); 4479 tl_assert(aRef->magic == RCEC_MAGIC); 4480 aRef->rcX++; 4481 } else { 4482 tl_assert(!aRef); 4483 } 4484 } 4485 } 4486 4487 /* compare check ref counts with actual */ 4488 for (i = 0; i < N_RCEC_TAB; i++) { 4489 for (rcec = contextTab[i]; rcec; rcec = rcec->next) { 4490 tl_assert(rcec->rc == rcec->rcX); 4491 } 4492 } 4493 } 4494 4495 __attribute__((noinline)) 4496 static void event_map_maybe_GC ( void ) 4497 { 4498 OldRef* oldref; 4499 UWord keyW, valW, retained, maxGen; 4500 XArray* refs2del; 4501 Word i, j, n2del; 4502 4503 UWord* genMap = NULL; 4504 UWord genMap_min = 0; 4505 UWord genMap_size = 0; 4506 4507 if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size))) 4508 return; 4509 4510 if (0) 4511 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN); 4512 4513 /* Check for sane command line params. Limit values must match 4514 those in hg_process_cmd_line_option. */ 4515 tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 ); 4516 tl_assert( HG_(clo_conflict_cache_size) <= 30*1000*1000 ); 4517 4518 /* Check our counting is sane (expensive) */ 4519 if (CHECK_CEM) 4520 tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree )); 4521 4522 /* Check the reference counts (expensive) */ 4523 if (CHECK_CEM) 4524 event_map__check_reference_counts( True/*before*/ ); 4525 4526 /* Compute the distribution of generation values in the ref tree. 4527 There are likely only to be a few different generation numbers 4528 in the whole tree, but we don't know what they are. Hence use a 4529 dynamically resized array of counters. The array is genMap[0 4530 .. genMap_size-1], where genMap[0] is the count for the 4531 generation number genMap_min, genMap[1] is the count for 4532 genMap_min+1, etc. If a new number is seen outside the range 4533 [genMap_min .. genMap_min + genMap_size - 1] then the array is 4534 copied into a larger array, and genMap_min and genMap_size are 4535 adjusted accordingly. */ 4536 4537 /* genMap :: generation-number -> count-of-nodes-with-that-number */ 4538 4539 VG_(initIterSWA)( oldrefTree ); 4540 while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) { 4541 4542 UWord ea, key; 4543 oldref = (OldRef*)valW; 4544 key = oldref->gen; 4545 4546 /* BEGIN find 'ea', which is the index in genMap holding the 4547 count for generation number 'key'. */ 4548 if (UNLIKELY(genMap == NULL)) { 4549 /* deal with the first key to be seen, so that the following 4550 cases don't need to handle the complexity of a NULL count 4551 array. */ 4552 genMap_min = key; 4553 genMap_size = 1; 4554 genMap = HG_(zalloc)( "libhb.emmG.1a", 4555 genMap_size * sizeof(UWord) ); 4556 ea = 0; 4557 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n", 4558 key, genMap_min, genMap_min+genMap_size- 1 ); 4559 } 4560 else 4561 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) { 4562 /* this is the expected (almost-always-happens) case: 'key' 4563 is already mapped in the array. */ 4564 ea = key - genMap_min; 4565 } 4566 else 4567 if (key < genMap_min) { 4568 /* 'key' appears before the start of the current array. 4569 Extend the current array by allocating a larger one and 4570 copying the current one to the upper end of it. */ 4571 Word more; 4572 UWord* map2; 4573 more = genMap_min - key; 4574 tl_assert(more > 0); 4575 map2 = HG_(zalloc)( "libhb.emmG.1b", 4576 (genMap_size + more) * sizeof(UWord) ); 4577 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) ); 4578 HG_(free)( genMap ); 4579 genMap = map2; 4580 genMap_size += more; 4581 genMap_min -= more; 4582 ea = 0; 4583 tl_assert(genMap_min == key); 4584 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n", 4585 key, genMap_min, genMap_min+genMap_size- 1 ); 4586 } 4587 else { 4588 /* 'key' appears after the end of the current array. Extend 4589 the current array by allocating a larger one and copying 4590 the current one to the lower end of it. */ 4591 Word more; 4592 UWord* map2; 4593 tl_assert(key >= genMap_min + genMap_size); 4594 more = key - (genMap_min + genMap_size) + 1; 4595 tl_assert(more > 0); 4596 map2 = HG_(zalloc)( "libhb.emmG.1c", 4597 (genMap_size + more) * sizeof(UWord) ); 4598 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) ); 4599 HG_(free)( genMap ); 4600 genMap = map2; 4601 genMap_size += more; 4602 ea = genMap_size - 1;; 4603 tl_assert(genMap_min + genMap_size - 1 == key); 4604 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n", 4605 key, genMap_min, genMap_min+genMap_size- 1 ); 4606 } 4607 /* END find 'ea' from 'key' */ 4608 4609 tl_assert(ea >= 0 && ea < genMap_size); 4610 /* and the whole point of this elaborate computation of 'ea' is .. */ 4611 genMap[ea]++; 4612 } 4613 4614 tl_assert(genMap); 4615 tl_assert(genMap_size > 0); 4616 4617 /* Sanity check what we just computed */ 4618 { UWord sum = 0; 4619 for (i = 0; i < genMap_size; i++) { 4620 if (0) VG_(printf)(" xxx: gen %ld has %lu\n", 4621 i + genMap_min, genMap[i] ); 4622 sum += genMap[i]; 4623 } 4624 tl_assert(sum == oldrefTreeN); 4625 } 4626 4627 /* Figure out how many generations to throw away */ 4628 retained = oldrefTreeN; 4629 maxGen = 0; 4630 4631 for (i = 0; i < genMap_size; i++) { 4632 keyW = i + genMap_min; 4633 valW = genMap[i]; 4634 tl_assert(keyW > 0); /* can't allow a generation # 0 */ 4635 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW ); 4636 tl_assert(keyW >= maxGen); 4637 tl_assert(retained >= valW); 4638 if (retained - valW 4639 > (UWord)(HG_(clo_conflict_cache_size) 4640 * EVENT_MAP_GC_DISCARD_FRACTION)) { 4641 retained -= valW; 4642 maxGen = keyW; 4643 } else { 4644 break; 4645 } 4646 } 4647 4648 HG_(free)(genMap); 4649 4650 tl_assert(retained >= 0 && retained <= oldrefTreeN); 4651 4652 /* Now make up a big list of the oldrefTree entries we want to 4653 delete. We can't simultaneously traverse the tree and delete 4654 stuff from it, so first we need to copy them off somewhere 4655 else. (sigh) */ 4656 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2", 4657 HG_(free), sizeof(Addr) ); 4658 4659 if (retained < oldrefTreeN) { 4660 4661 /* This is the normal (expected) case. We discard any ref whose 4662 generation number <= maxGen. */ 4663 VG_(initIterSWA)( oldrefTree ); 4664 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) { 4665 oldref = (OldRef*)valW; 4666 tl_assert(oldref->magic == OldRef_MAGIC); 4667 if (oldref->gen <= maxGen) { 4668 VG_(addToXA)( refs2del, &keyW ); 4669 } 4670 } 4671 if (VG_(clo_stats)) { 4672 VG_(message)(Vg_DebugMsg, 4673 "libhb: EvM GC: delete generations %lu and below, " 4674 "retaining %lu entries\n", 4675 maxGen, retained ); 4676 } 4677 4678 } else { 4679 4680 static UInt rand_seed = 0; /* leave as static */ 4681 4682 /* Degenerate case: there's only one generation in the entire 4683 tree, so we need to have some other way of deciding which 4684 refs to throw away. Just throw out half of them randomly. */ 4685 tl_assert(retained == oldrefTreeN); 4686 VG_(initIterSWA)( oldrefTree ); 4687 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) { 4688 UInt n; 4689 oldref = (OldRef*)valW; 4690 tl_assert(oldref->magic == OldRef_MAGIC); 4691 n = VG_(random)( &rand_seed ); 4692 if ((n & 0xFFF) < 0x800) { 4693 VG_(addToXA)( refs2del, &keyW ); 4694 retained--; 4695 } 4696 } 4697 if (VG_(clo_stats)) { 4698 VG_(message)(Vg_DebugMsg, 4699 "libhb: EvM GC: randomly delete half the entries, " 4700 "retaining %lu entries\n", 4701 retained ); 4702 } 4703 4704 } 4705 4706 n2del = VG_(sizeXA)( refs2del ); 4707 tl_assert(n2del == (Word)(oldrefTreeN - retained)); 4708 4709 if (0) VG_(printf)("%s","deleting entries\n"); 4710 for (i = 0; i < n2del; i++) { 4711 Bool b; 4712 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i ); 4713 b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del ); 4714 tl_assert(b); 4715 tl_assert(keyW == ga2del); 4716 oldref = (OldRef*)valW; 4717 for (j = 0; j < N_OLDREF_ACCS; j++) { 4718 ThrID aThrID = oldref->accs[j].thrid; 4719 RCEC* aRef = oldref->accs[j].rcec; 4720 if (aRef) { 4721 tl_assert(aThrID != 0); 4722 stats__ctxt_rcdec3++; 4723 ctxt__rcdec( aRef ); 4724 } else { 4725 tl_assert(aThrID == 0); 4726 } 4727 } 4728 4729 free_OldRef( oldref ); 4730 } 4731 4732 VG_(deleteXA)( refs2del ); 4733 4734 tl_assert( VG_(sizeSWA)( oldrefTree ) == retained ); 4735 4736 oldrefTreeN = retained; 4737 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */ 4738 4739 /* Throw away all RCECs with zero reference counts */ 4740 for (i = 0; i < N_RCEC_TAB; i++) { 4741 RCEC** pp = &contextTab[i]; 4742 RCEC* p = *pp; 4743 while (p) { 4744 if (p->rc == 0) { 4745 *pp = p->next; 4746 free_RCEC(p); 4747 p = *pp; 4748 tl_assert(stats__ctxt_tab_curr > 0); 4749 stats__ctxt_tab_curr--; 4750 } else { 4751 pp = &p->next; 4752 p = p->next; 4753 } 4754 } 4755 } 4756 4757 /* Check the reference counts (expensive) */ 4758 if (CHECK_CEM) 4759 event_map__check_reference_counts( False/*after*/ ); 4760 4761 //if (0) 4762 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n", 4763 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree)); 4764 4765 } 4766 4767 4768 ///////////////////////////////////////////////////////// 4769 // // 4770 // Core MSM // 4771 // // 4772 ///////////////////////////////////////////////////////// 4773 4774 /* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19 4775 Nov 08, and again after [...], 4776 June 09. */ 4777 4778 static ULong stats__msmcread = 0; 4779 static ULong stats__msmcread_change = 0; 4780 static ULong stats__msmcwrite = 0; 4781 static ULong stats__msmcwrite_change = 0; 4782 4783 /* Some notes on the H1 history mechanism: 4784 4785 Transition rules are: 4786 4787 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw) 4788 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw) 4789 4790 After any access by a thread T to a location L, L's constraint pair 4791 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock. 4792 4793 After a race by thread T conflicting with some previous access by 4794 some other thread U, for a location with constraint (before 4795 processing the later access) (Cr,Cw), then Cw[U] is the segment in 4796 which the previously access lies. 4797 4798 Hence in record_race_info, we pass in Cfailed and Kfailed, which 4799 are compared so as to find out which thread(s) this access 4800 conflicts with. Once that is established, we also require the 4801 pre-update Cw for the location, so we can index into it for those 4802 threads, to get the scalar clock values for the point at which the 4803 former accesses were made. (In fact we only bother to do any of 4804 this for an arbitrarily chosen one of the conflicting threads, as 4805 that's simpler, it avoids flooding the user with vast amounts of 4806 mostly useless information, and because the program is wrong if it 4807 contains any races at all -- so we don't really need to show all 4808 conflicting access pairs initially, so long as we only show none if 4809 none exist). 4810 4811 --- 4812 4813 That requires the auxiliary proof that 4814 4815 (Cr `join` Kw)[T] == Kw[T] 4816 4817 Why should that be true? Because for any thread T, Kw[T] >= the 4818 scalar clock value for T known by any other thread. In other 4819 words, because T's value for its own scalar clock is at least as up 4820 to date as the value for it known by any other thread (that is true 4821 for both the R- and W- scalar clocks). Hence no other thread will 4822 be able to feed in a value for that element (indirectly via a 4823 constraint) which will exceed Kw[T], and hence the join cannot 4824 cause that particular element to advance. 4825 */ 4826 4827 __attribute__((noinline)) 4828 static void record_race_info ( Thr* acc_thr, 4829 Addr acc_addr, SizeT szB, Bool isWrite, 4830 VtsID Cfailed, 4831 VtsID Kfailed, 4832 VtsID Cw ) 4833 { 4834 /* Call here to report a race. We just hand it onwards to 4835 HG_(record_error_Race). If that in turn discovers that the 4836 error is going to be collected, then, at history_level 2, that 4837 queries the conflicting-event map. The alternative would be to 4838 query it right here. But that causes a lot of pointless queries 4839 for errors which will shortly be discarded as duplicates, and 4840 can become a performance overhead; so we defer the query until 4841 we know the error is not a duplicate. */ 4842 4843 /* Stacks for the bounds of the (or one of the) conflicting 4844 segment(s). These are only set at history_level 1. */ 4845 ExeContext* hist1_seg_start = NULL; 4846 ExeContext* hist1_seg_end = NULL; 4847 Thread* hist1_conf_thr = NULL; 4848 4849 tl_assert(acc_thr); 4850 tl_assert(acc_thr->hgthread); 4851 tl_assert(acc_thr->hgthread->hbthr == acc_thr); 4852 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2); 4853 4854 if (HG_(clo_history_level) == 1) { 4855 Bool found; 4856 Word firstIx, lastIx; 4857 ULong_n_EC key; 4858 4859 /* At history_level 1, we must round up the relevant stack-pair 4860 for the conflicting segment right now. This is because 4861 deferring it is complex; we can't (easily) put Kfailed and 4862 Cfailed into the XError and wait for later without 4863 getting tied up in difficulties with VtsID reference 4864 counting. So just do it now. */ 4865 Thr* confThr; 4866 ULong confTym = 0; 4867 /* Which thread are we in conflict with? There may be more than 4868 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily 4869 (in fact it's the one with the lowest Thr* value). */ 4870 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed ); 4871 /* This must exist! since if it was NULL then there's no 4872 conflict (semantics of return value of 4873 VtsID__findFirst_notLEQ), and msmc{read,write}, which has 4874 called us, just checked exactly this -- that there was in 4875 fact a race. */ 4876 tl_assert(confThr); 4877 4878 /* Get the scalar clock value that the conflicting thread 4879 introduced into the constraint. A careful examination of the 4880 base machine rules shows that this must be the same as the 4881 conflicting thread's scalar clock when it created this 4882 constraint. Hence we know the scalar clock of the 4883 conflicting thread when the conflicting access was made. */ 4884 confTym = VtsID__indexAt( Cfailed, confThr ); 4885 4886 /* Using this scalar clock, index into the conflicting thread's 4887 collection of stack traces made each time its vector clock 4888 (hence its scalar clock) changed. This gives the stack 4889 traces at the start and end of the conflicting segment (well, 4890 as per comment just above, of one of the conflicting 4891 segments, if there are more than one). */ 4892 key.ull = confTym; 4893 key.ec = NULL; 4894 /* tl_assert(confThr); -- asserted just above */ 4895 tl_assert(confThr->local_Kws_n_stacks); 4896 firstIx = lastIx = 0; 4897 found = VG_(lookupXA_UNSAFE)( 4898 confThr->local_Kws_n_stacks, 4899 &key, &firstIx, &lastIx, 4900 (Int(*)(void*,void*))cmp__ULong_n_EC__by_ULong 4901 ); 4902 if (0) VG_(printf)("record_race_info %u %u %u confThr %p " 4903 "confTym %llu found %d (%lu,%lu)\n", 4904 Cfailed, Kfailed, Cw, 4905 confThr, confTym, found, firstIx, lastIx); 4906 /* We can't indefinitely collect stack traces at VTS 4907 transitions, since we'd eventually run out of memory. Hence 4908 note_local_Kw_n_stack_for will eventually throw away old 4909 ones, which in turn means we might fail to find index value 4910 confTym in the array. */ 4911 if (found) { 4912 ULong_n_EC *pair_start, *pair_end; 4913 pair_start 4914 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx ); 4915 hist1_seg_start = pair_start->ec; 4916 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) { 4917 pair_end 4918 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, 4919 lastIx+1 ); 4920 /* from properties of VG_(lookupXA) and the comparison fn used: */ 4921 tl_assert(pair_start->ull < pair_end->ull); 4922 hist1_seg_end = pair_end->ec; 4923 /* Could do a bit better here. It may be that pair_end 4924 doesn't have a stack, but the following entries in the 4925 array have the same scalar Kw and to have a stack. So 4926 we should search a bit further along the array than 4927 lastIx+1 if hist1_seg_end is NULL. */ 4928 } else { 4929 if (!confThr->llexit_done) 4930 hist1_seg_end = main_get_EC( confThr ); 4931 } 4932 // seg_start could be NULL iff this is the first stack in the thread 4933 //if (seg_start) VG_(pp_ExeContext)(seg_start); 4934 //if (seg_end) VG_(pp_ExeContext)(seg_end); 4935 hist1_conf_thr = confThr->hgthread; 4936 } 4937 } 4938 4939 HG_(record_error_Race)( acc_thr->hgthread, acc_addr, 4940 szB, isWrite, 4941 hist1_conf_thr, hist1_seg_start, hist1_seg_end ); 4942 } 4943 4944 static Bool is_sane_SVal_C ( SVal sv ) { 4945 Bool leq; 4946 if (!SVal__isC(sv)) return True; 4947 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) ); 4948 return leq; 4949 } 4950 4951 4952 /* Compute new state following a read */ 4953 static inline SVal msmcread ( SVal svOld, 4954 /* The following are only needed for 4955 creating error reports. */ 4956 Thr* acc_thr, 4957 Addr acc_addr, SizeT szB ) 4958 { 4959 SVal svNew = SVal_INVALID; 4960 stats__msmcread++; 4961 4962 /* Redundant sanity check on the constraints */ 4963 if (CHECK_MSM) { 4964 tl_assert(is_sane_SVal_C(svOld)); 4965 } 4966 4967 if (LIKELY(SVal__isC(svOld))) { 4968 VtsID tviR = acc_thr->viR; 4969 VtsID tviW = acc_thr->viW; 4970 VtsID rmini = SVal__unC_Rmin(svOld); 4971 VtsID wmini = SVal__unC_Wmin(svOld); 4972 Bool leq = VtsID__cmpLEQ(rmini,tviR); 4973 if (LIKELY(leq)) { 4974 /* no race */ 4975 /* Note: RWLOCK subtlety: use tviW, not tviR */ 4976 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) ); 4977 goto out; 4978 } else { 4979 /* assert on sanity of constraints. */ 4980 Bool leqxx = VtsID__cmpLEQ(rmini,wmini); 4981 tl_assert(leqxx); 4982 // same as in non-race case 4983 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) ); 4984 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/, 4985 rmini, /* Cfailed */ 4986 tviR, /* Kfailed */ 4987 wmini /* Cw */ ); 4988 goto out; 4989 } 4990 } 4991 if (SVal__isA(svOld)) { 4992 /* reading no-access memory (sigh); leave unchanged */ 4993 /* check for no pollution */ 4994 tl_assert(svOld == SVal_NOACCESS); 4995 svNew = SVal_NOACCESS; 4996 goto out; 4997 } 4998 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld); 4999 tl_assert(0); 5000 5001 out: 5002 if (CHECK_MSM) { 5003 tl_assert(is_sane_SVal_C(svNew)); 5004 } 5005 if (UNLIKELY(svNew != svOld)) { 5006 tl_assert(svNew != SVal_INVALID); 5007 if (HG_(clo_history_level) >= 2 5008 && SVal__isC(svOld) && SVal__isC(svNew)) { 5009 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr ); 5010 stats__msmcread_change++; 5011 } 5012 } 5013 return svNew; 5014 } 5015 5016 5017 /* Compute new state following a write */ 5018 static inline SVal msmcwrite ( SVal svOld, 5019 /* The following are only needed for 5020 creating error reports. */ 5021 Thr* acc_thr, 5022 Addr acc_addr, SizeT szB ) 5023 { 5024 SVal svNew = SVal_INVALID; 5025 stats__msmcwrite++; 5026 5027 /* Redundant sanity check on the constraints */ 5028 if (CHECK_MSM) { 5029 tl_assert(is_sane_SVal_C(svOld)); 5030 } 5031 5032 if (LIKELY(SVal__isC(svOld))) { 5033 VtsID tviW = acc_thr->viW; 5034 VtsID wmini = SVal__unC_Wmin(svOld); 5035 Bool leq = VtsID__cmpLEQ(wmini,tviW); 5036 if (LIKELY(leq)) { 5037 /* no race */ 5038 svNew = SVal__mkC( tviW, tviW ); 5039 goto out; 5040 } else { 5041 VtsID rmini = SVal__unC_Rmin(svOld); 5042 /* assert on sanity of constraints. */ 5043 Bool leqxx = VtsID__cmpLEQ(rmini,wmini); 5044 tl_assert(leqxx); 5045 // same as in non-race case 5046 // proof: in the non-race case, we have 5047 // rmini <= wmini (invar on constraints) 5048 // tviW <= tviR (invar on thread clocks) 5049 // wmini <= tviW (from run-time check) 5050 // hence from transitivity of <= we have 5051 // rmini <= wmini <= tviW 5052 // and so join(rmini,tviW) == tviW 5053 // and join(wmini,tviW) == tviW 5054 // qed. 5055 svNew = SVal__mkC( VtsID__join2(rmini, tviW), 5056 VtsID__join2(wmini, tviW) ); 5057 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/, 5058 wmini, /* Cfailed */ 5059 tviW, /* Kfailed */ 5060 wmini /* Cw */ ); 5061 goto out; 5062 } 5063 } 5064 if (SVal__isA(svOld)) { 5065 /* writing no-access memory (sigh); leave unchanged */ 5066 /* check for no pollution */ 5067 tl_assert(svOld == SVal_NOACCESS); 5068 svNew = SVal_NOACCESS; 5069 goto out; 5070 } 5071 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld); 5072 tl_assert(0); 5073 5074 out: 5075 if (CHECK_MSM) { 5076 tl_assert(is_sane_SVal_C(svNew)); 5077 } 5078 if (UNLIKELY(svNew != svOld)) { 5079 tl_assert(svNew != SVal_INVALID); 5080 if (HG_(clo_history_level) >= 2 5081 && SVal__isC(svOld) && SVal__isC(svNew)) { 5082 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr ); 5083 stats__msmcwrite_change++; 5084 } 5085 } 5086 return svNew; 5087 } 5088 5089 5090 ///////////////////////////////////////////////////////// 5091 // // 5092 // Apply core MSM to specific memory locations // 5093 // // 5094 ///////////////////////////////////////////////////////// 5095 5096 /*------------- ZSM accesses: 8 bit sapply ------------- */ 5097 5098 static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) { 5099 CacheLine* cl; 5100 UWord cloff, tno, toff; 5101 SVal svOld, svNew; 5102 UShort descr; 5103 stats__cline_cread08s++; 5104 cl = get_cacheline(a); 5105 cloff = get_cacheline_offset(a); 5106 tno = get_treeno(a); 5107 toff = get_tree_offset(a); /* == 0 .. 7 */ 5108 descr = cl->descrs[tno]; 5109 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) { 5110 SVal* tree = &cl->svals[tno << 3]; 5111 cl->descrs[tno] = pulldown_to_8(tree, toff, descr); 5112 if (CHECK_ZSM) 5113 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5114 } 5115 svOld = cl->svals[cloff]; 5116 svNew = msmcread( svOld, thr,a,1 ); 5117 if (CHECK_ZSM) 5118 tl_assert(svNew != SVal_INVALID); 5119 cl->svals[cloff] = svNew; 5120 } 5121 5122 static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) { 5123 CacheLine* cl; 5124 UWord cloff, tno, toff; 5125 SVal svOld, svNew; 5126 UShort descr; 5127 stats__cline_cwrite08s++; 5128 cl = get_cacheline(a); 5129 cloff = get_cacheline_offset(a); 5130 tno = get_treeno(a); 5131 toff = get_tree_offset(a); /* == 0 .. 7 */ 5132 descr = cl->descrs[tno]; 5133 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) { 5134 SVal* tree = &cl->svals[tno << 3]; 5135 cl->descrs[tno] = pulldown_to_8(tree, toff, descr); 5136 if (CHECK_ZSM) 5137 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5138 } 5139 svOld = cl->svals[cloff]; 5140 svNew = msmcwrite( svOld, thr,a,1 ); 5141 if (CHECK_ZSM) 5142 tl_assert(svNew != SVal_INVALID); 5143 cl->svals[cloff] = svNew; 5144 } 5145 5146 /*------------- ZSM accesses: 16 bit sapply ------------- */ 5147 5148 static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) { 5149 CacheLine* cl; 5150 UWord cloff, tno, toff; 5151 SVal svOld, svNew; 5152 UShort descr; 5153 stats__cline_cread16s++; 5154 if (UNLIKELY(!aligned16(a))) goto slowcase; 5155 cl = get_cacheline(a); 5156 cloff = get_cacheline_offset(a); 5157 tno = get_treeno(a); 5158 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */ 5159 descr = cl->descrs[tno]; 5160 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) { 5161 if (valid_value_is_below_me_16(descr, toff)) { 5162 goto slowcase; 5163 } else { 5164 SVal* tree = &cl->svals[tno << 3]; 5165 cl->descrs[tno] = pulldown_to_16(tree, toff, descr); 5166 } 5167 if (CHECK_ZSM) 5168 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5169 } 5170 svOld = cl->svals[cloff]; 5171 svNew = msmcread( svOld, thr,a,2 ); 5172 if (CHECK_ZSM) 5173 tl_assert(svNew != SVal_INVALID); 5174 cl->svals[cloff] = svNew; 5175 return; 5176 slowcase: /* misaligned, or must go further down the tree */ 5177 stats__cline_16to8splits++; 5178 zsm_sapply08__msmcread( thr, a + 0 ); 5179 zsm_sapply08__msmcread( thr, a + 1 ); 5180 } 5181 5182 static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) { 5183 CacheLine* cl; 5184 UWord cloff, tno, toff; 5185 SVal svOld, svNew; 5186 UShort descr; 5187 stats__cline_cwrite16s++; 5188 if (UNLIKELY(!aligned16(a))) goto slowcase; 5189 cl = get_cacheline(a); 5190 cloff = get_cacheline_offset(a); 5191 tno = get_treeno(a); 5192 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */ 5193 descr = cl->descrs[tno]; 5194 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) { 5195 if (valid_value_is_below_me_16(descr, toff)) { 5196 goto slowcase; 5197 } else { 5198 SVal* tree = &cl->svals[tno << 3]; 5199 cl->descrs[tno] = pulldown_to_16(tree, toff, descr); 5200 } 5201 if (CHECK_ZSM) 5202 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5203 } 5204 svOld = cl->svals[cloff]; 5205 svNew = msmcwrite( svOld, thr,a,2 ); 5206 if (CHECK_ZSM) 5207 tl_assert(svNew != SVal_INVALID); 5208 cl->svals[cloff] = svNew; 5209 return; 5210 slowcase: /* misaligned, or must go further down the tree */ 5211 stats__cline_16to8splits++; 5212 zsm_sapply08__msmcwrite( thr, a + 0 ); 5213 zsm_sapply08__msmcwrite( thr, a + 1 ); 5214 } 5215 5216 /*------------- ZSM accesses: 32 bit sapply ------------- */ 5217 5218 static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) { 5219 CacheLine* cl; 5220 UWord cloff, tno, toff; 5221 SVal svOld, svNew; 5222 UShort descr; 5223 stats__cline_cread32s++; 5224 if (UNLIKELY(!aligned32(a))) goto slowcase; 5225 cl = get_cacheline(a); 5226 cloff = get_cacheline_offset(a); 5227 tno = get_treeno(a); 5228 toff = get_tree_offset(a); /* == 0 or 4 */ 5229 descr = cl->descrs[tno]; 5230 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) { 5231 if (valid_value_is_above_me_32(descr, toff)) { 5232 SVal* tree = &cl->svals[tno << 3]; 5233 cl->descrs[tno] = pulldown_to_32(tree, toff, descr); 5234 } else { 5235 goto slowcase; 5236 } 5237 if (CHECK_ZSM) 5238 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5239 } 5240 svOld = cl->svals[cloff]; 5241 svNew = msmcread( svOld, thr,a,4 ); 5242 if (CHECK_ZSM) 5243 tl_assert(svNew != SVal_INVALID); 5244 cl->svals[cloff] = svNew; 5245 return; 5246 slowcase: /* misaligned, or must go further down the tree */ 5247 stats__cline_32to16splits++; 5248 zsm_sapply16__msmcread( thr, a + 0 ); 5249 zsm_sapply16__msmcread( thr, a + 2 ); 5250 } 5251 5252 static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) { 5253 CacheLine* cl; 5254 UWord cloff, tno, toff; 5255 SVal svOld, svNew; 5256 UShort descr; 5257 stats__cline_cwrite32s++; 5258 if (UNLIKELY(!aligned32(a))) goto slowcase; 5259 cl = get_cacheline(a); 5260 cloff = get_cacheline_offset(a); 5261 tno = get_treeno(a); 5262 toff = get_tree_offset(a); /* == 0 or 4 */ 5263 descr = cl->descrs[tno]; 5264 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) { 5265 if (valid_value_is_above_me_32(descr, toff)) { 5266 SVal* tree = &cl->svals[tno << 3]; 5267 cl->descrs[tno] = pulldown_to_32(tree, toff, descr); 5268 } else { 5269 goto slowcase; 5270 } 5271 if (CHECK_ZSM) 5272 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5273 } 5274 svOld = cl->svals[cloff]; 5275 svNew = msmcwrite( svOld, thr,a,4 ); 5276 if (CHECK_ZSM) 5277 tl_assert(svNew != SVal_INVALID); 5278 cl->svals[cloff] = svNew; 5279 return; 5280 slowcase: /* misaligned, or must go further down the tree */ 5281 stats__cline_32to16splits++; 5282 zsm_sapply16__msmcwrite( thr, a + 0 ); 5283 zsm_sapply16__msmcwrite( thr, a + 2 ); 5284 } 5285 5286 /*------------- ZSM accesses: 64 bit sapply ------------- */ 5287 5288 static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) { 5289 CacheLine* cl; 5290 UWord cloff, tno; 5291 //UWord toff; 5292 SVal svOld, svNew; 5293 UShort descr; 5294 stats__cline_cread64s++; 5295 if (UNLIKELY(!aligned64(a))) goto slowcase; 5296 cl = get_cacheline(a); 5297 cloff = get_cacheline_offset(a); 5298 tno = get_treeno(a); 5299 //toff = get_tree_offset(a); /* == 0, unused */ 5300 descr = cl->descrs[tno]; 5301 if (UNLIKELY( !(descr & TREE_DESCR_64) )) { 5302 goto slowcase; 5303 } 5304 svOld = cl->svals[cloff]; 5305 svNew = msmcread( svOld, thr,a,8 ); 5306 if (CHECK_ZSM) 5307 tl_assert(svNew != SVal_INVALID); 5308 cl->svals[cloff] = svNew; 5309 return; 5310 slowcase: /* misaligned, or must go further down the tree */ 5311 stats__cline_64to32splits++; 5312 zsm_sapply32__msmcread( thr, a + 0 ); 5313 zsm_sapply32__msmcread( thr, a + 4 ); 5314 } 5315 5316 static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) { 5317 CacheLine* cl; 5318 UWord cloff, tno; 5319 //UWord toff; 5320 SVal svOld, svNew; 5321 UShort descr; 5322 stats__cline_cwrite64s++; 5323 if (UNLIKELY(!aligned64(a))) goto slowcase; 5324 cl = get_cacheline(a); 5325 cloff = get_cacheline_offset(a); 5326 tno = get_treeno(a); 5327 //toff = get_tree_offset(a); /* == 0, unused */ 5328 descr = cl->descrs[tno]; 5329 if (UNLIKELY( !(descr & TREE_DESCR_64) )) { 5330 goto slowcase; 5331 } 5332 svOld = cl->svals[cloff]; 5333 svNew = msmcwrite( svOld, thr,a,8 ); 5334 if (CHECK_ZSM) 5335 tl_assert(svNew != SVal_INVALID); 5336 cl->svals[cloff] = svNew; 5337 return; 5338 slowcase: /* misaligned, or must go further down the tree */ 5339 stats__cline_64to32splits++; 5340 zsm_sapply32__msmcwrite( thr, a + 0 ); 5341 zsm_sapply32__msmcwrite( thr, a + 4 ); 5342 } 5343 5344 /*--------------- ZSM accesses: 8 bit swrite --------------- */ 5345 5346 static 5347 void zsm_swrite08 ( Addr a, SVal svNew ) { 5348 CacheLine* cl; 5349 UWord cloff, tno, toff; 5350 UShort descr; 5351 stats__cline_swrite08s++; 5352 cl = get_cacheline(a); 5353 cloff = get_cacheline_offset(a); 5354 tno = get_treeno(a); 5355 toff = get_tree_offset(a); /* == 0 .. 7 */ 5356 descr = cl->descrs[tno]; 5357 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) { 5358 SVal* tree = &cl->svals[tno << 3]; 5359 cl->descrs[tno] = pulldown_to_8(tree, toff, descr); 5360 if (CHECK_ZSM) 5361 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5362 } 5363 tl_assert(svNew != SVal_INVALID); 5364 cl->svals[cloff] = svNew; 5365 } 5366 5367 /*--------------- ZSM accesses: 16 bit swrite --------------- */ 5368 5369 static 5370 void zsm_swrite16 ( Addr a, SVal svNew ) { 5371 CacheLine* cl; 5372 UWord cloff, tno, toff; 5373 UShort descr; 5374 stats__cline_swrite16s++; 5375 if (UNLIKELY(!aligned16(a))) goto slowcase; 5376 cl = get_cacheline(a); 5377 cloff = get_cacheline_offset(a); 5378 tno = get_treeno(a); 5379 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */ 5380 descr = cl->descrs[tno]; 5381 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) { 5382 if (valid_value_is_below_me_16(descr, toff)) { 5383 /* Writing at this level. Need to fix up 'descr'. */ 5384 cl->descrs[tno] = pullup_descr_to_16(descr, toff); 5385 /* At this point, the tree does not match cl->descr[tno] any 5386 more. The assignments below will fix it up. */ 5387 } else { 5388 /* We can't indiscriminately write on the w16 node as in the 5389 w64 case, as that might make the node inconsistent with 5390 its parent. So first, pull down to this level. */ 5391 SVal* tree = &cl->svals[tno << 3]; 5392 cl->descrs[tno] = pulldown_to_16(tree, toff, descr); 5393 if (CHECK_ZSM) 5394 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5395 } 5396 } 5397 tl_assert(svNew != SVal_INVALID); 5398 cl->svals[cloff + 0] = svNew; 5399 cl->svals[cloff + 1] = SVal_INVALID; 5400 return; 5401 slowcase: /* misaligned */ 5402 stats__cline_16to8splits++; 5403 zsm_swrite08( a + 0, svNew ); 5404 zsm_swrite08( a + 1, svNew ); 5405 } 5406 5407 /*--------------- ZSM accesses: 32 bit swrite --------------- */ 5408 5409 static 5410 void zsm_swrite32 ( Addr a, SVal svNew ) { 5411 CacheLine* cl; 5412 UWord cloff, tno, toff; 5413 UShort descr; 5414 stats__cline_swrite32s++; 5415 if (UNLIKELY(!aligned32(a))) goto slowcase; 5416 cl = get_cacheline(a); 5417 cloff = get_cacheline_offset(a); 5418 tno = get_treeno(a); 5419 toff = get_tree_offset(a); /* == 0 or 4 */ 5420 descr = cl->descrs[tno]; 5421 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) { 5422 if (valid_value_is_above_me_32(descr, toff)) { 5423 /* We can't indiscriminately write on the w32 node as in the 5424 w64 case, as that might make the node inconsistent with 5425 its parent. So first, pull down to this level. */ 5426 SVal* tree = &cl->svals[tno << 3]; 5427 cl->descrs[tno] = pulldown_to_32(tree, toff, descr); 5428 if (CHECK_ZSM) 5429 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 5430 } else { 5431 /* Writing at this level. Need to fix up 'descr'. */ 5432 cl->descrs[tno] = pullup_descr_to_32(descr, toff); 5433 /* At this point, the tree does not match cl->descr[tno] any 5434 more. The assignments below will fix it up. */ 5435 } 5436 } 5437 tl_assert(svNew != SVal_INVALID); 5438 cl->svals[cloff + 0] = svNew; 5439 cl->svals[cloff + 1] = SVal_INVALID; 5440 cl->svals[cloff + 2] = SVal_INVALID; 5441 cl->svals[cloff + 3] = SVal_INVALID; 5442 return; 5443 slowcase: /* misaligned */ 5444 stats__cline_32to16splits++; 5445 zsm_swrite16( a + 0, svNew ); 5446 zsm_swrite16( a + 2, svNew ); 5447 } 5448 5449 /*--------------- ZSM accesses: 64 bit swrite --------------- */ 5450 5451 static 5452 void zsm_swrite64 ( Addr a, SVal svNew ) { 5453 CacheLine* cl; 5454 UWord cloff, tno; 5455 //UWord toff; 5456 stats__cline_swrite64s++; 5457 if (UNLIKELY(!aligned64(a))) goto slowcase; 5458 cl = get_cacheline(a); 5459 cloff = get_cacheline_offset(a); 5460 tno = get_treeno(a); 5461 //toff = get_tree_offset(a); /* == 0, unused */ 5462 cl->descrs[tno] = TREE_DESCR_64; 5463 tl_assert(svNew != SVal_INVALID); 5464 cl->svals[cloff + 0] = svNew; 5465 cl->svals[cloff + 1] = SVal_INVALID; 5466 cl->svals[cloff + 2] = SVal_INVALID; 5467 cl->svals[cloff + 3] = SVal_INVALID; 5468 cl->svals[cloff + 4] = SVal_INVALID; 5469 cl->svals[cloff + 5] = SVal_INVALID; 5470 cl->svals[cloff + 6] = SVal_INVALID; 5471 cl->svals[cloff + 7] = SVal_INVALID; 5472 return; 5473 slowcase: /* misaligned */ 5474 stats__cline_64to32splits++; 5475 zsm_swrite32( a + 0, svNew ); 5476 zsm_swrite32( a + 4, svNew ); 5477 } 5478 5479 /*------------- ZSM accesses: 8 bit sread/scopy ------------- */ 5480 5481 static 5482 SVal zsm_sread08 ( Addr a ) { 5483 CacheLine* cl; 5484 UWord cloff, tno, toff; 5485 UShort descr; 5486 stats__cline_sread08s++; 5487 cl = get_cacheline(a); 5488 cloff = get_cacheline_offset(a); 5489 tno = get_treeno(a); 5490 toff = get_tree_offset(a); /* == 0 .. 7 */ 5491 descr = cl->descrs[tno]; 5492 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) { 5493 SVal* tree = &cl->svals[tno << 3]; 5494 cl->descrs[tno] = pulldown_to_8(tree, toff, descr); 5495 } 5496 return cl->svals[cloff]; 5497 } 5498 5499 static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) { 5500 SVal sv; 5501 stats__cline_scopy08s++; 5502 sv = zsm_sread08( src ); 5503 zsm_swrite08( dst, sv ); 5504 } 5505 5506 5507 /* Block-copy states (needed for implementing realloc()). Note this 5508 doesn't change the filtering arrangements. The caller of 5509 zsm_scopy_range needs to attend to that. */ 5510 5511 static void zsm_scopy_range ( Addr src, Addr dst, SizeT len ) 5512 { 5513 SizeT i; 5514 if (len == 0) 5515 return; 5516 5517 /* assert for non-overlappingness */ 5518 tl_assert(src+len <= dst || dst+len <= src); 5519 5520 /* To be simple, just copy byte by byte. But so as not to wreck 5521 performance for later accesses to dst[0 .. len-1], normalise 5522 destination lines as we finish with them, and also normalise the 5523 line containing the first and last address. */ 5524 for (i = 0; i < len; i++) { 5525 Bool normalise 5526 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */ 5527 || i == 0 /* first in range */ 5528 || i == len-1; /* last in range */ 5529 zsm_scopy08( src+i, dst+i, normalise ); 5530 } 5531 } 5532 5533 5534 /* For setting address ranges to a given value. Has considerable 5535 sophistication so as to avoid generating large numbers of pointless 5536 cache loads/writebacks for large ranges. */ 5537 5538 /* Do small ranges in-cache, in the obvious way. */ 5539 static 5540 void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew ) 5541 { 5542 /* fast track a couple of common cases */ 5543 if (len == 4 && aligned32(a)) { 5544 zsm_swrite32( a, svNew ); 5545 return; 5546 } 5547 if (len == 8 && aligned64(a)) { 5548 zsm_swrite64( a, svNew ); 5549 return; 5550 } 5551 5552 /* be completely general (but as efficient as possible) */ 5553 if (len == 0) return; 5554 5555 if (!aligned16(a) && len >= 1) { 5556 zsm_swrite08( a, svNew ); 5557 a += 1; 5558 len -= 1; 5559 tl_assert(aligned16(a)); 5560 } 5561 if (len == 0) return; 5562 5563 if (!aligned32(a) && len >= 2) { 5564 zsm_swrite16( a, svNew ); 5565 a += 2; 5566 len -= 2; 5567 tl_assert(aligned32(a)); 5568 } 5569 if (len == 0) return; 5570 5571 if (!aligned64(a) && len >= 4) { 5572 zsm_swrite32( a, svNew ); 5573 a += 4; 5574 len -= 4; 5575 tl_assert(aligned64(a)); 5576 } 5577 if (len == 0) return; 5578 5579 if (len >= 8) { 5580 tl_assert(aligned64(a)); 5581 while (len >= 8) { 5582 zsm_swrite64( a, svNew ); 5583 a += 8; 5584 len -= 8; 5585 } 5586 tl_assert(aligned64(a)); 5587 } 5588 if (len == 0) return; 5589 5590 if (len >= 4) 5591 tl_assert(aligned32(a)); 5592 if (len >= 4) { 5593 zsm_swrite32( a, svNew ); 5594 a += 4; 5595 len -= 4; 5596 } 5597 if (len == 0) return; 5598 5599 if (len >= 2) 5600 tl_assert(aligned16(a)); 5601 if (len >= 2) { 5602 zsm_swrite16( a, svNew ); 5603 a += 2; 5604 len -= 2; 5605 } 5606 if (len == 0) return; 5607 5608 if (len >= 1) { 5609 zsm_swrite08( a, svNew ); 5610 //a += 1; 5611 len -= 1; 5612 } 5613 tl_assert(len == 0); 5614 } 5615 5616 5617 /* If we're doing a small range, hand off to zsm_sset_range_SMALL. But 5618 for larger ranges, try to operate directly on the out-of-cache 5619 representation, rather than dragging lines into the cache, 5620 overwriting them, and forcing them out. This turns out to be an 5621 important performance optimisation. 5622 5623 Note that this doesn't change the filtering arrangements. The 5624 caller of zsm_sset_range needs to attend to that. */ 5625 5626 static void zsm_sset_range ( Addr a, SizeT len, SVal svNew ) 5627 { 5628 tl_assert(svNew != SVal_INVALID); 5629 stats__cache_make_New_arange += (ULong)len; 5630 5631 if (0 && len > 500) 5632 VG_(printf)("make New ( %#lx, %ld )\n", a, len ); 5633 5634 if (0) { 5635 static UWord n_New_in_cache = 0; 5636 static UWord n_New_not_in_cache = 0; 5637 /* tag is 'a' with the in-line offset masked out, 5638 eg a[31]..a[4] 0000 */ 5639 Addr tag = a & ~(N_LINE_ARANGE - 1); 5640 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); 5641 if (LIKELY(tag == cache_shmem.tags0[wix])) { 5642 n_New_in_cache++; 5643 } else { 5644 n_New_not_in_cache++; 5645 } 5646 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000)) 5647 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n", 5648 n_New_in_cache, n_New_not_in_cache ); 5649 } 5650 5651 if (LIKELY(len < 2 * N_LINE_ARANGE)) { 5652 zsm_sset_range_SMALL( a, len, svNew ); 5653 } else { 5654 Addr before_start = a; 5655 Addr aligned_start = cacheline_ROUNDUP(a); 5656 Addr after_start = cacheline_ROUNDDN(a + len); 5657 UWord before_len = aligned_start - before_start; 5658 UWord aligned_len = after_start - aligned_start; 5659 UWord after_len = a + len - after_start; 5660 tl_assert(before_start <= aligned_start); 5661 tl_assert(aligned_start <= after_start); 5662 tl_assert(before_len < N_LINE_ARANGE); 5663 tl_assert(after_len < N_LINE_ARANGE); 5664 tl_assert(get_cacheline_offset(aligned_start) == 0); 5665 if (get_cacheline_offset(a) == 0) { 5666 tl_assert(before_len == 0); 5667 tl_assert(a == aligned_start); 5668 } 5669 if (get_cacheline_offset(a+len) == 0) { 5670 tl_assert(after_len == 0); 5671 tl_assert(after_start == a+len); 5672 } 5673 if (before_len > 0) { 5674 zsm_sset_range_SMALL( before_start, before_len, svNew ); 5675 } 5676 if (after_len > 0) { 5677 zsm_sset_range_SMALL( after_start, after_len, svNew ); 5678 } 5679 stats__cache_make_New_inZrep += (ULong)aligned_len; 5680 5681 while (1) { 5682 Addr tag; 5683 UWord wix; 5684 if (aligned_start >= after_start) 5685 break; 5686 tl_assert(get_cacheline_offset(aligned_start) == 0); 5687 tag = aligned_start & ~(N_LINE_ARANGE - 1); 5688 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1); 5689 if (tag == cache_shmem.tags0[wix]) { 5690 UWord i; 5691 for (i = 0; i < N_LINE_ARANGE / 8; i++) 5692 zsm_swrite64( aligned_start + i * 8, svNew ); 5693 } else { 5694 UWord i; 5695 Word zix; 5696 SecMap* sm; 5697 LineZ* lineZ; 5698 /* This line is not in the cache. Do not force it in; instead 5699 modify it in-place. */ 5700 /* find the Z line to write in and rcdec it or the 5701 associated F line. */ 5702 find_Z_for_writing( &sm, &zix, tag ); 5703 tl_assert(sm); 5704 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES); 5705 lineZ = &sm->linesZ[zix]; 5706 lineZ->dict[0] = svNew; 5707 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID; 5708 for (i = 0; i < N_LINE_ARANGE/4; i++) 5709 lineZ->ix2s[i] = 0; /* all refer to dict[0] */ 5710 rcinc_LineZ(lineZ); 5711 } 5712 aligned_start += N_LINE_ARANGE; 5713 aligned_len -= N_LINE_ARANGE; 5714 } 5715 tl_assert(aligned_start == after_start); 5716 tl_assert(aligned_len == 0); 5717 } 5718 } 5719 5720 5721 ///////////////////////////////////////////////////////// 5722 // // 5723 // Front-filtering accesses // 5724 // // 5725 ///////////////////////////////////////////////////////// 5726 5727 static UWord stats__f_ac = 0; 5728 static UWord stats__f_sk = 0; 5729 5730 #if 0 5731 # define STATS__F_SHOW \ 5732 do { \ 5733 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \ 5734 VG_(printf)("filters: ac %lu sk %lu\n", \ 5735 stats__f_ac, stats__f_sk); \ 5736 } while (0) 5737 #else 5738 # define STATS__F_SHOW /* */ 5739 #endif 5740 5741 void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) { 5742 stats__f_ac++; 5743 STATS__F_SHOW; 5744 if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) { 5745 stats__f_sk++; 5746 return; 5747 } 5748 zsm_sapply08__msmcwrite(thr, a); 5749 } 5750 5751 void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) { 5752 stats__f_ac++; 5753 STATS__F_SHOW; 5754 if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) { 5755 stats__f_sk++; 5756 return; 5757 } 5758 zsm_sapply16__msmcwrite(thr, a); 5759 } 5760 5761 void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) { 5762 stats__f_ac++; 5763 STATS__F_SHOW; 5764 if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) { 5765 stats__f_sk++; 5766 return; 5767 } 5768 zsm_sapply32__msmcwrite(thr, a); 5769 } 5770 5771 void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) { 5772 stats__f_ac++; 5773 STATS__F_SHOW; 5774 if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) { 5775 stats__f_sk++; 5776 return; 5777 } 5778 zsm_sapply64__msmcwrite(thr, a); 5779 } 5780 5781 void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len ) 5782 { 5783 /* fast track a couple of common cases */ 5784 if (len == 4 && aligned32(a)) { 5785 zsm_sapply32_f__msmcwrite( thr, a ); 5786 return; 5787 } 5788 if (len == 8 && aligned64(a)) { 5789 zsm_sapply64_f__msmcwrite( thr, a ); 5790 return; 5791 } 5792 5793 /* be completely general (but as efficient as possible) */ 5794 if (len == 0) return; 5795 5796 if (!aligned16(a) && len >= 1) { 5797 zsm_sapply08_f__msmcwrite( thr, a ); 5798 a += 1; 5799 len -= 1; 5800 tl_assert(aligned16(a)); 5801 } 5802 if (len == 0) return; 5803 5804 if (!aligned32(a) && len >= 2) { 5805 zsm_sapply16_f__msmcwrite( thr, a ); 5806 a += 2; 5807 len -= 2; 5808 tl_assert(aligned32(a)); 5809 } 5810 if (len == 0) return; 5811 5812 if (!aligned64(a) && len >= 4) { 5813 zsm_sapply32_f__msmcwrite( thr, a ); 5814 a += 4; 5815 len -= 4; 5816 tl_assert(aligned64(a)); 5817 } 5818 if (len == 0) return; 5819 5820 if (len >= 8) { 5821 tl_assert(aligned64(a)); 5822 while (len >= 8) { 5823 zsm_sapply64_f__msmcwrite( thr, a ); 5824 a += 8; 5825 len -= 8; 5826 } 5827 tl_assert(aligned64(a)); 5828 } 5829 if (len == 0) return; 5830 5831 if (len >= 4) 5832 tl_assert(aligned32(a)); 5833 if (len >= 4) { 5834 zsm_sapply32_f__msmcwrite( thr, a ); 5835 a += 4; 5836 len -= 4; 5837 } 5838 if (len == 0) return; 5839 5840 if (len >= 2) 5841 tl_assert(aligned16(a)); 5842 if (len >= 2) { 5843 zsm_sapply16_f__msmcwrite( thr, a ); 5844 a += 2; 5845 len -= 2; 5846 } 5847 if (len == 0) return; 5848 5849 if (len >= 1) { 5850 zsm_sapply08_f__msmcwrite( thr, a ); 5851 //a += 1; 5852 len -= 1; 5853 } 5854 tl_assert(len == 0); 5855 } 5856 5857 void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) { 5858 stats__f_ac++; 5859 STATS__F_SHOW; 5860 if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) { 5861 stats__f_sk++; 5862 return; 5863 } 5864 zsm_sapply08__msmcread(thr, a); 5865 } 5866 5867 void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) { 5868 stats__f_ac++; 5869 STATS__F_SHOW; 5870 if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) { 5871 stats__f_sk++; 5872 return; 5873 } 5874 zsm_sapply16__msmcread(thr, a); 5875 } 5876 5877 void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) { 5878 stats__f_ac++; 5879 STATS__F_SHOW; 5880 if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) { 5881 stats__f_sk++; 5882 return; 5883 } 5884 zsm_sapply32__msmcread(thr, a); 5885 } 5886 5887 void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) { 5888 stats__f_ac++; 5889 STATS__F_SHOW; 5890 if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) { 5891 stats__f_sk++; 5892 return; 5893 } 5894 zsm_sapply64__msmcread(thr, a); 5895 } 5896 5897 void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len ) 5898 { 5899 /* fast track a couple of common cases */ 5900 if (len == 4 && aligned32(a)) { 5901 zsm_sapply32_f__msmcread( thr, a ); 5902 return; 5903 } 5904 if (len == 8 && aligned64(a)) { 5905 zsm_sapply64_f__msmcread( thr, a ); 5906 return; 5907 } 5908 5909 /* be completely general (but as efficient as possible) */ 5910 if (len == 0) return; 5911 5912 if (!aligned16(a) && len >= 1) { 5913 zsm_sapply08_f__msmcread( thr, a ); 5914 a += 1; 5915 len -= 1; 5916 tl_assert(aligned16(a)); 5917 } 5918 if (len == 0) return; 5919 5920 if (!aligned32(a) && len >= 2) { 5921 zsm_sapply16_f__msmcread( thr, a ); 5922 a += 2; 5923 len -= 2; 5924 tl_assert(aligned32(a)); 5925 } 5926 if (len == 0) return; 5927 5928 if (!aligned64(a) && len >= 4) { 5929 zsm_sapply32_f__msmcread( thr, a ); 5930 a += 4; 5931 len -= 4; 5932 tl_assert(aligned64(a)); 5933 } 5934 if (len == 0) return; 5935 5936 if (len >= 8) { 5937 tl_assert(aligned64(a)); 5938 while (len >= 8) { 5939 zsm_sapply64_f__msmcread( thr, a ); 5940 a += 8; 5941 len -= 8; 5942 } 5943 tl_assert(aligned64(a)); 5944 } 5945 if (len == 0) return; 5946 5947 if (len >= 4) 5948 tl_assert(aligned32(a)); 5949 if (len >= 4) { 5950 zsm_sapply32_f__msmcread( thr, a ); 5951 a += 4; 5952 len -= 4; 5953 } 5954 if (len == 0) return; 5955 5956 if (len >= 2) 5957 tl_assert(aligned16(a)); 5958 if (len >= 2) { 5959 zsm_sapply16_f__msmcread( thr, a ); 5960 a += 2; 5961 len -= 2; 5962 } 5963 if (len == 0) return; 5964 5965 if (len >= 1) { 5966 zsm_sapply08_f__msmcread( thr, a ); 5967 //a += 1; 5968 len -= 1; 5969 } 5970 tl_assert(len == 0); 5971 } 5972 5973 void libhb_Thr_resumes ( Thr* thr ) 5974 { 5975 if (0) VG_(printf)("resume %p\n", thr); 5976 tl_assert(thr); 5977 tl_assert(!thr->llexit_done); 5978 Filter__clear(thr->filter, "libhb_Thr_resumes"); 5979 /* A kludge, but .. if this thread doesn't have any marker stacks 5980 at all, get one right now. This is easier than figuring out 5981 exactly when at thread startup we can and can't take a stack 5982 snapshot. */ 5983 if (HG_(clo_history_level) == 1) { 5984 tl_assert(thr->local_Kws_n_stacks); 5985 if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0) 5986 note_local_Kw_n_stack_for(thr); 5987 } 5988 } 5989 5990 5991 ///////////////////////////////////////////////////////// 5992 // // 5993 // Synchronisation objects // 5994 // // 5995 ///////////////////////////////////////////////////////// 5996 5997 /* A double linked list of all the SO's. */ 5998 SO* admin_SO = NULL; 5999 6000 static SO* SO__Alloc ( void ) 6001 { 6002 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) ); 6003 so->viR = VtsID_INVALID; 6004 so->viW = VtsID_INVALID; 6005 so->magic = SO_MAGIC; 6006 /* Add to double linked list */ 6007 if (admin_SO) { 6008 tl_assert(admin_SO->admin_prev == NULL); 6009 admin_SO->admin_prev = so; 6010 so->admin_next = admin_SO; 6011 } else { 6012 so->admin_next = NULL; 6013 } 6014 so->admin_prev = NULL; 6015 admin_SO = so; 6016 /* */ 6017 return so; 6018 } 6019 6020 static void SO__Dealloc ( SO* so ) 6021 { 6022 tl_assert(so); 6023 tl_assert(so->magic == SO_MAGIC); 6024 if (so->viR == VtsID_INVALID) { 6025 tl_assert(so->viW == VtsID_INVALID); 6026 } else { 6027 tl_assert(so->viW != VtsID_INVALID); 6028 VtsID__rcdec(so->viR); 6029 VtsID__rcdec(so->viW); 6030 } 6031 so->magic = 0; 6032 /* Del from double linked list */ 6033 if (so->admin_prev) 6034 so->admin_prev->admin_next = so->admin_next; 6035 if (so->admin_next) 6036 so->admin_next->admin_prev = so->admin_prev; 6037 if (so == admin_SO) 6038 admin_SO = so->admin_next; 6039 /* */ 6040 HG_(free)( so ); 6041 } 6042 6043 6044 ///////////////////////////////////////////////////////// 6045 // // 6046 // Top Level API // 6047 // // 6048 ///////////////////////////////////////////////////////// 6049 6050 static void show_thread_state ( HChar* str, Thr* t ) 6051 { 6052 if (1) return; 6053 if (t->viR == t->viW) { 6054 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR ); 6055 VtsID__pp( t->viR ); 6056 VG_(printf)("%s","\n"); 6057 } else { 6058 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR ); 6059 VtsID__pp( t->viR ); 6060 VG_(printf)(" viW %u==", t->viW); 6061 VtsID__pp( t->viW ); 6062 VG_(printf)("%s","\n"); 6063 } 6064 } 6065 6066 6067 Thr* libhb_init ( 6068 void (*get_stacktrace)( Thr*, Addr*, UWord ), 6069 ExeContext* (*get_EC)( Thr* ) 6070 ) 6071 { 6072 Thr* thr; 6073 VtsID vi; 6074 6075 // We will have to have to store a large number of these, 6076 // so make sure they're the size we expect them to be. 6077 tl_assert(sizeof(ScalarTS) == 8); 6078 6079 /* because first 1024 unusable */ 6080 tl_assert(SCALARTS_N_THRBITS >= 11); 6081 /* so as to fit in a UInt w/ 3 bits to spare (see defn of 6082 Thr_n_RCEC). */ 6083 tl_assert(SCALARTS_N_THRBITS <= 29); 6084 6085 /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words 6086 (32-bit). It's not correctness-critical, but there are a lot of 6087 them, so it's important from a space viewpoint. Unfortunately 6088 we simply can't pack it into 2 words on a 32-bit target. */ 6089 if (sizeof(UWord) == 8) { 6090 tl_assert(sizeof(Thr_n_RCEC) == 16); 6091 } else { 6092 tl_assert(sizeof(Thr_n_RCEC) == 12); 6093 } 6094 6095 /* Word sets really are 32 bits. Even on a 64 bit target. */ 6096 tl_assert(sizeof(WordSetID) == 4); 6097 tl_assert(sizeof(WordSet) == sizeof(WordSetID)); 6098 6099 tl_assert(get_stacktrace); 6100 tl_assert(get_EC); 6101 main_get_stacktrace = get_stacktrace; 6102 main_get_EC = get_EC; 6103 6104 // No need to initialise hg_wordfm. 6105 // No need to initialise hg_wordset. 6106 6107 /* Allocated once and never deallocated. Used as a temporary in 6108 VTS singleton, tick and join operations. */ 6109 temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID ); 6110 temp_max_sized_VTS->id = VtsID_INVALID; 6111 verydead_thread_table_init(); 6112 vts_set_init(); 6113 vts_tab_init(); 6114 event_map_init(); 6115 VtsID__invalidate_caches(); 6116 6117 // initialise shadow memory 6118 zsm_init( SVal__rcinc, SVal__rcdec ); 6119 6120 thr = Thr__new(); 6121 vi = VtsID__mk_Singleton( thr, 1 ); 6122 thr->viR = vi; 6123 thr->viW = vi; 6124 VtsID__rcinc(thr->viR); 6125 VtsID__rcinc(thr->viW); 6126 6127 show_thread_state(" root", thr); 6128 return thr; 6129 } 6130 6131 6132 Thr* libhb_create ( Thr* parent ) 6133 { 6134 /* The child's VTSs are copies of the parent's VTSs, but ticked at 6135 the child's index. Since the child's index is guaranteed 6136 unique, it has never been seen before, so the implicit value 6137 before the tick is zero and after that is one. */ 6138 Thr* child = Thr__new(); 6139 6140 child->viR = VtsID__tick( parent->viR, child ); 6141 child->viW = VtsID__tick( parent->viW, child ); 6142 Filter__clear(child->filter, "libhb_create(child)"); 6143 VtsID__rcinc(child->viR); 6144 VtsID__rcinc(child->viW); 6145 /* We need to do note_local_Kw_n_stack_for( child ), but it's too 6146 early for that - it may not have a valid TId yet. So, let 6147 libhb_Thr_resumes pick it up the first time the thread runs. */ 6148 6149 tl_assert(VtsID__indexAt( child->viR, child ) == 1); 6150 tl_assert(VtsID__indexAt( child->viW, child ) == 1); 6151 6152 /* and the parent has to move along too */ 6153 VtsID__rcdec(parent->viR); 6154 VtsID__rcdec(parent->viW); 6155 parent->viR = VtsID__tick( parent->viR, parent ); 6156 parent->viW = VtsID__tick( parent->viW, parent ); 6157 Filter__clear(parent->filter, "libhb_create(parent)"); 6158 VtsID__rcinc(parent->viR); 6159 VtsID__rcinc(parent->viW); 6160 note_local_Kw_n_stack_for( parent ); 6161 6162 show_thread_state(" child", child); 6163 show_thread_state("parent", parent); 6164 6165 return child; 6166 } 6167 6168 /* Shut down the library, and print stats (in fact that's _all_ 6169 this is for. */ 6170 void libhb_shutdown ( Bool show_stats ) 6171 { 6172 if (show_stats) { 6173 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n"); 6174 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n", 6175 stats__secmaps_allocd, 6176 stats__secmap_ga_space_covered); 6177 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n", 6178 stats__secmap_linesZ_allocd, 6179 stats__secmap_linesZ_bytes); 6180 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n", 6181 stats__secmap_linesF_allocd, 6182 stats__secmap_linesF_bytes); 6183 VG_(printf)(" secmaps: %'10lu iterator steppings\n", 6184 stats__secmap_iterator_steppings); 6185 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n", 6186 stats__secmaps_search, stats__secmaps_search_slow); 6187 6188 VG_(printf)("%s","\n"); 6189 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n", 6190 stats__cache_totrefs, stats__cache_totmisses ); 6191 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n", 6192 stats__cache_Z_fetches, stats__cache_F_fetches ); 6193 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n", 6194 stats__cache_Z_wbacks, stats__cache_F_wbacks ); 6195 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n", 6196 stats__cache_invals, stats__cache_flushes ); 6197 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n", 6198 stats__cache_make_New_arange, 6199 stats__cache_make_New_inZrep); 6200 6201 VG_(printf)("%s","\n"); 6202 VG_(printf)(" cline: %'10lu normalises\n", 6203 stats__cline_normalises ); 6204 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n", 6205 stats__cline_cread64s, 6206 stats__cline_cread32s, 6207 stats__cline_cread16s, 6208 stats__cline_cread08s ); 6209 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n", 6210 stats__cline_cwrite64s, 6211 stats__cline_cwrite32s, 6212 stats__cline_cwrite16s, 6213 stats__cline_cwrite08s ); 6214 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n", 6215 stats__cline_swrite64s, 6216 stats__cline_swrite32s, 6217 stats__cline_swrite16s, 6218 stats__cline_swrite08s ); 6219 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n", 6220 stats__cline_sread08s, stats__cline_scopy08s ); 6221 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n", 6222 stats__cline_64to32splits, 6223 stats__cline_32to16splits, 6224 stats__cline_16to8splits ); 6225 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n", 6226 stats__cline_64to32pulldown, 6227 stats__cline_32to16pulldown, 6228 stats__cline_16to8pulldown ); 6229 if (0) 6230 VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n", 6231 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE); 6232 6233 VG_(printf)("%s","\n"); 6234 6235 VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n", 6236 stats__msmcread, stats__msmcread_change); 6237 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n", 6238 stats__msmcwrite, stats__msmcwrite_change); 6239 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n", 6240 stats__cmpLEQ_queries, stats__cmpLEQ_misses); 6241 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n", 6242 stats__join2_queries, stats__join2_misses); 6243 6244 VG_(printf)("%s","\n"); 6245 VG_(printf)( " libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n", 6246 stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ ); 6247 VG_(printf)( " libhb: VTSops: cmp_structural %'lu (%'lu slow)\n", 6248 stats__vts__cmp_structural, stats__vts__cmp_structural_slow ); 6249 VG_(printf)( " libhb: VTSset: find__or__clone_and_add %'lu (%'lu allocd)\n", 6250 stats__vts_set__focaa, stats__vts_set__focaa_a ); 6251 VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n", 6252 stats__vts__indexat_slow ); 6253 6254 VG_(printf)("%s","\n"); 6255 VG_(printf)( 6256 " libhb: %ld entries in vts_table (approximately %lu bytes)\n", 6257 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE) 6258 ); 6259 VG_(printf)( " libhb: %lu entries in vts_set\n", 6260 VG_(sizeFM)( vts_set ) ); 6261 6262 VG_(printf)("%s","\n"); 6263 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n", 6264 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq, 6265 stats__ctxt_rcdec2, 6266 stats__ctxt_rcdec3 ); 6267 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n", 6268 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards); 6269 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n", 6270 (UWord)N_RCEC_TAB, 6271 stats__ctxt_tab_curr ); 6272 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n", 6273 stats__ctxt_tab_qs, 6274 stats__ctxt_tab_cmps ); 6275 #if 0 6276 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode)); 6277 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag)); 6278 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord)); 6279 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine)); 6280 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ)); 6281 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF)); 6282 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap)); 6283 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache)); 6284 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt)); 6285 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal)); 6286 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS)); 6287 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS)); 6288 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE)); 6289 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo)); 6290 6291 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray)); 6292 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM)); 6293 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr)); 6294 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO)); 6295 #endif 6296 6297 VG_(printf)("%s","<<< END libhb stats >>>\n"); 6298 VG_(printf)("%s","\n"); 6299 6300 } 6301 } 6302 6303 /* Receive notification that a thread has low level exited. The 6304 significance here is that we do not expect to see any more memory 6305 references from it. */ 6306 void libhb_async_exit ( Thr* thr ) 6307 { 6308 tl_assert(thr); 6309 tl_assert(!thr->llexit_done); 6310 thr->llexit_done = True; 6311 6312 /* free up Filter and local_Kws_n_stacks (well, actually not the 6313 latter ..) */ 6314 tl_assert(thr->filter); 6315 HG_(free)(thr->filter); 6316 thr->filter = NULL; 6317 6318 /* Tell the VTS mechanism this thread has exited, so it can 6319 participate in VTS pruning. Note this can only happen if the 6320 thread has both ll_exited and has been joined with. */ 6321 if (thr->joinedwith_done) 6322 VTS__declare_thread_very_dead(thr); 6323 6324 /* Another space-accuracy tradeoff. Do we want to be able to show 6325 H1 history for conflicts in threads which have since exited? If 6326 yes, then we better not free up thr->local_Kws_n_stacks. The 6327 downside is a potential per-thread leak of up to 6328 N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the 6329 XArray average overcommit factor is (1.5 I'd guess). */ 6330 // hence: 6331 // VG_(deleteXA)(thr->local_Kws_n_stacks); 6332 // thr->local_Kws_n_stacks = NULL; 6333 } 6334 6335 /* Receive notification that a thread has been joined with. The 6336 significance here is that we do not expect to see any further 6337 references to its vector clocks (Thr::viR and Thr::viW). */ 6338 void libhb_joinedwith_done ( Thr* thr ) 6339 { 6340 tl_assert(thr); 6341 /* Caller must ensure that this is only ever called once per Thr. */ 6342 tl_assert(!thr->joinedwith_done); 6343 thr->joinedwith_done = True; 6344 if (thr->llexit_done) 6345 VTS__declare_thread_very_dead(thr); 6346 } 6347 6348 6349 /* Both Segs and SOs point to VTSs. However, there is no sharing, so 6350 a Seg that points at a VTS is its one-and-only owner, and ditto for 6351 a SO that points at a VTS. */ 6352 6353 SO* libhb_so_alloc ( void ) 6354 { 6355 return SO__Alloc(); 6356 } 6357 6358 void libhb_so_dealloc ( SO* so ) 6359 { 6360 tl_assert(so); 6361 tl_assert(so->magic == SO_MAGIC); 6362 SO__Dealloc(so); 6363 } 6364 6365 /* See comments in libhb.h for details on the meaning of 6366 strong vs weak sends and strong vs weak receives. */ 6367 void libhb_so_send ( Thr* thr, SO* so, Bool strong_send ) 6368 { 6369 /* Copy the VTSs from 'thr' into the sync object, and then move 6370 the thread along one step. */ 6371 6372 tl_assert(so); 6373 tl_assert(so->magic == SO_MAGIC); 6374 6375 /* stay sane .. a thread's read-clock must always lead or be the 6376 same as its write-clock */ 6377 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR); 6378 tl_assert(leq); 6379 } 6380 6381 /* since we're overwriting the VtsIDs in the SO, we need to drop 6382 any references made by the previous contents thereof */ 6383 if (so->viR == VtsID_INVALID) { 6384 tl_assert(so->viW == VtsID_INVALID); 6385 so->viR = thr->viR; 6386 so->viW = thr->viW; 6387 VtsID__rcinc(so->viR); 6388 VtsID__rcinc(so->viW); 6389 } else { 6390 /* In a strong send, we dump any previous VC in the SO and 6391 install the sending thread's VC instead. For a weak send we 6392 must join2 with what's already there. */ 6393 tl_assert(so->viW != VtsID_INVALID); 6394 VtsID__rcdec(so->viR); 6395 VtsID__rcdec(so->viW); 6396 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR ); 6397 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW ); 6398 VtsID__rcinc(so->viR); 6399 VtsID__rcinc(so->viW); 6400 } 6401 6402 /* move both parent clocks along */ 6403 VtsID__rcdec(thr->viR); 6404 VtsID__rcdec(thr->viW); 6405 thr->viR = VtsID__tick( thr->viR, thr ); 6406 thr->viW = VtsID__tick( thr->viW, thr ); 6407 if (!thr->llexit_done) { 6408 Filter__clear(thr->filter, "libhb_so_send"); 6409 note_local_Kw_n_stack_for(thr); 6410 } 6411 VtsID__rcinc(thr->viR); 6412 VtsID__rcinc(thr->viW); 6413 6414 if (strong_send) 6415 show_thread_state("s-send", thr); 6416 else 6417 show_thread_state("w-send", thr); 6418 } 6419 6420 void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv ) 6421 { 6422 tl_assert(so); 6423 tl_assert(so->magic == SO_MAGIC); 6424 6425 if (so->viR != VtsID_INVALID) { 6426 tl_assert(so->viW != VtsID_INVALID); 6427 6428 /* Weak receive (basically, an R-acquisition of a R-W lock). 6429 This advances the read-clock of the receiver, but not the 6430 write-clock. */ 6431 VtsID__rcdec(thr->viR); 6432 thr->viR = VtsID__join2( thr->viR, so->viR ); 6433 VtsID__rcinc(thr->viR); 6434 6435 /* At one point (r10589) it seemed safest to tick the clocks for 6436 the receiving thread after the join. But on reflection, I 6437 wonder if that might cause it to 'overtake' constraints, 6438 which could lead to missing races. So, back out that part of 6439 r10589. */ 6440 //VtsID__rcdec(thr->viR); 6441 //thr->viR = VtsID__tick( thr->viR, thr ); 6442 //VtsID__rcinc(thr->viR); 6443 6444 /* For a strong receive, we also advance the receiver's write 6445 clock, which means the receive as a whole is essentially 6446 equivalent to a W-acquisition of a R-W lock. */ 6447 if (strong_recv) { 6448 VtsID__rcdec(thr->viW); 6449 thr->viW = VtsID__join2( thr->viW, so->viW ); 6450 VtsID__rcinc(thr->viW); 6451 6452 /* See comment just above, re r10589. */ 6453 //VtsID__rcdec(thr->viW); 6454 //thr->viW = VtsID__tick( thr->viW, thr ); 6455 //VtsID__rcinc(thr->viW); 6456 } 6457 6458 if (thr->filter) 6459 Filter__clear(thr->filter, "libhb_so_recv"); 6460 note_local_Kw_n_stack_for(thr); 6461 6462 if (strong_recv) 6463 show_thread_state("s-recv", thr); 6464 else 6465 show_thread_state("w-recv", thr); 6466 6467 } else { 6468 tl_assert(so->viW == VtsID_INVALID); 6469 /* Deal with degenerate case: 'so' has no vts, so there has been 6470 no message posted to it. Just ignore this case. */ 6471 show_thread_state("d-recv", thr); 6472 } 6473 } 6474 6475 Bool libhb_so_everSent ( SO* so ) 6476 { 6477 if (so->viR == VtsID_INVALID) { 6478 tl_assert(so->viW == VtsID_INVALID); 6479 return False; 6480 } else { 6481 tl_assert(so->viW != VtsID_INVALID); 6482 return True; 6483 } 6484 } 6485 6486 #define XXX1 0 // 0x67a106c 6487 #define XXX2 0 6488 6489 static inline Bool TRACEME(Addr a, SizeT szB) { 6490 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True; 6491 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True; 6492 return False; 6493 } 6494 static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) { 6495 SVal sv = zsm_sread08(a); 6496 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv); 6497 show_thread_state("", thr); 6498 VG_(printf)("%s","\n"); 6499 } 6500 6501 void libhb_srange_new ( Thr* thr, Addr a, SizeT szB ) 6502 { 6503 SVal sv = SVal__mkC(thr->viW, thr->viW); 6504 tl_assert(is_sane_SVal_C(sv)); 6505 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before"); 6506 zsm_sset_range( a, szB, sv ); 6507 Filter__clear_range( thr->filter, a, szB ); 6508 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after "); 6509 } 6510 6511 void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB ) 6512 { 6513 /* do nothing */ 6514 } 6515 6516 void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB ) 6517 { 6518 /* This really does put the requested range in NoAccess. It's 6519 expensive though. */ 6520 SVal sv = SVal_NOACCESS; 6521 tl_assert(is_sane_SVal_C(sv)); 6522 zsm_sset_range( a, szB, sv ); 6523 Filter__clear_range( thr->filter, a, szB ); 6524 } 6525 6526 void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB ) 6527 { 6528 SVal sv = SVal_NOACCESS; 6529 tl_assert(is_sane_SVal_C(sv)); 6530 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before"); 6531 zsm_sset_range( a, szB, sv ); 6532 Filter__clear_range( thr->filter, a, szB ); 6533 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after "); 6534 } 6535 6536 Thread* libhb_get_Thr_hgthread ( Thr* thr ) { 6537 tl_assert(thr); 6538 return thr->hgthread; 6539 } 6540 6541 void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) { 6542 tl_assert(thr); 6543 thr->hgthread = hgthread; 6544 } 6545 6546 void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len ) 6547 { 6548 zsm_scopy_range(src, dst, len); 6549 Filter__clear_range( thr->filter, dst, len ); 6550 } 6551 6552 void libhb_maybe_GC ( void ) 6553 { 6554 event_map_maybe_GC(); 6555 /* If there are still freelist entries available, no need for a 6556 GC. */ 6557 if (vts_tab_freelist != VtsID_INVALID) 6558 return; 6559 /* So all the table entries are full, and we're having to expand 6560 the table. But did we hit the threshhold point yet? */ 6561 if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at) 6562 return; 6563 vts_tab__do_GC( False/*don't show stats*/ ); 6564 } 6565 6566 6567 ///////////////////////////////////////////////////////////////// 6568 ///////////////////////////////////////////////////////////////// 6569 // // 6570 // SECTION END main library // 6571 // // 6572 ///////////////////////////////////////////////////////////////// 6573 ///////////////////////////////////////////////////////////////// 6574 6575 /*--------------------------------------------------------------------*/ 6576 /*--- end libhb_main.c ---*/ 6577 /*--------------------------------------------------------------------*/ 6578