/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2017 OpenWorks Ltd
      info (at) open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_hashtable.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_stacktrace.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Debugging #defines                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1           /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0           /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: VtsID                                           //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SVal                                            //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-defineable. */
#define SVal_NOACCESS (2ULL << 62)



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: ScalarTS                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46 bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is
   7.06e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 27 or lower.  The obvious limit is
   32 since a ThrID is a UInt.  27 comes from the fact that
   'Thr_n_RCEC', which records information about old accesses, packs
   into tsw not only a ThrID but also a minimum of 4+1 other bits
   (access size and writeness) in a UInt, hence limiting size to
   32-(4+1) == 27.

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).
   So, SCALARTS_N_THRBITS must be 11 or more.
   See also NB5.

   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.

   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
   must never be a valid ThrID.  Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 27 inclusive,
                                  See NB1 and NB2 above. */

#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
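/* Illustrative arithmetic for the packing above (editorial note, not
   part of the library).  The two bitfields fill exactly one 64-bit
   word: 18 + 46 == 64, so a ScalarTS occupies 8 bytes on the targets
   Helgrind supports.  The largest issuable thread id is
   ThrID_MAX_VALID == 2^18 - 1 == 262143.  A maximum-sized VTS holds
   one ScalarTS per possible thread, i.e. 2^18 * 8 bytes == 2 MiB,
   which is the NB4 figure; raising SCALARTS_N_THRBITS to 20 would
   make that 2^20 * 8 bytes == 8 MiB. */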


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Filter                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )


/* In the lines, each 8 bytes are treated individually, and are mapped
   to a UShort.  Regardless of endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher numbered bit is set if a R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/

/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;
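/* Worked example of the filter indexing above (illustrative only).
   With FI_LINE_SZB_LOG2 == 5 and FI_NUM_LINES_LOG2 == 10, a line
   covers 32 bytes and the filter has 1024 lines.  For the address
   a == 0x1003:

      FI_GET_TAG(a)    == 0x1003 & ~31          == 0x1000
      FI_GET_LINENO(a) == (0x1003 >> 5) & 1023  == 0x80

   Within that line, byte offset 3 falls in the first 8-byte group, so
   it is covered by u16s[0]; its R/W pair is bits 7 (R) and 6 (W),
   following the layout described above (bits 1:0 are addr+0, bits
   15:14 are addr+7). */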


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Thr, ULong_n_EC                                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// Records stacks for H1 history mechanism (DRD-style)
typedef
   struct { ULong ull; ExeContext* ec; }
   ULong_n_EC;


/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500


struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that
         VtsID__cmpLEQ(viW,viR) == True
      that is, viW must be the same, or lagging behind, viR. */
   VtsID viR;
   VtsID viW;

   /* Is initially False, and is set to True after the thread really
      has done a low-level exit.  When True, we expect to never see
      any more memory references done by this thread. */
   Bool llexit_done;

   /* Is initially False, and is set to True after the thread has been
      joined with (reaped by some other thread).  After this point, we
      do not expect to see any uses of .viR or .viW, so it is safe to
      set them to VtsID_INVALID. */
   Bool joinedwith_done;

   /* A small integer giving a unique identity to this Thr.  See
      comments on the definition of ScalarTS for details. */
   ThrID thrid : SCALARTS_N_THRBITS;

   /* A filter that removes references for which we believe that
      msmcread/msmcwrite will not change the state, nor report a
      race. */
   Filter* filter;

   /* A pointer back to the top level Thread structure.  There is a
      1-1 mapping between Thread and Thr structures -- each Thr points
      at its corresponding Thread, and vice versa.  Really, Thr and
      Thread should be merged into a single structure. */
   Thread* hgthread;

   /* The ULongs (scalar Kws) in this array accumulate in strictly
      increasing order, without duplicates.  This is important because
      we need to be able to find a given scalar Kw in this array
      later, by binary search. */
   XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SO                                              //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   struct _SO* admin_prev;
   struct _SO* admin_next;
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Forward declarations                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* fwds for
   Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;

/* misc fn and data fwdses */
static void VtsID__rcinc ( VtsID ii );
static void VtsID__rcdec ( VtsID ii );

static inline Bool   SVal__isC      ( SVal s );
static inline VtsID  SVal__unC_Rmin ( SVal s );
static inline VtsID  SVal__unC_Wmin ( SVal s );
static inline SVal   SVal__mkC      ( VtsID rmini, VtsID wmini );
static inline void   SVal__rcinc    ( SVal s );
static inline void   SVal__rcdec    ( SVal s );
/* SVal in LineZ are used to store various pointers. */
static inline void  *SVal2Ptr (SVal s);
static inline SVal   Ptr2SVal (void* ptr);

/* A doubly-linked list of all the SO's. */
SO* admin_SO;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN compressed shadow memory                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

/* Initialise the library.  Once initialised, it will (or may) call
   SVal__rcinc and SVal__rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to SVal__rcinc and SVal__rcdec, will be correct, and so any items with a
   zero reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void );

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))

/* True if a belongs in range [start, start + szB[
   (i.e. start + szB is excluded). */
static inline Bool address_in_range (Addr a, Addr start, SizeT szB)
{
   /* Checking start <= a && a < start + szB.
      As start and a are unsigned addresses, the condition can
      be simplified. */
   if (CHECK_ZSM)
      tl_assert ((a - start < szB)
                 == (start <= a
                     &&       a < start + szB));
   return a - start < szB;
}
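/* Why the single unsigned comparison above suffices (editorial
   illustration).  If a >= start, then a - start is the plain offset
   and the test a - start < szB is exactly a < start + szB.  If
   a < start, the unsigned subtraction wraps around to a huge value,
   which cannot be below szB, so the test correctly fails.  E.g. with
   start == 0x1000, szB == 0x100 and a == 0xFF0, a - start wraps to
   (Addr)-0x10, far above 0x100.  The CHECK_ZSM assertion above
   verifies exactly this equivalence. */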
/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is a pointer to the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

/* LineZ.dict[1] is used to store various pointers:
   * In the first lineZ of a free SecMap, it points to the next free SecMap.
   * In a lineZ for which we need to use a lineF, it points to the lineF. */


typedef
   struct {
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */

/* We use a pool allocator for LineF, as LineF is relatively small,
   and we will often alloc/release such lines. */
static PoolAlloc* LineF_pool_allocator;

/* SVal in a lineZ are used to store various pointers.
   Below are conversion functions to support that. */
static inline LineF *LineF_Ptr (LineZ *lineZ)
{
   tl_assert(lineZ->dict[0] == SVal_INVALID);
   return SVal2Ptr (lineZ->dict[1]);
}
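/* Rough size comparison of the two representations (editorial
   illustration; exact sizes depend on the compiler's struct layout).
   With N_LINE_ARANGE == 64 and SVal being a ULong:

      LineF: 64 * sizeof(SVal)        == 64 * 8   == 512 bytes
      LineZ: 4 * sizeof(SVal) + 64/4  == 32 + 16  ==  48 bytes

   so a line whose 64 bytes carry at most 4 distinct shadow values is
   stored in less than a tenth of the space of the full
   representation. */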
/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
     a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[1]) a pointer to a LineF
   that holds the full representation.

   As each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to a lineF should equal
   the number of used lineF.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use

   Hence the following actions at the following transitions are required:

   F rep: alloc'd  -> freed                     -- rcdec_LineF
   F rep:          -> alloc'd                   -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/

typedef
   struct {
      UInt  magic;
      LineZ linesZ[N_SECMAP_ZLINES];
   }
   SecMap;

#define SecMap_MAGIC      0x571e58cbU

// (UInt) `echo "Free SecMap" | md5sum`
#define SecMap_free_MAGIC 0x5a977f30U

__attribute__((unused))
static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}
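/* Back-of-the-envelope geometry for the structures above (editorial
   illustration; real sizes include alignment padding).  With
   N_SECMAP_BITS == 13 and N_LINE_BITS == 6, one SecMap covers 8192
   bytes of address space and holds 8192/64 == 128 LineZs, i.e. around
   128 * 48 + 4, roughly 6 KB.  With N_WAY_BITS == 16 the cache has
   65536 entries; each CacheLine is 8*2 + 64*8 == 528 bytes, so
   cache_shmem occupies roughly 65536 * (528 + 8), about 34 MB.
   Despite the N_WAY_* names, the lookup code later in this file
   (get_cacheline) keeps one tag per entry, so in effect the cache
   behaves like a direct-mapped cache. */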
/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmaps_in_map_shmem = 0; // # SecMaps 'live'
static UWord stats__secmaps_scanGC       = 0; // # nr of scan GC done.
static UWord stats__secmaps_scanGCed     = 0; // # SecMaps GC-ed via scan
static UWord stats__secmaps_ssetGCed     = 0; // # SecMaps GC-ed via setnoaccess
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_flushes_invals = 0; // # cache flushes and invals
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # 8-bit shadow reads
static UWord stats__cline_swrite08s      = 0; // # 8-bit shadow writes
static UWord stats__cline_swrite16s      = 0; // # 16-bit shadow writes
static UWord stats__cline_swrite32s      = 0; // # 32-bit shadow writes
static UWord stats__cline_swrite64s      = 0; // # 64-bit shadow writes
static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural
static UWord stats__vts_tab_GC           = 0; // # nr of vts_tab GC
static UWord stats__vts_pruning          = 0; // # nr of vts pruning

// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;

// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;

// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;

// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

/* SecMaps that have become entirely SVal_NOACCESS are inserted into a
   list of recycled SecMaps.  When a new SecMap is needed, a recycled
   SecMap will be used in preference to allocating a new SecMap. */
/* We make a linked list of SecMaps.  The first LineZ is re-used to
   implement the linked list. */
/* Returns the SecMap following sm in the free list.
   NULL if sm is the last SecMap.  sm must be on the free list. */
static inline SecMap *SecMap_freelist_next ( SecMap* sm )
{
   tl_assert (sm);
   tl_assert (sm->magic == SecMap_free_MAGIC);
   return SVal2Ptr (sm->linesZ[0].dict[1]);
}
static inline void set_SecMap_freelist_next ( SecMap* sm, SecMap* next )
{
   tl_assert (sm);
   tl_assert (sm->magic == SecMap_free_MAGIC);
   tl_assert (next == NULL || next->magic == SecMap_free_MAGIC);
   sm->linesZ[0].dict[1] = Ptr2SVal (next);
}

static SecMap *SecMap_freelist = NULL;
static UWord SecMap_freelist_length(void)
{
   SecMap *sm;
   UWord n = 0;

   sm = SecMap_freelist;
   while (sm) {
      n++;
      sm = SecMap_freelist_next (sm);
   }
   return n;
}

static void push_SecMap_on_freelist(SecMap* sm)
{
   if (0) VG_(message)(Vg_DebugMsg, "%p push\n", sm);
   sm->magic = SecMap_free_MAGIC;
   set_SecMap_freelist_next(sm, SecMap_freelist);
   SecMap_freelist = sm;
}
/* Returns a free SecMap if there is one.
   Otherwise, returns NULL. */
static SecMap *pop_SecMap_from_freelist(void)
{
   SecMap *sm;

   sm = SecMap_freelist;
   if (sm) {
      tl_assert (sm->magic == SecMap_free_MAGIC);
      SecMap_freelist = SecMap_freelist_next (sm);
      if (0) VG_(message)(Vg_DebugMsg, "%p pop\n", sm);
   }
   return sm;
}

static SecMap* shmem__alloc_or_recycle_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = pop_SecMap_from_freelist();

   if (!sm) {
      sm = shmem__bigchunk_alloc( sizeof(SecMap) );
      stats__secmaps_allocd++;
      stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
      stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
      stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
   }
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

/* Scan the SecMaps and count how many of them can be GC-ed.
   If 'really' is True, actually do the GC of those SecMaps. */
/* NOT TO BE CALLED FROM WITHIN libzsm. */
static UWord next_SecMap_GC_at = 1000;
__attribute__((noinline))
static UWord shmem__SecMap_do_GC(Bool really)
{
   UWord secmapW = 0;
   Addr  gaKey;
   UWord examined = 0;
   UWord ok_GCed = 0;

   /* First invalidate the smCache */
   smCache[0].gaKey = 1;
   smCache[1].gaKey = 1;
   smCache[2].gaKey = 1;
   STATIC_ASSERT (3 == sizeof(smCache)/sizeof(smCache[0]));

   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
      UWord   i;
      UWord   j;
      UWord   n_linesF = 0;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);
      Bool ok_to_GC = True;

      examined++;

      /* Deal with the LineZs and the possible LineF of a LineZ. */
      for (i = 0; i < N_SECMAP_ZLINES && ok_to_GC; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         if (lineZ->dict[0] != SVal_INVALID) {
            ok_to_GC = lineZ->dict[0] == SVal_NOACCESS
               && !SVal__isC (lineZ->dict[1])
               && !SVal__isC (lineZ->dict[2])
               && !SVal__isC (lineZ->dict[3]);
         } else {
            LineF *lineF = LineF_Ptr(lineZ);
            n_linesF++;
            for (j = 0; j < N_LINE_ARANGE && ok_to_GC; j++)
               ok_to_GC = lineF->w64s[j] == SVal_NOACCESS;
         }
      }
      if (ok_to_GC)
         ok_GCed++;
      if (ok_to_GC && really) {
         SecMap *fm_sm;
         Addr fm_gaKey;
         /* We cannot remove a SecMap from map_shmem while iterating.
            So, stop iteration, remove from map_shmem, recreate the iteration
            on the next SecMap. */
         VG_(doneIterFM) ( map_shmem );
         /* No need to rcdec linesZ or linesF, these are all SVal_NOACCESS.
            We just need to free the lineF referenced by the linesZ. */
         if (n_linesF > 0) {
            for (i = 0; i < N_SECMAP_ZLINES && n_linesF > 0; i++) {
               LineZ* lineZ = &sm->linesZ[i];
               if (lineZ->dict[0] == SVal_INVALID) {
                  VG_(freeEltPA)( LineF_pool_allocator, LineF_Ptr(lineZ) );
                  n_linesF--;
               }
            }
         }
         if (!VG_(delFromFM)(map_shmem, &fm_gaKey, (UWord*)&fm_sm, gaKey))
            tl_assert (0);
         stats__secmaps_in_map_shmem--;
         tl_assert (gaKey == fm_gaKey);
         tl_assert (sm == fm_sm);
         stats__secmaps_scanGCed++;
         push_SecMap_on_freelist (sm);
         VG_(initIterAtFM) (map_shmem, gaKey + N_SECMAP_ARANGE);
      }
   }
   VG_(doneIterFM)( map_shmem );

   if (really) {
      stats__secmaps_scanGC++;
      /* Next GC when we approach the max allocated */
      next_SecMap_GC_at = stats__secmaps_allocd - 1000;
      /* Unless we GCed less than 10%.  We then allow 10% more
         allocation before GCing again.  This avoids doing a lot of
         costly GC for the worst case: the 'growing phase' of an
         application that allocates a lot of memory.
         The worst case can be reproduced e.g. by
            perf/memrw -t 30000000 -b 1000 -r 1 -l 1
         that allocates around 30Gb of memory. */
      if (ok_GCed < stats__secmaps_allocd/10)
         next_SecMap_GC_at = stats__secmaps_allocd + stats__secmaps_allocd/10;

   }

   if (VG_(clo_stats) && really) {
      VG_(message)(Vg_DebugMsg,
                   "libhb: SecMap GC: #%lu scanned %lu, GCed %lu,"
                   " next GC at %lu\n",
                   stats__secmaps_scanGC, examined, ok_GCed,
                   next_SecMap_GC_at);
   }

   return ok_GCed;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_or_recycle_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      stats__secmaps_in_map_shmem++;
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   }
}

/* Returns the number of linesF which are in use.  Note: this scans
   the secmap wordFM, so it is to be used for statistics only. */
__attribute__((noinline))
static UWord shmem__SecMap_used_linesF(void)
{
   UWord secmapW = 0;
   Addr  gaKey;
   UWord inUse = 0;

   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
      UWord   i;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);

      for (i = 0; i < N_SECMAP_ZLINES; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         if (lineZ->dict[0] == SVal_INVALID)
            inUse++;
      }
   }
   VG_(doneIterFM)( map_shmem );

   return inUse;
}

/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
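/* Worked example for the two helpers above (editorial illustration).
   The 2-bit entries are packed four to a byte, lowest index in the
   lowest bits.  For ix == 11:

      bix  == 11 >> 2       == 2      (third byte of arr)
      shft == 2 * (11 & 3)  == 6      (bits 7:6 of that byte)

   so write_twobit_array(arr, 11, b2) clears bits 7:6 of arr[2] and
   ORs in b2 << 6, and read_twobit_array(arr, 11) returns
   (arr[2] >> 6) & 3. */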
/* We cache one free lineF, to avoid pool allocator calls.
   Measurement on firefox has shown that this avoids more than 90%
   of the PA calls. */
static LineF *free_lineF = NULL;

/* Allocates a lineF for LineZ.  Sets lineZ in a state indicating
   lineF has to be used. */
static inline LineF *alloc_LineF_for_Z (LineZ *lineZ)
{
   LineF *lineF;

   tl_assert(lineZ->dict[0] == SVal_INVALID);

   if (LIKELY(free_lineF)) {
      lineF = free_lineF;
      free_lineF = NULL;
   } else {
      lineF = VG_(allocEltPA) ( LineF_pool_allocator );
   }
   lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
   lineZ->dict[1] = Ptr2SVal (lineF);

   return lineF;
}

/* rcdec the LineF of lineZ, frees the lineF, and sets lineZ
   back to its initial state SVal_NOACCESS (i.e. ready to be
   read or written just after SecMap allocation). */
static inline void clear_LineF_of_Z (LineZ *lineZ)
{
   LineF *lineF = LineF_Ptr(lineZ);

   rcdec_LineF(lineF);
   if (UNLIKELY(free_lineF)) {
      VG_(freeEltPA)( LineF_pool_allocator, lineF );
   } else {
      free_lineF = lineF;
   }
   lineZ->dict[0] = SVal_NOACCESS;
   lineZ->dict[1] = SVal_INVALID;
}

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache. */
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      lineF = LineF_Ptr (lineZ);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, rcdec the current representation,
   in recognition of the fact that the contents are just about to be
   overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   /* re RCs, we are rcdec_LineZ/clear_LineF_of_Z this LineZ so that new data
      can be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID)
      clear_LineF_of_Z(lineZ);
   else
      rcdec_LineZ(lineZ);
   *smp  = sm;
   *zixp = zix;
}

/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
                                                 return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
                                                 return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16  8 8  8 8 8 8 */
                                                 return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16  16   8 8 8 8 */
                                                 return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
                                                 return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
                                                 return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16  8 8  8 8 16 */
                                                 return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16  16   8 8 16 */
                                                 return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
                                                 return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
                                                 return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16  8 8  16 8 8 */
                                                 return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16  16   16 8 8 */
                                                 return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
                                                 return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
                                                 return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16  8 8  16 16 */
                                                 return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16  16   16 16 */
                                                 return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
                                                 return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16  */
                                                 return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16  8 8 */
                                                 return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16  16  */
                                                 return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
                                                 return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
                                                 return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16  8 8  32 */
                                                 return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16  16   32 */
                                                 return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
                                                 return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
                                                 return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
                   /* INVALID - any valid descr produces at least one
                      valid bit in tree[0..7]*/
   }
   /* NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}
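/* How to read the case table above (editorial illustration).  For
   instance the "8 8 8 8  32" case,
   DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0), means bytes 7..4 each hold
   their own 8-bit value (TREE_DESCR_8_7..8_4) while bytes 3..0 are
   covered by a single 32-bit value rooted at tree[0]
   (TREE_DESCR_32_0).  The corresponding validbits are
   BYTE(1,1,1,1,0,0,0,1): leaves 7..4 and leaf 0 hold live SVals, and
   leaves 3..1 must be SVal_INVALID. */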
static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];    // large enough
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return 0;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (CHECK_ZSM
       && UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                   || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                   || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                   || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}

/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}
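/* Example of what normalisation achieves (editorial illustration).
   If all eight leaves of a tree hold the same SVal, normalise_tree
   first collapses the four 8-bit pairs into 16-bit entries, then
   those into two 32-bit entries, and finally into one 64-bit entry:
   the result is descr == TREE_DESCR_64 with tree[0] holding the value
   and tree[1..7] set to SVal_INVALID.  If only tree[0] and tree[1]
   are equal (and everything else differs), the result keeps
   TREE_DESCR_16_0 plus the six remaining 8-bit bits. */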
typedef struct { UChar count; SVal sval; } CountedSVal;

static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word  tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                                \
         do { dst[dstUsed  ].count = (_n);             \
              dst[dstUsed++].sval  = (_v);             \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */

   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}

/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
   VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%ld ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineF = alloc_LineF_for_Z (lineZ);
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}
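/* Note on the fallback path above (editorial summary).  The Z
   encoding fails only when the 64 bytes of the line need more than 4
   distinct SVals: the dictionary search breaks out of the loop before
   i reaches N_LINE_ARANGE, the partially filled dict is discarded,
   and the line is written back through a freshly allocated LineF at
   512 bytes instead of 48.  Comparing stats__cache_Z_wbacks with
   stats__cache_F_wbacks shows the observed split between the two
   cases. */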
*/ 1629 if (lineF) { 1630 for (i = 0; i < N_LINE_ARANGE; i++) { 1631 cl->svals[i] = lineF->w64s[i]; 1632 } 1633 stats__cache_F_fetches++; 1634 } else { 1635 for (i = 0; i < N_LINE_ARANGE; i++) { 1636 UWord ix = read_twobit_array( lineZ->ix2s, i ); 1637 if (CHECK_ZSM) tl_assert(ix >= 0 && ix <= 3); 1638 cl->svals[i] = lineZ->dict[ix]; 1639 if (CHECK_ZSM) tl_assert(cl->svals[i] != SVal_INVALID); 1640 } 1641 stats__cache_Z_fetches++; 1642 } 1643 normalise_CacheLine( cl ); 1644 } 1645 1646 /* Invalidate the cachelines corresponding to the given range, which 1647 must start and end on a cacheline boundary. */ 1648 static void shmem__invalidate_scache_range (Addr ga, SizeT szB) 1649 { 1650 Word wix; 1651 1652 /* ga must be on a cacheline boundary. */ 1653 tl_assert (is_valid_scache_tag (ga)); 1654 /* szB must be a multiple of cacheline size. */ 1655 tl_assert (0 == (szB & (N_LINE_ARANGE - 1))); 1656 1657 1658 Word ga_ix = (ga >> N_LINE_BITS) & (N_WAY_NENT - 1); 1659 Word nwix = szB / N_LINE_ARANGE; 1660 1661 if (nwix > N_WAY_NENT) 1662 nwix = N_WAY_NENT; // no need to check the same entry several times. 1663 1664 for (wix = 0; wix < nwix; wix++) { 1665 if (address_in_range(cache_shmem.tags0[ga_ix], ga, szB)) 1666 cache_shmem.tags0[ga_ix] = 1/*INVALID*/; 1667 ga_ix++; 1668 if (UNLIKELY(ga_ix == N_WAY_NENT)) 1669 ga_ix = 0; 1670 } 1671 } 1672 1673 1674 static void shmem__flush_and_invalidate_scache ( void ) { 1675 Word wix; 1676 Addr tag; 1677 if (0) VG_(printf)("%s","scache flush and invalidate\n"); 1678 tl_assert(!is_valid_scache_tag(1)); 1679 for (wix = 0; wix < N_WAY_NENT; wix++) { 1680 tag = cache_shmem.tags0[wix]; 1681 if (tag == 1/*INVALID*/) { 1682 /* already invalid; nothing to do */ 1683 } else { 1684 tl_assert(is_valid_scache_tag(tag)); 1685 cacheline_wback( wix ); 1686 } 1687 cache_shmem.tags0[wix] = 1/*INVALID*/; 1688 } 1689 stats__cache_flushes_invals++; 1690 } 1691 1692 1693 static inline Bool aligned16 ( Addr a ) { 1694 return 0 == (a & 1); 1695 } 1696 static inline Bool aligned32 ( Addr a ) { 1697 return 0 == (a & 3); 1698 } 1699 static inline Bool aligned64 ( Addr a ) { 1700 return 0 == (a & 7); 1701 } 1702 static inline UWord get_cacheline_offset ( Addr a ) { 1703 return (UWord)(a & (N_LINE_ARANGE - 1)); 1704 } 1705 static inline Addr cacheline_ROUNDUP ( Addr a ) { 1706 return ROUNDUP(a, N_LINE_ARANGE); 1707 } 1708 static inline Addr cacheline_ROUNDDN ( Addr a ) { 1709 return ROUNDDN(a, N_LINE_ARANGE); 1710 } 1711 static inline UWord get_treeno ( Addr a ) { 1712 return get_cacheline_offset(a) >> 3; 1713 } 1714 static inline UWord get_tree_offset ( Addr a ) { 1715 return a & 7; 1716 } 1717 1718 static __attribute__((noinline)) 1719 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */ 1720 static inline CacheLine* get_cacheline ( Addr a ) 1721 { 1722 /* tag is 'a' with the in-line offset masked out, 1723 eg a[31]..a[4] 0000 */ 1724 Addr tag = a & ~(N_LINE_ARANGE - 1); 1725 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); 1726 stats__cache_totrefs++; 1727 if (LIKELY(tag == cache_shmem.tags0[wix])) { 1728 return &cache_shmem.lyns0[wix]; 1729 } else { 1730 return get_cacheline_MISS( a ); 1731 } 1732 } 1733 1734 static __attribute__((noinline)) 1735 CacheLine* get_cacheline_MISS ( Addr a ) 1736 { 1737 /* tag is 'a' with the in-line offset masked out, 1738 eg a[31]..a[4] 0000 */ 1739 1740 CacheLine* cl; 1741 Addr* tag_old_p; 1742 Addr tag = a & ~(N_LINE_ARANGE - 1); 1743 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); 1744 1745 tl_assert(tag != cache_shmem.tags0[wix]); 1746 1747 /*
Dump the old line into the backing store. */ 1748 stats__cache_totmisses++; 1749 1750 cl = &cache_shmem.lyns0[wix]; 1751 tag_old_p = &cache_shmem.tags0[wix]; 1752 1753 if (is_valid_scache_tag( *tag_old_p )) { 1754 /* EXPENSIVE and REDUNDANT: callee does it */ 1755 if (CHECK_ZSM) 1756 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 1757 cacheline_wback( wix ); 1758 } 1759 /* and reload the new one */ 1760 *tag_old_p = tag; 1761 cacheline_fetch( wix ); 1762 if (CHECK_ZSM) 1763 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ 1764 return cl; 1765 } 1766 1767 static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) { 1768 stats__cline_64to32pulldown++; 1769 switch (toff) { 1770 case 0: case 4: 1771 tl_assert(descr & TREE_DESCR_64); 1772 tree[4] = tree[0]; 1773 descr &= ~TREE_DESCR_64; 1774 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0); 1775 break; 1776 default: 1777 tl_assert(0); 1778 } 1779 return descr; 1780 } 1781 1782 static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) { 1783 stats__cline_32to16pulldown++; 1784 switch (toff) { 1785 case 0: case 2: 1786 if (!(descr & TREE_DESCR_32_0)) { 1787 descr = pulldown_to_32(tree, 0, descr); 1788 } 1789 tl_assert(descr & TREE_DESCR_32_0); 1790 tree[2] = tree[0]; 1791 descr &= ~TREE_DESCR_32_0; 1792 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0); 1793 break; 1794 case 4: case 6: 1795 if (!(descr & TREE_DESCR_32_1)) { 1796 descr = pulldown_to_32(tree, 4, descr); 1797 } 1798 tl_assert(descr & TREE_DESCR_32_1); 1799 tree[6] = tree[4]; 1800 descr &= ~TREE_DESCR_32_1; 1801 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2); 1802 break; 1803 default: 1804 tl_assert(0); 1805 } 1806 return descr; 1807 } 1808 1809 static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) { 1810 stats__cline_16to8pulldown++; 1811 switch (toff) { 1812 case 0: case 1: 1813 if (!(descr & TREE_DESCR_16_0)) { 1814 descr = pulldown_to_16(tree, 0, descr); 1815 } 1816 tl_assert(descr & TREE_DESCR_16_0); 1817 tree[1] = tree[0]; 1818 descr &= ~TREE_DESCR_16_0; 1819 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0); 1820 break; 1821 case 2: case 3: 1822 if (!(descr & TREE_DESCR_16_1)) { 1823 descr = pulldown_to_16(tree, 2, descr); 1824 } 1825 tl_assert(descr & TREE_DESCR_16_1); 1826 tree[3] = tree[2]; 1827 descr &= ~TREE_DESCR_16_1; 1828 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2); 1829 break; 1830 case 4: case 5: 1831 if (!(descr & TREE_DESCR_16_2)) { 1832 descr = pulldown_to_16(tree, 4, descr); 1833 } 1834 tl_assert(descr & TREE_DESCR_16_2); 1835 tree[5] = tree[4]; 1836 descr &= ~TREE_DESCR_16_2; 1837 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4); 1838 break; 1839 case 6: case 7: 1840 if (!(descr & TREE_DESCR_16_3)) { 1841 descr = pulldown_to_16(tree, 6, descr); 1842 } 1843 tl_assert(descr & TREE_DESCR_16_3); 1844 tree[7] = tree[6]; 1845 descr &= ~TREE_DESCR_16_3; 1846 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6); 1847 break; 1848 default: 1849 tl_assert(0); 1850 } 1851 return descr; 1852 } 1853 1854 1855 static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) { 1856 UShort mask; 1857 switch (toff) { 1858 case 0: 1859 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0; 1860 tl_assert( (descr & mask) == mask ); 1861 descr &= ~mask; 1862 descr |= TREE_DESCR_16_0; 1863 break; 1864 case 2: 1865 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2; 1866 tl_assert( (descr & mask) == mask ); 1867 descr &= ~mask; 1868 descr |= TREE_DESCR_16_1; 1869 break; 1870 case 4: 1871 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4; 1872 tl_assert( (descr & mask) == mask ); 1873 descr &= 
~mask; 1874 descr |= TREE_DESCR_16_2; 1875 break; 1876 case 6: 1877 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6; 1878 tl_assert( (descr & mask) == mask ); 1879 descr &= ~mask; 1880 descr |= TREE_DESCR_16_3; 1881 break; 1882 default: 1883 tl_assert(0); 1884 } 1885 return descr; 1886 } 1887 1888 static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) { 1889 UShort mask; 1890 switch (toff) { 1891 case 0: 1892 if (!(descr & TREE_DESCR_16_0)) 1893 descr = pullup_descr_to_16(descr, 0); 1894 if (!(descr & TREE_DESCR_16_1)) 1895 descr = pullup_descr_to_16(descr, 2); 1896 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0; 1897 tl_assert( (descr & mask) == mask ); 1898 descr &= ~mask; 1899 descr |= TREE_DESCR_32_0; 1900 break; 1901 case 4: 1902 if (!(descr & TREE_DESCR_16_2)) 1903 descr = pullup_descr_to_16(descr, 4); 1904 if (!(descr & TREE_DESCR_16_3)) 1905 descr = pullup_descr_to_16(descr, 6); 1906 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2; 1907 tl_assert( (descr & mask) == mask ); 1908 descr &= ~mask; 1909 descr |= TREE_DESCR_32_1; 1910 break; 1911 default: 1912 tl_assert(0); 1913 } 1914 return descr; 1915 } 1916 1917 static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) { 1918 switch (toff) { 1919 case 0: case 4: 1920 return 0 != (descr & TREE_DESCR_64); 1921 default: 1922 tl_assert(0); 1923 } 1924 } 1925 1926 static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) { 1927 switch (toff) { 1928 case 0: 1929 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0)); 1930 case 2: 1931 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2)); 1932 case 4: 1933 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4)); 1934 case 6: 1935 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6)); 1936 default: 1937 tl_assert(0); 1938 } 1939 } 1940 1941 /* ------------ Cache management ------------ */ 1942 1943 static void zsm_flush_cache ( void ) 1944 { 1945 shmem__flush_and_invalidate_scache(); 1946 } 1947 1948 1949 static void zsm_init ( void ) 1950 { 1951 tl_assert( sizeof(UWord) == sizeof(Addr) ); 1952 1953 tl_assert(map_shmem == NULL); 1954 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)", 1955 HG_(free), 1956 NULL/*unboxed UWord cmp*/); 1957 /* Invalidate all cache entries. */ 1958 tl_assert(!is_valid_scache_tag(1)); 1959 for (UWord wix = 0; wix < N_WAY_NENT; wix++) { 1960 cache_shmem.tags0[wix] = 1/*INVALID*/; 1961 } 1962 1963 LineF_pool_allocator = VG_(newPA) ( 1964 sizeof(LineF), 1965 /* Nr elements/pool to fill a core arena block 1966 taking some arena overhead into account. */ 1967 (4 * 1024 * 1024 - 200)/sizeof(LineF), 1968 HG_(zalloc), 1969 "libhb.LineF_storage.pool", 1970 HG_(free) 1971 ); 1972 1973 /* a SecMap must contain an integral number of CacheLines */ 1974 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE)); 1975 /* also ... 
a CacheLine holds an integral number of trees */ 1976 tl_assert(0 == (N_LINE_ARANGE % 8)); 1977 } 1978 1979 ///////////////////////////////////////////////////////////////// 1980 ///////////////////////////////////////////////////////////////// 1981 // // 1982 // SECTION END compressed shadow memory // 1983 // // 1984 ///////////////////////////////////////////////////////////////// 1985 ///////////////////////////////////////////////////////////////// 1986 1987 1988 1989 ///////////////////////////////////////////////////////////////// 1990 ///////////////////////////////////////////////////////////////// 1991 // // 1992 // SECTION BEGIN vts primitives // 1993 // // 1994 ///////////////////////////////////////////////////////////////// 1995 ///////////////////////////////////////////////////////////////// 1996 1997 1998 /* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely 1999 being compact stand-ins for Thr*'s. Use these functions to map 2000 between them. */ 2001 static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */ 2002 static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */ 2003 2004 __attribute__((noreturn)) 2005 static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs ) 2006 { 2007 if (due_to_nThrs) { 2008 const HChar* s = 2009 "\n" 2010 "Helgrind: cannot continue, run aborted: too many threads.\n" 2011 "Sorry. Helgrind can only handle programs that create\n" 2012 "%'llu or fewer threads over their entire lifetime.\n" 2013 "\n"; 2014 VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024)); 2015 } else { 2016 const HChar* s = 2017 "\n" 2018 "Helgrind: cannot continue, run aborted: too many\n" 2019 "synchronisation events. Sorry. Helgrind can only handle\n" 2020 "programs which perform %'llu or fewer\n" 2021 "inter-thread synchronisation events (locks, unlocks, etc).\n" 2022 "\n"; 2023 VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1); 2024 } 2025 VG_(exit)(1); 2026 /*NOTREACHED*/ 2027 tl_assert(0); /*wtf?!*/ 2028 } 2029 2030 2031 /* The dead thread (ThrID, actually) tables. A thread may only be 2032 listed here if we have been notified thereof by libhb_async_exit. 2033 New entries are added at the end. The order isn't important, but 2034 the ThrID values must be unique. 2035 verydead_thread_table_not_pruned lists the identity of the threads 2036 that died since the previous round of pruning. 2037 Once pruning is done, these ThrID are added in verydead_thread_table. 2038 We don't actually need to keep the set of threads that have ever died -- 2039 only the threads that have died since the previous round of 2040 pruning. But it's useful for sanity check purposes to keep the 2041 entire set, so we do. */ 2042 static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL; 2043 static XArray* /* of ThrID */ verydead_thread_table = NULL; 2044 2045 /* Arbitrary total ordering on ThrIDs. 
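Installed below as the XArray comparator for both dead-thread tables, so that they can be sorted and then looked up by ThrID.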
*/ 2046 static Int cmp__ThrID ( const void* v1, const void* v2 ) { 2047 ThrID id1 = *(const ThrID*)v1; 2048 ThrID id2 = *(const ThrID*)v2; 2049 if (id1 < id2) return -1; 2050 if (id1 > id2) return 1; 2051 return 0; 2052 } 2053 2054 static void verydead_thread_tables_init ( void ) 2055 { 2056 tl_assert(!verydead_thread_table); 2057 tl_assert(!verydead_thread_table_not_pruned); 2058 verydead_thread_table 2059 = VG_(newXA)( HG_(zalloc), 2060 "libhb.verydead_thread_table_init.1", 2061 HG_(free), sizeof(ThrID) ); 2062 VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID); 2063 verydead_thread_table_not_pruned 2064 = VG_(newXA)( HG_(zalloc), 2065 "libhb.verydead_thread_table_init.2", 2066 HG_(free), sizeof(ThrID) ); 2067 VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID); 2068 } 2069 2070 static void verydead_thread_table_sort_and_check (XArray* thrids) 2071 { 2072 UWord i; 2073 2074 VG_(sortXA)( thrids ); 2075 /* Sanity check: check for unique ThrID values. */ 2076 UWord nBT = VG_(sizeXA)( thrids ); 2077 if (nBT > 0) { 2078 ThrID thrid1, thrid2; 2079 thrid2 = *(ThrID*)VG_(indexXA)( thrids, 0 ); 2080 for (i = 1; i < nBT; i++) { 2081 thrid1 = thrid2; 2082 thrid2 = *(ThrID*)VG_(indexXA)( thrids, i ); 2083 tl_assert(thrid1 < thrid2); 2084 } 2085 } 2086 /* Ok, so the dead thread table thrids has unique and in-order keys. */ 2087 } 2088 2089 /* A VTS contains .ts, its vector clock, and also .id, a field to hold 2090 a backlink for the caller's convenience. Since we have no idea 2091 what to set that to in the library, it always gets set to 2092 VtsID_INVALID. */ 2093 typedef 2094 struct { 2095 VtsID id; 2096 UInt usedTS; 2097 UInt sizeTS; 2098 ScalarTS ts[0]; 2099 } 2100 VTS; 2101 2102 /* Allocate a VTS capable of storing 'sizeTS' entries. */ 2103 static VTS* VTS__new ( const HChar* who, UInt sizeTS ); 2104 2105 /* Make a clone of 'vts', sizing the new array to exactly match the 2106 number of ScalarTSs present. */ 2107 static VTS* VTS__clone ( const HChar* who, VTS* vts ); 2108 2109 /* Make a clone of 'vts' with the thrids in 'thridsToDel' removed. The new 2110 array is sized exactly to hold the number of required elements. 2111 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and 2112 must be in strictly increasing order. */ 2113 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel ); 2114 2115 /* Delete this VTS in its entirety. */ 2116 static void VTS__delete ( VTS* vts ); 2117 2118 /* Create a new singleton VTS in 'out'. Caller must have 2119 pre-allocated 'out' sufficiently big to hold the result in all 2120 possible cases. */ 2121 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym ); 2122 2123 /* Create in 'out' a VTS which is the same as 'vts' except with 2124 vts[me]++, so to speak. Caller must have pre-allocated 'out' 2125 sufficiently big to hold the result in all possible cases. */ 2126 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts ); 2127 2128 /* Create in 'out' a VTS which is the join (max) of 'a' and 2129 'b'. Caller must have pre-allocated 'out' sufficiently big to hold 2130 the result in all possible cases. */ 2131 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b ); 2132 2133 /* Compute the partial ordering relation of the two args. Although we 2134 could be completely general and return an enumeration value (EQ, 2135 LT, GT, UN), in fact we only need LEQ, and so we may as well 2136 hardwire that fact. 2137 2138 Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an 2139 invalid ThrID).
In the latter case, the returned ThrID indicates 2140 the discovered point for which they are not. There may be more 2141 than one such point, but we only care about seeing one of them, not 2142 all of them. This rather strange convention is used because 2143 sometimes we want to know the actual index at which they first 2144 differ. */ 2145 static UInt VTS__cmpLEQ ( VTS* a, VTS* b ); 2146 2147 /* Compute an arbitrary structural (total) ordering on the two args, 2148 based on their VCs, so they can be looked up in a table, tree, etc. 2149 Returns -1, 0 or 1. */ 2150 static Word VTS__cmp_structural ( VTS* a, VTS* b ); 2151 2152 /* Debugging only. Display the given VTS. */ 2153 static void VTS__show ( const VTS* vts ); 2154 2155 /* Debugging only. Return vts[index], so to speak. */ 2156 static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ); 2157 2158 /* Notify the VTS machinery that a thread has been declared 2159 comprehensively dead: that is, it has done an async exit AND it has 2160 been joined with. This should ensure that its local clocks (.viR 2161 and .viW) will never again change, and so all mentions of this 2162 thread from all VTSs in the system may be removed. */ 2163 static void VTS__declare_thread_very_dead ( Thr* idx ); 2164 2165 /*--------------- to do with Vector Timestamps ---------------*/ 2166 2167 static Bool is_sane_VTS ( VTS* vts ) 2168 { 2169 UWord i, n; 2170 ScalarTS *st1, *st2; 2171 if (!vts) return False; 2172 if (vts->usedTS > vts->sizeTS) return False; 2173 n = vts->usedTS; 2174 if (n == 1) { 2175 st1 = &vts->ts[0]; 2176 if (st1->tym == 0) 2177 return False; 2178 } 2179 else 2180 if (n >= 2) { 2181 for (i = 0; i < n-1; i++) { 2182 st1 = &vts->ts[i]; 2183 st2 = &vts->ts[i+1]; 2184 if (st1->thrid >= st2->thrid) 2185 return False; 2186 if (st1->tym == 0 || st2->tym == 0) 2187 return False; 2188 } 2189 } 2190 return True; 2191 } 2192 2193 2194 /* Create a new, empty VTS. 2195 */ 2196 static VTS* VTS__new ( const HChar* who, UInt sizeTS ) 2197 { 2198 VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS)); 2199 tl_assert(vts->usedTS == 0); 2200 vts->sizeTS = sizeTS; 2201 *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL; 2202 return vts; 2203 } 2204 2205 /* Clone this VTS. 2206 */ 2207 static VTS* VTS__clone ( const HChar* who, VTS* vts ) 2208 { 2209 tl_assert(vts); 2210 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL); 2211 UInt nTS = vts->usedTS; 2212 VTS* clone = VTS__new(who, nTS); 2213 clone->id = vts->id; 2214 clone->sizeTS = nTS; 2215 clone->usedTS = nTS; 2216 UInt i; 2217 for (i = 0; i < nTS; i++) { 2218 clone->ts[i] = vts->ts[i]; 2219 } 2220 tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL); 2221 return clone; 2222 } 2223 2224 2225 /* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel' 2226 must be in strictly increasing order. We could obviously do this 2227 much more efficiently (in linear time) if necessary. 2228 */ 2229 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel ) 2230 { 2231 UInt i, j; 2232 tl_assert(vts); 2233 tl_assert(thridsToDel); 2234 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL); 2235 UInt nTS = vts->usedTS; 2236 /* Figure out how many ScalarTSs will remain in the output. */ 2237 UInt nReq = nTS; 2238 for (i = 0; i < nTS; i++) { 2239 ThrID thrid = vts->ts[i].thrid; 2240 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL)) 2241 nReq--; 2242 } 2243 tl_assert(nReq <= nTS); 2244 /* Copy the ones that will remain. 
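(This is the second pass over vts->ts: the loop above only counted how many entries survive, so that the result can be allocated with exactly nReq slots.)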
*/ 2245 VTS* res = VTS__new(who, nReq); 2246 j = 0; 2247 for (i = 0; i < nTS; i++) { 2248 ThrID thrid = vts->ts[i].thrid; 2249 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL)) 2250 continue; 2251 res->ts[j++] = vts->ts[i]; 2252 } 2253 tl_assert(j == nReq); 2254 tl_assert(j == res->sizeTS); 2255 res->usedTS = j; 2256 tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL); 2257 return res; 2258 } 2259 2260 2261 /* Delete this VTS in its entirety. 2262 */ 2263 static void VTS__delete ( VTS* vts ) 2264 { 2265 tl_assert(vts); 2266 tl_assert(vts->usedTS <= vts->sizeTS); 2267 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL); 2268 HG_(free)(vts); 2269 } 2270 2271 2272 /* Create a new singleton VTS. 2273 */ 2274 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym ) 2275 { 2276 tl_assert(thr); 2277 tl_assert(tym >= 1); 2278 tl_assert(out); 2279 tl_assert(out->usedTS == 0); 2280 tl_assert(out->sizeTS >= 1); 2281 UInt hi = out->usedTS++; 2282 out->ts[hi].thrid = Thr__to_ThrID(thr); 2283 out->ts[hi].tym = tym; 2284 } 2285 2286 2287 /* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is 2288 not modified. 2289 */ 2290 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts ) 2291 { 2292 UInt i, n; 2293 ThrID me_thrid; 2294 Bool found = False; 2295 2296 stats__vts__tick++; 2297 2298 tl_assert(out); 2299 tl_assert(out->usedTS == 0); 2300 if (vts->usedTS >= ThrID_MAX_VALID) 2301 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ ); 2302 tl_assert(out->sizeTS >= 1 + vts->usedTS); 2303 2304 tl_assert(me); 2305 me_thrid = Thr__to_ThrID(me); 2306 tl_assert(is_sane_VTS(vts)); 2307 n = vts->usedTS; 2308 2309 /* Copy all entries which precede 'me'. */ 2310 for (i = 0; i < n; i++) { 2311 ScalarTS* here = &vts->ts[i]; 2312 if (UNLIKELY(here->thrid >= me_thrid)) 2313 break; 2314 UInt hi = out->usedTS++; 2315 out->ts[hi] = *here; 2316 } 2317 2318 /* 'i' now indicates the next entry to copy, if any. 2319 There are 3 possibilities: 2320 (a) there is no next entry (we used them all up already): 2321 add (me_thrid,1) to the output, and quit 2322 (b) there is a next entry, and its thrid > me_thrid: 2323 add (me_thrid,1) to the output, then copy the remaining entries 2324 (c) there is a next entry, and its thrid == me_thrid: 2325 copy it to the output but increment its timestamp value. 2326 Then copy the remaining entries. (c) is the common case. 2327 */ 2328 tl_assert(i >= 0 && i <= n); 2329 if (i == n) { /* case (a) */ 2330 UInt hi = out->usedTS++; 2331 out->ts[hi].thrid = me_thrid; 2332 out->ts[hi].tym = 1; 2333 } else { 2334 /* cases (b) and (c) */ 2335 ScalarTS* here = &vts->ts[i]; 2336 if (me_thrid == here->thrid) { /* case (c) */ 2337 if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) { 2338 /* We're hosed. We have to stop. */ 2339 scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ ); 2340 } 2341 UInt hi = out->usedTS++; 2342 out->ts[hi].thrid = here->thrid; 2343 out->ts[hi].tym = here->tym + 1; 2344 i++; 2345 found = True; 2346 } else { /* case (b) */ 2347 UInt hi = out->usedTS++; 2348 out->ts[hi].thrid = me_thrid; 2349 out->ts[hi].tym = 1; 2350 } 2351 /* And copy any remaining entries. */ 2352 for (/*keepgoing*/; i < n; i++) { 2353 ScalarTS* here2 = &vts->ts[i]; 2354 UInt hi = out->usedTS++; 2355 out->ts[hi] = *here2; 2356 } 2357 } 2358 2359 tl_assert(is_sane_VTS(out)); 2360 tl_assert(out->usedTS == vts->usedTS + (found ? 
0 : 1)); 2361 tl_assert(out->usedTS <= out->sizeTS); 2362 } 2363 2364 2365 /* Return a new VTS constructed as the join (max) of the 2 args. 2366 Neither arg is modified. 2367 */ 2368 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b ) 2369 { 2370 UInt ia, ib, useda, usedb; 2371 ULong tyma, tymb, tymMax; 2372 ThrID thrid; 2373 UInt ncommon = 0; 2374 2375 stats__vts__join++; 2376 2377 tl_assert(a); 2378 tl_assert(b); 2379 useda = a->usedTS; 2380 usedb = b->usedTS; 2381 2382 tl_assert(out); 2383 tl_assert(out->usedTS == 0); 2384 /* overly conservative test, but doing better involves comparing 2385 the two VTSs, which we don't want to do at this point. */ 2386 if (useda + usedb >= ThrID_MAX_VALID) 2387 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ ); 2388 tl_assert(out->sizeTS >= useda + usedb); 2389 2390 ia = ib = 0; 2391 2392 while (1) { 2393 2394 /* This logic is to enumerate triples (thrid, tyma, tymb) drawn 2395 from a and b in order, where thrid is the next ThrID 2396 occurring in either a or b, and tyma/b are the relevant 2397 scalar timestamps, taking into account implicit zeroes. */ 2398 tl_assert(ia >= 0 && ia <= useda); 2399 tl_assert(ib >= 0 && ib <= usedb); 2400 2401 if (ia == useda && ib == usedb) { 2402 /* both empty - done */ 2403 break; 2404 2405 } else if (ia == useda && ib != usedb) { 2406 /* a empty, use up b */ 2407 ScalarTS* tmpb = &b->ts[ib]; 2408 thrid = tmpb->thrid; 2409 tyma = 0; 2410 tymb = tmpb->tym; 2411 ib++; 2412 2413 } else if (ia != useda && ib == usedb) { 2414 /* b empty, use up a */ 2415 ScalarTS* tmpa = &a->ts[ia]; 2416 thrid = tmpa->thrid; 2417 tyma = tmpa->tym; 2418 tymb = 0; 2419 ia++; 2420 2421 } else { 2422 /* both not empty; extract lowest-ThrID'd triple */ 2423 ScalarTS* tmpa = &a->ts[ia]; 2424 ScalarTS* tmpb = &b->ts[ib]; 2425 if (tmpa->thrid < tmpb->thrid) { 2426 /* a has the lowest unconsidered ThrID */ 2427 thrid = tmpa->thrid; 2428 tyma = tmpa->tym; 2429 tymb = 0; 2430 ia++; 2431 } else if (tmpa->thrid > tmpb->thrid) { 2432 /* b has the lowest unconsidered ThrID */ 2433 thrid = tmpb->thrid; 2434 tyma = 0; 2435 tymb = tmpb->tym; 2436 ib++; 2437 } else { 2438 /* they both next mention the same ThrID */ 2439 tl_assert(tmpa->thrid == tmpb->thrid); 2440 thrid = tmpa->thrid; /* == tmpb->thrid */ 2441 tyma = tmpa->tym; 2442 tymb = tmpb->tym; 2443 ia++; 2444 ib++; 2445 ncommon++; 2446 } 2447 } 2448 2449 /* having laboriously determined (thr, tyma, tymb), do something 2450 useful with it. */ 2451 tymMax = tyma > tymb ? tyma : tymb; 2452 if (tymMax > 0) { 2453 UInt hi = out->usedTS++; 2454 out->ts[hi].thrid = thrid; 2455 out->ts[hi].tym = tymMax; 2456 } 2457 2458 } 2459 2460 tl_assert(is_sane_VTS(out)); 2461 tl_assert(out->usedTS <= out->sizeTS); 2462 tl_assert(out->usedTS == useda + usedb - ncommon); 2463 } 2464 2465 2466 /* Determine if 'a' <= 'b', in the partial ordering. Returns zero if 2467 they are, or the first ThrID for which they are not (no valid ThrID 2468 has the value zero). This rather strange convention is used 2469 because sometimes we want to know the actual index at which they 2470 first differ. 
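For example, writing entries as thrid:tym and treating missing entries as zero: if A = [ 1024:3 1025:7 ] and B = [ 1024:5 1025:7 ] then LEQ(A,B) holds and 0 is returned, whereas if A = [ 1024:6 ] and B = [ 1024:4 1025:9 ] the comparison fails at 1024 (since 6 > 4) and 1024 is returned.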
*/ 2471 static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b ) 2472 { 2473 Word ia, ib, useda, usedb; 2474 ULong tyma, tymb; 2475 2476 stats__vts__cmpLEQ++; 2477 2478 tl_assert(a); 2479 tl_assert(b); 2480 useda = a->usedTS; 2481 usedb = b->usedTS; 2482 2483 ia = ib = 0; 2484 2485 while (1) { 2486 2487 /* This logic is to enumerate doubles (tyma, tymb) drawn 2488 from a and b in order, where tyma/b are the relevant 2489 scalar timestamps, taking into account implicit zeroes. */ 2490 ThrID thrid; 2491 2492 tl_assert(ia >= 0 && ia <= useda); 2493 tl_assert(ib >= 0 && ib <= usedb); 2494 2495 if (ia == useda && ib == usedb) { 2496 /* both empty - done */ 2497 break; 2498 2499 } else if (ia == useda && ib != usedb) { 2500 /* a empty, use up b */ 2501 ScalarTS* tmpb = &b->ts[ib]; 2502 tyma = 0; 2503 tymb = tmpb->tym; 2504 thrid = tmpb->thrid; 2505 ib++; 2506 2507 } else if (ia != useda && ib == usedb) { 2508 /* b empty, use up a */ 2509 ScalarTS* tmpa = &a->ts[ia]; 2510 tyma = tmpa->tym; 2511 thrid = tmpa->thrid; 2512 tymb = 0; 2513 ia++; 2514 2515 } else { 2516 /* both not empty; extract lowest-ThrID'd triple */ 2517 ScalarTS* tmpa = &a->ts[ia]; 2518 ScalarTS* tmpb = &b->ts[ib]; 2519 if (tmpa->thrid < tmpb->thrid) { 2520 /* a has the lowest unconsidered ThrID */ 2521 tyma = tmpa->tym; 2522 thrid = tmpa->thrid; 2523 tymb = 0; 2524 ia++; 2525 } 2526 else 2527 if (tmpa->thrid > tmpb->thrid) { 2528 /* b has the lowest unconsidered ThrID */ 2529 tyma = 0; 2530 tymb = tmpb->tym; 2531 thrid = tmpb->thrid; 2532 ib++; 2533 } else { 2534 /* they both next mention the same ThrID */ 2535 tl_assert(tmpa->thrid == tmpb->thrid); 2536 tyma = tmpa->tym; 2537 thrid = tmpa->thrid; 2538 tymb = tmpb->tym; 2539 ia++; 2540 ib++; 2541 } 2542 } 2543 2544 /* having laboriously determined (tyma, tymb), do something 2545 useful with it. */ 2546 if (tyma > tymb) { 2547 /* not LEQ at this index. Quit, since the answer is 2548 determined already. */ 2549 tl_assert(thrid >= 1024); 2550 return thrid; 2551 } 2552 } 2553 2554 return 0; /* all points are LEQ => return an invalid ThrID */ 2555 } 2556 2557 2558 /* Compute an arbitrary structural (total) ordering on the two args, 2559 based on their VCs, so they can be looked up in a table, tree, etc. 2560 Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be 2561 performance critical so there is some effort expended to make it as 2562 fast as possible. 2563 */ 2564 Word VTS__cmp_structural ( VTS* a, VTS* b ) 2565 { 2566 /* We just need to generate an arbitrary total ordering based on 2567 a->ts and b->ts. Preferably do it in a way which comes across likely 2568 differences relatively quickly. */ 2569 Word i; 2570 Word useda = 0, usedb = 0; 2571 ScalarTS *ctsa = NULL, *ctsb = NULL; 2572 2573 stats__vts__cmp_structural++; 2574 2575 tl_assert(a); 2576 tl_assert(b); 2577 2578 ctsa = &a->ts[0]; useda = a->usedTS; 2579 ctsb = &b->ts[0]; usedb = b->usedTS; 2580 2581 if (LIKELY(useda == usedb)) { 2582 ScalarTS *tmpa = NULL, *tmpb = NULL; 2583 stats__vts__cmp_structural_slow++; 2584 /* Same length vectors. Find the first difference, if any, as 2585 fast as possible. */ 2586 for (i = 0; i < useda; i++) { 2587 tmpa = &ctsa[i]; 2588 tmpb = &ctsb[i]; 2589 if (LIKELY(tmpa->tym == tmpb->tym 2590 && tmpa->thrid == tmpb->thrid)) 2591 continue; 2592 else 2593 break; 2594 } 2595 if (UNLIKELY(i == useda)) { 2596 /* They're identical.
*/ 2597 return 0; 2598 } else { 2599 tl_assert(i >= 0 && i < useda); 2600 if (tmpa->tym < tmpb->tym) return -1; 2601 if (tmpa->tym > tmpb->tym) return 1; 2602 if (tmpa->thrid < tmpb->thrid) return -1; 2603 if (tmpa->thrid > tmpb->thrid) return 1; 2604 /* we just established them as non-identical, hence: */ 2605 } 2606 /*NOTREACHED*/ 2607 tl_assert(0); 2608 } 2609 2610 if (useda < usedb) return -1; 2611 if (useda > usedb) return 1; 2612 /*NOTREACHED*/ 2613 tl_assert(0); 2614 } 2615 2616 2617 /* Debugging only. Display the given VTS. 2618 */ 2619 static void VTS__show ( const VTS* vts ) 2620 { 2621 Word i, n; 2622 tl_assert(vts); 2623 2624 VG_(printf)("["); 2625 n = vts->usedTS; 2626 for (i = 0; i < n; i++) { 2627 const ScalarTS *st = &vts->ts[i]; 2628 VG_(printf)(i < n-1 ? "%d:%llu " : "%d:%llu", st->thrid, (ULong)st->tym); 2629 } 2630 VG_(printf)("]"); 2631 } 2632 2633 2634 /* Debugging only. Return vts[index], so to speak. 2635 */ 2636 ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) 2637 { 2638 UWord i, n; 2639 ThrID idx_thrid = Thr__to_ThrID(idx); 2640 stats__vts__indexat_slow++; 2641 tl_assert(vts); 2642 n = vts->usedTS; 2643 for (i = 0; i < n; i++) { 2644 ScalarTS* st = &vts->ts[i]; 2645 if (st->thrid == idx_thrid) 2646 return st->tym; 2647 } 2648 return 0; 2649 } 2650 2651 2652 /* See comment on prototype above. 2653 */ 2654 static void VTS__declare_thread_very_dead ( Thr* thr ) 2655 { 2656 if (0) VG_(printf)("VTQ: tae %p\n", thr); 2657 2658 tl_assert(thr->llexit_done); 2659 tl_assert(thr->joinedwith_done); 2660 2661 ThrID nyu; 2662 nyu = Thr__to_ThrID(thr); 2663 VG_(addToXA)( verydead_thread_table_not_pruned, &nyu ); 2664 2665 /* We can only get here if we're assured that we'll never again 2666 need to look at this thread's ::viR or ::viW. Set them to 2667 VtsID_INVALID, partly so as to avoid holding on to the VTSs, but 2668 mostly so that we don't wind up pruning them (as that would be 2669 nonsensical: the only interesting ScalarTS entry for a dead 2670 thread is its own index, and the pruning will remove that.). */ 2671 VtsID__rcdec(thr->viR); 2672 VtsID__rcdec(thr->viW); 2673 thr->viR = VtsID_INVALID; 2674 thr->viW = VtsID_INVALID; 2675 } 2676 2677 2678 ///////////////////////////////////////////////////////////////// 2679 ///////////////////////////////////////////////////////////////// 2680 // // 2681 // SECTION END vts primitives // 2682 // // 2683 ///////////////////////////////////////////////////////////////// 2684 ///////////////////////////////////////////////////////////////// 2685 2686 2687 2688 ///////////////////////////////////////////////////////////////// 2689 ///////////////////////////////////////////////////////////////// 2690 // // 2691 // SECTION BEGIN main library // 2692 // // 2693 ///////////////////////////////////////////////////////////////// 2694 ///////////////////////////////////////////////////////////////// 2695 2696 2697 ///////////////////////////////////////////////////////// 2698 // // 2699 // VTS set // 2700 // // 2701 ///////////////////////////////////////////////////////// 2702 2703 static WordFM* /* WordFM VTS* void */ vts_set = NULL; 2704 2705 static void vts_set_init ( void ) 2706 { 2707 tl_assert(!vts_set); 2708 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1", 2709 HG_(free), 2710 (Word(*)(UWord,UWord))VTS__cmp_structural ); 2711 } 2712 2713 /* Given a VTS, look in vts_set to see if we already have a 2714 structurally identical one. If yes, return the pair (True, pointer 2715 to the existing one). 
If no, clone this one, add the clone to the 2716 set, and return (False, pointer to the clone). */ 2717 static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand ) 2718 { 2719 UWord keyW, valW; 2720 stats__vts_set__focaa++; 2721 tl_assert(cand->id == VtsID_INVALID); 2722 /* lookup cand (by value) */ 2723 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) { 2724 /* found it */ 2725 tl_assert(valW == 0); 2726 /* if this fails, cand (by ref) was already present (!) */ 2727 tl_assert(keyW != (UWord)cand); 2728 *res = (VTS*)keyW; 2729 return True; 2730 } else { 2731 /* not present. Clone, add and return address of clone. */ 2732 stats__vts_set__focaa_a++; 2733 VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand ); 2734 tl_assert(clone != cand); 2735 VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ ); 2736 *res = clone; 2737 return False; 2738 } 2739 } 2740 2741 2742 ///////////////////////////////////////////////////////// 2743 // // 2744 // VTS table // 2745 // // 2746 ///////////////////////////////////////////////////////// 2747 2748 static void VtsID__invalidate_caches ( void ); /* fwds */ 2749 2750 /* A type to hold VTS table entries. Invariants: 2751 If .vts == NULL, then this entry is not in use, so: 2752 - .rc == 0 2753 - this entry is on the freelist (unfortunately, does not imply 2754 any constraints on value for u.freelink) 2755 If .vts != NULL, then this entry is in use: 2756 - .vts is findable in vts_set 2757 - .vts->id == this entry number 2758 - no specific value for .rc (even 0 is OK) 2759 - this entry is not on freelist, so u.freelink == VtsID_INVALID 2760 */ 2761 typedef 2762 struct { 2763 VTS* vts; /* vts, in vts_set */ 2764 UWord rc; /* reference count - enough for entire aspace */ 2765 union { 2766 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */ 2767 VtsID remap; /* used only during pruning, for used entries */ 2768 } u; 2769 /* u.freelink only used when vts == NULL, 2770 u.remap only used when vts != NULL, during pruning. */ 2771 } 2772 VtsTE; 2773 2774 /* The VTS table. */ 2775 static XArray* /* of VtsTE */ vts_tab = NULL; 2776 2777 /* An index into the VTS table, indicating the start of the list of 2778 free (available for use) entries. If the list is empty, this is 2779 VtsID_INVALID. */ 2780 static VtsID vts_tab_freelist = VtsID_INVALID; 2781 2782 /* Do a GC of vts_tab when the freelist becomes empty AND the size of 2783 vts_tab equals or exceeds this size. After GC, the value here is 2784 set appropriately so as to check for the next GC point. */ 2785 static Word vts_next_GC_at = 1000; 2786 2787 static void vts_tab_init ( void ) 2788 { 2789 vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1", 2790 HG_(free), sizeof(VtsTE) ); 2791 vts_tab_freelist = VtsID_INVALID; 2792 } 2793 2794 /* Add ii to the free list, checking that it looks out-of-use. */ 2795 static void add_to_free_list ( VtsID ii ) 2796 { 2797 VtsTE* ie = VG_(indexXA)( vts_tab, ii ); 2798 tl_assert(ie->vts == NULL); 2799 tl_assert(ie->rc == 0); 2800 tl_assert(ie->u.freelink == VtsID_INVALID); 2801 ie->u.freelink = vts_tab_freelist; 2802 vts_tab_freelist = ii; 2803 } 2804 2805 /* Get an entry from the free list. This will return VtsID_INVALID if 2806 the free list is empty. 
*/ 2807 static VtsID get_from_free_list ( void ) 2808 { 2809 VtsID ii; 2810 VtsTE* ie; 2811 if (vts_tab_freelist == VtsID_INVALID) 2812 return VtsID_INVALID; 2813 ii = vts_tab_freelist; 2814 ie = VG_(indexXA)( vts_tab, ii ); 2815 tl_assert(ie->vts == NULL); 2816 tl_assert(ie->rc == 0); 2817 vts_tab_freelist = ie->u.freelink; 2818 return ii; 2819 } 2820 2821 /* Produce a new VtsID that can be used, either by getting it from 2822 the freelist, or, if that is empty, by expanding vts_tab. */ 2823 static VtsID get_new_VtsID ( void ) 2824 { 2825 VtsID ii; 2826 VtsTE te; 2827 ii = get_from_free_list(); 2828 if (ii != VtsID_INVALID) 2829 return ii; 2830 te.vts = NULL; 2831 te.rc = 0; 2832 te.u.freelink = VtsID_INVALID; 2833 ii = (VtsID)VG_(addToXA)( vts_tab, &te ); 2834 return ii; 2835 } 2836 2837 2838 /* Indirect callback from lib_zsm. */ 2839 static void VtsID__rcinc ( VtsID ii ) 2840 { 2841 VtsTE* ie; 2842 /* VG_(indexXA) does a range check for us */ 2843 ie = VG_(indexXA)( vts_tab, ii ); 2844 tl_assert(ie->vts); /* else it's not in use */ 2845 tl_assert(ie->rc < ~0UL); /* else we can't continue */ 2846 tl_assert(ie->vts->id == ii); 2847 ie->rc++; 2848 } 2849 2850 /* Indirect callback from lib_zsm. */ 2851 static void VtsID__rcdec ( VtsID ii ) 2852 { 2853 VtsTE* ie; 2854 /* VG_(indexXA) does a range check for us */ 2855 ie = VG_(indexXA)( vts_tab, ii ); 2856 tl_assert(ie->vts); /* else it's not in use */ 2857 tl_assert(ie->rc > 0); /* else RC snafu */ 2858 tl_assert(ie->vts->id == ii); 2859 ie->rc--; 2860 } 2861 2862 2863 /* Look up 'cand' in our collection of VTSs. If present, return the 2864 VtsID for the pre-existing version. If not present, clone it, add 2865 the clone to both vts_tab and vts_set, allocate a fresh VtsID for 2866 it, and return that. */ 2867 static VtsID vts_tab__find__or__clone_and_add ( VTS* cand ) 2868 { 2869 VTS* in_tab = NULL; 2870 tl_assert(cand->id == VtsID_INVALID); 2871 Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand ); 2872 tl_assert(in_tab); 2873 if (already_have) { 2874 /* We already have a copy of 'cand'. Use that. 
*/ 2875 VtsTE* ie; 2876 tl_assert(in_tab->id != VtsID_INVALID); 2877 ie = VG_(indexXA)( vts_tab, in_tab->id ); 2878 tl_assert(ie->vts == in_tab); 2879 return in_tab->id; 2880 } else { 2881 VtsID ii = get_new_VtsID(); 2882 VtsTE* ie = VG_(indexXA)( vts_tab, ii ); 2883 ie->vts = in_tab; 2884 ie->rc = 0; 2885 ie->u.freelink = VtsID_INVALID; 2886 in_tab->id = ii; 2887 return ii; 2888 } 2889 } 2890 2891 2892 static void show_vts_stats ( const HChar* caller ) 2893 { 2894 UWord nSet, nTab, nLive; 2895 ULong totrc; 2896 UWord n, i; 2897 nSet = VG_(sizeFM)( vts_set ); 2898 nTab = VG_(sizeXA)( vts_tab ); 2899 totrc = 0; 2900 nLive = 0; 2901 n = VG_(sizeXA)( vts_tab ); 2902 for (i = 0; i < n; i++) { 2903 VtsTE* ie = VG_(indexXA)( vts_tab, i ); 2904 if (ie->vts) { 2905 nLive++; 2906 totrc += (ULong)ie->rc; 2907 } else { 2908 tl_assert(ie->rc == 0); 2909 } 2910 } 2911 VG_(printf)(" show_vts_stats %s\n", caller); 2912 VG_(printf)(" vts_tab size %4lu\n", nTab); 2913 VG_(printf)(" vts_tab live %4lu\n", nLive); 2914 VG_(printf)(" vts_set size %4lu\n", nSet); 2915 VG_(printf)(" total rc %4llu\n", totrc); 2916 } 2917 2918 2919 /* --- Helpers for VtsID pruning --- */ 2920 2921 static 2922 void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab, 2923 /*MOD*/XArray* /* of VtsTE */ new_tab, 2924 VtsID* ii ) 2925 { 2926 VtsTE *old_te, *new_te; 2927 VtsID old_id, new_id; 2928 /* We're relying here on VG_(indexXA)'s range checking to assert on 2929 any stupid values, in particular *ii == VtsID_INVALID. */ 2930 old_id = *ii; 2931 old_te = VG_(indexXA)( old_tab, old_id ); 2932 old_te->rc--; 2933 new_id = old_te->u.remap; 2934 new_te = VG_(indexXA)( new_tab, new_id ); 2935 new_te->rc++; 2936 *ii = new_id; 2937 } 2938 2939 static 2940 void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab, 2941 /*MOD*/XArray* /* of VtsTE */ new_tab, 2942 SVal* s ) 2943 { 2944 SVal old_sv, new_sv; 2945 old_sv = *s; 2946 if (SVal__isC(old_sv)) { 2947 VtsID rMin, wMin; 2948 rMin = SVal__unC_Rmin(old_sv); 2949 wMin = SVal__unC_Wmin(old_sv); 2950 remap_VtsID( old_tab, new_tab, &rMin ); 2951 remap_VtsID( old_tab, new_tab, &wMin ); 2952 new_sv = SVal__mkC( rMin, wMin ); 2953 *s = new_sv; 2954 } 2955 } 2956 2957 2958 /* NOT TO BE CALLED FROM WITHIN libzsm. */ 2959 __attribute__((noinline)) 2960 static void vts_tab__do_GC ( Bool show_stats ) 2961 { 2962 UWord i, nTab, nLive, nFreed; 2963 2964 /* ---------- BEGIN VTS GC ---------- */ 2965 /* check this is actually necessary. */ 2966 tl_assert(vts_tab_freelist == VtsID_INVALID); 2967 2968 /* empty the caches for partial order checks and binary joins. We 2969 could do better and prune out the entries to be deleted, but it 2970 ain't worth the hassle. */ 2971 VtsID__invalidate_caches(); 2972 2973 /* First, make the reference counts up to date. */ 2974 zsm_flush_cache(); 2975 2976 nTab = VG_(sizeXA)( vts_tab ); 2977 2978 if (show_stats) { 2979 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab); 2980 show_vts_stats("before GC"); 2981 } 2982 2983 /* Now we can inspect the entire vts_tab. Any entries with zero 2984 .rc fields are now no longer in use and can be put back on the 2985 free list, removed from vts_set, and deleted. */ 2986 nFreed = 0; 2987 for (i = 0; i < nTab; i++) { 2988 Bool present; 2989 UWord oldK = 0, oldV = 12345; 2990 VtsTE* te = VG_(indexXA)( vts_tab, i ); 2991 if (te->vts == NULL) { 2992 tl_assert(te->rc == 0); 2993 continue; /* already on the free list (presumably) */ 2994 } 2995 if (te->rc > 0) 2996 continue; /* in use */ 2997 /* Ok, we got one we can free. 
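Its reference count fell to zero once the zsm cache was flushed above, and the cmpLEQ/join2 caches were invalidated, so no stale reference to it remains.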
*/ 2998 tl_assert(te->vts->id == i); 2999 /* first, remove it from vts_set. */ 3000 present = VG_(delFromFM)( vts_set, 3001 &oldK, &oldV, (UWord)te->vts ); 3002 tl_assert(present); /* else it isn't in vts_set ?! */ 3003 tl_assert(oldV == 0); /* no info stored in vts_set val fields */ 3004 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */ 3005 /* now free the VTS itself */ 3006 VTS__delete(te->vts); 3007 te->vts = NULL; 3008 /* and finally put this entry on the free list */ 3009 tl_assert(te->u.freelink == VtsID_INVALID); /* can't already be on it */ 3010 add_to_free_list( i ); 3011 nFreed++; 3012 } 3013 3014 /* Now figure out when the next GC should be. We'll allow the 3015 number of VTSs to double before GCing again. Except of course 3016 that since we can't (or, at least, don't) shrink vts_tab, we 3017 can't set the threshold value smaller than it. */ 3018 tl_assert(nFreed <= nTab); 3019 nLive = nTab - nFreed; 3020 tl_assert(nLive >= 0 && nLive <= nTab); 3021 vts_next_GC_at = 2 * nLive; 3022 if (vts_next_GC_at < nTab) 3023 vts_next_GC_at = nTab; 3024 3025 if (show_stats) { 3026 show_vts_stats("after GC"); 3027 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at); 3028 } 3029 3030 stats__vts_tab_GC++; 3031 if (VG_(clo_stats)) { 3032 tl_assert(nTab > 0); 3033 VG_(message)(Vg_DebugMsg, 3034 "libhb: VTS GC: #%lu old size %lu live %lu (%2llu%%)\n", 3035 stats__vts_tab_GC, 3036 nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab); 3037 } 3038 /* ---------- END VTS GC ---------- */ 3039 3040 /* Decide whether to do VTS pruning. We have one of three 3041 settings. */ 3042 static UInt pruning_auto_ctr = 0; /* do not make non-static */ 3043 3044 Bool do_pruning = False; 3045 switch (HG_(clo_vts_pruning)) { 3046 case 0: /* never */ 3047 break; 3048 case 1: /* auto */ 3049 do_pruning = (++pruning_auto_ctr % 5) == 0; 3050 break; 3051 case 2: /* always */ 3052 do_pruning = True; 3053 break; 3054 default: 3055 tl_assert(0); 3056 } 3057 3058 /* The rest of this routine only handles pruning, so we can 3059 quit at this point if it is not to be done. */ 3060 if (!do_pruning) 3061 return; 3062 /* No need to do pruning if no thread died since the last pruning as 3063 no VtsTE can be pruned. */ 3064 if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0) 3065 return; 3066 3067 /* ---------- BEGIN VTS PRUNING ---------- */ 3068 /* Sort and check the very dead threads that died since the last pruning. 3069 Sorting is used for the check and so that we can quickly look 3070 up the dead-thread entries as we work through the VTSs. */ 3071 verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned); 3072 3073 /* We will run through the old table, and create a new table and 3074 set, at the same time setting the u.remap entries in the old 3075 table to point to the new entries. Then, visit every VtsID in 3076 the system, and replace all of them with new ones, using the 3077 u.remap entries in the old table. Finally, we can delete the old 3078 table and set. */ 3079 3080 XArray* /* of VtsTE */ new_tab 3081 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab", 3082 HG_(free), sizeof(VtsTE) ); 3083 3084 /* WordFM VTS* void */ 3085 WordFM* new_set 3086 = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set", 3087 HG_(free), 3088 (Word(*)(UWord,UWord))VTS__cmp_structural ); 3089 3090 /* Visit each old VTS. For each one: 3091 3092 * make a pruned version 3093 3094 * search new_set for the pruned version, yielding either 3095 Nothing (not present) or the new VtsID for it. 
3096 3097 * if not present, allocate a new VtsID for it, insert (pruned 3098 VTS, new VtsID) in the tree, and set 3099 remap_table[old VtsID] = new VtsID. 3100 3101 * if present, set remap_table[old VtsID] = new VtsID, where 3102 new VtsID was determined by the tree lookup. Then free up 3103 the clone. 3104 */ 3105 3106 UWord nBeforePruning = 0, nAfterPruning = 0; 3107 UWord nSTSsBefore = 0, nSTSsAfter = 0; 3108 VtsID new_VtsID_ctr = 0; 3109 3110 for (i = 0; i < nTab; i++) { 3111 3112 /* For each old VTS .. */ 3113 VtsTE* old_te = VG_(indexXA)( vts_tab, i ); 3114 VTS* old_vts = old_te->vts; 3115 3116 /* Skip it if not in use */ 3117 if (old_te->rc == 0) { 3118 tl_assert(old_vts == NULL); 3119 continue; 3120 } 3121 tl_assert(old_te->u.remap == VtsID_INVALID); 3122 tl_assert(old_vts != NULL); 3123 tl_assert(old_vts->id == i); 3124 tl_assert(old_vts->ts != NULL); 3125 3126 /* It is in use. Make a pruned version. */ 3127 nBeforePruning++; 3128 nSTSsBefore += old_vts->usedTS; 3129 VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts", 3130 old_vts, verydead_thread_table_not_pruned); 3131 tl_assert(new_vts->sizeTS == new_vts->usedTS); 3132 tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS]) 3133 == 0x0ddC0ffeeBadF00dULL); 3134 3135 /* Get rid of the old VTS and the tree entry. It's a bit more 3136 complex to incrementally delete the VTSs now than to nuke 3137 them all after we're done, but the upside is that we don't 3138 wind up temporarily storing potentially two complete copies 3139 of each VTS and hence spiking memory use. */ 3140 UWord oldK = 0, oldV = 12345; 3141 Bool present = VG_(delFromFM)( vts_set, 3142 &oldK, &oldV, (UWord)old_vts ); 3143 tl_assert(present); /* else it isn't in vts_set ?! */ 3144 tl_assert(oldV == 0); /* no info stored in vts_set val fields */ 3145 tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */ 3146 /* now free the VTS itself */ 3147 VTS__delete(old_vts); 3148 old_te->vts = NULL; 3149 old_vts = NULL; 3150 3151 /* NO MENTIONS of old_vts allowed beyond this point. */ 3152 3153 /* Ok, we have the pruned copy in new_vts. See if a 3154 structurally identical version is already present in new_set. 3155 If so, delete the one we just made and move on; if not, add 3156 it. */ 3157 VTS* identical_version = NULL; 3158 UWord valW = 12345; 3159 if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW, 3160 (UWord)new_vts)) { 3161 // already have it 3162 tl_assert(valW == 0); 3163 tl_assert(identical_version != NULL); 3164 tl_assert(identical_version != new_vts); 3165 VTS__delete(new_vts); 3166 new_vts = identical_version; 3167 tl_assert(new_vts->id != VtsID_INVALID); 3168 } else { 3169 tl_assert(valW == 12345); 3170 tl_assert(identical_version == NULL); 3171 new_vts->id = new_VtsID_ctr++; 3172 Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0); 3173 tl_assert(!b); 3174 VtsTE new_te; 3175 new_te.vts = new_vts; 3176 new_te.rc = 0; 3177 new_te.u.freelink = VtsID_INVALID; 3178 Word j = VG_(addToXA)( new_tab, &new_te ); 3179 tl_assert(j <= i); 3180 tl_assert(j == new_VtsID_ctr - 1); 3181 // stats 3182 nAfterPruning++; 3183 nSTSsAfter += new_vts->usedTS; 3184 } 3185 old_te->u.remap = new_vts->id; 3186 3187 } /* for (i = 0; i < nTab; i++) */ 3188 3189 /* Move very dead thread from verydead_thread_table_not_pruned to 3190 verydead_thread_table. Sort and check verydead_thread_table 3191 to verify a thread was reported very dead only once. 
*/ 3192 { 3193 UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned); 3194 3195 for (i = 0; i < nBT; i++) { 3196 ThrID thrid = 3197 *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i ); 3198 VG_(addToXA)( verydead_thread_table, &thrid ); 3199 } 3200 verydead_thread_table_sort_and_check (verydead_thread_table); 3201 VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT); 3202 } 3203 3204 /* At this point, we have: 3205 * the old VTS table, with its u.remap entries set, 3206 and with all .vts == NULL. 3207 * the old VTS tree should be empty, since it and the old VTSs 3208 it contained have been incrementally deleted as we worked 3209 through the old table. 3210 * the new VTS table, with all .rc == 0, all u.freelink and u.remap 3211 == VtsID_INVALID. 3212 * the new VTS tree. 3213 */ 3214 tl_assert( VG_(sizeFM)(vts_set) == 0 ); 3215 3216 /* Now actually apply the mapping. */ 3217 /* Visit all the VtsIDs in the entire system. Where do we expect 3218 to find them? 3219 (a) in shadow memory -- the LineZs and LineFs 3220 (b) in our collection of struct _Thrs. 3221 (c) in our collection of struct _SOs. 3222 Nowhere else, AFAICS. Not in the zsm cache, because that just 3223 got invalidated. 3224 3225 Using the u.remap fields in vts_tab, map each old VtsID to a new 3226 VtsID. For each old VtsID, dec its rc; and for each new one, 3227 inc it. This sets up the new refcounts, and it also gives a 3228 cheap sanity check of the old ones: all old refcounts should be 3229 zero after this operation. 3230 */ 3231 3232 /* Do the mappings for (a) above: iterate over the Primary shadow 3233 mem map (WordFM Addr SecMap*). */ 3234 UWord secmapW = 0; 3235 VG_(initIterFM)( map_shmem ); 3236 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) { 3237 UWord j; 3238 SecMap* sm = (SecMap*)secmapW; 3239 tl_assert(sm->magic == SecMap_MAGIC); 3240 /* Deal with the LineZs */ 3241 for (i = 0; i < N_SECMAP_ZLINES; i++) { 3242 LineZ* lineZ = &sm->linesZ[i]; 3243 if (lineZ->dict[0] != SVal_INVALID) { 3244 for (j = 0; j < 4; j++) 3245 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]); 3246 } else { 3247 LineF* lineF = SVal2Ptr (lineZ->dict[1]); 3248 for (j = 0; j < N_LINE_ARANGE; j++) 3249 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]); 3250 } 3251 } 3252 } 3253 VG_(doneIterFM)( map_shmem ); 3254 3255 /* Do the mappings for (b) above: visit our collection of struct 3256 _Thrs. */ 3257 Thread* hgthread = get_admin_threads(); 3258 tl_assert(hgthread); 3259 while (hgthread) { 3260 Thr* hbthr = hgthread->hbthr; 3261 tl_assert(hbthr); 3262 /* Threads that are listed in the prunable set have their viR 3263 and viW set to VtsID_INVALID, so we can't mess with them. */ 3264 if (hbthr->llexit_done && hbthr->joinedwith_done) { 3265 tl_assert(hbthr->viR == VtsID_INVALID); 3266 tl_assert(hbthr->viW == VtsID_INVALID); 3267 hgthread = hgthread->admin; 3268 continue; 3269 } 3270 remap_VtsID( vts_tab, new_tab, &hbthr->viR ); 3271 remap_VtsID( vts_tab, new_tab, &hbthr->viW ); 3272 hgthread = hgthread->admin; 3273 } 3274 3275 /* Do the mappings for (c) above: visit the struct _SOs. */ 3276 SO* so = admin_SO; 3277 while (so) { 3278 if (so->viR != VtsID_INVALID) 3279 remap_VtsID( vts_tab, new_tab, &so->viR ); 3280 if (so->viW != VtsID_INVALID) 3281 remap_VtsID( vts_tab, new_tab, &so->viW ); 3282 so = so->admin_next; 3283 } 3284 3285 /* So, we're nearly done (with this incredibly complex operation). 3286 Check the refcounts for the old VtsIDs all fell to zero, as 3287 expected. Any failure is serious.
*/ 3288 for (i = 0; i < nTab; i++) { 3289 VtsTE* te = VG_(indexXA)( vts_tab, i ); 3290 tl_assert(te->vts == NULL); 3291 /* This is the assert proper. Note we're also asserting 3292 zeroness for old entries which are unmapped. That's OK. */ 3293 tl_assert(te->rc == 0); 3294 } 3295 3296 /* Install the new table and set. */ 3297 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/); 3298 vts_set = new_set; 3299 VG_(deleteXA)( vts_tab ); 3300 vts_tab = new_tab; 3301 3302 /* The freelist of vts_tab entries is empty now, because we've 3303 compacted all of the live entries at the low end of the 3304 table. */ 3305 vts_tab_freelist = VtsID_INVALID; 3306 3307 /* Sanity check vts_set and vts_tab. */ 3308 3309 /* Because all the live entries got slid down to the bottom of vts_tab: */ 3310 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set )); 3311 3312 /* Assert that the vts_tab and vts_set entries point at each other 3313 in the required way */ 3314 UWord wordK = 0, wordV = 0; 3315 VG_(initIterFM)( vts_set ); 3316 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) { 3317 tl_assert(wordK != 0); 3318 tl_assert(wordV == 0); 3319 VTS* vts = (VTS*)wordK; 3320 tl_assert(vts->id != VtsID_INVALID); 3321 VtsTE* te = VG_(indexXA)( vts_tab, vts->id ); 3322 tl_assert(te->vts == vts); 3323 } 3324 VG_(doneIterFM)( vts_set ); 3325 3326 /* Also iterate over the table, and check each entry is 3327 plausible. */ 3328 nTab = VG_(sizeXA)( vts_tab ); 3329 for (i = 0; i < nTab; i++) { 3330 VtsTE* te = VG_(indexXA)( vts_tab, i ); 3331 tl_assert(te->vts); 3332 tl_assert(te->vts->id == i); 3333 tl_assert(te->rc > 0); /* 'cos we just GC'd */ 3334 tl_assert(te->u.freelink == VtsID_INVALID); /* in use */ 3335 /* value of te->u.remap not relevant */ 3336 } 3337 3338 /* And we're done. Bwahahaha. Ha. Ha. Ha. */ 3339 stats__vts_pruning++; 3340 if (VG_(clo_stats)) { 3341 tl_assert(nTab > 0); 3342 VG_(message)( 3343 Vg_DebugMsg, 3344 "libhb: VTS PR: #%lu before %lu (avg sz %lu) " 3345 "after %lu (avg sz %lu)\n", 3346 stats__vts_pruning, 3347 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1), 3348 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1) 3349 ); 3350 } 3351 /* ---------- END VTS PRUNING ---------- */ 3352 } 3353 3354 3355 ///////////////////////////////////////////////////////// 3356 // // 3357 // Vts IDs // 3358 // // 3359 ///////////////////////////////////////////////////////// 3360 3361 ////////////////////////// 3362 /* A temporary, max-sized VTS which is used as a temporary (the first 3363 argument) in VTS__singleton, VTS__tick and VTS__join operations. 
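Callers zero its usedTS before each use (see VtsID__mk_Singleton, VtsID__tick and VtsID__join2_WRK below); presumably it is allocated elsewhere with room for the largest possible VTS, so that the pre-allocation asserts in VTS__tick and VTS__join can never fire on it.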
*/ 3364 static VTS* temp_max_sized_VTS = NULL; 3365 3366 ////////////////////////// 3367 static ULong stats__cmpLEQ_queries = 0; 3368 static ULong stats__cmpLEQ_misses = 0; 3369 static ULong stats__join2_queries = 0; 3370 static ULong stats__join2_misses = 0; 3371 3372 static inline UInt ROL32 ( UInt w, Int n ) { 3373 w = (w << n) | (w >> (32-n)); 3374 return w; 3375 } 3376 static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) { 3377 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13); 3378 return hash % nTab; 3379 } 3380 3381 #define N_CMPLEQ_CACHE 1023 3382 static 3383 struct { VtsID vi1; VtsID vi2; Bool leq; } 3384 cmpLEQ_cache[N_CMPLEQ_CACHE]; 3385 3386 #define N_JOIN2_CACHE 1023 3387 static 3388 struct { VtsID vi1; VtsID vi2; VtsID res; } 3389 join2_cache[N_JOIN2_CACHE]; 3390 3391 static void VtsID__invalidate_caches ( void ) { 3392 Int i; 3393 for (i = 0; i < N_CMPLEQ_CACHE; i++) { 3394 cmpLEQ_cache[i].vi1 = VtsID_INVALID; 3395 cmpLEQ_cache[i].vi2 = VtsID_INVALID; 3396 cmpLEQ_cache[i].leq = False; 3397 } 3398 for (i = 0; i < N_JOIN2_CACHE; i++) { 3399 join2_cache[i].vi1 = VtsID_INVALID; 3400 join2_cache[i].vi2 = VtsID_INVALID; 3401 join2_cache[i].res = VtsID_INVALID; 3402 } 3403 } 3404 ////////////////////////// 3405 3406 //static Bool VtsID__is_valid ( VtsID vi ) { 3407 // VtsTE* ve; 3408 // if (vi >= (VtsID)VG_(sizeXA)( vts_tab )) 3409 // return False; 3410 // ve = VG_(indexXA)( vts_tab, vi ); 3411 // if (!ve->vts) 3412 // return False; 3413 // tl_assert(ve->vts->id == vi); 3414 // return True; 3415 //} 3416 3417 static VTS* VtsID__to_VTS ( VtsID vi ) { 3418 VtsTE* te = VG_(indexXA)( vts_tab, vi ); 3419 tl_assert(te->vts); 3420 return te->vts; 3421 } 3422 3423 static void VtsID__pp ( VtsID vi ) { 3424 VTS* vts = VtsID__to_VTS(vi); 3425 VTS__show( vts ); 3426 } 3427 3428 /* compute partial ordering relation of vi1 and vi2. */ 3429 __attribute__((noinline)) 3430 static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) { 3431 UInt hash; 3432 Bool leq; 3433 VTS *v1, *v2; 3434 //if (vi1 == vi2) return True; 3435 tl_assert(vi1 != vi2); 3436 ////++ 3437 stats__cmpLEQ_queries++; 3438 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE); 3439 if (cmpLEQ_cache[hash].vi1 == vi1 3440 && cmpLEQ_cache[hash].vi2 == vi2) 3441 return cmpLEQ_cache[hash].leq; 3442 stats__cmpLEQ_misses++; 3443 ////-- 3444 v1 = VtsID__to_VTS(vi1); 3445 v2 = VtsID__to_VTS(vi2); 3446 leq = VTS__cmpLEQ( v1, v2 ) == 0; 3447 ////++ 3448 cmpLEQ_cache[hash].vi1 = vi1; 3449 cmpLEQ_cache[hash].vi2 = vi2; 3450 cmpLEQ_cache[hash].leq = leq; 3451 ////-- 3452 return leq; 3453 } 3454 static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) { 3455 return LIKELY(vi1 == vi2) ? 
True : VtsID__cmpLEQ_WRK(vi1, vi2); 3456 } 3457 3458 /* compute binary join */ 3459 __attribute__((noinline)) 3460 static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) { 3461 UInt hash; 3462 VtsID res; 3463 VTS *vts1, *vts2; 3464 //if (vi1 == vi2) return vi1; 3465 tl_assert(vi1 != vi2); 3466 ////++ 3467 stats__join2_queries++; 3468 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE); 3469 if (join2_cache[hash].vi1 == vi1 3470 && join2_cache[hash].vi2 == vi2) 3471 return join2_cache[hash].res; 3472 stats__join2_misses++; 3473 ////-- 3474 vts1 = VtsID__to_VTS(vi1); 3475 vts2 = VtsID__to_VTS(vi2); 3476 temp_max_sized_VTS->usedTS = 0; 3477 VTS__join(temp_max_sized_VTS, vts1,vts2); 3478 res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS); 3479 ////++ 3480 join2_cache[hash].vi1 = vi1; 3481 join2_cache[hash].vi2 = vi2; 3482 join2_cache[hash].res = res; 3483 ////-- 3484 return res; 3485 } 3486 static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) { 3487 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2); 3488 } 3489 3490 /* create a singleton VTS, namely [thr:1] */ 3491 static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) { 3492 temp_max_sized_VTS->usedTS = 0; 3493 VTS__singleton(temp_max_sized_VTS, thr,tym); 3494 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS); 3495 } 3496 3497 /* tick operation, creates value 1 if specified index is absent */ 3498 static VtsID VtsID__tick ( VtsID vi, Thr* idx ) { 3499 VTS* vts = VtsID__to_VTS(vi); 3500 temp_max_sized_VTS->usedTS = 0; 3501 VTS__tick(temp_max_sized_VTS, idx,vts); 3502 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS); 3503 } 3504 3505 /* index into a VTS (only for assertions) */ 3506 static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) { 3507 VTS* vts = VtsID__to_VTS(vi); 3508 return VTS__indexAt_SLOW( vts, idx ); 3509 } 3510 3511 /* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or 3512 any, really) element in vi1 which is pointwise greater-than the 3513 corresponding element in vi2. If no such element exists, return 3514 NULL. This needs to be fairly quick since it is called every time 3515 a race is detected. */ 3516 static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 ) 3517 { 3518 VTS *vts1, *vts2; 3519 Thr* diffthr; 3520 ThrID diffthrid; 3521 tl_assert(vi1 != vi2); 3522 vts1 = VtsID__to_VTS(vi1); 3523 vts2 = VtsID__to_VTS(vi2); 3524 tl_assert(vts1 != vts2); 3525 diffthrid = VTS__cmpLEQ(vts1, vts2); 3526 diffthr = Thr__from_ThrID(diffthrid); 3527 tl_assert(diffthr); /* else they are LEQ ! */ 3528 return diffthr; 3529 } 3530 3531 3532 ///////////////////////////////////////////////////////// 3533 // // 3534 // Filters // 3535 // // 3536 ///////////////////////////////////////////////////////// 3537 3538 /* Forget everything we know -- clear the filter and let everything 3539 through. This needs to be as fast as possible, since it is called 3540 every time the running thread changes, and every time a thread's 3541 vector clocks change, which can be quite frequent. The obvious 3542 fast way to do this is simply to stuff in tags which we know are 3543 not going to match anything, since they're not aligned to the start 3544 of a line. 
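   Since FI_LINE_SZB is 32 (see the STATIC_ASSERT in Filter__clear_range
   below), every genuine tag is 32-byte aligned, so the odd value 1 used
   below can never match.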
*/ 3545 static void Filter__clear ( Filter* fi, const HChar* who ) 3546 { 3547 UWord i; 3548 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who); 3549 for (i = 0; i < FI_NUM_LINES; i += 8) { 3550 fi->tags[i+0] = 1; /* impossible value -- cannot match */ 3551 fi->tags[i+1] = 1; 3552 fi->tags[i+2] = 1; 3553 fi->tags[i+3] = 1; 3554 fi->tags[i+4] = 1; 3555 fi->tags[i+5] = 1; 3556 fi->tags[i+6] = 1; 3557 fi->tags[i+7] = 1; 3558 } 3559 tl_assert(i == FI_NUM_LINES); 3560 } 3561 3562 /* Clearing an arbitrary range in the filter. Unfortunately 3563 we have to do this due to core-supplied new/die-mem events. */ 3564 3565 static void Filter__clear_1byte ( Filter* fi, Addr a ) 3566 { 3567 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3568 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3569 FiLine* line = &fi->lines[lineno]; 3570 UWord loff = (a - atag) / 8; 3571 UShort mask = 0x3 << (2 * (a & 7)); 3572 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */ 3573 if (LIKELY( fi->tags[lineno] == atag )) { 3574 /* hit. clear the bits. */ 3575 UShort u16 = line->u16s[loff]; 3576 line->u16s[loff] = u16 & ~mask; /* clear them */ 3577 } else { 3578 /* miss. The filter doesn't hold this address, so ignore. */ 3579 } 3580 } 3581 3582 static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a ) 3583 { 3584 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3585 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3586 FiLine* line = &fi->lines[lineno]; 3587 UWord loff = (a - atag) / 8; 3588 if (LIKELY( fi->tags[lineno] == atag )) { 3589 line->u16s[loff] = 0; 3590 } else { 3591 /* miss. The filter doesn't hold this address, so ignore. */ 3592 } 3593 } 3594 3595 /* Only used to verify the fast Filter__clear_range */ 3596 __attribute__((unused)) 3597 static void Filter__clear_range_SLOW ( Filter* fi, Addr a, UWord len ) 3598 { 3599 tl_assert (CHECK_ZSM); 3600 3601 /* slowly do part preceding 8-alignment */ 3602 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) { 3603 Filter__clear_1byte( fi, a ); 3604 a++; 3605 len--; 3606 } 3607 /* vector loop */ 3608 while (len >= 8) { 3609 Filter__clear_8bytes_aligned( fi, a ); 3610 a += 8; 3611 len -= 8; 3612 } 3613 /* slowly do tail */ 3614 while (UNLIKELY(len > 0)) { 3615 Filter__clear_1byte( fi, a ); 3616 a++; 3617 len--; 3618 } 3619 } 3620 3621 static void Filter__clear_range ( Filter* fi, Addr a, UWord len ) 3622 { 3623 # if CHECK_ZSM > 0 3624 /* We check the below more complex algorithm with the simple one. 3625 This check is very expensive : we do first the slow way on a 3626 copy of the data, then do it the fast way. On RETURN, we check 3627 the two values are equal. */ 3628 Filter fi_check = *fi; 3629 Filter__clear_range_SLOW(&fi_check, a, len); 3630 # define RETURN goto check_and_return 3631 # else 3632 # define RETURN return 3633 # endif 3634 3635 Addr begtag = FI_GET_TAG(a); /* tag of range begin */ 3636 3637 Addr end = a + len - 1; 3638 Addr endtag = FI_GET_TAG(end); /* tag of range end. */ 3639 3640 UWord rlen = len; /* remaining length to clear */ 3641 3642 Addr c = a; /* Current position we are clearing. */ 3643 UWord clineno = FI_GET_LINENO(c); /* Current lineno we are clearing */ 3644 FiLine* cline; /* Current line we are clearing */ 3645 UWord cloff; /* Current offset in line we are clearing, when clearing 3646 partial lines. 
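   For orientation: each filter line covers FI_LINE_SZB == 32 bytes with
   four u16s, one per aligned 8-byte group, so cloff selects the u16 for
   the group containing c. Within a u16 every byte owns two adjacent bits;
   judging from the masks in the crd/cwr handlers below, the odd bit of
   each pair is the R bit and the even bit the W bit. 'Clearing' a byte
   therefore means zeroing its 2-bit lane.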
*/ 3647 3648 UShort u16; 3649 3650 STATIC_ASSERT (FI_LINE_SZB == 32); 3651 // Below assumes filter lines are 32 bytes 3652 3653 if (LIKELY(fi->tags[clineno] == begtag)) { 3654 /* LIKELY for the heavy caller VG_(unknown_SP_update). */ 3655 /* First filter line matches begtag. 3656 If c is not at the filter line begin, the below will clear 3657 the filter line bytes starting from c. */ 3658 cline = &fi->lines[clineno]; 3659 cloff = (c - begtag) / 8; 3660 3661 /* First the byte(s) needed to reach 8-alignment */ 3662 if (UNLIKELY(!VG_IS_8_ALIGNED(c))) { 3663 /* hiB is the nr of bytes (higher addresses) from c to reach 3664 8-aligment. */ 3665 UWord hiB = 8 - (c & 7); 3666 /* Compute 2-bit/byte mask representing hiB bytes [c..c+hiB[ 3667 mask is C000 , F000, FC00, FF00, FFC0, FFF0 or FFFC for the byte 3668 range 7..7 6..7 5..7 4..7 3..7 2..7 1..7 */ 3669 UShort mask = 0xFFFF << (16 - 2*hiB); 3670 3671 u16 = cline->u16s[cloff]; 3672 if (LIKELY(rlen >= hiB)) { 3673 cline->u16s[cloff] = u16 & ~mask; /* clear all hiB from c */ 3674 rlen -= hiB; 3675 c += hiB; 3676 cloff += 1; 3677 } else { 3678 /* Only have the bits for rlen bytes bytes. */ 3679 mask = mask & ~(0xFFFF << (16 - 2*(hiB-rlen))); 3680 cline->u16s[cloff] = u16 & ~mask; /* clear rlen bytes from c. */ 3681 RETURN; // We have cleared all what we can. 3682 } 3683 } 3684 /* c is now 8 aligned. Clear by 8 aligned bytes, 3685 till c is filter-line aligned */ 3686 while (!VG_IS_32_ALIGNED(c) && rlen >= 8) { 3687 cline->u16s[cloff] = 0; 3688 c += 8; 3689 rlen -= 8; 3690 cloff += 1; 3691 } 3692 } else { 3693 c = begtag + FI_LINE_SZB; 3694 if (c > end) 3695 RETURN; // We have cleared all what we can. 3696 rlen -= c - a; 3697 } 3698 // We have changed c, so re-establish clineno. 3699 clineno = FI_GET_LINENO(c); 3700 3701 if (rlen >= FI_LINE_SZB) { 3702 /* Here, c is filter line-aligned. Clear all full lines that 3703 overlap with the range starting at c, made of a full lines */ 3704 UWord nfull = rlen / FI_LINE_SZB; 3705 UWord full_len = nfull * FI_LINE_SZB; 3706 rlen -= full_len; 3707 if (nfull > FI_NUM_LINES) 3708 nfull = FI_NUM_LINES; // no need to check several times the same entry. 3709 3710 for (UWord n = 0; n < nfull; n++) { 3711 if (UNLIKELY(address_in_range(fi->tags[clineno], c, full_len))) { 3712 cline = &fi->lines[clineno]; 3713 cline->u16s[0] = 0; 3714 cline->u16s[1] = 0; 3715 cline->u16s[2] = 0; 3716 cline->u16s[3] = 0; 3717 STATIC_ASSERT (4 == sizeof(cline->u16s)/sizeof(cline->u16s[0])); 3718 } 3719 clineno++; 3720 if (UNLIKELY(clineno == FI_NUM_LINES)) 3721 clineno = 0; 3722 } 3723 3724 c += full_len; 3725 clineno = FI_GET_LINENO(c); 3726 } 3727 3728 if (CHECK_ZSM) { 3729 tl_assert(VG_IS_8_ALIGNED(c)); 3730 tl_assert(clineno == FI_GET_LINENO(c)); 3731 } 3732 3733 /* Do the last filter line, if it was not cleared as a full filter line */ 3734 if (UNLIKELY(rlen > 0) && fi->tags[clineno] == endtag) { 3735 cline = &fi->lines[clineno]; 3736 cloff = (c - endtag) / 8; 3737 if (CHECK_ZSM) tl_assert(FI_GET_TAG(c) == endtag); 3738 3739 /* c is 8 aligned. Clear by 8 aligned bytes, till we have less than 3740 8 bytes. */ 3741 while (rlen >= 8) { 3742 cline->u16s[cloff] = 0; 3743 c += 8; 3744 rlen -= 8; 3745 cloff += 1; 3746 } 3747 /* Then the remaining byte(s) */ 3748 if (rlen > 0) { 3749 /* nr of bytes from c to reach end. 
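   Worked example: if rlen == 3 then the mask computed just below is
   0xFFFF >> (16 - 6) == 0x003F, which clears the R and W bits of the
   three bytes c, c+1 and c+2 and leaves the rest of the u16 untouched.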
*/ 3750 UWord loB = rlen; 3751 /* Compute mask representing loB bytes [c..c+loB[ : 3752 mask is 0003, 000F, 003F, 00FF, 03FF, 0FFF or 3FFF */ 3753 UShort mask = 0xFFFF >> (16 - 2*loB); 3754 3755 u16 = cline->u16s[cloff]; 3756 cline->u16s[cloff] = u16 & ~mask; /* clear all loB from c */ 3757 } 3758 } 3759 3760 # if CHECK_ZSM > 0 3761 check_and_return: 3762 tl_assert (VG_(memcmp)(&fi_check, fi, sizeof(fi_check)) == 0); 3763 # endif 3764 # undef RETURN 3765 } 3766 3767 /* ------ Read handlers for the filter. ------ */ 3768 3769 static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a ) 3770 { 3771 if (UNLIKELY( !VG_IS_8_ALIGNED(a) )) 3772 return False; 3773 { 3774 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3775 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3776 FiLine* line = &fi->lines[lineno]; 3777 UWord loff = (a - atag) / 8; 3778 UShort mask = 0xAAAA; 3779 if (LIKELY( fi->tags[lineno] == atag )) { 3780 /* hit. check line and update. */ 3781 UShort u16 = line->u16s[loff]; 3782 Bool ok = (u16 & mask) == mask; /* all R bits set? */ 3783 line->u16s[loff] = u16 | mask; /* set them */ 3784 return ok; 3785 } else { 3786 /* miss. nuke existing line and re-use it. */ 3787 UWord i; 3788 fi->tags[lineno] = atag; 3789 for (i = 0; i < FI_LINE_SZB / 8; i++) 3790 line->u16s[i] = 0; 3791 line->u16s[loff] = mask; 3792 return False; 3793 } 3794 } 3795 } 3796 3797 static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a ) 3798 { 3799 if (UNLIKELY( !VG_IS_4_ALIGNED(a) )) 3800 return False; 3801 { 3802 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3803 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3804 FiLine* line = &fi->lines[lineno]; 3805 UWord loff = (a - atag) / 8; 3806 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */ 3807 if (LIKELY( fi->tags[lineno] == atag )) { 3808 /* hit. check line and update. */ 3809 UShort u16 = line->u16s[loff]; 3810 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */ 3811 line->u16s[loff] = u16 | mask; /* set them */ 3812 return ok; 3813 } else { 3814 /* miss. nuke existing line and re-use it. */ 3815 UWord i; 3816 fi->tags[lineno] = atag; 3817 for (i = 0; i < FI_LINE_SZB / 8; i++) 3818 line->u16s[i] = 0; 3819 line->u16s[loff] = mask; 3820 return False; 3821 } 3822 } 3823 } 3824 3825 static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a ) 3826 { 3827 if (UNLIKELY( !VG_IS_2_ALIGNED(a) )) 3828 return False; 3829 { 3830 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3831 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3832 FiLine* line = &fi->lines[lineno]; 3833 UWord loff = (a - atag) / 8; 3834 UShort mask = 0xA << (2 * (a & 6)); 3835 /* mask is A000, 0A00, 00A0 or 000A */ 3836 if (LIKELY( fi->tags[lineno] == atag )) { 3837 /* hit. check line and update. */ 3838 UShort u16 = line->u16s[loff]; 3839 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */ 3840 line->u16s[loff] = u16 | mask; /* set them */ 3841 return ok; 3842 } else { 3843 /* miss. nuke existing line and re-use it. 
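   Returning False here is always conservative: it just means this access
   cannot be skipped and goes through the full checking machinery; the
   freshly installed line then lets subsequent reads of these two bytes
   be skipped.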
*/ 3844 UWord i; 3845 fi->tags[lineno] = atag; 3846 for (i = 0; i < FI_LINE_SZB / 8; i++) 3847 line->u16s[i] = 0; 3848 line->u16s[loff] = mask; 3849 return False; 3850 } 3851 } 3852 } 3853 3854 static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a ) 3855 { 3856 { 3857 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3858 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3859 FiLine* line = &fi->lines[lineno]; 3860 UWord loff = (a - atag) / 8; 3861 UShort mask = 0x2 << (2 * (a & 7)); 3862 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */ 3863 if (LIKELY( fi->tags[lineno] == atag )) { 3864 /* hit. check line and update. */ 3865 UShort u16 = line->u16s[loff]; 3866 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */ 3867 line->u16s[loff] = u16 | mask; /* set them */ 3868 return ok; 3869 } else { 3870 /* miss. nuke existing line and re-use it. */ 3871 UWord i; 3872 fi->tags[lineno] = atag; 3873 for (i = 0; i < FI_LINE_SZB / 8; i++) 3874 line->u16s[i] = 0; 3875 line->u16s[loff] = mask; 3876 return False; 3877 } 3878 } 3879 } 3880 3881 3882 /* ------ Write handlers for the filter. ------ */ 3883 3884 static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a ) 3885 { 3886 if (UNLIKELY( !VG_IS_8_ALIGNED(a) )) 3887 return False; 3888 { 3889 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3890 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3891 FiLine* line = &fi->lines[lineno]; 3892 UWord loff = (a - atag) / 8; 3893 UShort mask = 0xFFFF; 3894 if (LIKELY( fi->tags[lineno] == atag )) { 3895 /* hit. check line and update. */ 3896 UShort u16 = line->u16s[loff]; 3897 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */ 3898 line->u16s[loff] = u16 | mask; /* set them */ 3899 return ok; 3900 } else { 3901 /* miss. nuke existing line and re-use it. */ 3902 UWord i; 3903 fi->tags[lineno] = atag; 3904 for (i = 0; i < FI_LINE_SZB / 8; i++) 3905 line->u16s[i] = 0; 3906 line->u16s[loff] = mask; 3907 return False; 3908 } 3909 } 3910 } 3911 3912 static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a ) 3913 { 3914 if (UNLIKELY( !VG_IS_4_ALIGNED(a) )) 3915 return False; 3916 { 3917 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3918 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3919 FiLine* line = &fi->lines[lineno]; 3920 UWord loff = (a - atag) / 8; 3921 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */ 3922 if (LIKELY( fi->tags[lineno] == atag )) { 3923 /* hit. check line and update. */ 3924 UShort u16 = line->u16s[loff]; 3925 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */ 3926 line->u16s[loff] = u16 | mask; /* set them */ 3927 return ok; 3928 } else { 3929 /* miss. nuke existing line and re-use it. */ 3930 UWord i; 3931 fi->tags[lineno] = atag; 3932 for (i = 0; i < FI_LINE_SZB / 8; i++) 3933 line->u16s[i] = 0; 3934 line->u16s[loff] = mask; 3935 return False; 3936 } 3937 } 3938 } 3939 3940 static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a ) 3941 { 3942 if (UNLIKELY( !VG_IS_2_ALIGNED(a) )) 3943 return False; 3944 { 3945 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3946 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3947 FiLine* line = &fi->lines[lineno]; 3948 UWord loff = (a - atag) / 8; 3949 UShort mask = 0xF << (2 * (a & 6)); 3950 /* mask is F000, 0F00, 00F0 or 000F */ 3951 if (LIKELY( fi->tags[lineno] == atag )) { 3952 /* hit. check line and update. */ 3953 UShort u16 = line->u16s[loff]; 3954 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? 
*/ 3955 line->u16s[loff] = u16 | mask; /* set them */ 3956 return ok; 3957 } else { 3958 /* miss. nuke existing line and re-use it. */ 3959 UWord i; 3960 fi->tags[lineno] = atag; 3961 for (i = 0; i < FI_LINE_SZB / 8; i++) 3962 line->u16s[i] = 0; 3963 line->u16s[loff] = mask; 3964 return False; 3965 } 3966 } 3967 } 3968 3969 static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a ) 3970 { 3971 { 3972 Addr atag = FI_GET_TAG(a); /* tag of 'a' */ 3973 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */ 3974 FiLine* line = &fi->lines[lineno]; 3975 UWord loff = (a - atag) / 8; 3976 UShort mask = 0x3 << (2 * (a & 7)); 3977 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */ 3978 if (LIKELY( fi->tags[lineno] == atag )) { 3979 /* hit. check line and update. */ 3980 UShort u16 = line->u16s[loff]; 3981 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */ 3982 line->u16s[loff] = u16 | mask; /* set them */ 3983 return ok; 3984 } else { 3985 /* miss. nuke existing line and re-use it. */ 3986 UWord i; 3987 fi->tags[lineno] = atag; 3988 for (i = 0; i < FI_LINE_SZB / 8; i++) 3989 line->u16s[i] = 0; 3990 line->u16s[loff] = mask; 3991 return False; 3992 } 3993 } 3994 } 3995 3996 3997 ///////////////////////////////////////////////////////// 3998 // // 3999 // Threads // 4000 // // 4001 ///////////////////////////////////////////////////////// 4002 4003 /* Maps ThrID values to their Thr*s (which contain ThrID values that 4004 should point back to the relevant slot in the array. Lowest 4005 numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */ 4006 static XArray* /* of Thr* */ thrid_to_thr_map = NULL; 4007 4008 /* And a counter to dole out ThrID values. For rationale/background, 4009 see comments on definition of ScalarTS (far) above. */ 4010 static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */ 4011 4012 static ThrID Thr__to_ThrID ( Thr* thr ) { 4013 return thr->thrid; 4014 } 4015 static Thr* Thr__from_ThrID ( UInt thrid ) { 4016 Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 ); 4017 tl_assert(thr->thrid == thrid); 4018 return thr; 4019 } 4020 4021 static Thr* Thr__new ( void ) 4022 { 4023 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) ); 4024 thr->viR = VtsID_INVALID; 4025 thr->viW = VtsID_INVALID; 4026 thr->llexit_done = False; 4027 thr->joinedwith_done = False; 4028 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) ); 4029 if (HG_(clo_history_level) == 1) 4030 thr->local_Kws_n_stacks 4031 = VG_(newXA)( HG_(zalloc), 4032 "libhb.Thr__new.3 (local_Kws_and_stacks)", 4033 HG_(free), sizeof(ULong_n_EC) ); 4034 4035 /* Add this Thr* <-> ThrID binding to the mapping, and 4036 cross-check */ 4037 if (!thrid_to_thr_map) { 4038 thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4", 4039 HG_(free), sizeof(Thr*) ); 4040 } 4041 4042 if (thrid_counter >= ThrID_MAX_VALID) { 4043 /* We're hosed. We have to stop. */ 4044 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ ); 4045 } 4046 4047 thr->thrid = thrid_counter++; 4048 Word ix = VG_(addToXA)( thrid_to_thr_map, &thr ); 4049 tl_assert(ix + 1024 == thr->thrid); 4050 4051 return thr; 4052 } 4053 4054 static void note_local_Kw_n_stack_for ( Thr* thr ) 4055 { 4056 Word nPresent; 4057 ULong_n_EC pair; 4058 tl_assert(thr); 4059 4060 // We only collect this info at history level 1 (approx) 4061 if (HG_(clo_history_level) != 1) 4062 return; 4063 4064 /* This is the scalar Kw for thr. 
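   That is, thr's own component of its W vector clock (viW).
   record_race_info below searches local_Kws_n_stacks for exactly these
   values (via cmp__ULong_n_EC__by_ULong) when reconstructing the bounds
   of a conflicting segment.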
*/ 4065 pair.ull = VtsID__indexAt( thr->viW, thr ); 4066 pair.ec = main_get_EC( thr ); 4067 tl_assert(pair.ec); 4068 tl_assert(thr->local_Kws_n_stacks); 4069 4070 /* check that we're not adding duplicates */ 4071 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks ); 4072 4073 /* Throw away old stacks, if necessary. We can't accumulate stuff 4074 indefinitely. */ 4075 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) { 4076 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 ); 4077 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks ); 4078 if (0) 4079 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n", 4080 thr, pair.ull, pair.ec ); 4081 } 4082 4083 if (nPresent > 0) { 4084 ULong_n_EC* prevPair 4085 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 ); 4086 tl_assert( prevPair->ull <= pair.ull ); 4087 } 4088 4089 if (nPresent == 0) 4090 pair.ec = NULL; 4091 4092 VG_(addToXA)( thr->local_Kws_n_stacks, &pair ); 4093 4094 if (0) 4095 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n", 4096 thr, pair.ull, pair.ec ); 4097 if (0) 4098 VG_(pp_ExeContext)(pair.ec); 4099 } 4100 4101 static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1, 4102 const ULong_n_EC* pair2 ) 4103 { 4104 if (pair1->ull < pair2->ull) return -1; 4105 if (pair1->ull > pair2->ull) return 1; 4106 return 0; 4107 } 4108 4109 4110 ///////////////////////////////////////////////////////// 4111 // // 4112 // Shadow Values // 4113 // // 4114 ///////////////////////////////////////////////////////// 4115 4116 // type SVal, SVal_INVALID and SVal_NOACCESS are defined by 4117 // hb_zsm.h. We have to do everything else here. 4118 4119 /* SVal is 64 bit unsigned int. 4120 4121 <---------30---------> <---------30---------> 4122 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin) 4123 10 X--------------------X XX X--------------------X A: SVal_NOACCESS 4124 11 0--------------------0 00 0--------------------0 A: SVal_INVALID 4125 4126 */ 4127 #define SVAL_TAGMASK (3ULL << 62) 4128 4129 static inline Bool SVal__isC ( SVal s ) { 4130 return (0ULL << 62) == (s & SVAL_TAGMASK); 4131 } 4132 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) { 4133 //tl_assert(VtsID__is_valid(rmini)); 4134 //tl_assert(VtsID__is_valid(wmini)); 4135 return (((ULong)rmini) << 32) | ((ULong)wmini); 4136 } 4137 static inline VtsID SVal__unC_Rmin ( SVal s ) { 4138 tl_assert(SVal__isC(s)); 4139 return (VtsID)(s >> 32); 4140 } 4141 static inline VtsID SVal__unC_Wmin ( SVal s ) { 4142 tl_assert(SVal__isC(s)); 4143 return (VtsID)(s & 0xFFFFFFFFULL); 4144 } 4145 4146 static inline Bool SVal__isA ( SVal s ) { 4147 return (2ULL << 62) == (s & SVAL_TAGMASK); 4148 } 4149 __attribute__((unused)) 4150 static inline SVal SVal__mkA ( void ) { 4151 return 2ULL << 62; 4152 } 4153 4154 /* Direct callback from lib_zsm. */ 4155 static inline void SVal__rcinc ( SVal s ) { 4156 if (SVal__isC(s)) { 4157 VtsID__rcinc( SVal__unC_Rmin(s) ); 4158 VtsID__rcinc( SVal__unC_Wmin(s) ); 4159 } 4160 } 4161 4162 /* Direct callback from lib_zsm. 
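   For C values this drops one reference on each of the two VtsIDs packed
   into the SVal; keeping these counts accurate is what allows the vts_tab
   GC/pruning machinery above to discard VTSs that are no longer
   reachable.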
*/ 4163 static inline void SVal__rcdec ( SVal s ) { 4164 if (SVal__isC(s)) { 4165 VtsID__rcdec( SVal__unC_Rmin(s) ); 4166 VtsID__rcdec( SVal__unC_Wmin(s) ); 4167 } 4168 } 4169 4170 static inline void *SVal2Ptr (SVal s) 4171 { 4172 return (void*)(UWord)s; 4173 } 4174 4175 static inline SVal Ptr2SVal (void* ptr) 4176 { 4177 return (SVal)(UWord)ptr; 4178 } 4179 4180 4181 4182 ///////////////////////////////////////////////////////// 4183 // // 4184 // Change-event map2 // 4185 // // 4186 ///////////////////////////////////////////////////////// 4187 4188 /* This is in two parts: 4189 4190 1. A hash table of RCECs. This is a set of reference-counted stack 4191 traces. When the reference count of a stack trace becomes zero, 4192 it is removed from the set and freed up. The intent is to have 4193 a set of stack traces which can be referred to from (2), but to 4194 only represent each one once. The set is indexed/searched by 4195 ordering on the stack trace vectors. 4196 4197 2. A Hash table of OldRefs. These store information about each old 4198 ref that we need to record. Hash table key is the address of the 4199 location for which the information is recorded. For LRU 4200 purposes, each OldRef in the hash table is also on a doubly 4201 linked list maintaining the order in which the OldRef were most 4202 recently accessed. 4203 Each OldRef also maintains the stamp at which it was last accessed. 4204 With these stamps, we can quickly check which of 2 OldRef is the 4205 'newest', without having to scan the full list of LRU OldRef. 4206 4207 The important part of an OldRef is, however, its acc component. 4208 This binds a TSW triple (thread, size, R/W) to an RCEC. 4209 4210 We allocate a maximum of VG_(clo_conflict_cache_size) OldRef. 4211 Then we do exact LRU discarding. For each discarded OldRef we must 4212 of course decrement the reference count on the RCEC it 4213 refers to, in order that entries from (1) eventually get 4214 discarded too. 
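
   Schematically (a sketch; all names are defined below):

      oldrefHT : guest address ga -> chain of OldRefs, one per (ga,TSW)
      OldRef.acc.rcec ----------> RCEC held in contextTab (ref counted)
      lru <-> OldRef <-> ... <-> OldRef <-> mru       (exact LRU order)

   Evicting the least recently used OldRef (lru.next) decrements the count
   on its RCEC; RCECs whose count has dropped to zero are swept up later
   by do_RCEC_GC.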
4215 */ 4216 4217 static UWord stats__evm__lookup_found = 0; 4218 static UWord stats__evm__lookup_notfound = 0; 4219 4220 static UWord stats__ctxt_eq_tsw_eq_rcec = 0; 4221 static UWord stats__ctxt_eq_tsw_neq_rcec = 0; 4222 static UWord stats__ctxt_neq_tsw_neq_rcec = 0; 4223 static UWord stats__ctxt_rcdec_calls = 0; 4224 static UWord stats__ctxt_rcec_gc_discards = 0; 4225 4226 static UWord stats__ctxt_tab_curr = 0; 4227 static UWord stats__ctxt_tab_max = 0; 4228 4229 static UWord stats__ctxt_tab_qs = 0; 4230 static UWord stats__ctxt_tab_cmps = 0; 4231 4232 4233 /////////////////////////////////////////////////////// 4234 //// Part (1): A hash table of RCECs 4235 /// 4236 4237 #define N_FRAMES 8 4238 4239 // (UInt) `echo "Reference Counted Execution Context" | md5sum` 4240 #define RCEC_MAGIC 0xab88abb2UL 4241 4242 //#define N_RCEC_TAB 98317 /* prime */ 4243 #define N_RCEC_TAB 196613 /* prime */ 4244 4245 typedef 4246 struct _RCEC { 4247 UWord magic; /* sanity check only */ 4248 struct _RCEC* next; 4249 UWord rc; 4250 UWord rcX; /* used for crosschecking */ 4251 UWord frames_hash; /* hash of all the frames */ 4252 UWord frames[N_FRAMES]; 4253 } 4254 RCEC; 4255 4256 //////////// BEGIN RCEC pool allocator 4257 static PoolAlloc* rcec_pool_allocator; 4258 static RCEC* alloc_RCEC ( void ) { 4259 return VG_(allocEltPA) ( rcec_pool_allocator ); 4260 } 4261 4262 static void free_RCEC ( RCEC* rcec ) { 4263 tl_assert(rcec->magic == RCEC_MAGIC); 4264 VG_(freeEltPA)( rcec_pool_allocator, rcec ); 4265 } 4266 //////////// END RCEC pool allocator 4267 4268 static RCEC** contextTab = NULL; /* hash table of RCEC*s */ 4269 4270 /* Count of allocated RCEC having ref count > 0 */ 4271 static UWord RCEC_referenced = 0; 4272 4273 /* Gives an arbitrary total order on RCEC .frames fields */ 4274 static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) { 4275 Word i; 4276 tl_assert(ec1 && ec1->magic == RCEC_MAGIC); 4277 tl_assert(ec2 && ec2->magic == RCEC_MAGIC); 4278 if (ec1->frames_hash < ec2->frames_hash) return -1; 4279 if (ec1->frames_hash > ec2->frames_hash) return 1; 4280 for (i = 0; i < N_FRAMES; i++) { 4281 if (ec1->frames[i] < ec2->frames[i]) return -1; 4282 if (ec1->frames[i] > ec2->frames[i]) return 1; 4283 } 4284 return 0; 4285 } 4286 4287 4288 /* Dec the ref of this RCEC. */ 4289 static void ctxt__rcdec ( RCEC* ec ) 4290 { 4291 stats__ctxt_rcdec_calls++; 4292 tl_assert(ec && ec->magic == RCEC_MAGIC); 4293 tl_assert(ec->rc > 0); 4294 ec->rc--; 4295 if (ec->rc == 0) 4296 RCEC_referenced--; 4297 } 4298 4299 static void ctxt__rcinc ( RCEC* ec ) 4300 { 4301 tl_assert(ec && ec->magic == RCEC_MAGIC); 4302 if (ec->rc == 0) 4303 RCEC_referenced++; 4304 ec->rc++; 4305 } 4306 4307 4308 /* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and 4309 move it one step closer to the front of the list, so as to make 4310 subsequent searches for it cheaper. */ 4311 static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec ) 4312 { 4313 RCEC *ec0, *ec1, *ec2; 4314 if (ec == *headp) 4315 tl_assert(0); /* already at head of list */ 4316 tl_assert(ec != NULL); 4317 ec0 = *headp; 4318 ec1 = NULL; 4319 ec2 = NULL; 4320 while (True) { 4321 if (ec0 == NULL || ec0 == ec) break; 4322 ec2 = ec1; 4323 ec1 = ec0; 4324 ec0 = ec0->next; 4325 } 4326 tl_assert(ec0 == ec); 4327 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) { 4328 RCEC* tmp; 4329 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's 4330 predecessor. Swap ec0 and ec1, that is, move ec0 one step 4331 closer to the start of the list. 
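      Pictorially: before  ... -> ec2 -> ec1 -> ec0 -> rest
                   after   ... -> ec2 -> ec0 -> ec1 -> rest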
*/ 4332 tl_assert(ec2->next == ec1); 4333 tl_assert(ec1->next == ec0); 4334 tmp = ec0->next; 4335 ec2->next = ec0; 4336 ec0->next = ec1; 4337 ec1->next = tmp; 4338 } 4339 else 4340 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) { 4341 /* it's second in the list. */ 4342 tl_assert(*headp == ec1); 4343 tl_assert(ec1->next == ec0); 4344 ec1->next = ec0->next; 4345 ec0->next = ec1; 4346 *headp = ec0; 4347 } 4348 } 4349 4350 4351 /* Find the given RCEC in the tree, and return a pointer to it. Or, 4352 if not present, add the given one to the tree (by making a copy of 4353 it, so the caller can immediately deallocate the original) and 4354 return a pointer to the copy. The caller can safely have 'example' 4355 on its stack, since we will always return a pointer to a copy of 4356 it, not to the original. Note that the inserted node will have .rc 4357 of zero and so the caller must immediately increment it. */ 4358 __attribute__((noinline)) 4359 static RCEC* ctxt__find_or_add ( RCEC* example ) 4360 { 4361 UWord hent; 4362 RCEC* copy; 4363 tl_assert(example && example->magic == RCEC_MAGIC); 4364 tl_assert(example->rc == 0); 4365 4366 /* Search the hash table to see if we already have it. */ 4367 stats__ctxt_tab_qs++; 4368 hent = example->frames_hash % N_RCEC_TAB; 4369 copy = contextTab[hent]; 4370 while (1) { 4371 if (!copy) break; 4372 tl_assert(copy->magic == RCEC_MAGIC); 4373 stats__ctxt_tab_cmps++; 4374 if (0 == RCEC__cmp_by_frames(copy, example)) break; 4375 copy = copy->next; 4376 } 4377 4378 if (copy) { 4379 tl_assert(copy != example); 4380 /* optimisation: if it's not at the head of its list, move 1 4381 step fwds, to make future searches cheaper */ 4382 if (copy != contextTab[hent]) { 4383 move_RCEC_one_step_forward( &contextTab[hent], copy ); 4384 } 4385 } else { 4386 copy = alloc_RCEC(); 4387 tl_assert(copy != example); 4388 *copy = *example; 4389 copy->next = contextTab[hent]; 4390 contextTab[hent] = copy; 4391 stats__ctxt_tab_curr++; 4392 if (stats__ctxt_tab_curr > stats__ctxt_tab_max) 4393 stats__ctxt_tab_max = stats__ctxt_tab_curr; 4394 } 4395 return copy; 4396 } 4397 4398 static inline UWord ROLW ( UWord w, Int n ) 4399 { 4400 Int bpw = 8 * sizeof(UWord); 4401 w = (w << n) | (w >> (bpw-n)); 4402 return w; 4403 } 4404 4405 __attribute__((noinline)) 4406 static RCEC* get_RCEC ( Thr* thr ) 4407 { 4408 UWord hash, i; 4409 RCEC example; 4410 example.magic = RCEC_MAGIC; 4411 example.rc = 0; 4412 example.rcX = 0; 4413 example.next = NULL; 4414 main_get_stacktrace( thr, &example.frames[0], N_FRAMES ); 4415 hash = 0; 4416 for (i = 0; i < N_FRAMES; i++) { 4417 hash ^= example.frames[i]; 4418 hash = ROLW(hash, 19); 4419 } 4420 example.frames_hash = hash; 4421 return ctxt__find_or_add( &example ); 4422 } 4423 4424 /////////////////////////////////////////////////////// 4425 //// Part (2): 4426 /// A hashtable guest-addr -> OldRef, that refers to (1) 4427 /// Note: we use the guest address as key. This means that the entries 4428 /// for multiple threads accessing the same address will land in the same 4429 /// bucket. It might be nice to have a better distribution of the 4430 /// OldRef in the hashtable by using ask key the guestaddress ^ tsw. 4431 /// The problem is that when a race is reported on a ga, we need to retrieve 4432 /// efficiently the accesses to ga by other threads, only using the ga. 4433 /// Measurements on firefox have shown that the chain length is reasonable. 4434 4435 /* Records an access: a thread, a context (size & writeness) and the 4436 number of held locks. 
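   (More precisely, the set of write-held locks, locksHeldW, is recorded
   alongside the TSW and the RCEC in the acc field.)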
The size (1,2,4,8) is stored as is in szB.
4437 Note that szB uses more bits than needed to store a size up to 8.
4438 This allows a TSW to be used as a fully initialised UInt, e.g. in
4439 cmp_oldref_tsw. If needed, a more compact representation of szB
4440 could be used (e.g. use only 4 bits, or use only 2 bits and encode the
4441 size (1,2,4,8) as 00 = 1, 01 = 2, 10 = 4, 11 = 8). */
4442 typedef
4443 struct {
4444 UInt thrid : SCALARTS_N_THRBITS;
4445 UInt szB : 32 - SCALARTS_N_THRBITS - 1;
4446 UInt isW : 1;
4447 } TSW; // Thread+Size+Writeness
4448 typedef
4449 struct {
4450 TSW tsw;
4451 WordSetID locksHeldW;
4452 RCEC* rcec;
4453 }
4454 Thr_n_RCEC;
4455
4456 typedef
4457 struct OldRef {
4458 struct OldRef *ht_next; // to link hash table nodes together.
4459 UWord ga; // hash_table key, == address for which we record an access.
4460 struct OldRef *prev; // to refs older than this one
4461 struct OldRef *next; // to refs newer than this one
4462 UWord stamp; // allows ordering 2 OldRefs by time of access
4463 Thr_n_RCEC acc;
4464 }
4465 OldRef;
4466
4467 /* Returns the or->tsw as a UInt */
4468 static inline UInt oldref_tsw (const OldRef* or)
4469 {
4470 return *(const UInt*)(&or->acc.tsw);
4471 }
4472
4473 /* Compare the tsw components of 2 OldRefs.
4474 Used for the OldRef hashtable (which already verifies equality of the
4475 'key' part). */
4476 static Word cmp_oldref_tsw (const void* node1, const void* node2 )
4477 {
4478 const UInt tsw1 = oldref_tsw(node1);
4479 const UInt tsw2 = oldref_tsw(node2);
4480
4481 if (tsw1 < tsw2) return -1;
4482 if (tsw1 > tsw2) return 1;
4483 return 0;
4484 }
4485
4486
4487 //////////// BEGIN OldRef pool allocator
4488 static PoolAlloc* oldref_pool_allocator;
4489 // Note: We only allocate elements in this pool allocator; we never free them.
4490 // We stop allocating elements at VG_(clo_conflict_cache_size).
4491 //////////// END OldRef pool allocator
4492
4493 static OldRef mru;
4494 static OldRef lru;
4495 // A doubly linked list, chaining all OldRefs in mru/lru order.
4496 // mru/lru are sentinel nodes.
4497 // Whenever an oldref is re-used, it is moved to the most recently
4498 // used position (i.e. pointed to by mru.prev).
4499 // When a new oldref is needed, it is allocated from the pool
4500 // if we have not yet reached --conflict-cache-size.
4501 // Otherwise, if all oldrefs have already been allocated,
4502 // the least recently used (i.e. pointed to by lru.next) is re-used.
4503 // When an OldRef is used, it is moved to be the most recently used entry
4504 // (i.e. pointed to by mru.prev).
4505
4506 // Removes r from the doubly linked list.
4507 // Note: we do not need to test for special cases such as
4508 // NULL next or prev pointers, because we have sentinel nodes
4509 // at both sides of the list. So, a node is always forward and
4510 // backward linked.
4511 static inline void OldRef_unchain(OldRef *r)
4512 {
4513 r->next->prev = r->prev;
4514 r->prev->next = r->next;
4515 }
4516
4517 // Insert new as the newest OldRef.
4518 // Similarly to OldRef_unchain, no need to test for NULL
4519 // pointers, as e.g. mru.prev is always guaranteed to point
4520 // to a non-NULL node (lru when the list is empty).
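// A typical re-touch of an already-known ref (see event_map_bind below)
// is then just:  OldRef_unchain(ref); OldRef_newest(ref);  both O(1)
// thanks to the sentinels.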
4521 static inline void OldRef_newest(OldRef *new) 4522 { 4523 new->next = &mru; 4524 new->prev = mru.prev; 4525 mru.prev = new; 4526 new->prev->next = new; 4527 } 4528 4529 4530 static VgHashTable* oldrefHT = NULL; /* Hash table* OldRef* */ 4531 static UWord oldrefHTN = 0; /* # elems in oldrefHT */ 4532 /* Note: the nr of ref in the oldrefHT will always be equal to 4533 the nr of elements that were allocated from the OldRef pool allocator 4534 as we never free an OldRef : we just re-use them. */ 4535 4536 4537 /* allocates a new OldRef or re-use the lru one if all allowed OldRef 4538 have already been allocated. */ 4539 static OldRef* alloc_or_reuse_OldRef ( void ) 4540 { 4541 if (oldrefHTN < HG_(clo_conflict_cache_size)) { 4542 oldrefHTN++; 4543 return VG_(allocEltPA) ( oldref_pool_allocator ); 4544 } else { 4545 OldRef *oldref_ht; 4546 OldRef *oldref = lru.next; 4547 4548 OldRef_unchain(oldref); 4549 oldref_ht = VG_(HT_gen_remove) (oldrefHT, oldref, cmp_oldref_tsw); 4550 tl_assert (oldref == oldref_ht); 4551 ctxt__rcdec( oldref->acc.rcec ); 4552 return oldref; 4553 } 4554 } 4555 4556 4557 inline static UInt min_UInt ( UInt a, UInt b ) { 4558 return a < b ? a : b; 4559 } 4560 4561 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the 4562 first interval is lower, 1 if the first interval is higher, and 0 4563 if there is any overlap. Redundant paranoia with casting is there 4564 following what looked distinctly like a bug in gcc-4.1.2, in which 4565 some of the comparisons were done signedly instead of 4566 unsignedly. */ 4567 /* Copied from exp-ptrcheck/sg_main.c */ 4568 static inline Word cmp_nonempty_intervals ( Addr a1, SizeT n1, 4569 Addr a2, SizeT n2 ) { 4570 UWord a1w = (UWord)a1; 4571 UWord n1w = (UWord)n1; 4572 UWord a2w = (UWord)a2; 4573 UWord n2w = (UWord)n2; 4574 tl_assert(n1w > 0 && n2w > 0); 4575 if (a1w + n1w <= a2w) return -1L; 4576 if (a2w + n2w <= a1w) return 1L; 4577 return 0; 4578 } 4579 4580 static UWord event_map_stamp = 0; // Used to stamp each OldRef when touched. 4581 4582 static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr ) 4583 { 4584 OldRef example; 4585 OldRef* ref; 4586 RCEC* rcec; 4587 4588 tl_assert(thr); 4589 ThrID thrid = thr->thrid; 4590 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */ 4591 4592 WordSetID locksHeldW = thr->hgthread->locksetW; 4593 4594 rcec = get_RCEC( thr ); 4595 4596 tl_assert (szB == 4 || szB == 8 ||szB == 1 || szB == 2); 4597 // Check for most frequent cases first 4598 // Note: we could support a szB up to 1 << (32 - SCALARTS_N_THRBITS - 1) 4599 4600 /* Look in the oldrefHT to see if we already have a record for this 4601 address/thr/sz/isW. */ 4602 example.ga = a; 4603 example.acc.tsw = (TSW) {.thrid = thrid, 4604 .szB = szB, 4605 .isW = (UInt)(isW & 1)}; 4606 ref = VG_(HT_gen_lookup) (oldrefHT, &example, cmp_oldref_tsw); 4607 4608 if (ref) { 4609 /* We already have a record for this address and this (thrid, R/W, 4610 size) triple. */ 4611 tl_assert (ref->ga == a); 4612 4613 /* thread 'thr' has an entry. Update its RCEC, if it differs. */ 4614 if (rcec == ref->acc.rcec) 4615 stats__ctxt_eq_tsw_eq_rcec++; 4616 else { 4617 stats__ctxt_eq_tsw_neq_rcec++; 4618 ctxt__rcdec( ref->acc.rcec ); 4619 ctxt__rcinc(rcec); 4620 ref->acc.rcec = rcec; 4621 } 4622 tl_assert(ref->acc.tsw.thrid == thrid); 4623 /* Update the stamp, RCEC and the W-held lockset. 
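   (The RCEC itself was already refreshed just above, if it changed.
   Re-chaining the ref below as most recently used is what keeps the
   exact-LRU eviction in alloc_or_reuse_OldRef honest.)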
*/ 4624 ref->stamp = event_map_stamp; 4625 ref->acc.locksHeldW = locksHeldW; 4626 4627 OldRef_unchain(ref); 4628 OldRef_newest(ref); 4629 4630 } else { 4631 /* We don't have a record for this address+triple. Create a new one. */ 4632 stats__ctxt_neq_tsw_neq_rcec++; 4633 ref = alloc_or_reuse_OldRef(); 4634 ref->ga = a; 4635 ref->acc.tsw = (TSW) {.thrid = thrid, 4636 .szB = szB, 4637 .isW = (UInt)(isW & 1)}; 4638 ref->stamp = event_map_stamp; 4639 ref->acc.locksHeldW = locksHeldW; 4640 ref->acc.rcec = rcec; 4641 ctxt__rcinc(rcec); 4642 4643 VG_(HT_add_node) ( oldrefHT, ref ); 4644 OldRef_newest (ref); 4645 } 4646 event_map_stamp++; 4647 } 4648 4649 4650 /* Extract info from the conflicting-access machinery. 4651 Returns the most recent conflicting access with thr/[a, a+szB[/isW. */ 4652 Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC, 4653 /*OUT*/Thr** resThr, 4654 /*OUT*/SizeT* resSzB, 4655 /*OUT*/Bool* resIsW, 4656 /*OUT*/WordSetID* locksHeldW, 4657 Thr* thr, Addr a, SizeT szB, Bool isW ) 4658 { 4659 Word i, j; 4660 OldRef *ref = NULL; 4661 SizeT ref_szB = 0; 4662 4663 OldRef *cand_ref; 4664 SizeT cand_ref_szB; 4665 Addr cand_a; 4666 4667 Addr toCheck[15]; 4668 Int nToCheck = 0; 4669 4670 tl_assert(thr); 4671 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1); 4672 4673 ThrID thrid = thr->thrid; 4674 4675 toCheck[nToCheck++] = a; 4676 for (i = -7; i < (Word)szB; i++) { 4677 if (i != 0) 4678 toCheck[nToCheck++] = a + i; 4679 } 4680 tl_assert(nToCheck <= 15); 4681 4682 /* Now see if we can find a suitable matching event for 4683 any of the addresses in toCheck[0 .. nToCheck-1]. */ 4684 for (j = 0; j < nToCheck; j++) { 4685 4686 cand_a = toCheck[j]; 4687 // VG_(printf)("test %ld %p\n", j, cand_a); 4688 4689 /* Find the first HT element for this address. 4690 We might have several of these. They will be linked via ht_next. 4691 We however need to check various elements as the list contains 4692 all elements that map to the same bucket. */ 4693 for (cand_ref = VG_(HT_lookup)( oldrefHT, cand_a ); 4694 cand_ref; cand_ref = cand_ref->ht_next) { 4695 if (cand_ref->ga != cand_a) 4696 /* OldRef for another address in this HT bucket. Ignore. */ 4697 continue; 4698 4699 if (cand_ref->acc.tsw.thrid == thrid) 4700 /* This is an access by the same thread, but we're only 4701 interested in accesses from other threads. Ignore. */ 4702 continue; 4703 4704 if ((!cand_ref->acc.tsw.isW) && (!isW)) 4705 /* We don't want to report a read racing against another 4706 read; that's stupid. So in this case move on. */ 4707 continue; 4708 4709 cand_ref_szB = cand_ref->acc.tsw.szB; 4710 if (cmp_nonempty_intervals(a, szB, cand_a, cand_ref_szB) != 0) 4711 /* No overlap with the access we're asking about. Ignore. */ 4712 continue; 4713 4714 /* We have a match. Keep this match if it is newer than 4715 the previous match. Note that stamp are Unsigned Words, and 4716 for long running applications, event_map_stamp might have cycled. 4717 So, 'roll' each stamp using event_map_stamp to have the 4718 stamps in the good order, in case event_map_stamp recycled. */ 4719 if (!ref 4720 || (ref->stamp - event_map_stamp) 4721 < (cand_ref->stamp - event_map_stamp)) { 4722 ref = cand_ref; 4723 ref_szB = cand_ref_szB; 4724 } 4725 } 4726 4727 if (ref) { 4728 /* return with success */ 4729 Int n, maxNFrames; 4730 RCEC* ref_rcec = ref->acc.rcec; 4731 tl_assert(ref->acc.tsw.thrid); 4732 tl_assert(ref_rcec); 4733 tl_assert(ref_rcec->magic == RCEC_MAGIC); 4734 tl_assert(ref_szB >= 1); 4735 /* Count how many non-zero frames we have. 
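   An RCEC always has N_FRAMES slots; unused trailing slots are zero
   (that is what the loop below relies on), and we additionally cap at
   VG_(clo_backtrace_size) so the reported stack is no deeper than the
   user requested.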
*/ 4736 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size)); 4737 for (n = 0; n < maxNFrames; n++) { 4738 if (0 == ref_rcec->frames[n]) break; 4739 } 4740 *resEC = VG_(make_ExeContext_from_StackTrace)(ref_rcec->frames, 4741 n); 4742 *resThr = Thr__from_ThrID(ref->acc.tsw.thrid); 4743 *resSzB = ref_szB; 4744 *resIsW = ref->acc.tsw.isW; 4745 *locksHeldW = ref->acc.locksHeldW; 4746 stats__evm__lookup_found++; 4747 return True; 4748 } 4749 4750 /* consider next address in toCheck[] */ 4751 } /* for (j = 0; j < nToCheck; j++) */ 4752 4753 /* really didn't find anything. */ 4754 stats__evm__lookup_notfound++; 4755 return False; 4756 } 4757 4758 4759 void libhb_event_map_access_history ( Addr a, SizeT szB, Access_t fn ) 4760 { 4761 OldRef *ref = lru.next; 4762 SizeT ref_szB; 4763 Int n; 4764 4765 while (ref != &mru) { 4766 ref_szB = ref->acc.tsw.szB; 4767 if (cmp_nonempty_intervals(a, szB, ref->ga, ref_szB) == 0) { 4768 RCEC* ref_rcec = ref->acc.rcec; 4769 for (n = 0; n < N_FRAMES; n++) { 4770 if (0 == ref_rcec->frames[n]) { 4771 break; 4772 } 4773 } 4774 (*fn)(ref_rcec->frames, n, 4775 Thr__from_ThrID(ref->acc.tsw.thrid), 4776 ref->ga, 4777 ref_szB, 4778 ref->acc.tsw.isW, 4779 ref->acc.locksHeldW); 4780 } 4781 tl_assert (ref->next == &mru 4782 || ((ref->stamp - event_map_stamp) 4783 < ref->next->stamp - event_map_stamp)); 4784 ref = ref->next; 4785 } 4786 } 4787 4788 static void event_map_init ( void ) 4789 { 4790 Word i; 4791 4792 /* Context (RCEC) pool allocator */ 4793 rcec_pool_allocator = VG_(newPA) ( 4794 sizeof(RCEC), 4795 1000 /* RCECs per pool */, 4796 HG_(zalloc), 4797 "libhb.event_map_init.1 (RCEC pools)", 4798 HG_(free) 4799 ); 4800 4801 /* Context table */ 4802 tl_assert(!contextTab); 4803 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)", 4804 N_RCEC_TAB * sizeof(RCEC*) ); 4805 for (i = 0; i < N_RCEC_TAB; i++) 4806 contextTab[i] = NULL; 4807 4808 /* Oldref pool allocator */ 4809 oldref_pool_allocator = VG_(newPA)( 4810 sizeof(OldRef), 4811 1000 /* OldRefs per pool */, 4812 HG_(zalloc), 4813 "libhb.event_map_init.3 (OldRef pools)", 4814 HG_(free) 4815 ); 4816 4817 /* Oldref hashtable */ 4818 tl_assert(!oldrefHT); 4819 oldrefHT = VG_(HT_construct) ("libhb.event_map_init.4 (oldref hashtable)"); 4820 4821 oldrefHTN = 0; 4822 mru.prev = &lru; 4823 mru.next = NULL; 4824 lru.prev = NULL; 4825 lru.next = &mru; 4826 mru.acc = (Thr_n_RCEC) {.tsw = {.thrid = 0, 4827 .szB = 0, 4828 .isW = 0}, 4829 .locksHeldW = 0, 4830 .rcec = NULL}; 4831 lru.acc = mru.acc; 4832 } 4833 4834 static void event_map__check_reference_counts ( void ) 4835 { 4836 RCEC* rcec; 4837 OldRef* oldref; 4838 Word i; 4839 UWord nEnts = 0; 4840 4841 /* Set the 'check' reference counts to zero. Also, optionally 4842 check that the real reference counts are non-zero. We allow 4843 these to fall to zero before a GC, but the GC must get rid of 4844 all those that are zero, hence none should be zero after a 4845 GC. 
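   The cross-check below recomputes each count into rcX by walking
   oldrefHT and then asserts rc == rcX for every RCEC.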
*/ 4846 for (i = 0; i < N_RCEC_TAB; i++) { 4847 for (rcec = contextTab[i]; rcec; rcec = rcec->next) { 4848 nEnts++; 4849 tl_assert(rcec); 4850 tl_assert(rcec->magic == RCEC_MAGIC); 4851 rcec->rcX = 0; 4852 } 4853 } 4854 4855 /* check that the stats are sane */ 4856 tl_assert(nEnts == stats__ctxt_tab_curr); 4857 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max); 4858 4859 /* visit all the referencing points, inc check ref counts */ 4860 VG_(HT_ResetIter)( oldrefHT ); 4861 oldref = VG_(HT_Next)( oldrefHT ); 4862 while (oldref) { 4863 tl_assert (oldref->acc.tsw.thrid); 4864 tl_assert (oldref->acc.rcec); 4865 tl_assert (oldref->acc.rcec->magic == RCEC_MAGIC); 4866 oldref->acc.rcec->rcX++; 4867 oldref = VG_(HT_Next)( oldrefHT ); 4868 } 4869 4870 /* compare check ref counts with actual */ 4871 for (i = 0; i < N_RCEC_TAB; i++) { 4872 for (rcec = contextTab[i]; rcec; rcec = rcec->next) { 4873 tl_assert(rcec->rc == rcec->rcX); 4874 } 4875 } 4876 } 4877 4878 __attribute__((noinline)) 4879 static void do_RCEC_GC ( void ) 4880 { 4881 UInt i; 4882 4883 if (VG_(clo_stats)) { 4884 static UInt ctr = 1; 4885 VG_(message)(Vg_DebugMsg, 4886 "libhb: RCEC GC: #%u %lu slots," 4887 " %lu cur ents(ref'd %lu)," 4888 " %lu max ents\n", 4889 ctr++, 4890 (UWord)N_RCEC_TAB, 4891 stats__ctxt_tab_curr, RCEC_referenced, 4892 stats__ctxt_tab_max ); 4893 } 4894 tl_assert (stats__ctxt_tab_curr > RCEC_referenced); 4895 4896 /* Throw away all RCECs with zero reference counts */ 4897 for (i = 0; i < N_RCEC_TAB; i++) { 4898 RCEC** pp = &contextTab[i]; 4899 RCEC* p = *pp; 4900 while (p) { 4901 if (p->rc == 0) { 4902 *pp = p->next; 4903 free_RCEC(p); 4904 p = *pp; 4905 tl_assert(stats__ctxt_tab_curr > 0); 4906 stats__ctxt_rcec_gc_discards++; 4907 stats__ctxt_tab_curr--; 4908 } else { 4909 pp = &p->next; 4910 p = p->next; 4911 } 4912 } 4913 } 4914 4915 tl_assert (stats__ctxt_tab_curr == RCEC_referenced); 4916 } 4917 4918 ///////////////////////////////////////////////////////// 4919 // // 4920 // Core MSM // 4921 // // 4922 ///////////////////////////////////////////////////////// 4923 4924 /* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19 4925 Nov 08, and again after [...], 4926 June 09. */ 4927 4928 static ULong stats__msmcread = 0; 4929 static ULong stats__msmcread_change = 0; 4930 static ULong stats__msmcwrite = 0; 4931 static ULong stats__msmcwrite_change = 0; 4932 4933 /* Some notes on the H1 history mechanism: 4934 4935 Transition rules are: 4936 4937 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw) 4938 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw) 4939 4940 After any access by a thread T to a location L, L's constraint pair 4941 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock. 4942 4943 After a race by thread T conflicting with some previous access by 4944 some other thread U, for a location with constraint (before 4945 processing the later access) (Cr,Cw), then Cw[U] is the segment in 4946 which the previously access lies. 4947 4948 Hence in record_race_info, we pass in Cfailed and Kfailed, which 4949 are compared so as to find out which thread(s) this access 4950 conflicts with. Once that is established, we also require the 4951 pre-update Cw for the location, so we can index into it for those 4952 threads, to get the scalar clock values for the point at which the 4953 former accesses were made. 
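   Concretely, record_race_info below picks one such thread with
   VtsID__findFirst_notLEQ(Cfailed, Kfailed).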
(In fact we only bother to do any of 4954 this for an arbitrarily chosen one of the conflicting threads, as 4955 that's simpler, it avoids flooding the user with vast amounts of 4956 mostly useless information, and because the program is wrong if it 4957 contains any races at all -- so we don't really need to show all 4958 conflicting access pairs initially, so long as we only show none if 4959 none exist). 4960 4961 --- 4962 4963 That requires the auxiliary proof that 4964 4965 (Cr `join` Kw)[T] == Kw[T] 4966 4967 Why should that be true? Because for any thread T, Kw[T] >= the 4968 scalar clock value for T known by any other thread. In other 4969 words, because T's value for its own scalar clock is at least as up 4970 to date as the value for it known by any other thread (that is true 4971 for both the R- and W- scalar clocks). Hence no other thread will 4972 be able to feed in a value for that element (indirectly via a 4973 constraint) which will exceed Kw[T], and hence the join cannot 4974 cause that particular element to advance. 4975 */ 4976 4977 __attribute__((noinline)) 4978 static void record_race_info ( Thr* acc_thr, 4979 Addr acc_addr, SizeT szB, Bool isWrite, 4980 VtsID Cfailed, 4981 VtsID Kfailed, 4982 VtsID Cw ) 4983 { 4984 /* Call here to report a race. We just hand it onwards to 4985 HG_(record_error_Race). If that in turn discovers that the 4986 error is going to be collected, then, at history_level 2, that 4987 queries the conflicting-event map. The alternative would be to 4988 query it right here. But that causes a lot of pointless queries 4989 for errors which will shortly be discarded as duplicates, and 4990 can become a performance overhead; so we defer the query until 4991 we know the error is not a duplicate. */ 4992 4993 /* Stacks for the bounds of the (or one of the) conflicting 4994 segment(s). These are only set at history_level 1. */ 4995 ExeContext* hist1_seg_start = NULL; 4996 ExeContext* hist1_seg_end = NULL; 4997 Thread* hist1_conf_thr = NULL; 4998 4999 tl_assert(acc_thr); 5000 tl_assert(acc_thr->hgthread); 5001 tl_assert(acc_thr->hgthread->hbthr == acc_thr); 5002 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2); 5003 5004 if (HG_(clo_history_level) == 1) { 5005 Bool found; 5006 Word firstIx, lastIx; 5007 ULong_n_EC key; 5008 5009 /* At history_level 1, we must round up the relevant stack-pair 5010 for the conflicting segment right now. This is because 5011 deferring it is complex; we can't (easily) put Kfailed and 5012 Cfailed into the XError and wait for later without 5013 getting tied up in difficulties with VtsID reference 5014 counting. So just do it now. */ 5015 Thr* confThr; 5016 ULong confTym = 0; 5017 /* Which thread are we in conflict with? There may be more than 5018 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily 5019 (in fact it's the one with the lowest Thr* value). */ 5020 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed ); 5021 /* This must exist! since if it was NULL then there's no 5022 conflict (semantics of return value of 5023 VtsID__findFirst_notLEQ), and msmc{read,write}, which has 5024 called us, just checked exactly this -- that there was in 5025 fact a race. */ 5026 tl_assert(confThr); 5027 5028 /* Get the scalar clock value that the conflicting thread 5029 introduced into the constraint. A careful examination of the 5030 base machine rules shows that this must be the same as the 5031 conflicting thread's scalar clock when it created this 5032 constraint. 
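   (Cf. the (Cr `join` Kw)[T] == Kw[T] property argued in the comment
   preceding this function.)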
Hence we know the scalar clock of the 5033 conflicting thread when the conflicting access was made. */ 5034 confTym = VtsID__indexAt( Cfailed, confThr ); 5035 5036 /* Using this scalar clock, index into the conflicting thread's 5037 collection of stack traces made each time its vector clock 5038 (hence its scalar clock) changed. This gives the stack 5039 traces at the start and end of the conflicting segment (well, 5040 as per comment just above, of one of the conflicting 5041 segments, if there are more than one). */ 5042 key.ull = confTym; 5043 key.ec = NULL; 5044 /* tl_assert(confThr); -- asserted just above */ 5045 tl_assert(confThr->local_Kws_n_stacks); 5046 firstIx = lastIx = 0; 5047 found = VG_(lookupXA_UNSAFE)( 5048 confThr->local_Kws_n_stacks, 5049 &key, &firstIx, &lastIx, 5050 (XACmpFn_t)cmp__ULong_n_EC__by_ULong 5051 ); 5052 if (0) VG_(printf)("record_race_info %u %u %u confThr %p " 5053 "confTym %llu found %d (%ld,%ld)\n", 5054 Cfailed, Kfailed, Cw, 5055 confThr, confTym, found, firstIx, lastIx); 5056 /* We can't indefinitely collect stack traces at VTS 5057 transitions, since we'd eventually run out of memory. Hence 5058 note_local_Kw_n_stack_for will eventually throw away old 5059 ones, which in turn means we might fail to find index value 5060 confTym in the array. */ 5061 if (found) { 5062 ULong_n_EC *pair_start, *pair_end; 5063 pair_start 5064 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx ); 5065 hist1_seg_start = pair_start->ec; 5066 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) { 5067 pair_end 5068 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, 5069 lastIx+1 ); 5070 /* from properties of VG_(lookupXA) and the comparison fn used: */ 5071 tl_assert(pair_start->ull < pair_end->