1 2 /*--------------------------------------------------------------------*/ 3 /*--- Management of the translation table and cache. ---*/ 4 /*--- m_transtab.c ---*/ 5 /*--------------------------------------------------------------------*/ 6 7 /* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2000-2017 Julian Seward 12 jseward (at) acm.org 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30 */ 31 32 #include "pub_core_basics.h" 33 #include "pub_core_debuglog.h" 34 #include "pub_core_machine.h" // For VG_(machine_get_VexArchInfo) 35 #include "pub_core_libcbase.h" 36 #include "pub_core_vki.h" // to keep pub_core_libproc.h happy, sigh 37 #include "pub_core_libcproc.h" // VG_(invalidate_icache) 38 #include "pub_core_libcassert.h" 39 #include "pub_core_libcprint.h" 40 #include "pub_core_options.h" 41 #include "pub_core_tooliface.h" // For VG_(details).avg_translation_sizeB 42 #include "pub_core_transtab.h" 43 #include "pub_core_aspacemgr.h" 44 #include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN) 45 #include "pub_core_xarray.h" 46 #include "pub_core_dispatch.h" // For VG_(disp_cp*) addresses 47 48 49 #define DEBUG_TRANSTAB 0 50 51 52 /*-------------------------------------------------------------*/ 53 /*--- Management of the FIFO-based translation table+cache. ---*/ 54 /*-------------------------------------------------------------*/ 55 56 /* Nr of sectors provided via command line parameter. */ 57 UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT; 58 /* Nr of sectors. 59 Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */ 60 static SECno n_sectors = 0; 61 62 /* Average size of a transtab code entry. 0 means to use the tool 63 provided default. */ 64 UInt VG_(clo_avg_transtab_entry_size) = 0; 65 66 /*------------------ CONSTANTS ------------------*/ 67 /* Number of entries in hash table of each sector. This needs to be a prime 68 number to work properly, it must be <= 65535 (so that a TTE index 69 fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED, HTT_DELETED) 70 to denote 'deleted') and 0xFFFE (HTT_EMPTY) to denote 'Empty' in the 71 hash table. 72 It is strongly recommended not to change this. 73 65521 is the largest prime <= 65535. */ 74 #define N_HTTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521 75 76 #define EC2TTE_DELETED 0xFFFF /* 16-bit special value */ 77 #define HTT_DELETED EC2TTE_DELETED 78 #define HTT_EMPTY 0XFFFE 79 80 // HTTno is the Sector->htt hash table index. Must be the same type as TTEno. 81 typedef UShort HTTno; 82 83 /* Because each sector contains a hash table of TTEntries, we need to 84 specify the maximum allowable loading, after which the sector is 85 deemed full. */ 86 #define SECTOR_TT_LIMIT_PERCENT 65 87 88 /* The sector is deemed full when this many entries are in it. */ 89 #define N_TTES_PER_SECTOR \ 90 ((N_HTTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100) 91 92 /* Equivalence classes for fast address range deletion. There are 1 + 93 2^ECLASS_WIDTH bins. The highest one, ECLASS_MISC, describes an 94 address range which does not fall cleanly within any specific bin. 95 Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. 96 ECLASS_N must fit in a EclassNo. */ 97 #define ECLASS_SHIFT 13 98 #define ECLASS_WIDTH 9 99 #define ECLASS_MISC (1 << ECLASS_WIDTH) 100 #define ECLASS_N (1 + ECLASS_MISC) 101 STATIC_ASSERT(ECLASS_SHIFT + ECLASS_WIDTH < 32); 102 103 typedef UShort EClassNo; 104 105 /*------------------ TYPES ------------------*/ 106 107 /* In edges ("to-me") in the graph created by chaining. */ 108 typedef 109 struct { 110 SECno from_sNo; /* sector number */ 111 TTEno from_tteNo; /* TTE number in given sector */ 112 UInt from_offs: (sizeof(UInt)*8)-1; /* code offset from TCEntry::tcptr 113 where the patch is */ 114 Bool to_fastEP:1; /* Is the patch to a fast or slow entry point? */ 115 } 116 InEdge; 117 118 119 /* Out edges ("from-me") in the graph created by chaining. */ 120 typedef 121 struct { 122 SECno to_sNo; /* sector number */ 123 TTEno to_tteNo; /* TTE number in given sector */ 124 UInt from_offs; /* code offset in owning translation where patch is */ 125 } 126 OutEdge; 127 128 129 #define N_FIXED_IN_EDGE_ARR 3 130 typedef 131 struct { 132 Bool has_var:1; /* True if var is used (then n_fixed must be 0) */ 133 UInt n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_IN_EDGE_ARR */ 134 union { 135 InEdge fixed[N_FIXED_IN_EDGE_ARR]; /* if !has_var */ 136 XArray* var; /* XArray* of InEdgeArr */ /* if has_var */ 137 } edges; 138 } 139 InEdgeArr; 140 141 #define N_FIXED_OUT_EDGE_ARR 2 142 typedef 143 struct { 144 Bool has_var:1; /* True if var is used (then n_fixed must be 0) */ 145 UInt n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_OUT_EDGE_ARR */ 146 union { 147 OutEdge fixed[N_FIXED_OUT_EDGE_ARR]; /* if !has_var */ 148 XArray* var; /* XArray* of OutEdgeArr */ /* if has_var */ 149 } edges; 150 } 151 OutEdgeArr; 152 153 154 /* A translation-table entry. This indicates precisely which areas of 155 guest code are included in the translation, and contains all other 156 auxiliary info too. These are split into hold and cold parts, 157 TTEntryH and TTEntryC, so as to be more cache-friendly 158 (a.k.a. "faster") when searching for translations that intersect 159 with a certain guest code address range, for deletion. In the 160 worst case this can involve a sequential scan through all the hot 161 parts, so they are packed as compactly as possible -- 32 bytes on a 162 64-bit platform, 20 bytes on a 32-bit platform. 163 164 Each TTEntry is logically a matching cold-and-hot pair, and in fact 165 it was originally one structure. First, the cold part. 166 */ 167 typedef 168 struct { 169 union { 170 struct { 171 /* Profiling only: the count and weight (arbitrary meaning) for 172 this translation. Weight is a property of the translation 173 itself and computed once when the translation is created. 174 Count is an entry count for the translation and is 175 incremented by 1 every time the translation is used, if we 176 are profiling. */ 177 ULong count; 178 UShort weight; 179 } prof; // if status == InUse 180 TTEno next_empty_tte; // if status != InUse 181 } usage; 182 183 /* 64-bit aligned pointer to one or more 64-bit words containing 184 the corresponding host code (must be in the same sector!) 185 This is a pointer into the sector's tc (code) area. */ 186 ULong* tcptr; 187 188 /* This is the original guest address that purportedly is the 189 entry point of the translation. You might think that .entry 190 should be the same as .vge->base[0], and most of the time it 191 is. However, when doing redirections, that is not the case. 192 .vge must always correctly describe the guest code sections 193 from which this translation was made. However, .entry may or 194 may not be a lie, depending on whether or not we're doing 195 redirection. */ 196 Addr entry; 197 198 /* Address range summary info: these are pointers back to 199 eclass[] entries in the containing Sector. Those entries in 200 turn point back here -- the two structures are mutually 201 redundant but both necessary to make fast deletions work. 202 The eclass info is similar to, and derived from, this entry's 203 'vge' field, but it is not the same */ 204 UShort n_tte2ec; // # tte2ec pointers (1 to 3) 205 EClassNo tte2ec_ec[3]; // for each, the eclass # 206 UInt tte2ec_ix[3]; // and the index within the eclass. 207 // for i in 0 .. n_tte2ec-1 208 // sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ] 209 // should be the index 210 // of this TTEntry in the containing Sector's tt array. 211 212 /* Admin information for chaining. 'in_edges' is a set of the 213 patch points which jump to this translation -- hence are 214 predecessors in the control flow graph. 'out_edges' points 215 to successors in the control flow graph -- translations to 216 which this one has a patched jump. In short these are just 217 backwards and forwards edges in the graph of patched-together 218 blocks. The 'in_edges' contain slightly more info, enough 219 that we can undo the chaining of each mentioned patch point. 220 The 'out_edges' list exists only so that we can visit the 221 'in_edges' entries of all blocks we're patched through to, in 222 order to remove ourselves from then when we're deleted. */ 223 224 /* A translation can disappear for two reasons: 225 1. erased (as part of the oldest sector cleanup) when the 226 youngest sector is full. 227 2. discarded due to calls to VG_(discard_translations). 228 VG_(discard_translations) sets the status of the 229 translation to 'Deleted'. 230 A.o., the gdbserver discards one or more translations 231 when a breakpoint is inserted or removed at an Addr, 232 or when single stepping mode is enabled/disabled 233 or when a translation is instrumented for gdbserver 234 (all the target jumps of this translation are 235 invalidated). 236 237 So, it is possible that the translation A to be patched 238 (to obtain a patched jump from A to B) is invalidated 239 after B is translated and before A is patched. 240 In case a translation is erased or discarded, the patching 241 cannot be done. VG_(tt_tc_do_chaining) and find_TTEntry_from_hcode 242 are checking the 'from' translation still exists before 243 doing the patching. 244 245 Is it safe to erase or discard the current translation E being 246 executed ? Amazing, but yes, it is safe. 247 Here is the explanation: 248 249 The translation E being executed can only be erased if a new 250 translation N is being done. A new translation is done only 251 if the host addr is a not yet patched jump to another 252 translation. In such a case, the guest address of N is 253 assigned to the PC in the VEX state. Control is returned 254 to the scheduler. N will be translated. This can erase the 255 translation E (in case of sector full). VG_(tt_tc_do_chaining) 256 will not do the chaining to a non found translation E. 257 The execution will continue at the current guest PC 258 (i.e. the translation N). 259 => it is safe to erase the current translation being executed. 260 261 The current translation E being executed can also be discarded 262 (e.g. by gdbserver). VG_(discard_translations) will mark 263 this translation E as Deleted, but the translation itself 264 is not erased. In particular, its host code can only 265 be overwritten or erased in case a new translation is done. 266 A new translation will only be done if a not yet translated 267 jump is to be executed. The execution of the Deleted translation 268 E will continue till a non patched jump is encountered. 269 This situation is then similar to the 'erasing' case above : 270 the current translation E can be erased or overwritten, as the 271 execution will continue at the new translation N. 272 */ 273 274 /* It is possible, although very unlikely, that a block A has 275 more than one patched jump to block B. This could happen if 276 (eg) A finishes "jcond B; jmp B". 277 278 This means in turn that B's in_edges set can list A more than 279 once (twice in this example). However, each such entry must 280 have a different from_offs, since a patched jump can only 281 jump to one place at once (it's meaningless for it to have 282 multiple destinations.) IOW, the successor and predecessor 283 edges in the graph are not uniquely determined by a 284 TTEntry --> TTEntry pair, but rather by a 285 (TTEntry,offset) --> TTEntry triple. 286 287 If A has multiple edges to B then B will mention A multiple 288 times in its in_edges. To make things simpler, we then 289 require that A mentions B exactly the same number of times in 290 its out_edges. Furthermore, a matching out-in pair must have 291 the same offset (from_offs). This facilitates sanity 292 checking, and it facilitates establishing the invariant that 293 a out_edges set may not have duplicates when using the 294 equality defined by (TTEntry,offset). Hence the out_edges 295 and in_edges sets really do have both have set semantics. 296 297 eg if A has been patched to B at offsets 42 and 87 (in A) 298 then A.out_edges = { (B,42), (B,87) } (in any order) 299 and B.in_edges = { (A,42), (A,87) } (in any order) 300 301 Hence for each node pair P->Q in the graph, there's a 1:1 302 mapping between P.out_edges and Q.in_edges. 303 */ 304 InEdgeArr in_edges; 305 OutEdgeArr out_edges; 306 } 307 TTEntryC; 308 309 /* And this is the hot part. */ 310 typedef 311 struct { 312 /* This structure describes precisely what ranges of guest code 313 the translation covers, so we can decide whether or not to 314 delete it when translations of a given address range are 315 invalidated. Originally this was a VexGuestExtents, but that 316 itself is 32 bytes on a 64-bit target, and we really want to 317 squeeze in an 8-bit |status| field into the 32 byte field, so 318 as to get 2 of them in a 64 byte LLC line. Hence the 319 VexGuestExtents fields are inlined, the _n_used field is 320 changed to a UChar (it only ever has the values 1, 2 or 3) 321 and the 8-bit status field is placed in byte 31 of the 322 structure. */ 323 /* ORIGINALLY: VexGuestExtents vge; */ 324 Addr vge_base[3]; 325 UShort vge_len[3]; 326 UChar vge_n_used; /* BEWARE: is UShort in VexGuestExtents */ 327 328 /* Status of the slot. Note, we need to be able to do lazy 329 deletion, hence the Deleted state. */ 330 enum { InUse, Deleted, Empty } status : 8; 331 } 332 TTEntryH; 333 334 335 /* Impedance matchers, that convert between a VexGuestExtents and a 336 TTEntryH, ignoring TTEntryH::status, which doesn't exist in a 337 VexGuestExtents -- it is entirely unrelated. */ 338 339 /* Takes a VexGuestExtents and pushes it into a TTEntryH. The 340 TTEntryH.status field is left unchanged. */ 341 static 342 inline void TTEntryH__from_VexGuestExtents ( /*MOD*/TTEntryH* tteH, 343 const VexGuestExtents* vge ) 344 { 345 tteH->vge_base[0] = vge->base[0]; 346 tteH->vge_base[1] = vge->base[1]; 347 tteH->vge_base[2] = vge->base[2]; 348 tteH->vge_len[0] = vge->len[0]; 349 tteH->vge_len[1] = vge->len[1]; 350 tteH->vge_len[2] = vge->len[2]; 351 tteH->vge_n_used = (UChar)vge->n_used; /* BEWARE: no range check. */ 352 } 353 354 /* Takes a TTEntryH and pushes the vge_ components into a VexGuestExtents. */ 355 static 356 inline void TTEntryH__to_VexGuestExtents ( /*MOD*/VexGuestExtents* vge, 357 const TTEntryH* tteH ) 358 { 359 vge->base[0] = tteH->vge_base[0]; 360 vge->base[1] = tteH->vge_base[1]; 361 vge->base[2] = tteH->vge_base[2]; 362 vge->len[0] = tteH->vge_len[0] ; 363 vge->len[1] = tteH->vge_len[1] ; 364 vge->len[2] = tteH->vge_len[2] ; 365 vge->n_used = (UShort)tteH->vge_n_used ; 366 } 367 368 369 /* A structure used for mapping host code addresses back to the 370 relevant TTEntry. Used when doing chaining, for finding the 371 TTEntry to which some arbitrary patch address belongs. */ 372 typedef 373 struct { 374 UChar* start; 375 UInt len; 376 TTEno tteNo; 377 } 378 HostExtent; 379 380 /* Finally, a sector itself. Each sector contains an array of 381 TCEntries, which hold code, and an array of TTEntries, containing 382 all required administrative info. Profiling is supported using the 383 TTEntry usage.prof.count and usage.prof.weight fields, if required. 384 385 If the sector is not in use, all three pointers are NULL and 386 tt_n_inuse is zero. 387 */ 388 typedef 389 struct { 390 /* The TCEntry area. Size of this depends on the average 391 translation size. We try and size it so it becomes full 392 precisely when this sector's translation table (tt) reaches 393 its load limit (SECTOR_TT_LIMIT_PERCENT). */ 394 ULong* tc; 395 396 /* An hash table, mapping guest address to an index in the tt array. 397 htt is a fixed size, always containing 398 exactly N_HTTES_PER_SECTOR entries. */ 399 TTEno* htt; 400 401 /* The TTEntry{C,H} arrays. These are a fixed size, always 402 containing exactly N_TTES_PER_SECTOR entries. */ 403 TTEntryC* ttC; 404 TTEntryH* ttH; 405 406 /* This points to the current allocation point in tc. */ 407 ULong* tc_next; 408 409 /* The count of tt entries with state InUse. */ 410 Int tt_n_inuse; 411 412 /* A list of Empty/Deleted entries, chained by tte->next_empty_tte */ 413 TTEno empty_tt_list; 414 415 /* Expandable arrays of tt indices for each of the ECLASS_N 416 address range equivalence classes. These hold indices into 417 the containing sector's tt array, which in turn should point 418 back here. */ 419 Int ec2tte_size[ECLASS_N]; 420 Int ec2tte_used[ECLASS_N]; 421 TTEno* ec2tte[ECLASS_N]; 422 423 /* The host extents. The [start, +len) ranges are constructed 424 in strictly non-overlapping order, so we can binary search 425 them at any time. */ 426 XArray* host_extents; /* XArray* of HostExtent */ 427 } 428 Sector; 429 430 431 /*------------------ DECLS ------------------*/ 432 433 /* The root data structure is an array of sectors. The index of the 434 youngest sector is recorded, and new translations are put into that 435 sector. When it fills up, we move along to the next sector and 436 start to fill that up, wrapping around at the end of the array. 437 That way, once all N_TC_SECTORS have been bought into use for the 438 first time, and are full, we then re-use the oldest sector, 439 endlessly. 440 441 When running, youngest sector should be between >= 0 and < 442 N_TC_SECTORS. The initial value indicates the TT/TC system is 443 not yet initialised. 444 */ 445 static Sector sectors[MAX_N_SECTORS]; 446 static Int youngest_sector = INV_SNO; 447 448 /* The number of ULongs in each TCEntry area. This is computed once 449 at startup and does not change. */ 450 static Int tc_sector_szQ = 0; 451 452 453 /* A list of sector numbers, in the order which they should be 454 searched to find translations. This is an optimisation to be used 455 when searching for translations and should not affect 456 correctness. INV_SNO denotes "no entry". */ 457 static SECno sector_search_order[MAX_N_SECTORS]; 458 459 460 /* Fast helper for the TC. A direct-mapped cache which holds a set of 461 recently used (guest address, host address) pairs. This array is 462 referred to directly from m_dispatch/dispatch-<platform>.S. 463 464 Entries in tt_fast may refer to any valid TC entry, regardless of 465 which sector it's in. Consequently we must be very careful to 466 invalidate this cache when TC entries are changed or disappear. 467 468 A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be 469 pointed at to cause that cache entry to miss. This relies on the 470 assumption that no guest code actually has that address, hence a 471 value 0x1 seems good. m_translate gives the client a synthetic 472 segfault if it tries to execute at this address. 473 */ 474 /* 475 typedef 476 struct { 477 Addr guest; 478 Addr host; 479 } 480 FastCacheEntry; 481 */ 482 /*global*/ __attribute__((aligned(16))) 483 FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE]; 484 485 /* Make sure we're not used before initialisation. */ 486 static Bool init_done = False; 487 488 489 /*------------------ STATS DECLS ------------------*/ 490 491 /* Number of fast-cache updates and flushes done. */ 492 static ULong n_fast_flushes = 0; 493 static ULong n_fast_updates = 0; 494 495 /* Number of full lookups done. */ 496 static ULong n_full_lookups = 0; 497 static ULong n_lookup_probes = 0; 498 499 /* Number/osize/tsize of translations entered; also the number of 500 those for which self-checking was requested. */ 501 static ULong n_in_count = 0; 502 static ULong n_in_osize = 0; 503 static ULong n_in_tsize = 0; 504 static ULong n_in_sc_count = 0; 505 506 /* Number/osize of translations discarded due to lack of space. */ 507 static ULong n_dump_count = 0; 508 static ULong n_dump_osize = 0; 509 static ULong n_sectors_recycled = 0; 510 511 /* Number/osize of translations discarded due to requests to do so. */ 512 static ULong n_disc_count = 0; 513 static ULong n_disc_osize = 0; 514 515 516 /*-------------------------------------------------------------*/ 517 /*--- Misc ---*/ 518 /*-------------------------------------------------------------*/ 519 520 static void* ttaux_malloc ( const HChar* tag, SizeT n ) 521 { 522 return VG_(arena_malloc)(VG_AR_TTAUX, tag, n); 523 } 524 525 static void ttaux_free ( void* p ) 526 { 527 VG_(arena_free)(VG_AR_TTAUX, p); 528 } 529 530 531 /*-------------------------------------------------------------*/ 532 /*--- Chaining support ---*/ 533 /*-------------------------------------------------------------*/ 534 535 static inline TTEntryC* index_tteC ( SECno sNo, TTEno tteNo ) 536 { 537 vg_assert(sNo < n_sectors); 538 vg_assert(tteNo < N_TTES_PER_SECTOR); 539 Sector* s = §ors[sNo]; 540 vg_assert(s->ttC && s->ttH); 541 TTEntryC* tteC = &s->ttC[tteNo]; 542 TTEntryH* tteH = &s->ttH[tteNo]; 543 vg_assert(tteH->status == InUse); 544 return tteC; 545 } 546 547 static inline TTEntryH* index_tteH ( SECno sNo, TTEno tteNo ) 548 { 549 vg_assert(sNo < n_sectors); 550 vg_assert(tteNo < N_TTES_PER_SECTOR); 551 Sector* s = §ors[sNo]; 552 vg_assert(s->ttH); 553 TTEntryH* tteH = &s->ttH[tteNo]; 554 vg_assert(tteH->status == InUse); 555 return tteH; 556 } 557 558 static void InEdge__init ( InEdge* ie ) 559 { 560 ie->from_sNo = INV_SNO; /* invalid */ 561 ie->from_tteNo = 0; 562 ie->from_offs = 0; 563 ie->to_fastEP = False; 564 } 565 566 static void OutEdge__init ( OutEdge* oe ) 567 { 568 oe->to_sNo = INV_SNO; /* invalid */ 569 oe->to_tteNo = 0; 570 oe->from_offs = 0; 571 } 572 573 static void TTEntryC__init ( TTEntryC* tteC ) 574 { 575 VG_(bzero_inline)(tteC, sizeof(*tteC)); 576 } 577 578 static void TTEntryH__init ( TTEntryH* tteH ) 579 { 580 VG_(bzero_inline)(tteH, sizeof(*tteH)); 581 } 582 583 static UWord InEdgeArr__size ( const InEdgeArr* iea ) 584 { 585 if (iea->has_var) { 586 vg_assert(iea->n_fixed == 0); 587 return VG_(sizeXA)(iea->edges.var); 588 } else { 589 vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR); 590 return iea->n_fixed; 591 } 592 } 593 594 static void InEdgeArr__makeEmpty ( InEdgeArr* iea ) 595 { 596 if (iea->has_var) { 597 vg_assert(iea->n_fixed == 0); 598 VG_(deleteXA)(iea->edges.var); 599 iea->edges.var = NULL; 600 iea->has_var = False; 601 } else { 602 vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR); 603 iea->n_fixed = 0; 604 } 605 } 606 607 static 608 InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i ) 609 { 610 if (iea->has_var) { 611 vg_assert(iea->n_fixed == 0); 612 return (InEdge*)VG_(indexXA)(iea->edges.var, i); 613 } else { 614 vg_assert(i < iea->n_fixed); 615 return &iea->edges.fixed[i]; 616 } 617 } 618 619 static 620 void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i ) 621 { 622 if (iea->has_var) { 623 vg_assert(iea->n_fixed == 0); 624 VG_(removeIndexXA)(iea->edges.var, i); 625 } else { 626 vg_assert(i < iea->n_fixed); 627 for (; i+1 < iea->n_fixed; i++) { 628 iea->edges.fixed[i] = iea->edges.fixed[i+1]; 629 } 630 iea->n_fixed--; 631 } 632 } 633 634 static 635 void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie ) 636 { 637 if (iea->has_var) { 638 vg_assert(iea->n_fixed == 0); 639 VG_(addToXA)(iea->edges.var, ie); 640 } else { 641 vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR); 642 if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) { 643 /* The fixed array is full, so we have to initialise an 644 XArray and copy the fixed array into it. */ 645 XArray *var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add", 646 ttaux_free, 647 sizeof(InEdge)); 648 VG_(hintSizeXA) (var, iea->n_fixed + 1); 649 UWord i; 650 for (i = 0; i < iea->n_fixed; i++) { 651 VG_(addToXA)(var, &iea->edges.fixed[i]); 652 } 653 VG_(addToXA)(var, ie); 654 iea->n_fixed = 0; 655 iea->has_var = True; 656 iea->edges.var = var; 657 } else { 658 /* Just add to the fixed array. */ 659 iea->edges.fixed[iea->n_fixed++] = *ie; 660 } 661 } 662 } 663 664 static UWord OutEdgeArr__size ( const OutEdgeArr* oea ) 665 { 666 if (oea->has_var) { 667 vg_assert(oea->n_fixed == 0); 668 return VG_(sizeXA)(oea->edges.var); 669 } else { 670 vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR); 671 return oea->n_fixed; 672 } 673 } 674 675 static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea ) 676 { 677 if (oea->has_var) { 678 vg_assert(oea->n_fixed == 0); 679 VG_(deleteXA)(oea->edges.var); 680 oea->edges.var = NULL; 681 oea->has_var = False; 682 } else { 683 vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR); 684 oea->n_fixed = 0; 685 } 686 } 687 688 static 689 OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i ) 690 { 691 if (oea->has_var) { 692 vg_assert(oea->n_fixed == 0); 693 return (OutEdge*)VG_(indexXA)(oea->edges.var, i); 694 } else { 695 vg_assert(i < oea->n_fixed); 696 return &oea->edges.fixed[i]; 697 } 698 } 699 700 static 701 void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i ) 702 { 703 if (oea->has_var) { 704 vg_assert(oea->n_fixed == 0); 705 VG_(removeIndexXA)(oea->edges.var, i); 706 } else { 707 vg_assert(i < oea->n_fixed); 708 for (; i+1 < oea->n_fixed; i++) { 709 oea->edges.fixed[i] = oea->edges.fixed[i+1]; 710 } 711 oea->n_fixed--; 712 } 713 } 714 715 static 716 void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe ) 717 { 718 if (oea->has_var) { 719 vg_assert(oea->n_fixed == 0); 720 VG_(addToXA)(oea->edges.var, oe); 721 } else { 722 vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR); 723 if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) { 724 /* The fixed array is full, so we have to initialise an 725 XArray and copy the fixed array into it. */ 726 XArray *var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add", 727 ttaux_free, 728 sizeof(OutEdge)); 729 VG_(hintSizeXA) (var, oea->n_fixed+1); 730 UWord i; 731 for (i = 0; i < oea->n_fixed; i++) { 732 VG_(addToXA)(var, &oea->edges.fixed[i]); 733 } 734 VG_(addToXA)(var, oe); 735 oea->n_fixed = 0; 736 oea->has_var = True; 737 oea->edges.var = var; 738 } else { 739 /* Just add to the fixed array. */ 740 oea->edges.fixed[oea->n_fixed++] = *oe; 741 } 742 } 743 } 744 745 static 746 Int HostExtent__cmpOrd ( const void* v1, const void* v2 ) 747 { 748 const HostExtent* hx1 = v1; 749 const HostExtent* hx2 = v2; 750 if (hx1->start + hx1->len <= hx2->start) return -1; 751 if (hx2->start + hx2->len <= hx1->start) return 1; 752 return 0; /* partial overlap */ 753 } 754 755 /* True if hx is a dead host extent, i.e. corresponds to host code 756 of an entry that was invalidated. */ 757 static 758 Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec) 759 { 760 const TTEno tteNo = hx->tteNo; 761 #define LDEBUG(m) if (DEBUG_TRANSTAB) \ 762 VG_(printf) (m \ 763 " start 0x%p len %u sector %d ttslot %u" \ 764 " tt.entry 0x%lu tt.tcptr 0x%p\n", \ 765 hx->start, hx->len, (int)(sec - sectors), \ 766 hx->tteNo, \ 767 sec->ttC[tteNo].entry, sec->ttC[tteNo].tcptr) 768 769 /* Entry might have been invalidated and not re-used yet.*/ 770 if (sec->ttH[tteNo].status == Deleted) { 771 LDEBUG("found deleted entry"); 772 return True; 773 } 774 /* Maybe we found this entry via a host_extents which was 775 inserted for an entry which was changed to Deleted then 776 re-used after. If this entry was re-used, then its tcptr 777 is >= to host_extents start (i.e. the previous tcptr) + len. 778 This is the case as there is no re-use of host code: a new 779 entry or re-used entry always gets "higher value" host code. */ 780 if ((UChar*) sec->ttC[tteNo].tcptr >= hx->start + hx->len) { 781 LDEBUG("found re-used entry"); 782 return True; 783 } 784 785 return False; 786 #undef LDEBUG 787 } 788 789 static __attribute__((noinline)) 790 Bool find_TTEntry_from_hcode( /*OUT*/SECno* from_sNo, 791 /*OUT*/TTEno* from_tteNo, 792 void* hcode ) 793 { 794 SECno i; 795 796 /* Search order logic copied from VG_(search_transtab). */ 797 for (i = 0; i < n_sectors; i++) { 798 SECno sno = sector_search_order[i]; 799 if (UNLIKELY(sno == INV_SNO)) 800 return False; /* run out of sectors to search */ 801 802 const Sector* sec = §ors[sno]; 803 const XArray* /* of HostExtent */ host_extents = sec->host_extents; 804 vg_assert(host_extents); 805 806 HostExtent key; 807 VG_(memset)(&key, 0, sizeof(key)); 808 key.start = hcode; 809 key.len = 1; 810 Word firstW = -1, lastW = -1; 811 Bool found = VG_(lookupXA_UNSAFE)( 812 host_extents, &key, &firstW, &lastW, 813 HostExtent__cmpOrd ); 814 vg_assert(firstW == lastW); // always true, even if not found 815 if (found) { 816 HostExtent* hx = VG_(indexXA)(host_extents, firstW); 817 TTEno tteNo = hx->tteNo; 818 /* Do some additional sanity checks. */ 819 vg_assert(tteNo < N_TTES_PER_SECTOR); 820 821 /* if this hx entry corresponds to dead host code, we must 822 tell this code has not been found, as it cannot be patched. */ 823 if (HostExtent__is_dead (hx, sec)) 824 return False; 825 826 vg_assert(sec->ttH[tteNo].status == InUse); 827 /* Can only half check that the found TTEntry contains hcode, 828 due to not having a length value for the hcode in the 829 TTEntry. */ 830 vg_assert((UChar*)sec->ttC[tteNo].tcptr <= (UChar*)hcode); 831 /* Looks plausible */ 832 *from_sNo = sno; 833 *from_tteNo = tteNo; 834 return True; 835 } 836 } 837 return False; 838 } 839 840 841 /* Figure out whether or not hcode is jitted code present in the main 842 code cache (but not in the no-redir cache). Used for sanity 843 checking. */ 844 static Bool is_in_the_main_TC ( const void* hcode ) 845 { 846 SECno i, sno; 847 for (i = 0; i < n_sectors; i++) { 848 sno = sector_search_order[i]; 849 if (sno == INV_SNO) 850 break; /* run out of sectors to search */ 851 if ((const UChar*)hcode >= (const UChar*)sectors[sno].tc 852 && (const UChar*)hcode <= (const UChar*)sectors[sno].tc_next 853 + sizeof(ULong) - 1) 854 return True; 855 } 856 return False; 857 } 858 859 860 /* Fulfill a chaining request, and record admin info so we 861 can undo it later, if required. 862 */ 863 void VG_(tt_tc_do_chaining) ( void* from__patch_addr, 864 SECno to_sNo, 865 TTEno to_tteNo, 866 Bool to_fastEP ) 867 { 868 /* Get the CPU info established at startup. */ 869 VexArch arch_host = VexArch_INVALID; 870 VexArchInfo archinfo_host; 871 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host)); 872 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host ); 873 VexEndness endness_host = archinfo_host.endness; 874 875 // host_code is where we're patching to. So it needs to 876 // take into account, whether we're jumping to the slow 877 // or fast entry point. By definition, the fast entry point 878 // is exactly one event check's worth of code along from 879 // the slow (tcptr) entry point. 880 TTEntryC* to_tteC = index_tteC(to_sNo, to_tteNo); 881 void* host_code = ((UChar*)to_tteC->tcptr) 882 + (to_fastEP ? LibVEX_evCheckSzB(arch_host) : 0); 883 884 // stay sane -- the patch point (dst) is in this sector's code cache 885 vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc ); 886 vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next 887 + sizeof(ULong) - 1 ); 888 889 /* Find the TTEntry for the from__ code. This isn't simple since 890 we only know the patch address, which is going to be somewhere 891 inside the from_ block. */ 892 SECno from_sNo = INV_SNO; 893 TTEno from_tteNo = INV_TTE; 894 Bool from_found 895 = find_TTEntry_from_hcode( &from_sNo, &from_tteNo, 896 from__patch_addr ); 897 if (!from_found) { 898 // The from code might have been discarded due to sector re-use 899 // or marked Deleted due to translation invalidation. 900 // In such a case, don't do the chaining. 901 VG_(debugLog)(1,"transtab", 902 "host code %p not found (discarded? sector recycled?)" 903 " => no chaining done\n", 904 from__patch_addr); 905 return; 906 } 907 908 TTEntryC* from_tteC = index_tteC(from_sNo, from_tteNo); 909 910 /* Get VEX to do the patching itself. We have to hand it off 911 since it is host-dependent. */ 912 VexInvalRange vir 913 = LibVEX_Chain( 914 arch_host, endness_host, 915 from__patch_addr, 916 VG_(fnptr_to_fnentry)( 917 to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP) 918 : &VG_(disp_cp_chain_me_to_slowEP)), 919 (void*)host_code 920 ); 921 VG_(invalidate_icache)( (void*)vir.start, vir.len ); 922 923 /* Now do the tricky bit -- update the ch_succs and ch_preds info 924 for the two translations involved, so we can undo the chaining 925 later, which we will have to do if the to_ block gets removed 926 for whatever reason. */ 927 928 /* This is the new from_ -> to_ link to add. */ 929 InEdge ie; 930 InEdge__init(&ie); 931 ie.from_sNo = from_sNo; 932 ie.from_tteNo = from_tteNo; 933 ie.to_fastEP = to_fastEP; 934 HWord from_offs = (HWord)( (UChar*)from__patch_addr 935 - (UChar*)from_tteC->tcptr ); 936 vg_assert(from_offs < 100000/* let's say */); 937 ie.from_offs = (UInt)from_offs; 938 939 /* This is the new to_ -> from_ backlink to add. */ 940 OutEdge oe; 941 OutEdge__init(&oe); 942 oe.to_sNo = to_sNo; 943 oe.to_tteNo = to_tteNo; 944 oe.from_offs = (UInt)from_offs; 945 946 /* Add .. */ 947 InEdgeArr__add(&to_tteC->in_edges, &ie); 948 OutEdgeArr__add(&from_tteC->out_edges, &oe); 949 } 950 951 952 /* Unchain one patch, as described by the specified InEdge. For 953 sanity check purposes only (to check that the patched location is 954 as expected) it also requires the fast and slow entry point 955 addresses of the destination block (that is, the block that owns 956 this InEdge). */ 957 __attribute__((noinline)) 958 static void unchain_one ( VexArch arch_host, VexEndness endness_host, 959 InEdge* ie, 960 void* to_fastEPaddr, void* to_slowEPaddr ) 961 { 962 vg_assert(ie); 963 TTEntryC* tteC 964 = index_tteC(ie->from_sNo, ie->from_tteNo); 965 UChar* place_to_patch 966 = ((UChar*)tteC->tcptr) + ie->from_offs; 967 UChar* disp_cp_chain_me 968 = VG_(fnptr_to_fnentry)( 969 ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP) 970 : &VG_(disp_cp_chain_me_to_slowEP) 971 ); 972 UChar* place_to_jump_to_EXPECTED 973 = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr; 974 975 // stay sane: both src and dst for this unchaining are 976 // in the main code cache 977 vg_assert( is_in_the_main_TC(place_to_patch) ); // src 978 vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst 979 // dst check is ok because LibVEX_UnChain checks that 980 // place_to_jump_to_EXPECTED really is the current dst, and 981 // asserts if it isn't. 982 VexInvalRange vir 983 = LibVEX_UnChain( arch_host, endness_host, place_to_patch, 984 place_to_jump_to_EXPECTED, disp_cp_chain_me ); 985 VG_(invalidate_icache)( (void*)vir.start, vir.len ); 986 } 987 988 989 /* The specified block is about to be deleted. Update the preds and 990 succs of its associated blocks accordingly. This includes undoing 991 any chained jumps to this block. */ 992 static 993 void unchain_in_preparation_for_deletion ( VexArch arch_host, 994 VexEndness endness_host, 995 SECno here_sNo, TTEno here_tteNo ) 996 { 997 if (DEBUG_TRANSTAB) 998 VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo); 999 UWord i, j, n, m; 1000 Int evCheckSzB = LibVEX_evCheckSzB(arch_host); 1001 TTEntryC* here_tteC = index_tteC(here_sNo, here_tteNo); 1002 TTEntryH* here_tteH = index_tteH(here_sNo, here_tteNo); 1003 if (DEBUG_TRANSTAB) 1004 VG_(printf)("... QQQ tt.entry 0x%lu tt.tcptr 0x%p\n", 1005 here_tteC->entry, here_tteC->tcptr); 1006 vg_assert(here_tteH->status == InUse); 1007 1008 /* Visit all InEdges owned by here_tte. */ 1009 n = InEdgeArr__size(&here_tteC->in_edges); 1010 for (i = 0; i < n; i++) { 1011 InEdge* ie = InEdgeArr__index(&here_tteC->in_edges, i); 1012 // Undo the chaining. 1013 UChar* here_slow_EP = (UChar*)here_tteC->tcptr; 1014 UChar* here_fast_EP = here_slow_EP + evCheckSzB; 1015 unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP); 1016 // Find the corresponding entry in the "from" node's out_edges, 1017 // and remove it. 1018 TTEntryC* from_tteC = index_tteC(ie->from_sNo, ie->from_tteNo); 1019 m = OutEdgeArr__size(&from_tteC->out_edges); 1020 vg_assert(m > 0); // it must have at least one entry 1021 for (j = 0; j < m; j++) { 1022 OutEdge* oe = OutEdgeArr__index(&from_tteC->out_edges, j); 1023 if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo 1024 && oe->from_offs == ie->from_offs) 1025 break; 1026 } 1027 vg_assert(j < m); // "oe must be findable" 1028 OutEdgeArr__deleteIndex(&from_tteC->out_edges, j); 1029 } 1030 1031 /* Visit all OutEdges owned by here_tte. */ 1032 n = OutEdgeArr__size(&here_tteC->out_edges); 1033 for (i = 0; i < n; i++) { 1034 OutEdge* oe = OutEdgeArr__index(&here_tteC->out_edges, i); 1035 // Find the corresponding entry in the "to" node's in_edges, 1036 // and remove it. 1037 TTEntryC* to_tteC = index_tteC(oe->to_sNo, oe->to_tteNo); 1038 m = InEdgeArr__size(&to_tteC->in_edges); 1039 vg_assert(m > 0); // it must have at least one entry 1040 for (j = 0; j < m; j++) { 1041 InEdge* ie = InEdgeArr__index(&to_tteC->in_edges, j); 1042 if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo 1043 && ie->from_offs == oe->from_offs) 1044 break; 1045 } 1046 vg_assert(j < m); // "ie must be findable" 1047 InEdgeArr__deleteIndex(&to_tteC->in_edges, j); 1048 } 1049 1050 InEdgeArr__makeEmpty(&here_tteC->in_edges); 1051 OutEdgeArr__makeEmpty(&here_tteC->out_edges); 1052 } 1053 1054 1055 /*-------------------------------------------------------------*/ 1056 /*--- Address-range equivalence class stuff ---*/ 1057 /*-------------------------------------------------------------*/ 1058 1059 /* Return equivalence class number for a range. */ 1060 1061 static EClassNo range_to_eclass ( Addr start, UInt len ) 1062 { 1063 UInt mask = (1 << ECLASS_WIDTH) - 1; 1064 UInt lo = (UInt)start; 1065 UInt hi = lo + len - 1; 1066 UInt loBits = (lo >> ECLASS_SHIFT) & mask; 1067 UInt hiBits = (hi >> ECLASS_SHIFT) & mask; 1068 if (loBits == hiBits) { 1069 vg_assert(loBits < ECLASS_N-1); 1070 return loBits; 1071 } else { 1072 return ECLASS_MISC; 1073 } 1074 } 1075 1076 1077 /* Calculates the equivalence class numbers for any VexGuestExtent. 1078 These are written in *eclasses, which must be big enough to hold 3 1079 Ints. The number written, between 1 and 3, is returned. The 1080 eclasses are presented in order, and any duplicates are removed. 1081 */ 1082 1083 static 1084 Int vexGuestExtents_to_eclasses ( /*OUT*/EClassNo* eclasses, 1085 const TTEntryH* tteH ) 1086 { 1087 1088 # define SWAP(_lv1,_lv2) \ 1089 do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0) 1090 1091 UInt i, j, n_ec; 1092 EClassNo r; 1093 1094 vg_assert(tteH->vge_n_used >= 1 && tteH->vge_n_used <= 3); 1095 1096 n_ec = 0; 1097 for (i = 0; i < tteH->vge_n_used; i++) { 1098 r = range_to_eclass( tteH->vge_base[i], tteH->vge_len[i] ); 1099 if (r == ECLASS_MISC) 1100 goto bad; 1101 /* only add if we haven't already seen it */ 1102 for (j = 0; j < n_ec; j++) 1103 if (eclasses[j] == r) 1104 break; 1105 if (j == n_ec) 1106 eclasses[n_ec++] = r; 1107 } 1108 1109 if (n_ec == 1) 1110 return 1; 1111 1112 if (n_ec == 2) { 1113 /* sort */ 1114 if (eclasses[0] > eclasses[1]) 1115 SWAP(eclasses[0], eclasses[1]); 1116 return 2; 1117 } 1118 1119 if (n_ec == 3) { 1120 /* sort */ 1121 if (eclasses[0] > eclasses[2]) 1122 SWAP(eclasses[0], eclasses[2]); 1123 if (eclasses[0] > eclasses[1]) 1124 SWAP(eclasses[0], eclasses[1]); 1125 if (eclasses[1] > eclasses[2]) 1126 SWAP(eclasses[1], eclasses[2]); 1127 return 3; 1128 } 1129 1130 /* NOTREACHED */ 1131 vg_assert(0); 1132 1133 bad: 1134 eclasses[0] = ECLASS_MISC; 1135 return 1; 1136 1137 # undef SWAP 1138 } 1139 1140 1141 /* Add tteno to the set of entries listed for equivalence class ec in 1142 this sector. Returns used location in eclass array. */ 1143 1144 static 1145 UInt addEClassNo ( /*MOD*/Sector* sec, EClassNo ec, TTEno tteno ) 1146 { 1147 Int old_sz, new_sz, i, r; 1148 TTEno *old_ar, *new_ar; 1149 1150 vg_assert(ec >= 0 && ec < ECLASS_N); 1151 vg_assert(tteno < N_TTES_PER_SECTOR); 1152 1153 if (DEBUG_TRANSTAB) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno); 1154 1155 if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) { 1156 1157 vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]); 1158 1159 old_sz = sec->ec2tte_size[ec]; 1160 old_ar = sec->ec2tte[ec]; 1161 new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2; 1162 new_ar = ttaux_malloc("transtab.aECN.1", 1163 new_sz * sizeof(TTEno)); 1164 for (i = 0; i < old_sz; i++) 1165 new_ar[i] = old_ar[i]; 1166 if (old_ar) 1167 ttaux_free(old_ar); 1168 sec->ec2tte_size[ec] = new_sz; 1169 sec->ec2tte[ec] = new_ar; 1170 1171 if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz); 1172 } 1173 1174 /* Common case */ 1175 r = sec->ec2tte_used[ec]++; 1176 vg_assert(r >= 0 && r < sec->ec2tte_size[ec]); 1177 sec->ec2tte[ec][r] = tteno; 1178 return (UInt)r; 1179 } 1180 1181 1182 /* 'vge' is being added to 'sec' at TT entry 'tteno'. Add appropriate 1183 eclass entries to 'sec'. */ 1184 1185 static 1186 void upd_eclasses_after_add ( /*MOD*/Sector* sec, TTEno tteno ) 1187 { 1188 Int i, r; 1189 EClassNo eclasses[3]; 1190 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR); 1191 1192 TTEntryH* tteH = &sec->ttH[tteno]; 1193 r = vexGuestExtents_to_eclasses( eclasses, tteH ); 1194 vg_assert(r >= 1 && r <= 3); 1195 1196 TTEntryC* tteC = &sec->ttC[tteno]; 1197 tteC->n_tte2ec = r; 1198 for (i = 0; i < r; i++) { 1199 tteC->tte2ec_ec[i] = eclasses[i]; 1200 tteC->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], tteno ); 1201 } 1202 } 1203 1204 1205 /* Check the eclass info in 'sec' to ensure it is consistent. Returns 1206 True if OK, False if something's not right. Expensive. */ 1207 1208 static Bool sanity_check_eclasses_in_sector ( const Sector* sec ) 1209 { 1210 # define BAD(_str) do { whassup = (_str); goto bad; } while (0) 1211 1212 const HChar* whassup = NULL; 1213 Int j, k, n, ec_idx; 1214 EClassNo i; 1215 EClassNo ec_num; 1216 TTEno tteno; 1217 ULong* tce; 1218 1219 /* Basic checks on this sector */ 1220 if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR) 1221 BAD("invalid sec->tt_n_inuse"); 1222 tce = sec->tc_next; 1223 if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ]) 1224 BAD("sec->tc_next points outside tc"); 1225 1226 /* For each eclass ... */ 1227 for (i = 0; i < ECLASS_N; i++) { 1228 if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL) 1229 BAD("ec2tte_size/ec2tte mismatch(1)"); 1230 if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL) 1231 BAD("ec2tte_size/ec2tte mismatch(2)"); 1232 if (sec->ec2tte_used[i] < 0 1233 || sec->ec2tte_used[i] > sec->ec2tte_size[i]) 1234 BAD("implausible ec2tte_used"); 1235 if (sec->ec2tte_used[i] == 0) 1236 continue; 1237 1238 /* For each tt reference in each eclass .. ensure the reference 1239 is to a valid tt entry, and that the entry's address ranges 1240 really include this eclass. */ 1241 1242 for (j = 0; j < sec->ec2tte_used[i]; j++) { 1243 tteno = sec->ec2tte[i][j]; 1244 if (tteno == EC2TTE_DELETED) 1245 continue; 1246 if (tteno >= N_TTES_PER_SECTOR) 1247 BAD("implausible tteno"); 1248 TTEntryC* tteC = &sec->ttC[tteno]; 1249 TTEntryH* tteH = &sec->ttH[tteno]; 1250 if (tteH->status != InUse) 1251 BAD("tteno points to non-inuse tte"); 1252 if (tteC->n_tte2ec < 1 || tteC->n_tte2ec > 3) 1253 BAD("tteC->n_tte2ec out of range"); 1254 /* Exactly least one of tte->eclasses[0 .. tte->n_eclasses-1] 1255 must equal i. Inspect tte's eclass info. */ 1256 n = 0; 1257 for (k = 0; k < tteC->n_tte2ec; k++) { 1258 if (k < tteC->n_tte2ec-1 1259 && tteC->tte2ec_ec[k] >= tteC->tte2ec_ec[k+1]) 1260 BAD("tteC->tte2ec_ec[..] out of order"); 1261 ec_num = tteC->tte2ec_ec[k]; 1262 if (ec_num < 0 || ec_num >= ECLASS_N) 1263 BAD("tteC->tte2ec_ec[..] out of range"); 1264 if (ec_num != i) 1265 continue; 1266 ec_idx = tteC->tte2ec_ix[k]; 1267 if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i]) 1268 BAD("tteC->tte2ec_ix[..] out of range"); 1269 if (ec_idx == j) 1270 n++; 1271 } 1272 if (n != 1) 1273 BAD("tteno does not point back at eclass"); 1274 } 1275 } 1276 1277 /* That establishes that for each forward pointer from TTEntrys 1278 there is a corresponding backward pointer from the eclass[] 1279 arrays. However, it doesn't rule out the possibility of other, 1280 bogus pointers in the eclass[] arrays. So do those similarly: 1281 scan through them and check the TTEntryies they point at point 1282 back. */ 1283 1284 for (tteno = 0; tteno < N_TTES_PER_SECTOR; tteno++) { 1285 1286 TTEntryC* tteC = &sec->ttC[tteno]; 1287 TTEntryH* tteH = &sec->ttH[tteno]; 1288 if (tteH->status == Empty || tteH->status == Deleted) { 1289 if (tteC->n_tte2ec != 0) 1290 BAD("tteC->n_tte2ec nonzero for unused tte"); 1291 continue; 1292 } 1293 1294 vg_assert(tteH->status == InUse); 1295 1296 if (tteC->n_tte2ec < 1 || tteC->n_tte2ec > 3) 1297 BAD("tteC->n_tte2ec out of range(2)"); 1298 1299 for (j = 0; j < tteC->n_tte2ec; j++) { 1300 ec_num = tteC->tte2ec_ec[j]; 1301 if (ec_num < 0 || ec_num >= ECLASS_N) 1302 BAD("tteC->tte2ec_ec[..] out of range"); 1303 ec_idx = tteC->tte2ec_ix[j]; 1304 if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num]) 1305 BAD("tteC->tte2ec_ix[..] out of range(2)"); 1306 if (sec->ec2tte[ec_num][ec_idx] != tteno) 1307 BAD("ec2tte does not point back to tte"); 1308 } 1309 } 1310 1311 return True; 1312 1313 bad: 1314 if (whassup) 1315 VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup); 1316 1317 # if 0 1318 VG_(printf)("eclass = %d\n", i); 1319 VG_(printf)("tteno = %d\n", (Int)tteno); 1320 switch (tte->status) { 1321 case InUse: VG_(printf)("InUse\n"); break; 1322 case Deleted: VG_(printf)("Deleted\n"); break; 1323 case Empty: VG_(printf)("Empty\n"); break; 1324 } 1325 if (tte->status != Empty) { 1326 for (k = 0; k < tte->vge.n_used; k++) 1327 VG_(printf)("0x%lx %u\n", tte->vge.base[k], (UInt)tte->vge.len[k]); 1328 } 1329 # endif 1330 1331 return False; 1332 1333 # undef BAD 1334 } 1335 1336 1337 /* Sanity check absolutely everything. True == check passed. */ 1338 1339 /* forwards */ 1340 static Bool sanity_check_redir_tt_tc ( void ); 1341 1342 static Bool sanity_check_sector_search_order ( void ) 1343 { 1344 SECno i, j, nListed; 1345 /* assert the array is the right size */ 1346 vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order) 1347 / sizeof(sector_search_order[0]))); 1348 /* Check it's of the form valid_sector_numbers ++ [INV_SNO, INV_SNO, ..] */ 1349 for (i = 0; i < n_sectors; i++) { 1350 if (sector_search_order[i] == INV_SNO 1351 || sector_search_order[i] >= n_sectors) 1352 break; 1353 } 1354 nListed = i; 1355 for (/* */; i < n_sectors; i++) { 1356 if (sector_search_order[i] != INV_SNO) 1357 break; 1358 } 1359 if (i != n_sectors) 1360 return False; 1361 /* Check each sector number only appears once */ 1362 for (i = 0; i < n_sectors; i++) { 1363 if (sector_search_order[i] == INV_SNO) 1364 continue; 1365 for (j = i+1; j < n_sectors; j++) { 1366 if (sector_search_order[j] == sector_search_order[i]) 1367 return False; 1368 } 1369 } 1370 /* Check that the number of listed sectors equals the number 1371 in use, by counting nListed back down. */ 1372 for (i = 0; i < n_sectors; i++) { 1373 if (sectors[i].tc != NULL) 1374 nListed--; 1375 } 1376 if (nListed != 0) 1377 return False; 1378 return True; 1379 } 1380 1381 static Bool sanity_check_all_sectors ( void ) 1382 { 1383 SECno sno; 1384 Bool sane; 1385 Sector* sec; 1386 for (sno = 0; sno < n_sectors; sno++) { 1387 Int i; 1388 Int nr_not_dead_hx = 0; 1389 Int szhxa; 1390 sec = §ors[sno]; 1391 if (sec->tc == NULL) 1392 continue; 1393 sane = sanity_check_eclasses_in_sector( sec ); 1394 if (!sane) 1395 return False; 1396 szhxa = VG_(sizeXA)(sec->host_extents); 1397 for (i = 0; i < szhxa; i++) { 1398 const HostExtent* hx = VG_(indexXA)(sec->host_extents, i); 1399 if (!HostExtent__is_dead (hx, sec)) 1400 nr_not_dead_hx++; 1401 } 1402 if (nr_not_dead_hx != sec->tt_n_inuse) { 1403 VG_(debugLog)(0, "transtab", 1404 "nr_not_dead_hx %d sanity fail " 1405 "(expected == in use %d)\n", 1406 nr_not_dead_hx, sec->tt_n_inuse); 1407 return False; 1408 } 1409 } 1410 1411 if ( !sanity_check_redir_tt_tc() ) 1412 return False; 1413 if ( !sanity_check_sector_search_order() ) 1414 return False; 1415 return True; 1416 } 1417 1418 1419 1420 /*-------------------------------------------------------------*/ 1421 /*--- Add/find translations ---*/ 1422 /*-------------------------------------------------------------*/ 1423 1424 static UInt vge_osize ( const VexGuestExtents* vge ) 1425 { 1426 UInt i, n = 0; 1427 for (i = 0; i < vge->n_used; i++) 1428 n += (UInt)vge->len[i]; 1429 return n; 1430 } 1431 1432 static UInt TTEntryH__osize ( const TTEntryH* tteH ) 1433 { 1434 UInt i, n = 0; 1435 for (i = 0; i < tteH->vge_n_used; i++) 1436 n += (UInt)tteH->vge_len[i]; 1437 return n; 1438 } 1439 1440 static Bool isValidSector ( SECno sector ) 1441 { 1442 if (sector == INV_SNO || sector >= n_sectors) 1443 return False; 1444 return True; 1445 } 1446 1447 static inline HTTno HASH_TT ( Addr key ) 1448 { 1449 UInt kHi = sizeof(Addr) == 4 ? 0 : (key >> 32); 1450 UInt kLo = (UInt)key; 1451 UInt k32 = kHi ^ kLo; 1452 UInt ror = 7; 1453 if (ror > 0) 1454 k32 = (k32 >> ror) | (k32 << (32-ror)); 1455 return (HTTno)(k32 % N_HTTES_PER_SECTOR); 1456 } 1457 1458 static void setFastCacheEntry ( Addr key, ULong* tcptr ) 1459 { 1460 UInt cno = (UInt)VG_TT_FAST_HASH(key); 1461 VG_(tt_fast)[cno].guest = key; 1462 VG_(tt_fast)[cno].host = (Addr)tcptr; 1463 n_fast_updates++; 1464 /* This shouldn't fail. It should be assured by m_translate 1465 which should reject any attempt to make translation of code 1466 starting at TRANSTAB_BOGUS_GUEST_ADDR. */ 1467 vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR); 1468 } 1469 1470 /* Invalidate the fast cache VG_(tt_fast). */ 1471 static void invalidateFastCache ( void ) 1472 { 1473 UInt j; 1474 /* This loop is popular enough to make it worth unrolling a 1475 bit, at least on ppc32. */ 1476 vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0); 1477 for (j = 0; j < VG_TT_FAST_SIZE; j += 4) { 1478 VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR; 1479 VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR; 1480 VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR; 1481 VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR; 1482 } 1483 1484 vg_assert(j == VG_TT_FAST_SIZE); 1485 n_fast_flushes++; 1486 } 1487 1488 1489 static TTEno get_empty_tt_slot(SECno sNo) 1490 { 1491 TTEno i; 1492 1493 i = sectors[sNo].empty_tt_list; 1494 sectors[sNo].empty_tt_list = sectors[sNo].ttC[i].usage.next_empty_tte; 1495 1496 vg_assert (i >= 0 && i < N_TTES_PER_SECTOR); 1497 1498 return i; 1499 } 1500 1501 static void add_to_empty_tt_list (SECno sNo, TTEno tteno) 1502 { 1503 sectors[sNo].ttC[tteno].usage.next_empty_tte = sectors[sNo].empty_tt_list; 1504 sectors[sNo].empty_tt_list = tteno; 1505 } 1506 1507 static void initialiseSector ( SECno sno ) 1508 { 1509 UInt i; 1510 SysRes sres; 1511 Sector* sec; 1512 vg_assert(isValidSector(sno)); 1513 1514 { Bool sane = sanity_check_sector_search_order(); 1515 vg_assert(sane); 1516 } 1517 sec = §ors[sno]; 1518 1519 if (sec->tc == NULL) { 1520 1521 /* Sector has never been used before. Need to allocate tt and 1522 tc. */ 1523 vg_assert(sec->ttC == NULL); 1524 vg_assert(sec->ttH == NULL); 1525 vg_assert(sec->tc_next == NULL); 1526 vg_assert(sec->tt_n_inuse == 0); 1527 for (EClassNo e = 0; e < ECLASS_N; e++) { 1528 vg_assert(sec->ec2tte_size[e] == 0); 1529 vg_assert(sec->ec2tte_used[e] == 0); 1530 vg_assert(sec->ec2tte[e] == NULL); 1531 } 1532 vg_assert(sec->host_extents == NULL); 1533 1534 if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) 1535 VG_(dmsg)("transtab: " "allocate sector %d\n", sno); 1536 1537 sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ ); 1538 if (sr_isError(sres)) { 1539 VG_(out_of_memory_NORETURN)("initialiseSector(TC)", 1540 8 * tc_sector_szQ ); 1541 /*NOTREACHED*/ 1542 } 1543 sec->tc = (ULong*)(Addr)sr_Res(sres); 1544 1545 sres = VG_(am_mmap_anon_float_valgrind) 1546 ( N_TTES_PER_SECTOR * sizeof(TTEntryC) ); 1547 if (sr_isError(sres)) { 1548 VG_(out_of_memory_NORETURN)("initialiseSector(TTC)", 1549 N_TTES_PER_SECTOR * sizeof(TTEntryC) ); 1550 /*NOTREACHED*/ 1551 } 1552 sec->ttC = (TTEntryC*)(Addr)sr_Res(sres); 1553 1554 sres = VG_(am_mmap_anon_float_valgrind) 1555 ( N_TTES_PER_SECTOR * sizeof(TTEntryH) ); 1556 if (sr_isError(sres)) { 1557 VG_(out_of_memory_NORETURN)("initialiseSector(TTH)", 1558 N_TTES_PER_SECTOR * sizeof(TTEntryH) ); 1559 /*NOTREACHED*/ 1560 } 1561 sec->ttH = (TTEntryH*)(Addr)sr_Res(sres); 1562 1563 sec->empty_tt_list = HTT_EMPTY; 1564 for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) { 1565 sec->ttH[ei].status = Empty; 1566 sec->ttC[ei].n_tte2ec = 0; 1567 add_to_empty_tt_list(sno, ei); 1568 } 1569 1570 sres = VG_(am_mmap_anon_float_valgrind) 1571 ( N_HTTES_PER_SECTOR * sizeof(TTEno) ); 1572 if (sr_isError(sres)) { 1573 VG_(out_of_memory_NORETURN)("initialiseSector(HTT)", 1574 N_HTTES_PER_SECTOR * sizeof(TTEno) ); 1575 /*NOTREACHED*/ 1576 } 1577 sec->htt = (TTEno*)(Addr)sr_Res(sres); 1578 for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++) 1579 sec->htt[hi] = HTT_EMPTY; 1580 1581 /* Set up the host_extents array. */ 1582 sec->host_extents 1583 = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)", 1584 ttaux_free, 1585 sizeof(HostExtent)); 1586 1587 /* Add an entry in the sector_search_order */ 1588 for (i = 0; i < n_sectors; i++) { 1589 if (sector_search_order[i] == INV_SNO) 1590 break; 1591 } 1592 vg_assert(i >= 0 && i < n_sectors); 1593 sector_search_order[i] = sno; 1594 1595 if (VG_(clo_verbosity) > 2) 1596 VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno); 1597 1598 } else { 1599 1600 /* Sector has been used before. Dump the old contents. */ 1601 if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) 1602 VG_(dmsg)("transtab: " "recycle sector %d\n", sno); 1603 n_sectors_recycled++; 1604 1605 vg_assert(sec->ttC != NULL); 1606 vg_assert(sec->ttH != NULL); 1607 vg_assert(sec->tc_next != NULL); 1608 n_dump_count += sec->tt_n_inuse; 1609 1610 VexArch arch_host = VexArch_INVALID; 1611 VexArchInfo archinfo_host; 1612 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host)); 1613 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host ); 1614 VexEndness endness_host = archinfo_host.endness; 1615 1616 /* Visit each just-about-to-be-abandoned translation. */ 1617 if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n", 1618 sno); 1619 sec->empty_tt_list = HTT_EMPTY; 1620 for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) { 1621 if (sec->ttH[ei].status == InUse) { 1622 vg_assert(sec->ttC[ei].n_tte2ec >= 1); 1623 vg_assert(sec->ttC[ei].n_tte2ec <= 3); 1624 n_dump_osize += TTEntryH__osize(&sec->ttH[ei]); 1625 /* Tell the tool too. */ 1626 if (VG_(needs).superblock_discards) { 1627 VexGuestExtents vge_tmp; 1628 TTEntryH__to_VexGuestExtents( &vge_tmp, &sec->ttH[ei] ); 1629 VG_TDICT_CALL( tool_discard_superblock_info, 1630 sec->ttC[ei].entry, vge_tmp ); 1631 } 1632 unchain_in_preparation_for_deletion(arch_host, 1633 endness_host, sno, ei); 1634 } else { 1635 vg_assert(sec->ttC[ei].n_tte2ec == 0); 1636 } 1637 sec->ttH[ei].status = Empty; 1638 sec->ttC[ei].n_tte2ec = 0; 1639 add_to_empty_tt_list(sno, ei); 1640 } 1641 for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++) 1642 sec->htt[hi] = HTT_EMPTY; 1643 1644 if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n", 1645 sno); 1646 1647 /* Free up the eclass structures. */ 1648 for (EClassNo e = 0; e < ECLASS_N; e++) { 1649 if (sec->ec2tte_size[e] == 0) { 1650 vg_assert(sec->ec2tte_used[e] == 0); 1651 vg_assert(sec->ec2tte[e] == NULL); 1652 } else { 1653 vg_assert(sec->ec2tte[e] != NULL); 1654 ttaux_free(sec->ec2tte[e]); 1655 sec->ec2tte[e] = NULL; 1656 sec->ec2tte_size[e] = 0; 1657 sec->ec2tte_used[e] = 0; 1658 } 1659 } 1660 1661 /* Empty out the host extents array. */ 1662 vg_assert(sec->host_extents != NULL); 1663 VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents)); 1664 vg_assert(VG_(sizeXA)(sec->host_extents) == 0); 1665 1666 /* Sanity check: ensure it is already in 1667 sector_search_order[]. */ 1668 SECno ix; 1669 for (ix = 0; ix < n_sectors; ix++) { 1670 if (sector_search_order[ix] == sno) 1671 break; 1672 } 1673 vg_assert(ix >= 0 && ix < n_sectors); 1674 1675 if (VG_(clo_verbosity) > 2) 1676 VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno); 1677 } 1678 1679 sec->tc_next = sec->tc; 1680 sec->tt_n_inuse = 0; 1681 1682 invalidateFastCache(); 1683 1684 { Bool sane = sanity_check_sector_search_order(); 1685 vg_assert(sane); 1686 } 1687 } 1688 1689 /* Add a translation of vge to TT/TC. The translation is temporarily 1690 in code[0 .. code_len-1]. 1691 1692 pre: youngest_sector points to a valid (although possibly full) 1693 sector. 1694 */ 1695 void VG_(add_to_transtab)( const VexGuestExtents* vge, 1696 Addr entry, 1697 Addr code, 1698 UInt code_len, 1699 Bool is_self_checking, 1700 Int offs_profInc, 1701 UInt n_guest_instrs ) 1702 { 1703 Int tcAvailQ, reqdQ, y; 1704 ULong *tcptr, *tcptr2; 1705 UChar* srcP; 1706 UChar* dstP; 1707 1708 vg_assert(init_done); 1709 vg_assert(vge->n_used >= 1 && vge->n_used <= 3); 1710 1711 /* 60000: should agree with N_TMPBUF in m_translate.c. */ 1712 vg_assert(code_len > 0 && code_len < 60000); 1713 1714 /* Generally stay sane */ 1715 vg_assert(n_guest_instrs < 200); /* it can be zero, tho */ 1716 1717 if (DEBUG_TRANSTAB) 1718 VG_(printf)("add_to_transtab(entry = 0x%lx, len = %u) ...\n", 1719 entry, code_len); 1720 1721 n_in_count++; 1722 n_in_tsize += code_len; 1723 n_in_osize += vge_osize(vge); 1724 if (is_self_checking) 1725 n_in_sc_count++; 1726 1727 y = youngest_sector; 1728 vg_assert(isValidSector(y)); 1729 1730 if (sectors[y].tc == NULL) 1731 initialiseSector(y); 1732 1733 /* Try putting the translation in this sector. */ 1734 reqdQ = (code_len + 7) >> 3; 1735 1736 /* Will it fit in tc? */ 1737 tcAvailQ = ((ULong*)(§ors[y].tc[tc_sector_szQ])) 1738 - ((ULong*)(sectors[y].tc_next)); 1739 vg_assert(tcAvailQ >= 0); 1740 vg_assert(tcAvailQ <= tc_sector_szQ); 1741 1742 if (tcAvailQ < reqdQ 1743 || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR) { 1744 /* No. So move on to the next sector. Either it's never been 1745 used before, in which case it will get its tt/tc allocated 1746 now, or it has been used before, in which case it is set to be 1747 empty, hence throwing out the oldest sector. */ 1748 vg_assert(tc_sector_szQ > 0); 1749 Int tt_loading_pct = (100 * sectors[y].tt_n_inuse) 1750 / N_HTTES_PER_SECTOR; 1751 Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ)) 1752 / tc_sector_szQ; 1753 if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) { 1754 VG_(dmsg)("transtab: " 1755 "declare sector %d full " 1756 "(TT loading %2d%%, TC loading %2d%%, avg tce size %d)\n", 1757 y, tt_loading_pct, tc_loading_pct, 1758 8 * (tc_sector_szQ - tcAvailQ)/sectors[y].tt_n_inuse); 1759 } 1760 youngest_sector++; 1761 if (youngest_sector >= n_sectors) 1762 youngest_sector = 0; 1763 y = youngest_sector; 1764 initialiseSector(y); 1765 } 1766 1767 /* Be sure ... */ 1768 tcAvailQ = ((ULong*)(§ors[y].tc[tc_sector_szQ])) 1769 - ((ULong*)(sectors[y].tc_next)); 1770 vg_assert(tcAvailQ >= 0); 1771 vg_assert(tcAvailQ <= tc_sector_szQ); 1772 vg_assert(tcAvailQ >= reqdQ); 1773 vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR); 1774 vg_assert(sectors[y].tt_n_inuse >= 0); 1775 1776 /* Copy into tc. */ 1777 tcptr = sectors[y].tc_next; 1778 vg_assert(tcptr >= §ors[y].tc[0]); 1779 vg_assert(tcptr <= §ors[y].tc[tc_sector_szQ]); 1780 1781 dstP = (UChar*)tcptr; 1782 srcP = (UChar*)code; 1783 VG_(memcpy)(dstP, srcP, code_len); 1784 sectors[y].tc_next += reqdQ; 1785 sectors[y].tt_n_inuse++; 1786 1787 /* more paranoia */ 1788 tcptr2 = sectors[y].tc_next; 1789 vg_assert(tcptr2 >= §ors[y].tc[0]); 1790 vg_assert(tcptr2 <= §ors[y].tc[tc_sector_szQ]); 1791 1792 /* Find an empty tt slot, and use it. There must be such a slot 1793 since tt is never allowed to get completely full. */ 1794 TTEno tteix = get_empty_tt_slot(y); 1795 TTEntryC__init(§ors[y].ttC[tteix]); 1796 TTEntryH__init(§ors[y].ttH[tteix]); 1797 sectors[y].ttC[tteix].tcptr = tcptr; 1798 sectors[y].ttC[tteix].usage.prof.count = 0; 1799 sectors[y].ttC[tteix].usage.prof.weight = 1800 n_guest_instrs == 0 ? 1 : n_guest_instrs; 1801 sectors[y].ttC[tteix].entry = entry; 1802 TTEntryH__from_VexGuestExtents( §ors[y].ttH[tteix], vge ); 1803 sectors[y].ttH[tteix].status = InUse; 1804 1805 // Point an htt entry to the tt slot 1806 HTTno htti = HASH_TT(entry); 1807 vg_assert(htti >= 0 && htti < N_HTTES_PER_SECTOR); 1808 while (True) { 1809 if (sectors[y].htt[htti] == HTT_EMPTY 1810 || sectors[y].htt[htti] == HTT_DELETED) 1811 break; 1812 htti++; 1813 if (htti >= N_HTTES_PER_SECTOR) 1814 htti = 0; 1815 } 1816 sectors[y].htt[htti] = tteix; 1817 1818 /* Patch in the profile counter location, if necessary. */ 1819 if (offs_profInc != -1) { 1820 vg_assert(offs_profInc >= 0 && offs_profInc < code_len); 1821 VexArch arch_host = VexArch_INVALID; 1822 VexArchInfo archinfo_host; 1823 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host)); 1824 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host ); 1825 VexEndness endness_host = archinfo_host.endness; 1826 VexInvalRange vir 1827 = LibVEX_PatchProfInc( arch_host, endness_host, 1828 dstP + offs_profInc, 1829 §ors[y].ttC[tteix].usage.prof.count ); 1830 VG_(invalidate_icache)( (void*)vir.start, vir.len ); 1831 } 1832 1833 VG_(invalidate_icache)( dstP, code_len ); 1834 1835 /* Add this entry to the host_extents map, checking that we're 1836 adding in order. */ 1837 { HostExtent hx; 1838 hx.start = (UChar*)tcptr; 1839 hx.len = code_len; 1840 hx.tteNo = tteix; 1841 vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */ 1842 XArray* hx_array = sectors[y].host_extents; 1843 vg_assert(hx_array); 1844 Word n = VG_(sizeXA)(hx_array); 1845 if (n > 0) { 1846 HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1); 1847 vg_assert(hx_prev->start + hx_prev->len <= hx.start); 1848 } 1849 VG_(addToXA)(hx_array, &hx); 1850 if (DEBUG_TRANSTAB) 1851 VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n", 1852 hx.start, hx.len, y, tteix); 1853 } 1854 1855 /* Update the fast-cache. */ 1856 setFastCacheEntry( entry, tcptr ); 1857 1858 /* Note the eclass numbers for this translation. */ 1859 upd_eclasses_after_add( §ors[y], tteix ); 1860 } 1861 1862 1863 /* Search for the translation of the given guest address. If 1864 requested, a successful search can also cause the fast-caches to be 1865 updated. 1866 */ 1867 Bool VG_(search_transtab) ( /*OUT*/Addr* res_hcode, 1868 /*OUT*/SECno* res_sNo, 1869 /*OUT*/TTEno* res_tteNo, 1870 Addr guest_addr, 1871 Bool upd_cache ) 1872 { 1873 SECno i, sno; 1874 HTTno j, k, kstart; 1875 TTEno tti; 1876 1877 vg_assert(init_done); 1878 /* Find the initial probe point just once. It will be the same in 1879 all sectors and avoids multiple expensive % operations. */ 1880 n_full_lookups++; 1881 kstart = HASH_TT(guest_addr); 1882 vg_assert(kstart >= 0 && kstart < N_HTTES_PER_SECTOR); 1883 1884 /* Search in all the sectors,using sector_search_order[] as a 1885 heuristic guide as to what order to visit the sectors. */ 1886 for (i = 0; i < n_sectors; i++) { 1887 1888 sno = sector_search_order[i]; 1889 if (UNLIKELY(sno == INV_SNO)) 1890 return False; /* run out of sectors to search */ 1891 1892 k = kstart; 1893 for (j = 0; j < N_HTTES_PER_SECTOR; j++) { 1894 n_lookup_probes++; 1895 tti = sectors[sno].htt[k]; 1896 if (tti < N_TTES_PER_SECTOR 1897 && sectors[sno].ttC[tti].entry == guest_addr) { 1898 /* found it */ 1899 if (upd_cache) 1900 setFastCacheEntry( 1901 guest_addr, sectors[sno].ttC[tti].tcptr ); 1902 if (res_hcode) 1903 *res_hcode = (Addr)sectors[sno].ttC[tti].tcptr; 1904 if (res_sNo) 1905 *res_sNo = sno; 1906 if (res_tteNo) 1907 *res_tteNo = tti; 1908 /* pull this one one step closer to the front. For large 1909 apps this more or less halves the number of required 1910 probes. */ 1911 if (i > 0) { 1912 Int tmp = sector_search_order[i-1]; 1913 sector_search_order[i-1] = sector_search_order[i]; 1914 sector_search_order[i] = tmp; 1915 } 1916 return True; 1917 } 1918 // tti is HTT_EMPTY or HTT_DELETED or not the entry of guest_addr 1919 if (sectors[sno].htt[k] == HTT_EMPTY) 1920 break; /* not found in this sector */ 1921 k++; 1922 if (k == N_HTTES_PER_SECTOR) 1923 k = 0; 1924 } 1925 1926 /* If we fall off the end, all entries are InUse and not 1927 matching, or Deleted. In any case we did not find it in this 1928 sector. */ 1929 } 1930 1931 /* Not found in any sector. */ 1932 return False; 1933 } 1934 1935 1936 /*-------------------------------------------------------------*/ 1937 /*--- Delete translations. ---*/ 1938 /*-------------------------------------------------------------*/ 1939 1940 /* forward */ 1941 static void unredir_discard_translations( Addr, ULong ); 1942 1943 /* Stuff for deleting translations which intersect with a given 1944 address range. Unfortunately, to make this run at a reasonable 1945 speed, it is complex. */ 1946 1947 static inline 1948 Bool overlap1 ( Addr s1, ULong r1, Addr s2, ULong r2 ) 1949 { 1950 Addr e1 = s1 + r1 - 1; 1951 Addr e2 = s2 + r2 - 1; 1952 if (e1 < s2 || e2 < s1) 1953 return False; 1954 return True; 1955 } 1956 1957 static inline 1958 Bool overlaps ( Addr start, ULong range, const TTEntryH* tteH ) 1959 { 1960 if (overlap1(start, range, tteH->vge_base[0], tteH->vge_len[0])) 1961 return True; 1962 if (tteH->vge_n_used < 2) 1963 return False; 1964 if (overlap1(start, range, tteH->vge_base[1], tteH->vge_len[1])) 1965 return True; 1966 if (tteH->vge_n_used < 3) 1967 return False; 1968 if (overlap1(start, range, tteH->vge_base[2], tteH->vge_len[2])) 1969 return True; 1970 return False; 1971 } 1972 1973 1974 /* Delete a tt entry, and update all the eclass data accordingly. */ 1975 1976 static void delete_tte ( /*MOD*/Sector* sec, SECno secNo, TTEno tteno, 1977 VexArch arch_host, VexEndness endness_host ) 1978 { 1979 Int i, ec_idx; 1980 EClassNo ec_num; 1981 1982 /* sec and secNo are mutually redundant; cross-check. */ 1983 vg_assert(sec == §ors[secNo]); 1984 1985 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR); 1986 TTEntryC* tteC = &sec->ttC[tteno]; 1987 TTEntryH* tteH = &sec->ttH[tteno]; 1988 vg_assert(tteH->status == InUse); 1989 vg_assert(tteC->n_tte2ec >= 1 && tteC->n_tte2ec <= 3); 1990 1991 /* Unchain .. */ 1992 unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno); 1993 1994 /* Deal with the ec-to-tte links first. */ 1995 for (i = 0; i < tteC->n_tte2ec; i++) { 1996 ec_num = tteC->tte2ec_ec[i]; 1997 ec_idx = tteC->tte2ec_ix[i]; 1998 vg_assert(ec_num >= 0 && ec_num < ECLASS_N); 1999 vg_assert(ec_idx >= 0); 2000 vg_assert(ec_idx < sec->ec2tte_used[ec_num]); 2001 /* Assert that the two links point at each other. */ 2002 vg_assert(sec->ec2tte[ec_num][ec_idx] == tteno); 2003 /* "delete" the pointer back to here. */ 2004 sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED; 2005 } 2006 2007 /* Now fix up this TTEntry. */ 2008 /* Mark the entry as deleted in htt. 2009 Note: we could avoid the below hash table search by 2010 adding a reference from tte to its hash position in tt. */ 2011 HTTno j; 2012 HTTno k = HASH_TT(tteC->entry); 2013 vg_assert(k >= 0 && k < N_HTTES_PER_SECTOR); 2014 for (j = 0; j < N_HTTES_PER_SECTOR; j++) { 2015 if (sec->htt[k] == tteno) 2016 break; 2017 k++; 2018 if (k == N_HTTES_PER_SECTOR) 2019 k = 0; 2020 } 2021 vg_assert(j < N_HTTES_PER_SECTOR); 2022 sec->htt[k] = HTT_DELETED; 2023 tteH->status = Deleted; 2024 tteC->n_tte2ec = 0; 2025 add_to_empty_tt_list(secNo, tteno); 2026 2027 /* Stats .. */ 2028 sec->tt_n_inuse--; 2029 n_disc_count++; 2030 n_disc_osize += TTEntryH__osize(tteH); 2031 2032 /* Tell the tool too. */ 2033 if (VG_(needs).superblock_discards) { 2034 VexGuestExtents vge_tmp; 2035 TTEntryH__to_VexGuestExtents( &vge_tmp, tteH ); 2036 VG_TDICT_CALL( tool_discard_superblock_info, tteC->entry, vge_tmp ); 2037 } 2038 } 2039 2040 2041 /* Delete translations from sec which intersect specified range, but 2042 only consider translations in the specified eclass. */ 2043 2044 static 2045 Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, SECno secNo, 2046 Addr guest_start, ULong range, 2047 EClassNo ec, 2048 VexArch arch_host, 2049 VexEndness endness_host ) 2050 { 2051 Int i; 2052 TTEno tteno; 2053 Bool anyDeld = False; 2054 2055 vg_assert(ec >= 0 && ec < ECLASS_N); 2056 2057 for (i = 0; i < sec->ec2tte_used[ec]; i++) { 2058 2059 tteno = sec->ec2tte[ec][i]; 2060 if (tteno == EC2TTE_DELETED) { 2061 /* already deleted */ 2062 continue; 2063 } 2064 2065 vg_assert(tteno < N_TTES_PER_SECTOR); 2066 2067 TTEntryH* tteH = &sec->ttH[tteno]; 2068 vg_assert(tteH->status == InUse); 2069 2070 if (overlaps( guest_start, range, tteH )) { 2071 anyDeld = True; 2072 delete_tte( sec, secNo, tteno, arch_host, endness_host ); 2073 } 2074 2075 } 2076 2077 return anyDeld; 2078 } 2079 2080 2081 /* Delete translations from sec which intersect specified range, the 2082 slow way, by inspecting all translations in sec. */ 2083 2084 static 2085 Bool delete_translations_in_sector ( /*MOD*/Sector* sec, SECno secNo, 2086 Addr guest_start, ULong range, 2087 VexArch arch_host, 2088 VexEndness endness_host ) 2089 { 2090 TTEno i; 2091 Bool anyDeld = False; 2092 2093 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 2094 /* The entire and only purpose of splitting TTEntry into cold 2095 and hot parts (TTEntryC and TTEntryH) is to make this search 2096 loop run as fast as possible, by avoiding having to pull all 2097 of the cold data up the memory hierarchy. */ 2098 if (UNLIKELY(sec->ttH[i].status == InUse 2099 && overlaps( guest_start, range, &sec->ttH[i] ))) { 2100 anyDeld = True; 2101 delete_tte( sec, secNo, i, arch_host, endness_host ); 2102 } 2103 } 2104 2105 return anyDeld; 2106 } 2107 2108 2109 void VG_(discard_translations) ( Addr guest_start, ULong range, 2110 const HChar* who ) 2111 { 2112 Sector* sec; 2113 SECno sno; 2114 EClassNo ec; 2115 Bool anyDeleted = False; 2116 2117 vg_assert(init_done); 2118 2119 VG_(debugLog)(2, "transtab", 2120 "discard_translations(0x%lx, %llu) req by %s\n", 2121 guest_start, range, who ); 2122 2123 /* Pre-deletion sanity check */ 2124 if (VG_(clo_sanity_level) >= 4) { 2125 Bool sane = sanity_check_all_sectors(); 2126 vg_assert(sane); 2127 } 2128 2129 if (range == 0) 2130 return; 2131 2132 VexArch arch_host = VexArch_INVALID; 2133 VexArchInfo archinfo_host; 2134 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host)); 2135 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host ); 2136 VexEndness endness_host = archinfo_host.endness; 2137 2138 /* There are two different ways to do this. 2139 2140 If the range fits within a single address-range equivalence 2141 class, as will be the case for a cache line sized invalidation, 2142 then we only have to inspect the set of translations listed in 2143 that equivalence class, and also in the "sin-bin" equivalence 2144 class ECLASS_MISC. 2145 2146 Otherwise, the invalidation is of a larger range and probably 2147 results from munmap. In this case it's (probably!) faster just 2148 to inspect all translations, dump those we don't want, and 2149 regenerate the equivalence class information (since modifying it 2150 in-situ is even more expensive). 2151 */ 2152 2153 /* First off, figure out if the range falls within a single class, 2154 and if so which one. */ 2155 2156 ec = ECLASS_MISC; 2157 if (range <= (1ULL << ECLASS_SHIFT)) 2158 ec = range_to_eclass( guest_start, (UInt)range ); 2159 2160 /* if ec is ECLASS_MISC then we aren't looking at just a single 2161 class, so use the slow scheme. Else use the fast scheme, 2162 examining 'ec' and ECLASS_MISC. */ 2163 2164 if (ec != ECLASS_MISC) { 2165 2166 VG_(debugLog)(2, "transtab", 2167 " FAST, ec = %d\n", ec); 2168 2169 /* Fast scheme */ 2170 vg_assert(ec >= 0 && ec < ECLASS_MISC); 2171 2172 for (sno = 0; sno < n_sectors; sno++) { 2173 sec = §ors[sno]; 2174 if (sec->tc == NULL) 2175 continue; 2176 anyDeleted |= delete_translations_in_sector_eclass( 2177 sec, sno, guest_start, range, ec, 2178 arch_host, endness_host 2179 ); 2180 anyDeleted |= delete_translations_in_sector_eclass( 2181 sec, sno, guest_start, range, ECLASS_MISC, 2182 arch_host, endness_host 2183 ); 2184 } 2185 2186 } else { 2187 2188 /* slow scheme */ 2189 2190 VG_(debugLog)(2, "transtab", 2191 " SLOW, ec = %d\n", ec); 2192 2193 for (sno = 0; sno < n_sectors; sno++) { 2194 sec = §ors[sno]; 2195 if (sec->tc == NULL) 2196 continue; 2197 anyDeleted |= delete_translations_in_sector( 2198 sec, sno, guest_start, range, 2199 arch_host, endness_host 2200 ); 2201 } 2202 2203 } 2204 2205 if (anyDeleted) 2206 invalidateFastCache(); 2207 2208 /* don't forget the no-redir cache */ 2209 unredir_discard_translations( guest_start, range ); 2210 2211 /* Post-deletion sanity check */ 2212 if (VG_(clo_sanity_level) >= 4) { 2213 TTEno i; 2214 Bool sane = sanity_check_all_sectors(); 2215 vg_assert(sane); 2216 /* But now, also check the requested address range isn't 2217 present anywhere. */ 2218 for (sno = 0; sno < n_sectors; sno++) { 2219 sec = §ors[sno]; 2220 if (sec->tc == NULL) 2221 continue; 2222 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 2223 TTEntryH* tteH = &sec->ttH[i]; 2224 if (tteH->status != InUse) 2225 continue; 2226 vg_assert(!overlaps( guest_start, range, tteH )); 2227 } 2228 } 2229 } 2230 } 2231 2232 /* Whether or not tools may discard translations. */ 2233 Bool VG_(ok_to_discard_translations) = False; 2234 2235 /* This function is exported to tools which can use it to discard 2236 translations, provided it is safe to do so. */ 2237 void VG_(discard_translations_safely) ( Addr start, SizeT len, 2238 const HChar* who ) 2239 { 2240 vg_assert(VG_(ok_to_discard_translations)); 2241 VG_(discard_translations)(start, len, who); 2242 } 2243 2244 /*------------------------------------------------------------*/ 2245 /*--- AUXILIARY: the unredirected TT/TC ---*/ 2246 /*------------------------------------------------------------*/ 2247 2248 /* A very simple translation cache which holds a small number of 2249 unredirected translations. This is completely independent of the 2250 main tt/tc structures. When unredir_tc or unredir_tt becomes full, 2251 both structures are simply dumped and we start over. 2252 2253 Since these translations are unredirected, the search key is (by 2254 definition) the first address entry in the .vge field. */ 2255 2256 /* Sized to hold 500 translations of average size 1000 bytes. */ 2257 2258 #define UNREDIR_SZB 1000 2259 2260 #define N_UNREDIR_TT 500 2261 #define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong)) 2262 2263 typedef 2264 struct { 2265 VexGuestExtents vge; 2266 Addr hcode; 2267 Bool inUse; 2268 } 2269 UTCEntry; 2270 2271 /* We just allocate forwards in _tc, never deleting. */ 2272 static ULong *unredir_tc; 2273 static Int unredir_tc_used = N_UNREDIR_TCQ; 2274 2275 /* Slots in _tt can come into use and out again (.inUse). 2276 Nevertheless _tt_highwater is maintained so that invalidations 2277 don't have to scan all the slots when only a few are in use. 2278 _tt_highwater holds the index of the highest ever allocated 2279 slot. */ 2280 static UTCEntry unredir_tt[N_UNREDIR_TT]; 2281 static Int unredir_tt_highwater; 2282 2283 2284 static void init_unredir_tt_tc ( void ) 2285 { 2286 Int i; 2287 if (unredir_tc == NULL) { 2288 SysRes sres = VG_(am_mmap_anon_float_valgrind) 2289 ( N_UNREDIR_TT * UNREDIR_SZB ); 2290 if (sr_isError(sres)) { 2291 VG_(out_of_memory_NORETURN)("init_unredir_tt_tc", 2292 N_UNREDIR_TT * UNREDIR_SZB); 2293 /*NOTREACHED*/ 2294 } 2295 unredir_tc = (ULong *)(Addr)sr_Res(sres); 2296 } 2297 unredir_tc_used = 0; 2298 for (i = 0; i < N_UNREDIR_TT; i++) 2299 unredir_tt[i].inUse = False; 2300 unredir_tt_highwater = -1; 2301 } 2302 2303 /* Do a sanity check; return False on failure. */ 2304 static Bool sanity_check_redir_tt_tc ( void ) 2305 { 2306 Int i; 2307 if (unredir_tt_highwater < -1) return False; 2308 if (unredir_tt_highwater >= N_UNREDIR_TT) return False; 2309 2310 for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++) 2311 if (unredir_tt[i].inUse) 2312 return False; 2313 2314 if (unredir_tc_used < 0) return False; 2315 if (unredir_tc_used > N_UNREDIR_TCQ) return False; 2316 2317 return True; 2318 } 2319 2320 2321 /* Add an UNREDIRECTED translation of vge to TT/TC. The translation 2322 is temporarily in code[0 .. code_len-1]. 2323 */ 2324 void VG_(add_to_unredir_transtab)( const VexGuestExtents* vge, 2325 Addr entry, 2326 Addr code, 2327 UInt code_len ) 2328 { 2329 Int i, j, code_szQ; 2330 HChar *srcP, *dstP; 2331 2332 vg_assert(sanity_check_redir_tt_tc()); 2333 2334 /* This is the whole point: it's not redirected! */ 2335 vg_assert(entry == vge->base[0]); 2336 2337 /* How many unredir_tt slots are needed */ 2338 code_szQ = (code_len + 7) / 8; 2339 2340 /* Look for an empty unredir_tc slot */ 2341 for (i = 0; i < N_UNREDIR_TT; i++) 2342 if (!unredir_tt[i].inUse) 2343 break; 2344 2345 if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) { 2346 /* It's full; dump everything we currently have */ 2347 init_unredir_tt_tc(); 2348 i = 0; 2349 } 2350 2351 vg_assert(unredir_tc_used >= 0); 2352 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ); 2353 vg_assert(code_szQ > 0); 2354 vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ); 2355 vg_assert(i >= 0 && i < N_UNREDIR_TT); 2356 vg_assert(unredir_tt[i].inUse == False); 2357 2358 if (i > unredir_tt_highwater) 2359 unredir_tt_highwater = i; 2360 2361 dstP = (HChar*)&unredir_tc[unredir_tc_used]; 2362 srcP = (HChar*)code; 2363 for (j = 0; j < code_len; j++) 2364 dstP[j] = srcP[j]; 2365 2366 VG_(invalidate_icache)( dstP, code_len ); 2367 2368 unredir_tt[i].inUse = True; 2369 unredir_tt[i].vge = *vge; 2370 unredir_tt[i].hcode = (Addr)dstP; 2371 2372 unredir_tc_used += code_szQ; 2373 vg_assert(unredir_tc_used >= 0); 2374 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ); 2375 2376 vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]); 2377 } 2378 2379 Bool VG_(search_unredir_transtab) ( /*OUT*/Addr* result, 2380 Addr guest_addr ) 2381 { 2382 Int i; 2383 for (i = 0; i < N_UNREDIR_TT; i++) { 2384 if (!unredir_tt[i].inUse) 2385 continue; 2386 if (unredir_tt[i].vge.base[0] == guest_addr) { 2387 *result = unredir_tt[i].hcode; 2388 return True; 2389 } 2390 } 2391 return False; 2392 } 2393 2394 static void unredir_discard_translations( Addr guest_start, ULong range ) 2395 { 2396 Int i; 2397 2398 vg_assert(sanity_check_redir_tt_tc()); 2399 2400 for (i = 0; i <= unredir_tt_highwater; i++) { 2401 if (unredir_tt[i].inUse) { 2402 /* Fake up a TTEntryH just so we can pass it to overlaps() 2403 without having to make a new version of overlaps() just for 2404 this rare case. */ 2405 TTEntryH tmp_tteH; 2406 TTEntryH__from_VexGuestExtents( &tmp_tteH, &unredir_tt[i].vge ); 2407 tmp_tteH.status = Empty; /* Completely irrelevant; pure paranoia. */ 2408 if (overlaps( guest_start, range, &tmp_tteH )) { 2409 unredir_tt[i].inUse = False; 2410 } 2411 } 2412 } 2413 } 2414 2415 2416 /*------------------------------------------------------------*/ 2417 /*--- Initialisation. ---*/ 2418 /*------------------------------------------------------------*/ 2419 2420 void VG_(init_tt_tc) ( void ) 2421 { 2422 Int i, avg_codeszQ; 2423 2424 vg_assert(!init_done); 2425 init_done = True; 2426 2427 /* Otherwise lots of things go wrong... */ 2428 vg_assert(sizeof(ULong) == 8); 2429 vg_assert(sizeof(TTEno) == sizeof(HTTno)); 2430 vg_assert(sizeof(TTEno) == 2); 2431 vg_assert(N_TTES_PER_SECTOR <= N_HTTES_PER_SECTOR); 2432 vg_assert(N_HTTES_PER_SECTOR < INV_TTE); 2433 vg_assert(N_HTTES_PER_SECTOR < EC2TTE_DELETED); 2434 vg_assert(N_HTTES_PER_SECTOR < HTT_EMPTY); 2435 /* check fast cache entries really are 2 words long */ 2436 vg_assert(sizeof(Addr) == sizeof(void*)); 2437 vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr)); 2438 /* check fast cache entries are packed back-to-back with no spaces */ 2439 vg_assert(sizeof( VG_(tt_fast) ) 2440 == VG_TT_FAST_SIZE * sizeof(FastCacheEntry)); 2441 /* check fast cache is aligned as we requested. Not fatal if it 2442 isn't, but we might as well make sure. */ 2443 vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) )); 2444 2445 /* The TTEntryH size is critical for keeping the LLC miss rate down 2446 when doing a lot of discarding. Hence check it here. We also 2447 have a lot of TTEntryCs so let's check that too. */ 2448 if (sizeof(HWord) == 8) { 2449 vg_assert(sizeof(TTEntryH) <= 32); 2450 vg_assert(sizeof(TTEntryC) <= 112); 2451 } 2452 else if (sizeof(HWord) == 4) { 2453 vg_assert(sizeof(TTEntryH) <= 20); 2454 # if defined(VGP_ppc32_linux) || defined(VGP_mips32_linux) \ 2455 || defined(VGP_arm_linux) 2456 /* On PPC32, MIPS32, ARM32 platforms, alignof(ULong) == 8, so the 2457 structure is larger than on other 32 bit targets. */ 2458 vg_assert(sizeof(TTEntryC) <= 96); 2459 # else 2460 vg_assert(sizeof(TTEntryC) <= 88); 2461 # endif 2462 } 2463 else { 2464 vg_assert(0); 2465 } 2466 2467 /* All the hassle about converting between TTEntryH and 2468 VexGuestExtents was so as to ensure the following. */ 2469 vg_assert(sizeof(TTEntryH) == sizeof(VexGuestExtents)); 2470 2471 if (VG_(clo_verbosity) > 2) 2472 VG_(message)(Vg_DebugMsg, 2473 "TT/TC: VG_(init_tt_tc) " 2474 "(startup of code management)\n"); 2475 2476 /* Figure out how big each tc area should be. */ 2477 if (VG_(clo_avg_transtab_entry_size) == 0) 2478 avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8; 2479 else 2480 avg_codeszQ = (VG_(clo_avg_transtab_entry_size) + 7) / 8; 2481 tc_sector_szQ = N_TTES_PER_SECTOR * (1 + avg_codeszQ); 2482 2483 /* Ensure the calculated value is not way crazy. */ 2484 vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR); 2485 vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR); 2486 2487 n_sectors = VG_(clo_num_transtab_sectors); 2488 vg_assert(n_sectors >= MIN_N_SECTORS); 2489 vg_assert(n_sectors <= MAX_N_SECTORS); 2490 2491 /* Initialise the sectors, even the ones we aren't going to use. 2492 Set all fields to zero. */ 2493 youngest_sector = 0; 2494 for (i = 0; i < MAX_N_SECTORS; i++) 2495 VG_(memset)(§ors[i], 0, sizeof(sectors[i])); 2496 2497 /* Initialise the sector_search_order hint table, including the 2498 entries we aren't going to use. */ 2499 for (i = 0; i < MAX_N_SECTORS; i++) 2500 sector_search_order[i] = INV_SNO; 2501 2502 /* Initialise the fast cache. */ 2503 invalidateFastCache(); 2504 2505 /* and the unredir tt/tc */ 2506 init_unredir_tt_tc(); 2507 2508 if (VG_(clo_verbosity) > 2 || VG_(clo_stats) 2509 || VG_(debugLog_getLevel) () >= 2) { 2510 VG_(message)(Vg_DebugMsg, 2511 "TT/TC: cache: %s--avg-transtab-entry-size=%u, " 2512 "%stool provided default %u\n", 2513 VG_(clo_avg_transtab_entry_size) == 0 ? "ignoring " : "using ", 2514 VG_(clo_avg_transtab_entry_size), 2515 VG_(clo_avg_transtab_entry_size) == 0 ? "using " : "ignoring ", 2516 VG_(details).avg_translation_sizeB); 2517 VG_(message)(Vg_DebugMsg, 2518 "TT/TC: cache: %d sectors of %'d bytes each = %'d total TC\n", 2519 n_sectors, 8 * tc_sector_szQ, 2520 n_sectors * 8 * tc_sector_szQ ); 2521 VG_(message)(Vg_DebugMsg, 2522 "TT/TC: table: %'d tables[%d] of C %'d + H %'d bytes each " 2523 "= %'d total TT\n", 2524 n_sectors, N_TTES_PER_SECTOR, 2525 (int)(N_TTES_PER_SECTOR * sizeof(TTEntryC)), 2526 (int)(N_TTES_PER_SECTOR * sizeof(TTEntryH)), 2527 (int)(n_sectors * N_TTES_PER_SECTOR 2528 * (sizeof(TTEntryC) + sizeof(TTEntryH)))); 2529 VG_(message)(Vg_DebugMsg, 2530 "TT/TC: table: %d tt entries each = %'d total tt entries\n", 2531 N_TTES_PER_SECTOR, n_sectors * N_TTES_PER_SECTOR); 2532 VG_(message)(Vg_DebugMsg, 2533 "TT/TC: table: %d htt[%d] of %'d bytes each = %'d total HTT" 2534 " (htt[%d] %d%% max occup)\n", 2535 n_sectors, N_HTTES_PER_SECTOR, 2536 (int)(N_HTTES_PER_SECTOR * sizeof(TTEno)), 2537 (int)(n_sectors * N_HTTES_PER_SECTOR * sizeof(TTEno)), 2538 N_HTTES_PER_SECTOR, SECTOR_TT_LIMIT_PERCENT); 2539 } 2540 2541 if (0) { 2542 VG_(printf)("XXXX sizeof(VexGuestExtents) = %d\n", 2543 (Int)sizeof(VexGuestExtents)); 2544 VG_(printf)("XXXX sizeof(InEdge) = %d\n", (Int)sizeof(InEdge)); 2545 VG_(printf)("XXXX sizeof(OutEdge) = %d\n", (Int)sizeof(OutEdge)); 2546 VG_(printf)("XXXX sizeof(InEdgeArr) = %d\n", (Int)sizeof(InEdgeArr)); 2547 VG_(printf)("XXXX sizeof(OutEdgeArr) = %d\n", (Int)sizeof(OutEdgeArr)); 2548 VG_(printf)("XXXX sizeof(TTEntryC) = %d\n", (Int)sizeof(TTEntryC)); 2549 VG_(printf)("XXXX sizeof(TTEntryH) = %d\n", (Int)sizeof(TTEntryH)); 2550 } 2551 } 2552 2553 2554 /*------------------------------------------------------------*/ 2555 /*--- Printing out statistics. ---*/ 2556 /*------------------------------------------------------------*/ 2557 2558 static Double safe_idiv( ULong a, ULong b ) 2559 { 2560 return (b == 0 ? 0 : (Double)a / (Double)b); 2561 } 2562 2563 UInt VG_(get_bbs_translated) ( void ) 2564 { 2565 return n_in_count; 2566 } 2567 2568 void VG_(print_tt_tc_stats) ( void ) 2569 { 2570 VG_(message)(Vg_DebugMsg, 2571 " tt/tc: %'llu tt lookups requiring %'llu probes\n", 2572 n_full_lookups, n_lookup_probes ); 2573 VG_(message)(Vg_DebugMsg, 2574 " tt/tc: %'llu fast-cache updates, %'llu flushes\n", 2575 n_fast_updates, n_fast_flushes ); 2576 2577 VG_(message)(Vg_DebugMsg, 2578 " transtab: new %'llu " 2579 "(%'llu -> %'llu; ratio %3.1f) [%'llu scs] " 2580 "avg tce size %llu\n", 2581 n_in_count, n_in_osize, n_in_tsize, 2582 safe_idiv(n_in_tsize, n_in_osize), 2583 n_in_sc_count, 2584 n_in_tsize / (n_in_count ? n_in_count : 1)); 2585 VG_(message)(Vg_DebugMsg, 2586 " transtab: dumped %'llu (%'llu -> ?" "?) " 2587 "(sectors recycled %'llu)\n", 2588 n_dump_count, n_dump_osize, n_sectors_recycled ); 2589 VG_(message)(Vg_DebugMsg, 2590 " transtab: discarded %'llu (%'llu -> ?" "?)\n", 2591 n_disc_count, n_disc_osize ); 2592 2593 if (DEBUG_TRANSTAB) { 2594 VG_(printf)("\n"); 2595 for (EClassNo e = 0; e < ECLASS_N; e++) { 2596 VG_(printf)(" %4d", sectors[0].ec2tte_used[e]); 2597 if (e % 16 == 15) 2598 VG_(printf)("\n"); 2599 } 2600 VG_(printf)("\n\n"); 2601 } 2602 } 2603 2604 /*------------------------------------------------------------*/ 2605 /*--- Printing out of profiling results. ---*/ 2606 /*------------------------------------------------------------*/ 2607 2608 static ULong score ( const TTEntryC* tteC ) 2609 { 2610 return ((ULong)tteC->usage.prof.weight) * ((ULong)tteC->usage.prof.count); 2611 } 2612 2613 ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops ) 2614 { 2615 SECno sno; 2616 Int r, s; 2617 ULong score_total; 2618 TTEno i; 2619 2620 /* First, compute the total weighted count, and find the top N 2621 ttes. tops contains pointers to the most-used n_tops blocks, in 2622 descending order (viz, tops[0] is the highest scorer). */ 2623 for (s = 0; s < n_tops; s++) { 2624 tops[s].addr = 0; 2625 tops[s].score = 0; 2626 } 2627 2628 score_total = 0; 2629 2630 for (sno = 0; sno < n_sectors; sno++) { 2631 if (sectors[sno].tc == NULL) 2632 continue; 2633 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 2634 if (sectors[sno].ttH[i].status != InUse) 2635 continue; 2636 score_total += score(§ors[sno].ttC[i]); 2637 /* Find the rank for sectors[sno].tt{C,H}[i]. */ 2638 r = n_tops-1; 2639 while (True) { 2640 if (r == -1) 2641 break; 2642 if (tops[r].addr == 0) { 2643 r--; 2644 continue; 2645 } 2646 if ( score(§ors[sno].ttC[i]) > tops[r].score ) { 2647 r--; 2648 continue; 2649 } 2650 break; 2651 } 2652 r++; 2653 vg_assert(r >= 0 && r <= n_tops); 2654 /* This bb should be placed at r, and bbs above it shifted 2655 upwards one slot. */ 2656 if (r < n_tops) { 2657 for (s = n_tops-1; s > r; s--) 2658 tops[s] = tops[s-1]; 2659 tops[r].addr = sectors[sno].ttC[i].entry; 2660 tops[r].score = score( §ors[sno].ttC[i] ); 2661 } 2662 } 2663 } 2664 2665 /* Now zero out all the counter fields, so that we can make 2666 multiple calls here and just get the values since the last call, 2667 each time, rather than values accumulated for the whole run. */ 2668 for (sno = 0; sno < n_sectors; sno++) { 2669 if (sectors[sno].tc == NULL) 2670 continue; 2671 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 2672 if (sectors[sno].ttH[i].status != InUse) 2673 continue; 2674 sectors[sno].ttC[i].usage.prof.count = 0; 2675 } 2676 } 2677 2678 return score_total; 2679 } 2680 2681 /*--------------------------------------------------------------------*/ 2682 /*--- end ---*/ 2683 /*--------------------------------------------------------------------*/ 2684