1 2 /*--------------------------------------------------------------------*/ 3 /*--- Management of the translation table and cache. ---*/ 4 /*--- m_transtab.c ---*/ 5 /*--------------------------------------------------------------------*/ 6 7 /* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2000-2015 Julian Seward 12 jseward (at) acm.org 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30 */ 31 32 #include "pub_core_basics.h" 33 #include "pub_core_debuglog.h" 34 #include "pub_core_machine.h" // For VG_(machine_get_VexArchInfo) 35 #include "pub_core_libcbase.h" 36 #include "pub_core_vki.h" // to keep pub_core_libproc.h happy, sigh 37 #include "pub_core_libcproc.h" // VG_(invalidate_icache) 38 #include "pub_core_libcassert.h" 39 #include "pub_core_libcprint.h" 40 #include "pub_core_options.h" 41 #include "pub_core_tooliface.h" // For VG_(details).avg_translation_sizeB 42 #include "pub_core_transtab.h" 43 #include "pub_core_aspacemgr.h" 44 #include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN) 45 #include "pub_core_xarray.h" 46 #include "pub_core_dispatch.h" // For VG_(disp_cp*) addresses 47 48 49 #define DEBUG_TRANSTAB 0 50 51 52 /*-------------------------------------------------------------*/ 53 /*--- Management of the FIFO-based translation table+cache. ---*/ 54 /*-------------------------------------------------------------*/ 55 56 /* Nr of sectors provided via command line parameter. */ 57 UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT; 58 /* Nr of sectors. 59 Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */ 60 static SECno n_sectors = 0; 61 62 /* Average size of a transtab code entry. 0 means to use the tool 63 provided default. */ 64 UInt VG_(clo_avg_transtab_entry_size) = 0; 65 66 /*------------------ CONSTANTS ------------------*/ 67 /* Number of entries in hash table of each sector. This needs to be a prime 68 number to work properly, it must be <= 65535 (so that a TTE index 69 fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED, HTT_DELETED) 70 to denote 'deleted') and 0xFFFE (HTT_EMPTY) to denote 'Empty' in the 71 hash table. 72 It is strongly recommended not to change this. 73 65521 is the largest prime <= 65535. */ 74 #define N_HTTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521 75 76 #define EC2TTE_DELETED 0xFFFF /* 16-bit special value */ 77 #define HTT_DELETED EC2TTE_DELETED 78 #define HTT_EMPTY 0XFFFE 79 80 // HTTno is the Sector->htt hash table index. Must be the same type as TTEno. 81 typedef UShort HTTno; 82 83 /* Because each sector contains a hash table of TTEntries, we need to 84 specify the maximum allowable loading, after which the sector is 85 deemed full. */ 86 #define SECTOR_TT_LIMIT_PERCENT 65 87 88 /* The sector is deemed full when this many entries are in it. */ 89 #define N_TTES_PER_SECTOR \ 90 ((N_HTTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100) 91 92 /* Equivalence classes for fast address range deletion. There are 1 + 93 2^ECLASS_WIDTH bins. The highest one, ECLASS_MISC, describes an 94 address range which does not fall cleanly within any specific bin. 95 Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. 96 ECLASS_N must fit in a EclassNo. */ 97 #define ECLASS_SHIFT 11 98 #define ECLASS_WIDTH 8 99 #define ECLASS_MISC (1 << ECLASS_WIDTH) 100 #define ECLASS_N (1 + ECLASS_MISC) 101 STATIC_ASSERT(ECLASS_SHIFT + ECLASS_WIDTH < 32); 102 103 typedef UShort EClassNo; 104 105 /*------------------ TYPES ------------------*/ 106 107 /* In edges ("to-me") in the graph created by chaining. */ 108 typedef 109 struct { 110 SECno from_sNo; /* sector number */ 111 TTEno from_tteNo; /* TTE number in given sector */ 112 UInt from_offs: (sizeof(UInt)*8)-1; /* code offset from TCEntry::tcptr 113 where the patch is */ 114 Bool to_fastEP:1; /* Is the patch to a fast or slow entry point? */ 115 } 116 InEdge; 117 118 119 /* Out edges ("from-me") in the graph created by chaining. */ 120 typedef 121 struct { 122 SECno to_sNo; /* sector number */ 123 TTEno to_tteNo; /* TTE number in given sector */ 124 UInt from_offs; /* code offset in owning translation where patch is */ 125 } 126 OutEdge; 127 128 129 #define N_FIXED_IN_EDGE_ARR 3 130 typedef 131 struct { 132 Bool has_var:1; /* True if var is used (then n_fixed must be 0) */ 133 UInt n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_IN_EDGE_ARR */ 134 union { 135 InEdge fixed[N_FIXED_IN_EDGE_ARR]; /* if !has_var */ 136 XArray* var; /* XArray* of InEdgeArr */ /* if has_var */ 137 } edges; 138 } 139 InEdgeArr; 140 141 #define N_FIXED_OUT_EDGE_ARR 2 142 typedef 143 struct { 144 Bool has_var:1; /* True if var is used (then n_fixed must be 0) */ 145 UInt n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_OUT_EDGE_ARR */ 146 union { 147 OutEdge fixed[N_FIXED_OUT_EDGE_ARR]; /* if !has_var */ 148 XArray* var; /* XArray* of OutEdgeArr */ /* if has_var */ 149 } edges; 150 } 151 OutEdgeArr; 152 153 154 /* A translation-table entry. This indicates precisely which areas of 155 guest code are included in the translation, and contains all other 156 auxiliary info too. */ 157 typedef 158 struct { 159 union { 160 struct { 161 /* Profiling only: the count and weight (arbitrary meaning) for 162 this translation. Weight is a property of the translation 163 itself and computed once when the translation is created. 164 Count is an entry count for the translation and is 165 incremented by 1 every time the translation is used, if we 166 are profiling. */ 167 ULong count; 168 UShort weight; 169 } prof; // if status == InUse 170 TTEno next_empty_tte; // if status != InUse 171 } usage; 172 173 /* Status of the slot. Note, we need to be able to do lazy 174 deletion, hence the Deleted state. */ 175 enum { InUse, Deleted, Empty } status; 176 177 /* 64-bit aligned pointer to one or more 64-bit words containing 178 the corresponding host code (must be in the same sector!) 179 This is a pointer into the sector's tc (code) area. */ 180 ULong* tcptr; 181 182 /* This is the original guest address that purportedly is the 183 entry point of the translation. You might think that .entry 184 should be the same as .vge->base[0], and most of the time it 185 is. However, when doing redirections, that is not the case. 186 .vge must always correctly describe the guest code sections 187 from which this translation was made. However, .entry may or 188 may not be a lie, depending on whether or not we're doing 189 redirection. */ 190 Addr entry; 191 192 /* This structure describes precisely what ranges of guest code 193 the translation covers, so we can decide whether or not to 194 delete it when translations of a given address range are 195 invalidated. */ 196 VexGuestExtents vge; 197 198 /* Address range summary info: these are pointers back to 199 eclass[] entries in the containing Sector. Those entries in 200 turn point back here -- the two structures are mutually 201 redundant but both necessary to make fast deletions work. 202 The eclass info is similar to, and derived from, this entry's 203 'vge' field, but it is not the same */ 204 UShort n_tte2ec; // # tte2ec pointers (1 to 3) 205 EClassNo tte2ec_ec[3]; // for each, the eclass # 206 UInt tte2ec_ix[3]; // and the index within the eclass. 207 // for i in 0 .. n_tte2ec-1 208 // sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ] 209 // should be the index 210 // of this TTEntry in the containing Sector's tt array. 211 212 /* Admin information for chaining. 'in_edges' is a set of the 213 patch points which jump to this translation -- hence are 214 predecessors in the control flow graph. 'out_edges' points 215 to successors in the control flow graph -- translations to 216 which this one has a patched jump. In short these are just 217 backwards and forwards edges in the graph of patched-together 218 blocks. The 'in_edges' contain slightly more info, enough 219 that we can undo the chaining of each mentioned patch point. 220 The 'out_edges' list exists only so that we can visit the 221 'in_edges' entries of all blocks we're patched through to, in 222 order to remove ourselves from then when we're deleted. */ 223 224 /* A translation can disappear for two reasons: 225 1. erased (as part of the oldest sector cleanup) when the 226 youngest sector is full. 227 2. discarded due to calls to VG_(discard_translations). 228 VG_(discard_translations) sets the status of the 229 translation to 'Deleted'. 230 A.o., the gdbserver discards one or more translations 231 when a breakpoint is inserted or removed at an Addr, 232 or when single stepping mode is enabled/disabled 233 or when a translation is instrumented for gdbserver 234 (all the target jumps of this translation are 235 invalidated). 236 237 So, it is possible that the translation A to be patched 238 (to obtain a patched jump from A to B) is invalidated 239 after B is translated and before A is patched. 240 In case a translation is erased or discarded, the patching 241 cannot be done. VG_(tt_tc_do_chaining) and find_TTEntry_from_hcode 242 are checking the 'from' translation still exists before 243 doing the patching. 244 245 Is it safe to erase or discard the current translation E being 246 executed ? Amazing, but yes, it is safe. 247 Here is the explanation: 248 249 The translation E being executed can only be erased if a new 250 translation N is being done. A new translation is done only 251 if the host addr is a not yet patched jump to another 252 translation. In such a case, the guest address of N is 253 assigned to the PC in the VEX state. Control is returned 254 to the scheduler. N will be translated. This can erase the 255 translation E (in case of sector full). VG_(tt_tc_do_chaining) 256 will not do the chaining to a non found translation E. 257 The execution will continue at the current guest PC 258 (i.e. the translation N). 259 => it is safe to erase the current translation being executed. 260 261 The current translation E being executed can also be discarded 262 (e.g. by gdbserver). VG_(discard_translations) will mark 263 this translation E as Deleted, but the translation itself 264 is not erased. In particular, its host code can only 265 be overwritten or erased in case a new translation is done. 266 A new translation will only be done if a not yet translated 267 jump is to be executed. The execution of the Deleted translation 268 E will continue till a non patched jump is encountered. 269 This situation is then similar to the 'erasing' case above : 270 the current translation E can be erased or overwritten, as the 271 execution will continue at the new translation N. 272 273 */ 274 275 /* It is possible, although very unlikely, that a block A has 276 more than one patched jump to block B. This could happen if 277 (eg) A finishes "jcond B; jmp B". 278 279 This means in turn that B's in_edges set can list A more than 280 once (twice in this example). However, each such entry must 281 have a different from_offs, since a patched jump can only 282 jump to one place at once (it's meaningless for it to have 283 multiple destinations.) IOW, the successor and predecessor 284 edges in the graph are not uniquely determined by a 285 TTEntry --> TTEntry pair, but rather by a 286 (TTEntry,offset) --> TTEntry triple. 287 288 If A has multiple edges to B then B will mention A multiple 289 times in its in_edges. To make things simpler, we then 290 require that A mentions B exactly the same number of times in 291 its out_edges. Furthermore, a matching out-in pair must have 292 the same offset (from_offs). This facilitates sanity 293 checking, and it facilitates establishing the invariant that 294 a out_edges set may not have duplicates when using the 295 equality defined by (TTEntry,offset). Hence the out_edges 296 and in_edges sets really do have both have set semantics. 297 298 eg if A has been patched to B at offsets 42 and 87 (in A) 299 then A.out_edges = { (B,42), (B,87) } (in any order) 300 and B.in_edges = { (A,42), (A,87) } (in any order) 301 302 Hence for each node pair P->Q in the graph, there's a 1:1 303 mapping between P.out_edges and Q.in_edges. 304 */ 305 InEdgeArr in_edges; 306 OutEdgeArr out_edges; 307 } 308 TTEntry; 309 310 311 /* A structure used for mapping host code addresses back to the 312 relevant TTEntry. Used when doing chaining, for finding the 313 TTEntry to which some arbitrary patch address belongs. */ 314 typedef 315 struct { 316 UChar* start; 317 UInt len; 318 TTEno tteNo; 319 } 320 HostExtent; 321 322 /* Finally, a sector itself. Each sector contains an array of 323 TCEntries, which hold code, and an array of TTEntries, containing 324 all required administrative info. Profiling is supported using the 325 TTEntry usage.prof.count and usage.prof.weight fields, if required. 326 327 If the sector is not in use, all three pointers are NULL and 328 tt_n_inuse is zero. 329 */ 330 typedef 331 struct { 332 /* The TCEntry area. Size of this depends on the average 333 translation size. We try and size it so it becomes full 334 precisely when this sector's translation table (tt) reaches 335 its load limit (SECTOR_TT_LIMIT_PERCENT). */ 336 ULong* tc; 337 338 /* An hash table, mapping guest address to an index in the tt array. 339 htt is a fixed size, always containing 340 exactly N_HTTES_PER_SECTOR entries. */ 341 TTEno* htt; 342 343 /* The TTEntry array. This is a fixed size, always containing 344 exactly N_TTES_PER_SECTOR entries. */ 345 TTEntry* tt; 346 347 /* This points to the current allocation point in tc. */ 348 ULong* tc_next; 349 350 /* The count of tt entries with state InUse. */ 351 Int tt_n_inuse; 352 353 /* A list of Empty/Deleted entries, chained by tte->next_empty_tte */ 354 TTEno empty_tt_list; 355 356 /* Expandable arrays of tt indices for each of the ECLASS_N 357 address range equivalence classes. These hold indices into 358 the containing sector's tt array, which in turn should point 359 back here. */ 360 Int ec2tte_size[ECLASS_N]; 361 Int ec2tte_used[ECLASS_N]; 362 TTEno* ec2tte[ECLASS_N]; 363 364 /* The host extents. The [start, +len) ranges are constructed 365 in strictly non-overlapping order, so we can binary search 366 them at any time. */ 367 XArray* host_extents; /* XArray* of HostExtent */ 368 } 369 Sector; 370 371 372 /*------------------ DECLS ------------------*/ 373 374 /* The root data structure is an array of sectors. The index of the 375 youngest sector is recorded, and new translations are put into that 376 sector. When it fills up, we move along to the next sector and 377 start to fill that up, wrapping around at the end of the array. 378 That way, once all N_TC_SECTORS have been bought into use for the 379 first time, and are full, we then re-use the oldest sector, 380 endlessly. 381 382 When running, youngest sector should be between >= 0 and < 383 N_TC_SECTORS. The initial value indicates the TT/TC system is 384 not yet initialised. 385 */ 386 static Sector sectors[MAX_N_SECTORS]; 387 static Int youngest_sector = INV_SNO; 388 389 /* The number of ULongs in each TCEntry area. This is computed once 390 at startup and does not change. */ 391 static Int tc_sector_szQ = 0; 392 393 394 /* A list of sector numbers, in the order which they should be 395 searched to find translations. This is an optimisation to be used 396 when searching for translations and should not affect 397 correctness. INV_SNO denotes "no entry". */ 398 static SECno sector_search_order[MAX_N_SECTORS]; 399 400 401 /* Fast helper for the TC. A direct-mapped cache which holds a set of 402 recently used (guest address, host address) pairs. This array is 403 referred to directly from m_dispatch/dispatch-<platform>.S. 404 405 Entries in tt_fast may refer to any valid TC entry, regardless of 406 which sector it's in. Consequently we must be very careful to 407 invalidate this cache when TC entries are changed or disappear. 408 409 A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be 410 pointed at to cause that cache entry to miss. This relies on the 411 assumption that no guest code actually has that address, hence a 412 value 0x1 seems good. m_translate gives the client a synthetic 413 segfault if it tries to execute at this address. 414 */ 415 /* 416 typedef 417 struct { 418 Addr guest; 419 Addr host; 420 } 421 FastCacheEntry; 422 */ 423 /*global*/ __attribute__((aligned(16))) 424 FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE]; 425 426 /* Make sure we're not used before initialisation. */ 427 static Bool init_done = False; 428 429 430 /*------------------ STATS DECLS ------------------*/ 431 432 /* Number of fast-cache updates and flushes done. */ 433 static ULong n_fast_flushes = 0; 434 static ULong n_fast_updates = 0; 435 436 /* Number of full lookups done. */ 437 static ULong n_full_lookups = 0; 438 static ULong n_lookup_probes = 0; 439 440 /* Number/osize/tsize of translations entered; also the number of 441 those for which self-checking was requested. */ 442 static ULong n_in_count = 0; 443 static ULong n_in_osize = 0; 444 static ULong n_in_tsize = 0; 445 static ULong n_in_sc_count = 0; 446 447 /* Number/osize of translations discarded due to lack of space. */ 448 static ULong n_dump_count = 0; 449 static ULong n_dump_osize = 0; 450 static ULong n_sectors_recycled = 0; 451 452 /* Number/osize of translations discarded due to requests to do so. */ 453 static ULong n_disc_count = 0; 454 static ULong n_disc_osize = 0; 455 456 457 /*-------------------------------------------------------------*/ 458 /*--- Misc ---*/ 459 /*-------------------------------------------------------------*/ 460 461 static void* ttaux_malloc ( const HChar* tag, SizeT n ) 462 { 463 return VG_(arena_malloc)(VG_AR_TTAUX, tag, n); 464 } 465 466 static void ttaux_free ( void* p ) 467 { 468 VG_(arena_free)(VG_AR_TTAUX, p); 469 } 470 471 472 /*-------------------------------------------------------------*/ 473 /*--- Chaining support ---*/ 474 /*-------------------------------------------------------------*/ 475 476 static inline TTEntry* index_tte ( SECno sNo, TTEno tteNo ) 477 { 478 vg_assert(sNo < n_sectors); 479 vg_assert(tteNo < N_TTES_PER_SECTOR); 480 Sector* s = §ors[sNo]; 481 vg_assert(s->tt); 482 TTEntry* tte = &s->tt[tteNo]; 483 vg_assert(tte->status == InUse); 484 return tte; 485 } 486 487 static void InEdge__init ( InEdge* ie ) 488 { 489 ie->from_sNo = INV_SNO; /* invalid */ 490 ie->from_tteNo = 0; 491 ie->from_offs = 0; 492 ie->to_fastEP = False; 493 } 494 495 static void OutEdge__init ( OutEdge* oe ) 496 { 497 oe->to_sNo = INV_SNO; /* invalid */ 498 oe->to_tteNo = 0; 499 oe->from_offs = 0; 500 } 501 502 static void TTEntry__init ( TTEntry* tte ) 503 { 504 VG_(memset)(tte, 0, sizeof(*tte)); 505 } 506 507 static UWord InEdgeArr__size ( const InEdgeArr* iea ) 508 { 509 if (iea->has_var) { 510 vg_assert(iea->n_fixed == 0); 511 return VG_(sizeXA)(iea->edges.var); 512 } else { 513 vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR); 514 return iea->n_fixed; 515 } 516 } 517 518 static void InEdgeArr__makeEmpty ( InEdgeArr* iea ) 519 { 520 if (iea->has_var) { 521 vg_assert(iea->n_fixed == 0); 522 VG_(deleteXA)(iea->edges.var); 523 iea->edges.var = NULL; 524 iea->has_var = False; 525 } else { 526 vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR); 527 iea->n_fixed = 0; 528 } 529 } 530 531 static 532 InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i ) 533 { 534 if (iea->has_var) { 535 vg_assert(iea->n_fixed == 0); 536 return (InEdge*)VG_(indexXA)(iea->edges.var, i); 537 } else { 538 vg_assert(i < iea->n_fixed); 539 return &iea->edges.fixed[i]; 540 } 541 } 542 543 static 544 void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i ) 545 { 546 if (iea->has_var) { 547 vg_assert(iea->n_fixed == 0); 548 VG_(removeIndexXA)(iea->edges.var, i); 549 } else { 550 vg_assert(i < iea->n_fixed); 551 for (; i+1 < iea->n_fixed; i++) { 552 iea->edges.fixed[i] = iea->edges.fixed[i+1]; 553 } 554 iea->n_fixed--; 555 } 556 } 557 558 static 559 void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie ) 560 { 561 if (iea->has_var) { 562 vg_assert(iea->n_fixed == 0); 563 VG_(addToXA)(iea->edges.var, ie); 564 } else { 565 vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR); 566 if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) { 567 /* The fixed array is full, so we have to initialise an 568 XArray and copy the fixed array into it. */ 569 XArray *var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add", 570 ttaux_free, 571 sizeof(InEdge)); 572 VG_(hintSizeXA) (var, iea->n_fixed + 1); 573 UWord i; 574 for (i = 0; i < iea->n_fixed; i++) { 575 VG_(addToXA)(var, &iea->edges.fixed[i]); 576 } 577 VG_(addToXA)(var, ie); 578 iea->n_fixed = 0; 579 iea->has_var = True; 580 iea->edges.var = var; 581 } else { 582 /* Just add to the fixed array. */ 583 iea->edges.fixed[iea->n_fixed++] = *ie; 584 } 585 } 586 } 587 588 static UWord OutEdgeArr__size ( const OutEdgeArr* oea ) 589 { 590 if (oea->has_var) { 591 vg_assert(oea->n_fixed == 0); 592 return VG_(sizeXA)(oea->edges.var); 593 } else { 594 vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR); 595 return oea->n_fixed; 596 } 597 } 598 599 static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea ) 600 { 601 if (oea->has_var) { 602 vg_assert(oea->n_fixed == 0); 603 VG_(deleteXA)(oea->edges.var); 604 oea->edges.var = NULL; 605 oea->has_var = False; 606 } else { 607 vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR); 608 oea->n_fixed = 0; 609 } 610 } 611 612 static 613 OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i ) 614 { 615 if (oea->has_var) { 616 vg_assert(oea->n_fixed == 0); 617 return (OutEdge*)VG_(indexXA)(oea->edges.var, i); 618 } else { 619 vg_assert(i < oea->n_fixed); 620 return &oea->edges.fixed[i]; 621 } 622 } 623 624 static 625 void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i ) 626 { 627 if (oea->has_var) { 628 vg_assert(oea->n_fixed == 0); 629 VG_(removeIndexXA)(oea->edges.var, i); 630 } else { 631 vg_assert(i < oea->n_fixed); 632 for (; i+1 < oea->n_fixed; i++) { 633 oea->edges.fixed[i] = oea->edges.fixed[i+1]; 634 } 635 oea->n_fixed--; 636 } 637 } 638 639 static 640 void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe ) 641 { 642 if (oea->has_var) { 643 vg_assert(oea->n_fixed == 0); 644 VG_(addToXA)(oea->edges.var, oe); 645 } else { 646 vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR); 647 if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) { 648 /* The fixed array is full, so we have to initialise an 649 XArray and copy the fixed array into it. */ 650 XArray *var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add", 651 ttaux_free, 652 sizeof(OutEdge)); 653 VG_(hintSizeXA) (var, oea->n_fixed+1); 654 UWord i; 655 for (i = 0; i < oea->n_fixed; i++) { 656 VG_(addToXA)(var, &oea->edges.fixed[i]); 657 } 658 VG_(addToXA)(var, oe); 659 oea->n_fixed = 0; 660 oea->has_var = True; 661 oea->edges.var = var; 662 } else { 663 /* Just add to the fixed array. */ 664 oea->edges.fixed[oea->n_fixed++] = *oe; 665 } 666 } 667 } 668 669 static 670 Int HostExtent__cmpOrd ( const void* v1, const void* v2 ) 671 { 672 const HostExtent* hx1 = v1; 673 const HostExtent* hx2 = v2; 674 if (hx1->start + hx1->len <= hx2->start) return -1; 675 if (hx2->start + hx2->len <= hx1->start) return 1; 676 return 0; /* partial overlap */ 677 } 678 679 /* True if hx is a dead host extent, i.e. corresponds to host code 680 of an entry that was invalidated. */ 681 static 682 Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec) 683 { 684 const TTEno tteNo = hx->tteNo; 685 #define LDEBUG(m) if (DEBUG_TRANSTAB) \ 686 VG_(printf) (m \ 687 " start 0x%p len %u sector %d ttslot %u" \ 688 " tt.entry 0x%lu tt.tcptr 0x%p\n", \ 689 hx->start, hx->len, (int)(sec - sectors), \ 690 hx->tteNo, \ 691 sec->tt[tteNo].entry, sec->tt[tteNo].tcptr) 692 693 /* Entry might have been invalidated and not re-used yet.*/ 694 if (sec->tt[tteNo].status == Deleted) { 695 LDEBUG("found deleted entry"); 696 return True; 697 } 698 /* Maybe we found this entry via a host_extents which was 699 inserted for an entry which was changed to Deleted then 700 re-used after. If this entry was re-used, then its tcptr 701 is >= to host_extents start (i.e. the previous tcptr) + len. 702 This is the case as there is no re-use of host code: a new 703 entry or re-used entry always gets "higher value" host code. */ 704 if ((UChar*) sec->tt[tteNo].tcptr >= hx->start + hx->len) { 705 LDEBUG("found re-used entry"); 706 return True; 707 } 708 709 return False; 710 #undef LDEBUG 711 } 712 713 static __attribute__((noinline)) 714 Bool find_TTEntry_from_hcode( /*OUT*/SECno* from_sNo, 715 /*OUT*/TTEno* from_tteNo, 716 void* hcode ) 717 { 718 SECno i; 719 720 /* Search order logic copied from VG_(search_transtab). */ 721 for (i = 0; i < n_sectors; i++) { 722 SECno sno = sector_search_order[i]; 723 if (UNLIKELY(sno == INV_SNO)) 724 return False; /* run out of sectors to search */ 725 726 const Sector* sec = §ors[sno]; 727 const XArray* /* of HostExtent */ host_extents = sec->host_extents; 728 vg_assert(host_extents); 729 730 HostExtent key; 731 VG_(memset)(&key, 0, sizeof(key)); 732 key.start = hcode; 733 key.len = 1; 734 Word firstW = -1, lastW = -1; 735 Bool found = VG_(lookupXA_UNSAFE)( 736 host_extents, &key, &firstW, &lastW, 737 HostExtent__cmpOrd ); 738 vg_assert(firstW == lastW); // always true, even if not found 739 if (found) { 740 HostExtent* hx = VG_(indexXA)(host_extents, firstW); 741 TTEno tteNo = hx->tteNo; 742 /* Do some additional sanity checks. */ 743 vg_assert(tteNo < N_TTES_PER_SECTOR); 744 745 /* if this hx entry corresponds to dead host code, we must 746 tell this code has not been found, as it cannot be patched. */ 747 if (HostExtent__is_dead (hx, sec)) 748 return False; 749 750 vg_assert(sec->tt[tteNo].status == InUse); 751 /* Can only half check that the found TTEntry contains hcode, 752 due to not having a length value for the hcode in the 753 TTEntry. */ 754 vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode); 755 /* Looks plausible */ 756 *from_sNo = sno; 757 *from_tteNo = tteNo; 758 return True; 759 } 760 } 761 return False; 762 } 763 764 765 /* Figure out whether or not hcode is jitted code present in the main 766 code cache (but not in the no-redir cache). Used for sanity 767 checking. */ 768 static Bool is_in_the_main_TC ( const void* hcode ) 769 { 770 SECno i, sno; 771 for (i = 0; i < n_sectors; i++) { 772 sno = sector_search_order[i]; 773 if (sno == INV_SNO) 774 break; /* run out of sectors to search */ 775 if ((const UChar*)hcode >= (const UChar*)sectors[sno].tc 776 && (const UChar*)hcode <= (const UChar*)sectors[sno].tc_next 777 + sizeof(ULong) - 1) 778 return True; 779 } 780 return False; 781 } 782 783 784 /* Fulfill a chaining request, and record admin info so we 785 can undo it later, if required. 786 */ 787 void VG_(tt_tc_do_chaining) ( void* from__patch_addr, 788 SECno to_sNo, 789 TTEno to_tteNo, 790 Bool to_fastEP ) 791 { 792 /* Get the CPU info established at startup. */ 793 VexArch arch_host = VexArch_INVALID; 794 VexArchInfo archinfo_host; 795 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host)); 796 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host ); 797 VexEndness endness_host = archinfo_host.endness; 798 799 // host_code is where we're patching to. So it needs to 800 // take into account, whether we're jumping to the slow 801 // or fast entry point. By definition, the fast entry point 802 // is exactly one event check's worth of code along from 803 // the slow (tcptr) entry point. 804 TTEntry* to_tte = index_tte(to_sNo, to_tteNo); 805 void* host_code = ((UChar*)to_tte->tcptr) 806 + (to_fastEP ? LibVEX_evCheckSzB(arch_host) : 0); 807 808 // stay sane -- the patch point (dst) is in this sector's code cache 809 vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc ); 810 vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next 811 + sizeof(ULong) - 1 ); 812 813 /* Find the TTEntry for the from__ code. This isn't simple since 814 we only know the patch address, which is going to be somewhere 815 inside the from_ block. */ 816 SECno from_sNo = INV_SNO; 817 TTEno from_tteNo = INV_TTE; 818 Bool from_found 819 = find_TTEntry_from_hcode( &from_sNo, &from_tteNo, 820 from__patch_addr ); 821 if (!from_found) { 822 // The from code might have been discarded due to sector re-use 823 // or marked Deleted due to translation invalidation. 824 // In such a case, don't do the chaining. 825 VG_(debugLog)(1,"transtab", 826 "host code %p not found (discarded? sector recycled?)" 827 " => no chaining done\n", 828 from__patch_addr); 829 return; 830 } 831 832 TTEntry* from_tte = index_tte(from_sNo, from_tteNo); 833 834 /* Get VEX to do the patching itself. We have to hand it off 835 since it is host-dependent. */ 836 VexInvalRange vir 837 = LibVEX_Chain( 838 arch_host, endness_host, 839 from__patch_addr, 840 VG_(fnptr_to_fnentry)( 841 to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP) 842 : &VG_(disp_cp_chain_me_to_slowEP)), 843 (void*)host_code 844 ); 845 VG_(invalidate_icache)( (void*)vir.start, vir.len ); 846 847 /* Now do the tricky bit -- update the ch_succs and ch_preds info 848 for the two translations involved, so we can undo the chaining 849 later, which we will have to do if the to_ block gets removed 850 for whatever reason. */ 851 852 /* This is the new from_ -> to_ link to add. */ 853 InEdge ie; 854 InEdge__init(&ie); 855 ie.from_sNo = from_sNo; 856 ie.from_tteNo = from_tteNo; 857 ie.to_fastEP = to_fastEP; 858 HWord from_offs = (HWord)( (UChar*)from__patch_addr 859 - (UChar*)from_tte->tcptr ); 860 vg_assert(from_offs < 100000/* let's say */); 861 ie.from_offs = (UInt)from_offs; 862 863 /* This is the new to_ -> from_ backlink to add. */ 864 OutEdge oe; 865 OutEdge__init(&oe); 866 oe.to_sNo = to_sNo; 867 oe.to_tteNo = to_tteNo; 868 oe.from_offs = (UInt)from_offs; 869 870 /* Add .. */ 871 InEdgeArr__add(&to_tte->in_edges, &ie); 872 OutEdgeArr__add(&from_tte->out_edges, &oe); 873 } 874 875 876 /* Unchain one patch, as described by the specified InEdge. For 877 sanity check purposes only (to check that the patched location is 878 as expected) it also requires the fast and slow entry point 879 addresses of the destination block (that is, the block that owns 880 this InEdge). */ 881 __attribute__((noinline)) 882 static void unchain_one ( VexArch arch_host, VexEndness endness_host, 883 InEdge* ie, 884 void* to_fastEPaddr, void* to_slowEPaddr ) 885 { 886 vg_assert(ie); 887 TTEntry* tte 888 = index_tte(ie->from_sNo, ie->from_tteNo); 889 UChar* place_to_patch 890 = ((UChar*)tte->tcptr) + ie->from_offs; 891 UChar* disp_cp_chain_me 892 = VG_(fnptr_to_fnentry)( 893 ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP) 894 : &VG_(disp_cp_chain_me_to_slowEP) 895 ); 896 UChar* place_to_jump_to_EXPECTED 897 = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr; 898 899 // stay sane: both src and dst for this unchaining are 900 // in the main code cache 901 vg_assert( is_in_the_main_TC(place_to_patch) ); // src 902 vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst 903 // dst check is ok because LibVEX_UnChain checks that 904 // place_to_jump_to_EXPECTED really is the current dst, and 905 // asserts if it isn't. 906 VexInvalRange vir 907 = LibVEX_UnChain( arch_host, endness_host, place_to_patch, 908 place_to_jump_to_EXPECTED, disp_cp_chain_me ); 909 VG_(invalidate_icache)( (void*)vir.start, vir.len ); 910 } 911 912 913 /* The specified block is about to be deleted. Update the preds and 914 succs of its associated blocks accordingly. This includes undoing 915 any chained jumps to this block. */ 916 static 917 void unchain_in_preparation_for_deletion ( VexArch arch_host, 918 VexEndness endness_host, 919 SECno here_sNo, TTEno here_tteNo ) 920 { 921 if (DEBUG_TRANSTAB) 922 VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo); 923 UWord i, j, n, m; 924 Int evCheckSzB = LibVEX_evCheckSzB(arch_host); 925 TTEntry* here_tte = index_tte(here_sNo, here_tteNo); 926 if (DEBUG_TRANSTAB) 927 VG_(printf)("... QQQ tt.entry 0x%lu tt.tcptr 0x%p\n", 928 here_tte->entry, here_tte->tcptr); 929 vg_assert(here_tte->status == InUse); 930 931 /* Visit all InEdges owned by here_tte. */ 932 n = InEdgeArr__size(&here_tte->in_edges); 933 for (i = 0; i < n; i++) { 934 InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i); 935 // Undo the chaining. 936 UChar* here_slow_EP = (UChar*)here_tte->tcptr; 937 UChar* here_fast_EP = here_slow_EP + evCheckSzB; 938 unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP); 939 // Find the corresponding entry in the "from" node's out_edges, 940 // and remove it. 941 TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo); 942 m = OutEdgeArr__size(&from_tte->out_edges); 943 vg_assert(m > 0); // it must have at least one entry 944 for (j = 0; j < m; j++) { 945 OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j); 946 if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo 947 && oe->from_offs == ie->from_offs) 948 break; 949 } 950 vg_assert(j < m); // "oe must be findable" 951 OutEdgeArr__deleteIndex(&from_tte->out_edges, j); 952 } 953 954 /* Visit all OutEdges owned by here_tte. */ 955 n = OutEdgeArr__size(&here_tte->out_edges); 956 for (i = 0; i < n; i++) { 957 OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i); 958 // Find the corresponding entry in the "to" node's in_edges, 959 // and remove it. 960 TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo); 961 m = InEdgeArr__size(&to_tte->in_edges); 962 vg_assert(m > 0); // it must have at least one entry 963 for (j = 0; j < m; j++) { 964 InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j); 965 if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo 966 && ie->from_offs == oe->from_offs) 967 break; 968 } 969 vg_assert(j < m); // "ie must be findable" 970 InEdgeArr__deleteIndex(&to_tte->in_edges, j); 971 } 972 973 InEdgeArr__makeEmpty(&here_tte->in_edges); 974 OutEdgeArr__makeEmpty(&here_tte->out_edges); 975 } 976 977 978 /*-------------------------------------------------------------*/ 979 /*--- Address-range equivalence class stuff ---*/ 980 /*-------------------------------------------------------------*/ 981 982 /* Return equivalence class number for a range. */ 983 984 static EClassNo range_to_eclass ( Addr start, UInt len ) 985 { 986 UInt mask = (1 << ECLASS_WIDTH) - 1; 987 UInt lo = (UInt)start; 988 UInt hi = lo + len - 1; 989 UInt loBits = (lo >> ECLASS_SHIFT) & mask; 990 UInt hiBits = (hi >> ECLASS_SHIFT) & mask; 991 if (loBits == hiBits) { 992 vg_assert(loBits < ECLASS_N-1); 993 return loBits; 994 } else { 995 return ECLASS_MISC; 996 } 997 } 998 999 1000 /* Calculates the equivalence class numbers for any VexGuestExtent. 1001 These are written in *eclasses, which must be big enough to hold 3 1002 Ints. The number written, between 1 and 3, is returned. The 1003 eclasses are presented in order, and any duplicates are removed. 1004 */ 1005 1006 static 1007 Int vexGuestExtents_to_eclasses ( /*OUT*/EClassNo* eclasses, 1008 const VexGuestExtents* vge ) 1009 { 1010 1011 # define SWAP(_lv1,_lv2) \ 1012 do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0) 1013 1014 Int i, j, n_ec; 1015 EClassNo r; 1016 1017 vg_assert(vge->n_used >= 1 && vge->n_used <= 3); 1018 1019 n_ec = 0; 1020 for (i = 0; i < vge->n_used; i++) { 1021 r = range_to_eclass( vge->base[i], vge->len[i] ); 1022 if (r == ECLASS_MISC) 1023 goto bad; 1024 /* only add if we haven't already seen it */ 1025 for (j = 0; j < n_ec; j++) 1026 if (eclasses[j] == r) 1027 break; 1028 if (j == n_ec) 1029 eclasses[n_ec++] = r; 1030 } 1031 1032 if (n_ec == 1) 1033 return 1; 1034 1035 if (n_ec == 2) { 1036 /* sort */ 1037 if (eclasses[0] > eclasses[1]) 1038 SWAP(eclasses[0], eclasses[1]); 1039 return 2; 1040 } 1041 1042 if (n_ec == 3) { 1043 /* sort */ 1044 if (eclasses[0] > eclasses[2]) 1045 SWAP(eclasses[0], eclasses[2]); 1046 if (eclasses[0] > eclasses[1]) 1047 SWAP(eclasses[0], eclasses[1]); 1048 if (eclasses[1] > eclasses[2]) 1049 SWAP(eclasses[1], eclasses[2]); 1050 return 3; 1051 } 1052 1053 /* NOTREACHED */ 1054 vg_assert(0); 1055 1056 bad: 1057 eclasses[0] = ECLASS_MISC; 1058 return 1; 1059 1060 # undef SWAP 1061 } 1062 1063 1064 /* Add tteno to the set of entries listed for equivalence class ec in 1065 this sector. Returns used location in eclass array. */ 1066 1067 static 1068 UInt addEClassNo ( /*MOD*/Sector* sec, EClassNo ec, TTEno tteno ) 1069 { 1070 Int old_sz, new_sz, i, r; 1071 TTEno *old_ar, *new_ar; 1072 1073 vg_assert(ec >= 0 && ec < ECLASS_N); 1074 vg_assert(tteno < N_TTES_PER_SECTOR); 1075 1076 if (DEBUG_TRANSTAB) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno); 1077 1078 if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) { 1079 1080 vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]); 1081 1082 old_sz = sec->ec2tte_size[ec]; 1083 old_ar = sec->ec2tte[ec]; 1084 new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2; 1085 new_ar = ttaux_malloc("transtab.aECN.1", 1086 new_sz * sizeof(TTEno)); 1087 for (i = 0; i < old_sz; i++) 1088 new_ar[i] = old_ar[i]; 1089 if (old_ar) 1090 ttaux_free(old_ar); 1091 sec->ec2tte_size[ec] = new_sz; 1092 sec->ec2tte[ec] = new_ar; 1093 1094 if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz); 1095 } 1096 1097 /* Common case */ 1098 r = sec->ec2tte_used[ec]++; 1099 vg_assert(r >= 0 && r < sec->ec2tte_size[ec]); 1100 sec->ec2tte[ec][r] = tteno; 1101 return (UInt)r; 1102 } 1103 1104 1105 /* 'vge' is being added to 'sec' at TT entry 'tteno'. Add appropriate 1106 eclass entries to 'sec'. */ 1107 1108 static 1109 void upd_eclasses_after_add ( /*MOD*/Sector* sec, TTEno tteno ) 1110 { 1111 Int i, r; 1112 EClassNo eclasses[3]; 1113 TTEntry* tte; 1114 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR); 1115 1116 tte = &sec->tt[tteno]; 1117 r = vexGuestExtents_to_eclasses( eclasses, &tte->vge ); 1118 1119 vg_assert(r >= 1 && r <= 3); 1120 tte->n_tte2ec = r; 1121 1122 for (i = 0; i < r; i++) { 1123 tte->tte2ec_ec[i] = eclasses[i]; 1124 tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], tteno ); 1125 } 1126 } 1127 1128 1129 /* Check the eclass info in 'sec' to ensure it is consistent. Returns 1130 True if OK, False if something's not right. Expensive. */ 1131 1132 static Bool sanity_check_eclasses_in_sector ( const Sector* sec ) 1133 { 1134 # define BAD(_str) do { whassup = (_str); goto bad; } while (0) 1135 1136 const HChar* whassup = NULL; 1137 Int j, k, n, ec_idx; 1138 EClassNo i; 1139 EClassNo ec_num; 1140 TTEntry* tte; 1141 TTEno tteno; 1142 ULong* tce; 1143 1144 /* Basic checks on this sector */ 1145 if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR) 1146 BAD("invalid sec->tt_n_inuse"); 1147 tce = sec->tc_next; 1148 if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ]) 1149 BAD("sec->tc_next points outside tc"); 1150 1151 /* For each eclass ... */ 1152 for (i = 0; i < ECLASS_N; i++) { 1153 if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL) 1154 BAD("ec2tte_size/ec2tte mismatch(1)"); 1155 if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL) 1156 BAD("ec2tte_size/ec2tte mismatch(2)"); 1157 if (sec->ec2tte_used[i] < 0 1158 || sec->ec2tte_used[i] > sec->ec2tte_size[i]) 1159 BAD("implausible ec2tte_used"); 1160 if (sec->ec2tte_used[i] == 0) 1161 continue; 1162 1163 /* For each tt reference in each eclass .. ensure the reference 1164 is to a valid tt entry, and that the entry's address ranges 1165 really include this eclass. */ 1166 1167 for (j = 0; j < sec->ec2tte_used[i]; j++) { 1168 tteno = sec->ec2tte[i][j]; 1169 if (tteno == EC2TTE_DELETED) 1170 continue; 1171 if (tteno >= N_TTES_PER_SECTOR) 1172 BAD("implausible tteno"); 1173 tte = &sec->tt[tteno]; 1174 if (tte->status != InUse) 1175 BAD("tteno points to non-inuse tte"); 1176 if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3) 1177 BAD("tte->n_tte2ec out of range"); 1178 /* Exactly least one of tte->eclasses[0 .. tte->n_eclasses-1] 1179 must equal i. Inspect tte's eclass info. */ 1180 n = 0; 1181 for (k = 0; k < tte->n_tte2ec; k++) { 1182 if (k < tte->n_tte2ec-1 1183 && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1]) 1184 BAD("tte->tte2ec_ec[..] out of order"); 1185 ec_num = tte->tte2ec_ec[k]; 1186 if (ec_num < 0 || ec_num >= ECLASS_N) 1187 BAD("tte->tte2ec_ec[..] out of range"); 1188 if (ec_num != i) 1189 continue; 1190 ec_idx = tte->tte2ec_ix[k]; 1191 if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i]) 1192 BAD("tte->tte2ec_ix[..] out of range"); 1193 if (ec_idx == j) 1194 n++; 1195 } 1196 if (n != 1) 1197 BAD("tteno does not point back at eclass"); 1198 } 1199 } 1200 1201 /* That establishes that for each forward pointer from TTEntrys 1202 there is a corresponding backward pointer from the eclass[] 1203 arrays. However, it doesn't rule out the possibility of other, 1204 bogus pointers in the eclass[] arrays. So do those similarly: 1205 scan through them and check the TTEntryies they point at point 1206 back. */ 1207 1208 for (tteno = 0; tteno < N_TTES_PER_SECTOR; tteno++) { 1209 1210 tte = &sec->tt[tteno]; 1211 if (tte->status == Empty || tte->status == Deleted) { 1212 if (tte->n_tte2ec != 0) 1213 BAD("tte->n_eclasses nonzero for unused tte"); 1214 continue; 1215 } 1216 1217 vg_assert(tte->status == InUse); 1218 1219 if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3) 1220 BAD("tte->n_eclasses out of range(2)"); 1221 1222 for (j = 0; j < tte->n_tte2ec; j++) { 1223 ec_num = tte->tte2ec_ec[j]; 1224 if (ec_num < 0 || ec_num >= ECLASS_N) 1225 BAD("tte->eclass[..] out of range"); 1226 ec_idx = tte->tte2ec_ix[j]; 1227 if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num]) 1228 BAD("tte->ec_idx[..] out of range(2)"); 1229 if (sec->ec2tte[ec_num][ec_idx] != tteno) 1230 BAD("ec2tte does not point back to tte"); 1231 } 1232 } 1233 1234 return True; 1235 1236 bad: 1237 if (whassup) 1238 VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup); 1239 1240 # if 0 1241 VG_(printf)("eclass = %d\n", i); 1242 VG_(printf)("tteno = %d\n", (Int)tteno); 1243 switch (tte->status) { 1244 case InUse: VG_(printf)("InUse\n"); break; 1245 case Deleted: VG_(printf)("Deleted\n"); break; 1246 case Empty: VG_(printf)("Empty\n"); break; 1247 } 1248 if (tte->status != Empty) { 1249 for (k = 0; k < tte->vge.n_used; k++) 1250 VG_(printf)("0x%lx %u\n", tte->vge.base[k], (UInt)tte->vge.len[k]); 1251 } 1252 # endif 1253 1254 return False; 1255 1256 # undef BAD 1257 } 1258 1259 1260 /* Sanity check absolutely everything. True == check passed. */ 1261 1262 /* forwards */ 1263 static Bool sanity_check_redir_tt_tc ( void ); 1264 1265 static Bool sanity_check_sector_search_order ( void ) 1266 { 1267 SECno i, j, nListed; 1268 /* assert the array is the right size */ 1269 vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order) 1270 / sizeof(sector_search_order[0]))); 1271 /* Check it's of the form valid_sector_numbers ++ [INV_SNO, INV_SNO, ..] */ 1272 for (i = 0; i < n_sectors; i++) { 1273 if (sector_search_order[i] == INV_SNO 1274 || sector_search_order[i] >= n_sectors) 1275 break; 1276 } 1277 nListed = i; 1278 for (/* */; i < n_sectors; i++) { 1279 if (sector_search_order[i] != INV_SNO) 1280 break; 1281 } 1282 if (i != n_sectors) 1283 return False; 1284 /* Check each sector number only appears once */ 1285 for (i = 0; i < n_sectors; i++) { 1286 if (sector_search_order[i] == INV_SNO) 1287 continue; 1288 for (j = i+1; j < n_sectors; j++) { 1289 if (sector_search_order[j] == sector_search_order[i]) 1290 return False; 1291 } 1292 } 1293 /* Check that the number of listed sectors equals the number 1294 in use, by counting nListed back down. */ 1295 for (i = 0; i < n_sectors; i++) { 1296 if (sectors[i].tc != NULL) 1297 nListed--; 1298 } 1299 if (nListed != 0) 1300 return False; 1301 return True; 1302 } 1303 1304 static Bool sanity_check_all_sectors ( void ) 1305 { 1306 SECno sno; 1307 Bool sane; 1308 Sector* sec; 1309 for (sno = 0; sno < n_sectors; sno++) { 1310 Int i; 1311 Int nr_not_dead_hx = 0; 1312 Int szhxa; 1313 sec = §ors[sno]; 1314 if (sec->tc == NULL) 1315 continue; 1316 sane = sanity_check_eclasses_in_sector( sec ); 1317 if (!sane) 1318 return False; 1319 szhxa = VG_(sizeXA)(sec->host_extents); 1320 for (i = 0; i < szhxa; i++) { 1321 const HostExtent* hx = VG_(indexXA)(sec->host_extents, i); 1322 if (!HostExtent__is_dead (hx, sec)) 1323 nr_not_dead_hx++; 1324 } 1325 if (nr_not_dead_hx != sec->tt_n_inuse) { 1326 VG_(debugLog)(0, "transtab", 1327 "nr_not_dead_hx %d sanity fail " 1328 "(expected == in use %d)\n", 1329 nr_not_dead_hx, sec->tt_n_inuse); 1330 return False; 1331 } 1332 } 1333 1334 if ( !sanity_check_redir_tt_tc() ) 1335 return False; 1336 if ( !sanity_check_sector_search_order() ) 1337 return False; 1338 return True; 1339 } 1340 1341 1342 1343 /*-------------------------------------------------------------*/ 1344 /*--- Add/find translations ---*/ 1345 /*-------------------------------------------------------------*/ 1346 1347 static UInt vge_osize ( const VexGuestExtents* vge ) 1348 { 1349 UInt i, n = 0; 1350 for (i = 0; i < vge->n_used; i++) 1351 n += (UInt)vge->len[i]; 1352 return n; 1353 } 1354 1355 static Bool isValidSector ( SECno sector ) 1356 { 1357 if (sector == INV_SNO || sector >= n_sectors) 1358 return False; 1359 return True; 1360 } 1361 1362 static inline HTTno HASH_TT ( Addr key ) 1363 { 1364 UInt kHi = sizeof(Addr) == 4 ? 0 : (key >> 32); 1365 UInt kLo = (UInt)key; 1366 UInt k32 = kHi ^ kLo; 1367 UInt ror = 7; 1368 if (ror > 0) 1369 k32 = (k32 >> ror) | (k32 << (32-ror)); 1370 return (HTTno)(k32 % N_HTTES_PER_SECTOR); 1371 } 1372 1373 static void setFastCacheEntry ( Addr key, ULong* tcptr ) 1374 { 1375 UInt cno = (UInt)VG_TT_FAST_HASH(key); 1376 VG_(tt_fast)[cno].guest = key; 1377 VG_(tt_fast)[cno].host = (Addr)tcptr; 1378 n_fast_updates++; 1379 /* This shouldn't fail. It should be assured by m_translate 1380 which should reject any attempt to make translation of code 1381 starting at TRANSTAB_BOGUS_GUEST_ADDR. */ 1382 vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR); 1383 } 1384 1385 /* Invalidate the fast cache VG_(tt_fast). */ 1386 static void invalidateFastCache ( void ) 1387 { 1388 UInt j; 1389 /* This loop is popular enough to make it worth unrolling a 1390 bit, at least on ppc32. */ 1391 vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0); 1392 for (j = 0; j < VG_TT_FAST_SIZE; j += 4) { 1393 VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR; 1394 VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR; 1395 VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR; 1396 VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR; 1397 } 1398 1399 vg_assert(j == VG_TT_FAST_SIZE); 1400 n_fast_flushes++; 1401 } 1402 1403 1404 static TTEno get_empty_tt_slot(SECno sNo) 1405 { 1406 TTEno i; 1407 1408 i = sectors[sNo].empty_tt_list; 1409 sectors[sNo].empty_tt_list = sectors[sNo].tt[i].usage.next_empty_tte; 1410 1411 vg_assert (i >= 0 && i < N_TTES_PER_SECTOR); 1412 1413 return i; 1414 } 1415 1416 static void add_in_empty_tt_list (SECno sNo, TTEno tteno) 1417 { 1418 sectors[sNo].tt[tteno].usage.next_empty_tte = sectors[sNo].empty_tt_list; 1419 sectors[sNo].empty_tt_list = tteno; 1420 } 1421 1422 static void initialiseSector ( SECno sno ) 1423 { 1424 UInt i; 1425 SysRes sres; 1426 Sector* sec; 1427 vg_assert(isValidSector(sno)); 1428 1429 { Bool sane = sanity_check_sector_search_order(); 1430 vg_assert(sane); 1431 } 1432 sec = §ors[sno]; 1433 1434 if (sec->tc == NULL) { 1435 1436 /* Sector has never been used before. Need to allocate tt and 1437 tc. */ 1438 vg_assert(sec->tt == NULL); 1439 vg_assert(sec->tc_next == NULL); 1440 vg_assert(sec->tt_n_inuse == 0); 1441 for (EClassNo e = 0; e < ECLASS_N; e++) { 1442 vg_assert(sec->ec2tte_size[e] == 0); 1443 vg_assert(sec->ec2tte_used[e] == 0); 1444 vg_assert(sec->ec2tte[e] == NULL); 1445 } 1446 vg_assert(sec->host_extents == NULL); 1447 1448 if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) 1449 VG_(dmsg)("transtab: " "allocate sector %d\n", sno); 1450 1451 sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ ); 1452 if (sr_isError(sres)) { 1453 VG_(out_of_memory_NORETURN)("initialiseSector(TC)", 1454 8 * tc_sector_szQ ); 1455 /*NOTREACHED*/ 1456 } 1457 sec->tc = (ULong*)(Addr)sr_Res(sres); 1458 1459 sres = VG_(am_mmap_anon_float_valgrind) 1460 ( N_TTES_PER_SECTOR * sizeof(TTEntry) ); 1461 if (sr_isError(sres)) { 1462 VG_(out_of_memory_NORETURN)("initialiseSector(TT)", 1463 N_TTES_PER_SECTOR * sizeof(TTEntry) ); 1464 /*NOTREACHED*/ 1465 } 1466 sec->tt = (TTEntry*)(Addr)sr_Res(sres); 1467 sec->empty_tt_list = HTT_EMPTY; 1468 for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) { 1469 sec->tt[ei].status = Empty; 1470 sec->tt[ei].n_tte2ec = 0; 1471 add_in_empty_tt_list(sno, ei); 1472 } 1473 1474 sres = VG_(am_mmap_anon_float_valgrind) 1475 ( N_HTTES_PER_SECTOR * sizeof(TTEno) ); 1476 if (sr_isError(sres)) { 1477 VG_(out_of_memory_NORETURN)("initialiseSector(HTT)", 1478 N_HTTES_PER_SECTOR * sizeof(TTEno) ); 1479 /*NOTREACHED*/ 1480 } 1481 sec->htt = (TTEno*)(Addr)sr_Res(sres); 1482 for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++) 1483 sec->htt[hi] = HTT_EMPTY; 1484 1485 /* Set up the host_extents array. */ 1486 sec->host_extents 1487 = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)", 1488 ttaux_free, 1489 sizeof(HostExtent)); 1490 1491 /* Add an entry in the sector_search_order */ 1492 for (i = 0; i < n_sectors; i++) { 1493 if (sector_search_order[i] == INV_SNO) 1494 break; 1495 } 1496 vg_assert(i >= 0 && i < n_sectors); 1497 sector_search_order[i] = sno; 1498 1499 if (VG_(clo_verbosity) > 2) 1500 VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno); 1501 1502 } else { 1503 1504 /* Sector has been used before. Dump the old contents. */ 1505 if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) 1506 VG_(dmsg)("transtab: " "recycle sector %d\n", sno); 1507 n_sectors_recycled++; 1508 1509 vg_assert(sec->tt != NULL); 1510 vg_assert(sec->tc_next != NULL); 1511 n_dump_count += sec->tt_n_inuse; 1512 1513 VexArch arch_host = VexArch_INVALID; 1514 VexArchInfo archinfo_host; 1515 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host)); 1516 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host ); 1517 VexEndness endness_host = archinfo_host.endness; 1518 1519 /* Visit each just-about-to-be-abandoned translation. */ 1520 if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n", 1521 sno); 1522 sec->empty_tt_list = HTT_EMPTY; 1523 for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) { 1524 if (sec->tt[ei].status == InUse) { 1525 vg_assert(sec->tt[ei].n_tte2ec >= 1); 1526 vg_assert(sec->tt[ei].n_tte2ec <= 3); 1527 n_dump_osize += vge_osize(&sec->tt[ei].vge); 1528 /* Tell the tool too. */ 1529 if (VG_(needs).superblock_discards) { 1530 VG_TDICT_CALL( tool_discard_superblock_info, 1531 sec->tt[ei].entry, 1532 sec->tt[ei].vge ); 1533 } 1534 unchain_in_preparation_for_deletion(arch_host, 1535 endness_host, sno, ei); 1536 } else { 1537 vg_assert(sec->tt[ei].n_tte2ec == 0); 1538 } 1539 sec->tt[ei].status = Empty; 1540 sec->tt[ei].n_tte2ec = 0; 1541 add_in_empty_tt_list(sno, ei); 1542 } 1543 for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++) 1544 sec->htt[hi] = HTT_EMPTY; 1545 1546 if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n", 1547 sno); 1548 1549 /* Free up the eclass structures. */ 1550 for (EClassNo e = 0; e < ECLASS_N; e++) { 1551 if (sec->ec2tte_size[e] == 0) { 1552 vg_assert(sec->ec2tte_used[e] == 0); 1553 vg_assert(sec->ec2tte[e] == NULL); 1554 } else { 1555 vg_assert(sec->ec2tte[e] != NULL); 1556 ttaux_free(sec->ec2tte[e]); 1557 sec->ec2tte[e] = NULL; 1558 sec->ec2tte_size[e] = 0; 1559 sec->ec2tte_used[e] = 0; 1560 } 1561 } 1562 1563 /* Empty out the host extents array. */ 1564 vg_assert(sec->host_extents != NULL); 1565 VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents)); 1566 vg_assert(VG_(sizeXA)(sec->host_extents) == 0); 1567 1568 /* Sanity check: ensure it is already in 1569 sector_search_order[]. */ 1570 SECno ix; 1571 for (ix = 0; ix < n_sectors; ix++) { 1572 if (sector_search_order[ix] == sno) 1573 break; 1574 } 1575 vg_assert(ix >= 0 && ix < n_sectors); 1576 1577 if (VG_(clo_verbosity) > 2) 1578 VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno); 1579 } 1580 1581 sec->tc_next = sec->tc; 1582 sec->tt_n_inuse = 0; 1583 1584 invalidateFastCache(); 1585 1586 { Bool sane = sanity_check_sector_search_order(); 1587 vg_assert(sane); 1588 } 1589 } 1590 1591 /* Add a translation of vge to TT/TC. The translation is temporarily 1592 in code[0 .. code_len-1]. 1593 1594 pre: youngest_sector points to a valid (although possibly full) 1595 sector. 1596 */ 1597 void VG_(add_to_transtab)( const VexGuestExtents* vge, 1598 Addr entry, 1599 Addr code, 1600 UInt code_len, 1601 Bool is_self_checking, 1602 Int offs_profInc, 1603 UInt n_guest_instrs ) 1604 { 1605 Int tcAvailQ, reqdQ, y; 1606 ULong *tcptr, *tcptr2; 1607 UChar* srcP; 1608 UChar* dstP; 1609 1610 vg_assert(init_done); 1611 vg_assert(vge->n_used >= 1 && vge->n_used <= 3); 1612 1613 /* 60000: should agree with N_TMPBUF in m_translate.c. */ 1614 vg_assert(code_len > 0 && code_len < 60000); 1615 1616 /* Generally stay sane */ 1617 vg_assert(n_guest_instrs < 200); /* it can be zero, tho */ 1618 1619 if (DEBUG_TRANSTAB) 1620 VG_(printf)("add_to_transtab(entry = 0x%lx, len = %u) ...\n", 1621 entry, code_len); 1622 1623 n_in_count++; 1624 n_in_tsize += code_len; 1625 n_in_osize += vge_osize(vge); 1626 if (is_self_checking) 1627 n_in_sc_count++; 1628 1629 y = youngest_sector; 1630 vg_assert(isValidSector(y)); 1631 1632 if (sectors[y].tc == NULL) 1633 initialiseSector(y); 1634 1635 /* Try putting the translation in this sector. */ 1636 reqdQ = (code_len + 7) >> 3; 1637 1638 /* Will it fit in tc? */ 1639 tcAvailQ = ((ULong*)(§ors[y].tc[tc_sector_szQ])) 1640 - ((ULong*)(sectors[y].tc_next)); 1641 vg_assert(tcAvailQ >= 0); 1642 vg_assert(tcAvailQ <= tc_sector_szQ); 1643 1644 if (tcAvailQ < reqdQ 1645 || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR) { 1646 /* No. So move on to the next sector. Either it's never been 1647 used before, in which case it will get its tt/tc allocated 1648 now, or it has been used before, in which case it is set to be 1649 empty, hence throwing out the oldest sector. */ 1650 vg_assert(tc_sector_szQ > 0); 1651 Int tt_loading_pct = (100 * sectors[y].tt_n_inuse) 1652 / N_HTTES_PER_SECTOR; 1653 Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ)) 1654 / tc_sector_szQ; 1655 if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) { 1656 VG_(dmsg)("transtab: " 1657 "declare sector %d full " 1658 "(TT loading %2d%%, TC loading %2d%%, avg tce size %d)\n", 1659 y, tt_loading_pct, tc_loading_pct, 1660 8 * (tc_sector_szQ - tcAvailQ)/sectors[y].tt_n_inuse); 1661 } 1662 youngest_sector++; 1663 if (youngest_sector >= n_sectors) 1664 youngest_sector = 0; 1665 y = youngest_sector; 1666 initialiseSector(y); 1667 } 1668 1669 /* Be sure ... */ 1670 tcAvailQ = ((ULong*)(§ors[y].tc[tc_sector_szQ])) 1671 - ((ULong*)(sectors[y].tc_next)); 1672 vg_assert(tcAvailQ >= 0); 1673 vg_assert(tcAvailQ <= tc_sector_szQ); 1674 vg_assert(tcAvailQ >= reqdQ); 1675 vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR); 1676 vg_assert(sectors[y].tt_n_inuse >= 0); 1677 1678 /* Copy into tc. */ 1679 tcptr = sectors[y].tc_next; 1680 vg_assert(tcptr >= §ors[y].tc[0]); 1681 vg_assert(tcptr <= §ors[y].tc[tc_sector_szQ]); 1682 1683 dstP = (UChar*)tcptr; 1684 srcP = (UChar*)code; 1685 VG_(memcpy)(dstP, srcP, code_len); 1686 sectors[y].tc_next += reqdQ; 1687 sectors[y].tt_n_inuse++; 1688 1689 /* more paranoia */ 1690 tcptr2 = sectors[y].tc_next; 1691 vg_assert(tcptr2 >= §ors[y].tc[0]); 1692 vg_assert(tcptr2 <= §ors[y].tc[tc_sector_szQ]); 1693 1694 /* Find an empty tt slot, and use it. There must be such a slot 1695 since tt is never allowed to get completely full. */ 1696 TTEno tteix = get_empty_tt_slot(y); 1697 TTEntry__init(§ors[y].tt[tteix]); 1698 sectors[y].tt[tteix].status = InUse; 1699 sectors[y].tt[tteix].tcptr = tcptr; 1700 sectors[y].tt[tteix].usage.prof.count = 0; 1701 sectors[y].tt[tteix].usage.prof.weight = 1702 n_guest_instrs == 0 ? 1 : n_guest_instrs; 1703 sectors[y].tt[tteix].vge = *vge; 1704 sectors[y].tt[tteix].entry = entry; 1705 1706 // Point an htt entry to the tt slot 1707 HTTno htti = HASH_TT(entry); 1708 vg_assert(htti >= 0 && htti < N_HTTES_PER_SECTOR); 1709 while (True) { 1710 if (sectors[y].htt[htti] == HTT_EMPTY 1711 || sectors[y].htt[htti] == HTT_DELETED) 1712 break; 1713 htti++; 1714 if (htti >= N_HTTES_PER_SECTOR) 1715 htti = 0; 1716 } 1717 sectors[y].htt[htti] = tteix; 1718 1719 /* Patch in the profile counter location, if necessary. */ 1720 if (offs_profInc != -1) { 1721 vg_assert(offs_profInc >= 0 && offs_profInc < code_len); 1722 VexArch arch_host = VexArch_INVALID; 1723 VexArchInfo archinfo_host; 1724 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host)); 1725 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host ); 1726 VexEndness endness_host = archinfo_host.endness; 1727 VexInvalRange vir 1728 = LibVEX_PatchProfInc( arch_host, endness_host, 1729 dstP + offs_profInc, 1730 §ors[y].tt[tteix].usage.prof.count ); 1731 VG_(invalidate_icache)( (void*)vir.start, vir.len ); 1732 } 1733 1734 VG_(invalidate_icache)( dstP, code_len ); 1735 1736 /* Add this entry to the host_extents map, checking that we're 1737 adding in order. */ 1738 { HostExtent hx; 1739 hx.start = (UChar*)tcptr; 1740 hx.len = code_len; 1741 hx.tteNo = tteix; 1742 vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */ 1743 XArray* hx_array = sectors[y].host_extents; 1744 vg_assert(hx_array); 1745 Word n = VG_(sizeXA)(hx_array); 1746 if (n > 0) { 1747 HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1); 1748 vg_assert(hx_prev->start + hx_prev->len <= hx.start); 1749 } 1750 VG_(addToXA)(hx_array, &hx); 1751 if (DEBUG_TRANSTAB) 1752 VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n", 1753 hx.start, hx.len, y, tteix); 1754 } 1755 1756 /* Update the fast-cache. */ 1757 setFastCacheEntry( entry, tcptr ); 1758 1759 /* Note the eclass numbers for this translation. */ 1760 upd_eclasses_after_add( §ors[y], tteix ); 1761 } 1762 1763 1764 /* Search for the translation of the given guest address. If 1765 requested, a successful search can also cause the fast-caches to be 1766 updated. 1767 */ 1768 Bool VG_(search_transtab) ( /*OUT*/Addr* res_hcode, 1769 /*OUT*/SECno* res_sNo, 1770 /*OUT*/TTEno* res_tteNo, 1771 Addr guest_addr, 1772 Bool upd_cache ) 1773 { 1774 SECno i, sno; 1775 HTTno j, k, kstart; 1776 TTEno tti; 1777 1778 vg_assert(init_done); 1779 /* Find the initial probe point just once. It will be the same in 1780 all sectors and avoids multiple expensive % operations. */ 1781 n_full_lookups++; 1782 kstart = HASH_TT(guest_addr); 1783 vg_assert(kstart >= 0 && kstart < N_HTTES_PER_SECTOR); 1784 1785 /* Search in all the sectors,using sector_search_order[] as a 1786 heuristic guide as to what order to visit the sectors. */ 1787 for (i = 0; i < n_sectors; i++) { 1788 1789 sno = sector_search_order[i]; 1790 if (UNLIKELY(sno == INV_SNO)) 1791 return False; /* run out of sectors to search */ 1792 1793 k = kstart; 1794 for (j = 0; j < N_HTTES_PER_SECTOR; j++) { 1795 n_lookup_probes++; 1796 tti = sectors[sno].htt[k]; 1797 if (tti < N_TTES_PER_SECTOR 1798 && sectors[sno].tt[tti].entry == guest_addr) { 1799 /* found it */ 1800 if (upd_cache) 1801 setFastCacheEntry( 1802 guest_addr, sectors[sno].tt[tti].tcptr ); 1803 if (res_hcode) 1804 *res_hcode = (Addr)sectors[sno].tt[tti].tcptr; 1805 if (res_sNo) 1806 *res_sNo = sno; 1807 if (res_tteNo) 1808 *res_tteNo = tti; 1809 /* pull this one one step closer to the front. For large 1810 apps this more or less halves the number of required 1811 probes. */ 1812 if (i > 0) { 1813 Int tmp = sector_search_order[i-1]; 1814 sector_search_order[i-1] = sector_search_order[i]; 1815 sector_search_order[i] = tmp; 1816 } 1817 return True; 1818 } 1819 // tti is HTT_EMPTY or HTT_DELETED or not the entry of guest_addr 1820 if (sectors[sno].htt[k] == HTT_EMPTY) 1821 break; /* not found in this sector */ 1822 k++; 1823 if (k == N_HTTES_PER_SECTOR) 1824 k = 0; 1825 } 1826 1827 /* If we fall off the end, all entries are InUse and not 1828 matching, or Deleted. In any case we did not find it in this 1829 sector. */ 1830 } 1831 1832 /* Not found in any sector. */ 1833 return False; 1834 } 1835 1836 1837 /*-------------------------------------------------------------*/ 1838 /*--- Delete translations. ---*/ 1839 /*-------------------------------------------------------------*/ 1840 1841 /* forward */ 1842 static void unredir_discard_translations( Addr, ULong ); 1843 1844 /* Stuff for deleting translations which intersect with a given 1845 address range. Unfortunately, to make this run at a reasonable 1846 speed, it is complex. */ 1847 1848 static inline 1849 Bool overlap1 ( Addr s1, ULong r1, Addr s2, ULong r2 ) 1850 { 1851 Addr e1 = s1 + r1 - 1; 1852 Addr e2 = s2 + r2 - 1; 1853 if (e1 < s2 || e2 < s1) 1854 return False; 1855 return True; 1856 } 1857 1858 static inline 1859 Bool overlaps ( Addr start, ULong range, const VexGuestExtents* vge ) 1860 { 1861 if (overlap1(start, range, vge->base[0], vge->len[0])) 1862 return True; 1863 if (vge->n_used < 2) 1864 return False; 1865 if (overlap1(start, range, vge->base[1], vge->len[1])) 1866 return True; 1867 if (vge->n_used < 3) 1868 return False; 1869 if (overlap1(start, range, vge->base[2], vge->len[2])) 1870 return True; 1871 return False; 1872 } 1873 1874 1875 /* Delete a tt entry, and update all the eclass data accordingly. */ 1876 1877 static void delete_tte ( /*MOD*/Sector* sec, SECno secNo, TTEno tteno, 1878 VexArch arch_host, VexEndness endness_host ) 1879 { 1880 Int i, ec_idx; 1881 EClassNo ec_num; 1882 TTEntry* tte; 1883 1884 /* sec and secNo are mutually redundant; cross-check. */ 1885 vg_assert(sec == §ors[secNo]); 1886 1887 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR); 1888 tte = &sec->tt[tteno]; 1889 vg_assert(tte->status == InUse); 1890 vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3); 1891 1892 /* Unchain .. */ 1893 unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno); 1894 1895 /* Deal with the ec-to-tte links first. */ 1896 for (i = 0; i < tte->n_tte2ec; i++) { 1897 ec_num = tte->tte2ec_ec[i]; 1898 ec_idx = tte->tte2ec_ix[i]; 1899 vg_assert(ec_num >= 0 && ec_num < ECLASS_N); 1900 vg_assert(ec_idx >= 0); 1901 vg_assert(ec_idx < sec->ec2tte_used[ec_num]); 1902 /* Assert that the two links point at each other. */ 1903 vg_assert(sec->ec2tte[ec_num][ec_idx] == tteno); 1904 /* "delete" the pointer back to here. */ 1905 sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED; 1906 } 1907 1908 /* Now fix up this TTEntry. */ 1909 /* Mark the entry as deleted in htt. 1910 Note: we could avoid the below hash table search by 1911 adding a reference from tte to its hash position in tt. */ 1912 HTTno j; 1913 HTTno k = HASH_TT(tte->entry); 1914 vg_assert(k >= 0 && k < N_HTTES_PER_SECTOR); 1915 for (j = 0; j < N_HTTES_PER_SECTOR; j++) { 1916 if (sec->htt[k] == tteno) 1917 break; 1918 k++; 1919 if (k == N_HTTES_PER_SECTOR) 1920 k = 0; 1921 } 1922 vg_assert(j < N_HTTES_PER_SECTOR); 1923 sec->htt[k] = HTT_DELETED; 1924 tte->status = Deleted; 1925 tte->n_tte2ec = 0; 1926 add_in_empty_tt_list(secNo, tteno); 1927 1928 /* Stats .. */ 1929 sec->tt_n_inuse--; 1930 n_disc_count++; 1931 n_disc_osize += vge_osize(&tte->vge); 1932 1933 /* Tell the tool too. */ 1934 if (VG_(needs).superblock_discards) { 1935 VG_TDICT_CALL( tool_discard_superblock_info, 1936 tte->entry, 1937 tte->vge ); 1938 } 1939 } 1940 1941 1942 /* Delete translations from sec which intersect specified range, but 1943 only consider translations in the specified eclass. */ 1944 1945 static 1946 Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, SECno secNo, 1947 Addr guest_start, ULong range, 1948 EClassNo ec, 1949 VexArch arch_host, 1950 VexEndness endness_host ) 1951 { 1952 Int i; 1953 TTEno tteno; 1954 Bool anyDeld = False; 1955 TTEntry* tte; 1956 1957 vg_assert(ec >= 0 && ec < ECLASS_N); 1958 1959 for (i = 0; i < sec->ec2tte_used[ec]; i++) { 1960 1961 tteno = sec->ec2tte[ec][i]; 1962 if (tteno == EC2TTE_DELETED) { 1963 /* already deleted */ 1964 continue; 1965 } 1966 1967 vg_assert(tteno < N_TTES_PER_SECTOR); 1968 1969 tte = &sec->tt[tteno]; 1970 vg_assert(tte->status == InUse); 1971 1972 if (overlaps( guest_start, range, &tte->vge )) { 1973 anyDeld = True; 1974 delete_tte( sec, secNo, tteno, arch_host, endness_host ); 1975 } 1976 1977 } 1978 1979 return anyDeld; 1980 } 1981 1982 1983 /* Delete translations from sec which intersect specified range, the 1984 slow way, by inspecting all translations in sec. */ 1985 1986 static 1987 Bool delete_translations_in_sector ( /*MOD*/Sector* sec, SECno secNo, 1988 Addr guest_start, ULong range, 1989 VexArch arch_host, 1990 VexEndness endness_host ) 1991 { 1992 TTEno i; 1993 Bool anyDeld = False; 1994 1995 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 1996 if (sec->tt[i].status == InUse 1997 && overlaps( guest_start, range, &sec->tt[i].vge )) { 1998 anyDeld = True; 1999 delete_tte( sec, secNo, i, arch_host, endness_host ); 2000 } 2001 } 2002 2003 return anyDeld; 2004 } 2005 2006 2007 void VG_(discard_translations) ( Addr guest_start, ULong range, 2008 const HChar* who ) 2009 { 2010 Sector* sec; 2011 SECno sno; 2012 EClassNo ec; 2013 Bool anyDeleted = False; 2014 2015 vg_assert(init_done); 2016 2017 VG_(debugLog)(2, "transtab", 2018 "discard_translations(0x%lx, %llu) req by %s\n", 2019 guest_start, range, who ); 2020 2021 /* Pre-deletion sanity check */ 2022 if (VG_(clo_sanity_level >= 4)) { 2023 Bool sane = sanity_check_all_sectors(); 2024 vg_assert(sane); 2025 } 2026 2027 if (range == 0) 2028 return; 2029 2030 VexArch arch_host = VexArch_INVALID; 2031 VexArchInfo archinfo_host; 2032 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host)); 2033 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host ); 2034 VexEndness endness_host = archinfo_host.endness; 2035 2036 /* There are two different ways to do this. 2037 2038 If the range fits within a single address-range equivalence 2039 class, as will be the case for a cache line sized invalidation, 2040 then we only have to inspect the set of translations listed in 2041 that equivalence class, and also in the "sin-bin" equivalence 2042 class ECLASS_MISC. 2043 2044 Otherwise, the invalidation is of a larger range and probably 2045 results from munmap. In this case it's (probably!) faster just 2046 to inspect all translations, dump those we don't want, and 2047 regenerate the equivalence class information (since modifying it 2048 in-situ is even more expensive). 2049 */ 2050 2051 /* First off, figure out if the range falls within a single class, 2052 and if so which one. */ 2053 2054 ec = ECLASS_MISC; 2055 if (range < (1ULL << ECLASS_SHIFT)) 2056 ec = range_to_eclass( guest_start, (UInt)range ); 2057 2058 /* if ec is ECLASS_MISC then we aren't looking at just a single 2059 class, so use the slow scheme. Else use the fast scheme, 2060 examining 'ec' and ECLASS_MISC. */ 2061 2062 if (ec != ECLASS_MISC) { 2063 2064 VG_(debugLog)(2, "transtab", 2065 " FAST, ec = %d\n", ec); 2066 2067 /* Fast scheme */ 2068 vg_assert(ec >= 0 && ec < ECLASS_MISC); 2069 2070 for (sno = 0; sno < n_sectors; sno++) { 2071 sec = §ors[sno]; 2072 if (sec->tc == NULL) 2073 continue; 2074 anyDeleted |= delete_translations_in_sector_eclass( 2075 sec, sno, guest_start, range, ec, 2076 arch_host, endness_host 2077 ); 2078 anyDeleted |= delete_translations_in_sector_eclass( 2079 sec, sno, guest_start, range, ECLASS_MISC, 2080 arch_host, endness_host 2081 ); 2082 } 2083 2084 } else { 2085 2086 /* slow scheme */ 2087 2088 VG_(debugLog)(2, "transtab", 2089 " SLOW, ec = %d\n", ec); 2090 2091 for (sno = 0; sno < n_sectors; sno++) { 2092 sec = §ors[sno]; 2093 if (sec->tc == NULL) 2094 continue; 2095 anyDeleted |= delete_translations_in_sector( 2096 sec, sno, guest_start, range, 2097 arch_host, endness_host 2098 ); 2099 } 2100 2101 } 2102 2103 if (anyDeleted) 2104 invalidateFastCache(); 2105 2106 /* don't forget the no-redir cache */ 2107 unredir_discard_translations( guest_start, range ); 2108 2109 /* Post-deletion sanity check */ 2110 if (VG_(clo_sanity_level >= 4)) { 2111 TTEno i; 2112 TTEntry* tte; 2113 Bool sane = sanity_check_all_sectors(); 2114 vg_assert(sane); 2115 /* But now, also check the requested address range isn't 2116 present anywhere. */ 2117 for (sno = 0; sno < n_sectors; sno++) { 2118 sec = §ors[sno]; 2119 if (sec->tc == NULL) 2120 continue; 2121 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 2122 tte = &sec->tt[i]; 2123 if (tte->status != InUse) 2124 continue; 2125 vg_assert(!overlaps( guest_start, range, &tte->vge )); 2126 } 2127 } 2128 } 2129 } 2130 2131 /* Whether or not tools may discard translations. */ 2132 Bool VG_(ok_to_discard_translations) = False; 2133 2134 /* This function is exported to tools which can use it to discard 2135 translations, provided it is safe to do so. */ 2136 void VG_(discard_translations_safely) ( Addr start, SizeT len, 2137 const HChar* who ) 2138 { 2139 vg_assert(VG_(ok_to_discard_translations)); 2140 VG_(discard_translations)(start, len, who); 2141 } 2142 2143 /*------------------------------------------------------------*/ 2144 /*--- AUXILIARY: the unredirected TT/TC ---*/ 2145 /*------------------------------------------------------------*/ 2146 2147 /* A very simple translation cache which holds a small number of 2148 unredirected translations. This is completely independent of the 2149 main tt/tc structures. When unredir_tc or unredir_tt becomes full, 2150 both structures are simply dumped and we start over. 2151 2152 Since these translations are unredirected, the search key is (by 2153 definition) the first address entry in the .vge field. */ 2154 2155 /* Sized to hold 500 translations of average size 1000 bytes. */ 2156 2157 #define UNREDIR_SZB 1000 2158 2159 #define N_UNREDIR_TT 500 2160 #define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong)) 2161 2162 typedef 2163 struct { 2164 VexGuestExtents vge; 2165 Addr hcode; 2166 Bool inUse; 2167 } 2168 UTCEntry; 2169 2170 /* We just allocate forwards in _tc, never deleting. */ 2171 static ULong *unredir_tc; 2172 static Int unredir_tc_used = N_UNREDIR_TCQ; 2173 2174 /* Slots in _tt can come into use and out again (.inUse). 2175 Nevertheless _tt_highwater is maintained so that invalidations 2176 don't have to scan all the slots when only a few are in use. 2177 _tt_highwater holds the index of the highest ever allocated 2178 slot. */ 2179 static UTCEntry unredir_tt[N_UNREDIR_TT]; 2180 static Int unredir_tt_highwater; 2181 2182 2183 static void init_unredir_tt_tc ( void ) 2184 { 2185 Int i; 2186 if (unredir_tc == NULL) { 2187 SysRes sres = VG_(am_mmap_anon_float_valgrind) 2188 ( N_UNREDIR_TT * UNREDIR_SZB ); 2189 if (sr_isError(sres)) { 2190 VG_(out_of_memory_NORETURN)("init_unredir_tt_tc", 2191 N_UNREDIR_TT * UNREDIR_SZB); 2192 /*NOTREACHED*/ 2193 } 2194 unredir_tc = (ULong *)(Addr)sr_Res(sres); 2195 } 2196 unredir_tc_used = 0; 2197 for (i = 0; i < N_UNREDIR_TT; i++) 2198 unredir_tt[i].inUse = False; 2199 unredir_tt_highwater = -1; 2200 } 2201 2202 /* Do a sanity check; return False on failure. */ 2203 static Bool sanity_check_redir_tt_tc ( void ) 2204 { 2205 Int i; 2206 if (unredir_tt_highwater < -1) return False; 2207 if (unredir_tt_highwater >= N_UNREDIR_TT) return False; 2208 2209 for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++) 2210 if (unredir_tt[i].inUse) 2211 return False; 2212 2213 if (unredir_tc_used < 0) return False; 2214 if (unredir_tc_used > N_UNREDIR_TCQ) return False; 2215 2216 return True; 2217 } 2218 2219 2220 /* Add an UNREDIRECTED translation of vge to TT/TC. The translation 2221 is temporarily in code[0 .. code_len-1]. 2222 */ 2223 void VG_(add_to_unredir_transtab)( const VexGuestExtents* vge, 2224 Addr entry, 2225 Addr code, 2226 UInt code_len ) 2227 { 2228 Int i, j, code_szQ; 2229 HChar *srcP, *dstP; 2230 2231 vg_assert(sanity_check_redir_tt_tc()); 2232 2233 /* This is the whole point: it's not redirected! */ 2234 vg_assert(entry == vge->base[0]); 2235 2236 /* How many unredir_tt slots are needed */ 2237 code_szQ = (code_len + 7) / 8; 2238 2239 /* Look for an empty unredir_tc slot */ 2240 for (i = 0; i < N_UNREDIR_TT; i++) 2241 if (!unredir_tt[i].inUse) 2242 break; 2243 2244 if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) { 2245 /* It's full; dump everything we currently have */ 2246 init_unredir_tt_tc(); 2247 i = 0; 2248 } 2249 2250 vg_assert(unredir_tc_used >= 0); 2251 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ); 2252 vg_assert(code_szQ > 0); 2253 vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ); 2254 vg_assert(i >= 0 && i < N_UNREDIR_TT); 2255 vg_assert(unredir_tt[i].inUse == False); 2256 2257 if (i > unredir_tt_highwater) 2258 unredir_tt_highwater = i; 2259 2260 dstP = (HChar*)&unredir_tc[unredir_tc_used]; 2261 srcP = (HChar*)code; 2262 for (j = 0; j < code_len; j++) 2263 dstP[j] = srcP[j]; 2264 2265 VG_(invalidate_icache)( dstP, code_len ); 2266 2267 unredir_tt[i].inUse = True; 2268 unredir_tt[i].vge = *vge; 2269 unredir_tt[i].hcode = (Addr)dstP; 2270 2271 unredir_tc_used += code_szQ; 2272 vg_assert(unredir_tc_used >= 0); 2273 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ); 2274 2275 vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]); 2276 } 2277 2278 Bool VG_(search_unredir_transtab) ( /*OUT*/Addr* result, 2279 Addr guest_addr ) 2280 { 2281 Int i; 2282 for (i = 0; i < N_UNREDIR_TT; i++) { 2283 if (!unredir_tt[i].inUse) 2284 continue; 2285 if (unredir_tt[i].vge.base[0] == guest_addr) { 2286 *result = unredir_tt[i].hcode; 2287 return True; 2288 } 2289 } 2290 return False; 2291 } 2292 2293 static void unredir_discard_translations( Addr guest_start, ULong range ) 2294 { 2295 Int i; 2296 2297 vg_assert(sanity_check_redir_tt_tc()); 2298 2299 for (i = 0; i <= unredir_tt_highwater; i++) { 2300 if (unredir_tt[i].inUse 2301 && overlaps( guest_start, range, &unredir_tt[i].vge)) 2302 unredir_tt[i].inUse = False; 2303 } 2304 } 2305 2306 2307 /*------------------------------------------------------------*/ 2308 /*--- Initialisation. ---*/ 2309 /*------------------------------------------------------------*/ 2310 2311 void VG_(init_tt_tc) ( void ) 2312 { 2313 Int i, avg_codeszQ; 2314 2315 vg_assert(!init_done); 2316 init_done = True; 2317 2318 /* Otherwise lots of things go wrong... */ 2319 vg_assert(sizeof(ULong) == 8); 2320 vg_assert(sizeof(TTEno) == sizeof(HTTno)); 2321 vg_assert(sizeof(TTEno) == 2); 2322 vg_assert(N_TTES_PER_SECTOR <= N_HTTES_PER_SECTOR); 2323 vg_assert(N_HTTES_PER_SECTOR < INV_TTE); 2324 vg_assert(N_HTTES_PER_SECTOR < EC2TTE_DELETED); 2325 vg_assert(N_HTTES_PER_SECTOR < HTT_EMPTY); 2326 /* check fast cache entries really are 2 words long */ 2327 vg_assert(sizeof(Addr) == sizeof(void*)); 2328 vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr)); 2329 /* check fast cache entries are packed back-to-back with no spaces */ 2330 vg_assert(sizeof( VG_(tt_fast) ) 2331 == VG_TT_FAST_SIZE * sizeof(FastCacheEntry)); 2332 /* check fast cache is aligned as we requested. Not fatal if it 2333 isn't, but we might as well make sure. */ 2334 vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) )); 2335 2336 if (VG_(clo_verbosity) > 2) 2337 VG_(message)(Vg_DebugMsg, 2338 "TT/TC: VG_(init_tt_tc) " 2339 "(startup of code management)\n"); 2340 2341 /* Figure out how big each tc area should be. */ 2342 if (VG_(clo_avg_transtab_entry_size) == 0) 2343 avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8; 2344 else 2345 avg_codeszQ = (VG_(clo_avg_transtab_entry_size) + 7) / 8; 2346 tc_sector_szQ = N_TTES_PER_SECTOR * (1 + avg_codeszQ); 2347 2348 /* Ensure the calculated value is not way crazy. */ 2349 vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR); 2350 vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR); 2351 2352 n_sectors = VG_(clo_num_transtab_sectors); 2353 vg_assert(n_sectors >= MIN_N_SECTORS); 2354 vg_assert(n_sectors <= MAX_N_SECTORS); 2355 2356 /* Initialise the sectors, even the ones we aren't going to use. 2357 Set all fields to zero. */ 2358 youngest_sector = 0; 2359 for (i = 0; i < MAX_N_SECTORS; i++) 2360 VG_(memset)(§ors[i], 0, sizeof(sectors[i])); 2361 2362 /* Initialise the sector_search_order hint table, including the 2363 entries we aren't going to use. */ 2364 for (i = 0; i < MAX_N_SECTORS; i++) 2365 sector_search_order[i] = INV_SNO; 2366 2367 /* Initialise the fast cache. */ 2368 invalidateFastCache(); 2369 2370 /* and the unredir tt/tc */ 2371 init_unredir_tt_tc(); 2372 2373 if (VG_(clo_verbosity) > 2 || VG_(clo_stats) 2374 || VG_(debugLog_getLevel) () >= 2) { 2375 VG_(message)(Vg_DebugMsg, 2376 "TT/TC: cache: %s--avg-transtab-entry-size=%u, " 2377 "%stool provided default %u\n", 2378 VG_(clo_avg_transtab_entry_size) == 0 ? "ignoring " : "using ", 2379 VG_(clo_avg_transtab_entry_size), 2380 VG_(clo_avg_transtab_entry_size) == 0 ? "using " : "ignoring ", 2381 VG_(details).avg_translation_sizeB); 2382 VG_(message)(Vg_DebugMsg, 2383 "TT/TC: cache: %d sectors of %d bytes each = %d total TC\n", 2384 n_sectors, 8 * tc_sector_szQ, 2385 n_sectors * 8 * tc_sector_szQ ); 2386 VG_(message)(Vg_DebugMsg, 2387 "TT/TC: table: %d tables[%d] of %d bytes each = %d total TT\n", 2388 n_sectors, N_TTES_PER_SECTOR, 2389 (int)(N_TTES_PER_SECTOR * sizeof(TTEntry)), 2390 (int)(n_sectors * N_TTES_PER_SECTOR * sizeof(TTEntry))); 2391 VG_(message)(Vg_DebugMsg, 2392 "TT/TC: table: %d tt entries each = %d total tt entries\n", 2393 N_TTES_PER_SECTOR, n_sectors * N_TTES_PER_SECTOR); 2394 VG_(message)(Vg_DebugMsg, 2395 "TT/TC: table: %d htt[%d] of %d bytes each = %d total HTT" 2396 " (htt[%d] %d%% max occup)\n", 2397 n_sectors, N_HTTES_PER_SECTOR, 2398 (int)(N_HTTES_PER_SECTOR * sizeof(TTEno)), 2399 (int)(n_sectors * N_HTTES_PER_SECTOR * sizeof(TTEno)), 2400 N_HTTES_PER_SECTOR, SECTOR_TT_LIMIT_PERCENT); 2401 } 2402 } 2403 2404 2405 /*------------------------------------------------------------*/ 2406 /*--- Printing out statistics. ---*/ 2407 /*------------------------------------------------------------*/ 2408 2409 static Double safe_idiv( ULong a, ULong b ) 2410 { 2411 return (b == 0 ? 0 : (Double)a / (Double)b); 2412 } 2413 2414 UInt VG_(get_bbs_translated) ( void ) 2415 { 2416 return n_in_count; 2417 } 2418 2419 void VG_(print_tt_tc_stats) ( void ) 2420 { 2421 VG_(message)(Vg_DebugMsg, 2422 " tt/tc: %'llu tt lookups requiring %'llu probes\n", 2423 n_full_lookups, n_lookup_probes ); 2424 VG_(message)(Vg_DebugMsg, 2425 " tt/tc: %'llu fast-cache updates, %'llu flushes\n", 2426 n_fast_updates, n_fast_flushes ); 2427 2428 VG_(message)(Vg_DebugMsg, 2429 " transtab: new %'llu " 2430 "(%'llu -> %'llu; ratio %3.1f) [%'llu scs] " 2431 "avg tce size %llu\n", 2432 n_in_count, n_in_osize, n_in_tsize, 2433 safe_idiv(n_in_tsize, n_in_osize), 2434 n_in_sc_count, 2435 n_in_tsize / (n_in_count ? n_in_count : 1)); 2436 VG_(message)(Vg_DebugMsg, 2437 " transtab: dumped %'llu (%'llu -> ?" "?) " 2438 "(sectors recycled %'llu)\n", 2439 n_dump_count, n_dump_osize, n_sectors_recycled ); 2440 VG_(message)(Vg_DebugMsg, 2441 " transtab: discarded %'llu (%'llu -> ?" "?)\n", 2442 n_disc_count, n_disc_osize ); 2443 2444 if (DEBUG_TRANSTAB) { 2445 VG_(printf)("\n"); 2446 for (EClassNo e = 0; e < ECLASS_N; e++) { 2447 VG_(printf)(" %4d", sectors[0].ec2tte_used[e]); 2448 if (e % 16 == 15) 2449 VG_(printf)("\n"); 2450 } 2451 VG_(printf)("\n\n"); 2452 } 2453 } 2454 2455 /*------------------------------------------------------------*/ 2456 /*--- Printing out of profiling results. ---*/ 2457 /*------------------------------------------------------------*/ 2458 2459 static ULong score ( const TTEntry* tte ) 2460 { 2461 return ((ULong)tte->usage.prof.weight) * ((ULong)tte->usage.prof.count); 2462 } 2463 2464 ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops ) 2465 { 2466 SECno sno; 2467 Int r, s; 2468 ULong score_total; 2469 TTEno i; 2470 2471 /* First, compute the total weighted count, and find the top N 2472 ttes. tops contains pointers to the most-used n_tops blocks, in 2473 descending order (viz, tops[0] is the highest scorer). */ 2474 for (s = 0; s < n_tops; s++) { 2475 tops[s].addr = 0; 2476 tops[s].score = 0; 2477 } 2478 2479 score_total = 0; 2480 2481 for (sno = 0; sno < n_sectors; sno++) { 2482 if (sectors[sno].tc == NULL) 2483 continue; 2484 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 2485 if (sectors[sno].tt[i].status != InUse) 2486 continue; 2487 score_total += score(§ors[sno].tt[i]); 2488 /* Find the rank for sectors[sno].tt[i]. */ 2489 r = n_tops-1; 2490 while (True) { 2491 if (r == -1) 2492 break; 2493 if (tops[r].addr == 0) { 2494 r--; 2495 continue; 2496 } 2497 if ( score(§ors[sno].tt[i]) > tops[r].score ) { 2498 r--; 2499 continue; 2500 } 2501 break; 2502 } 2503 r++; 2504 vg_assert(r >= 0 && r <= n_tops); 2505 /* This bb should be placed at r, and bbs above it shifted 2506 upwards one slot. */ 2507 if (r < n_tops) { 2508 for (s = n_tops-1; s > r; s--) 2509 tops[s] = tops[s-1]; 2510 tops[r].addr = sectors[sno].tt[i].entry; 2511 tops[r].score = score( §ors[sno].tt[i] ); 2512 } 2513 } 2514 } 2515 2516 /* Now zero out all the counter fields, so that we can make 2517 multiple calls here and just get the values since the last call, 2518 each time, rather than values accumulated for the whole run. */ 2519 for (sno = 0; sno < n_sectors; sno++) { 2520 if (sectors[sno].tc == NULL) 2521 continue; 2522 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 2523 if (sectors[sno].tt[i].status != InUse) 2524 continue; 2525 sectors[sno].tt[i].usage.prof.count = 0; 2526 } 2527 } 2528 2529 return score_total; 2530 } 2531 2532 /*--------------------------------------------------------------------*/ 2533 /*--- end ---*/ 2534 /*--------------------------------------------------------------------*/ 2535