/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2012 Julian Seward
      jseward (at) acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_vki.h"        // to keep pub_core_libcproc.h happy, sigh
#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
#include "pub_core_xarray.h"
#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses


/* #define DEBUG_TRANSTAB */


/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/*------------------ CONSTANTS ------------------*/

/* Number of sectors the TC is divided into.  If you need a larger
   overall translation cache, increase this value. */
#define N_SECTORS 8

/* Number of TC entries in each sector.  This needs to be a prime
   number to work properly, it must be <= 65535 (so that a TT index
   fits in a UShort, leaving room for 0xFFFF (EC2TTE_DELETED) to
   denote 'deleted'), and it is strongly recommended not to change
   this.  65521 is the largest prime <= 65535. */
#define N_TTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR_USABLE \
           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)

/* Equivalence classes for fast address range deletion.  There are
   1 + 2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes
   an address range which does not fall cleanly within any specific
   bin.  Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
#define ECLASS_SHIFT 11
#define ECLASS_WIDTH 8
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)

#define EC2TTE_DELETED 0xFFFF /* 16-bit special value */
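
/* Orientation for the eclass parameters (illustrative arithmetic
   only, using made-up addresses): bits
   [ECLASS_SHIFT+ECLASS_WIDTH-1 : ECLASS_SHIFT] of an address select
   the bin, so with SHIFT=11 and WIDTH=8 each bin covers one
   2KB-aligned 2KB chunk, and the bin numbering recurs every
   2^(11+8) = 512KB.  For example:

      range [0x4007800, 0x40078FF]: lo and hi ends both give
         (addr >> 11) & 0xFF == 0x0F            -> eclass 0x0F
      range [0x40087F0, 0x400880F]: straddles a 2KB boundary,
         loBits 0x10 != hiBits 0x11             -> ECLASS_MISC

   Also for orientation: N_TTES_PER_SECTOR_USABLE works out to
   (65521 * 65) / 100 == 42588 entries per sector. */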


/*------------------ TYPES ------------------*/

/* In edges ("to-me") in the graph created by chaining. */
typedef
   struct {
      UInt from_sNo;   /* sector number */
      UInt from_tteNo; /* TTE number in given sector */
      UInt from_offs;  /* code offset from TCEntry::tcptr where the
                          patch is */
      Bool to_fastEP;  /* Is the patch to a fast or slow entry point? */
   }
   InEdge;


/* Out edges ("from-me") in the graph created by chaining. */
typedef
   struct {
      UInt to_sNo;    /* sector number */
      UInt to_tteNo;  /* TTE number in given sector */
      UInt from_offs; /* code offset in owning translation where patch
                         is */
   }
   OutEdge;


#define N_FIXED_IN_EDGE_ARR 3
typedef
   struct {
      UInt   n_fixed; /* 0 .. N_FIXED_IN_EDGE_ARR */
      InEdge fixed[N_FIXED_IN_EDGE_ARR];
      XArray* var; /* XArray* of InEdge */
   }
   InEdgeArr;

#define N_FIXED_OUT_EDGE_ARR 2
typedef
   struct {
      UInt    n_fixed; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      OutEdge fixed[N_FIXED_OUT_EDGE_ARR];
      XArray* var; /* XArray* of OutEdge */
   }
   OutEdgeArr;


/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too.  */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      ULong  count;
      UShort weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index of this TTEntry in the containing
      // Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
         1. erased (as part of the oldest sector cleanup) when the
            youngest sector is full.
         2. discarded due to calls to VG_(discard_translations).
            VG_(discard_translations) sets the status of the
            translation to 'Deleted'.
            Among others, the gdbserver discards one or more
            translations when a breakpoint is inserted or removed at
            an Addr, or when single stepping mode is enabled/disabled,
            or when a translation is instrumented for gdbserver (all
            the target jumps of this translation are invalidated).

         So, it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         In case a translation is erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and
         find_TTEntry_from_hcode check that the 'from' translation
         still exists before doing the patching.

         Is it safe to erase or discard the current translation E
         being executed?  Amazingly, yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being done.  A new translation is done only
         if the host addr is a not yet patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state.  Control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not do the chaining to a not-found translation E.
         The execution will continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver).  VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased.  In particular, its host code can only
         be overwritten or erased in case a new translation is done.
         A new translation will only be done if a not yet translated
         jump is to be executed.  The execution of the Deleted
         translation E will continue until a non-patched jump is
         encountered.  This situation is then similar to the 'erasing'
         case above: the current translation E can be erased or
         overwritten, as the execution will continue at the new
         translation N.
      */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations.)  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if  A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntry;


/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      UInt   tteNo;
   }
   HostExtent;

/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry .count and .weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      UShort* ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;


/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, the youngest sector should be >= 0 and < N_SECTORS.
   The initial value of -1 indicates the TT/TC system is not yet
   initialised.
*/
static Sector sectors[N_SECTORS];
static Int    youngest_sector = -1;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int tc_sector_szQ;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  -1 denotes "no entry". */
static Int sector_search_order[N_SECTORS];


/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
   pointed at to cause that cache entry to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value of 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
/*
typedef
   struct {
      Addr guest;
      Addr host;
   }
   FastCacheEntry;
*/
/*global*/ __attribute__((aligned(16)))
FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];

/* Make sure we're not used before initialisation. */
static Bool init_done = False;
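
/* For orientation only: a C sketch (not compiled; the real thing is
   the hand-written assembly fast path in m_dispatch) of how the
   direct-mapped cache is meant to be probed, assuming the dispatcher
   uses the same VG_TT_FAST_HASH as setFastCacheEntry below. */
#if 0
static inline Addr fast_cache_probe_sketch ( Addr guest_pc )
{
   FastCacheEntry* ent = &VG_(tt_fast)[VG_TT_FAST_HASH(guest_pc)];
   if (LIKELY(ent->guest == guest_pc))
      return ent->host; /* hit: continue directly in the TC */
   return 0;            /* miss: do a full VG_(search_transtab) */
}
#endif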


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups  = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;


/*-------------------------------------------------------------*/
/*--- Misc                                                  ---*/
/*-------------------------------------------------------------*/

static void* ttaux_malloc ( HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}

static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}


/*-------------------------------------------------------------*/
/*--- Chaining support                                      ---*/
/*-------------------------------------------------------------*/

static inline TTEntry* index_tte ( UInt sNo, UInt tteNo )
{
   vg_assert(sNo < N_SECTORS);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->tt);
   TTEntry* tte = &s->tt[tteNo];
   vg_assert(tte->status == InUse);
   return tte;
}

static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = -1; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}

static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = -1; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}

static void TTEntry__init ( TTEntry* tte )
{
   VG_(memset)(tte, 0, sizeof(*tte));
}

static UWord InEdgeArr__size ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return VG_(sizeXA)(iea->var);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      return iea->n_fixed;
   }
}

static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->var);
      iea->var = NULL;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}

static
InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return (InEdge*)VG_(indexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      return &iea->fixed[i];
   }
}

static
void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(removeIndexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      for (; i+1 < iea->n_fixed; i++) {
         iea->fixed[i] = iea->fixed[i+1];
      }
      iea->n_fixed--;
   }
}

static
void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(addToXA)(iea->var, ie);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         iea->var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
                               ttaux_free,
                               sizeof(InEdge));
         UWord i;
         for (i = 0; i < iea->n_fixed; i++) {
            VG_(addToXA)(iea->var, &iea->fixed[i]);
         }
         VG_(addToXA)(iea->var, ie);
         iea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         iea->fixed[iea->n_fixed++] = *ie;
      }
   }
}

static UWord OutEdgeArr__size ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return VG_(sizeXA)(oea->var);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      return oea->n_fixed;
   }
}

static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->var);
      oea->var = NULL;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}

static
OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return (OutEdge*)VG_(indexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      return &oea->fixed[i];
   }
}

static
void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(removeIndexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      for (; i+1 < oea->n_fixed; i++) {
         oea->fixed[i] = oea->fixed[i+1];
      }
      oea->n_fixed--;
   }
}

static
void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(addToXA)(oea->var, oe);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         oea->var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
                               ttaux_free,
                               sizeof(OutEdge));
         UWord i;
         for (i = 0; i < oea->n_fixed; i++) {
            VG_(addToXA)(oea->var, &oea->fixed[i]);
         }
         VG_(addToXA)(oea->var, oe);
         oea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         oea->fixed[oea->n_fixed++] = *oe;
      }
   }
}
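
/* Aside on the two containers above: both edge arrays use a
   small-buffer optimisation.  The first few edges live in the inline
   .fixed array at zero heap cost; only when a node acquires more
   edges than fit is an XArray allocated and the fixed entries
   migrated into it.  E.g. for an InEdgeArr (N_FIXED_IN_EDGE_ARR == 3):

      after adds 1..3:  n_fixed == 1..3, var == NULL
      on the 4th add:   var holds all 4 edges, n_fixed == 0 from
                        then on

   Hence the invariant asserted throughout: var != NULL implies
   n_fixed == 0. */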

static
Int HostExtent__cmpOrd ( void* v1, void* v2 )
{
   HostExtent* hx1 = (HostExtent*)v1;
   HostExtent* hx2 = (HostExtent*)v2;
   if (hx1->start + hx1->len <= hx2->start) return -1;
   if (hx2->start + hx2->len <= hx1->start) return 1;
   return 0; /* partial overlap */
}
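
/* Note on the comparator: host_extents holds strictly non-overlapping
   [start, start+len) ranges in ascending address order, so this
   "overlap counts as equal" comparison is a total order over the
   array and VG_(lookupXA_UNSAFE) can binary-search it.  A probe key
   of length 1, as built in find_TTEntry_from_hcode below, compares
   equal to exactly the extent containing that address, if any.  With
   made-up extents [0x100,0x140) [0x140,0x1A0) [0x200,0x230), the key
   {0x150,1} matches the second and {0x1C0,1} matches none. */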

static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/UInt* from_sNo,
                              /*OUT*/UInt* from_tteNo,
                              void* hcode )
{
   Int i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < N_SECTORS; i++) {
      Int sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      Sector* sec = &sectors[sno];
      XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len   = 1;
      Word firstW = -1, lastW = -1;
      Bool found  = VG_(lookupXA_UNSAFE)(
                       host_extents, &key, &firstW, &lastW,
                       (Int(*)(void*,void*))HostExtent__cmpOrd
                    );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         UInt tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo < N_TTES_PER_SECTOR);
         /* Entry might have been invalidated.  Consider this
            as not found. */
         if (sec->tt[tteNo].status == Deleted)
            return False;
         vg_assert(sec->tt[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = (UInt)tteNo;
         return True;
      }
   }
   return False;
}


/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( void* hcode )
{
   Int i, sno;
   for (i = 0; i < N_SECTORS; i++) {
      sno = sector_search_order[i];
      if (sno == -1)
         break; /* run out of sectors to search */
      if ((UChar*)hcode >= (UChar*)sectors[sno].tc
          && (UChar*)hcode <= (UChar*)sectors[sno].tc_next
                              + sizeof(ULong) - 1)
         return True;
   }
   return False;
}


/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              UInt  to_sNo,
                              UInt  to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch vex_arch = VexArch_INVALID;
   VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

   // host_code is where we're patching to.  So it needs to take into
   // account whether we're jumping to the slow or fast entry point.
   // By definition, the fast entry point is exactly one event
   // check's worth of code along from the slow (tcptr) entry point.
   TTEntry* to_tte = index_tte(to_sNo, to_tteNo);
   void* host_code = ((UChar*)to_tte->tcptr)
                     + (to_fastEP ? LibVEX_evCheckSzB(vex_arch) : 0);

   // stay sane -- the patch point (dst) is in this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   UInt from_sNo   = (UInt)-1;
   UInt from_tteNo = (UInt)-1;
   Bool from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntry* from_tte = index_tte(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           vex_arch,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the ch_succs and ch_preds info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tte->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tte->in_edges, &ie);
   OutEdgeArr__add(&from_tte->out_edges, &oe);
}


/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch vex_arch,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   TTEntry* tte
      = index_tte(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((UChar*)tte->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
      = LibVEX_UnChain( vex_arch, place_to_patch,
                        place_to_jump_to_EXPECTED, disp_cp_chain_me );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}
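
/* Lifecycle recap, with made-up numbers purely for illustration:
   suppose block A ends in a not-yet-patched jump and execution wants
   to proceed into block B, which lives at sector 2, TT entry 340.
   Core code outside this file (the scheduler, acting on a chain-me
   request from the dispatcher) calls roughly

      VG_(tt_tc_do_chaining)( <patch address inside A>, 2, 340,
                              True/*fast EP*/ );

   which patches A's jump in place and records the edge both in
   B.in_edges and A.out_edges.  If B is later thrown out,
   unchain_in_preparation_for_deletion (below) walks B.in_edges,
   calls unchain_one on each entry to restore A's jump to the
   chain-me stub, and removes the matching entry from A.out_edges. */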

/* The specified block is about to be deleted.  Update the preds and
   succs of its associated blocks accordingly.  This includes undoing
   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch vex_arch,
                                           UInt here_sNo, UInt here_tteNo )
{
   if (0)
      VG_(printf)("QQQ unchain_in_prep %u.%u\n", here_sNo, here_tteNo);
   UWord i, j, n, m;
   Int evCheckSzB = LibVEX_evCheckSzB(vex_arch);
   TTEntry* here_tte = index_tte(here_sNo, here_tteNo);
   vg_assert(here_tte->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tte->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
      // Undo the chaining.
      UChar* here_slow_EP = (UChar*)here_tte->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(vex_arch, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tte->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
            break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tte->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tte->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
            break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tte->in_edges, j);
   }

   InEdgeArr__makeEmpty(&here_tte->in_edges);
   OutEdgeArr__makeEmpty(&here_tte->out_edges);
}


/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range. */

static Int range_to_eclass ( Addr64 start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}


/* Calculates the equivalence class numbers for any VexGuestExtents.
   These are written in *eclasses, which must be big enough to hold 3
   Ints.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/
918 */ 919 920 static 921 Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses, 922 VexGuestExtents* vge ) 923 { 924 # define SWAP(_lv1,_lv2) \ 925 do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0) 926 927 Int i, j, n_ec, r; 928 929 vg_assert(vge->n_used >= 1 && vge->n_used <= 3); 930 931 n_ec = 0; 932 for (i = 0; i < vge->n_used; i++) { 933 r = range_to_eclass( vge->base[i], (UInt)vge->len[i] ); 934 if (r == ECLASS_MISC) 935 goto bad; 936 /* only add if we haven't already seen it */ 937 for (j = 0; j < n_ec; j++) 938 if (eclasses[j] == r) 939 break; 940 if (j == n_ec) 941 eclasses[n_ec++] = r; 942 } 943 944 if (n_ec == 1) 945 return 1; 946 947 if (n_ec == 2) { 948 /* sort */ 949 if (eclasses[0] > eclasses[1]) 950 SWAP(eclasses[0], eclasses[1]); 951 return 2; 952 } 953 954 if (n_ec == 3) { 955 /* sort */ 956 if (eclasses[0] > eclasses[2]) 957 SWAP(eclasses[0], eclasses[2]); 958 if (eclasses[0] > eclasses[1]) 959 SWAP(eclasses[0], eclasses[1]); 960 if (eclasses[1] > eclasses[2]) 961 SWAP(eclasses[1], eclasses[2]); 962 return 3; 963 } 964 965 /* NOTREACHED */ 966 vg_assert(0); 967 968 bad: 969 eclasses[0] = ECLASS_MISC; 970 return 1; 971 972 # undef SWAP 973 } 974 975 976 /* Add tteno to the set of entries listed for equivalence class ec in 977 this sector. Returns used location in eclass array. */ 978 979 static 980 UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno ) 981 { 982 Int old_sz, new_sz, i, r; 983 UShort *old_ar, *new_ar; 984 985 vg_assert(ec >= 0 && ec < ECLASS_N); 986 vg_assert(tteno < N_TTES_PER_SECTOR); 987 988 if (0) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno); 989 990 if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) { 991 992 vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]); 993 994 old_sz = sec->ec2tte_size[ec]; 995 old_ar = sec->ec2tte[ec]; 996 new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2; 997 new_ar = ttaux_malloc("transtab.aECN.1", 998 new_sz * sizeof(UShort)); 999 for (i = 0; i < old_sz; i++) 1000 new_ar[i] = old_ar[i]; 1001 if (old_ar) 1002 ttaux_free(old_ar); 1003 sec->ec2tte_size[ec] = new_sz; 1004 sec->ec2tte[ec] = new_ar; 1005 1006 if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz); 1007 } 1008 1009 /* Common case */ 1010 r = sec->ec2tte_used[ec]++; 1011 vg_assert(r >= 0 && r < sec->ec2tte_size[ec]); 1012 sec->ec2tte[ec][r] = tteno; 1013 return (UInt)r; 1014 } 1015 1016 1017 /* 'vge' is being added to 'sec' at TT entry 'tteno'. Add appropriate 1018 eclass entries to 'sec'. */ 1019 1020 static 1021 void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno ) 1022 { 1023 Int i, r, eclasses[3]; 1024 TTEntry* tte; 1025 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR); 1026 1027 tte = &sec->tt[tteno]; 1028 r = vexGuestExtents_to_eclasses( eclasses, &tte->vge ); 1029 1030 vg_assert(r >= 1 && r <= 3); 1031 tte->n_tte2ec = r; 1032 1033 for (i = 0; i < r; i++) { 1034 tte->tte2ec_ec[i] = eclasses[i]; 1035 tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno ); 1036 } 1037 } 1038 1039 1040 /* Check the eclass info in 'sec' to ensure it is consistent. Returns 1041 True if OK, False if something's not right. Expensive. 


/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns used location in eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
{
   Int    old_sz, new_sz, i, r;
   UShort *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
      new_ar = ttaux_malloc("transtab.aECN.1",
                            new_sz * sizeof(UShort));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         ttaux_free(old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}


/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
{
   Int i, r, eclasses[3];
   TTEntry* tte;
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   tte = &sec->tt[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );

   vg_assert(r >= 1 && r <= 3);
   tte->n_tte2ec = r;

   for (i = 0; i < r; i++) {
      tte->tte2ec_ec[i] = eclasses[i];
      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
   }
}


/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   HChar*   whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1] must
            equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_tte2ec nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_tte2ec out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->tte2ec_ec[..] out of range(2)");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->tte2ec_ix[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                                    (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}
out of range(2)"); 1138 if (sec->ec2tte[ec_num][ec_idx] != i) 1139 BAD("ec2tte does not point back to tte"); 1140 } 1141 } 1142 1143 return True; 1144 1145 bad: 1146 if (whassup) 1147 VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup); 1148 1149 # if 0 1150 VG_(printf)("eclass = %d\n", i); 1151 VG_(printf)("tteno = %d\n", (Int)tteno); 1152 switch (tte->status) { 1153 case InUse: VG_(printf)("InUse\n"); break; 1154 case Deleted: VG_(printf)("Deleted\n"); break; 1155 case Empty: VG_(printf)("Empty\n"); break; 1156 } 1157 if (tte->status != Empty) { 1158 for (k = 0; k < tte->vge.n_used; k++) 1159 VG_(printf)("0x%llx %d\n", tte->vge.base[k], 1160 (Int)tte->vge.len[k]); 1161 } 1162 # endif 1163 1164 return False; 1165 1166 # undef BAD 1167 } 1168 1169 1170 /* Sanity check absolutely everything. True == check passed. */ 1171 1172 /* forwards */ 1173 static Bool sanity_check_redir_tt_tc ( void ); 1174 1175 static Bool sanity_check_sector_search_order ( void ) 1176 { 1177 Int i, j, nListed; 1178 /* assert the array is the right size */ 1179 vg_assert(N_SECTORS == (sizeof(sector_search_order) 1180 / sizeof(sector_search_order[0]))); 1181 /* Check it's of the form valid_sector_numbers ++ [-1, -1, ..] */ 1182 for (i = 0; i < N_SECTORS; i++) { 1183 if (sector_search_order[i] < 0 || sector_search_order[i] >= N_SECTORS) 1184 break; 1185 } 1186 nListed = i; 1187 for (/* */; i < N_SECTORS; i++) { 1188 if (sector_search_order[i] != -1) 1189 break; 1190 } 1191 if (i != N_SECTORS) 1192 return False; 1193 /* Check each sector number only appears once */ 1194 for (i = 0; i < N_SECTORS; i++) { 1195 if (sector_search_order[i] == -1) 1196 continue; 1197 for (j = i+1; j < N_SECTORS; j++) { 1198 if (sector_search_order[j] == sector_search_order[i]) 1199 return False; 1200 } 1201 } 1202 /* Check that the number of listed sectors equals the number 1203 in use, by counting nListed back down. 

static void setFastCacheEntry ( Addr64 key, ULong* tcptr )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = (Addr)key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}

/* Invalidate the fast cache VG_(tt_fast). */
static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   }

   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}

static void initialiseSector ( Int sno )
{
   Int     i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);

      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(AddrH)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);

      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == -1)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      VexArch vex_arch = VexArch_INVALID;
      VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

      /* Visit each just-about-to-be-abandoned translation. */
      if (0) VG_(printf)("QQQ unlink-entire-sector: %d START\n", sno);
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
            unchain_in_preparation_for_deletion(vex_arch, sno, i);
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }
      if (0) VG_(printf)("QQQ unlink-entire-sector: %d END\n", sno);

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            ttaux_free(sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == sno)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}


/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].

   pre: youngest_sector points to a valid (although possibly full)
   sector.
*/
void VG_(add_to_transtab)( VexGuestExtents* vge,
                           Addr64           entry,
                           AddrH            code,
                           UInt             code_len,
                           Bool             is_self_checking,
                           Int              offs_profInc,
                           UInt             n_guest_instrs,
                           VexArch          arch_host )
{
   Int    tcAvailQ, reqdQ, y, i;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   /* Generally stay sane */
   vg_assert(n_guest_instrs < 200); /* it can be zero, tho */

   if (0)
      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   reqdQ = (code_len + 7) >> 3;
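
   /* (Worked instance of the rounding, with an arbitrary length: the
      TC is carved out in 8-byte ULong units, and (code_len + 7) >> 3
      is round-up division by 8.  So e.g. code_len == 25 bytes needs
      reqdQ == 4 ULongs, while an exact multiple such as 64 needs
      exactly 8.) */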

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      VG_(debugLog)(1,"transtab",
                      "declare sector %d full "
                      "(TT loading %2d%%, TC loading %2d%%)\n",
                      y,
                      (100 * sectors[y].tt_n_inuse)
                         / N_TTES_PER_SECTOR,
                      (100 * (tc_sector_szQ - tcAvailQ))
                         / tc_sector_szQ);
      youngest_sector++;
      if (youngest_sector >= N_SECTORS)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   VG_(memcpy)(dstP, srcP, code_len);
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full. */
   i = HASH_TT(entry);
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   TTEntry__init(&sectors[y].tt[i]);
   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tcptr  = tcptr;
   sectors[y].tt[i].count  = 0;
   sectors[y].tt[i].weight = n_guest_instrs == 0 ? 1 : n_guest_instrs;
   sectors[y].tt[i].vge    = *vge;
   sectors[y].tt[i].entry  = entry;

   /* Patch in the profile counter location, if necessary. */
   if (offs_profInc != -1) {
      vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
      VexInvalRange vir
         = LibVEX_PatchProfInc( arch_host,
                                dstP + offs_profInc,
                                &sectors[y].tt[i].count );
      VG_(invalidate_icache)( (void*)vir.start, vir.len );
   }

   VG_(invalidate_icache)( dstP, code_len );

   /* Add this entry to the host_extents map, checking that we're
      adding in order. */
   { HostExtent hx;
     hx.start = (UChar*)tcptr;
     hx.len   = code_len;
     hx.tteNo = i;
     vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
     XArray* hx_array = sectors[y].host_extents;
     vg_assert(hx_array);
     Word n = VG_(sizeXA)(hx_array);
     if (n > 0) {
        HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
        vg_assert(hx_prev->start + hx_prev->len <= hx.start);
     }
     VG_(addToXA)(hx_array, &hx);
   }

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}


/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.
*/
Bool VG_(search_transtab) ( /*OUT*/AddrH* res_hcode,
                            /*OUT*/UInt*  res_sNo,
                            /*OUT*/UInt*  res_tteNo,
                            Addr64        guest_addr,
                            Bool          upd_cache )
{
   Int i, j, k, kstart, sno;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   k      = -1;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < N_SECTORS; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      k = kstart;
      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         if (sectors[sno].tt[k].status == InUse
             && sectors[sno].tt[k].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[k].tcptr );
            if (res_hcode)
               *res_hcode = (AddrH)sectors[sno].tt[k].tcptr;
            if (res_sNo)
               *res_sNo = sno;
            if (res_tteNo)
               *res_tteNo = k;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         if (sectors[sno].tt[k].status == Empty)
            break; /* not found in this sector */
         k++;
         if (k == N_TTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}


/*-------------------------------------------------------------*/
/*--- Delete translations.                                  ---*/
/*-------------------------------------------------------------*/

/* forward */
static void unredir_discard_translations( Addr64, ULong );

/* Stuff for deleting translations which intersect with a given
   address range.  Unfortunately, to make this run at a reasonable
   speed, it is complex. */

static inline
Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
{
   Addr64 e1 = s1 + r1 - 1ULL;
   Addr64 e2 = s2 + r2 - 1ULL;
   if (e1 < s2 || e2 < s1)
      return False;
   return True;
}

static inline
Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
{
   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
      return True;
   if (vge->n_used < 2)
      return False;
   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
      return True;
   if (vge->n_used < 3)
      return False;
   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
      return True;
   return False;
}
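
/* The disjointness test in overlap1 is the usual closed-interval
   trick: [s1,e1] and [s2,e2] are disjoint iff one ends before the
   other starts.  With made-up numbers: [0x1000,0x1FFF] and
   [0x1800,0x27FF] overlap (0x1FFF >= 0x1800), whereas
   [0x1000,0x1FFF] and [0x2000,0x2FFF] do not (0x1FFF < 0x2000). */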


/* Delete a tt entry, and update all the eclass data accordingly. */

static void delete_tte ( /*MOD*/Sector* sec, UInt secNo, Int tteno,
                         VexArch vex_arch )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   /* sec and secNo are mutually redundant; cross-check. */
   vg_assert(sec == &sectors[secNo]);

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Unchain .. */
   unchain_in_preparation_for_deletion(vex_arch, secNo, tteno);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   tte->status   = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}


/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass. */

static
Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, UInt secNo,
                                            Addr64 guest_start, ULong range,
                                            Int ec,
                                            VexArch vex_arch )
{
   Int      i;
   UShort   tteno;
   Bool     anyDeld = False;
   TTEntry* tte;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      tte = &sec->tt[tteno];
      vg_assert(tte->status == InUse);

      if (overlaps( guest_start, range, &tte->vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, (Int)tteno, vex_arch );
      }

   }

   return anyDeld;
}


/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec. */

static
Bool delete_translations_in_sector ( /*MOD*/Sector* sec, UInt secNo,
                                     Addr64 guest_start, ULong range,
                                     VexArch vex_arch )
{
   Int  i;
   Bool anyDeld = False;

   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
      if (sec->tt[i].status == InUse
          && overlaps( guest_start, range, &sec->tt[i].vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, i, vex_arch );
      }
   }

   return anyDeld;
}


void VG_(discard_translations) ( Addr64 guest_start, ULong range,
                                 HChar* who )
{
   Sector* sec;
   Int     sno, ec;
   Bool    anyDeleted = False;

   vg_assert(init_done);

   VG_(debugLog)(2, "transtab",
                    "discard_translations(0x%llx, %lld) req by %s\n",
                    guest_start, range, who );

   /* Pre-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Bool sane = sanity_check_all_sectors();
      vg_assert(sane);
   }

   if (range == 0)
      return;

   VexArch vex_arch = VexArch_INVALID;
   VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

   /* There are two different ways to do this.

      If the range fits within a single address-range equivalence
      class, as will be the case for a cache line sized invalidation,
      then we only have to inspect the set of translations listed in
      that equivalence class, and also in the "sin-bin" equivalence
      class ECLASS_MISC.

      Otherwise, the invalidation is of a larger range and probably
      results from munmap.  In this case it's (probably!) faster just
      to inspect all translations, dump those we don't want, and
      regenerate the equivalence class information (since modifying it
      in-situ is even more expensive).
   */
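
   /* (Illustration with made-up numbers: a 64-byte icache-line
      invalidation has range < 2^ECLASS_SHIFT (2048), so it is
      eligible for the fast path; it lands in a single bin unless it
      straddles a 2KB boundary, in which case range_to_eclass files
      it under ECLASS_MISC and we fall back to the slow scan below.
      An 8MB munmap can never fit in one bin, so it always takes the
      slow scan.) */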

   ec = ECLASS_MISC;
   if (range < (1ULL << ECLASS_SHIFT))
      ec = range_to_eclass( guest_start, (UInt)range );

   /* if ec is ECLASS_MISC then we aren't looking at just a single
      class, so use the slow scheme.  Else use the fast scheme,
      examining 'ec' and ECLASS_MISC. */

   if (ec != ECLASS_MISC) {

      VG_(debugLog)(2, "transtab",
                       "  FAST, ec = %d\n", ec);

      /* Fast scheme */
      vg_assert(ec >= 0 && ec < ECLASS_MISC);

      for (sno = 0; sno < N_SECTORS; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ec,
                          vex_arch
                       );
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ECLASS_MISC,
                          vex_arch
                       );
      }

   } else {

      /* slow scheme */

      VG_(debugLog)(2, "transtab",
                       "  SLOW, ec = %d\n", ec);

      for (sno = 0; sno < N_SECTORS; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector(
                          sec, sno, guest_start, range, vex_arch );
      }

   }

   if (anyDeleted)
      invalidateFastCache();

   /* don't forget the no-redir cache */
   unredir_discard_translations( guest_start, range );

   /* Post-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Int      i;
      TTEntry* tte;
      Bool     sane = sanity_check_all_sectors();
      vg_assert(sane);
      /* But now, also check the requested address range isn't
         present anywhere. */
      for (sno = 0; sno < N_SECTORS; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
            tte = &sec->tt[i];
            if (tte->status != InUse)
               continue;
            vg_assert(!overlaps( guest_start, range, &tte->vge ));
         }
      }
   }
}


/*------------------------------------------------------------*/
/*--- AUXILIARY: the unredirected TT/TC                    ---*/
/*------------------------------------------------------------*/

/* A very simple translation cache which holds a small number of
   unredirected translations.  This is completely independent of the
   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   both structures are simply dumped and we start over.

   Since these translations are unredirected, the search key is (by
   definition) the first address entry in the .vge field. */

/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
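
/* Note: that works out as 500 * 1000 = 500000 bytes of no-redir code
   space; N_UNREDIR_TCQ expresses the same quantity as 62500 64-bit
   words, the unit in which unredir_tc_used counts space below. */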

typedef
   struct {
      VexGuestExtents vge;
      Addr            hcode;
      Bool            inUse;
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong *unredir_tc;
static Int    unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;


static void init_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tc == NULL) {
      SysRes sres = VG_(am_mmap_anon_float_valgrind)
                       ( N_UNREDIR_TT * UNREDIR_SZB );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
                                     N_UNREDIR_TT * UNREDIR_SZB);
         /*NOTREACHED*/
      }
      unredir_tc = (ULong *)(AddrH)sr_Res(sres);
   }
   unredir_tc_used = 0;
   for (i = 0; i < N_UNREDIR_TT; i++)
      unredir_tt[i].inUse = False;
   unredir_tt_highwater = -1;
}

/* Do a sanity check; return False on failure. */
static Bool sanity_check_redir_tt_tc ( void )
{
   Int i;
   if (unredir_tt_highwater < -1) return False;
   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;

   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
      if (unredir_tt[i].inUse)
         return False;

   if (unredir_tc_used < 0) return False;
   if (unredir_tc_used > N_UNREDIR_TCQ) return False;

   return True;
}


/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   is temporarily in code[0 .. code_len-1].
*/
void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
                                   Addr64           entry,
                                   AddrH            code,
                                   UInt             code_len )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_redir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How many unredir_tc slots (64-bit words) are needed */
   code_szQ = (code_len + 7) / 8;

   /* Look for an empty unredir_tt slot */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
      i = 0;
   }

   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   VG_(invalidate_icache)( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}

Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
                                    Addr64 guest_addr )
{
   Int i;
   for (i = 0; i < N_UNREDIR_TT; i++) {
      if (!unredir_tt[i].inUse)
         continue;
      if (unredir_tt[i].vge.base[0] == guest_addr) {
         *result = (AddrH)unredir_tt[i].hcode;
         return True;
      }
   }
   return False;
}

static void unredir_discard_translations( Addr64 guest_start, ULong range )
{
   Int i;

   vg_assert(sanity_check_redir_tt_tc());

   for (i = 0; i <= unredir_tt_highwater; i++) {
      if (unredir_tt[i].inUse
          && overlaps( guest_start, range, &unredir_tt[i].vge))
         unredir_tt[i].inUse = False;
   }
}
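
/* Note: lookups and discards in the no-redir structures are plain
   linear scans.  Given at most N_UNREDIR_TT (500) slots and the
   rarity of unredirected translations, that looks like a deliberate
   simplicity-over-speed tradeoff; unredir_tt_highwater at least
   bounds the discard scan when only a few slots have ever been
   used. */
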
/*------------------------------------------------------------*/
/*--- Initialisation.                                      ---*/
/*------------------------------------------------------------*/

void VG_(init_tt_tc) ( void )
{
   Int i, j, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(Addr64) == 8);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) )
             == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be.  */
   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE);

   /* Initialise the sectors */
   youngest_sector = 0;
   for (i = 0; i < N_SECTORS; i++) {
      sectors[i].tc = NULL;
      sectors[i].tt = NULL;
      sectors[i].tc_next = NULL;
      sectors[i].tt_n_inuse = 0;
      for (j = 0; j < ECLASS_N; j++) {
         sectors[i].ec2tte_size[j] = 0;
         sectors[i].ec2tte_used[j] = 0;
         sectors[i].ec2tte[j]      = NULL;
      }
      sectors[i].host_extents = NULL;
   }

   /* Initialise the sector_search_order hint table. */
   for (i = 0; i < N_SECTORS; i++)
      sector_search_order[i] = -1;

   /* Initialise the fast cache. */
   invalidateFastCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2) {
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
                   N_SECTORS, 8 * tc_sector_szQ,
                   N_SECTORS * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: table: %d total entries, max occupancy %d (%d%%)\n",
                   N_SECTORS * N_TTES_PER_SECTOR,
                   N_SECTORS * N_TTES_PER_SECTOR_USABLE,
                   SECTOR_TT_LIMIT_PERCENT );
   }

   VG_(debugLog)(2, "transtab",
                 "cache: %d sectors of %d bytes each = %d total\n",
                 N_SECTORS, 8 * tc_sector_szQ,
                 N_SECTORS * 8 * tc_sector_szQ );
   VG_(debugLog)(2, "transtab",
                 "table: %d total entries, max occupancy %d (%d%%)\n",
                 N_SECTORS * N_TTES_PER_SECTOR,
                 N_SECTORS * N_TTES_PER_SECTOR_USABLE,
                 SECTOR_TT_LIMIT_PERCENT );
}
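
/* Sizing example (with a hypothetical avg_translation_sizeB of 128):
   avg_codeszQ = (128+7)/8 = 16, so tc_sector_szQ = 42588 * 17
   = 723996 ULongs, i.e. about 5.5MB of code cache per sector and
   about 44MB over all 8 sectors.  (42588 is
   N_TTES_PER_SECTOR_USABLE, 65% of N_TTES_PER_SECTOR = 65521.) */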


/*------------------------------------------------------------*/
/*--- Printing out statistics.                             ---*/
/*------------------------------------------------------------*/

static ULong safe_idiv( ULong a, ULong b )
{
   return (b == 0 ? 0 : a / b);
}

UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}

void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
                "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
                n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
                "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
                n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new        %'llu "
                "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(10*n_in_tsize, n_in_osize),
                n_in_sc_count);
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
                n_dump_count, n_dump_osize );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
                n_disc_count, n_disc_osize );

   if (0) {
      Int i;
      VG_(printf)("\n");
      for (i = 0; i < ECLASS_N; i++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
         if (i % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}

/*------------------------------------------------------------*/
/*--- Printing out of profiling results.                   ---*/
/*------------------------------------------------------------*/

static ULong score ( TTEntry* tte )
{
   return ((ULong)tte->weight) * ((ULong)tte->count);
}

ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
{
   Int   sno, i, r, s;
   ULong score_total;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains the addresses and scores of the most-used
      n_tops blocks, in descending order (viz, tops[0] is the highest
      scorer). */
   for (i = 0; i < n_tops; i++) {
      tops[i].addr  = 0;
      tops[i].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < N_SECTORS; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].tt[i]);
         /* Find the rank for sectors[sno].tt[i]. */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;
            if (tops[r].addr == 0) {
               r--;
               continue;
            }
            if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
               r--;
               continue;
            }
            break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].tt[i].entry;
            tops[r].score = score( &sectors[sno].tt[i] );
         }
      }
   }

   return score_total;
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/