/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_vki.h"        // to keep pub_core_libcproc.h happy, sigh
#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
#include "pub_core_xarray.h"
#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses


#define DEBUG_TRANSTAB 0


/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/* Nr of sectors provided via command line parameter. */
UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
/* Nr of sectors.
   Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
static UInt n_sectors = 0;

/*------------------ CONSTANTS ------------------*/
/* Number of TC entries in each sector.  This needs to be a prime
   number to work properly.  It must be <= 65535, so that a TT index
   fits in a UShort, leaving room for 0xFFFF (EC2TTE_DELETED) to
   denote 'deleted', and it is strongly recommended not to change
   this.  65521 is the largest prime <= 65535. */
#define N_TTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR_USABLE \
           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)

/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
#define ECLASS_SHIFT 11
#define ECLASS_WIDTH 8
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)

#define EC2TTE_DELETED 0xFFFF /* 16-bit special value */
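
/* Illustrative sketch (not compiled): how the ECLASS parameters bin a
   guest address range.  With ECLASS_SHIFT == 11 and ECLASS_WIDTH == 8,
   bits [18:11] of an address select one of the 256 specific bins, so a
   bin holds 2KB-aligned chunks and the bin pattern repeats every
   512KB.  A range lands in a specific bin only if its first and last
   bytes agree in those bits; otherwise it goes to ECLASS_MISC (see
   range_to_eclass further down).  The addresses below are made up,
   purely to show the arithmetic. */
#if 0
static void eclass_binning_example ( void )
{
   UInt mask = (1 << ECLASS_WIDTH) - 1;     /* 0xFF */
   /* 32 bytes at 0x401800: bits [18:11] of the first and last bytes
      are both 0x03, so the range maps to bin 3. */
   UInt lo = 0x401800, hi = lo + 32 - 1;
   vg_assert(((lo >> ECLASS_SHIFT) & mask) == 0x03);
   vg_assert(((hi >> ECLASS_SHIFT) & mask) == 0x03);
   /* 32 bytes at 0x401FF0 straddle a 2KB boundary: bits [18:11]
      differ (0x03 vs 0x04), so the range maps to ECLASS_MISC. */
   lo = 0x401FF0; hi = lo + 32 - 1;
   vg_assert(((lo >> ECLASS_SHIFT) & mask)
             != ((hi >> ECLASS_SHIFT) & mask));
}
#endif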


/*------------------ TYPES ------------------*/

/* In edges ("to-me") in the graph created by chaining. */
typedef
   struct {
      UInt from_sNo;   /* sector number */
      UInt from_tteNo; /* TTE number in given sector */
      UInt from_offs;  /* code offset from TCEntry::tcptr where the
                          patch is */
      Bool to_fastEP;  /* Is the patch to a fast or slow entry point? */
   }
   InEdge;


/* Out edges ("from-me") in the graph created by chaining. */
typedef
   struct {
      UInt to_sNo;    /* sector number */
      UInt to_tteNo;  /* TTE number in given sector */
      UInt from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;


#define N_FIXED_IN_EDGE_ARR 3
typedef
   struct {
      UInt   n_fixed; /* 0 .. N_FIXED_IN_EDGE_ARR */
      InEdge fixed[N_FIXED_IN_EDGE_ARR];
      XArray* var; /* XArray* of InEdge */
   }
   InEdgeArr;

#define N_FIXED_OUT_EDGE_ARR 2
typedef
   struct {
      UInt    n_fixed; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      OutEdge fixed[N_FIXED_OUT_EDGE_ARR];
      XArray* var; /* XArray* of OutEdge */
   }
   OutEdgeArr;
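
/* Illustrative sketch (not compiled) of the storage discipline these
   two types follow, as implemented by the InEdgeArr__* / OutEdgeArr__*
   helpers further down: edges live in the inline 'fixed' array until
   it is full, after which everything is moved into a heap-allocated
   XArray and n_fixed drops to zero.  Exactly one representation is
   active at any time. */
#if 0
static void in_edge_arr_demo ( void )
{
   InEdgeArr iea;
   InEdge    ie;
   VG_(memset)(&iea, 0, sizeof(iea));  /* empty: n_fixed 0, var NULL */
   InEdge__init(&ie);
   InEdgeArr__add(&iea, &ie);          /* adds 1..3 go in 'fixed' */
   InEdgeArr__add(&iea, &ie);
   InEdgeArr__add(&iea, &ie);
   vg_assert(iea.n_fixed == 3 && iea.var == NULL);
   InEdgeArr__add(&iea, &ie);          /* add 4 spills to the XArray */
   vg_assert(iea.n_fixed == 0 && iea.var != NULL);
   vg_assert(InEdgeArr__size(&iea) == 4);
   InEdgeArr__makeEmpty(&iea);         /* frees the XArray again */
}
#endif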


/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too. */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      ULong    count;
      UShort   weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
         1. erased (as part of the oldest sector cleanup) when the
            youngest sector is full.
         2. discarded due to calls to VG_(discard_translations).
            VG_(discard_translations) sets the status of the
            translation to 'Deleted'.
            Among others, the gdbserver discards one or more
            translations when a breakpoint is inserted or removed at
            an Addr, or when single stepping mode is enabled/disabled,
            or when a translation is instrumented for gdbserver
            (all the target jumps of this translation are
            invalidated).

         So, it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         In case a translation is erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and
         find_TTEntry_from_hcode check that the 'from' translation
         still exists before doing the patching.

         Is it safe to erase or discard the current translation E being
         executed?  Amazingly, yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being done.  A new translation is done only
         if the host addr is a not-yet-patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state.  Control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not do the chaining to a translation E that cannot be
         found.  The execution will continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver).  VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased.  In particular, its host code can only
         be overwritten or erased in case a new translation is done.
         A new translation will only be done if a not-yet-translated
         jump is to be executed.  The execution of the Deleted
         translation E will continue till a non-patched jump is
         encountered.  This situation is then similar to the 'erasing'
         case above: the current translation E can be erased or
         overwritten, as the execution will continue at the new
         translation N.
      */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations.)  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if  A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntry;


/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      UInt   tteNo;
   }
   HostExtent;

/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry .count and .weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try to size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      UShort* ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;
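
/* A back-of-envelope sketch (not compiled) of how tc and tt sizes
   relate.  The actual sizing is done in VG_(init_tt_tc), which is not
   part of this section; the formula below is an assumption, shown
   only to illustrate the intent stated above: make tc big enough that
   it fills at about the same time tt reaches its load limit. */
#if 0
static Int tc_size_in_ulongs_sketch ( void )
{
   /* avg_translation_sizeB is the tool's estimate of host code bytes
      per translation (declared via pub_core_tooliface.h). */
   Int avgB = VG_(details).avg_translation_sizeB;
   /* Assumed sizing rule: enough 8-byte words for a load-limited
      sector's worth of translations of that average size. */
   return (N_TTES_PER_SECTOR_USABLE * avgB + 7) / 8;
}
#endif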


/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all n_sectors have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, youngest_sector should be >= 0 and < n_sectors.  The
   initial -1 value indicates the TT/TC system is not yet initialised.
*/
static Sector sectors[MAX_N_SECTORS];
static Int    youngest_sector = -1;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int tc_sector_szQ = 0;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  -1 denotes "no entry". */
static Int sector_search_order[MAX_N_SECTORS];


/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
   pointed at to cause that cache entry to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value of 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
/*
typedef
   struct {
      Addr guest;
      Addr host;
   }
   FastCacheEntry;
*/
/*global*/ __attribute__((aligned(16)))
FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];

/* Make sure we're not used before initialisation. */
static Bool init_done = False;
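
/* What the fast path in m_dispatch/dispatch-<platform>.S does with
   this array, rendered as a C sketch (not compiled): a direct-mapped
   probe in which TRANSTAB_BOGUS_GUEST_ADDR guarantees a miss for
   invalidated slots.  On a miss the dispatcher falls back to the full
   lookup, VG_(search_transtab) below. */
#if 0
static Bool fast_cache_probe_sketch ( Addr64 guest, /*OUT*/Addr* host )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(guest);
   if (VG_(tt_fast)[cno].guest == (Addr)guest) {
      *host = VG_(tt_fast)[cno].host;  /* hit: continue execution here */
      return True;
   }
   return False;                       /* miss: do a full lookup */
}
#endif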


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;


/*-------------------------------------------------------------*/
/*--- Misc                                                  ---*/
/*-------------------------------------------------------------*/

static void* ttaux_malloc ( const HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}

static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}


/*-------------------------------------------------------------*/
/*--- Chaining support                                      ---*/
/*-------------------------------------------------------------*/

static inline TTEntry* index_tte ( UInt sNo, UInt tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->tt);
   TTEntry* tte = &s->tt[tteNo];
   vg_assert(tte->status == InUse);
   return tte;
}

static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = -1; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}

static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = -1; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}

static void TTEntry__init ( TTEntry* tte )
{
   VG_(memset)(tte, 0, sizeof(*tte));
}

static UWord InEdgeArr__size ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return VG_(sizeXA)(iea->var);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      return iea->n_fixed;
   }
}

static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->var);
      iea->var = NULL;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}

static
InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return (InEdge*)VG_(indexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      return &iea->fixed[i];
   }
}

static
void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(removeIndexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      for (; i+1 < iea->n_fixed; i++) {
         iea->fixed[i] = iea->fixed[i+1];
      }
      iea->n_fixed--;
   }
}

static
void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(addToXA)(iea->var, ie);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         iea->var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
                               ttaux_free,
                               sizeof(InEdge));
         UWord i;
         for (i = 0; i < iea->n_fixed; i++) {
            VG_(addToXA)(iea->var, &iea->fixed[i]);
         }
         VG_(addToXA)(iea->var, ie);
         iea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         iea->fixed[iea->n_fixed++] = *ie;
      }
   }
}

static UWord OutEdgeArr__size ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return VG_(sizeXA)(oea->var);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      return oea->n_fixed;
   }
}

static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->var);
      oea->var = NULL;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}

static
OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return (OutEdge*)VG_(indexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      return &oea->fixed[i];
   }
}

static
void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(removeIndexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      for (; i+1 < oea->n_fixed; i++) {
         oea->fixed[i] = oea->fixed[i+1];
      }
      oea->n_fixed--;
   }
}

static
void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(addToXA)(oea->var, oe);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         oea->var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
                               ttaux_free,
                               sizeof(OutEdge));
         UWord i;
         for (i = 0; i < oea->n_fixed; i++) {
            VG_(addToXA)(oea->var, &oea->fixed[i]);
         }
         VG_(addToXA)(oea->var, oe);
         oea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         oea->fixed[oea->n_fixed++] = *oe;
      }
   }
}

static
Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
{
   const HostExtent* hx1 = v1;
   const HostExtent* hx2 = v2;
   if (hx1->start + hx1->len <= hx2->start) return -1;
   if (hx2->start + hx2->len <= hx1->start) return 1;
   return 0; /* partial overlap */
}

/* True if hx is a dead host extent, i.e. corresponds to host code
   of an entry that was invalidated. */
static
Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
{
   const UInt tteNo = hx->tteNo;
#define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
      VG_(printf) (m                                            \
                   " start 0x%p len %u sector %d ttslot %u"     \
                   " tt.entry 0x%llu tt.tcptr 0x%p\n",          \
                   hx->start, hx->len, (int)(sec - sectors),    \
                   hx->tteNo,                                   \
                   sec->tt[tteNo].entry, sec->tt[tteNo].tcptr)

   /* Entry might have been invalidated and not re-used yet. */
   if (sec->tt[tteNo].status == Deleted) {
      LDEBUG("found deleted entry");
      return True;
   }
   /* Maybe we found this entry via a host_extents which was
      inserted for an entry which was changed to Deleted and then
      re-used afterwards.  If this entry was re-used, then its tcptr
      is >= the host_extents start (i.e. the previous tcptr) + len.
      This is the case as there is no re-use of host code: a new or
      re-used entry always gets "higher value" host code. */
   if ((UChar*) sec->tt[tteNo].tcptr >= hx->start + hx->len) {
      LDEBUG("found re-used entry");
      return True;
   }

   return False;
#undef LDEBUG
}
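
/* The comparator above treats any partial overlap as equality, so a
   sorted XArray of non-overlapping HostExtents can be probed with a
   one-byte key to find the extent containing a given address.  This
   is exactly the trick find_TTEntry_from_hcode below relies on; a
   minimal standalone sketch (not compiled): */
#if 0
static HostExtent* find_extent_containing ( XArray* host_extents,
                                            UChar* addr )
{
   HostExtent key;
   VG_(memset)(&key, 0, sizeof(key));
   key.start = addr;
   key.len   = 1;  /* a 1-byte key "equals" whichever extent overlaps it */
   Word first = -1, last = -1;
   if (VG_(lookupXA_UNSAFE)(host_extents, &key, &first, &last,
                            HostExtent__cmpOrd))
      return (HostExtent*)VG_(indexXA)(host_extents, first);
   return NULL;
}
#endif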

static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/UInt* from_sNo,
                              /*OUT*/UInt* from_tteNo,
                              void* hcode )
{
   Int i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < n_sectors; i++) {
      Int sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      Sector* sec = &sectors[sno];
      XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len   = 1;
      Word firstW = -1, lastW = -1;
      Bool found  = VG_(lookupXA_UNSAFE)(
                       host_extents, &key, &firstW, &lastW,
                       HostExtent__cmpOrd );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         UInt tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo < N_TTES_PER_SECTOR);

         /* If this hx entry corresponds to dead host code, we must
            report that this code has not been found, as it cannot
            be patched. */
         if (HostExtent__is_dead (hx, sec))
            return False;

         vg_assert(sec->tt[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = (UInt)tteNo;
         return True;
      }
   }
   return False;
}


/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( void* hcode )
{
   Int i, sno;
   for (i = 0; i < n_sectors; i++) {
      sno = sector_search_order[i];
      if (sno == -1)
         break; /* run out of sectors to search */
      if ((UChar*)hcode >= (UChar*)sectors[sno].tc
          && (UChar*)hcode <= (UChar*)sectors[sno].tc_next
                              + sizeof(ULong) - 1)
         return True;
   }
   return False;
}


/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              UInt  to_sNo,
                              UInt  to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch vex_arch = VexArch_INVALID;
   VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

   // host_code is where we're patching to.  So it needs to
   // take into account whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntry* to_tte = index_tte(to_sNo, to_tteNo);
   void* host_code = ((UChar*)to_tte->tcptr)
                     + (to_fastEP ? LibVEX_evCheckSzB(vex_arch) : 0);

   // stay sane -- the jump target (dst) is in this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   UInt from_sNo   = (UInt)-1;
   UInt from_tteNo = (UInt)-1;
   Bool from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntry* from_tte = index_tte(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           vex_arch,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the in_edges and out_edges info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tte->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tte->in_edges, &ie);
   OutEdgeArr__add(&from_tte->out_edges, &oe);
}


/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch vex_arch,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   TTEntry* tte
      = index_tte(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((UChar*)tte->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
      = LibVEX_UnChain( vex_arch, place_to_patch,
                        place_to_jump_to_EXPECTED, disp_cp_chain_me );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}
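
/* The overall life cycle that VG_(tt_tc_do_chaining) and unchain_one
   implement, as a sketch (not compiled).  A fresh translation's exit
   branches are calls to VG_(disp_cp_chain_me_to_slowEP) or
   VG_(disp_cp_chain_me_to_fastEP); the first time such an exit is
   taken, the scheduler arranges for the destination to be translated
   and asks us to patch the call site into a direct jump.  When the
   destination later disappears, the patch is undone (see
   unchain_in_preparation_for_deletion below).  The function name here
   is hypothetical. */
#if 0
static void chaining_life_cycle_sketch ( void* patch_addr_in_A,
                                         UInt b_sNo, UInt b_tteNo )
{
   /* Block B now exists in sector b_sNo at entry b_tteNo.  Rewrite
      A's call-to-chain-me into a jump to B's fast entry point, and
      record an InEdge/OutEdge pair so this can be undone. */
   VG_(tt_tc_do_chaining)( patch_addr_in_A, b_sNo, b_tteNo,
                           True/*to_fastEP*/ );

   /* Later, if B is deleted or its sector is recycled, B's in_edges
      are walked and each patch site (including A's) is rewritten back
      into a call-to-chain-me, so execution re-enters the scheduler
      rather than jumping into freed code. */
}
#endif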


/* The specified block is about to be deleted.  Update the preds and
   succs of its associated blocks accordingly.  This includes undoing
   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch vex_arch,
                                           UInt here_sNo, UInt here_tteNo )
{
   if (DEBUG_TRANSTAB)
      VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
   UWord i, j, n, m;
   Int evCheckSzB = LibVEX_evCheckSzB(vex_arch);
   TTEntry* here_tte = index_tte(here_sNo, here_tteNo);
   if (DEBUG_TRANSTAB)
      VG_(printf)("... QQQ tt.entry 0x%llu tt.tcptr 0x%p\n",
                  here_tte->entry, here_tte->tcptr);
   vg_assert(here_tte->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tte->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
      // Undo the chaining.
      UChar* here_slow_EP = (UChar*)here_tte->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(vex_arch, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tte->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
            break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tte->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tte->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
            break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tte->in_edges, j);
   }

   InEdgeArr__makeEmpty(&here_tte->in_edges);
   OutEdgeArr__makeEmpty(&here_tte->out_edges);
}


/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range. */

static Int range_to_eclass ( Addr64 start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}
957 */ 958 959 static 960 Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses, 961 VexGuestExtents* vge ) 962 { 963 # define SWAP(_lv1,_lv2) \ 964 do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0) 965 966 Int i, j, n_ec, r; 967 968 vg_assert(vge->n_used >= 1 && vge->n_used <= 3); 969 970 n_ec = 0; 971 for (i = 0; i < vge->n_used; i++) { 972 r = range_to_eclass( vge->base[i], (UInt)vge->len[i] ); 973 if (r == ECLASS_MISC) 974 goto bad; 975 /* only add if we haven't already seen it */ 976 for (j = 0; j < n_ec; j++) 977 if (eclasses[j] == r) 978 break; 979 if (j == n_ec) 980 eclasses[n_ec++] = r; 981 } 982 983 if (n_ec == 1) 984 return 1; 985 986 if (n_ec == 2) { 987 /* sort */ 988 if (eclasses[0] > eclasses[1]) 989 SWAP(eclasses[0], eclasses[1]); 990 return 2; 991 } 992 993 if (n_ec == 3) { 994 /* sort */ 995 if (eclasses[0] > eclasses[2]) 996 SWAP(eclasses[0], eclasses[2]); 997 if (eclasses[0] > eclasses[1]) 998 SWAP(eclasses[0], eclasses[1]); 999 if (eclasses[1] > eclasses[2]) 1000 SWAP(eclasses[1], eclasses[2]); 1001 return 3; 1002 } 1003 1004 /* NOTREACHED */ 1005 vg_assert(0); 1006 1007 bad: 1008 eclasses[0] = ECLASS_MISC; 1009 return 1; 1010 1011 # undef SWAP 1012 } 1013 1014 1015 /* Add tteno to the set of entries listed for equivalence class ec in 1016 this sector. Returns used location in eclass array. */ 1017 1018 static 1019 UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno ) 1020 { 1021 Int old_sz, new_sz, i, r; 1022 UShort *old_ar, *new_ar; 1023 1024 vg_assert(ec >= 0 && ec < ECLASS_N); 1025 vg_assert(tteno < N_TTES_PER_SECTOR); 1026 1027 if (DEBUG_TRANSTAB) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno); 1028 1029 if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) { 1030 1031 vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]); 1032 1033 old_sz = sec->ec2tte_size[ec]; 1034 old_ar = sec->ec2tte[ec]; 1035 new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2; 1036 new_ar = ttaux_malloc("transtab.aECN.1", 1037 new_sz * sizeof(UShort)); 1038 for (i = 0; i < old_sz; i++) 1039 new_ar[i] = old_ar[i]; 1040 if (old_ar) 1041 ttaux_free(old_ar); 1042 sec->ec2tte_size[ec] = new_sz; 1043 sec->ec2tte[ec] = new_ar; 1044 1045 if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz); 1046 } 1047 1048 /* Common case */ 1049 r = sec->ec2tte_used[ec]++; 1050 vg_assert(r >= 0 && r < sec->ec2tte_size[ec]); 1051 sec->ec2tte[ec][r] = tteno; 1052 return (UInt)r; 1053 } 1054 1055 1056 /* 'vge' is being added to 'sec' at TT entry 'tteno'. Add appropriate 1057 eclass entries to 'sec'. */ 1058 1059 static 1060 void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno ) 1061 { 1062 Int i, r, eclasses[3]; 1063 TTEntry* tte; 1064 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR); 1065 1066 tte = &sec->tt[tteno]; 1067 r = vexGuestExtents_to_eclasses( eclasses, &tte->vge ); 1068 1069 vg_assert(r >= 1 && r <= 3); 1070 tte->n_tte2ec = r; 1071 1072 for (i = 0; i < r; i++) { 1073 tte->tte2ec_ec[i] = eclasses[i]; 1074 tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno ); 1075 } 1076 } 1077 1078 1079 /* Check the eclass info in 'sec' to ensure it is consistent. Returns 1080 True if OK, False if something's not right. Expensive. 


/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   const HChar* whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntries
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                     (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}


/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );

static Bool sanity_check_sector_search_order ( void )
{
   Int i, j, nListed;
   /* assert the array is the right size */
   vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
                               / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] < 0 || sector_search_order[i] >= n_sectors)
         break;
   }
   nListed = i;
   for (/* */; i < n_sectors; i++) {
      if (sector_search_order[i] != -1)
         break;
   }
   if (i != n_sectors)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == -1)
         continue;
      for (j = i+1; j < n_sectors; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */
   for (i = 0; i < n_sectors; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}

static Bool sanity_check_all_sectors ( void )
{
   Int     sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < n_sectors; sno++) {
      Int i;
      Int nr_not_dead_hx = 0;
      Int szhxa;
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
      szhxa = VG_(sizeXA)(sec->host_extents);
      for (i = 0; i < szhxa; i++) {
         const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
         if (!HostExtent__is_dead (hx, sec))
            nr_not_dead_hx++;
      }
      if (nr_not_dead_hx != sec->tt_n_inuse) {
         VG_(debugLog)(0, "transtab",
                       "nr_not_dead_hx %d sanity fail "
                       "(expected == in use %d)\n",
                       nr_not_dead_hx, sec->tt_n_inuse);
         return False;
      }
   }

   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}


/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

static UInt vge_osize ( VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static Bool isValidSector ( Int sector )
{
   if (sector < 0 || sector >= n_sectors)
      return False;
   return True;
}

static inline UInt HASH_TT ( Addr64 key )
{
   UInt kHi = (UInt)(key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return k32 % N_TTES_PER_SECTOR;
}
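
/* A worked example (not compiled) of the hash above: fold the 64-bit
   guest address to 32 bits, rotate right by 7 to mix low-order
   alignment bits upwards, then reduce modulo the (prime) table size.
   Collisions are handled by the linear probing in
   VG_(add_to_transtab) and VG_(search_transtab) below. */
#if 0
static void hash_tt_example ( void )
{
   Addr64 key = 0x0000000100403000ULL;
   UInt   kHi = (UInt)(key >> 32);     /* 0x00000001 */
   UInt   kLo = (UInt)key;             /* 0x00403000 */
   UInt   k32 = kHi ^ kLo;             /* 0x00403001 */
   k32 = (k32 >> 7) | (k32 << 25);     /* rotate right by 7 */
   vg_assert(HASH_TT(key) == k32 % N_TTES_PER_SECTOR);
}
#endif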

static void setFastCacheEntry ( Addr64 key, ULong* tcptr )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = (Addr)key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}

/* Invalidate the fast cache VG_(tt_fast). */
static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   }

   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}

static void initialiseSector ( Int sno )
{
   Int     i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);
      if (VG_(clo_stats))
         VG_(dmsg)("transtab: " "allocate sector %d\n", sno);

      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(AddrH)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);

      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }
      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == -1)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      if (VG_(clo_stats))
         VG_(dmsg)("transtab: " "recycle sector %d\n", sno);

      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      VexArch vex_arch = VexArch_INVALID;
      VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

      /* Visit each just-about-to-be-abandoned translation. */
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
                                      sno);
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
            unchain_in_preparation_for_deletion(vex_arch, sno, i);
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
                                      sno);

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            ttaux_free(sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }
      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == sno)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}


/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].

   pre: youngest_sector points to a valid (although possibly full)
   sector.
*/
void VG_(add_to_transtab)( VexGuestExtents* vge,
                           Addr64           entry,
                           AddrH            code,
                           UInt             code_len,
                           Bool             is_self_checking,
                           Int              offs_profInc,
                           UInt             n_guest_instrs,
                           VexArch          arch_host )
{
   Int    tcAvailQ, reqdQ, y, i;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   /* Generally stay sane */
   vg_assert(n_guest_instrs < 200); /* it can be zero, tho */

   if (DEBUG_TRANSTAB)
      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d) ...\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   reqdQ = (code_len + 7) >> 3;

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
                           / N_TTES_PER_SECTOR;
      Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
                           / tc_sector_szQ;
      VG_(debugLog)(1,"transtab",
                      "declare sector %d full "
                      "(TT loading %2d%%, TC loading %2d%%)\n",
                      y, tt_loading_pct, tc_loading_pct);
      if (VG_(clo_stats)) {
         VG_(dmsg)("transtab: "
                   "declare sector %d full "
                   "(TT loading %2d%%, TC loading %2d%%)\n",
                   y, tt_loading_pct, tc_loading_pct);
      }
      youngest_sector++;
      if (youngest_sector >= n_sectors)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }
   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   VG_(memcpy)(dstP, srcP, code_len);
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full. */
   i = HASH_TT(entry);
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   TTEntry__init(&sectors[y].tt[i]);
   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tcptr  = tcptr;
   sectors[y].tt[i].count  = 0;
   sectors[y].tt[i].weight = n_guest_instrs == 0 ? 1 : n_guest_instrs;
   sectors[y].tt[i].vge    = *vge;
   sectors[y].tt[i].entry  = entry;

   /* Patch in the profile counter location, if necessary. */
   if (offs_profInc != -1) {
      vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
      VexInvalRange vir
         = LibVEX_PatchProfInc( arch_host,
                                dstP + offs_profInc,
                                &sectors[y].tt[i].count );
      VG_(invalidate_icache)( (void*)vir.start, vir.len );
   }

   VG_(invalidate_icache)( dstP, code_len );

   /* Add this entry to the host_extents map, checking that we're
      adding in order. */
   { HostExtent hx;
     hx.start = (UChar*)tcptr;
     hx.len   = code_len;
     hx.tteNo = i;
     vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
     XArray* hx_array = sectors[y].host_extents;
     vg_assert(hx_array);
     Word n = VG_(sizeXA)(hx_array);
     if (n > 0) {
        HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
        vg_assert(hx_prev->start + hx_prev->len <= hx.start);
     }
     VG_(addToXA)(hx_array, &hx);
     if (DEBUG_TRANSTAB)
        VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
                    hx.start, hx.len, y, i);
   }

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}


/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.
*/
Bool VG_(search_transtab) ( /*OUT*/AddrH* res_hcode,
                            /*OUT*/UInt*  res_sNo,
                            /*OUT*/UInt*  res_tteNo,
                            Addr64        guest_addr,
                            Bool          upd_cache )
{
   Int i, j, k, kstart, sno;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   k      = -1;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < n_sectors; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      k = kstart;
      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         if (sectors[sno].tt[k].status == InUse
             && sectors[sno].tt[k].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[k].tcptr );
            if (res_hcode)
               *res_hcode = (AddrH)sectors[sno].tt[k].tcptr;
            if (res_sNo)
               *res_sNo = sno;
            if (res_tteNo)
               *res_tteNo = k;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         if (sectors[sno].tt[k].status == Empty)
            break; /* not found in this sector */
         k++;
         if (k == N_TTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}
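
/* How the two entry points above combine, as a sketch (not compiled)
   of the scheduler's lookup-translate-retry flow.  translate_here
   stands in for the real path through m_translate and is
   hypothetical. */
#if 0
static AddrH get_host_code_sketch ( Addr64 guest_addr )
{
   AddrH hcode = 0;
   /* Hit: also refreshes VG_(tt_fast) for the dispatcher. */
   if (VG_(search_transtab)( &hcode, NULL, NULL,
                             guest_addr, True/*upd_cache*/ ))
      return hcode;
   /* Miss: make a translation, install it, and retry; after
      VG_(add_to_transtab) the search cannot fail. */
   translate_here(guest_addr);  /* hypothetical */
   vg_assert( VG_(search_transtab)( &hcode, NULL, NULL,
                                    guest_addr, True ) );
   return hcode;
}
#endif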


/*-------------------------------------------------------------*/
/*--- Delete translations.                                  ---*/
/*-------------------------------------------------------------*/

/* forward */
static void unredir_discard_translations( Addr64, ULong );

/* Stuff for deleting translations which intersect with a given
   address range.  Unfortunately, to make this run at a reasonable
   speed, it is complex. */

static inline
Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
{
   Addr64 e1 = s1 + r1 - 1ULL;
   Addr64 e2 = s2 + r2 - 1ULL;
   if (e1 < s2 || e2 < s1)
      return False;
   return True;
}

static inline
Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
{
   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
      return True;
   if (vge->n_used < 2)
      return False;
   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
      return True;
   if (vge->n_used < 3)
      return False;
   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
      return True;
   return False;
}


/* Delete a tt entry, and update all the eclass data accordingly. */

static void delete_tte ( /*MOD*/Sector* sec, UInt secNo, Int tteno,
                         VexArch vex_arch )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   /* sec and secNo are mutually redundant; cross-check. */
   vg_assert(sec == &sectors[secNo]);

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Unchain .. */
   unchain_in_preparation_for_deletion(vex_arch, secNo, tteno);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   tte->status   = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}


/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass. */

static
Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, UInt secNo,
                                            Addr64 guest_start, ULong range,
                                            Int ec,
                                            VexArch vex_arch )
{
   Int      i;
   UShort   tteno;
   Bool     anyDeld = False;
   TTEntry* tte;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      tte = &sec->tt[tteno];
      vg_assert(tte->status == InUse);

      if (overlaps( guest_start, range, &tte->vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, (Int)tteno, vex_arch );
      }

   }

   return anyDeld;
}


/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec. */

static
Bool delete_translations_in_sector ( /*MOD*/Sector* sec, UInt secNo,
                                     Addr64 guest_start, ULong range,
                                     VexArch vex_arch )
{
   Int  i;
   Bool anyDeld = False;

   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
      if (sec->tt[i].status == InUse
          && overlaps( guest_start, range, &sec->tt[i].vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, i, vex_arch );
      }
   }

   return anyDeld;
}
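

/* For exposition: one plausible binning consistent with the
   ECLASS_SHIFT/ECLASS_WIDTH constants, shown standalone.  The real
   range_to_eclass, defined elsewhere in this file, is authoritative;
   this sketch only illustrates the idea used by
   VG_(discard_translations) below -- a small range maps to a single
   bin when it does not cross a 2^ECLASS_SHIFT boundary, and to the
   catch-all ECLASS_MISC otherwise.  Not compiled. */
#if 0
#include <stdint.h>

#define TOY_ECLASS_SHIFT 11
#define TOY_ECLASS_WIDTH 8
#define TOY_ECLASS_MISC  (1 << TOY_ECLASS_WIDTH)

static int toy_range_to_eclass ( uint64_t start, uint32_t len )
{
   uint64_t blk1 = start             >> TOY_ECLASS_SHIFT;
   uint64_t blk2 = (start + len - 1) >> TOY_ECLASS_SHIFT;
   if (blk1 != blk2)
      return TOY_ECLASS_MISC;   /* straddles a bin boundary */
   /* fold the block number into one of 2^TOY_ECLASS_WIDTH bins */
   return (int)(blk1 & ((1 << TOY_ECLASS_WIDTH) - 1));
}
#endif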


void VG_(discard_translations) ( Addr64 guest_start, ULong range,
                                 const HChar* who )
{
   Sector* sec;
   Int     sno, ec;
   Bool    anyDeleted = False;

   vg_assert(init_done);

   VG_(debugLog)(2, "transtab",
                    "discard_translations(0x%llx, %llu) req by %s\n",
                    guest_start, range, who );

   /* Pre-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Bool sane = sanity_check_all_sectors();
      vg_assert(sane);
   }

   if (range == 0)
      return;

   VexArch vex_arch = VexArch_INVALID;
   VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

   /* There are two different ways to do this.

      If the range fits within a single address-range equivalence
      class, as will be the case for a cache line sized invalidation,
      then we only have to inspect the set of translations listed in
      that equivalence class, and also in the "sin-bin" equivalence
      class ECLASS_MISC.

      Otherwise, the invalidation is of a larger range and probably
      results from munmap.  In this case it's (probably!) faster just
      to inspect all translations, dump those we don't want, and
      regenerate the equivalence class information (since modifying it
      in-situ is even more expensive).
   */

   /* First off, figure out if the range falls within a single class,
      and if so which one. */

   ec = ECLASS_MISC;
   if (range < (1ULL << ECLASS_SHIFT))
      ec = range_to_eclass( guest_start, (UInt)range );

   /* if ec is ECLASS_MISC then we aren't looking at just a single
      class, so use the slow scheme.  Else use the fast scheme,
      examining 'ec' and ECLASS_MISC. */

   if (ec != ECLASS_MISC) {

      VG_(debugLog)(2, "transtab",
                       "  FAST, ec = %d\n", ec);

      /* Fast scheme */
      vg_assert(ec >= 0 && ec < ECLASS_MISC);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ec,
                          vex_arch
                       );
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ECLASS_MISC,
                          vex_arch
                       );
      }

   } else {

      /* slow scheme */

      VG_(debugLog)(2, "transtab",
                       "  SLOW, ec = %d\n", ec);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector(
                          sec, sno, guest_start, range, vex_arch );
      }

   }

   if (anyDeleted)
      invalidateFastCache();

   /* don't forget the no-redir cache */
   unredir_discard_translations( guest_start, range );

   /* Post-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Int      i;
      TTEntry* tte;
      Bool     sane = sanity_check_all_sectors();
      vg_assert(sane);
      /* But now, also check the requested address range isn't
         present anywhere. */
      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
            tte = &sec->tt[i];
            if (tte->status != InUse)
               continue;
            vg_assert(!overlaps( guest_start, range, &tte->vge ));
         }
      }
   }
}
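

/* For exposition: a toy analogue of the fast cache, illustrating why
   any deletion above must be followed by invalidateFastCache().  The
   cache is a direct-mapped (guest, host) array with no back-pointers
   from translations to cache slots, so after deleting translations
   there is no cheap way to locate the stale entries; flushing the
   whole array is the only safe option.  The size, index hash and
   sentinel here are placeholders, not the real VG_TT_FAST layout.
   Not compiled. */
#if 0
#include <stdint.h>

#define TOY_FAST_N   1024                /* placeholder; power of 2 */
#define TOY_NO_GUEST ((uint64_t)1)       /* an impossible guest address */

typedef struct { uint64_t guest; void* host; } ToyFastEntry;
static ToyFastEntry toy_fast[TOY_FAST_N];

static void toy_set_fast ( uint64_t guest, void* host )
{
   uint32_t ix = (uint32_t)(guest >> 2) & (TOY_FAST_N - 1);
   toy_fast[ix].guest = guest;
   toy_fast[ix].host  = host;
}

static void toy_invalidate_fast ( void )
{
   int i;
   for (i = 0; i < TOY_FAST_N; i++) {
      toy_fast[i].guest = TOY_NO_GUEST;  /* can never match a lookup */
      toy_fast[i].host  = 0;
   }
}
#endif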


/*------------------------------------------------------------*/
/*--- AUXILIARY: the unredirected TT/TC                    ---*/
/*------------------------------------------------------------*/

/* A very simple translation cache which holds a small number of
   unredirected translations.  This is completely independent of the
   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   both structures are simply dumped and we start over.

   Since these translations are unredirected, the search key is (by
   definition) the first address entry in the .vge field. */

/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))

typedef
   struct {
      VexGuestExtents vge;
      Addr            hcode;
      Bool            inUse;
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong *unredir_tc;
static Int    unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;


static void init_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tc == NULL) {
      SysRes sres = VG_(am_mmap_anon_float_valgrind)
                       ( N_UNREDIR_TT * UNREDIR_SZB );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
                                     N_UNREDIR_TT * UNREDIR_SZB);
         /*NOTREACHED*/
      }
      unredir_tc = (ULong *)(AddrH)sr_Res(sres);
   }
   unredir_tc_used = 0;
   for (i = 0; i < N_UNREDIR_TT; i++)
      unredir_tt[i].inUse = False;
   unredir_tt_highwater = -1;
}

/* Do a sanity check; return False on failure. */
static Bool sanity_check_redir_tt_tc ( void )
{
   Int i;
   if (unredir_tt_highwater < -1) return False;
   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;

   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
      if (unredir_tt[i].inUse)
         return False;

   if (unredir_tc_used < 0) return False;
   if (unredir_tc_used > N_UNREDIR_TCQ) return False;

   return True;
}


/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   is temporarily in code[0 .. code_len-1].
*/
void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
                                   Addr64           entry,
                                   AddrH            code,
                                   UInt             code_len )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_redir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How many unredir_tt slots are needed */
   code_szQ = (code_len + 7) / 8;

   /* Look for an empty unredir_tc slot */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
      i = 0;
   }

   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   VG_(invalidate_icache)( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}

Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
                                    Addr64        guest_addr )
{
   Int i;
   for (i = 0; i < N_UNREDIR_TT; i++) {
      if (!unredir_tt[i].inUse)
         continue;
      if (unredir_tt[i].vge.base[0] == guest_addr) {
         *result = (AddrH)unredir_tt[i].hcode;
         return True;
      }
   }
   return False;
}

static void unredir_discard_translations( Addr64 guest_start, ULong range )
{
   Int i;

   vg_assert(sanity_check_redir_tt_tc());

   for (i = 0; i <= unredir_tt_highwater; i++) {
      if (unredir_tt[i].inUse
          && overlaps( guest_start, range, &unredir_tt[i].vge))
         unredir_tt[i].inUse = False;
   }
}
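

/* For exposition: the allocation discipline of unredir_tc above is a
   plain bump allocator over ULong-sized quanta -- round the byte
   length up to 8-byte words, check it fits, advance the cursor, and
   when it does not fit, discard everything and start over.  A
   standalone sketch with a toy arena size; assumes no single request
   exceeds the arena.  Not compiled. */
#if 0
#include <stdint.h>

#define TOY_ARENA_Q 64               /* toy arena size, in 8-byte words */
static uint64_t toy_arena[TOY_ARENA_Q];
static int      toy_arena_used = 0;

/* Return space for code_len bytes; on overflow, dump the whole arena
   and retry from the start (mirroring init_unredir_tt_tc above). */
static void* toy_bump_alloc ( uint32_t code_len )
{
   int   szQ = (int)((code_len + 7) / 8);  /* round up to whole words */
   void* p;
   if (szQ > TOY_ARENA_Q - toy_arena_used)
      toy_arena_used = 0;                  /* full: dump and start over */
   p = &toy_arena[toy_arena_used];
   toy_arena_used += szQ;
   return p;
}
#endif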


/*------------------------------------------------------------*/
/*--- Initialisation.                                      ---*/
/*------------------------------------------------------------*/

void VG_(init_tt_tc) ( void )
{
   Int i, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(Addr64) == 8);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) )
             == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be. */
   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE);

   n_sectors = VG_(clo_num_transtab_sectors);
   vg_assert(n_sectors >= MIN_N_SECTORS);
   vg_assert(n_sectors <= MAX_N_SECTORS);

   /* Initialise the sectors, even the ones we aren't going to use.
      Set all fields to zero. */
   youngest_sector = 0;
   for (i = 0; i < MAX_N_SECTORS; i++)
      VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));

   /* Initialise the sector_search_order hint table, including the
      entries we aren't going to use. */
   for (i = 0; i < MAX_N_SECTORS; i++)
      sector_search_order[i] = -1;

   /* Initialise the fast cache. */
   invalidateFastCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
       || VG_(debugLog_getLevel) () >= 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
          n_sectors, 8 * tc_sector_szQ,
          n_sectors * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d tables of %d bytes each = %d total\n",
          n_sectors, (int)(N_TTES_PER_SECTOR * sizeof(TTEntry)),
          (int)(n_sectors * N_TTES_PER_SECTOR * sizeof(TTEntry)));
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d entries each = %d total entries"
         " max occupancy %d (%d%%)\n",
          N_TTES_PER_SECTOR,
          n_sectors * N_TTES_PER_SECTOR,
          n_sectors * N_TTES_PER_SECTOR_USABLE,
          SECTOR_TT_LIMIT_PERCENT );
   }
}
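

/* For exposition: the sector-sizing arithmetic above, worked through
   with a hypothetical avg_translation_sizeB of 172 bytes (the real
   value is supplied by the tool via VG_(details)).  With the usable
   limit of 42588 entries per sector this yields about 7.8 MB of tc
   per sector.  Not compiled. */
#if 0
#include <stdio.h>

static void toy_sector_sizing ( void )
{
   int avg_sizeB   = 172;                  /* hypothetical tool value */
   int usable      = (65521 * 65) / 100;   /* N_TTES_PER_SECTOR_USABLE */
   int avg_codeszQ = (avg_sizeB + 7) / 8;  /* = 22 ULongs */
   int sector_szQ  = usable * (1 + avg_codeszQ);
   printf("tc per sector: %d ULongs = %d bytes\n",
          sector_szQ, 8 * sector_szQ);
}
#endif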


/*------------------------------------------------------------*/
/*--- Printing out statistics.                             ---*/
/*------------------------------------------------------------*/

static ULong safe_idiv( ULong a, ULong b )
{
   return (b == 0 ? 0 : a / b);
}

UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}

void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
      n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
      n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new        %'llu "
                "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(10*n_in_tsize, n_in_osize),
                n_in_sc_count);
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
                n_dump_count, n_dump_osize );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
                n_disc_count, n_disc_osize );

   if (DEBUG_TRANSTAB) {
      Int i;
      VG_(printf)("\n");
      for (i = 0; i < ECLASS_N; i++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
         if (i % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}

/*------------------------------------------------------------*/
/*--- Printing out of profiling results.                   ---*/
/*------------------------------------------------------------*/

static ULong score ( TTEntry* tte )
{
   return ((ULong)tte->weight) * ((ULong)tte->count);
}

ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
{
   Int   sno, i, r, s;
   ULong score_total;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (i = 0; i < n_tops; i++) {
      tops[i].addr  = 0;
      tops[i].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].tt[i]);
         /* Find the rank for sectors[sno].tt[i]. */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;
            if (tops[r].addr == 0) {
               r--;
               continue;
            }
            if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
               r--;
               continue;
            }
            break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].tt[i].entry;
            tops[r].score = score( &sectors[sno].tt[i] );
         }
      }
   }

   /* Now zero out all the counter fields, so that we can make
      multiple calls here and just get the values since the last call,
      each time, rather than values accumulated for the whole run. */
   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         sectors[sno].tt[i].count = 0;
      }
   }

   return score_total;
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/