/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2010 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)

// JRS FIXME get rid of this somehow
#if defined(VGP_arm_linux)
# include "pub_core_vkiscnums.h" // __NR_ARM_cacheflush
# include "pub_core_syscall.h"   // VG_(do_syscallN)
#endif


/* #define DEBUG_TRANSTAB */


/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/*------------------ CONSTANTS ------------------*/

/* Number of sectors the TC is divided into.  If you need a larger
   overall translation cache, increase this value. */
#define N_SECTORS 8

/* Number of TT entries in each sector.  This needs to be a prime
   number to work properly, it must be <= 65535 (so that a TT index
   fits in a UShort, leaving room for 0xFFFF (EC2TTE_DELETED) to
   denote 'deleted'), and it is strongly recommended not to change
   this.  65521 is the largest prime <= 65535. */
#define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR_USABLE \
           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)

/* Equivalence classes for fast address range deletion.  There are
   1 + 2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes
   an address range which does not fall cleanly within any specific
   bin.  Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
#define ECLASS_SHIFT 11
#define ECLASS_WIDTH 8
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)

#define EC2TTE_DELETED 0xFFFF /* 16-bit special value */
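
/* To make the binning concrete: with ECLASS_SHIFT == 11 and
   ECLASS_WIDTH == 8, bits [18:11] of an address select one of 256
   bins, so each bin covers 2KB-aligned 2KB stripes of the address
   space, recurring every 512KB.  The sketch below (not compiled;
   'bin_of' is a stand-in name) restates the classification rule that
   range_to_eclass implements further down. */
#if 0
   /* A range lands in a specific bin only if its first and last
      bytes agree on bits [ECLASS_SHIFT+ECLASS_WIDTH-1 : ECLASS_SHIFT];
      otherwise it is classified as ECLASS_MISC.  E.g. a 32-byte range
      starting at 0x12345678 has (0x12345678 >> 11) & 0xFF == 0x8B at
      both ends, so it lands in bin 0x8B, whereas a 4KB range
      necessarily straddles a stripe boundary and goes to
      ECLASS_MISC. */
   static UInt bin_of ( Addr64 start, UInt len )
   {
      UInt mask   = (1 << ECLASS_WIDTH) - 1;
      UInt loBits = (((UInt)start)           >> ECLASS_SHIFT) & mask;
      UInt hiBits = (((UInt)start + len - 1) >> ECLASS_SHIFT) & mask;
      return loBits == hiBits ? loBits : ECLASS_MISC;
   }
#endif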


/*------------------ TYPES ------------------*/

/* A translation-table entry.  This indicates precisely which areas
   of guest code are included in the translation, and contains all
   other auxiliary info too. */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and is computed once, when the translation is
         created.  Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      UInt     count;
      UShort   weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!).
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant, but both are necessary to make fast deletions
         work.  The eclass info is similar to, and derived from, this
         entry's 'vge' field, but it is not the same. */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.
   }
   TTEntry;


/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry .count and .weight fields, if required.  Each sector is
   independent in that no cross-sector references are allowed.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      UShort* ec2tte[ECLASS_N];
   }
   Sector;
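
/* The TTEntry <-> ec2tte cross-linkage above is easy to get wrong,
   so here is its invariant spelled out as (uncompiled) code, with
   'sec', 'tte', 'tteno' and 'i' as stand-in names.  It is exactly
   what sanity_check_eclasses_in_sector below verifies, reduced to
   its essence: */
#if 0
   /* For a live entry tte == &sec->tt[tteno], each of its 1..3
      eclass memberships must point back at it: */
   for (i = 0; i < tte->n_tte2ec; i++)
      vg_assert( sec->ec2tte[ tte->tte2ec_ec[i] ][ tte->tte2ec_ix[i] ]
                 == (UShort)tteno );
#endif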


/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, youngest_sector should be >= 0 and < N_SECTORS.  The
   initial value of -1 indicates the TT/TC system is not yet
   initialised.
*/
static Sector sectors[N_SECTORS];
static Int    youngest_sector = -1;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int    tc_sector_szQ;


/* A list of sector numbers, in the order in which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  -1 denotes "no entry". */
static Int sector_search_order[N_SECTORS];


/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
   pointed at to cause that cache entry to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value of 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
/*
typedef
   struct {
      Addr guest;
      Addr host;
   }
   FastCacheEntry;
*/
/*global*/ __attribute__((aligned(16)))
           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
/*
#define TRANSTAB_BOGUS_GUEST_ADDR ((Addr)1)
*/

/* For profiling, we have a parallel array of pointers to .count
   fields in TT entries.  Again, these pointers must be invalidated
   when translations disappear.  A NULL pointer suffices to indicate
   an unused slot.

   When not profiling (the normal case, VG_(clo_profile_flags) == 0),
   all tt_fastN entries are set to NULL at startup and never read nor
   written after that.

   When profiling (VG_(clo_profile_flags) > 0), tt_fast and tt_fastN
   change together: if tt_fast[i].guest is TRANSTAB_BOGUS_GUEST_ADDR
   then the corresponding tt_fastN[i] must be NULL.  If
   tt_fast[i].guest is any other value, then tt_fastN[i] *must* point
   to the .count field of the corresponding TT entry.

   tt_fast and tt_fastN are referred to from assembly code
   (dispatch.S).
*/
/*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE];
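
/* For orientation: the dispatcher's hot-path probe of this cache,
   re-expressed as (uncompiled) C.  The real code lives in
   m_dispatch/dispatch-<platform>.S; 'goto_host_code' and
   'handle_miss' are stand-in names for what the assembly actually
   does.  Only the intended logic is shown: */
#if 0
   FastCacheEntry* e = &VG_(tt_fast)[ VG_TT_FAST_HASH(guest_ip) ];
   if (LIKELY(e->guest == guest_ip))
      goto_host_code( e->host );  /* hit: direct dispatch */
   else
      handle_miss( guest_ip );    /* miss: full VG_(search_transtab) */
#endif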

/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
ULong n_fast_flushes = 0;
ULong n_fast_updates = 0;

/* Number of full lookups done. */
ULong n_full_lookups  = 0;
ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
ULong n_in_count    = 0;
ULong n_in_osize    = 0;
ULong n_in_tsize    = 0;
ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
ULong n_dump_count = 0;
ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
ULong n_disc_count = 0;
ULong n_disc_osize = 0;


/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range. */

static Int range_to_eclass ( Addr64 start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}


/* Calculates the equivalence class numbers for any VexGuestExtents.
   These are written in *eclasses, which must be big enough to hold 3
   Ints.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/

static
Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
                                  VexGuestExtents* vge )
{
#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   Int i, j, n_ec, r;

   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   n_ec = 0;
   for (i = 0; i < vge->n_used; i++) {
      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /*NOTREACHED*/
   vg_assert(0);

  bad:
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}
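
/* A worked (uncompiled) example of the function above.  Suppose a
   translation covers three guest extents which happen to land in
   bins 5, 3 and 5.  Then the call yields the duplicate-free, sorted
   list {3, 5}: */
#if 0
   Int eclasses[3];
   Int n = vexGuestExtents_to_eclasses( eclasses, &vge );
   /* now n == 2, eclasses[0] == 3, eclasses[1] == 5.  Had any extent
      straddled a bin boundary, the result would instead have
      collapsed to n == 1, eclasses[0] == ECLASS_MISC. */
#endif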

/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns the used location in the eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
{
   Int    old_sz, new_sz, i, r;
   UShort *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
      new_ar = VG_(arena_malloc)(VG_AR_TTAUX, "transtab.aECN.1",
                                 new_sz * sizeof(UShort));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         VG_(arena_free)(VG_AR_TTAUX, old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}


/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
{
   Int i, r, eclasses[3];
   TTEntry* tte;
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   tte = &sec->tt[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );

   vg_assert(r >= 1 && r <= 3);
   tte->n_tte2ec = r;

   for (i = 0; i < r; i++) {
      tte->tte2ec_ec[i] = eclasses[i];
      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
   }
}
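
/* The growth policy in addEClassNo -- start at 8 slots, double while
   small, then grow by 1.5x -- keeps reallocation amortised-constant
   while limiting slack on large arrays.  An (uncompiled) loop that
   prints the resulting size sequence: */
#if 0
   Int i, sz = 0;
   for (i = 0; i < 8; i++) {
      sz = sz==0 ? 8 : sz<64 ? 2*sz : (3*sz)/2;
      VG_(printf)("%d ", sz);   /* 8 16 32 64 96 144 216 324 */
   }
#endif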

/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   HChar*   whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1] must
            equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check that the TTEntries they point at
      point back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_tte2ec nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_tte2ec out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->tte2ec_ec[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->tte2ec_ix[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                                    (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}


/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );
static Bool sanity_check_fastcache ( void );

static Bool sanity_check_sector_search_order ( void )
{
   Int i, j, nListed;
   /* assert the array is the right size */
   vg_assert(N_SECTORS == (sizeof(sector_search_order)
                           / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
   for (i = 0; i < N_SECTORS; i++) {
      if (sector_search_order[i] < 0 || sector_search_order[i] >= N_SECTORS)
         break;
   }
   nListed = i;
   for (/* */; i < N_SECTORS; i++) {
      if (sector_search_order[i] != -1)
         break;
   }
   if (i != N_SECTORS)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < N_SECTORS; i++) {
      if (sector_search_order[i] == -1)
         continue;
      for (j = i+1; j < N_SECTORS; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */
   for (i = 0; i < N_SECTORS; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}
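
/* For concreteness, an (uncompiled) example of a well-formed search
   order with N_SECTORS == 8 and three sectors allocated, most
   recently hit first: */
#if 0
   Int example[8] = { 3, 0, 1, -1, -1, -1, -1, -1 };
   /* valid sector numbers first, then -1 padding; no duplicates;
      exactly as many listed as sectors with a non-NULL .tc */
#endif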

static Bool sanity_check_all_sectors ( void )
{
   Int     sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < N_SECTORS; sno++) {
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
   }
   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_fastcache() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}



/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

static UInt vge_osize ( VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static Bool isValidSector ( Int sector )
{
   if (sector < 0 || sector >= N_SECTORS)
      return False;
   return True;
}

static inline UInt HASH_TT ( Addr64 key )
{
   UInt kHi = (UInt)(key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return k32 % N_TTES_PER_SECTOR;
}

static void setFastCacheEntry ( Addr64 key, ULong* tcptr, UInt* count )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = (Addr)key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   if (VG_(clo_profile_flags) > 0)
      VG_(tt_fastN)[cno] = count;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate,
      which should reject any attempt to make a translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}
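
/* HASH_TT folds the 64-bit guest address to 32 bits with an xor,
   rotates right by 7 so that nearby, similarly-aligned code addresses
   don't all collide, and reduces modulo the (prime) table size.  The
   (uncompiled) probe sequence built on it -- 'sec' and 'guest_addr'
   are stand-in names -- is the same linear-probing scheme that
   VG_(add_to_transtab) and VG_(search_transtab) use below: */
#if 0
   Int k = HASH_TT(guest_addr);
   while (sec->tt[k].status == InUse && sec->tt[k].entry != guest_addr) {
      k++;                     /* linear probe ... */
      if (k == N_TTES_PER_SECTOR)
         k = 0;                /* ... with wraparound */
   }
#endif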

/* Invalidate the fast cache's counter array, VG_(tt_fastN). */
static void invalidateFastNCache ( void )
{
   UInt j;
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fastN)[j+0] = NULL;
      VG_(tt_fastN)[j+1] = NULL;
      VG_(tt_fastN)[j+2] = NULL;
      VG_(tt_fastN)[j+3] = NULL;
   }
   vg_assert(j == VG_TT_FAST_SIZE);
}

/* Invalidate the fast cache VG_(tt_fast).  If profiling, also
   invalidate the fast cache's counter array VG_(tt_fastN), otherwise
   don't touch it. */
static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   }

   if (VG_(clo_profile_flags) > 0)
      invalidateFastNCache();

   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}

static Bool sanity_check_fastcache ( void )
{
   UInt j;
   if (0) VG_(printf)("sanity check fastcache\n");
   if (VG_(clo_profile_flags) > 0) {
      /* profiling */
      for (j = 0; j < VG_TT_FAST_SIZE; j++) {
         if (VG_(tt_fastN)[j] == NULL
             && VG_(tt_fast)[j].guest != TRANSTAB_BOGUS_GUEST_ADDR)
            return False;
         if (VG_(tt_fastN)[j] != NULL
             && VG_(tt_fast)[j].guest == TRANSTAB_BOGUS_GUEST_ADDR)
            return False;
      }
   } else {
      /* not profiling */
      for (j = 0; j < VG_TT_FAST_SIZE; j++) {
         if (VG_(tt_fastN)[j] != NULL)
            return False;
      }
   }
   return True;
}

static void initialiseSector ( Int sno )
{
   Int     i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);

      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(AddrH)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);

      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Add an entry in the sector_search_order */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == -1)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      /* Visit each just-about-to-be-abandoned translation. */
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == sno)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}

static void invalidate_icache ( void *ptr, Int nbytes )
{
#  if defined(VGA_ppc32) || defined(VGA_ppc64)
   Addr startaddr = (Addr) ptr;
   Addr endaddr   = startaddr + nbytes;
   Addr cls;
   Addr addr;
   VexArchInfo vai;

   if (nbytes == 0) return;
   vg_assert(nbytes > 0);

   VG_(machine_get_VexArchInfo)( NULL, &vai );
   cls = vai.ppc_cache_line_szB;

   /* Stay sane .. */
   vg_assert(cls == 32 || cls == 64 || cls == 128);

   startaddr &= ~(cls - 1);
   for (addr = startaddr; addr < endaddr; addr += cls) {
      __asm__ __volatile__("dcbst 0,%0" : : "r" (addr));
   }
   __asm__ __volatile__("sync");
   for (addr = startaddr; addr < endaddr; addr += cls) {
      __asm__ __volatile__("icbi 0,%0" : : "r" (addr));
   }
   __asm__ __volatile__("sync; isync");

#  elif defined(VGA_x86)
   /* no need to do anything, hardware provides coherence */

#  elif defined(VGA_amd64)
   /* no need to do anything, hardware provides coherence */

#  elif defined(VGP_arm_linux)
   /* ARM cache flushes are privileged, so we must defer to the kernel. */
   Addr startaddr = (Addr) ptr;
   Addr endaddr   = startaddr + nbytes;
   VG_(do_syscall2)(__NR_ARM_cacheflush, startaddr, endaddr);

#  else
#    error "Unknown ARCH"
#  endif
}
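
/* The ppc sequence above is the classic self-modifying-code dance:
   dcbst pushes the new code out of the D-cache, sync waits for it to
   reach coherence, icbi discards any stale I-cache lines, and the
   final sync; isync stops the CPU from running prefetched stale
   instructions.  For reference only (not used here), GCC and Clang
   expose the same operation portably as a builtin -- a hedged
   sketch, assuming a toolchain that provides it: */
#if 0
   /* equivalent effect on targets where the builtin is implemented */
   __builtin___clear_cache( (char*)ptr, (char*)ptr + nbytes );
#endif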

/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].

   pre: youngest_sector points to a valid (although possibly full)
   sector.
*/
void VG_(add_to_transtab)( VexGuestExtents* vge,
                           Addr64           entry,
                           AddrH            code,
                           UInt             code_len,
                           Bool             is_self_checking )
{
   Int    tcAvailQ, reqdQ, y, i;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   if (0)
      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   reqdQ = (code_len + 7) >> 3;

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      VG_(debugLog)(1,"transtab",
                      "declare sector %d full "
                      "(TT loading %2d%%, TC loading %2d%%)\n",
                      y,
                      (100 * sectors[y].tt_n_inuse)
                         / N_TTES_PER_SECTOR,
                      (100 * (tc_sector_szQ - tcAvailQ))
                         / tc_sector_szQ);
      youngest_sector++;
      if (youngest_sector >= N_SECTORS)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   for (i = 0; i < code_len; i++)
      dstP[i] = srcP[i];
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   invalidate_icache( dstP, code_len );

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full. */
   i = HASH_TT(entry);
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tcptr  = tcptr;
   sectors[y].tt[i].count  = 0;
   sectors[y].tt[i].weight = 1;
   sectors[y].tt[i].vge    = *vge;
   sectors[y].tt[i].entry  = entry;

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr, &sectors[y].tt[i].count );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}
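
/* A caller's-eye view of the two entry points, as an (uncompiled)
   sketch; 'guest_ip', 'tmpbuf' and 'tmpbuf_used' are stand-in names.
   The translator adds freshly generated code, and the scheduler
   later looks it up, hitting the fast cache on the hot path and
   falling back to the full search below: */
#if 0
   VG_(add_to_transtab)( &vge, guest_ip, (AddrH)tmpbuf, tmpbuf_used,
                         False/*is_self_checking*/ );
   /* ... later, in the scheduler ... */
   AddrH host_code;
   if (VG_(search_transtab)( &host_code, guest_ip, True/*upd_cache*/ ))
      { /* run host_code */ }
   else
      { /* no translation yet: go and make one */ }
#endif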

/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.
*/
Bool VG_(search_transtab) ( /*OUT*/AddrH* result,
                            Addr64        guest_addr,
                            Bool          upd_cache )
{
   Int i, j, k, kstart, sno;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   k      = -1;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < N_SECTORS; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      k = kstart;
      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         if (sectors[sno].tt[k].status == InUse
             && sectors[sno].tt[k].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[k].tcptr,
                  &sectors[sno].tt[k].count );
            if (result)
               *result = (AddrH)sectors[sno].tt[k].tcptr;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         if (sectors[sno].tt[k].status == Empty)
            break; /* not found in this sector */
         k++;
         if (k == N_TTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}


/*-------------------------------------------------------------*/
/*--- Delete translations.                                  ---*/
/*-------------------------------------------------------------*/

/* forward */
static void unredir_discard_translations( Addr64, ULong );

/* Stuff for deleting translations which intersect with a given
   address range.  Unfortunately, to make this run at a reasonable
   speed, it is complex. */

static inline
Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
{
   Addr64 e1 = s1 + r1 - 1ULL;
   Addr64 e2 = s2 + r2 - 1ULL;
   if (e1 < s2 || e2 < s1)
      return False;
   return True;
}

static inline
Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
{
   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
      return True;
   if (vge->n_used < 2)
      return False;
   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
      return True;
   if (vge->n_used < 3)
      return False;
   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
      return True;
   return False;
}
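
/* overlap1 works on closed intervals: [s1, s1+r1-1] and [s2, s2+r2-1]
   are disjoint exactly when one ends before the other starts, and
   overlap otherwise.  Two (uncompiled) concrete cases: */
#if 0
   overlap1( 0x1000, 0x100, 0x10FF, 0x10 ); /* True: shares byte 0x10FF */
   overlap1( 0x1000, 0x100, 0x1100, 0x10 ); /* False: adjacent, disjoint */
#endif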

/* Delete a tt entry, and update all the eclass data accordingly. */

static void delete_tte ( /*MOD*/Sector* sec, Int tteno )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   tte->status   = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}


/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass. */

static
Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec,
                                            Addr64 guest_start, ULong range,
                                            Int ec )
{
   Int      i;
   UShort   tteno;
   Bool     anyDeld = False;
   TTEntry* tte;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      tte = &sec->tt[tteno];
      vg_assert(tte->status == InUse);

      if (overlaps( guest_start, range, &tte->vge )) {
         anyDeld = True;
         delete_tte( sec, (Int)tteno );
      }

   }

   return anyDeld;
}


/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec. */

static
Bool delete_translations_in_sector ( /*MOD*/Sector* sec,
                                     Addr64 guest_start, ULong range )
{
   Int  i;
   Bool anyDeld = False;

   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
      if (sec->tt[i].status == InUse
          && overlaps( guest_start, range, &sec->tt[i].vge )) {
         anyDeld = True;
         delete_tte( sec, i );
      }
   }

   return anyDeld;
}
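
/* VG_(discard_translations) below picks between two strategies.  The
   decision rule, as an (uncompiled) sketch: a range can use the fast
   scheme only if it is smaller than one eclass stripe AND does not
   straddle a stripe boundary.  Both conditions are folded into
   range_to_eclass returning something other than ECLASS_MISC: */
#if 0
   Int ec = ECLASS_MISC;
   if (range < (1ULL << ECLASS_SHIFT))             /* < 2KB here */
      ec = range_to_eclass( guest_start, (UInt)range );
   if (ec != ECLASS_MISC)
      { /* fast: scan only ec2tte[ec] and ec2tte[ECLASS_MISC] */ }
   else
      { /* slow: scan every TT entry in every sector */ }
#endif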

void VG_(discard_translations) ( Addr64 guest_start, ULong range,
                                 HChar* who )
{
   Sector* sec;
   Int     sno, ec;
   Bool    anyDeleted = False;

   vg_assert(init_done);

   VG_(debugLog)(2, "transtab",
                    "discard_translations(0x%llx, %lld) req by %s\n",
                    guest_start, range, who );

   /* Pre-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Bool sane = sanity_check_all_sectors();
      vg_assert(sane);
   }

   if (range == 0)
      return;

   /* There are two different ways to do this.

      If the range fits within a single address-range equivalence
      class, as will be the case for a cache line sized invalidation,
      then we only have to inspect the set of translations listed in
      that equivalence class, and also in the "sin-bin" equivalence
      class ECLASS_MISC.

      Otherwise, the invalidation is of a larger range and probably
      results from munmap.  In this case it's (probably!) faster just
      to inspect all translations, dump those we don't want, and
      regenerate the equivalence class information (since modifying it
      in-situ is even more expensive).
   */

   /* First off, figure out if the range falls within a single class,
      and if so which one. */

   ec = ECLASS_MISC;
   if (range < (1ULL << ECLASS_SHIFT))
      ec = range_to_eclass( guest_start, (UInt)range );

   /* if ec is ECLASS_MISC then we aren't looking at just a single
      class, so use the slow scheme.  Else use the fast scheme,
      examining 'ec' and ECLASS_MISC. */

   if (ec != ECLASS_MISC) {

      VG_(debugLog)(2, "transtab", "  FAST, ec = %d\n", ec);

      /* Fast scheme */
      vg_assert(ec >= 0 && ec < ECLASS_MISC);

      for (sno = 0; sno < N_SECTORS; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, guest_start, range, ec );
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, guest_start, range, ECLASS_MISC );
      }

   } else {

      /* slow scheme */

      VG_(debugLog)(2, "transtab", "  SLOW, ec = %d\n", ec);

      for (sno = 0; sno < N_SECTORS; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector(
                          sec, guest_start, range );
      }

   }

   if (anyDeleted)
      invalidateFastCache();

   /* don't forget the no-redir cache */
   unredir_discard_translations( guest_start, range );

   /* Post-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Int      i;
      TTEntry* tte;
      Bool     sane = sanity_check_all_sectors();
      vg_assert(sane);
      /* But now, also check the requested address range isn't
         present anywhere. */
      for (sno = 0; sno < N_SECTORS; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
            tte = &sec->tt[i];
            if (tte->status != InUse)
               continue;
            vg_assert(!overlaps( guest_start, range, &tte->vge ));
         }
      }
   }
}


/*------------------------------------------------------------*/
/*--- AUXILIARY: the unredirected TT/TC                    ---*/
/*------------------------------------------------------------*/

/* A very simple translation cache which holds a small number of
   unredirected translations.  This is completely independent of the
   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   both structures are simply dumped and we start over.

   Since these translations are unredirected, the search key is (by
   definition) the first address entry in the .vge field. */

/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))

typedef
   struct {
      VexGuestExtents vge;
      Addr            hcode;
      Bool            inUse;
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong *unredir_tc;
static Int    unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;


static void init_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tc == NULL) {
      SysRes sres = VG_(am_mmap_anon_float_valgrind)
                       ( N_UNREDIR_TT * UNREDIR_SZB );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
                                     N_UNREDIR_TT * UNREDIR_SZB);
         /*NOTREACHED*/
      }
      unredir_tc = (ULong *)(AddrH)sr_Res(sres);
   }
   unredir_tc_used = 0;
   for (i = 0; i < N_UNREDIR_TT; i++)
      unredir_tt[i].inUse = False;
   unredir_tt_highwater = -1;
}

/* Do a sanity check on the unredirected tt/tc; return False on
   failure. */
static Bool sanity_check_redir_tt_tc ( void )
{
   Int i;
   if (unredir_tt_highwater < -1) return False;
   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;

   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
      if (unredir_tt[i].inUse)
         return False;

   if (unredir_tc_used < 0) return False;
   if (unredir_tc_used > N_UNREDIR_TCQ) return False;

   return True;
}


/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   is temporarily in code[0 .. code_len-1].
*/
void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
                                   Addr64           entry,
                                   AddrH            code,
                                   UInt             code_len )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_redir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How much unredir_tc space is needed, in ULongs */
   code_szQ = (code_len + 7) / 8;

   /* Look for an empty unredir_tt slot */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
      i = 0;
   }

   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   invalidate_icache( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}

Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
                                    Addr64        guest_addr )
{
   Int i;
   for (i = 0; i < N_UNREDIR_TT; i++) {
      if (!unredir_tt[i].inUse)
         continue;
      if (unredir_tt[i].vge.base[0] == guest_addr) {
         *result = (AddrH)unredir_tt[i].hcode;
         return True;
      }
   }
   return False;
}

static void unredir_discard_translations( Addr64 guest_start, ULong range )
{
   Int i;

   vg_assert(sanity_check_redir_tt_tc());

   for (i = 0; i <= unredir_tt_highwater; i++) {
      if (unredir_tt[i].inUse
          && overlaps( guest_start, range, &unredir_tt[i].vge))
         unredir_tt[i].inUse = False;
   }
}

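
/* Capacity arithmetic for the structure above, as an (uncompiled)
   check: N_UNREDIR_TCQ = 500 * 1000 / 8 = 62500 ULongs, i.e. 500KB
   of code space shared by at most 500 translations.  Since the TC
   part is bump-allocated and never compacted, a sustained mix of
   adds and discards eventually trips the dump-everything path in
   VG_(add_to_unredir_transtab): */
#if 0
   vg_assert(N_UNREDIR_TCQ == 62500);
   vg_assert(N_UNREDIR_TCQ * sizeof(ULong) == N_UNREDIR_TT * UNREDIR_SZB);
#endif
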
/*------------------------------------------------------------*/
/*--- Initialisation.                                      ---*/
/*------------------------------------------------------------*/

void VG_(init_tt_tc) ( void )
{
   Int i, j, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(Addr64) == 8);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no
      spaces */
   vg_assert(sizeof( VG_(tt_fast) )
             == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( (Addr)&VG_(tt_fast)[0] ));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be.  */
   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   vg_assert(tc_sector_szQ <= 80 * N_TTES_PER_SECTOR_USABLE);

   /* Initialise the sectors */
   youngest_sector = 0;
   for (i = 0; i < N_SECTORS; i++) {
      sectors[i].tc = NULL;
      sectors[i].tt = NULL;
      sectors[i].tc_next = NULL;
      sectors[i].tt_n_inuse = 0;
      for (j = 0; j < ECLASS_N; j++) {
         sectors[i].ec2tte_size[j] = 0;
         sectors[i].ec2tte_used[j] = 0;
         sectors[i].ec2tte[j] = NULL;
      }
   }

   /* Initialise the sector_search_order hint table. */
   for (i = 0; i < N_SECTORS; i++)
      sector_search_order[i] = -1;

   /* Initialise the fast caches.  If not profiling (the usual case),
      we have to explicitly invalidate the fastN cache as
      invalidateFastCache() won't do that for us. */
   invalidateFastCache();
   if (VG_(clo_profile_flags) == 0)
      invalidateFastNCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
          N_SECTORS, 8 * tc_sector_szQ,
          N_SECTORS * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d total entries, max occupancy %d (%d%%)\n",
          N_SECTORS * N_TTES_PER_SECTOR,
          N_SECTORS * N_TTES_PER_SECTOR_USABLE,
          SECTOR_TT_LIMIT_PERCENT );
   }

   VG_(debugLog)(2, "transtab",
      "cache: %d sectors of %d bytes each = %d total\n",
       N_SECTORS, 8 * tc_sector_szQ,
       N_SECTORS * 8 * tc_sector_szQ );
   VG_(debugLog)(2, "transtab",
      "table: %d total entries, max occupancy %d (%d%%)\n",
       N_SECTORS * N_TTES_PER_SECTOR,
       N_SECTORS * N_TTES_PER_SECTOR_USABLE,
       SECTOR_TT_LIMIT_PERCENT );
}
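
/* To make the sizing concrete (illustrative numbers only -- the real
   avg_translation_sizeB is supplied by the tool): suppose a tool
   reports avg_translation_sizeB == 172.  Then
      avg_codeszQ   = (172 + 7) / 8    = 22 ULongs
      tc_sector_szQ = 42588 * (1 + 22) = 979524 ULongs
   so each sector's code area is about 7.8MB and the 8 sectors total
   about 63MB.  The '1 +' gives every translation a one-ULong cushion
   so the TC should fill up at roughly the same moment the TT hits
   its 65% load limit. */
#if 0
   vg_assert(N_TTES_PER_SECTOR_USABLE == (65521 * 65) / 100); /* 42588 */
#endif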


/*------------------------------------------------------------*/
/*--- Printing out statistics.                             ---*/
/*------------------------------------------------------------*/

static ULong safe_idiv( ULong a, ULong b )
{
   return (b == 0 ? 0 : a / b);
}

UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}

void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
      n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
      n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
      " transtab: new        %'lld "
      "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
      n_in_count, n_in_osize, n_in_tsize,
      safe_idiv(10*n_in_tsize, n_in_osize),
      n_in_sc_count);
   VG_(message)(Vg_DebugMsg,
      " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
      n_dump_count, n_dump_osize );
   VG_(message)(Vg_DebugMsg,
      " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
      n_disc_count, n_disc_osize );

   if (0) {
      Int i;
      VG_(printf)("\n");
      for (i = 0; i < ECLASS_N; i++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
         if (i % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}

/*------------------------------------------------------------*/
/*--- Printing out of profiling results.                   ---*/
/*------------------------------------------------------------*/

static ULong score ( TTEntry* tte )
{
   return ((ULong)tte->weight) * ((ULong)tte->count);
}

ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
{
   Int   sno, i, r, s;
   ULong score_total;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (i = 0; i < n_tops; i++) {
      tops[i].addr  = 0;
      tops[i].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < N_SECTORS; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].tt[i]);
         /* Find the rank for sectors[sno].tt[i]. */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;
            if (tops[r].addr == 0) {
               r--;
               continue;
            }
            if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
               r--;
               continue;
            }
            break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].tt[i].entry;
            tops[r].score = score( &sectors[sno].tt[i] );
         }
      }
   }

   return score_total;
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/