1 2 /*--------------------------------------------------------------------*/ 3 /*--- Management of the translation table and cache. ---*/ 4 /*--- m_transtab.c ---*/ 5 /*--------------------------------------------------------------------*/ 6 7 /* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2000-2011 Julian Seward 12 jseward (at) acm.org 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 
*/

#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)

// JRS FIXME get rid of this somehow
#if defined(VGP_arm_linux)
# include "pub_core_vkiscnums.h" // __ARM_NR_cacheflush
# include "pub_core_syscall.h"   // VG_(do_syscallN)
#endif


/* #define DEBUG_TRANSTAB */


/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/*------------------ CONSTANTS ------------------*/

/* Number of sectors the TC is divided into.  If you need a larger
   overall translation cache, increase this value. */
#define N_SECTORS 8

/* Number of TT entries in each sector.  This needs to be a prime
   number to work properly, it must be <= 65535 (so that a TT index
   fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote
   'deleted') and it is strongly recommended not to change this.
   65521 is the largest prime <= 65535. */
#define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR_USABLE \
           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)

/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
#define ECLASS_SHIFT 11
#define ECLASS_WIDTH 8
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)

#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */


/*------------------ TYPES ------------------*/

/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too.  */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      UInt     count;
      UShort   weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.
   }
   TTEntry;


/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry .count and .weight fields, if required.  Each sector is
   independent in that no cross-sector references are allowed.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      UShort* ec2tte[ECLASS_N];
   }
   Sector;


/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, youngest sector should be between >= 0 and <
   N_SECTORS.  The initial -1 value indicates the TT/TC system is
   not yet initialised.
*/
static Sector sectors[N_SECTORS];
static Int    youngest_sector = -1;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int tc_sector_szQ;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  -1 denotes "no entry". */
static Int sector_search_order[N_SECTORS];


/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be
   pointed at to cause that cache entry to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
/*
typedef
   struct {
      Addr guest;
      Addr host;
   }
   FastCacheEntry;
*/
/*global*/ __attribute__((aligned(16)))
           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
/*
#define TRANSTAB_BOGUS_GUEST_ADDR ((Addr)1)
*/

/* For profiling, we have a parallel array of pointers to .count
   fields in TT entries.  Again, these pointers must be invalidated
   when translations disappear.  A NULL pointer suffices to indicate
   an unused slot.

   When not profiling (the normal case, VG_(clo_profile_flags) == 0),
   all tt_fastN entries are set to NULL at startup and never read nor
   written after that.

   When profiling (VG_(clo_profile_flags) > 0), tt_fast and tt_fastN
   change together: if tt_fast[i].guest is TRANSTAB_BOGUS_GUEST_ADDR
   then the corresponding tt_fastN[i] must be null.  If
   tt_fast[i].guest is any other value, then tt_fastN[i] *must* point
   to the .count field of the corresponding TT entry.

   tt_fast and tt_fastN are referred to from assembly code
   (dispatch.S).
*/
/*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE];


/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
ULong n_fast_flushes = 0;
ULong n_fast_updates = 0;

/* Number of full lookups done. */
ULong n_full_lookups = 0;
ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested.
*/
ULong n_in_count = 0;
ULong n_in_osize = 0;
ULong n_in_tsize = 0;
ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
ULong n_dump_count = 0;
ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
ULong n_disc_count = 0;
ULong n_disc_osize = 0;


/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range.  Only the low 32 bits
   of 'start' participate in forming the class number; a range whose
   low and high ends fall in different bins is classified as
   ECLASS_MISC. */

static Int range_to_eclass ( Addr64 start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}


/* Calculates the equivalence class numbers for any VexGuestExtent.
   These are written in *eclasses, which must be big enough to hold 3
   Ints.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/

static
Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
                                  VexGuestExtents* vge )
{
#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   Int i, j, n_ec, r;

   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   n_ec = 0;
   for (i = 0; i < vge->n_used; i++) {
      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
      /* If any extent is MISC, the whole set collapses to just
         {ECLASS_MISC} -- see 'bad' below. */
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort (3-element sorting network) */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /* NOTREACHED */
   vg_assert(0);

  bad:
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}


/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns used location in eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
{
   Int    old_sz, new_sz, i, r;
   UShort *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      /* Grow the array: 0 -> 8 slots, then double while < 64,
         thereafter grow by 1.5x. */
      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
      new_ar = VG_(arena_malloc)(VG_AR_TTAUX, "transtab.aECN.1",
                                 new_sz * sizeof(UShort));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         VG_(arena_free)(VG_AR_TTAUX, old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}


/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
{
   Int i, r, eclasses[3];
   TTEntry* tte;
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   tte = &sec->tt[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );

   vg_assert(r >= 1 && r <= 3);
   tte->n_tte2ec = r;

   /* Record the forward (tte -> eclass) links; addEClassNo installs
      the matching backward (eclass -> tte) links. */
   for (i = 0; i < r; i++) {
      tte->tte2ec_ec[i] = eclasses[i];
      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
   }
}


/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   HChar*   whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->eclasses[0 .. tte->n_eclasses-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                                    (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}


/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );
static Bool sanity_check_fastcache ( void );

static Bool sanity_check_sector_search_order ( void )
{
   Int i, j, nListed;
   /* assert the array is the right size */
   vg_assert(N_SECTORS == (sizeof(sector_search_order)
                           / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..]
*/
   for (i = 0; i < N_SECTORS; i++) {
      if (sector_search_order[i] < 0 || sector_search_order[i] >= N_SECTORS)
         break;
   }
   nListed = i;
   for (/* */; i < N_SECTORS; i++) {
      if (sector_search_order[i] != -1)
         break;
   }
   if (i != N_SECTORS)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < N_SECTORS; i++) {
      if (sector_search_order[i] == -1)
         continue;
      for (j = i+1; j < N_SECTORS; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */
   for (i = 0; i < N_SECTORS; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}

static Bool sanity_check_all_sectors ( void )
{
   Int     sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < N_SECTORS; sno++) {
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
   }
   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_fastcache() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}



/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

/* Total guest-code ("original") size covered by 'vge'. */
static UInt vge_osize ( VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static Bool isValidSector ( Int sector )
{
   if (sector < 0 || sector >= N_SECTORS)
      return False;
   return True;
}

/* Hash a guest address to a TT index: fold the two 32-bit halves
   together, rotate right by 7 bits, then reduce mod the (prime)
   table size. */
static inline UInt HASH_TT ( Addr64 key )
{
   UInt kHi = (UInt)(key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return k32 % N_TTES_PER_SECTOR;
}

/* Install a (guest, host) pair in the direct-mapped fast cache; when
   profiling, also record the pointer to the TT entry's .count. */
static void setFastCacheEntry ( Addr64 key, ULong* tcptr, UInt* count )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = (Addr)key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   if (VG_(clo_profile_flags) > 0)
      VG_(tt_fastN)[cno] = count;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}

/* Invalidate the fast cache's counter array, VG_(tt_fastN). */
static void invalidateFastNCache ( void )
{
   UInt j;
   /* Unrolled by 4; VG_TT_FAST_SIZE is asserted to be a multiple of 4. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fastN)[j+0] = NULL;
      VG_(tt_fastN)[j+1] = NULL;
      VG_(tt_fastN)[j+2] = NULL;
      VG_(tt_fastN)[j+3] = NULL;
   }
   vg_assert(j == VG_TT_FAST_SIZE);
}

/* Invalidate the fast cache VG_(tt_fast).  If profiling, also
   invalidate the fast cache's counter array VG_(tt_fastN), otherwise
   don't touch it. */
static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   }

   if (VG_(clo_profile_flags) > 0)
      invalidateFastNCache();

   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}

/* Check the tt_fast/tt_fastN invariant documented at their
   declarations.  True == OK. */
static Bool sanity_check_fastcache ( void )
{
   UInt j;
   if (0) VG_(printf)("sanity check fastcache\n");
   if (VG_(clo_profile_flags) > 0) {
      /* profiling */
      for (j = 0; j < VG_TT_FAST_SIZE; j++) {
         if (VG_(tt_fastN)[j] == NULL
             && VG_(tt_fast)[j].guest != TRANSTAB_BOGUS_GUEST_ADDR)
            return False;
         if (VG_(tt_fastN)[j] != NULL
             && VG_(tt_fast)[j].guest == TRANSTAB_BOGUS_GUEST_ADDR)
            return False;
      }
   } else {
      /* not profiling */
      for (j = 0; j < VG_TT_FAST_SIZE; j++) {
         if (VG_(tt_fastN)[j] != NULL)
            return False;
      }
   }
   return True;
}

/* Bring sector 'sno' into a clean, empty, usable state: allocate its
   tt/tc on first use, or discard all existing translations (notifying
   the tool) when recycling.  Always flushes the fast cache. */
static void initialiseSector ( Int sno )
{
   Int     i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);

      /* tc holds 8-byte (ULong) units, hence the * 8. */
      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(AddrH)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);

      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Add an entry in the sector_search_order */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == -1)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      /* Visit each just-about-to-be-abandoned translation. */
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == sno)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   /* Entries in the fast cache may point into this sector; they are
      all stale now. */
   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}

/* Make freshly-written host code visible to the instruction fetch
   unit, on architectures whose I-cache is not hardware-coherent with
   the D-cache. */
static void invalidate_icache ( void *ptr, Int nbytes )
{
#  if defined(VGA_ppc32) || defined(VGA_ppc64)
   Addr startaddr = (Addr) ptr;
   Addr endaddr   = startaddr + nbytes;
   Addr cls;
   Addr addr;
   VexArchInfo vai;

   if (nbytes == 0) return;
   vg_assert(nbytes > 0);

   VG_(machine_get_VexArchInfo)( NULL, &vai );
   cls = vai.ppc_cache_line_szB;

   /* Stay sane ..
*/
   vg_assert(cls == 32 || cls == 64 || cls == 128);

   /* Round down to the cache-line boundary, then flush data lines to
      memory (dcbst), wait (sync), invalidate the corresponding
      instruction lines (icbi), and resynchronise. */
   startaddr &= ~(cls - 1);
   for (addr = startaddr; addr < endaddr; addr += cls) {
      __asm__ __volatile__("dcbst 0,%0" : : "r" (addr));
   }
   __asm__ __volatile__("sync");
   for (addr = startaddr; addr < endaddr; addr += cls) {
      __asm__ __volatile__("icbi 0,%0" : : "r" (addr));
   }
   __asm__ __volatile__("sync; isync");

#  elif defined(VGA_x86)
   /* no need to do anything, hardware provides coherence */

#  elif defined(VGA_amd64)
   /* no need to do anything, hardware provides coherence */

#  elif defined(VGA_s390x)
   /* no need to do anything, hardware provides coherence */

#  elif defined(VGP_arm_linux)
   /* ARM cache flushes are privileged, so we must defer to the kernel. */
   Addr startaddr = (Addr) ptr;
   Addr endaddr   = startaddr + nbytes;
   VG_(do_syscall2)(__NR_ARM_cacheflush, startaddr, endaddr);

#  else
#    error "Unknown ARCH"
#  endif
}


/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].

   pre: youngest_sector points to a valid (although possibly full)
   sector.
*/
void VG_(add_to_transtab)( VexGuestExtents* vge,
                           Addr64           entry,
                           AddrH            code,
                           UInt             code_len,
                           Bool             is_self_checking )
{
   Int    tcAvailQ, reqdQ, y, i;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   if (0)
      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector.  Space is reckoned
      in 8-byte (ULong) quadwords; round code_len up. */
   reqdQ = (code_len + 7) >> 3;

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      VG_(debugLog)(1,"transtab",
                      "declare sector %d full "
                      "(TT loading %2d%%, TC loading %2d%%)\n",
                      y,
                      (100 * sectors[y].tt_n_inuse)
                         / N_TTES_PER_SECTOR,
                      (100 * (tc_sector_szQ - tcAvailQ))
                         / tc_sector_szQ);
      youngest_sector++;
      if (youngest_sector >= N_SECTORS)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   for (i = 0; i < code_len; i++)
      dstP[i] = srcP[i];
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   invalidate_icache( dstP, code_len );

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full.  Linear
      probing from the hash point, wrapping at the end. */
   i = HASH_TT(entry);
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tcptr  = tcptr;
   sectors[y].tt[i].count  = 0;
   sectors[y].tt[i].weight = 1;
   sectors[y].tt[i].vge    = *vge;
   sectors[y].tt[i].entry  = entry;

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr, &sectors[y].tt[i].count );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}


/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.
*/
Bool VG_(search_transtab) ( /*OUT*/AddrH* result,
                            Addr64        guest_addr,
                            Bool          upd_cache )
{
   Int i, j, k, kstart, sno;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   k      = -1;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < N_SECTORS; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      k = kstart;
      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         if (sectors[sno].tt[k].status == InUse
             && sectors[sno].tt[k].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[k].tcptr,
                  &sectors[sno].tt[k].count );
            if (result)
               *result = (AddrH)sectors[sno].tt[k].tcptr;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         if (sectors[sno].tt[k].status == Empty)
            break; /* not found in this sector */
         k++;
         if (k == N_TTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}


/*-------------------------------------------------------------*/
/*--- Delete translations.                                  ---*/
/*-------------------------------------------------------------*/

/* forward */
static void unredir_discard_translations( Addr64, ULong );

/* Stuff for deleting translations which intersect with a given
   address range.  Unfortunately, to make this run at a reasonable
   speed, it is complex.
*/

/* Does the closed interval [s1, s1+r1-1] intersect [s2, s2+r2-1]?
   Computed with inclusive end points so a range never wraps past the
   top of the 64-bit address space inside the comparison. */
static inline
Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
{
   Addr64 e1 = s1 + r1 - 1ULL;
   Addr64 e2 = s2 + r2 - 1ULL;
   if (e1 < s2 || e2 < s1)
      return False;
   return True;
}

/* Does [start, start+range-1] intersect any of the (up to 3) extents
   described by vge?  Only the first vge->n_used extents are valid. */
static inline
Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
{
   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
      return True;
   if (vge->n_used < 2)
      return False;
   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
      return True;
   if (vge->n_used < 3)
      return False;
   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
      return True;
   return False;
}


/* Delete a tt entry, and update all the eclass data accordingly. */

static void delete_tte ( /*MOD*/Sector* sec, Int tteno )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry.  Mark it Deleted (not Empty) so probe
      chains in the open-addressed tt are not broken. */
   tte->status   = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}


/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass.  Returns True
   if any were deleted. */

static
Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec,
                                            Addr64 guest_start, ULong range,
                                            Int ec )
{
   Int      i;
   UShort   tteno;
   Bool     anyDeld = False;
   TTEntry* tte;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      tte = &sec->tt[tteno];
      vg_assert(tte->status == InUse);

      if (overlaps( guest_start, range, &tte->vge )) {
         anyDeld = True;
         delete_tte( sec, (Int)tteno );
      }

   }

   return anyDeld;
}


/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec.
*/ 1232 1233 static 1234 Bool delete_translations_in_sector ( /*MOD*/Sector* sec, 1235 Addr64 guest_start, ULong range ) 1236 { 1237 Int i; 1238 Bool anyDeld = False; 1239 1240 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 1241 if (sec->tt[i].status == InUse 1242 && overlaps( guest_start, range, &sec->tt[i].vge )) { 1243 anyDeld = True; 1244 delete_tte( sec, i ); 1245 } 1246 } 1247 1248 return anyDeld; 1249 } 1250 1251 1252 void VG_(discard_translations) ( Addr64 guest_start, ULong range, 1253 HChar* who ) 1254 { 1255 Sector* sec; 1256 Int sno, ec; 1257 Bool anyDeleted = False; 1258 1259 vg_assert(init_done); 1260 1261 VG_(debugLog)(2, "transtab", 1262 "discard_translations(0x%llx, %lld) req by %s\n", 1263 guest_start, range, who ); 1264 1265 /* Pre-deletion sanity check */ 1266 if (VG_(clo_sanity_level >= 4)) { 1267 Bool sane = sanity_check_all_sectors(); 1268 vg_assert(sane); 1269 } 1270 1271 if (range == 0) 1272 return; 1273 1274 /* There are two different ways to do this. 1275 1276 If the range fits within a single address-range equivalence 1277 class, as will be the case for a cache line sized invalidation, 1278 then we only have to inspect the set of translations listed in 1279 that equivalence class, and also in the "sin-bin" equivalence 1280 class ECLASS_MISC. 1281 1282 Otherwise, the invalidation is of a larger range and probably 1283 results from munmap. In this case it's (probably!) faster just 1284 to inspect all translations, dump those we don't want, and 1285 regenerate the equivalence class information (since modifying it 1286 in-situ is even more expensive). 1287 */ 1288 1289 /* First off, figure out if the range falls within a single class, 1290 and if so which one. */ 1291 1292 ec = ECLASS_MISC; 1293 if (range < (1ULL << ECLASS_SHIFT)) 1294 ec = range_to_eclass( guest_start, (UInt)range ); 1295 1296 /* if ec is ECLASS_MISC then we aren't looking at just a single 1297 class, so use the slow scheme. 
Else use the fast scheme, 1298 examining 'ec' and ECLASS_MISC. */ 1299 1300 if (ec != ECLASS_MISC) { 1301 1302 VG_(debugLog)(2, "transtab", 1303 " FAST, ec = %d\n", ec); 1304 1305 /* Fast scheme */ 1306 vg_assert(ec >= 0 && ec < ECLASS_MISC); 1307 1308 for (sno = 0; sno < N_SECTORS; sno++) { 1309 sec = §ors[sno]; 1310 if (sec->tc == NULL) 1311 continue; 1312 anyDeleted |= delete_translations_in_sector_eclass( 1313 sec, guest_start, range, ec ); 1314 anyDeleted |= delete_translations_in_sector_eclass( 1315 sec, guest_start, range, ECLASS_MISC ); 1316 } 1317 1318 } else { 1319 1320 /* slow scheme */ 1321 1322 VG_(debugLog)(2, "transtab", 1323 " SLOW, ec = %d\n", ec); 1324 1325 for (sno = 0; sno < N_SECTORS; sno++) { 1326 sec = §ors[sno]; 1327 if (sec->tc == NULL) 1328 continue; 1329 anyDeleted |= delete_translations_in_sector( 1330 sec, guest_start, range ); 1331 } 1332 1333 } 1334 1335 if (anyDeleted) 1336 invalidateFastCache(); 1337 1338 /* don't forget the no-redir cache */ 1339 unredir_discard_translations( guest_start, range ); 1340 1341 /* Post-deletion sanity check */ 1342 if (VG_(clo_sanity_level >= 4)) { 1343 Int i; 1344 TTEntry* tte; 1345 Bool sane = sanity_check_all_sectors(); 1346 vg_assert(sane); 1347 /* But now, also check the requested address range isn't 1348 present anywhere. */ 1349 for (sno = 0; sno < N_SECTORS; sno++) { 1350 sec = §ors[sno]; 1351 if (sec->tc == NULL) 1352 continue; 1353 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 1354 tte = &sec->tt[i]; 1355 if (tte->status != InUse) 1356 continue; 1357 vg_assert(!overlaps( guest_start, range, &tte->vge )); 1358 } 1359 } 1360 } 1361 } 1362 1363 1364 /*------------------------------------------------------------*/ 1365 /*--- AUXILIARY: the unredirected TT/TC ---*/ 1366 /*------------------------------------------------------------*/ 1367 1368 /* A very simple translation cache which holds a small number of 1369 unredirected translations. 
   This is completely independent of the
   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   both structures are simply dumped and we start over.

   Since these translations are unredirected, the search key is (by
   definition) the first address entry in the .vge field. */

/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))

/* One slot in the unredirected translation table. */
typedef
   struct {
      VexGuestExtents vge;    /* guest extents covered */
      Addr            hcode;  /* host code address within unredir_tc */
      Bool            inUse;  /* slot currently valid? */
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong *unredir_tc;
static Int    unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;


/* (Re)initialise the unredirected tt/tc.  The tc backing store is
   mmap'd once on first use; subsequent calls just mark everything
   empty. */
static void init_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tc == NULL) {
      SysRes sres = VG_(am_mmap_anon_float_valgrind)
                       ( N_UNREDIR_TT * UNREDIR_SZB );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
                                     N_UNREDIR_TT * UNREDIR_SZB);
         /*NOTREACHED*/
      }
      unredir_tc = (ULong *)(AddrH)sr_Res(sres);
   }
   unredir_tc_used = 0;
   for (i = 0; i < N_UNREDIR_TT; i++)
      unredir_tt[i].inUse = False;
   unredir_tt_highwater = -1;
}

/* Do a sanity check; return False on failure. */
static Bool sanity_check_redir_tt_tc ( void )
{
   Int i;
   if (unredir_tt_highwater < -1) return False;
   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;

   /* No slot above the high-water mark may be in use. */
   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
      if (unredir_tt[i].inUse)
         return False;

   if (unredir_tc_used < 0) return False;
   if (unredir_tc_used > N_UNREDIR_TCQ) return False;

   return True;
}


/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   is temporarily in code[0 .. code_len-1].
*/
void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
                                   Addr64           entry,
                                   AddrH            code,
                                   UInt             code_len )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_redir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How many unredir_tc ULongs are needed (rounded up). */
   code_szQ = (code_len + 7) / 8;

   /* Look for an empty unredir_tt slot */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
      i = 0;
   }

   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   /* Copy the code into the next free chunk of unredir_tc. */
   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   invalidate_icache( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}

/* Linear search of the unredirected tt; keyed on .vge.base[0]. */
Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
                                    Addr64        guest_addr )
{
   Int i;
   for (i = 0; i < N_UNREDIR_TT; i++) {
      if (!unredir_tt[i].inUse)
         continue;
      if (unredir_tt[i].vge.base[0] == guest_addr) {
         *result = (AddrH)unredir_tt[i].hcode;
         return True;
      }
   }
   return False;
}

/* Invalidate unredirected translations overlapping the given range.
   Only slots up to the high-water mark need inspecting. */
static void unredir_discard_translations( Addr64 guest_start, ULong range )
{
   Int i;

   vg_assert(sanity_check_redir_tt_tc());

   for (i = 0; i <= unredir_tt_highwater; i++) {
      if (unredir_tt[i].inUse
          && overlaps( guest_start, range, &unredir_tt[i].vge))
         unredir_tt[i].inUse = False;
   }
}


/*------------------------------------------------------------*/
/*--- Initialisation.                                      ---*/
/*------------------------------------------------------------*/

void VG_(init_tt_tc) ( void )
{
   Int i, j, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(Addr64) == 8);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure.
*/ 1549 vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) )); 1550 1551 if (VG_(clo_verbosity) > 2) 1552 VG_(message)(Vg_DebugMsg, 1553 "TT/TC: VG_(init_tt_tc) " 1554 "(startup of code management)\n"); 1555 1556 /* Figure out how big each tc area should be. */ 1557 avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8; 1558 tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ); 1559 1560 /* Ensure the calculated value is not way crazy. */ 1561 vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE); 1562 vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE); 1563 1564 /* Initialise the sectors */ 1565 youngest_sector = 0; 1566 for (i = 0; i < N_SECTORS; i++) { 1567 sectors[i].tc = NULL; 1568 sectors[i].tt = NULL; 1569 sectors[i].tc_next = NULL; 1570 sectors[i].tt_n_inuse = 0; 1571 for (j = 0; j < ECLASS_N; j++) { 1572 sectors[i].ec2tte_size[j] = 0; 1573 sectors[i].ec2tte_used[j] = 0; 1574 sectors[i].ec2tte[j] = NULL; 1575 } 1576 } 1577 1578 /* Initialise the sector_search_order hint table. */ 1579 for (i = 0; i < N_SECTORS; i++) 1580 sector_search_order[i] = -1; 1581 1582 /* Initialise the fast caches. If not profiling (the usual case), 1583 we have to explicitly invalidate the fastN cache as 1584 invalidateFastCache() won't do that for us. 
*/ 1585 invalidateFastCache(); 1586 if (VG_(clo_profile_flags) == 0) 1587 invalidateFastNCache(); 1588 1589 /* and the unredir tt/tc */ 1590 init_unredir_tt_tc(); 1591 1592 if (VG_(clo_verbosity) > 2) { 1593 VG_(message)(Vg_DebugMsg, 1594 "TT/TC: cache: %d sectors of %d bytes each = %d total\n", 1595 N_SECTORS, 8 * tc_sector_szQ, 1596 N_SECTORS * 8 * tc_sector_szQ ); 1597 VG_(message)(Vg_DebugMsg, 1598 "TT/TC: table: %d total entries, max occupancy %d (%d%%)\n", 1599 N_SECTORS * N_TTES_PER_SECTOR, 1600 N_SECTORS * N_TTES_PER_SECTOR_USABLE, 1601 SECTOR_TT_LIMIT_PERCENT ); 1602 } 1603 1604 VG_(debugLog)(2, "transtab", 1605 "cache: %d sectors of %d bytes each = %d total\n", 1606 N_SECTORS, 8 * tc_sector_szQ, 1607 N_SECTORS * 8 * tc_sector_szQ ); 1608 VG_(debugLog)(2, "transtab", 1609 "table: %d total entries, max occupancy %d (%d%%)\n", 1610 N_SECTORS * N_TTES_PER_SECTOR, 1611 N_SECTORS * N_TTES_PER_SECTOR_USABLE, 1612 SECTOR_TT_LIMIT_PERCENT ); 1613 } 1614 1615 1616 /*------------------------------------------------------------*/ 1617 /*--- Printing out statistics. ---*/ 1618 /*------------------------------------------------------------*/ 1619 1620 static ULong safe_idiv( ULong a, ULong b ) 1621 { 1622 return (b == 0 ? 0 : a / b); 1623 } 1624 1625 UInt VG_(get_bbs_translated) ( void ) 1626 { 1627 return n_in_count; 1628 } 1629 1630 void VG_(print_tt_tc_stats) ( void ) 1631 { 1632 VG_(message)(Vg_DebugMsg, 1633 " tt/tc: %'llu tt lookups requiring %'llu probes\n", 1634 n_full_lookups, n_lookup_probes ); 1635 VG_(message)(Vg_DebugMsg, 1636 " tt/tc: %'llu fast-cache updates, %'llu flushes\n", 1637 n_fast_updates, n_fast_flushes ); 1638 1639 VG_(message)(Vg_DebugMsg, 1640 " transtab: new %'lld " 1641 "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n", 1642 n_in_count, n_in_osize, n_in_tsize, 1643 safe_idiv(10*n_in_tsize, n_in_osize), 1644 n_in_sc_count); 1645 VG_(message)(Vg_DebugMsg, 1646 " transtab: dumped %'llu (%'llu -> ?" 
"?)\n", 1647 n_dump_count, n_dump_osize ); 1648 VG_(message)(Vg_DebugMsg, 1649 " transtab: discarded %'llu (%'llu -> ?" "?)\n", 1650 n_disc_count, n_disc_osize ); 1651 1652 if (0) { 1653 Int i; 1654 VG_(printf)("\n"); 1655 for (i = 0; i < ECLASS_N; i++) { 1656 VG_(printf)(" %4d", sectors[0].ec2tte_used[i]); 1657 if (i % 16 == 15) 1658 VG_(printf)("\n"); 1659 } 1660 VG_(printf)("\n\n"); 1661 } 1662 } 1663 1664 /*------------------------------------------------------------*/ 1665 /*--- Printing out of profiling results. ---*/ 1666 /*------------------------------------------------------------*/ 1667 1668 static ULong score ( TTEntry* tte ) 1669 { 1670 return ((ULong)tte->weight) * ((ULong)tte->count); 1671 } 1672 1673 ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops ) 1674 { 1675 Int sno, i, r, s; 1676 ULong score_total; 1677 1678 /* First, compute the total weighted count, and find the top N 1679 ttes. tops contains pointers to the most-used n_tops blocks, in 1680 descending order (viz, tops[0] is the highest scorer). */ 1681 for (i = 0; i < n_tops; i++) { 1682 tops[i].addr = 0; 1683 tops[i].score = 0; 1684 } 1685 1686 score_total = 0; 1687 1688 for (sno = 0; sno < N_SECTORS; sno++) { 1689 if (sectors[sno].tc == NULL) 1690 continue; 1691 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 1692 if (sectors[sno].tt[i].status != InUse) 1693 continue; 1694 score_total += score(§ors[sno].tt[i]); 1695 /* Find the rank for sectors[sno].tt[i]. */ 1696 r = n_tops-1; 1697 while (True) { 1698 if (r == -1) 1699 break; 1700 if (tops[r].addr == 0) { 1701 r--; 1702 continue; 1703 } 1704 if ( score(§ors[sno].tt[i]) > tops[r].score ) { 1705 r--; 1706 continue; 1707 } 1708 break; 1709 } 1710 r++; 1711 vg_assert(r >= 0 && r <= n_tops); 1712 /* This bb should be placed at r, and bbs above it shifted 1713 upwards one slot. 
*/ 1714 if (r < n_tops) { 1715 for (s = n_tops-1; s > r; s--) 1716 tops[s] = tops[s-1]; 1717 tops[r].addr = sectors[sno].tt[i].entry; 1718 tops[r].score = score( §ors[sno].tt[i] ); 1719 } 1720 } 1721 } 1722 1723 return score_total; 1724 } 1725 1726 /*--------------------------------------------------------------------*/ 1727 /*--- end ---*/ 1728 /*--------------------------------------------------------------------*/ 1729