1 2 /*--------------------------------------------------------------------*/ 3 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/ 4 /*--- accessibility (A) and validity (V) status of each byte. ---*/ 5 /*--- mc_main.c ---*/ 6 /*--------------------------------------------------------------------*/ 7 8 /* 9 This file is part of MemCheck, a heavyweight Valgrind tool for 10 detecting memory errors. 11 12 Copyright (C) 2000-2010 Julian Seward 13 jseward (at) acm.org 14 15 This program is free software; you can redistribute it and/or 16 modify it under the terms of the GNU General Public License as 17 published by the Free Software Foundation; either version 2 of the 18 License, or (at your option) any later version. 19 20 This program is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of 22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 General Public License for more details. 24 25 You should have received a copy of the GNU General Public License 26 along with this program; if not, write to the Free Software 27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 28 02111-1307, USA. 29 30 The GNU General Public License is contained in the file COPYING. 31 */ 32 33 #include "pub_tool_basics.h" 34 #include "pub_tool_aspacemgr.h" 35 #include "pub_tool_hashtable.h" // For mc_include.h 36 #include "pub_tool_libcbase.h" 37 #include "pub_tool_libcassert.h" 38 #include "pub_tool_libcprint.h" 39 #include "pub_tool_machine.h" 40 #include "pub_tool_mallocfree.h" 41 #include "pub_tool_options.h" 42 #include "pub_tool_oset.h" 43 #include "pub_tool_replacemalloc.h" 44 #include "pub_tool_tooliface.h" 45 #include "pub_tool_threadstate.h" 46 47 #include "mc_include.h" 48 #include "memcheck.h" /* for client requests */ 49 50 51 /* Set to 1 to do a little more sanity checking */ 52 #define VG_DEBUG_MEMORY 0 53 54 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args) 55 56 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */ 57 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */ 58 59 60 /*------------------------------------------------------------*/ 61 /*--- Fast-case knobs ---*/ 62 /*------------------------------------------------------------*/ 63 64 // Comment these out to disable the fast cases (don't just set them to zero). 65 66 #define PERF_FAST_LOADV 1 67 #define PERF_FAST_STOREV 1 68 69 #define PERF_FAST_SARP 1 70 71 #define PERF_FAST_STACK 1 72 #define PERF_FAST_STACK2 1 73 74 /* Change this to 1 to enable assertions on origin tracking cache fast 75 paths */ 76 #define OC_ENABLE_ASSERTIONS 0 77 78 79 /*------------------------------------------------------------*/ 80 /*--- Comments on the origin tracking implementation ---*/ 81 /*------------------------------------------------------------*/ 82 83 /* See detailed comment entitled 84 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION 85 which is contained further on in this file. */ 86 87 88 /*------------------------------------------------------------*/ 89 /*--- V bits and A bits ---*/ 90 /*------------------------------------------------------------*/ 91 92 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck 93 thinks the corresponding value bit is defined. And every memory byte 94 has an A bit, which tracks whether Memcheck thinks the program can access 95 it safely (ie. it's mapped, and has at least one of the RWX permission bits 96 set). 
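   For example (illustrative only; not code from this file):

      int   x;           // x's 32 shadow V bits say "undefined"
      int   y = x + 1;   // y's V bits become undefined too; no error yet
      if (y > 0) { }     // error: a conditional jump depends on V bits
                         //   that say "undefined"
      char* p = NULL;    // the byte at address 0 is marked unaddressable,
      *p = 'z';          //   so this store is reported as an A-bit error
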
   So every N-bit register is shadowed with N V bits, and every memory
   byte is shadowed with 8 V bits and one A bit.

   In the implementation, we use two forms of compression (compressed V bits
   and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
   for memory.

   Memcheck also tracks extra information about each heap block that is
   allocated, for detecting memory leaks and other purposes.
*/

/*------------------------------------------------------------*/
/*--- Basic A/V bitmap representation.                     ---*/
/*------------------------------------------------------------*/

/* All reads and writes are checked against a memory map (a.k.a. shadow
   memory), which records the state of all memory in the process.

   On 32-bit machines the memory map is organised as follows.
   The top 16 bits of an address are used to index into a top-level
   map table, containing 65536 entries.  Each entry is a pointer to a
   second-level map, which records the accessibility and validity
   permissions for the 65536 bytes indexed by the lower 16 bits of the
   address.  Each byte is represented by two bits (details are below).  So
   each second-level map contains 16384 bytes.  This two-level arrangement
   conveniently divides the 4G address space into 64k lumps, each of size
   64k bytes.

   All entries in the primary (top-level) map must point to a valid
   secondary (second-level) map.  Since many of the 64kB chunks will
   have the same status for every bit -- ie. noaccess (for unused
   address space) or entirely addressable and defined (for code segments) --
   there are three distinguished secondary maps, which indicate 'noaccess',
   'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
   map entry points to the relevant distinguished map.  In practice,
   typically more than half of the addressable memory is represented with
   the 'undefined' or 'defined' distinguished secondary map, so this gives
   a good space saving.  It also lets us set the V+A bits of large address
   regions quickly in set_address_range_perms().

   On 64-bit machines it's more complicated.  If we followed the same basic
   scheme we'd have a four-level table which would require too many memory
   accesses.  So instead the top-level map table has 2^22 entries (indexed
   using bits 16..37 of the address); this covers the bottom 256GB.  Any
   accesses above 256GB are handled with a slow, sparse auxiliary table.
   Valgrind's address space manager tries very hard to keep things below
   this 256GB barrier so that performance doesn't suffer too much.

   Note that this file has a lot of different functions for reading and
   writing shadow memory.  Only a couple are strictly necessary (eg.
   get_vabits2 and set_vabits2); most are just specialised for specific
   common cases to improve performance.

   Aside: the V+A bits are less precise than they could be -- we have no way
   of marking memory as read-only.  It would be great if we could add an
   extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
   which requires 2.3 bits to hold, and there's no way to do that elegantly
   -- we'd have to double up to 4 bits of metadata per byte, which doesn't
   seem worth it.
*/
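/* Illustrative sketch (not part of the tool): how a lookup decomposes an
   address under the two-level scheme described above.  The names mirror
   N_PRIMARY_MAP, primary_map[], SM_OFF and the VA_BITS2_ constants defined
   below; the example address and the 'state' variable are made up.

      Addr  a      = 0x0000000812345678;   // some address below 256GB
      UWord pm_idx = a >> 16;              // index into primary_map[]
      UWord sm_off = (a & 0xFFFF) >> 2;    // index into sm->vabits8[] (SM_OFF)
      UWord shift  = (a & 3) << 1;         // which 2-bit field in that chunk
      UWord state  = (primary_map[pm_idx]->vabits8[sm_off] >> shift) & 3;
      // state is one of VA_BITS2_{NOACCESS,UNDEFINED,DEFINED,PARTDEFINED}

   Addresses above MAX_PRIMARY_ADDRESS do not use primary_map[] at all;
   they instead go through the auxiliary primary map (auxmap_L1/auxmap_L2,
   defined further down). */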
/* --------------- Basic configuration --------------- */

/* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */

#if VG_WORDSIZE == 4

/* cover the entire address space */
#  define N_PRIMARY_BITS  16

#else

/* Just handle the first 256G fast and the rest via auxiliary
   primaries.  If you change this, Memcheck will assert at startup.
   See the definition of UNALIGNED_OR_HIGH for extensive comments. */
#  define N_PRIMARY_BITS  22

#endif


/* Do not change this. */
#define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)

/* Do not change this. */
#define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)


/* --------------- Secondary maps --------------- */

// Each byte of memory conceptually has an A bit, which indicates its
// addressability, and 8 V bits, which indicate its definedness.
//
// But because very few bytes are partially defined, we can use a nice
// compression scheme to reduce the size of shadow memory.  Each byte of
// memory has 2 bits which indicate its state (ie. its V+A bits):
//
//   00:  noaccess    (unaddressable but treated as fully defined)
//   01:  undefined   (addressable and fully undefined)
//   10:  defined     (addressable and fully defined)
//   11:  partdefined (addressable and partially defined)
//
// In the "partdefined" case, we use a secondary table to store the V bits.
// Each entry in the secondary-V-bits table maps a byte address to its 8 V
// bits.
//
// We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
// four bytes (32 bits) of memory are in each chunk.  Hence the name
// "vabits8".  This lets us get the V+A bits for four bytes at a time
// easily (without having to do any shifting and/or masking), and that is a
// very common operation.  (Note that although each vabits8 chunk
// is 8 bits in size, it represents 32 bits of memory.)
//
// The representation is "inverse" little-endian... each 4 bytes of
// memory is represented by a 1 byte value, where:
//
// - the status of byte (a+0) is held in bits [1..0]
// - the status of byte (a+1) is held in bits [3..2]
// - the status of byte (a+2) is held in bits [5..4]
// - the status of byte (a+3) is held in bits [7..6]
//
// It's "inverse" because endianness normally describes a mapping from
// value bits to memory addresses;  in this case the mapping is inverted.
// Ie. instead of particular value bits being held in certain addresses, in
// this case certain addresses are represented by particular value bits.
// See insert_vabits2_into_vabits8() for an example.
//
// But note that we don't compress the V bits stored in registers;  they
// need to be explicit to make the shadow operations possible.  Therefore
// when moving values between registers and memory we need to convert
// between the expanded in-register format and the compressed in-memory
// format.  This isn't so difficult; it just requires careful attention in
// a few places.

// These represent 8 bits of memory.
#define VA_BITS2_NOACCESS     0x0      // 00b
#define VA_BITS2_UNDEFINED    0x1      // 01b
#define VA_BITS2_DEFINED      0x2      // 10b
#define VA_BITS2_PARTDEFINED  0x3      // 11b

// These represent 16 bits of memory.
#define VA_BITS4_NOACCESS     0x0      // 00_00b
#define VA_BITS4_UNDEFINED    0x5      // 01_01b
#define VA_BITS4_DEFINED      0xa      // 10_10b

// These represent 32 bits of memory.
241 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b 242 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b 243 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b 244 245 // These represent 64 bits of memory. 246 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2 247 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2 248 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2 249 250 251 #define SM_CHUNKS 16384 252 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2) 253 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3) 254 255 // Paranoia: it's critical for performance that the requested inlining 256 // occurs. So try extra hard. 257 #define INLINE inline __attribute__((always_inline)) 258 259 static INLINE Addr start_of_this_sm ( Addr a ) { 260 return (a & (~SM_MASK)); 261 } 262 static INLINE Bool is_start_of_sm ( Addr a ) { 263 return (start_of_this_sm(a) == a); 264 } 265 266 typedef 267 struct { 268 UChar vabits8[SM_CHUNKS]; 269 } 270 SecMap; 271 272 // 3 distinguished secondary maps, one for no-access, one for 273 // accessible but undefined, and one for accessible and defined. 274 // Distinguished secondaries may never be modified. 275 #define SM_DIST_NOACCESS 0 276 #define SM_DIST_UNDEFINED 1 277 #define SM_DIST_DEFINED 2 278 279 static SecMap sm_distinguished[3]; 280 281 static INLINE Bool is_distinguished_sm ( SecMap* sm ) { 282 return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2]; 283 } 284 285 // Forward declaration 286 static void update_SM_counts(SecMap* oldSM, SecMap* newSM); 287 288 /* dist_sm points to one of our three distinguished secondaries. Make 289 a copy of it so that we can write to it. 290 */ 291 static SecMap* copy_for_writing ( SecMap* dist_sm ) 292 { 293 SecMap* new_sm; 294 tl_assert(dist_sm == &sm_distinguished[0] 295 || dist_sm == &sm_distinguished[1] 296 || dist_sm == &sm_distinguished[2]); 297 298 new_sm = VG_(am_shadow_alloc)(sizeof(SecMap)); 299 if (new_sm == NULL) 300 VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap", 301 sizeof(SecMap) ); 302 VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap)); 303 update_SM_counts(dist_sm, new_sm); 304 return new_sm; 305 } 306 307 /* --------------- Stats --------------- */ 308 309 static Int n_issued_SMs = 0; 310 static Int n_deissued_SMs = 0; 311 static Int n_noaccess_SMs = N_PRIMARY_MAP; // start with many noaccess DSMs 312 static Int n_undefined_SMs = 0; 313 static Int n_defined_SMs = 0; 314 static Int n_non_DSM_SMs = 0; 315 static Int max_noaccess_SMs = 0; 316 static Int max_undefined_SMs = 0; 317 static Int max_defined_SMs = 0; 318 static Int max_non_DSM_SMs = 0; 319 320 /* # searches initiated in auxmap_L1, and # base cmps required */ 321 static ULong n_auxmap_L1_searches = 0; 322 static ULong n_auxmap_L1_cmps = 0; 323 /* # of searches that missed in auxmap_L1 and therefore had to 324 be handed to auxmap_L2. And the number of nodes inserted. 
*/ 325 static ULong n_auxmap_L2_searches = 0; 326 static ULong n_auxmap_L2_nodes = 0; 327 328 static Int n_sanity_cheap = 0; 329 static Int n_sanity_expensive = 0; 330 331 static Int n_secVBit_nodes = 0; 332 static Int max_secVBit_nodes = 0; 333 334 static void update_SM_counts(SecMap* oldSM, SecMap* newSM) 335 { 336 if (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --; 337 else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--; 338 else if (oldSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs --; 339 else { n_non_DSM_SMs --; 340 n_deissued_SMs ++; } 341 342 if (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++; 343 else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++; 344 else if (newSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs ++; 345 else { n_non_DSM_SMs ++; 346 n_issued_SMs ++; } 347 348 if (n_noaccess_SMs > max_noaccess_SMs ) max_noaccess_SMs = n_noaccess_SMs; 349 if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs; 350 if (n_defined_SMs > max_defined_SMs ) max_defined_SMs = n_defined_SMs; 351 if (n_non_DSM_SMs > max_non_DSM_SMs ) max_non_DSM_SMs = n_non_DSM_SMs; 352 } 353 354 /* --------------- Primary maps --------------- */ 355 356 /* The main primary map. This covers some initial part of the address 357 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is 358 handled using the auxiliary primary map. 359 */ 360 static SecMap* primary_map[N_PRIMARY_MAP]; 361 362 363 /* An entry in the auxiliary primary map. base must be a 64k-aligned 364 value, and sm points at the relevant secondary map. As with the 365 main primary map, the secondary may be either a real secondary, or 366 one of the three distinguished secondaries. DO NOT CHANGE THIS 367 LAYOUT: the first word has to be the key for OSet fast lookups. 368 */ 369 typedef 370 struct { 371 Addr base; 372 SecMap* sm; 373 } 374 AuxMapEnt; 375 376 /* Tunable parameter: How big is the L1 queue? */ 377 #define N_AUXMAP_L1 24 378 379 /* Tunable parameter: How far along the L1 queue to insert 380 entries resulting from L2 lookups? */ 381 #define AUXMAP_L1_INSERT_IX 12 382 383 static struct { 384 Addr base; 385 AuxMapEnt* ent; // pointer to the matching auxmap_L2 node 386 } 387 auxmap_L1[N_AUXMAP_L1]; 388 389 static OSet* auxmap_L2 = NULL; 390 391 static void init_auxmap_L1_L2 ( void ) 392 { 393 Int i; 394 for (i = 0; i < N_AUXMAP_L1; i++) { 395 auxmap_L1[i].base = 0; 396 auxmap_L1[i].ent = NULL; 397 } 398 399 tl_assert(0 == offsetof(AuxMapEnt,base)); 400 tl_assert(sizeof(Addr) == sizeof(void*)); 401 auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base), 402 /*fastCmp*/ NULL, 403 VG_(malloc), "mc.iaLL.1", VG_(free) ); 404 } 405 406 /* Check representation invariants; if OK return NULL; else a 407 descriptive bit of text. Also return the number of 408 non-distinguished secondary maps referred to from the auxiliary 409 primary maps. */ 410 411 static HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found ) 412 { 413 Word i, j; 414 /* On a 32-bit platform, the L2 and L1 tables should 415 both remain empty forever. 
416 417 On a 64-bit platform: 418 In the L2 table: 419 all .base & 0xFFFF == 0 420 all .base > MAX_PRIMARY_ADDRESS 421 In the L1 table: 422 all .base & 0xFFFF == 0 423 all (.base > MAX_PRIMARY_ADDRESS 424 .base & 0xFFFF == 0 425 and .ent points to an AuxMapEnt with the same .base) 426 or 427 (.base == 0 and .ent == NULL) 428 */ 429 *n_secmaps_found = 0; 430 if (sizeof(void*) == 4) { 431 /* 32-bit platform */ 432 if (VG_(OSetGen_Size)(auxmap_L2) != 0) 433 return "32-bit: auxmap_L2 is non-empty"; 434 for (i = 0; i < N_AUXMAP_L1; i++) 435 if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL) 436 return "32-bit: auxmap_L1 is non-empty"; 437 } else { 438 /* 64-bit platform */ 439 UWord elems_seen = 0; 440 AuxMapEnt *elem, *res; 441 AuxMapEnt key; 442 /* L2 table */ 443 VG_(OSetGen_ResetIter)(auxmap_L2); 444 while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) { 445 elems_seen++; 446 if (0 != (elem->base & (Addr)0xFFFF)) 447 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2"; 448 if (elem->base <= MAX_PRIMARY_ADDRESS) 449 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2"; 450 if (elem->sm == NULL) 451 return "64-bit: .sm in _L2 is NULL"; 452 if (!is_distinguished_sm(elem->sm)) 453 (*n_secmaps_found)++; 454 } 455 if (elems_seen != n_auxmap_L2_nodes) 456 return "64-bit: disagreement on number of elems in _L2"; 457 /* Check L1-L2 correspondence */ 458 for (i = 0; i < N_AUXMAP_L1; i++) { 459 if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL) 460 continue; 461 if (0 != (auxmap_L1[i].base & (Addr)0xFFFF)) 462 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1"; 463 if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS) 464 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1"; 465 if (auxmap_L1[i].ent == NULL) 466 return "64-bit: .ent is NULL in auxmap_L1"; 467 if (auxmap_L1[i].ent->base != auxmap_L1[i].base) 468 return "64-bit: _L1 and _L2 bases are inconsistent"; 469 /* Look it up in auxmap_L2. */ 470 key.base = auxmap_L1[i].base; 471 key.sm = 0; 472 res = VG_(OSetGen_Lookup)(auxmap_L2, &key); 473 if (res == NULL) 474 return "64-bit: _L1 .base not found in _L2"; 475 if (res != auxmap_L1[i].ent) 476 return "64-bit: _L1 .ent disagrees with _L2 entry"; 477 } 478 /* Check L1 contains no duplicates */ 479 for (i = 0; i < N_AUXMAP_L1; i++) { 480 if (auxmap_L1[i].base == 0) 481 continue; 482 for (j = i+1; j < N_AUXMAP_L1; j++) { 483 if (auxmap_L1[j].base == 0) 484 continue; 485 if (auxmap_L1[j].base == auxmap_L1[i].base) 486 return "64-bit: duplicate _L1 .base entries"; 487 } 488 } 489 } 490 return NULL; /* ok */ 491 } 492 493 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent ) 494 { 495 Word i; 496 tl_assert(ent); 497 tl_assert(rank >= 0 && rank < N_AUXMAP_L1); 498 for (i = N_AUXMAP_L1-1; i > rank; i--) 499 auxmap_L1[i] = auxmap_L1[i-1]; 500 auxmap_L1[rank].base = ent->base; 501 auxmap_L1[rank].ent = ent; 502 } 503 504 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a ) 505 { 506 AuxMapEnt key; 507 AuxMapEnt* res; 508 Word i; 509 510 tl_assert(a > MAX_PRIMARY_ADDRESS); 511 a &= ~(Addr)0xFFFF; 512 513 /* First search the front-cache, which is a self-organising 514 list containing the most popular entries. 
*/ 515 516 if (LIKELY(auxmap_L1[0].base == a)) 517 return auxmap_L1[0].ent; 518 if (LIKELY(auxmap_L1[1].base == a)) { 519 Addr t_base = auxmap_L1[0].base; 520 AuxMapEnt* t_ent = auxmap_L1[0].ent; 521 auxmap_L1[0].base = auxmap_L1[1].base; 522 auxmap_L1[0].ent = auxmap_L1[1].ent; 523 auxmap_L1[1].base = t_base; 524 auxmap_L1[1].ent = t_ent; 525 return auxmap_L1[0].ent; 526 } 527 528 n_auxmap_L1_searches++; 529 530 for (i = 0; i < N_AUXMAP_L1; i++) { 531 if (auxmap_L1[i].base == a) { 532 break; 533 } 534 } 535 tl_assert(i >= 0 && i <= N_AUXMAP_L1); 536 537 n_auxmap_L1_cmps += (ULong)(i+1); 538 539 if (i < N_AUXMAP_L1) { 540 if (i > 0) { 541 Addr t_base = auxmap_L1[i-1].base; 542 AuxMapEnt* t_ent = auxmap_L1[i-1].ent; 543 auxmap_L1[i-1].base = auxmap_L1[i-0].base; 544 auxmap_L1[i-1].ent = auxmap_L1[i-0].ent; 545 auxmap_L1[i-0].base = t_base; 546 auxmap_L1[i-0].ent = t_ent; 547 i--; 548 } 549 return auxmap_L1[i].ent; 550 } 551 552 n_auxmap_L2_searches++; 553 554 /* First see if we already have it. */ 555 key.base = a; 556 key.sm = 0; 557 558 res = VG_(OSetGen_Lookup)(auxmap_L2, &key); 559 if (res) 560 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res ); 561 return res; 562 } 563 564 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a ) 565 { 566 AuxMapEnt *nyu, *res; 567 568 /* First see if we already have it. */ 569 res = maybe_find_in_auxmap( a ); 570 if (LIKELY(res)) 571 return res; 572 573 /* Ok, there's no entry in the secondary map, so we'll have 574 to allocate one. */ 575 a &= ~(Addr)0xFFFF; 576 577 nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) ); 578 tl_assert(nyu); 579 nyu->base = a; 580 nyu->sm = &sm_distinguished[SM_DIST_NOACCESS]; 581 VG_(OSetGen_Insert)( auxmap_L2, nyu ); 582 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu ); 583 n_auxmap_L2_nodes++; 584 return nyu; 585 } 586 587 /* --------------- SecMap fundamentals --------------- */ 588 589 // In all these, 'low' means it's definitely in the main primary map, 590 // 'high' means it's definitely in the auxiliary table. 591 592 static INLINE SecMap** get_secmap_low_ptr ( Addr a ) 593 { 594 UWord pm_off = a >> 16; 595 # if VG_DEBUG_MEMORY >= 1 596 tl_assert(pm_off < N_PRIMARY_MAP); 597 # endif 598 return &primary_map[ pm_off ]; 599 } 600 601 static INLINE SecMap** get_secmap_high_ptr ( Addr a ) 602 { 603 AuxMapEnt* am = find_or_alloc_in_auxmap(a); 604 return &am->sm; 605 } 606 607 static SecMap** get_secmap_ptr ( Addr a ) 608 { 609 return ( a <= MAX_PRIMARY_ADDRESS 610 ? get_secmap_low_ptr(a) 611 : get_secmap_high_ptr(a)); 612 } 613 614 static INLINE SecMap* get_secmap_for_reading_low ( Addr a ) 615 { 616 return *get_secmap_low_ptr(a); 617 } 618 619 static INLINE SecMap* get_secmap_for_reading_high ( Addr a ) 620 { 621 return *get_secmap_high_ptr(a); 622 } 623 624 static INLINE SecMap* get_secmap_for_writing_low(Addr a) 625 { 626 SecMap** p = get_secmap_low_ptr(a); 627 if (UNLIKELY(is_distinguished_sm(*p))) 628 *p = copy_for_writing(*p); 629 return *p; 630 } 631 632 static INLINE SecMap* get_secmap_for_writing_high ( Addr a ) 633 { 634 SecMap** p = get_secmap_high_ptr(a); 635 if (UNLIKELY(is_distinguished_sm(*p))) 636 *p = copy_for_writing(*p); 637 return *p; 638 } 639 640 /* Produce the secmap for 'a', either from the primary map or by 641 ensuring there is an entry for it in the aux primary map. The 642 secmap may be a distinguished one as the caller will only want to 643 be able to read it. 
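   For instance, a reader such as get_vabits2() below does (sketch only):

      SecMap* sm      = get_secmap_for_reading(a);
      UChar   vabits8 = sm->vabits8[ SM_OFF(a) ];   // read-only use is fine

   but it must never write through 'sm', since 'sm' may be one of the
   shared, read-only distinguished maps; writers go through
   get_secmap_for_writing() instead, which copies-on-write first.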
644 */ 645 static INLINE SecMap* get_secmap_for_reading ( Addr a ) 646 { 647 return ( a <= MAX_PRIMARY_ADDRESS 648 ? get_secmap_for_reading_low (a) 649 : get_secmap_for_reading_high(a) ); 650 } 651 652 /* Produce the secmap for 'a', either from the primary map or by 653 ensuring there is an entry for it in the aux primary map. The 654 secmap may not be a distinguished one, since the caller will want 655 to be able to write it. If it is a distinguished secondary, make a 656 writable copy of it, install it, and return the copy instead. (COW 657 semantics). 658 */ 659 static SecMap* get_secmap_for_writing ( Addr a ) 660 { 661 return ( a <= MAX_PRIMARY_ADDRESS 662 ? get_secmap_for_writing_low (a) 663 : get_secmap_for_writing_high(a) ); 664 } 665 666 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't 667 allocate one if one doesn't already exist. This is used by the 668 leak checker. 669 */ 670 static SecMap* maybe_get_secmap_for ( Addr a ) 671 { 672 if (a <= MAX_PRIMARY_ADDRESS) { 673 return get_secmap_for_reading_low(a); 674 } else { 675 AuxMapEnt* am = maybe_find_in_auxmap(a); 676 return am ? am->sm : NULL; 677 } 678 } 679 680 /* --------------- Fundamental functions --------------- */ 681 682 static INLINE 683 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 ) 684 { 685 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6 686 *vabits8 &= ~(0x3 << shift); // mask out the two old bits 687 *vabits8 |= (vabits2 << shift); // mask in the two new bits 688 } 689 690 static INLINE 691 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 ) 692 { 693 UInt shift; 694 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned 695 shift = (a & 2) << 1; // shift by 0 or 4 696 *vabits8 &= ~(0xf << shift); // mask out the four old bits 697 *vabits8 |= (vabits4 << shift); // mask in the four new bits 698 } 699 700 static INLINE 701 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 ) 702 { 703 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6 704 vabits8 >>= shift; // shift the two bits to the bottom 705 return 0x3 & vabits8; // mask out the rest 706 } 707 708 static INLINE 709 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 ) 710 { 711 UInt shift; 712 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned 713 shift = (a & 2) << 1; // shift by 0 or 4 714 vabits8 >>= shift; // shift the four bits to the bottom 715 return 0xf & vabits8; // mask out the rest 716 } 717 718 // Note that these four are only used in slow cases. The fast cases do 719 // clever things like combine the auxmap check (in 720 // get_secmap_{read,writ}able) with alignment checks. 721 722 // *** WARNING! *** 723 // Any time this function is called, if it is possible that vabits2 724 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the 725 // sec-V-bits table must also be set! 726 static INLINE 727 void set_vabits2 ( Addr a, UChar vabits2 ) 728 { 729 SecMap* sm = get_secmap_for_writing(a); 730 UWord sm_off = SM_OFF(a); 731 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) ); 732 } 733 734 static INLINE 735 UChar get_vabits2 ( Addr a ) 736 { 737 SecMap* sm = get_secmap_for_reading(a); 738 UWord sm_off = SM_OFF(a); 739 UChar vabits8 = sm->vabits8[sm_off]; 740 return extract_vabits2_from_vabits8(a, vabits8); 741 } 742 743 // *** WARNING! 
*** 744 // Any time this function is called, if it is possible that any of the 745 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the 746 // corresponding entry(s) in the sec-V-bits table must also be set! 747 static INLINE 748 UChar get_vabits8_for_aligned_word32 ( Addr a ) 749 { 750 SecMap* sm = get_secmap_for_reading(a); 751 UWord sm_off = SM_OFF(a); 752 UChar vabits8 = sm->vabits8[sm_off]; 753 return vabits8; 754 } 755 756 static INLINE 757 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 ) 758 { 759 SecMap* sm = get_secmap_for_writing(a); 760 UWord sm_off = SM_OFF(a); 761 sm->vabits8[sm_off] = vabits8; 762 } 763 764 765 // Forward declarations 766 static UWord get_sec_vbits8(Addr a); 767 static void set_sec_vbits8(Addr a, UWord vbits8); 768 769 // Returns False if there was an addressability error. 770 static INLINE 771 Bool set_vbits8 ( Addr a, UChar vbits8 ) 772 { 773 Bool ok = True; 774 UChar vabits2 = get_vabits2(a); 775 if ( VA_BITS2_NOACCESS != vabits2 ) { 776 // Addressable. Convert in-register format to in-memory format. 777 // Also remove any existing sec V bit entry for the byte if no 778 // longer necessary. 779 if ( V_BITS8_DEFINED == vbits8 ) { vabits2 = VA_BITS2_DEFINED; } 780 else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; } 781 else { vabits2 = VA_BITS2_PARTDEFINED; 782 set_sec_vbits8(a, vbits8); } 783 set_vabits2(a, vabits2); 784 785 } else { 786 // Unaddressable! Do nothing -- when writing to unaddressable 787 // memory it acts as a black hole, and the V bits can never be seen 788 // again. So we don't have to write them at all. 789 ok = False; 790 } 791 return ok; 792 } 793 794 // Returns False if there was an addressability error. In that case, we put 795 // all defined bits into vbits8. 796 static INLINE 797 Bool get_vbits8 ( Addr a, UChar* vbits8 ) 798 { 799 Bool ok = True; 800 UChar vabits2 = get_vabits2(a); 801 802 // Convert the in-memory format to in-register format. 803 if ( VA_BITS2_DEFINED == vabits2 ) { *vbits8 = V_BITS8_DEFINED; } 804 else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; } 805 else if ( VA_BITS2_NOACCESS == vabits2 ) { 806 *vbits8 = V_BITS8_DEFINED; // Make V bits defined! 807 ok = False; 808 } else { 809 tl_assert( VA_BITS2_PARTDEFINED == vabits2 ); 810 *vbits8 = get_sec_vbits8(a); 811 } 812 return ok; 813 } 814 815 816 /* --------------- Secondary V bit table ------------ */ 817 818 // This table holds the full V bit pattern for partially-defined bytes 819 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow 820 // memory. 821 // 822 // Note: the nodes in this table can become stale. Eg. if you write a PDB, 823 // then overwrite the same address with a fully defined byte, the sec-V-bit 824 // node will not necessarily be removed. This is because checking for 825 // whether removal is necessary would slow down the fast paths. 826 // 827 // To avoid the stale nodes building up too much, we periodically (once the 828 // table reaches a certain size) garbage collect (GC) the table by 829 // traversing it and evicting any "sufficiently stale" nodes, ie. nodes that 830 // are stale and haven't been touched for a certain number of collections. 831 // If more than a certain proportion of nodes survived, we increase the 832 // table size so that GCs occur less often. 833 // 834 // (So this a bit different to a traditional GC, where you definitely want 835 // to remove any dead nodes. 
It's more like we have a resizable cache and 836 // we're trying to find the right balance how many elements to evict and how 837 // big to make the cache.) 838 // 839 // This policy is designed to avoid bad table bloat in the worst case where 840 // a program creates huge numbers of stale PDBs -- we would get this bloat 841 // if we had no GC -- while handling well the case where a node becomes 842 // stale but shortly afterwards is rewritten with a PDB and so becomes 843 // non-stale again (which happens quite often, eg. in perf/bz2). If we just 844 // remove all stale nodes as soon as possible, we just end up re-adding a 845 // lot of them in later again. The "sufficiently stale" approach avoids 846 // this. (If a program has many live PDBs, performance will just suck, 847 // there's no way around that.) 848 849 static OSet* secVBitTable; 850 851 // Stats 852 static ULong sec_vbits_new_nodes = 0; 853 static ULong sec_vbits_updates = 0; 854 855 // This must be a power of two; this is checked in mc_pre_clo_init(). 856 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover 857 // a larger address range) they take more space but we can get multiple 858 // partially-defined bytes in one if they are close to each other, reducing 859 // the number of total nodes. In practice sometimes they are clustered (eg. 860 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous 861 // row), but often not. So we choose something intermediate. 862 #define BYTES_PER_SEC_VBIT_NODE 16 863 864 // We make the table bigger if more than this many nodes survive a GC. 865 #define MAX_SURVIVOR_PROPORTION 0.5 866 867 // Each time we make the table bigger, we increase it by this much. 868 #define TABLE_GROWTH_FACTOR 2 869 870 // This defines "sufficiently stale" -- any node that hasn't been touched in 871 // this many GCs will be removed. 872 #define MAX_STALE_AGE 2 873 874 // We GC the table when it gets this many nodes in it, ie. it's effectively 875 // the table size. It can change. 876 static Int secVBitLimit = 1024; 877 878 // The number of GCs done, used to age sec-V-bit nodes for eviction. 879 // Because it's unsigned, wrapping doesn't matter -- the right answer will 880 // come out anyway. 881 static UInt GCs_done = 0; 882 883 typedef 884 struct { 885 Addr a; 886 UChar vbits8[BYTES_PER_SEC_VBIT_NODE]; 887 UInt last_touched; 888 } 889 SecVBitNode; 890 891 static OSet* createSecVBitTable(void) 892 { 893 return VG_(OSetGen_Create)( offsetof(SecVBitNode, a), 894 NULL, // use fast comparisons 895 VG_(malloc), "mc.cSVT.1 (sec VBit table)", 896 VG_(free) ); 897 } 898 899 static void gcSecVBitTable(void) 900 { 901 OSet* secVBitTable2; 902 SecVBitNode* n; 903 Int i, n_nodes = 0, n_survivors = 0; 904 905 GCs_done++; 906 907 // Create the new table. 908 secVBitTable2 = createSecVBitTable(); 909 910 // Traverse the table, moving fresh nodes into the new table. 911 VG_(OSetGen_ResetIter)(secVBitTable); 912 while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) { 913 Bool keep = False; 914 if ( (GCs_done - n->last_touched) <= MAX_STALE_AGE ) { 915 // Keep node if it's been touched recently enough (regardless of 916 // freshness/staleness). 917 keep = True; 918 } else { 919 // Keep node if any of its bytes are non-stale. Using 920 // get_vabits2() for the lookup is not very efficient, but I don't 921 // think it matters. 
922 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) { 923 if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) { 924 keep = True; // Found a non-stale byte, so keep 925 break; 926 } 927 } 928 } 929 930 if ( keep ) { 931 // Insert a copy of the node into the new table. 932 SecVBitNode* n2 = 933 VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode)); 934 *n2 = *n; 935 VG_(OSetGen_Insert)(secVBitTable2, n2); 936 } 937 } 938 939 // Get the before and after sizes. 940 n_nodes = VG_(OSetGen_Size)(secVBitTable); 941 n_survivors = VG_(OSetGen_Size)(secVBitTable2); 942 943 // Destroy the old table, and put the new one in its place. 944 VG_(OSetGen_Destroy)(secVBitTable); 945 secVBitTable = secVBitTable2; 946 947 if (VG_(clo_verbosity) > 1) { 948 Char percbuf[6]; 949 VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf); 950 VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n", 951 n_nodes, n_survivors, percbuf); 952 } 953 954 // Increase table size if necessary. 955 if (n_survivors > (secVBitLimit * MAX_SURVIVOR_PROPORTION)) { 956 secVBitLimit *= TABLE_GROWTH_FACTOR; 957 if (VG_(clo_verbosity) > 1) 958 VG_(message)(Vg_DebugMsg, "memcheck GC: increase table size to %d\n", 959 secVBitLimit); 960 } 961 } 962 963 static UWord get_sec_vbits8(Addr a) 964 { 965 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE); 966 Int amod = a % BYTES_PER_SEC_VBIT_NODE; 967 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned); 968 UChar vbits8; 969 tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a); 970 // Shouldn't be fully defined or fully undefined -- those cases shouldn't 971 // make it to the secondary V bits table. 972 vbits8 = n->vbits8[amod]; 973 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8); 974 return vbits8; 975 } 976 977 static void set_sec_vbits8(Addr a, UWord vbits8) 978 { 979 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE); 980 Int i, amod = a % BYTES_PER_SEC_VBIT_NODE; 981 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned); 982 // Shouldn't be fully defined or fully undefined -- those cases shouldn't 983 // make it to the secondary V bits table. 984 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8); 985 if (n) { 986 n->vbits8[amod] = vbits8; // update 987 n->last_touched = GCs_done; 988 sec_vbits_updates++; 989 } else { 990 // New node: assign the specific byte, make the rest invalid (they 991 // should never be read as-is, but be cautious). 992 n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode)); 993 n->a = aAligned; 994 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) { 995 n->vbits8[i] = V_BITS8_UNDEFINED; 996 } 997 n->vbits8[amod] = vbits8; 998 n->last_touched = GCs_done; 999 1000 // Do a table GC if necessary. Nb: do this before inserting the new 1001 // node, to avoid erroneously GC'ing the new node. 1002 if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) { 1003 gcSecVBitTable(); 1004 } 1005 1006 // Insert the new node. 1007 VG_(OSetGen_Insert)(secVBitTable, n); 1008 sec_vbits_new_nodes++; 1009 1010 n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable); 1011 if (n_secVBit_nodes > max_secVBit_nodes) 1012 max_secVBit_nodes = n_secVBit_nodes; 1013 } 1014 } 1015 1016 /* --------------- Endianness helpers --------------- */ 1017 1018 /* Returns the offset in memory of the byteno-th most significant byte 1019 in a wordszB-sized word, given the specified endianness. */ 1020 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian, 1021 UWord byteno ) { 1022 return bigendian ? 
(wordszB-1-byteno) : byteno; 1023 } 1024 1025 1026 /* --------------- Ignored address ranges --------------- */ 1027 1028 #define M_IGNORE_RANGES 4 1029 1030 typedef 1031 struct { 1032 Int used; 1033 Addr start[M_IGNORE_RANGES]; 1034 Addr end[M_IGNORE_RANGES]; 1035 } 1036 IgnoreRanges; 1037 1038 static IgnoreRanges ignoreRanges; 1039 1040 INLINE Bool MC_(in_ignored_range) ( Addr a ) 1041 { 1042 Int i; 1043 if (LIKELY(ignoreRanges.used == 0)) 1044 return False; 1045 for (i = 0; i < ignoreRanges.used; i++) { 1046 if (a >= ignoreRanges.start[i] && a < ignoreRanges.end[i]) 1047 return True; 1048 } 1049 return False; 1050 } 1051 1052 1053 /* Parse a 32- or 64-bit hex number, including leading 0x, from string 1054 starting at *ppc, putting result in *result, and return True. Or 1055 fail, in which case *ppc and *result are undefined, and return 1056 False. */ 1057 1058 static Bool isHex ( UChar c ) 1059 { 1060 return ((c >= '0' && c <= '9') || 1061 (c >= 'a' && c <= 'f') || 1062 (c >= 'A' && c <= 'F')); 1063 } 1064 1065 static UInt fromHex ( UChar c ) 1066 { 1067 if (c >= '0' && c <= '9') 1068 return (UInt)c - (UInt)'0'; 1069 if (c >= 'a' && c <= 'f') 1070 return 10 + (UInt)c - (UInt)'a'; 1071 if (c >= 'A' && c <= 'F') 1072 return 10 + (UInt)c - (UInt)'A'; 1073 /*NOTREACHED*/ 1074 tl_assert(0); 1075 return 0; 1076 } 1077 1078 static Bool parse_Addr ( UChar** ppc, Addr* result ) 1079 { 1080 Int used, limit = 2 * sizeof(Addr); 1081 if (**ppc != '0') 1082 return False; 1083 (*ppc)++; 1084 if (**ppc != 'x') 1085 return False; 1086 (*ppc)++; 1087 *result = 0; 1088 used = 0; 1089 while (isHex(**ppc)) { 1090 UInt d = fromHex(**ppc); 1091 tl_assert(d < 16); 1092 *result = ((*result) << 4) | fromHex(**ppc); 1093 (*ppc)++; 1094 used++; 1095 if (used > limit) return False; 1096 } 1097 if (used == 0) 1098 return False; 1099 return True; 1100 } 1101 1102 /* Parse two such numbers separated by a dash, or fail. */ 1103 1104 static Bool parse_range ( UChar** ppc, Addr* result1, Addr* result2 ) 1105 { 1106 Bool ok = parse_Addr(ppc, result1); 1107 if (!ok) 1108 return False; 1109 if (**ppc != '-') 1110 return False; 1111 (*ppc)++; 1112 ok = parse_Addr(ppc, result2); 1113 if (!ok) 1114 return False; 1115 return True; 1116 } 1117 1118 /* Parse a set of ranges separated by commas into 'ignoreRanges', or 1119 fail. */ 1120 1121 static Bool parse_ignore_ranges ( UChar* str0 ) 1122 { 1123 Addr start, end; 1124 Bool ok; 1125 UChar* str = str0; 1126 UChar** ppc = &str; 1127 ignoreRanges.used = 0; 1128 while (1) { 1129 ok = parse_range(ppc, &start, &end); 1130 if (!ok) 1131 return False; 1132 if (ignoreRanges.used >= M_IGNORE_RANGES) 1133 return False; 1134 ignoreRanges.start[ignoreRanges.used] = start; 1135 ignoreRanges.end[ignoreRanges.used] = end; 1136 ignoreRanges.used++; 1137 if (**ppc == 0) 1138 return True; 1139 if (**ppc != ',') 1140 return False; 1141 (*ppc)++; 1142 } 1143 /*NOTREACHED*/ 1144 return False; 1145 } 1146 1147 1148 /* --------------- Load/store slow cases. --------------- */ 1149 1150 static 1151 #ifndef PERF_FAST_LOADV 1152 INLINE 1153 #endif 1154 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian ) 1155 { 1156 /* Make up a 64-bit result V word, which contains the loaded data for 1157 valid addresses and Defined for invalid addresses. Iterate over 1158 the bytes in the word, from the most significant down to the 1159 least. */ 1160 ULong vbits64 = V_BITS64_UNDEFINED; 1161 SizeT szB = nBits / 8; 1162 SSizeT i; // Must be signed. 
1163 SizeT n_addrs_bad = 0; 1164 Addr ai; 1165 Bool partial_load_exemption_applies; 1166 UChar vbits8; 1167 Bool ok; 1168 1169 PROF_EVENT(30, "mc_LOADVn_slow"); 1170 1171 /* ------------ BEGIN semi-fast cases ------------ */ 1172 /* These deal quickly-ish with the common auxiliary primary map 1173 cases on 64-bit platforms. Are merely a speedup hack; can be 1174 omitted without loss of correctness/functionality. Note that in 1175 both cases the "sizeof(void*) == 8" causes these cases to be 1176 folded out by compilers on 32-bit platforms. These are derived 1177 from LOADV64 and LOADV32. 1178 */ 1179 if (LIKELY(sizeof(void*) == 8 1180 && nBits == 64 && VG_IS_8_ALIGNED(a))) { 1181 SecMap* sm = get_secmap_for_reading(a); 1182 UWord sm_off16 = SM_OFF_16(a); 1183 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16]; 1184 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) 1185 return V_BITS64_DEFINED; 1186 if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) 1187 return V_BITS64_UNDEFINED; 1188 /* else fall into the slow case */ 1189 } 1190 if (LIKELY(sizeof(void*) == 8 1191 && nBits == 32 && VG_IS_4_ALIGNED(a))) { 1192 SecMap* sm = get_secmap_for_reading(a); 1193 UWord sm_off = SM_OFF(a); 1194 UWord vabits8 = sm->vabits8[sm_off]; 1195 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) 1196 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED); 1197 if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) 1198 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED); 1199 /* else fall into slow case */ 1200 } 1201 /* ------------ END semi-fast cases ------------ */ 1202 1203 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8); 1204 1205 for (i = szB-1; i >= 0; i--) { 1206 PROF_EVENT(31, "mc_LOADVn_slow(loop)"); 1207 ai = a + byte_offset_w(szB, bigendian, i); 1208 ok = get_vbits8(ai, &vbits8); 1209 if (!ok) n_addrs_bad++; 1210 vbits64 <<= 8; 1211 vbits64 |= vbits8; 1212 } 1213 1214 /* This is a hack which avoids producing errors for code which 1215 insists in stepping along byte strings in aligned word-sized 1216 chunks, and there is a partially defined word at the end. (eg, 1217 optimised strlen). Such code is basically broken at least WRT 1218 semantics of ANSI C, but sometimes users don't have the option 1219 to fix it, and so this option is provided. Note it is now 1220 defaulted to not-engaged. 1221 1222 A load from a partially-addressible place is allowed if: 1223 - the command-line flag is set 1224 - it's a word-sized, word-aligned load 1225 - at least one of the addresses in the word *is* valid 1226 */ 1227 partial_load_exemption_applies 1228 = MC_(clo_partial_loads_ok) && szB == VG_WORDSIZE 1229 && VG_IS_WORD_ALIGNED(a) 1230 && n_addrs_bad < VG_WORDSIZE; 1231 1232 if (n_addrs_bad > 0 && !partial_load_exemption_applies) 1233 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False ); 1234 1235 return vbits64; 1236 } 1237 1238 1239 static 1240 #ifndef PERF_FAST_STOREV 1241 INLINE 1242 #endif 1243 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian ) 1244 { 1245 SizeT szB = nBits / 8; 1246 SizeT i, n_addrs_bad = 0; 1247 UChar vbits8; 1248 Addr ai; 1249 Bool ok; 1250 1251 PROF_EVENT(35, "mc_STOREVn_slow"); 1252 1253 /* ------------ BEGIN semi-fast cases ------------ */ 1254 /* These deal quickly-ish with the common auxiliary primary map 1255 cases on 64-bit platforms. Are merely a speedup hack; can be 1256 omitted without loss of correctness/functionality. 
Note that in 1257 both cases the "sizeof(void*) == 8" causes these cases to be 1258 folded out by compilers on 32-bit platforms. These are derived 1259 from STOREV64 and STOREV32. 1260 */ 1261 if (LIKELY(sizeof(void*) == 8 1262 && nBits == 64 && VG_IS_8_ALIGNED(a))) { 1263 SecMap* sm = get_secmap_for_reading(a); 1264 UWord sm_off16 = SM_OFF_16(a); 1265 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16]; 1266 if (LIKELY( !is_distinguished_sm(sm) && 1267 (VA_BITS16_DEFINED == vabits16 || 1268 VA_BITS16_UNDEFINED == vabits16) )) { 1269 /* Handle common case quickly: a is suitably aligned, */ 1270 /* is mapped, and is addressible. */ 1271 // Convert full V-bits in register to compact 2-bit form. 1272 if (LIKELY(V_BITS64_DEFINED == vbytes)) { 1273 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED; 1274 return; 1275 } else if (V_BITS64_UNDEFINED == vbytes) { 1276 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED; 1277 return; 1278 } 1279 /* else fall into the slow case */ 1280 } 1281 /* else fall into the slow case */ 1282 } 1283 if (LIKELY(sizeof(void*) == 8 1284 && nBits == 32 && VG_IS_4_ALIGNED(a))) { 1285 SecMap* sm = get_secmap_for_reading(a); 1286 UWord sm_off = SM_OFF(a); 1287 UWord vabits8 = sm->vabits8[sm_off]; 1288 if (LIKELY( !is_distinguished_sm(sm) && 1289 (VA_BITS8_DEFINED == vabits8 || 1290 VA_BITS8_UNDEFINED == vabits8) )) { 1291 /* Handle common case quickly: a is suitably aligned, */ 1292 /* is mapped, and is addressible. */ 1293 // Convert full V-bits in register to compact 2-bit form. 1294 if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) { 1295 sm->vabits8[sm_off] = VA_BITS8_DEFINED; 1296 return; 1297 } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) { 1298 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED; 1299 return; 1300 } 1301 /* else fall into the slow case */ 1302 } 1303 /* else fall into the slow case */ 1304 } 1305 /* ------------ END semi-fast cases ------------ */ 1306 1307 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8); 1308 1309 /* Dump vbytes in memory, iterating from least to most significant 1310 byte. At the same time establish addressibility of the location. */ 1311 for (i = 0; i < szB; i++) { 1312 PROF_EVENT(36, "mc_STOREVn_slow(loop)"); 1313 ai = a + byte_offset_w(szB, bigendian, i); 1314 vbits8 = vbytes & 0xff; 1315 ok = set_vbits8(ai, vbits8); 1316 if (!ok) n_addrs_bad++; 1317 vbytes >>= 8; 1318 } 1319 1320 /* If an address error has happened, report it. */ 1321 if (n_addrs_bad > 0) 1322 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True ); 1323 } 1324 1325 1326 /*------------------------------------------------------------*/ 1327 /*--- Setting permissions over address ranges. ---*/ 1328 /*------------------------------------------------------------*/ 1329 1330 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16, 1331 UWord dsm_num ) 1332 { 1333 UWord sm_off, sm_off16; 1334 UWord vabits2 = vabits16 & 0x3; 1335 SizeT lenA, lenB, len_to_next_secmap; 1336 Addr aNext; 1337 SecMap* sm; 1338 SecMap** sm_ptr; 1339 SecMap* example_dsm; 1340 1341 PROF_EVENT(150, "set_address_range_perms"); 1342 1343 /* Check the V+A bits make sense. */ 1344 tl_assert(VA_BITS16_NOACCESS == vabits16 || 1345 VA_BITS16_UNDEFINED == vabits16 || 1346 VA_BITS16_DEFINED == vabits16); 1347 1348 // This code should never write PDBs; ensure this. (See comment above 1349 // set_vabits2().) 
1350 tl_assert(VA_BITS2_PARTDEFINED != vabits2); 1351 1352 if (lenT == 0) 1353 return; 1354 1355 if (lenT > 256 * 1024 * 1024) { 1356 if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) { 1357 Char* s = "unknown???"; 1358 if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess"; 1359 if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined"; 1360 if (vabits16 == VA_BITS16_DEFINED ) s = "defined"; 1361 VG_(message)(Vg_UserMsg, "Warning: set address range perms: " 1362 "large range [0x%lx, 0x%lx) (%s)\n", 1363 a, a + lenT, s); 1364 } 1365 } 1366 1367 #ifndef PERF_FAST_SARP 1368 /*------------------ debug-only case ------------------ */ 1369 { 1370 // Endianness doesn't matter here because all bytes are being set to 1371 // the same value. 1372 // Nb: We don't have to worry about updating the sec-V-bits table 1373 // after these set_vabits2() calls because this code never writes 1374 // VA_BITS2_PARTDEFINED values. 1375 SizeT i; 1376 for (i = 0; i < lenT; i++) { 1377 set_vabits2(a + i, vabits2); 1378 } 1379 return; 1380 } 1381 #endif 1382 1383 /*------------------ standard handling ------------------ */ 1384 1385 /* Get the distinguished secondary that we might want 1386 to use (part of the space-compression scheme). */ 1387 example_dsm = &sm_distinguished[dsm_num]; 1388 1389 // We have to handle ranges covering various combinations of partial and 1390 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case. 1391 // Cases marked with a '*' are common. 1392 // 1393 // TYPE PARTS USED 1394 // ---- ---------- 1395 // * one partial sec-map (p) 1 1396 // - one whole sec-map (P) 2 1397 // 1398 // * two partial sec-maps (pp) 1,3 1399 // - one partial, one whole sec-map (pP) 1,2 1400 // - one whole, one partial sec-map (Pp) 2,3 1401 // - two whole sec-maps (PP) 2,2 1402 // 1403 // * one partial, one whole, one partial (pPp) 1,2,3 1404 // - one partial, two whole (pPP) 1,2,2 1405 // - two whole, one partial (PPp) 2,2,3 1406 // - three whole (PPP) 2,2,2 1407 // 1408 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3 1409 // - one partial, N-1 whole (pP...PP) 1,2...2,2 1410 // - N-1 whole, one partial (PP...Pp) 2,2...2,3 1411 // - N whole (PP...PP) 2,2...2,3 1412 1413 // Break up total length (lenT) into two parts: length in the first 1414 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB. 1415 aNext = start_of_this_sm(a) + SM_SIZE; 1416 len_to_next_secmap = aNext - a; 1417 if ( lenT <= len_to_next_secmap ) { 1418 // Range entirely within one sec-map. Covers almost all cases. 1419 PROF_EVENT(151, "set_address_range_perms-single-secmap"); 1420 lenA = lenT; 1421 lenB = 0; 1422 } else if (is_start_of_sm(a)) { 1423 // Range spans at least one whole sec-map, and starts at the beginning 1424 // of a sec-map; skip to Part 2. 1425 PROF_EVENT(152, "set_address_range_perms-startof-secmap"); 1426 lenA = 0; 1427 lenB = lenT; 1428 goto part2; 1429 } else { 1430 // Range spans two or more sec-maps, first one is partial. 1431 PROF_EVENT(153, "set_address_range_perms-multiple-secmaps"); 1432 lenA = len_to_next_secmap; 1433 lenB = lenT - lenA; 1434 } 1435 1436 //------------------------------------------------------------------------ 1437 // Part 1: Deal with the first sec_map. Most of the time the range will be 1438 // entirely within a sec_map and this part alone will suffice. Also, 1439 // doing it this way lets us avoid repeatedly testing for the crossing of 1440 // a sec-map boundary within these loops. 
1441 //------------------------------------------------------------------------ 1442 1443 // If it's distinguished, make it undistinguished if necessary. 1444 sm_ptr = get_secmap_ptr(a); 1445 if (is_distinguished_sm(*sm_ptr)) { 1446 if (*sm_ptr == example_dsm) { 1447 // Sec-map already has the V+A bits that we want, so skip. 1448 PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick"); 1449 a = aNext; 1450 lenA = 0; 1451 } else { 1452 PROF_EVENT(155, "set_address_range_perms-dist-sm1"); 1453 *sm_ptr = copy_for_writing(*sm_ptr); 1454 } 1455 } 1456 sm = *sm_ptr; 1457 1458 // 1 byte steps 1459 while (True) { 1460 if (VG_IS_8_ALIGNED(a)) break; 1461 if (lenA < 1) break; 1462 PROF_EVENT(156, "set_address_range_perms-loop1a"); 1463 sm_off = SM_OFF(a); 1464 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) ); 1465 a += 1; 1466 lenA -= 1; 1467 } 1468 // 8-aligned, 8 byte steps 1469 while (True) { 1470 if (lenA < 8) break; 1471 PROF_EVENT(157, "set_address_range_perms-loop8a"); 1472 sm_off16 = SM_OFF_16(a); 1473 ((UShort*)(sm->vabits8))[sm_off16] = vabits16; 1474 a += 8; 1475 lenA -= 8; 1476 } 1477 // 1 byte steps 1478 while (True) { 1479 if (lenA < 1) break; 1480 PROF_EVENT(158, "set_address_range_perms-loop1b"); 1481 sm_off = SM_OFF(a); 1482 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) ); 1483 a += 1; 1484 lenA -= 1; 1485 } 1486 1487 // We've finished the first sec-map. Is that it? 1488 if (lenB == 0) 1489 return; 1490 1491 //------------------------------------------------------------------------ 1492 // Part 2: Fast-set entire sec-maps at a time. 1493 //------------------------------------------------------------------------ 1494 part2: 1495 // 64KB-aligned, 64KB steps. 1496 // Nb: we can reach here with lenB < SM_SIZE 1497 tl_assert(0 == lenA); 1498 while (True) { 1499 if (lenB < SM_SIZE) break; 1500 tl_assert(is_start_of_sm(a)); 1501 PROF_EVENT(159, "set_address_range_perms-loop64K"); 1502 sm_ptr = get_secmap_ptr(a); 1503 if (!is_distinguished_sm(*sm_ptr)) { 1504 PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm"); 1505 // Free the non-distinguished sec-map that we're replacing. This 1506 // case happens moderately often, enough to be worthwhile. 1507 VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap)); 1508 } 1509 update_SM_counts(*sm_ptr, example_dsm); 1510 // Make the sec-map entry point to the example DSM 1511 *sm_ptr = example_dsm; 1512 lenB -= SM_SIZE; 1513 a += SM_SIZE; 1514 } 1515 1516 // We've finished the whole sec-maps. Is that it? 1517 if (lenB == 0) 1518 return; 1519 1520 //------------------------------------------------------------------------ 1521 // Part 3: Finish off the final partial sec-map, if necessary. 1522 //------------------------------------------------------------------------ 1523 1524 tl_assert(is_start_of_sm(a) && lenB < SM_SIZE); 1525 1526 // If it's distinguished, make it undistinguished if necessary. 1527 sm_ptr = get_secmap_ptr(a); 1528 if (is_distinguished_sm(*sm_ptr)) { 1529 if (*sm_ptr == example_dsm) { 1530 // Sec-map already has the V+A bits that we want, so stop. 
1531 PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick"); 1532 return; 1533 } else { 1534 PROF_EVENT(162, "set_address_range_perms-dist-sm2"); 1535 *sm_ptr = copy_for_writing(*sm_ptr); 1536 } 1537 } 1538 sm = *sm_ptr; 1539 1540 // 8-aligned, 8 byte steps 1541 while (True) { 1542 if (lenB < 8) break; 1543 PROF_EVENT(163, "set_address_range_perms-loop8b"); 1544 sm_off16 = SM_OFF_16(a); 1545 ((UShort*)(sm->vabits8))[sm_off16] = vabits16; 1546 a += 8; 1547 lenB -= 8; 1548 } 1549 // 1 byte steps 1550 while (True) { 1551 if (lenB < 1) return; 1552 PROF_EVENT(164, "set_address_range_perms-loop1c"); 1553 sm_off = SM_OFF(a); 1554 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) ); 1555 a += 1; 1556 lenB -= 1; 1557 } 1558 } 1559 1560 1561 /* --- Set permissions for arbitrary address ranges --- */ 1562 1563 void MC_(make_mem_noaccess) ( Addr a, SizeT len ) 1564 { 1565 PROF_EVENT(40, "MC_(make_mem_noaccess)"); 1566 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len); 1567 set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS ); 1568 if (UNLIKELY( MC_(clo_mc_level) == 3 )) 1569 ocache_sarp_Clear_Origins ( a, len ); 1570 } 1571 1572 static void make_mem_undefined ( Addr a, SizeT len ) 1573 { 1574 PROF_EVENT(41, "make_mem_undefined"); 1575 DEBUG("make_mem_undefined(%p, %lu)\n", a, len); 1576 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED ); 1577 } 1578 1579 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag ) 1580 { 1581 PROF_EVENT(41, "MC_(make_mem_undefined)"); 1582 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len); 1583 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED ); 1584 if (UNLIKELY( MC_(clo_mc_level) == 3 )) 1585 ocache_sarp_Set_Origins ( a, len, otag ); 1586 } 1587 1588 static 1589 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len, 1590 ThreadId tid, UInt okind ) 1591 { 1592 UInt ecu; 1593 ExeContext* here; 1594 /* VG_(record_ExeContext) checks for validity of tid, and asserts 1595 if it is invalid. So no need to do it here. */ 1596 tl_assert(okind <= 3); 1597 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ ); 1598 tl_assert(here); 1599 ecu = VG_(get_ECU_from_ExeContext)(here); 1600 tl_assert(VG_(is_plausible_ECU)(ecu)); 1601 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind ); 1602 } 1603 1604 static 1605 void make_mem_undefined_w_tid ( Addr a, SizeT len, ThreadId tid ) { 1606 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN ); 1607 } 1608 1609 1610 void MC_(make_mem_defined) ( Addr a, SizeT len ) 1611 { 1612 PROF_EVENT(42, "MC_(make_mem_defined)"); 1613 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len); 1614 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED ); 1615 if (UNLIKELY( MC_(clo_mc_level) == 3 )) 1616 ocache_sarp_Clear_Origins ( a, len ); 1617 } 1618 1619 /* For each byte in [a,a+len), if the byte is addressable, make it be 1620 defined, but if it isn't addressible, leave it alone. In other 1621 words a version of MC_(make_mem_defined) that doesn't mess with 1622 addressibility. Low-performance implementation. 
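   Illustrative use (a sketch; this assumes the range is handed to us via
   the MAKE_MEM_DEFINED_IF_ADDRESSABLE client request in memcheck.h):

      char buf[64];      // stack memory: addressable but undefined
      (void) VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(buf, sizeof buf);
      // buf is now fully defined; any unaddressable bytes in the range
      // (there are none here) would have been left untouched.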
*/ 1623 static void make_mem_defined_if_addressable ( Addr a, SizeT len ) 1624 { 1625 SizeT i; 1626 UChar vabits2; 1627 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len); 1628 for (i = 0; i < len; i++) { 1629 vabits2 = get_vabits2( a+i ); 1630 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) { 1631 set_vabits2(a+i, VA_BITS2_DEFINED); 1632 if (UNLIKELY(MC_(clo_mc_level) >= 3)) { 1633 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */ 1634 } 1635 } 1636 } 1637 } 1638 1639 /* Similarly (needed for mprotect handling ..) */ 1640 static void make_mem_defined_if_noaccess ( Addr a, SizeT len ) 1641 { 1642 SizeT i; 1643 UChar vabits2; 1644 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len); 1645 for (i = 0; i < len; i++) { 1646 vabits2 = get_vabits2( a+i ); 1647 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) { 1648 set_vabits2(a+i, VA_BITS2_DEFINED); 1649 if (UNLIKELY(MC_(clo_mc_level) >= 3)) { 1650 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */ 1651 } 1652 } 1653 } 1654 } 1655 1656 /* --- Block-copy permissions (needed for implementing realloc() and 1657 sys_mremap). --- */ 1658 1659 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len ) 1660 { 1661 SizeT i, j; 1662 UChar vabits2, vabits8; 1663 Bool aligned, nooverlap; 1664 1665 DEBUG("MC_(copy_address_range_state)\n"); 1666 PROF_EVENT(50, "MC_(copy_address_range_state)"); 1667 1668 if (len == 0 || src == dst) 1669 return; 1670 1671 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst); 1672 nooverlap = src+len <= dst || dst+len <= src; 1673 1674 if (nooverlap && aligned) { 1675 1676 /* Vectorised fast case, when no overlap and suitably aligned */ 1677 /* vector loop */ 1678 i = 0; 1679 while (len >= 4) { 1680 vabits8 = get_vabits8_for_aligned_word32( src+i ); 1681 set_vabits8_for_aligned_word32( dst+i, vabits8 ); 1682 if (LIKELY(VA_BITS8_DEFINED == vabits8 1683 || VA_BITS8_UNDEFINED == vabits8 1684 || VA_BITS8_NOACCESS == vabits8)) { 1685 /* do nothing */ 1686 } else { 1687 /* have to copy secondary map info */ 1688 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 )) 1689 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) ); 1690 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 )) 1691 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) ); 1692 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 )) 1693 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) ); 1694 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 )) 1695 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) ); 1696 } 1697 i += 4; 1698 len -= 4; 1699 } 1700 /* fixup loop */ 1701 while (len >= 1) { 1702 vabits2 = get_vabits2( src+i ); 1703 set_vabits2( dst+i, vabits2 ); 1704 if (VA_BITS2_PARTDEFINED == vabits2) { 1705 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) ); 1706 } 1707 i++; 1708 len--; 1709 } 1710 1711 } else { 1712 1713 /* We have to do things the slow way */ 1714 if (src < dst) { 1715 for (i = 0, j = len-1; i < len; i++, j--) { 1716 PROF_EVENT(51, "MC_(copy_address_range_state)(loop)"); 1717 vabits2 = get_vabits2( src+j ); 1718 set_vabits2( dst+j, vabits2 ); 1719 if (VA_BITS2_PARTDEFINED == vabits2) { 1720 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) ); 1721 } 1722 } 1723 } 1724 1725 if (src > dst) { 1726 for (i = 0; i < len; i++) { 1727 PROF_EVENT(52, "MC_(copy_address_range_state)(loop)"); 1728 vabits2 = get_vabits2( src+i ); 1729 set_vabits2( dst+i, vabits2 ); 1730 if (VA_BITS2_PARTDEFINED == vabits2) { 1731 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) ); 1732 } 1733 } 1734 } 1735 } 1736 1737 } 1738 1739 1740 
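/* Illustrative sketch (editorial, not part of the original source): how
   the primitives above might combine when a heap block moves, e.g. in a
   growing realloc, which is one of the uses the comment above mentions.
   The names old_a/new_a/old_szB/new_szB are invented for the example, and
   the exact sequence in Memcheck's real realloc handling may differ.

      // New block starts out undefined (origin handling omitted here).
      make_mem_undefined ( new_a, new_szB );
      // Carry the old block's V+A bits across verbatim, so bytes that
      // were partially defined stay partially defined.
      MC_(copy_address_range_state) ( old_a, new_a, old_szB );
      // The old block is gone; accesses to it should now be errors.
      MC_(make_mem_noaccess) ( old_a, old_szB );
*/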
/*------------------------------------------------------------*/
1741 /*--- Origin tracking stuff - cache basics                 ---*/
1742 /*------------------------------------------------------------*/
1743
1744 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
1745 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1746
1747 Note that this implementation draws inspiration from the "origin
1748 tracking by value piggybacking" scheme described in "Tracking Bad
1749 Apples: Reporting the Origin of Null and Undefined Value Errors"
1750 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
1751 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
1752 implemented completely differently.
1753
1754 Origin tags and ECUs -- about the shadow values
1755 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1756
1757 This implementation tracks the defining point of all uninitialised
1758 values using so-called "origin tags", which are 32-bit integers,
1759 rather than using the values themselves to encode the origins. The
1760 latter, so-called "value piggybacking", is what the OOPSLA07 paper
1761 describes.
1762
1763 Origin tags, as tracked by the machinery below, are 32-bit unsigned
1764 ints (UInts), regardless of the machine's word size. Each tag
1765 comprises an upper 30-bit ECU field and a lower 2-bit
1766 'kind' field. The ECU field is a number given out by m_execontext
1767 and has a 1-1 mapping with ExeContext*s. An ECU can be used
1768 directly as an origin tag (otag), but in fact we want to put
1769 additional information in the 'kind' field to indicate roughly where the
1770 tag came from. This helps print more understandable error messages
1771 for the user -- it has no other purpose. In summary:
1772
1773 * Both ECUs and origin tags are represented as 32-bit words
1774
1775 * m_execontext and the core-tool interface deal purely in ECUs.
1776 They have no knowledge of origin tags - that is a purely
1777 Memcheck-internal matter.
1778
1779 * all valid ECUs have the lowest 2 bits zero and at least
1780 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
1781
1782 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
1783 constants defined in mc_include.h.
1784
1785 * to convert an otag back to an ECU, AND it with ~3
1786
1787 One important fact is that no valid otag is zero. A zero otag is
1788 used by the implementation to indicate "no origin", which could
1789 mean that either the value is defined, or it is undefined but the
1790 implementation somehow managed to lose the origin.
1791
1792 The ECU used for memory created by malloc etc is derived from the
1793 stack trace at the time the malloc etc happens. This means the
1794 mechanism can show the exact allocation point for heap-created
1795 uninitialised values.
1796
1797 In contrast, it is simply too expensive to create a complete
1798 backtrace for each stack allocation. Therefore we merely use a
1799 depth-1 backtrace for stack allocations, which can be done once at
1800 translation time, rather than N times at run time. The result of
1801 this is that, for stack-created uninitialised values, Memcheck can
1802 only show the allocating function, and not what called it.
1803 Furthermore, compilers tend to move the stack pointer just once at
1804 the start of the function, to allocate all locals, and so in fact
1805 the stack origin almost always simply points to the opening brace
1806 of the function.
Net result is, for stack origins, the mechanism 1807 can tell you in which function the undefined value was created, but 1808 that's all. Users will need to carefully check all locals in the 1809 specified function. 1810 1811 Shadowing registers and memory 1812 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1813 1814 Memory is shadowed using a two level cache structure (ocacheL1 and 1815 ocacheL2). Memory references are first directed to ocacheL1. This 1816 is a traditional 2-way set associative cache with 32-byte lines and 1817 approximate LRU replacement within each set. 1818 1819 A naive implementation would require storing one 32 bit otag for 1820 each byte of memory covered, a 4:1 space overhead. Instead, there 1821 is one otag for every 4 bytes of memory covered, plus a 4-bit mask 1822 that shows which of the 4 bytes have that shadow value and which 1823 have a shadow value of zero (indicating no origin). Hence a lot of 1824 space is saved, but the cost is that only one different origin per 1825 4 bytes of address space can be represented. This is a source of 1826 imprecision, but how much of a problem it really is remains to be 1827 seen. 1828 1829 A cache line that contains all zeroes ("no origins") contains no 1830 useful information, and can be ejected from the L1 cache "for 1831 free", in the sense that a read miss on the L1 causes a line of 1832 zeroes to be installed. However, ejecting a line containing 1833 nonzeroes risks losing origin information permanently. In order to 1834 prevent such lossage, ejected nonzero lines are placed in a 1835 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache 1836 lines. This can grow arbitrarily large, and so should ensure that 1837 Memcheck runs out of memory in preference to losing useful origin 1838 info due to cache size limitations. 1839 1840 Shadowing registers is a bit tricky, because the shadow values are 1841 32 bits, regardless of the size of the register. That gives a 1842 problem for registers smaller than 32 bits. The solution is to 1843 find spaces in the guest state that are unused, and use those to 1844 shadow guest state fragments smaller than 32 bits. For example, on 1845 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the 1846 shadow are allocated for the register's otag, then there are still 1847 12 bytes left over which could be used to shadow 3 other values. 1848 1849 This implies there is some non-obvious mapping from guest state 1850 (start,length) pairs to the relevant shadow offset (for the origin 1851 tags). And it is unfortunately guest-architecture specific. The 1852 mapping is contained in mc_machine.c, which is quite lengthy but 1853 straightforward. 1854 1855 Instrumenting the IR 1856 ~~~~~~~~~~~~~~~~~~~~ 1857 1858 Instrumentation is largely straightforward, and done by the 1859 functions schemeE and schemeS in mc_translate.c. These generate 1860 code for handling the origin tags of expressions (E) and statements 1861 (S) respectively. The rather strange names are a reference to the 1862 "compilation schemes" shown in Simon Peyton Jones' book "The 1863 Implementation of Functional Programming Languages" (Prentice Hall, 1864 1987, see 1865 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm). 1866 1867 schemeS merely arranges to move shadow values around the guest 1868 state to track the incoming IR. schemeE is largely trivial too. 1869 The only significant point is how to compute the otag corresponding 1870 to binary (or ternary, quaternary, etc) operator applications. 
The 1871 rule is simple: just take whichever value is larger (32-bit 1872 unsigned max). Constants get the special value zero. Hence this 1873 rule always propagates a nonzero (known) otag in preference to a 1874 zero (unknown, or more likely, value-is-defined) tag, as we want. 1875 If two different undefined values are inputs to a binary operator 1876 application, then which is propagated is arbitrary, but that 1877 doesn't matter, since the program is erroneous in using either of 1878 the values, and so there's no point in attempting to propagate 1879 both. 1880 1881 Since constants are abstracted to (otag) zero, much of the 1882 instrumentation code can be folded out without difficulty by the 1883 generic post-instrumentation IR cleanup pass, using these rules: 1884 Max32U(0,x) -> x, Max32U(x,0) -> x, Max32(x,y) where x and y are 1885 constants is evaluated at JIT time. And the resulting dead code 1886 removal. In practice this causes surprisingly few Max32Us to 1887 survive through to backend code generation. 1888 1889 Integration with the V-bits machinery 1890 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1891 1892 This is again largely straightforward. Mostly the otag and V bits 1893 stuff are independent. The only point of interaction is when the V 1894 bits instrumenter creates a call to a helper function to report an 1895 uninitialised value error -- in that case it must first use schemeE 1896 to get hold of the origin tag expression for the value, and pass 1897 that to the helper too. 1898 1899 There is the usual stuff to do with setting address range 1900 permissions. When memory is painted undefined, we must also know 1901 the origin tag to paint with, which involves some tedious plumbing, 1902 particularly to do with the fast case stack handlers. When memory 1903 is painted defined or noaccess then the origin tags must be forced 1904 to zero. 1905 1906 One of the goals of the implementation was to ensure that the 1907 non-origin tracking mode isn't slowed down at all. To do this, 1908 various functions to do with memory permissions setting (again, 1909 mostly pertaining to the stack) are duplicated for the with- and 1910 without-otag case. 1911 1912 Dealing with stack redzones, and the NIA cache 1913 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1914 1915 This is one of the few non-obvious parts of the implementation. 1916 1917 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small 1918 reserved area below the stack pointer, that can be used as scratch 1919 space by compiler generated code for functions. In the Memcheck 1920 sources this is referred to as the "stack redzone". The important 1921 thing here is that such redzones are considered volatile across 1922 function calls and returns. So Memcheck takes care to mark them as 1923 undefined for each call and return, on the afflicted platforms. 1924 Past experience shows this is essential in order to get reliable 1925 messages about uninitialised values that come from the stack. 1926 1927 So the question is, when we paint a redzone undefined, what origin 1928 tag should we use for it? Consider a function f() calling g(). If 1929 we paint the redzone using an otag derived from the ExeContext of 1930 the CALL/BL instruction in f, then any errors in g causing it to 1931 use uninitialised values that happen to lie in the redzone, will be 1932 reported as having their origin in f. Which is highly confusing. 
1933
1934 The same applies for returns: if, on a return, we paint the redzone
1935 using an origin tag derived from the ExeContext of the RET/BLR
1936 instruction in g, then any later errors in f causing it to use
1937 uninitialised values in the redzone will be reported as having
1938 their origin in g. Which is just as confusing.
1939
1940 To do it right, in both cases we need to use an origin tag which
1941 pertains to the instruction which dynamically follows the CALL/BL
1942 or RET/BLR. In short, one derived from the NIA - the "next
1943 instruction address".
1944
1945 To make this work, Memcheck's redzone-painting helper,
1946 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
1947 NIA. It converts the NIA to a 1-element ExeContext, and uses that
1948 ExeContext's ECU as the basis for the otag used to paint the
1949 redzone. The expensive part of this is converting an NIA into an
1950 ECU, since this happens once for every call and every return. So
1951 we use a simple 511-line, 2-way set associative cache
1952 (nia_to_ecu_cache) to cache the mappings, and that knocks most of
1953 the cost out.
1954
1955 Further background comments
1956 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1957
1958 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
1959 > it really just the address of the relevant ExeContext?
1960
1961 Well, it's not the address, but a value which has a 1-1 mapping
1962 with ExeContexts, and is guaranteed not to be zero, since zero
1963 denotes (to memcheck) "unknown origin or defined value". So these
1964 UInts are just numbers starting at 4 and incrementing by 4; each
1965 ExeContext is given a number when it is created. (*** NOTE this
1966 confuses otags and ECUs; see comments above ***).
1967
1968 Making these otags 32-bit regardless of the machine's word size
1969 makes the 64-bit implementation easier (next para). And it doesn't
1970 really limit us in any way, since for the tags to overflow would
1971 require that the program somehow caused 2^30-1 different
1972 ExeContexts to be created, in which case it is probably in deep
1973 trouble. Not to mention V will have soaked up many tens of
1974 gigabytes of memory merely to store them all.
1975
1976 So having 64-bit origins doesn't really buy you anything, and has
1977 the following downsides:
1978
1979 Suppose that instead, an otag is a UWord. This would mean that, on
1980 a 64-bit target,
1981
1982 1. It becomes hard to shadow any element of guest state which is
1983 smaller than 8 bytes. To do so means you'd need to find some
1984 8-byte-sized hole in the guest state which you don't want to
1985 shadow, and use that instead to hold the otag. On ppc64, the
1986 condition code register(s) are split into 20 UChar sized pieces,
1987 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
1988 and so that would entail finding 160 bytes somewhere else in the
1989 guest state.
1990
1991 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
1992 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
1993 same) and so I had to look for 4 untracked otag-sized areas in
1994 the guest state to make that possible.
1995
1996 The same problem exists of course when origin tags are only 32
1997 bits, but it's less extreme.
1998
1999 2. (More compelling) it doubles the size of the origin shadow
2000 memory. Given that the shadow memory is organised as a fixed
2001 size cache, and that accuracy of tracking is limited by origins
2002 falling out the cache due to space conflicts, this isn't good.
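
   To make the ECU/otag encoding described earlier concrete, here is a
   small editorial sketch (the authoritative definitions are
   VG_(is_plausible_ECU) and the MC_OKIND_ constants in mc_include.h):

      UInt ecu  = 0x0000A37C;            // plausible ECU: low 2 bits zero,
                                         // at least one upper bit nonzero
      UInt otag = ecu | MC_OKIND_STACK;  // mark it as a stack origin; the
                                         // kind lives in the low 2 bits
      UInt back = otag & ~3;             // recovers the ECU, 0x0000A37C
      // A zero otag never arises this way; zero is reserved to mean
      // "no origin known / value is defined".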
2003
2004 > Another question: is the origin tracking perfect, or are there
2005 > cases where it fails to determine an origin?
2006
2007 It is imperfect for at least the following reasons, and
2008 probably more:
2009
2010 * Insufficient capacity in the origin cache. When a line is
2011 evicted from the cache it is gone forever, and so subsequent
2012 queries for the line produce zero, indicating no origin
2013 information. Interestingly, a line containing all zeroes can be
2014 evicted "free" from the cache, since it contains no useful
2015 information, so there is scope perhaps for some cleverer cache
2016 management schemes. (*** NOTE, with the introduction of the
2017 second level origin tag cache, ocacheL2, this is no longer a
2018 problem. ***)
2019
2020 * The origin cache only stores one otag per 32-bits of address
2021 space, plus 4 bits indicating which of the 4 bytes has that tag
2022 and which are considered defined. The result is that if two
2023 undefined bytes in the same word are stored in memory, the first
2024 stored byte's origin will be lost and replaced by the origin for
2025 the second byte.
2026
2027 * Nonzero origin tags for defined values. Consider a binary
2028 operator application op(x,y). Suppose y is undefined (and so has
2029 a valid nonzero origin tag), and x is defined, but erroneously
2030 has a nonzero origin tag (defined values should have tag zero).
2031 If the erroneous tag has a numeric value greater than y's tag,
2032 then the rule for propagating origin tags through binary
2033 operations, which is simply to take the unsigned max of the two
2034 tags, will erroneously propagate x's tag rather than y's.
2035
2036 * Some obscure uses of x86/amd64 byte registers can cause lossage
2037 or confusion of origins. %AH .. %DH are treated as different
2038 from, and unrelated to, their parent registers, %EAX .. %EDX.
2039 So some weird sequences like
2040
2041 movb undefined-value, %AH
2042 movb defined-value, %AL
2043 .. use %AX or %EAX ..
2044
2045 will cause the origin attributed to %AH to be ignored, since %AL,
2046 %AX, %EAX are treated as the same register, and %AH as a
2047 completely separate one.
2048
2049 But having said all that, it actually seems to work fairly well in
2050 practice.
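
   Finally, a small worked example (editorial) of the per-word
   representation described in "Shadowing registers and memory" above,
   showing how the one-otag-per-4-bytes compression loses the first of
   two origins. For the sketch, assume bit i of a line's 4-bit descr
   nibble corresponds to byte i of the 4-byte word:

      initially (all four bytes defined):    w32 = 0    descr = 0b0000
      store undefined byte, otag T1, at +1:  w32 = T1   descr = 0b0010
      store undefined byte, otag T2, at +3:  w32 = T2   descr = 0b1010
                                             // T1 is gone; both flagged
                                             // bytes now report origin T2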
2051 */ 2052 2053 static UWord stats_ocacheL1_find = 0; 2054 static UWord stats_ocacheL1_found_at_1 = 0; 2055 static UWord stats_ocacheL1_found_at_N = 0; 2056 static UWord stats_ocacheL1_misses = 0; 2057 static UWord stats_ocacheL1_lossage = 0; 2058 static UWord stats_ocacheL1_movefwds = 0; 2059 2060 static UWord stats__ocacheL2_refs = 0; 2061 static UWord stats__ocacheL2_misses = 0; 2062 static UWord stats__ocacheL2_n_nodes_max = 0; 2063 2064 /* Cache of 32-bit values, one every 32 bits of address space */ 2065 2066 #define OC_BITS_PER_LINE 5 2067 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2)) 2068 2069 static INLINE UWord oc_line_offset ( Addr a ) { 2070 return (a >> 2) & (OC_W32S_PER_LINE - 1); 2071 } 2072 static INLINE Bool is_valid_oc_tag ( Addr tag ) { 2073 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1)); 2074 } 2075 2076 #define OC_LINES_PER_SET 2 2077 2078 #define OC_N_SET_BITS 20 2079 #define OC_N_SETS (1 << OC_N_SET_BITS) 2080 2081 /* These settings give: 2082 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful 2083 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful 2084 */ 2085 2086 #define OC_MOVE_FORWARDS_EVERY_BITS 7 2087 2088 2089 typedef 2090 struct { 2091 Addr tag; 2092 UInt w32[OC_W32S_PER_LINE]; 2093 UChar descr[OC_W32S_PER_LINE]; 2094 } 2095 OCacheLine; 2096 2097 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not 2098 in use, 'n' (nonzero) if it contains at least one valid origin tag, 2099 and 'z' if all the represented tags are zero. */ 2100 static UChar classify_OCacheLine ( OCacheLine* line ) 2101 { 2102 UWord i; 2103 if (line->tag == 1/*invalid*/) 2104 return 'e'; /* EMPTY */ 2105 tl_assert(is_valid_oc_tag(line->tag)); 2106 for (i = 0; i < OC_W32S_PER_LINE; i++) { 2107 tl_assert(0 == ((~0xF) & line->descr[i])); 2108 if (line->w32[i] > 0 && line->descr[i] > 0) 2109 return 'n'; /* NONZERO - contains useful info */ 2110 } 2111 return 'z'; /* ZERO - no useful info */ 2112 } 2113 2114 typedef 2115 struct { 2116 OCacheLine line[OC_LINES_PER_SET]; 2117 } 2118 OCacheSet; 2119 2120 typedef 2121 struct { 2122 OCacheSet set[OC_N_SETS]; 2123 } 2124 OCache; 2125 2126 static OCache* ocacheL1 = NULL; 2127 static UWord ocacheL1_event_ctr = 0; 2128 2129 static void init_ocacheL2 ( void ); /* fwds */ 2130 static void init_OCache ( void ) 2131 { 2132 UWord line, set; 2133 tl_assert(MC_(clo_mc_level) >= 3); 2134 tl_assert(ocacheL1 == NULL); 2135 ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache)); 2136 if (ocacheL1 == NULL) { 2137 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1", 2138 sizeof(OCache) ); 2139 } 2140 tl_assert(ocacheL1 != NULL); 2141 for (set = 0; set < OC_N_SETS; set++) { 2142 for (line = 0; line < OC_LINES_PER_SET; line++) { 2143 ocacheL1->set[set].line[line].tag = 1/*invalid*/; 2144 } 2145 } 2146 init_ocacheL2(); 2147 } 2148 2149 static void moveLineForwards ( OCacheSet* set, UWord lineno ) 2150 { 2151 OCacheLine tmp; 2152 stats_ocacheL1_movefwds++; 2153 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET); 2154 tmp = set->line[lineno-1]; 2155 set->line[lineno-1] = set->line[lineno]; 2156 set->line[lineno] = tmp; 2157 } 2158 2159 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) { 2160 UWord i; 2161 for (i = 0; i < OC_W32S_PER_LINE; i++) { 2162 line->w32[i] = 0; /* NO ORIGIN */ 2163 line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! 
*/ 2164 } 2165 line->tag = tag; 2166 } 2167 2168 ////////////////////////////////////////////////////////////// 2169 //// OCache backing store 2170 2171 static OSet* ocacheL2 = NULL; 2172 2173 static void* ocacheL2_malloc ( HChar* cc, SizeT szB ) { 2174 return VG_(malloc)(cc, szB); 2175 } 2176 static void ocacheL2_free ( void* v ) { 2177 VG_(free)( v ); 2178 } 2179 2180 /* Stats: # nodes currently in tree */ 2181 static UWord stats__ocacheL2_n_nodes = 0; 2182 2183 static void init_ocacheL2 ( void ) 2184 { 2185 tl_assert(!ocacheL2); 2186 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */ 2187 tl_assert(0 == offsetof(OCacheLine,tag)); 2188 ocacheL2 2189 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag), 2190 NULL, /* fast cmp */ 2191 ocacheL2_malloc, "mc.ioL2", ocacheL2_free ); 2192 tl_assert(ocacheL2); 2193 stats__ocacheL2_n_nodes = 0; 2194 } 2195 2196 /* Find line with the given tag in the tree, or NULL if not found. */ 2197 static OCacheLine* ocacheL2_find_tag ( Addr tag ) 2198 { 2199 OCacheLine* line; 2200 tl_assert(is_valid_oc_tag(tag)); 2201 stats__ocacheL2_refs++; 2202 line = VG_(OSetGen_Lookup)( ocacheL2, &tag ); 2203 return line; 2204 } 2205 2206 /* Delete the line with the given tag from the tree, if it is present, and 2207 free up the associated memory. */ 2208 static void ocacheL2_del_tag ( Addr tag ) 2209 { 2210 OCacheLine* line; 2211 tl_assert(is_valid_oc_tag(tag)); 2212 stats__ocacheL2_refs++; 2213 line = VG_(OSetGen_Remove)( ocacheL2, &tag ); 2214 if (line) { 2215 VG_(OSetGen_FreeNode)(ocacheL2, line); 2216 tl_assert(stats__ocacheL2_n_nodes > 0); 2217 stats__ocacheL2_n_nodes--; 2218 } 2219 } 2220 2221 /* Add a copy of the given line to the tree. It must not already be 2222 present. */ 2223 static void ocacheL2_add_line ( OCacheLine* line ) 2224 { 2225 OCacheLine* copy; 2226 tl_assert(is_valid_oc_tag(line->tag)); 2227 copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) ); 2228 tl_assert(copy); 2229 *copy = *line; 2230 stats__ocacheL2_refs++; 2231 VG_(OSetGen_Insert)( ocacheL2, copy ); 2232 stats__ocacheL2_n_nodes++; 2233 if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max) 2234 stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes; 2235 } 2236 2237 //// 2238 ////////////////////////////////////////////////////////////// 2239 2240 __attribute__((noinline)) 2241 static OCacheLine* find_OCacheLine_SLOW ( Addr a ) 2242 { 2243 OCacheLine *victim, *inL2; 2244 UChar c; 2245 UWord line; 2246 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1); 2247 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1); 2248 UWord tag = a & tagmask; 2249 tl_assert(setno >= 0 && setno < OC_N_SETS); 2250 2251 /* we already tried line == 0; skip therefore. */ 2252 for (line = 1; line < OC_LINES_PER_SET; line++) { 2253 if (ocacheL1->set[setno].line[line].tag == tag) { 2254 if (line == 1) { 2255 stats_ocacheL1_found_at_1++; 2256 } else { 2257 stats_ocacheL1_found_at_N++; 2258 } 2259 if (UNLIKELY(0 == (ocacheL1_event_ctr++ 2260 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) { 2261 moveLineForwards( &ocacheL1->set[setno], line ); 2262 line--; 2263 } 2264 return &ocacheL1->set[setno].line[line]; 2265 } 2266 } 2267 2268 /* A miss. Use the last slot. Implicitly this means we're 2269 ejecting the line in the last slot. */ 2270 stats_ocacheL1_misses++; 2271 tl_assert(line == OC_LINES_PER_SET); 2272 line--; 2273 tl_assert(line > 0); 2274 2275 /* First, move the to-be-ejected line to the L2 cache. 
*/
2276 victim = &ocacheL1->set[setno].line[line];
2277 c = classify_OCacheLine(victim);
2278 switch (c) {
2279 case 'e':
2280 /* the line is empty (has invalid tag); ignore it. */
2281 break;
2282 case 'z':
2283 /* line contains zeroes. We must ensure the backing store is
2284 updated accordingly, either by copying the line there
2285 verbatim, or by ensuring it isn't present there. We
2286 choose the latter on the basis that it reduces the size of
2287 the backing store. */
2288 ocacheL2_del_tag( victim->tag );
2289 break;
2290 case 'n':
2291 /* line contains at least one real, useful origin. Copy it
2292 to the backing store. */
2293 stats_ocacheL1_lossage++;
2294 inL2 = ocacheL2_find_tag( victim->tag );
2295 if (inL2) {
2296 *inL2 = *victim;
2297 } else {
2298 ocacheL2_add_line( victim );
2299 }
2300 break;
2301 default:
2302 tl_assert(0);
2303 }
2304
2305 /* Now we must reload the L1 cache from the backing tree, if
2306 possible. */
2307 tl_assert(tag != victim->tag); /* stay sane */
2308 inL2 = ocacheL2_find_tag( tag );
2309 if (inL2) {
2310 /* We're in luck. It's in the L2. */
2311 ocacheL1->set[setno].line[line] = *inL2;
2312 } else {
2313 /* Missed at both levels of the cache hierarchy. We have to
2314 declare it as full of zeroes (unknown origins). */
2315 stats__ocacheL2_misses++;
2316 zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2317 }
2318
2319 /* Move it one forwards */
2320 moveLineForwards( &ocacheL1->set[setno], line );
2321 line--;
2322
2323 return &ocacheL1->set[setno].line[line];
2324 }
2325
2326 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2327 {
2328 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2329 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2330 UWord tag = a & tagmask;
2331
2332 stats_ocacheL1_find++;
2333
2334 if (OC_ENABLE_ASSERTIONS) {
2335 tl_assert(setno >= 0 && setno < OC_N_SETS);
2336 tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2337 }
2338
2339 if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2340 return &ocacheL1->set[setno].line[0];
2341 }
2342
2343 return find_OCacheLine_SLOW( a );
2344 }
2345
2346 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2347 {
2348 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2349 //// Set the origins for a+0 ..
a+7 2350 { OCacheLine* line; 2351 UWord lineoff = oc_line_offset(a); 2352 if (OC_ENABLE_ASSERTIONS) { 2353 tl_assert(lineoff >= 0 2354 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/); 2355 } 2356 line = find_OCacheLine( a ); 2357 line->descr[lineoff+0] = 0xF; 2358 line->descr[lineoff+1] = 0xF; 2359 line->w32[lineoff+0] = otag; 2360 line->w32[lineoff+1] = otag; 2361 } 2362 //// END inlined, specialised version of MC_(helperc_b_store8) 2363 } 2364 2365 2366 /*------------------------------------------------------------*/ 2367 /*--- Aligned fast case permission setters, ---*/ 2368 /*--- for dealing with stacks ---*/ 2369 /*------------------------------------------------------------*/ 2370 2371 /*--------------------- 32-bit ---------------------*/ 2372 2373 /* Nb: by "aligned" here we mean 4-byte aligned */ 2374 2375 static INLINE void make_aligned_word32_undefined ( Addr a ) 2376 { 2377 PROF_EVENT(300, "make_aligned_word32_undefined"); 2378 2379 #ifndef PERF_FAST_STACK2 2380 make_mem_undefined(a, 4); 2381 #else 2382 { 2383 UWord sm_off; 2384 SecMap* sm; 2385 2386 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) { 2387 PROF_EVENT(301, "make_aligned_word32_undefined-slow1"); 2388 make_mem_undefined(a, 4); 2389 return; 2390 } 2391 2392 sm = get_secmap_for_writing_low(a); 2393 sm_off = SM_OFF(a); 2394 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED; 2395 } 2396 #endif 2397 } 2398 2399 static INLINE 2400 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag ) 2401 { 2402 make_aligned_word32_undefined(a); 2403 //// BEGIN inlined, specialised version of MC_(helperc_b_store4) 2404 //// Set the origins for a+0 .. a+3 2405 { OCacheLine* line; 2406 UWord lineoff = oc_line_offset(a); 2407 if (OC_ENABLE_ASSERTIONS) { 2408 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 2409 } 2410 line = find_OCacheLine( a ); 2411 line->descr[lineoff] = 0xF; 2412 line->w32[lineoff] = otag; 2413 } 2414 //// END inlined, specialised version of MC_(helperc_b_store4) 2415 } 2416 2417 static INLINE 2418 void make_aligned_word32_noaccess ( Addr a ) 2419 { 2420 PROF_EVENT(310, "make_aligned_word32_noaccess"); 2421 2422 #ifndef PERF_FAST_STACK2 2423 MC_(make_mem_noaccess)(a, 4); 2424 #else 2425 { 2426 UWord sm_off; 2427 SecMap* sm; 2428 2429 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) { 2430 PROF_EVENT(311, "make_aligned_word32_noaccess-slow1"); 2431 MC_(make_mem_noaccess)(a, 4); 2432 return; 2433 } 2434 2435 sm = get_secmap_for_writing_low(a); 2436 sm_off = SM_OFF(a); 2437 sm->vabits8[sm_off] = VA_BITS8_NOACCESS; 2438 2439 //// BEGIN inlined, specialised version of MC_(helperc_b_store4) 2440 //// Set the origins for a+0 .. a+3. 
2441 if (UNLIKELY( MC_(clo_mc_level) == 3 )) { 2442 OCacheLine* line; 2443 UWord lineoff = oc_line_offset(a); 2444 if (OC_ENABLE_ASSERTIONS) { 2445 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 2446 } 2447 line = find_OCacheLine( a ); 2448 line->descr[lineoff] = 0; 2449 } 2450 //// END inlined, specialised version of MC_(helperc_b_store4) 2451 } 2452 #endif 2453 } 2454 2455 /*--------------------- 64-bit ---------------------*/ 2456 2457 /* Nb: by "aligned" here we mean 8-byte aligned */ 2458 2459 static INLINE void make_aligned_word64_undefined ( Addr a ) 2460 { 2461 PROF_EVENT(320, "make_aligned_word64_undefined"); 2462 2463 #ifndef PERF_FAST_STACK2 2464 make_mem_undefined(a, 8); 2465 #else 2466 { 2467 UWord sm_off16; 2468 SecMap* sm; 2469 2470 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) { 2471 PROF_EVENT(321, "make_aligned_word64_undefined-slow1"); 2472 make_mem_undefined(a, 8); 2473 return; 2474 } 2475 2476 sm = get_secmap_for_writing_low(a); 2477 sm_off16 = SM_OFF_16(a); 2478 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED; 2479 } 2480 #endif 2481 } 2482 2483 static INLINE 2484 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag ) 2485 { 2486 make_aligned_word64_undefined(a); 2487 //// BEGIN inlined, specialised version of MC_(helperc_b_store8) 2488 //// Set the origins for a+0 .. a+7 2489 { OCacheLine* line; 2490 UWord lineoff = oc_line_offset(a); 2491 tl_assert(lineoff >= 0 2492 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/); 2493 line = find_OCacheLine( a ); 2494 line->descr[lineoff+0] = 0xF; 2495 line->descr[lineoff+1] = 0xF; 2496 line->w32[lineoff+0] = otag; 2497 line->w32[lineoff+1] = otag; 2498 } 2499 //// END inlined, specialised version of MC_(helperc_b_store8) 2500 } 2501 2502 static INLINE 2503 void make_aligned_word64_noaccess ( Addr a ) 2504 { 2505 PROF_EVENT(330, "make_aligned_word64_noaccess"); 2506 2507 #ifndef PERF_FAST_STACK2 2508 MC_(make_mem_noaccess)(a, 8); 2509 #else 2510 { 2511 UWord sm_off16; 2512 SecMap* sm; 2513 2514 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) { 2515 PROF_EVENT(331, "make_aligned_word64_noaccess-slow1"); 2516 MC_(make_mem_noaccess)(a, 8); 2517 return; 2518 } 2519 2520 sm = get_secmap_for_writing_low(a); 2521 sm_off16 = SM_OFF_16(a); 2522 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS; 2523 2524 //// BEGIN inlined, specialised version of MC_(helperc_b_store8) 2525 //// Clear the origins for a+0 .. a+7. 
2526 if (UNLIKELY( MC_(clo_mc_level) == 3 )) { 2527 OCacheLine* line; 2528 UWord lineoff = oc_line_offset(a); 2529 tl_assert(lineoff >= 0 2530 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/); 2531 line = find_OCacheLine( a ); 2532 line->descr[lineoff+0] = 0; 2533 line->descr[lineoff+1] = 0; 2534 } 2535 //// END inlined, specialised version of MC_(helperc_b_store8) 2536 } 2537 #endif 2538 } 2539 2540 2541 /*------------------------------------------------------------*/ 2542 /*--- Stack pointer adjustment ---*/ 2543 /*------------------------------------------------------------*/ 2544 2545 #ifdef PERF_FAST_STACK 2546 # define MAYBE_USED 2547 #else 2548 # define MAYBE_USED __attribute__((unused)) 2549 #endif 2550 2551 /*--------------- adjustment by 4 bytes ---------------*/ 2552 2553 MAYBE_USED 2554 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu) 2555 { 2556 UInt otag = ecu | MC_OKIND_STACK; 2557 PROF_EVENT(110, "new_mem_stack_4"); 2558 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2559 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag ); 2560 } else { 2561 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag ); 2562 } 2563 } 2564 2565 MAYBE_USED 2566 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP) 2567 { 2568 PROF_EVENT(110, "new_mem_stack_4"); 2569 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2570 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2571 } else { 2572 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 ); 2573 } 2574 } 2575 2576 MAYBE_USED 2577 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP) 2578 { 2579 PROF_EVENT(120, "die_mem_stack_4"); 2580 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2581 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2582 } else { 2583 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 ); 2584 } 2585 } 2586 2587 /*--------------- adjustment by 8 bytes ---------------*/ 2588 2589 MAYBE_USED 2590 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu) 2591 { 2592 UInt otag = ecu | MC_OKIND_STACK; 2593 PROF_EVENT(111, "new_mem_stack_8"); 2594 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2595 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag ); 2596 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2597 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2598 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag ); 2599 } else { 2600 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag ); 2601 } 2602 } 2603 2604 MAYBE_USED 2605 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP) 2606 { 2607 PROF_EVENT(111, "new_mem_stack_8"); 2608 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2609 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2610 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2611 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2612 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 2613 } else { 2614 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 ); 2615 } 2616 } 2617 2618 MAYBE_USED 2619 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP) 2620 { 2621 PROF_EVENT(121, "die_mem_stack_8"); 2622 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2623 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 2624 } else if 
(VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2625 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 2626 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2627 } else { 2628 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 ); 2629 } 2630 } 2631 2632 /*--------------- adjustment by 12 bytes ---------------*/ 2633 2634 MAYBE_USED 2635 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu) 2636 { 2637 UInt otag = ecu | MC_OKIND_STACK; 2638 PROF_EVENT(112, "new_mem_stack_12"); 2639 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2640 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2641 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag ); 2642 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2643 /* from previous test we don't have 8-alignment at offset +0, 2644 hence must have 8 alignment at offsets +4/-4. Hence safe to 2645 do 4 at +0 and then 8 at +4/. */ 2646 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2647 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag ); 2648 } else { 2649 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag ); 2650 } 2651 } 2652 2653 MAYBE_USED 2654 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP) 2655 { 2656 PROF_EVENT(112, "new_mem_stack_12"); 2657 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2658 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2659 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2660 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2661 /* from previous test we don't have 8-alignment at offset +0, 2662 hence must have 8 alignment at offsets +4/-4. Hence safe to 2663 do 4 at +0 and then 8 at +4/. */ 2664 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2665 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 2666 } else { 2667 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 ); 2668 } 2669 } 2670 2671 MAYBE_USED 2672 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP) 2673 { 2674 PROF_EVENT(122, "die_mem_stack_12"); 2675 /* Note the -12 in the test */ 2676 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) { 2677 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at 2678 -4. */ 2679 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 2680 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2681 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2682 /* We have 4-alignment at +0, but we don't have 8-alignment at 2683 -12. So we must have 8-alignment at -8. Hence do 4 at -12 2684 and then 8 at -8. */ 2685 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 2686 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 2687 } else { 2688 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 ); 2689 } 2690 } 2691 2692 /*--------------- adjustment by 16 bytes ---------------*/ 2693 2694 MAYBE_USED 2695 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu) 2696 { 2697 UInt otag = ecu | MC_OKIND_STACK; 2698 PROF_EVENT(113, "new_mem_stack_16"); 2699 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2700 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. 
*/ 2701 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2702 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag ); 2703 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2704 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4. 2705 Hence do 4 at +0, 8 at +4, 4 at +12. */ 2706 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2707 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag ); 2708 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag ); 2709 } else { 2710 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag ); 2711 } 2712 } 2713 2714 MAYBE_USED 2715 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP) 2716 { 2717 PROF_EVENT(113, "new_mem_stack_16"); 2718 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2719 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */ 2720 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2721 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2722 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2723 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4. 2724 Hence do 4 at +0, 8 at +4, 4 at +12. */ 2725 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2726 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 2727 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 ); 2728 } else { 2729 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 ); 2730 } 2731 } 2732 2733 MAYBE_USED 2734 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP) 2735 { 2736 PROF_EVENT(123, "die_mem_stack_16"); 2737 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2738 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */ 2739 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 2740 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 2741 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2742 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */ 2743 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 2744 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 2745 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2746 } else { 2747 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 ); 2748 } 2749 } 2750 2751 /*--------------- adjustment by 32 bytes ---------------*/ 2752 2753 MAYBE_USED 2754 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu) 2755 { 2756 UInt otag = ecu | MC_OKIND_STACK; 2757 PROF_EVENT(114, "new_mem_stack_32"); 2758 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2759 /* Straightforward */ 2760 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2761 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag ); 2762 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 2763 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 2764 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2765 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at 2766 +0,+28. 
*/ 2767 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2768 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag ); 2769 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag ); 2770 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag ); 2771 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag ); 2772 } else { 2773 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag ); 2774 } 2775 } 2776 2777 MAYBE_USED 2778 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP) 2779 { 2780 PROF_EVENT(114, "new_mem_stack_32"); 2781 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2782 /* Straightforward */ 2783 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2784 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2785 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 2786 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 2787 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2788 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at 2789 +0,+28. */ 2790 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2791 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 2792 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 ); 2793 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 ); 2794 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 ); 2795 } else { 2796 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 ); 2797 } 2798 } 2799 2800 MAYBE_USED 2801 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP) 2802 { 2803 PROF_EVENT(124, "die_mem_stack_32"); 2804 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2805 /* Straightforward */ 2806 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 2807 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 2808 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 2809 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 2810 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2811 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and 2812 4 at -32,-4. 
*/ 2813 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 2814 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 ); 2815 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 ); 2816 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 2817 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2818 } else { 2819 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 ); 2820 } 2821 } 2822 2823 /*--------------- adjustment by 112 bytes ---------------*/ 2824 2825 MAYBE_USED 2826 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu) 2827 { 2828 UInt otag = ecu | MC_OKIND_STACK; 2829 PROF_EVENT(115, "new_mem_stack_112"); 2830 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2831 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2832 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag ); 2833 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 2834 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 2835 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag ); 2836 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag ); 2837 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag ); 2838 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag ); 2839 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag ); 2840 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag ); 2841 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag ); 2842 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag ); 2843 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag ); 2844 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag ); 2845 } else { 2846 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag ); 2847 } 2848 } 2849 2850 MAYBE_USED 2851 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP) 2852 { 2853 PROF_EVENT(115, "new_mem_stack_112"); 2854 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2855 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2856 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2857 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 2858 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 2859 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 ); 2860 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 ); 2861 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 ); 2862 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 ); 2863 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 ); 2864 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 ); 2865 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 ); 2866 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 ); 2867 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 ); 2868 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 ); 2869 } else { 2870 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 ); 2871 } 2872 } 2873 2874 MAYBE_USED 2875 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP) 2876 { 2877 PROF_EVENT(125, 
"die_mem_stack_112"); 2878 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2879 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112); 2880 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104); 2881 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 ); 2882 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 ); 2883 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 ); 2884 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 ); 2885 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 ); 2886 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 ); 2887 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 ); 2888 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 ); 2889 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 2890 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 2891 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 2892 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 2893 } else { 2894 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 ); 2895 } 2896 } 2897 2898 /*--------------- adjustment by 128 bytes ---------------*/ 2899 2900 MAYBE_USED 2901 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu) 2902 { 2903 UInt otag = ecu | MC_OKIND_STACK; 2904 PROF_EVENT(116, "new_mem_stack_128"); 2905 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2906 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2907 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag ); 2908 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 2909 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 2910 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag ); 2911 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag ); 2912 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag ); 2913 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag ); 2914 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag ); 2915 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag ); 2916 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag ); 2917 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag ); 2918 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag ); 2919 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag ); 2920 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag ); 2921 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag ); 2922 } else { 2923 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag ); 2924 } 2925 } 2926 2927 MAYBE_USED 2928 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP) 2929 { 2930 PROF_EVENT(116, "new_mem_stack_128"); 2931 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2932 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2933 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2934 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 2935 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 2936 
make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 ); 2937 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 ); 2938 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 ); 2939 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 ); 2940 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 ); 2941 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 ); 2942 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 ); 2943 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 ); 2944 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 ); 2945 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 ); 2946 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 ); 2947 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 ); 2948 } else { 2949 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 ); 2950 } 2951 } 2952 2953 MAYBE_USED 2954 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP) 2955 { 2956 PROF_EVENT(126, "die_mem_stack_128"); 2957 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2958 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128); 2959 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120); 2960 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112); 2961 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104); 2962 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 ); 2963 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 ); 2964 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 ); 2965 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 ); 2966 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 ); 2967 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 ); 2968 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 ); 2969 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 ); 2970 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 2971 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 2972 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 2973 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 2974 } else { 2975 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 ); 2976 } 2977 } 2978 2979 /*--------------- adjustment by 144 bytes ---------------*/ 2980 2981 MAYBE_USED 2982 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu) 2983 { 2984 UInt otag = ecu | MC_OKIND_STACK; 2985 PROF_EVENT(117, "new_mem_stack_144"); 2986 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2987 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag ); 2988 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag ); 2989 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 2990 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 2991 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag ); 2992 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag ); 2993 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag ); 2994 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag ); 2995 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag ); 2996 
make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag ); 2997 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag ); 2998 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag ); 2999 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag ); 3000 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag ); 3001 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag ); 3002 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag ); 3003 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag ); 3004 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag ); 3005 } else { 3006 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag ); 3007 } 3008 } 3009 3010 MAYBE_USED 3011 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP) 3012 { 3013 PROF_EVENT(117, "new_mem_stack_144"); 3014 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3015 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 3016 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 3017 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 3018 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 3019 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 ); 3020 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 ); 3021 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 ); 3022 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 ); 3023 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 ); 3024 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 ); 3025 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 ); 3026 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 ); 3027 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 ); 3028 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 ); 3029 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 ); 3030 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 ); 3031 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 ); 3032 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 ); 3033 } else { 3034 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 ); 3035 } 3036 } 3037 3038 MAYBE_USED 3039 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP) 3040 { 3041 PROF_EVENT(127, "die_mem_stack_144"); 3042 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3043 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144); 3044 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136); 3045 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128); 3046 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120); 3047 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112); 3048 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104); 3049 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 ); 3050 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 ); 3051 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 ); 3052 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 ); 3053 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 ); 3054 make_aligned_word64_noaccess ( 
-VG_STACK_REDZONE_SZB + new_SP-56 ); 3055 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 ); 3056 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 ); 3057 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 3058 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 3059 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 3060 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 3061 } else { 3062 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 ); 3063 } 3064 } 3065 3066 /*--------------- adjustment by 160 bytes ---------------*/ 3067 3068 MAYBE_USED 3069 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu) 3070 { 3071 UInt otag = ecu | MC_OKIND_STACK; 3072 PROF_EVENT(118, "new_mem_stack_160"); 3073 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3074 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag ); 3075 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag ); 3076 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 3077 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 3078 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag ); 3079 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag ); 3080 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag ); 3081 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag ); 3082 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag ); 3083 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag ); 3084 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag ); 3085 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag ); 3086 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag ); 3087 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag ); 3088 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag ); 3089 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag ); 3090 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag ); 3091 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag ); 3092 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag ); 3093 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag ); 3094 } else { 3095 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag ); 3096 } 3097 } 3098 3099 MAYBE_USED 3100 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP) 3101 { 3102 PROF_EVENT(118, "new_mem_stack_160"); 3103 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3104 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 3105 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 3106 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 3107 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 3108 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 ); 3109 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 ); 3110 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 ); 3111 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 ); 3112 
make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 ); 3113 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 ); 3114 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 ); 3115 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 ); 3116 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 ); 3117 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 ); 3118 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 ); 3119 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 ); 3120 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 ); 3121 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 ); 3122 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 ); 3123 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 ); 3124 } else { 3125 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 ); 3126 } 3127 } 3128 3129 MAYBE_USED 3130 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP) 3131 { 3132 PROF_EVENT(128, "die_mem_stack_160"); 3133 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3134 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160); 3135 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152); 3136 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144); 3137 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136); 3138 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128); 3139 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120); 3140 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112); 3141 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104); 3142 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 ); 3143 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 ); 3144 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 ); 3145 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 ); 3146 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 ); 3147 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 ); 3148 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 ); 3149 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 ); 3150 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 3151 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 3152 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 3153 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 3154 } else { 3155 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 ); 3156 } 3157 } 3158 3159 /*--------------- adjustment by N bytes ---------------*/ 3160 3161 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu ) 3162 { 3163 UInt otag = ecu | MC_OKIND_STACK; 3164 PROF_EVENT(115, "new_mem_stack_w_otag"); 3165 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag ); 3166 } 3167 3168 static void mc_new_mem_stack ( Addr a, SizeT len ) 3169 { 3170 PROF_EVENT(115, "new_mem_stack"); 3171 make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len ); 3172 } 3173 3174 static void mc_die_mem_stack ( Addr a, SizeT len ) 3175 { 3176 PROF_EVENT(125, "die_mem_stack"); 3177 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len ); 3178 } 3179 3180 3181 /* The AMD64 ABI says: 3182 3183 "The 128-byte area beyond the location pointed to by %rsp is considered 3184 to be 
reserved and shall not be modified by signal or interrupt 3185 handlers. Therefore, functions may use this area for temporary data 3186 that is not needed across function calls. In particular, leaf functions 3187 may use this area for their entire stack frame, rather than adjusting 3188 the stack pointer in the prologue and epilogue. This area is known as 3189 red zone [sic]." 3190 3191 So after any call or return we need to mark this redzone as containing 3192 undefined values. 3193 3194 Consider this: we're in function f. f calls g. g moves rsp down 3195 modestly (say 16 bytes) and writes stuff all over the red zone, making it 3196 defined. g returns. f is buggy and reads from parts of the red zone 3197 that it didn't write on. But because g filled that area in, f is going 3198 to be picking up defined V bits and so any errors from reading bits of 3199 the red zone it didn't write, will be missed. The only solution I could 3200 think of was to make the red zone undefined when g returns to f. 3201 3202 This is in accordance with the ABI, which makes it clear the redzone 3203 is volatile across function calls. 3204 3205 The problem occurs the other way round too: f could fill the RZ up 3206 with defined values and g could mistakenly read them. So the RZ 3207 also needs to be nuked on function calls. 3208 */ 3209 3210 3211 /* Here's a simple cache to hold nia -> ECU mappings. It could be 3212 improved so as to have a lower miss rate. */ 3213 3214 static UWord stats__nia_cache_queries = 0; 3215 static UWord stats__nia_cache_misses = 0; 3216 3217 typedef 3218 struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */ 3219 UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */ 3220 WCacheEnt; 3221 3222 #define N_NIA_TO_ECU_CACHE 511 3223 3224 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE]; 3225 3226 static void init_nia_to_ecu_cache ( void ) 3227 { 3228 UWord i; 3229 Addr zero_addr = 0; 3230 ExeContext* zero_ec; 3231 UInt zero_ecu; 3232 /* Fill all the slots with an entry for address zero, and the 3233 relevant otags accordingly. Hence the cache is initially filled 3234 with valid data. 
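Keeping every slot valid means the lookup code in
      convert_nia_to_ecu never has to special-case empty entries: a
      miss simply evicts the less recently used of the two entries in
      its slot.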
*/ 3235 zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr); 3236 tl_assert(zero_ec); 3237 zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec); 3238 tl_assert(VG_(is_plausible_ECU)(zero_ecu)); 3239 for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) { 3240 nia_to_ecu_cache[i].nia0 = zero_addr; 3241 nia_to_ecu_cache[i].ecu0 = zero_ecu; 3242 nia_to_ecu_cache[i].nia1 = zero_addr; 3243 nia_to_ecu_cache[i].ecu1 = zero_ecu; 3244 } 3245 } 3246 3247 static inline UInt convert_nia_to_ecu ( Addr nia ) 3248 { 3249 UWord i; 3250 UInt ecu; 3251 ExeContext* ec; 3252 3253 tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) ); 3254 3255 stats__nia_cache_queries++; 3256 i = nia % N_NIA_TO_ECU_CACHE; 3257 tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE); 3258 3259 if (LIKELY( nia_to_ecu_cache[i].nia0 == nia )) 3260 return nia_to_ecu_cache[i].ecu0; 3261 3262 if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) { 3263 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; } 3264 SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 ); 3265 SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 ); 3266 # undef SWAP 3267 return nia_to_ecu_cache[i].ecu0; 3268 } 3269 3270 stats__nia_cache_misses++; 3271 ec = VG_(make_depth_1_ExeContext_from_Addr)(nia); 3272 tl_assert(ec); 3273 ecu = VG_(get_ECU_from_ExeContext)(ec); 3274 tl_assert(VG_(is_plausible_ECU)(ecu)); 3275 3276 nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0; 3277 nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0; 3278 3279 nia_to_ecu_cache[i].nia0 = nia; 3280 nia_to_ecu_cache[i].ecu0 = (UWord)ecu; 3281 return ecu; 3282 } 3283 3284 3285 /* Note that this serves both the origin-tracking and 3286 no-origin-tracking modes. We assume that calls to it are 3287 sufficiently infrequent that it isn't worth specialising for the 3288 with/without origin-tracking cases. */ 3289 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia ) 3290 { 3291 UInt otag; 3292 tl_assert(sizeof(UWord) == sizeof(SizeT)); 3293 if (0) 3294 VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n", 3295 base, len, nia ); 3296 3297 if (UNLIKELY( MC_(clo_mc_level) == 3 )) { 3298 UInt ecu = convert_nia_to_ecu ( nia ); 3299 tl_assert(VG_(is_plausible_ECU)(ecu)); 3300 otag = ecu | MC_OKIND_STACK; 3301 } else { 3302 tl_assert(nia == 0); 3303 otag = 0; 3304 } 3305 3306 # if 0 3307 /* Really slow version */ 3308 MC_(make_mem_undefined)(base, len, otag); 3309 # endif 3310 3311 # if 0 3312 /* Slow(ish) version, which is fairly easily seen to be correct. 
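Also kept under '#if 0'; like the really-slow version above it can
      serve as a reference against which the fast path below is checked.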
3313 */ 3314 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) { 3315 make_aligned_word64_undefined(base + 0, otag); 3316 make_aligned_word64_undefined(base + 8, otag); 3317 make_aligned_word64_undefined(base + 16, otag); 3318 make_aligned_word64_undefined(base + 24, otag); 3319 3320 make_aligned_word64_undefined(base + 32, otag); 3321 make_aligned_word64_undefined(base + 40, otag); 3322 make_aligned_word64_undefined(base + 48, otag); 3323 make_aligned_word64_undefined(base + 56, otag); 3324 3325 make_aligned_word64_undefined(base + 64, otag); 3326 make_aligned_word64_undefined(base + 72, otag); 3327 make_aligned_word64_undefined(base + 80, otag); 3328 make_aligned_word64_undefined(base + 88, otag); 3329 3330 make_aligned_word64_undefined(base + 96, otag); 3331 make_aligned_word64_undefined(base + 104, otag); 3332 make_aligned_word64_undefined(base + 112, otag); 3333 make_aligned_word64_undefined(base + 120, otag); 3334 } else { 3335 MC_(make_mem_undefined)(base, len, otag); 3336 } 3337 # endif 3338 3339 /* Idea is: go fast when 3340 * 8-aligned and length is 128 3341 * the sm is available in the main primary map 3342 * the address range falls entirely with a single secondary map 3343 If all those conditions hold, just update the V+A bits by writing 3344 directly into the vabits array. (If the sm was distinguished, this 3345 will make a copy and then write to it.) 3346 */ 3347 3348 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) { 3349 /* Now we know the address range is suitably sized and aligned. */ 3350 UWord a_lo = (UWord)(base); 3351 UWord a_hi = (UWord)(base + 128 - 1); 3352 tl_assert(a_lo < a_hi); // paranoia: detect overflow 3353 if (a_hi <= MAX_PRIMARY_ADDRESS) { 3354 // Now we know the entire range is within the main primary map. 3355 SecMap* sm = get_secmap_for_writing_low(a_lo); 3356 SecMap* sm_hi = get_secmap_for_writing_low(a_hi); 3357 /* Now we know that the entire address range falls within a 3358 single secondary map, and that that secondary 'lives' in 3359 the main primary map. */ 3360 if (LIKELY(sm == sm_hi)) { 3361 // Finally, we know that the range is entirely within one secmap. 
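            /* Why 16 halfword writes cover all 128 bytes: each byte of
               sm->vabits8[] holds the 2-bit V+A states of 4 bytes of
               client memory, so each UShort written below sets the state
               of 8 client bytes, and p[0..15] together cover
               16 * 8 == 128 bytes starting at a_lo. */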
3362 UWord v_off = SM_OFF(a_lo); 3363 UShort* p = (UShort*)(&sm->vabits8[v_off]); 3364 p[ 0] = VA_BITS16_UNDEFINED; 3365 p[ 1] = VA_BITS16_UNDEFINED; 3366 p[ 2] = VA_BITS16_UNDEFINED; 3367 p[ 3] = VA_BITS16_UNDEFINED; 3368 p[ 4] = VA_BITS16_UNDEFINED; 3369 p[ 5] = VA_BITS16_UNDEFINED; 3370 p[ 6] = VA_BITS16_UNDEFINED; 3371 p[ 7] = VA_BITS16_UNDEFINED; 3372 p[ 8] = VA_BITS16_UNDEFINED; 3373 p[ 9] = VA_BITS16_UNDEFINED; 3374 p[10] = VA_BITS16_UNDEFINED; 3375 p[11] = VA_BITS16_UNDEFINED; 3376 p[12] = VA_BITS16_UNDEFINED; 3377 p[13] = VA_BITS16_UNDEFINED; 3378 p[14] = VA_BITS16_UNDEFINED; 3379 p[15] = VA_BITS16_UNDEFINED; 3380 if (UNLIKELY( MC_(clo_mc_level) == 3 )) { 3381 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag ); 3382 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag ); 3383 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag ); 3384 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag ); 3385 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag ); 3386 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag ); 3387 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag ); 3388 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag ); 3389 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag ); 3390 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag ); 3391 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag ); 3392 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag ); 3393 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag ); 3394 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag ); 3395 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag ); 3396 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag ); 3397 } 3398 return; 3399 } 3400 } 3401 } 3402 3403 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */ 3404 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) { 3405 /* Now we know the address range is suitably sized and aligned. */ 3406 UWord a_lo = (UWord)(base); 3407 UWord a_hi = (UWord)(base + 288 - 1); 3408 tl_assert(a_lo < a_hi); // paranoia: detect overflow 3409 if (a_hi <= MAX_PRIMARY_ADDRESS) { 3410 // Now we know the entire range is within the main primary map. 3411 SecMap* sm = get_secmap_for_writing_low(a_lo); 3412 SecMap* sm_hi = get_secmap_for_writing_low(a_hi); 3413 /* Now we know that the entire address range falls within a 3414 single secondary map, and that that secondary 'lives' in 3415 the main primary map. */ 3416 if (LIKELY(sm == sm_hi)) { 3417 // Finally, we know that the range is entirely within one secmap. 
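            /* Same layout argument as in the 128-byte case above:
               36 halfword writes * 8 client bytes per halfword
               == 288 bytes. */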
3418 UWord v_off = SM_OFF(a_lo); 3419 UShort* p = (UShort*)(&sm->vabits8[v_off]); 3420 p[ 0] = VA_BITS16_UNDEFINED; 3421 p[ 1] = VA_BITS16_UNDEFINED; 3422 p[ 2] = VA_BITS16_UNDEFINED; 3423 p[ 3] = VA_BITS16_UNDEFINED; 3424 p[ 4] = VA_BITS16_UNDEFINED; 3425 p[ 5] = VA_BITS16_UNDEFINED; 3426 p[ 6] = VA_BITS16_UNDEFINED; 3427 p[ 7] = VA_BITS16_UNDEFINED; 3428 p[ 8] = VA_BITS16_UNDEFINED; 3429 p[ 9] = VA_BITS16_UNDEFINED; 3430 p[10] = VA_BITS16_UNDEFINED; 3431 p[11] = VA_BITS16_UNDEFINED; 3432 p[12] = VA_BITS16_UNDEFINED; 3433 p[13] = VA_BITS16_UNDEFINED; 3434 p[14] = VA_BITS16_UNDEFINED; 3435 p[15] = VA_BITS16_UNDEFINED; 3436 p[16] = VA_BITS16_UNDEFINED; 3437 p[17] = VA_BITS16_UNDEFINED; 3438 p[18] = VA_BITS16_UNDEFINED; 3439 p[19] = VA_BITS16_UNDEFINED; 3440 p[20] = VA_BITS16_UNDEFINED; 3441 p[21] = VA_BITS16_UNDEFINED; 3442 p[22] = VA_BITS16_UNDEFINED; 3443 p[23] = VA_BITS16_UNDEFINED; 3444 p[24] = VA_BITS16_UNDEFINED; 3445 p[25] = VA_BITS16_UNDEFINED; 3446 p[26] = VA_BITS16_UNDEFINED; 3447 p[27] = VA_BITS16_UNDEFINED; 3448 p[28] = VA_BITS16_UNDEFINED; 3449 p[29] = VA_BITS16_UNDEFINED; 3450 p[30] = VA_BITS16_UNDEFINED; 3451 p[31] = VA_BITS16_UNDEFINED; 3452 p[32] = VA_BITS16_UNDEFINED; 3453 p[33] = VA_BITS16_UNDEFINED; 3454 p[34] = VA_BITS16_UNDEFINED; 3455 p[35] = VA_BITS16_UNDEFINED; 3456 if (UNLIKELY( MC_(clo_mc_level) == 3 )) { 3457 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag ); 3458 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag ); 3459 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag ); 3460 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag ); 3461 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag ); 3462 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag ); 3463 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag ); 3464 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag ); 3465 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag ); 3466 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag ); 3467 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag ); 3468 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag ); 3469 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag ); 3470 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag ); 3471 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag ); 3472 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag ); 3473 set_aligned_word64_Origin_to_undef( base + 8 * 16, otag ); 3474 set_aligned_word64_Origin_to_undef( base + 8 * 17, otag ); 3475 set_aligned_word64_Origin_to_undef( base + 8 * 18, otag ); 3476 set_aligned_word64_Origin_to_undef( base + 8 * 19, otag ); 3477 set_aligned_word64_Origin_to_undef( base + 8 * 20, otag ); 3478 set_aligned_word64_Origin_to_undef( base + 8 * 21, otag ); 3479 set_aligned_word64_Origin_to_undef( base + 8 * 22, otag ); 3480 set_aligned_word64_Origin_to_undef( base + 8 * 23, otag ); 3481 set_aligned_word64_Origin_to_undef( base + 8 * 24, otag ); 3482 set_aligned_word64_Origin_to_undef( base + 8 * 25, otag ); 3483 set_aligned_word64_Origin_to_undef( base + 8 * 26, otag ); 3484 set_aligned_word64_Origin_to_undef( base + 8 * 27, otag ); 3485 set_aligned_word64_Origin_to_undef( base + 8 * 28, otag ); 3486 set_aligned_word64_Origin_to_undef( base + 8 * 29, otag ); 3487 set_aligned_word64_Origin_to_undef( base + 8 * 30, otag ); 3488 set_aligned_word64_Origin_to_undef( base + 8 * 31, otag ); 3489 set_aligned_word64_Origin_to_undef( base + 8 * 32, otag ); 3490 set_aligned_word64_Origin_to_undef( base + 8 * 33, otag ); 3491 
set_aligned_word64_Origin_to_undef( base + 8 * 34, otag ); 3492 set_aligned_word64_Origin_to_undef( base + 8 * 35, otag ); 3493 } 3494 return; 3495 } 3496 } 3497 } 3498 3499 /* else fall into slow case */ 3500 MC_(make_mem_undefined_w_otag)(base, len, otag); 3501 } 3502 3503 3504 /*------------------------------------------------------------*/ 3505 /*--- Checking memory ---*/ 3506 /*------------------------------------------------------------*/ 3507 3508 typedef 3509 enum { 3510 MC_Ok = 5, 3511 MC_AddrErr = 6, 3512 MC_ValueErr = 7 3513 } 3514 MC_ReadResult; 3515 3516 3517 /* Check permissions for address range. If inadequate permissions 3518 exist, *bad_addr is set to the offending address, so the caller can 3519 know what it is. */ 3520 3521 /* Returns True if [a .. a+len) is not addressible. Otherwise, 3522 returns False, and if bad_addr is non-NULL, sets *bad_addr to 3523 indicate the lowest failing address. Functions below are 3524 similar. */ 3525 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr ) 3526 { 3527 SizeT i; 3528 UWord vabits2; 3529 3530 PROF_EVENT(60, "check_mem_is_noaccess"); 3531 for (i = 0; i < len; i++) { 3532 PROF_EVENT(61, "check_mem_is_noaccess(loop)"); 3533 vabits2 = get_vabits2(a); 3534 if (VA_BITS2_NOACCESS != vabits2) { 3535 if (bad_addr != NULL) *bad_addr = a; 3536 return False; 3537 } 3538 a++; 3539 } 3540 return True; 3541 } 3542 3543 static Bool is_mem_addressable ( Addr a, SizeT len, 3544 /*OUT*/Addr* bad_addr ) 3545 { 3546 SizeT i; 3547 UWord vabits2; 3548 3549 PROF_EVENT(62, "is_mem_addressable"); 3550 for (i = 0; i < len; i++) { 3551 PROF_EVENT(63, "is_mem_addressable(loop)"); 3552 vabits2 = get_vabits2(a); 3553 if (VA_BITS2_NOACCESS == vabits2) { 3554 if (bad_addr != NULL) *bad_addr = a; 3555 return False; 3556 } 3557 a++; 3558 } 3559 return True; 3560 } 3561 3562 static MC_ReadResult is_mem_defined ( Addr a, SizeT len, 3563 /*OUT*/Addr* bad_addr, 3564 /*OUT*/UInt* otag ) 3565 { 3566 SizeT i; 3567 UWord vabits2; 3568 3569 PROF_EVENT(64, "is_mem_defined"); 3570 DEBUG("is_mem_defined\n"); 3571 3572 if (otag) *otag = 0; 3573 if (bad_addr) *bad_addr = 0; 3574 for (i = 0; i < len; i++) { 3575 PROF_EVENT(65, "is_mem_defined(loop)"); 3576 vabits2 = get_vabits2(a); 3577 if (VA_BITS2_DEFINED != vabits2) { 3578 // Error! Nb: Report addressability errors in preference to 3579 // definedness errors. And don't report definedeness errors unless 3580 // --undef-value-errors=yes. 3581 if (bad_addr) { 3582 *bad_addr = a; 3583 } 3584 if (VA_BITS2_NOACCESS == vabits2) { 3585 return MC_AddrErr; 3586 } 3587 if (MC_(clo_mc_level) >= 2) { 3588 if (otag && MC_(clo_mc_level) == 3) { 3589 *otag = MC_(helperc_b_load1)( a ); 3590 } 3591 return MC_ValueErr; 3592 } 3593 } 3594 a++; 3595 } 3596 return MC_Ok; 3597 } 3598 3599 3600 /* Check a zero-terminated ascii string. Tricky -- don't want to 3601 examine the actual bytes, to find the end, until we're sure it is 3602 safe to do so. */ 3603 3604 static Bool mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag ) 3605 { 3606 UWord vabits2; 3607 3608 PROF_EVENT(66, "mc_is_defined_asciiz"); 3609 DEBUG("mc_is_defined_asciiz\n"); 3610 3611 if (otag) *otag = 0; 3612 if (bad_addr) *bad_addr = 0; 3613 while (True) { 3614 PROF_EVENT(67, "mc_is_defined_asciiz(loop)"); 3615 vabits2 = get_vabits2(a); 3616 if (VA_BITS2_DEFINED != vabits2) { 3617 // Error! Nb: Report addressability errors in preference to 3618 // definedness errors. And don't report definedeness errors unless 3619 // --undef-value-errors=yes. 
3620 if (bad_addr) { 3621 *bad_addr = a; 3622 } 3623 if (VA_BITS2_NOACCESS == vabits2) { 3624 return MC_AddrErr; 3625 } 3626 if (MC_(clo_mc_level) >= 2) { 3627 if (otag && MC_(clo_mc_level) == 3) { 3628 *otag = MC_(helperc_b_load1)( a ); 3629 } 3630 return MC_ValueErr; 3631 } 3632 } 3633 /* Ok, a is safe to read. */ 3634 if (* ((UChar*)a) == 0) { 3635 return MC_Ok; 3636 } 3637 a++; 3638 } 3639 } 3640 3641 3642 /*------------------------------------------------------------*/ 3643 /*--- Memory event handlers ---*/ 3644 /*------------------------------------------------------------*/ 3645 3646 static 3647 void check_mem_is_addressable ( CorePart part, ThreadId tid, Char* s, 3648 Addr base, SizeT size ) 3649 { 3650 Addr bad_addr; 3651 Bool ok = is_mem_addressable ( base, size, &bad_addr ); 3652 3653 if (!ok) { 3654 switch (part) { 3655 case Vg_CoreSysCall: 3656 MC_(record_memparam_error) ( tid, bad_addr, 3657 /*isAddrErr*/True, s, 0/*otag*/ ); 3658 break; 3659 3660 case Vg_CoreSignal: 3661 MC_(record_core_mem_error)( tid, s ); 3662 break; 3663 3664 default: 3665 VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart"); 3666 } 3667 } 3668 } 3669 3670 static 3671 void check_mem_is_defined ( CorePart part, ThreadId tid, Char* s, 3672 Addr base, SizeT size ) 3673 { 3674 UInt otag = 0; 3675 Addr bad_addr; 3676 MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag ); 3677 3678 if (MC_Ok != res) { 3679 Bool isAddrErr = ( MC_AddrErr == res ? True : False ); 3680 3681 switch (part) { 3682 case Vg_CoreSysCall: 3683 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s, 3684 isAddrErr ? 0 : otag ); 3685 break; 3686 3687 case Vg_CoreSysCallArgInMem: 3688 MC_(record_regparam_error) ( tid, s, otag ); 3689 break; 3690 3691 /* If we're being asked to jump to a silly address, record an error 3692 message before potentially crashing the entire system. */ 3693 case Vg_CoreTranslate: 3694 MC_(record_jump_error)( tid, bad_addr ); 3695 break; 3696 3697 default: 3698 VG_(tool_panic)("check_mem_is_defined: unexpected CorePart"); 3699 } 3700 } 3701 } 3702 3703 static 3704 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid, 3705 Char* s, Addr str ) 3706 { 3707 MC_ReadResult res; 3708 Addr bad_addr = 0; // shut GCC up 3709 UInt otag = 0; 3710 3711 tl_assert(part == Vg_CoreSysCall); 3712 res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag ); 3713 if (MC_Ok != res) { 3714 Bool isAddrErr = ( MC_AddrErr == res ? True : False ); 3715 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s, 3716 isAddrErr ? 0 : otag ); 3717 } 3718 } 3719 3720 /* Handling of mmap and mprotect is not as simple as it seems. 3721 3722 The underlying semantics are that memory obtained from mmap is 3723 always initialised, but may be inaccessible. And changes to the 3724 protection of memory do not change its contents and hence not its 3725 definedness state. Problem is we can't model 3726 inaccessible-but-with-some-definedness state; once we mark memory 3727 as inaccessible we lose all info about definedness, and so can't 3728 restore that if it is later made accessible again. 3729 3730 One obvious thing to do is this: 3731 3732 mmap/mprotect NONE -> noaccess 3733 mmap/mprotect other -> defined 3734 3735 The problem case here is: taking accessible memory, writing 3736 uninitialised data to it, mprotecting it NONE and later mprotecting 3737 it back to some accessible state causes the undefinedness to be 3738 lost. 
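   For instance (an illustrative sketch, not taken from any particular
   program):

      char* p = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
                     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);  -- defined
      char junk;                     -- uninitialised local
      p[0] = junk;                   -- p[0] now undefined in shadow
      mprotect(p, 4096, PROT_NONE);  -- "NONE -> noaccess" would lose
                                        that undefinedness ...
      mprotect(p, 4096, PROT_READ|PROT_WRITE);
                                     -- ... and "other -> defined" would
                                        mean a later read of the still
                                        uninitialised p[0] is no longer
                                        reported.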
3739 3740 A better proposal is: 3741 3742 (1) mmap NONE -> make noaccess 3743 (2) mmap other -> make defined 3744 3745 (3) mprotect NONE -> # no change 3746 (4) mprotect other -> change any "noaccess" to "defined" 3747 3748 (2) is OK because memory newly obtained from mmap really is defined 3749 (zeroed out by the kernel -- doing anything else would 3750 constitute a massive security hole.) 3751 3752 (1) is OK because the only way to make the memory usable is via 3753 (4), in which case we also wind up correctly marking it all as 3754 defined. 3755 3756 (3) is the weak case. We choose not to change memory state. 3757 (presumably the range is in some mixture of "defined" and 3758 "undefined", viz, accessible but with arbitrary V bits). Doing 3759 nothing means we retain the V bits, so that if the memory is 3760 later mprotected "other", the V bits remain unchanged, so there 3761 can be no false negatives. The bad effect is that if there's 3762 an access in the area, then MC cannot warn; but at least we'll 3763 get a SEGV to show, so it's better than nothing. 3764 3765 Consider the sequence (3) followed by (4). Any memory that was 3766 "defined" or "undefined" previously retains its state (as 3767 required). Any memory that was "noaccess" before can only have 3768 been made that way by (1), and so it's OK to change it to 3769 "defined". 3770 3771 See https://bugs.kde.org/show_bug.cgi?id=205541 3772 and https://bugs.kde.org/show_bug.cgi?id=210268 3773 */ 3774 static 3775 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx, 3776 ULong di_handle ) 3777 { 3778 if (rr || ww || xx) { 3779 /* (2) mmap/mprotect other -> defined */ 3780 MC_(make_mem_defined)(a, len); 3781 } else { 3782 /* (1) mmap/mprotect NONE -> noaccess */ 3783 MC_(make_mem_noaccess)(a, len); 3784 } 3785 } 3786 3787 static 3788 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx ) 3789 { 3790 if (rr || ww || xx) { 3791 /* (4) mprotect other -> change any "noaccess" to "defined" */ 3792 make_mem_defined_if_noaccess(a, len); 3793 } else { 3794 /* (3) mprotect NONE -> # no change */ 3795 /* do nothing */ 3796 } 3797 } 3798 3799 3800 static 3801 void mc_new_mem_startup( Addr a, SizeT len, 3802 Bool rr, Bool ww, Bool xx, ULong di_handle ) 3803 { 3804 // Because code is defined, initialised variables get put in the data 3805 // segment and are defined, and uninitialised variables get put in the 3806 // bss segment and are auto-zeroed (and so defined). 3807 // 3808 // It's possible that there will be padding between global variables. 3809 // This will also be auto-zeroed, and marked as defined by Memcheck. If 3810 // a program uses it, Memcheck will not complain. This is arguably a 3811 // false negative, but it's a grey area -- the behaviour is defined (the 3812 // padding is zeroed) but it's probably not what the user intended. And 3813 // we can't avoid it. 3814 // 3815 // Note: we generally ignore RWX permissions, because we can't track them 3816 // without requiring more than one A bit which would slow things down a 3817 // lot. But on Darwin the 0th page is mapped but !R and !W and !X. 3818 // So we mark any such pages as "unaddressable". 
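   // (That happens via mc_new_mem_mmap below: a page with none of
   // R/W/X set is made noaccess.)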
3819 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n", 3820 a, (ULong)len, rr, ww, xx); 3821 mc_new_mem_mmap(a, len, rr, ww, xx, di_handle); 3822 } 3823 3824 static 3825 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len) 3826 { 3827 MC_(make_mem_defined)(a, len); 3828 } 3829 3830 3831 /*------------------------------------------------------------*/ 3832 /*--- Register event handlers ---*/ 3833 /*------------------------------------------------------------*/ 3834 3835 /* Try and get a nonzero origin for the guest state section of thread 3836 tid characterised by (offset,size). Return 0 if nothing to show 3837 for it. */ 3838 static UInt mb_get_origin_for_guest_offset ( ThreadId tid, 3839 Int offset, SizeT size ) 3840 { 3841 Int sh2off; 3842 UChar area[6]; 3843 UInt otag; 3844 sh2off = MC_(get_otrack_shadow_offset)( offset, size ); 3845 if (sh2off == -1) 3846 return 0; /* This piece of guest state is not tracked */ 3847 tl_assert(sh2off >= 0); 3848 tl_assert(0 == (sh2off % 4)); 3849 area[0] = 0x31; 3850 area[5] = 0x27; 3851 VG_(get_shadow_regs_area)( tid, &area[1], 2/*shadowno*/,sh2off,4 ); 3852 tl_assert(area[0] == 0x31); 3853 tl_assert(area[5] == 0x27); 3854 otag = *(UInt*)&area[1]; 3855 return otag; 3856 } 3857 3858 3859 /* When some chunk of guest state is written, mark the corresponding 3860 shadow area as valid. This is used to initialise arbitrarily large 3861 chunks of guest state, hence the _SIZE value, which has to be as 3862 big as the biggest guest state. 3863 */ 3864 static void mc_post_reg_write ( CorePart part, ThreadId tid, 3865 PtrdiffT offset, SizeT size) 3866 { 3867 # define MAX_REG_WRITE_SIZE 1408 3868 UChar area[MAX_REG_WRITE_SIZE]; 3869 tl_assert(size <= MAX_REG_WRITE_SIZE); 3870 VG_(memset)(area, V_BITS8_DEFINED, size); 3871 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area ); 3872 # undef MAX_REG_WRITE_SIZE 3873 } 3874 3875 static 3876 void mc_post_reg_write_clientcall ( ThreadId tid, 3877 PtrdiffT offset, SizeT size, Addr f) 3878 { 3879 mc_post_reg_write(/*dummy*/0, tid, offset, size); 3880 } 3881 3882 /* Look at the definedness of the guest's shadow state for 3883 [offset, offset+len). If any part of that is undefined, record 3884 a parameter error. 3885 */ 3886 static void mc_pre_reg_read ( CorePart part, ThreadId tid, Char* s, 3887 PtrdiffT offset, SizeT size) 3888 { 3889 Int i; 3890 Bool bad; 3891 UInt otag; 3892 3893 UChar area[16]; 3894 tl_assert(size <= 16); 3895 3896 VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size ); 3897 3898 bad = False; 3899 for (i = 0; i < size; i++) { 3900 if (area[i] != V_BITS8_DEFINED) { 3901 bad = True; 3902 break; 3903 } 3904 } 3905 3906 if (!bad) 3907 return; 3908 3909 /* We've found some undefinedness. See if we can also find an 3910 origin for it. */ 3911 otag = mb_get_origin_for_guest_offset( tid, offset, size ); 3912 MC_(record_regparam_error) ( tid, s, otag ); 3913 } 3914 3915 3916 /*------------------------------------------------------------*/ 3917 /*--- Functions called directly from generated code: ---*/ 3918 /*--- Load/store handlers. ---*/ 3919 /*------------------------------------------------------------*/ 3920 3921 /* Types: LOADV32, LOADV16, LOADV8 are: 3922 UWord fn ( Addr a ) 3923 so they return 32-bits on 32-bit machines and 64-bits on 3924 64-bit machines. Addr has the same size as a host word. 
3925 3926 LOADV64 is always ULong fn ( Addr a ) 3927 3928 Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits 3929 are a UWord, and for STOREV64 they are a ULong. 3930 */ 3931 3932 /* If any part of '_a' indicated by the mask is 1, either '_a' is not 3933 naturally '_sz/8'-aligned, or it exceeds the range covered by the 3934 primary map. This is all very tricky (and important!), so let's 3935 work through the maths by hand (below), *and* assert for these 3936 values at startup. */ 3937 #define MASK(_szInBytes) \ 3938 ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) ) 3939 3940 /* MASK only exists so as to define this macro. */ 3941 #define UNALIGNED_OR_HIGH(_a,_szInBits) \ 3942 ((_a) & MASK((_szInBits>>3))) 3943 3944 /* On a 32-bit machine: 3945 3946 N_PRIMARY_BITS == 16, so 3947 N_PRIMARY_MAP == 0x10000, so 3948 N_PRIMARY_MAP-1 == 0xFFFF, so 3949 (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so 3950 3951 MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 ) 3952 = ~ ( 0xFFFF | 0xFFFF0000 ) 3953 = ~ 0xFFFF'FFFF 3954 = 0 3955 3956 MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 ) 3957 = ~ ( 0xFFFE | 0xFFFF0000 ) 3958 = ~ 0xFFFF'FFFE 3959 = 1 3960 3961 MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 ) 3962 = ~ ( 0xFFFC | 0xFFFF0000 ) 3963 = ~ 0xFFFF'FFFC 3964 = 3 3965 3966 MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 ) 3967 = ~ ( 0xFFF8 | 0xFFFF0000 ) 3968 = ~ 0xFFFF'FFF8 3969 = 7 3970 3971 Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value 3972 precisely when a is not 1/2/4/8-bytes aligned. And obviously, for 3973 the 1-byte alignment case, it is always a zero value, since MASK(1) 3974 is zero. All as expected. 3975 3976 On a 64-bit machine, it's more complex, since we're testing 3977 simultaneously for misalignment and for the address being at or 3978 above 32G: 3979 3980 N_PRIMARY_BITS == 19, so 3981 N_PRIMARY_MAP == 0x80000, so 3982 N_PRIMARY_MAP-1 == 0x7FFFF, so 3983 (N_PRIMARY_MAP-1) << 16 == 0x7FFFF'0000, and so 3984 3985 MASK(1) = ~ ( (0x10000 - 1) | 0x7FFFF'0000 ) 3986 = ~ ( 0xFFFF | 0x7FFFF'0000 ) 3987 = ~ 0x7FFFF'FFFF 3988 = 0xFFFF'FFF8'0000'0000 3989 3990 MASK(2) = ~ ( (0x10000 - 2) | 0x7FFFF'0000 ) 3991 = ~ ( 0xFFFE | 0x7FFFF'0000 ) 3992 = ~ 0x7FFFF'FFFE 3993 = 0xFFFF'FFF8'0000'0001 3994 3995 MASK(4) = ~ ( (0x10000 - 4) | 0x7FFFF'0000 ) 3996 = ~ ( 0xFFFC | 0x7FFFF'0000 ) 3997 = ~ 0x7FFFF'FFFC 3998 = 0xFFFF'FFF8'0000'0003 3999 4000 MASK(8) = ~ ( (0x10000 - 8) | 0x7FFFF'0000 ) 4001 = ~ ( 0xFFF8 | 0x7FFFF'0000 ) 4002 = ~ 0x7FFFF'FFF8 4003 = 0xFFFF'FFF8'0000'0007 4004 */ 4005 4006 4007 /* ------------------------ Size = 8 ------------------------ */ 4008 4009 static INLINE 4010 ULong mc_LOADV64 ( Addr a, Bool isBigEndian ) 4011 { 4012 PROF_EVENT(200, "mc_LOADV64"); 4013 4014 #ifndef PERF_FAST_LOADV 4015 return mc_LOADVn_slow( a, 64, isBigEndian ); 4016 #else 4017 { 4018 UWord sm_off16, vabits16; 4019 SecMap* sm; 4020 4021 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) { 4022 PROF_EVENT(201, "mc_LOADV64-slow1"); 4023 return (ULong)mc_LOADVn_slow( a, 64, isBigEndian ); 4024 } 4025 4026 sm = get_secmap_for_reading_low(a); 4027 sm_off16 = SM_OFF_16(a); 4028 vabits16 = ((UShort*)(sm->vabits8))[sm_off16]; 4029 4030 // Handle common case quickly: a is suitably aligned, is mapped, and 4031 // addressible. 4032 // Convert V bits from compact memory form to expanded register form. 
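      // vabits16 packs the 2-bit V+A states of all 8 bytes of the word.
      // VA_BITS16_DEFINED and VA_BITS16_UNDEFINED are the two uniform
      // patterns; they map directly onto the all-defined/all-undefined
      // register-form values returned below.  Anything else means the
      // bytes are mixed or not fully addressable, so take the
      // byte-by-byte slow path.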
4033 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) { 4034 return V_BITS64_DEFINED; 4035 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) { 4036 return V_BITS64_UNDEFINED; 4037 } else { 4038 /* Slow case: the 8 bytes are not all-defined or all-undefined. */ 4039 PROF_EVENT(202, "mc_LOADV64-slow2"); 4040 return mc_LOADVn_slow( a, 64, isBigEndian ); 4041 } 4042 } 4043 #endif 4044 } 4045 4046 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a ) 4047 { 4048 return mc_LOADV64(a, True); 4049 } 4050 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a ) 4051 { 4052 return mc_LOADV64(a, False); 4053 } 4054 4055 4056 static INLINE 4057 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian ) 4058 { 4059 PROF_EVENT(210, "mc_STOREV64"); 4060 4061 #ifndef PERF_FAST_STOREV 4062 // XXX: this slow case seems to be marginally faster than the fast case! 4063 // Investigate further. 4064 mc_STOREVn_slow( a, 64, vbits64, isBigEndian ); 4065 #else 4066 { 4067 UWord sm_off16, vabits16; 4068 SecMap* sm; 4069 4070 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) { 4071 PROF_EVENT(211, "mc_STOREV64-slow1"); 4072 mc_STOREVn_slow( a, 64, vbits64, isBigEndian ); 4073 return; 4074 } 4075 4076 sm = get_secmap_for_reading_low(a); 4077 sm_off16 = SM_OFF_16(a); 4078 vabits16 = ((UShort*)(sm->vabits8))[sm_off16]; 4079 4080 if (LIKELY( !is_distinguished_sm(sm) && 4081 (VA_BITS16_DEFINED == vabits16 || 4082 VA_BITS16_UNDEFINED == vabits16) )) 4083 { 4084 /* Handle common case quickly: a is suitably aligned, */ 4085 /* is mapped, and is addressible. */ 4086 // Convert full V-bits in register to compact 2-bit form. 4087 if (V_BITS64_DEFINED == vbits64) { 4088 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED; 4089 } else if (V_BITS64_UNDEFINED == vbits64) { 4090 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED; 4091 } else { 4092 /* Slow but general case -- writing partially defined bytes. */ 4093 PROF_EVENT(212, "mc_STOREV64-slow2"); 4094 mc_STOREVn_slow( a, 64, vbits64, isBigEndian ); 4095 } 4096 } else { 4097 /* Slow but general case. */ 4098 PROF_EVENT(213, "mc_STOREV64-slow3"); 4099 mc_STOREVn_slow( a, 64, vbits64, isBigEndian ); 4100 } 4101 } 4102 #endif 4103 } 4104 4105 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 ) 4106 { 4107 mc_STOREV64(a, vbits64, True); 4108 } 4109 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 ) 4110 { 4111 mc_STOREV64(a, vbits64, False); 4112 } 4113 4114 4115 /* ------------------------ Size = 4 ------------------------ */ 4116 4117 static INLINE 4118 UWord mc_LOADV32 ( Addr a, Bool isBigEndian ) 4119 { 4120 PROF_EVENT(220, "mc_LOADV32"); 4121 4122 #ifndef PERF_FAST_LOADV 4123 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian ); 4124 #else 4125 { 4126 UWord sm_off, vabits8; 4127 SecMap* sm; 4128 4129 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) { 4130 PROF_EVENT(221, "mc_LOADV32-slow1"); 4131 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian ); 4132 } 4133 4134 sm = get_secmap_for_reading_low(a); 4135 sm_off = SM_OFF(a); 4136 vabits8 = sm->vabits8[sm_off]; 4137 4138 // Handle common case quickly: a is suitably aligned, is mapped, and the 4139 // entire word32 it lives in is addressible. 4140 // Convert V bits from compact memory form to expanded register form. 4141 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined). 4142 // Almost certainly not necessary, but be paranoid. 
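      // (On a 32-bit host the 0xFFFFFFFF00000000ULL constant truncates
      // to zero when cast to UWord, so the OR is a no-op there; on a
      // 64-bit host it marks the unused upper half of the result as
      // undefined.)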
4143 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) { 4144 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED); 4145 } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { 4146 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED); 4147 } else { 4148 /* Slow case: the 4 bytes are not all-defined or all-undefined. */ 4149 PROF_EVENT(222, "mc_LOADV32-slow2"); 4150 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian ); 4151 } 4152 } 4153 #endif 4154 } 4155 4156 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a ) 4157 { 4158 return mc_LOADV32(a, True); 4159 } 4160 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a ) 4161 { 4162 return mc_LOADV32(a, False); 4163 } 4164 4165 4166 static INLINE 4167 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian ) 4168 { 4169 PROF_EVENT(230, "mc_STOREV32"); 4170 4171 #ifndef PERF_FAST_STOREV 4172 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian ); 4173 #else 4174 { 4175 UWord sm_off, vabits8; 4176 SecMap* sm; 4177 4178 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) { 4179 PROF_EVENT(231, "mc_STOREV32-slow1"); 4180 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian ); 4181 return; 4182 } 4183 4184 sm = get_secmap_for_reading_low(a); 4185 sm_off = SM_OFF(a); 4186 vabits8 = sm->vabits8[sm_off]; 4187 4188 // Cleverness: sometimes we don't have to write the shadow memory at 4189 // all, if we can tell that what we want to write is the same as what is 4190 // already there. The 64/16/8 bit cases also have cleverness at this 4191 // point, but it works a little differently to the code below. 4192 if (V_BITS32_DEFINED == vbits32) { 4193 if (vabits8 == (UInt)VA_BITS8_DEFINED) { 4194 return; 4195 } else if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) { 4196 sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED; 4197 } else { 4198 // not defined/undefined, or distinguished and changing state 4199 PROF_EVENT(232, "mc_STOREV32-slow2"); 4200 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian ); 4201 } 4202 } else if (V_BITS32_UNDEFINED == vbits32) { 4203 if (vabits8 == (UInt)VA_BITS8_UNDEFINED) { 4204 return; 4205 } else if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) { 4206 sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED; 4207 } else { 4208 // not defined/undefined, or distinguished and changing state 4209 PROF_EVENT(233, "mc_STOREV32-slow3"); 4210 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian ); 4211 } 4212 } else { 4213 // Partially defined word 4214 PROF_EVENT(234, "mc_STOREV32-slow4"); 4215 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian ); 4216 } 4217 } 4218 #endif 4219 } 4220 4221 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 ) 4222 { 4223 mc_STOREV32(a, vbits32, True); 4224 } 4225 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 ) 4226 { 4227 mc_STOREV32(a, vbits32, False); 4228 } 4229 4230 4231 /* ------------------------ Size = 2 ------------------------ */ 4232 4233 static INLINE 4234 UWord mc_LOADV16 ( Addr a, Bool isBigEndian ) 4235 { 4236 PROF_EVENT(240, "mc_LOADV16"); 4237 4238 #ifndef PERF_FAST_LOADV 4239 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian ); 4240 #else 4241 { 4242 UWord sm_off, vabits8; 4243 SecMap* sm; 4244 4245 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) { 4246 PROF_EVENT(241, "mc_LOADV16-slow1"); 4247 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian ); 4248 } 4249 4250 sm = get_secmap_for_reading_low(a); 4251 sm_off = SM_OFF(a); 4252 vabits8 = sm->vabits8[sm_off]; 4253 // Handle common case quickly: a is suitably aligned, is mapped, and is 4254 // 
addressible. 4255 // Convert V bits from compact memory form to expanded register form 4256 if (vabits8 == VA_BITS8_DEFINED ) { return V_BITS16_DEFINED; } 4257 else if (vabits8 == VA_BITS8_UNDEFINED) { return V_BITS16_UNDEFINED; } 4258 else { 4259 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check 4260 // the two sub-bytes. 4261 UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8); 4262 if (vabits4 == VA_BITS4_DEFINED ) { return V_BITS16_DEFINED; } 4263 else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; } 4264 else { 4265 /* Slow case: the two bytes are not all-defined or all-undefined. */ 4266 PROF_EVENT(242, "mc_LOADV16-slow2"); 4267 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian ); 4268 } 4269 } 4270 } 4271 #endif 4272 } 4273 4274 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a ) 4275 { 4276 return mc_LOADV16(a, True); 4277 } 4278 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a ) 4279 { 4280 return mc_LOADV16(a, False); 4281 } 4282 4283 4284 static INLINE 4285 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian ) 4286 { 4287 PROF_EVENT(250, "mc_STOREV16"); 4288 4289 #ifndef PERF_FAST_STOREV 4290 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian ); 4291 #else 4292 { 4293 UWord sm_off, vabits8; 4294 SecMap* sm; 4295 4296 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) { 4297 PROF_EVENT(251, "mc_STOREV16-slow1"); 4298 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian ); 4299 return; 4300 } 4301 4302 sm = get_secmap_for_reading_low(a); 4303 sm_off = SM_OFF(a); 4304 vabits8 = sm->vabits8[sm_off]; 4305 if (LIKELY( !is_distinguished_sm(sm) && 4306 (VA_BITS8_DEFINED == vabits8 || 4307 VA_BITS8_UNDEFINED == vabits8) )) 4308 { 4309 /* Handle common case quickly: a is suitably aligned, */ 4310 /* is mapped, and is addressible. */ 4311 // Convert full V-bits in register to compact 2-bit form. 4312 if (V_BITS16_DEFINED == vbits16) { 4313 insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED , 4314 &(sm->vabits8[sm_off]) ); 4315 } else if (V_BITS16_UNDEFINED == vbits16) { 4316 insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED, 4317 &(sm->vabits8[sm_off]) ); 4318 } else { 4319 /* Slow but general case -- writing partially defined bytes. */ 4320 PROF_EVENT(252, "mc_STOREV16-slow2"); 4321 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian ); 4322 } 4323 } else { 4324 /* Slow but general case. 
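         -- either the 4-byte group containing 'a' is not uniformly
         defined or undefined, or 'sm' is one of the shared
         distinguished secondaries and so must not be updated in place.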
*/ 4325 PROF_EVENT(253, "mc_STOREV16-slow3"); 4326 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian ); 4327 } 4328 } 4329 #endif 4330 } 4331 4332 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 ) 4333 { 4334 mc_STOREV16(a, vbits16, True); 4335 } 4336 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 ) 4337 { 4338 mc_STOREV16(a, vbits16, False); 4339 } 4340 4341 4342 /* ------------------------ Size = 1 ------------------------ */ 4343 /* Note: endianness is irrelevant for size == 1 */ 4344 4345 VG_REGPARM(1) 4346 UWord MC_(helperc_LOADV8) ( Addr a ) 4347 { 4348 PROF_EVENT(260, "mc_LOADV8"); 4349 4350 #ifndef PERF_FAST_LOADV 4351 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ ); 4352 #else 4353 { 4354 UWord sm_off, vabits8; 4355 SecMap* sm; 4356 4357 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) { 4358 PROF_EVENT(261, "mc_LOADV8-slow1"); 4359 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ ); 4360 } 4361 4362 sm = get_secmap_for_reading_low(a); 4363 sm_off = SM_OFF(a); 4364 vabits8 = sm->vabits8[sm_off]; 4365 // Convert V bits from compact memory form to expanded register form 4366 // Handle common case quickly: a is mapped, and the entire 4367 // word32 it lives in is addressible. 4368 if (vabits8 == VA_BITS8_DEFINED ) { return V_BITS8_DEFINED; } 4369 else if (vabits8 == VA_BITS8_UNDEFINED) { return V_BITS8_UNDEFINED; } 4370 else { 4371 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check 4372 // the single byte. 4373 UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8); 4374 if (vabits2 == VA_BITS2_DEFINED ) { return V_BITS8_DEFINED; } 4375 else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; } 4376 else { 4377 /* Slow case: the byte is not all-defined or all-undefined. */ 4378 PROF_EVENT(262, "mc_LOADV8-slow2"); 4379 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ ); 4380 } 4381 } 4382 } 4383 #endif 4384 } 4385 4386 4387 VG_REGPARM(2) 4388 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 ) 4389 { 4390 PROF_EVENT(270, "mc_STOREV8"); 4391 4392 #ifndef PERF_FAST_STOREV 4393 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ ); 4394 #else 4395 { 4396 UWord sm_off, vabits8; 4397 SecMap* sm; 4398 4399 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) { 4400 PROF_EVENT(271, "mc_STOREV8-slow1"); 4401 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ ); 4402 return; 4403 } 4404 4405 sm = get_secmap_for_reading_low(a); 4406 sm_off = SM_OFF(a); 4407 vabits8 = sm->vabits8[sm_off]; 4408 if (LIKELY 4409 ( !is_distinguished_sm(sm) && 4410 ( (VA_BITS8_DEFINED == vabits8 || VA_BITS8_UNDEFINED == vabits8) 4411 || (VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) 4412 ) 4413 ) 4414 ) 4415 { 4416 /* Handle common case quickly: a is mapped, the entire word32 it 4417 lives in is addressible. */ 4418 // Convert full V-bits in register to compact 2-bit form. 4419 if (V_BITS8_DEFINED == vbits8) { 4420 insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED, 4421 &(sm->vabits8[sm_off]) ); 4422 } else if (V_BITS8_UNDEFINED == vbits8) { 4423 insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED, 4424 &(sm->vabits8[sm_off]) ); 4425 } else { 4426 /* Slow but general case -- writing partially defined bytes. */ 4427 PROF_EVENT(272, "mc_STOREV8-slow2"); 4428 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ ); 4429 } 4430 } else { 4431 /* Slow but general case. 
*/ 4432 PROF_EVENT(273, "mc_STOREV8-slow3"); 4433 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ ); 4434 } 4435 } 4436 #endif 4437 } 4438 4439 4440 /*------------------------------------------------------------*/ 4441 /*--- Functions called directly from generated code: ---*/ 4442 /*--- Value-check failure handlers. ---*/ 4443 /*------------------------------------------------------------*/ 4444 4445 /* Call these ones when an origin is available ... */ 4446 VG_REGPARM(1) 4447 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) { 4448 MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin ); 4449 } 4450 4451 VG_REGPARM(1) 4452 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) { 4453 MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin ); 4454 } 4455 4456 VG_REGPARM(1) 4457 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) { 4458 MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin ); 4459 } 4460 4461 VG_REGPARM(1) 4462 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) { 4463 MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin ); 4464 } 4465 4466 VG_REGPARM(2) 4467 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) { 4468 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin ); 4469 } 4470 4471 /* ... and these when an origin isn't available. */ 4472 4473 VG_REGPARM(0) 4474 void MC_(helperc_value_check0_fail_no_o) ( void ) { 4475 MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ ); 4476 } 4477 4478 VG_REGPARM(0) 4479 void MC_(helperc_value_check1_fail_no_o) ( void ) { 4480 MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ ); 4481 } 4482 4483 VG_REGPARM(0) 4484 void MC_(helperc_value_check4_fail_no_o) ( void ) { 4485 MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ ); 4486 } 4487 4488 VG_REGPARM(0) 4489 void MC_(helperc_value_check8_fail_no_o) ( void ) { 4490 MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ ); 4491 } 4492 4493 VG_REGPARM(1) 4494 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) { 4495 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ ); 4496 } 4497 4498 4499 /*------------------------------------------------------------*/ 4500 /*--- Metadata get/set functions, for client requests. ---*/ 4501 /*------------------------------------------------------------*/ 4502 4503 // Nb: this expands the V+A bits out into register-form V bits, even though 4504 // they're in memory. This is for backward compatibility, and because it's 4505 // probably what the user wants. 4506 4507 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment 4508 error [no longer used], 3 == addressing error. */ 4509 /* Nb: We used to issue various definedness/addressability errors from here, 4510 but we took them out because they ranged from not-very-helpful to 4511 downright annoying, and they complicated the error data structures. */ 4512 static Int mc_get_or_set_vbits_for_client ( 4513 Addr a, 4514 Addr vbits, 4515 SizeT szB, 4516 Bool setting /* True <=> set vbits, False <=> get vbits */ 4517 ) 4518 { 4519 SizeT i; 4520 Bool ok; 4521 UChar vbits8; 4522 4523 /* Check that arrays are addressible before doing any getting/setting. 
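      Both the client's data range at 'a' and the vbits buffer itself
      are checked; an unaddressable byte in either makes the request
      fail with return code 3.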
*/ 4524 for (i = 0; i < szB; i++) { 4525 if (VA_BITS2_NOACCESS == get_vabits2(a + i) || 4526 VA_BITS2_NOACCESS == get_vabits2(vbits + i)) { 4527 return 3; 4528 } 4529 } 4530 4531 /* Do the copy */ 4532 if (setting) { 4533 /* setting */ 4534 for (i = 0; i < szB; i++) { 4535 ok = set_vbits8(a + i, ((UChar*)vbits)[i]); 4536 tl_assert(ok); 4537 } 4538 } else { 4539 /* getting */ 4540 for (i = 0; i < szB; i++) { 4541 ok = get_vbits8(a + i, &vbits8); 4542 tl_assert(ok); 4543 ((UChar*)vbits)[i] = vbits8; 4544 } 4545 // The bytes in vbits[] have now been set, so mark them as such. 4546 MC_(make_mem_defined)(vbits, szB); 4547 } 4548 4549 return 1; 4550 } 4551 4552 4553 /*------------------------------------------------------------*/ 4554 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/ 4555 /*------------------------------------------------------------*/ 4556 4557 /* For the memory leak detector, say whether an entire 64k chunk of 4558 address space is possibly in use, or not. If in doubt return 4559 True. 4560 */ 4561 Bool MC_(is_within_valid_secondary) ( Addr a ) 4562 { 4563 SecMap* sm = maybe_get_secmap_for ( a ); 4564 if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS] 4565 || MC_(in_ignored_range)(a)) { 4566 /* Definitely not in use. */ 4567 return False; 4568 } else { 4569 return True; 4570 } 4571 } 4572 4573 4574 /* For the memory leak detector, say whether or not a given word 4575 address is to be regarded as valid. */ 4576 Bool MC_(is_valid_aligned_word) ( Addr a ) 4577 { 4578 tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8); 4579 tl_assert(VG_IS_WORD_ALIGNED(a)); 4580 if (is_mem_defined( a, sizeof(UWord), NULL, NULL) == MC_Ok 4581 && !MC_(in_ignored_range)(a)) { 4582 return True; 4583 } else { 4584 return False; 4585 } 4586 } 4587 4588 4589 /*------------------------------------------------------------*/ 4590 /*--- Initialisation ---*/ 4591 /*------------------------------------------------------------*/ 4592 4593 static void init_shadow_memory ( void ) 4594 { 4595 Int i; 4596 SecMap* sm; 4597 4598 tl_assert(V_BIT_UNDEFINED == 1); 4599 tl_assert(V_BIT_DEFINED == 0); 4600 tl_assert(V_BITS8_UNDEFINED == 0xFF); 4601 tl_assert(V_BITS8_DEFINED == 0); 4602 4603 /* Build the 3 distinguished secondaries */ 4604 sm = &sm_distinguished[SM_DIST_NOACCESS]; 4605 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS; 4606 4607 sm = &sm_distinguished[SM_DIST_UNDEFINED]; 4608 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED; 4609 4610 sm = &sm_distinguished[SM_DIST_DEFINED]; 4611 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED; 4612 4613 /* Set up the primary map. */ 4614 /* These entries gradually get overwritten as the used address 4615 space expands. */ 4616 for (i = 0; i < N_PRIMARY_MAP; i++) 4617 primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS]; 4618 4619 /* Auxiliary primary maps */ 4620 init_auxmap_L1_L2(); 4621 4622 /* auxmap_size = auxmap_used = 0; 4623 no ... 
these are statically initialised */ 4624 4625 /* Secondary V bit table */ 4626 secVBitTable = createSecVBitTable(); 4627 } 4628 4629 4630 /*------------------------------------------------------------*/ 4631 /*--- Sanity check machinery (permanently engaged) ---*/ 4632 /*------------------------------------------------------------*/ 4633 4634 static Bool mc_cheap_sanity_check ( void ) 4635 { 4636 n_sanity_cheap++; 4637 PROF_EVENT(490, "cheap_sanity_check"); 4638 /* Check for sane operating level */ 4639 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3) 4640 return False; 4641 /* nothing else useful we can rapidly check */ 4642 return True; 4643 } 4644 4645 static Bool mc_expensive_sanity_check ( void ) 4646 { 4647 Int i; 4648 Word n_secmaps_found; 4649 SecMap* sm; 4650 HChar* errmsg; 4651 Bool bad = False; 4652 4653 if (0) VG_(printf)("expensive sanity check\n"); 4654 if (0) return True; 4655 4656 n_sanity_expensive++; 4657 PROF_EVENT(491, "expensive_sanity_check"); 4658 4659 /* Check for sane operating level */ 4660 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3) 4661 return False; 4662 4663 /* Check that the 3 distinguished SMs are still as they should be. */ 4664 4665 /* Check noaccess DSM. */ 4666 sm = &sm_distinguished[SM_DIST_NOACCESS]; 4667 for (i = 0; i < SM_CHUNKS; i++) 4668 if (sm->vabits8[i] != VA_BITS8_NOACCESS) 4669 bad = True; 4670 4671 /* Check undefined DSM. */ 4672 sm = &sm_distinguished[SM_DIST_UNDEFINED]; 4673 for (i = 0; i < SM_CHUNKS; i++) 4674 if (sm->vabits8[i] != VA_BITS8_UNDEFINED) 4675 bad = True; 4676 4677 /* Check defined DSM. */ 4678 sm = &sm_distinguished[SM_DIST_DEFINED]; 4679 for (i = 0; i < SM_CHUNKS; i++) 4680 if (sm->vabits8[i] != VA_BITS8_DEFINED) 4681 bad = True; 4682 4683 if (bad) { 4684 VG_(printf)("memcheck expensive sanity: " 4685 "distinguished_secondaries have changed\n"); 4686 return False; 4687 } 4688 4689 /* If we're not checking for undefined value errors, the secondary V bit 4690 * table should be empty. */ 4691 if (MC_(clo_mc_level) == 1) { 4692 if (0 != VG_(OSetGen_Size)(secVBitTable)) 4693 return False; 4694 } 4695 4696 /* check the auxiliary maps, very thoroughly */ 4697 n_secmaps_found = 0; 4698 errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found ); 4699 if (errmsg) { 4700 VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg); 4701 return False; 4702 } 4703 4704 /* n_secmaps_found is now the number referred to by the auxiliary 4705 primary map. Now add on the ones referred to by the main 4706 primary map. 
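      Only non-distinguished secondaries are counted -- the three
      distinguished maps are shared and are never issued.  So if, say,
      10 SMs have been issued and 3 de-issued so far, exactly 7 should
      be found reachable from the two maps; that is what the leakage
      check below verifies.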
*/ 4707 for (i = 0; i < N_PRIMARY_MAP; i++) { 4708 if (primary_map[i] == NULL) { 4709 bad = True; 4710 } else { 4711 if (!is_distinguished_sm(primary_map[i])) 4712 n_secmaps_found++; 4713 } 4714 } 4715 4716 /* check that the number of secmaps issued matches the number that 4717 are reachable (iow, no secmap leaks) */ 4718 if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs)) 4719 bad = True; 4720 4721 if (bad) { 4722 VG_(printf)("memcheck expensive sanity: " 4723 "apparent secmap leakage\n"); 4724 return False; 4725 } 4726 4727 if (bad) { 4728 VG_(printf)("memcheck expensive sanity: " 4729 "auxmap covers wrong address space\n"); 4730 return False; 4731 } 4732 4733 /* there is only one pointer to each secmap (expensive) */ 4734 4735 return True; 4736 } 4737 4738 /*------------------------------------------------------------*/ 4739 /*--- Command line args ---*/ 4740 /*------------------------------------------------------------*/ 4741 4742 Bool MC_(clo_partial_loads_ok) = False; 4743 Long MC_(clo_freelist_vol) = 20*1000*1000LL; 4744 LeakCheckMode MC_(clo_leak_check) = LC_Summary; 4745 VgRes MC_(clo_leak_resolution) = Vg_HighRes; 4746 Bool MC_(clo_show_reachable) = False; 4747 Bool MC_(clo_show_possibly_lost) = True; 4748 Bool MC_(clo_workaround_gcc296_bugs) = False; 4749 Int MC_(clo_malloc_fill) = -1; 4750 Int MC_(clo_free_fill) = -1; 4751 Int MC_(clo_mc_level) = 2; 4752 const char* MC_(clo_summary_file) = NULL; 4753 4754 4755 static Bool mc_process_cmd_line_options(Char* arg) 4756 { 4757 Char* tmp_str; 4758 4759 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 ); 4760 4761 /* Set MC_(clo_mc_level): 4762 1 = A bit tracking only 4763 2 = A and V bit tracking, but no V bit origins 4764 3 = A and V bit tracking, and V bit origins 4765 4766 Do this by inspecting --undef-value-errors= and 4767 --track-origins=. Reject the case --undef-value-errors=no 4768 --track-origins=yes as meaningless. 
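      For example, starting from the default level of 2:

         (no relevant flags)                           -> level 2
         --undef-value-errors=no                       -> level 1
         --track-origins=yes                           -> level 3
         --undef-value-errors=no --track-origins=yes   -> rejected
         --track-origins=yes --undef-value-errors=no   -> rejected

      (the last two are rejected whichever order the flags appear in).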
4769 */ 4770 if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) { 4771 if (MC_(clo_mc_level) == 3) { 4772 goto bad_level; 4773 } else { 4774 MC_(clo_mc_level) = 1; 4775 return True; 4776 } 4777 } 4778 if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) { 4779 if (MC_(clo_mc_level) == 1) 4780 MC_(clo_mc_level) = 2; 4781 return True; 4782 } 4783 if (0 == VG_(strcmp)(arg, "--track-origins=no")) { 4784 if (MC_(clo_mc_level) == 3) 4785 MC_(clo_mc_level) = 2; 4786 return True; 4787 } 4788 if (0 == VG_(strcmp)(arg, "--track-origins=yes")) { 4789 if (MC_(clo_mc_level) == 1) { 4790 goto bad_level; 4791 } else { 4792 MC_(clo_mc_level) = 3; 4793 return True; 4794 } 4795 } 4796 4797 if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {} 4798 else if VG_BOOL_CLO(arg, "--show-reachable", MC_(clo_show_reachable)) {} 4799 else if VG_BOOL_CLO(arg, "--show-possibly-lost", 4800 MC_(clo_show_possibly_lost)) {} 4801 else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs", 4802 MC_(clo_workaround_gcc296_bugs)) {} 4803 4804 else if VG_BINT_CLO(arg, "--freelist-vol", MC_(clo_freelist_vol), 4805 0, 10*1000*1000*1000LL) {} 4806 4807 else if VG_XACT_CLO(arg, "--leak-check=no", 4808 MC_(clo_leak_check), LC_Off) {} 4809 else if VG_XACT_CLO(arg, "--leak-check=summary", 4810 MC_(clo_leak_check), LC_Summary) {} 4811 else if VG_XACT_CLO(arg, "--leak-check=yes", 4812 MC_(clo_leak_check), LC_Full) {} 4813 else if VG_XACT_CLO(arg, "--leak-check=full", 4814 MC_(clo_leak_check), LC_Full) {} 4815 4816 else if VG_XACT_CLO(arg, "--leak-resolution=low", 4817 MC_(clo_leak_resolution), Vg_LowRes) {} 4818 else if VG_XACT_CLO(arg, "--leak-resolution=med", 4819 MC_(clo_leak_resolution), Vg_MedRes) {} 4820 else if VG_XACT_CLO(arg, "--leak-resolution=high", 4821 MC_(clo_leak_resolution), Vg_HighRes) {} 4822 4823 else if VG_STR_CLO(arg, "--summary-file", tmp_str) { 4824 MC_(clo_summary_file) = VG_(strdup)("clo_summary_file", tmp_str); 4825 } 4826 else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) { 4827 Int i; 4828 Bool ok = parse_ignore_ranges(tmp_str); 4829 if (!ok) 4830 return False; 4831 tl_assert(ignoreRanges.used >= 0); 4832 tl_assert(ignoreRanges.used < M_IGNORE_RANGES); 4833 for (i = 0; i < ignoreRanges.used; i++) { 4834 Addr s = ignoreRanges.start[i]; 4835 Addr e = ignoreRanges.end[i]; 4836 Addr limit = 0x4000000; /* 64M - entirely arbitrary limit */ 4837 if (e <= s) { 4838 VG_(message)(Vg_DebugMsg, 4839 "ERROR: --ignore-ranges: end <= start in range:\n"); 4840 VG_(message)(Vg_DebugMsg, 4841 " 0x%lx-0x%lx\n", s, e); 4842 return False; 4843 } 4844 if (e - s > limit) { 4845 VG_(message)(Vg_DebugMsg, 4846 "ERROR: --ignore-ranges: suspiciously large range:\n"); 4847 VG_(message)(Vg_DebugMsg, 4848 " 0x%lx-0x%lx (size %ld)\n", s, e, (UWord)(e-s)); 4849 return False; 4850 } 4851 } 4852 } 4853 4854 else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {} 4855 else if VG_BHEX_CLO(arg, "--free-fill", MC_(clo_free_fill), 0x00,0xFF) {} 4856 4857 else 4858 return VG_(replacement_malloc_process_cmd_line_option)(arg); 4859 4860 return True; 4861 4862 4863 bad_level: 4864 VG_(fmsg_bad_option)(arg, 4865 "--track-origins=yes has no effect when --undef-value-errors=no.\n"); 4866 } 4867 4868 static void mc_print_usage(void) 4869 { 4870 VG_(printf)( 4871 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n" 4872 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n" 4873 " --show-reachable=no|yes show reachable blocks in leak check? 
[no]\n" 4874 " --show-possibly-lost=no|yes show possibly lost blocks in leak check?\n" 4875 " [yes]\n" 4876 " --undef-value-errors=no|yes check for undefined value errors [yes]\n" 4877 " --track-origins=no|yes show origins of undefined values? [no]\n" 4878 " --partial-loads-ok=no|yes too hard to explain here; see manual [no]\n" 4879 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n" 4880 " --workaround-gcc296-bugs=no|yes self explanatory [no]\n" 4881 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n" 4882 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n" 4883 " --free-fill=<hexnumber> fill free'd areas with given value\n" 4884 ); 4885 } 4886 4887 static void mc_print_debug_usage(void) 4888 { 4889 VG_(printf)( 4890 " (none)\n" 4891 ); 4892 } 4893 4894 4895 /*------------------------------------------------------------*/ 4896 /*--- Client blocks ---*/ 4897 /*------------------------------------------------------------*/ 4898 4899 /* Client block management: 4900 4901 This is managed as an expanding array of client block descriptors. 4902 Indices of live descriptors are issued to the client, so it can ask 4903 to free them later. Therefore we cannot slide live entries down 4904 over dead ones. Instead we must use free/inuse flags and scan for 4905 an empty slot at allocation time. This in turn means allocation is 4906 relatively expensive, so we hope this does not happen too often. 4907 4908 An unused block has start == size == 0 4909 */ 4910 4911 /* type CGenBlock is defined in mc_include.h */ 4912 4913 /* This subsystem is self-initialising. */ 4914 static UWord cgb_size = 0; 4915 static UWord cgb_used = 0; 4916 static CGenBlock* cgbs = NULL; 4917 4918 /* Stats for this subsystem. */ 4919 static ULong cgb_used_MAX = 0; /* Max in use. */ 4920 static ULong cgb_allocs = 0; /* Number of allocs. */ 4921 static ULong cgb_discards = 0; /* Number of discards. */ 4922 static ULong cgb_search = 0; /* Number of searches. */ 4923 4924 4925 /* Get access to the client block array. */ 4926 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks, 4927 /*OUT*/UWord* nBlocks ) 4928 { 4929 *blocks = cgbs; 4930 *nBlocks = cgb_used; 4931 } 4932 4933 4934 static 4935 Int alloc_client_block ( void ) 4936 { 4937 UWord i, sz_new; 4938 CGenBlock* cgbs_new; 4939 4940 cgb_allocs++; 4941 4942 for (i = 0; i < cgb_used; i++) { 4943 cgb_search++; 4944 if (cgbs[i].start == 0 && cgbs[i].size == 0) 4945 return i; 4946 } 4947 4948 /* Not found. Try to allocate one at the end. */ 4949 if (cgb_used < cgb_size) { 4950 cgb_used++; 4951 return cgb_used-1; 4952 } 4953 4954 /* Ok, we have to allocate a new one. */ 4955 tl_assert(cgb_used == cgb_size); 4956 sz_new = (cgbs == NULL) ? 
10 : (2 * cgb_size); 4957 4958 cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) ); 4959 for (i = 0; i < cgb_used; i++) 4960 cgbs_new[i] = cgbs[i]; 4961 4962 if (cgbs != NULL) 4963 VG_(free)( cgbs ); 4964 cgbs = cgbs_new; 4965 4966 cgb_size = sz_new; 4967 cgb_used++; 4968 if (cgb_used > cgb_used_MAX) 4969 cgb_used_MAX = cgb_used; 4970 return cgb_used-1; 4971 } 4972 4973 4974 static void show_client_block_stats ( void ) 4975 { 4976 VG_(message)(Vg_DebugMsg, 4977 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n", 4978 cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search 4979 ); 4980 } 4981 4982 4983 /*------------------------------------------------------------*/ 4984 /*--- Client requests ---*/ 4985 /*------------------------------------------------------------*/ 4986 4987 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret ) 4988 { 4989 Int i; 4990 Bool ok; 4991 Addr bad_addr; 4992 4993 if (!VG_IS_TOOL_USERREQ('M','C',arg[0]) 4994 && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0] 4995 && VG_USERREQ__FREELIKE_BLOCK != arg[0] 4996 && VG_USERREQ__CREATE_MEMPOOL != arg[0] 4997 && VG_USERREQ__DESTROY_MEMPOOL != arg[0] 4998 && VG_USERREQ__MEMPOOL_ALLOC != arg[0] 4999 && VG_USERREQ__MEMPOOL_FREE != arg[0] 5000 && VG_USERREQ__MEMPOOL_TRIM != arg[0] 5001 && VG_USERREQ__MOVE_MEMPOOL != arg[0] 5002 && VG_USERREQ__MEMPOOL_CHANGE != arg[0] 5003 && VG_USERREQ__MEMPOOL_EXISTS != arg[0]) 5004 return False; 5005 5006 switch (arg[0]) { 5007 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: 5008 ok = is_mem_addressable ( arg[1], arg[2], &bad_addr ); 5009 if (!ok) 5010 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 ); 5011 *ret = ok ? (UWord)NULL : bad_addr; 5012 break; 5013 5014 case VG_USERREQ__CHECK_MEM_IS_DEFINED: { 5015 MC_ReadResult res; 5016 UInt otag = 0; 5017 res = is_mem_defined ( arg[1], arg[2], &bad_addr, &otag ); 5018 if (MC_AddrErr == res) 5019 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 ); 5020 else if (MC_ValueErr == res) 5021 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/False, otag ); 5022 *ret = ( res==MC_Ok ? (UWord)NULL : bad_addr ); 5023 break; 5024 } 5025 5026 case VG_USERREQ__DO_LEAK_CHECK: 5027 MC_(detect_memory_leaks)(tid, arg[1] ? 
LC_Summary : LC_Full); 5028 *ret = 0; /* return value is meaningless */ 5029 break; 5030 5031 case VG_USERREQ__MAKE_MEM_NOACCESS: 5032 MC_(make_mem_noaccess) ( arg[1], arg[2] ); 5033 *ret = -1; 5034 break; 5035 5036 case VG_USERREQ__MAKE_MEM_UNDEFINED: 5037 make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid, 5038 MC_OKIND_USER ); 5039 *ret = -1; 5040 break; 5041 5042 case VG_USERREQ__MAKE_MEM_DEFINED: 5043 MC_(make_mem_defined) ( arg[1], arg[2] ); 5044 *ret = -1; 5045 break; 5046 5047 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE: 5048 make_mem_defined_if_addressable ( arg[1], arg[2] ); 5049 *ret = -1; 5050 break; 5051 5052 case VG_USERREQ__CREATE_BLOCK: /* describe a block */ 5053 if (arg[1] != 0 && arg[2] != 0) { 5054 i = alloc_client_block(); 5055 /* VG_(printf)("allocated %d %p\n", i, cgbs); */ 5056 cgbs[i].start = arg[1]; 5057 cgbs[i].size = arg[2]; 5058 cgbs[i].desc = VG_(strdup)("mc.mhcr.1", (Char *)arg[3]); 5059 cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ ); 5060 *ret = i; 5061 } else 5062 *ret = -1; 5063 break; 5064 5065 case VG_USERREQ__DISCARD: /* discard */ 5066 if (cgbs == NULL 5067 || arg[2] >= cgb_used || 5068 (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) { 5069 *ret = 1; 5070 } else { 5071 tl_assert(arg[2] >= 0 && arg[2] < cgb_used); 5072 cgbs[arg[2]].start = cgbs[arg[2]].size = 0; 5073 VG_(free)(cgbs[arg[2]].desc); 5074 cgb_discards++; 5075 *ret = 0; 5076 } 5077 break; 5078 5079 case VG_USERREQ__GET_VBITS: 5080 *ret = mc_get_or_set_vbits_for_client 5081 ( arg[1], arg[2], arg[3], False /* get them */ ); 5082 break; 5083 5084 case VG_USERREQ__SET_VBITS: 5085 *ret = mc_get_or_set_vbits_for_client 5086 ( arg[1], arg[2], arg[3], True /* set them */ ); 5087 break; 5088 5089 case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */ 5090 UWord** argp = (UWord**)arg; 5091 // MC_(bytes_leaked) et al were set by the last leak check (or zero 5092 // if no prior leak checks performed). 5093 *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect); 5094 *argp[2] = MC_(bytes_dubious); 5095 *argp[3] = MC_(bytes_reachable); 5096 *argp[4] = MC_(bytes_suppressed); 5097 // there is no argp[5] 5098 //*argp[5] = MC_(bytes_indirect); 5099 // XXX need to make *argp[1-4] defined; currently done in the 5100 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero. 5101 *ret = 0; 5102 return True; 5103 } 5104 case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */ 5105 UWord** argp = (UWord**)arg; 5106 // MC_(blocks_leaked) et al were set by the last leak check (or zero 5107 // if no prior leak checks performed). 5108 *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect); 5109 *argp[2] = MC_(blocks_dubious); 5110 *argp[3] = MC_(blocks_reachable); 5111 *argp[4] = MC_(blocks_suppressed); 5112 // there is no argp[5] 5113 //*argp[5] = MC_(blocks_indirect); 5114 // XXX need to make *argp[1-4] defined; currently done in the 5115 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero. 5116 *ret = 0; 5117 return True; 5118 } 5119 case VG_USERREQ__MALLOCLIKE_BLOCK: { 5120 Addr p = (Addr)arg[1]; 5121 SizeT sizeB = arg[2]; 5122 //UInt rzB = arg[3]; XXX: unused! 
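         // Client-side sketch of this request (names are illustrative
         // only; the macros come from valgrind.h):
         //    p = my_pool_alloc(pool, 100);
         //    VALGRIND_MALLOCLIKE_BLOCK(p, 100, 0/*rzB*/, 0/*is_zeroed*/);
         //    ...
         //    my_pool_free(pool, p);
         //    VALGRIND_FREELIKE_BLOCK(p, 0/*rzB*/);
         // Note that rzB (arg[3]) is ignored by this handler, as flagged
         // above.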
5123 Bool is_zeroed = (Bool)arg[4]; 5124 5125 MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed, 5126 MC_AllocCustom, MC_(malloc_list) ); 5127 return True; 5128 } 5129 case VG_USERREQ__FREELIKE_BLOCK: { 5130 Addr p = (Addr)arg[1]; 5131 UInt rzB = arg[2]; 5132 5133 MC_(handle_free) ( tid, p, rzB, MC_AllocCustom ); 5134 return True; 5135 } 5136 5137 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: { 5138 Char* s = (Char*)arg[1]; 5139 Addr dst = (Addr) arg[2]; 5140 Addr src = (Addr) arg[3]; 5141 SizeT len = (SizeT)arg[4]; 5142 MC_(record_overlap_error)(tid, s, src, dst, len); 5143 return True; 5144 } 5145 5146 case VG_USERREQ__CREATE_MEMPOOL: { 5147 Addr pool = (Addr)arg[1]; 5148 UInt rzB = arg[2]; 5149 Bool is_zeroed = (Bool)arg[3]; 5150 5151 MC_(create_mempool) ( pool, rzB, is_zeroed ); 5152 return True; 5153 } 5154 5155 case VG_USERREQ__DESTROY_MEMPOOL: { 5156 Addr pool = (Addr)arg[1]; 5157 5158 MC_(destroy_mempool) ( pool ); 5159 return True; 5160 } 5161 5162 case VG_USERREQ__MEMPOOL_ALLOC: { 5163 Addr pool = (Addr)arg[1]; 5164 Addr addr = (Addr)arg[2]; 5165 UInt size = arg[3]; 5166 5167 MC_(mempool_alloc) ( tid, pool, addr, size ); 5168 return True; 5169 } 5170 5171 case VG_USERREQ__MEMPOOL_FREE: { 5172 Addr pool = (Addr)arg[1]; 5173 Addr addr = (Addr)arg[2]; 5174 5175 MC_(mempool_free) ( pool, addr ); 5176 return True; 5177 } 5178 5179 case VG_USERREQ__MEMPOOL_TRIM: { 5180 Addr pool = (Addr)arg[1]; 5181 Addr addr = (Addr)arg[2]; 5182 UInt size = arg[3]; 5183 5184 MC_(mempool_trim) ( pool, addr, size ); 5185 return True; 5186 } 5187 5188 case VG_USERREQ__MOVE_MEMPOOL: { 5189 Addr poolA = (Addr)arg[1]; 5190 Addr poolB = (Addr)arg[2]; 5191 5192 MC_(move_mempool) ( poolA, poolB ); 5193 return True; 5194 } 5195 5196 case VG_USERREQ__MEMPOOL_CHANGE: { 5197 Addr pool = (Addr)arg[1]; 5198 Addr addrA = (Addr)arg[2]; 5199 Addr addrB = (Addr)arg[3]; 5200 UInt size = arg[4]; 5201 5202 MC_(mempool_change) ( pool, addrA, addrB, size ); 5203 return True; 5204 } 5205 5206 case VG_USERREQ__MEMPOOL_EXISTS: { 5207 Addr pool = (Addr)arg[1]; 5208 5209 *ret = (UWord) MC_(mempool_exists) ( pool ); 5210 return True; 5211 } 5212 5213 5214 default: 5215 VG_(message)( 5216 Vg_UserMsg, 5217 "Warning: unknown memcheck client request code %llx\n", 5218 (ULong)arg[0] 5219 ); 5220 return False; 5221 } 5222 return True; 5223 } 5224 5225 5226 /*------------------------------------------------------------*/ 5227 /*--- Crude profiling machinery. ---*/ 5228 /*------------------------------------------------------------*/ 5229 5230 // We track a number of interesting events (using PROF_EVENT) 5231 // if MC_PROFILE_MEMORY is defined. 5232 5233 #ifdef MC_PROFILE_MEMORY 5234 5235 UInt MC_(event_ctr)[N_PROF_EVENTS]; 5236 HChar* MC_(event_ctr_name)[N_PROF_EVENTS]; 5237 5238 static void init_prof_mem ( void ) 5239 { 5240 Int i; 5241 for (i = 0; i < N_PROF_EVENTS; i++) { 5242 MC_(event_ctr)[i] = 0; 5243 MC_(event_ctr_name)[i] = NULL; 5244 } 5245 } 5246 5247 static void done_prof_mem ( void ) 5248 { 5249 Int i; 5250 Bool spaced = False; 5251 for (i = 0; i < N_PROF_EVENTS; i++) { 5252 if (!spaced && (i % 10) == 0) { 5253 VG_(printf)("\n"); 5254 spaced = True; 5255 } 5256 if (MC_(event_ctr)[i] > 0) { 5257 spaced = False; 5258 VG_(printf)( "prof mem event %3d: %9d %s\n", 5259 i, MC_(event_ctr)[i], 5260 MC_(event_ctr_name)[i] 5261 ? 
MC_(event_ctr_name)[i] : "unnamed"); 5262 } 5263 } 5264 } 5265 5266 #else 5267 5268 static void init_prof_mem ( void ) { } 5269 static void done_prof_mem ( void ) { } 5270 5271 #endif 5272 5273 5274 /*------------------------------------------------------------*/ 5275 /*--- Origin tracking stuff ---*/ 5276 /*------------------------------------------------------------*/ 5277 5278 /*--------------------------------------------*/ 5279 /*--- Origin tracking: load handlers ---*/ 5280 /*--------------------------------------------*/ 5281 5282 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) { 5283 return or1 > or2 ? or1 : or2; 5284 } 5285 5286 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) { 5287 OCacheLine* line; 5288 UChar descr; 5289 UWord lineoff = oc_line_offset(a); 5290 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */ 5291 5292 if (OC_ENABLE_ASSERTIONS) { 5293 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 5294 } 5295 5296 line = find_OCacheLine( a ); 5297 5298 descr = line->descr[lineoff]; 5299 if (OC_ENABLE_ASSERTIONS) { 5300 tl_assert(descr < 0x10); 5301 } 5302 5303 if (LIKELY(0 == (descr & (1 << byteoff)))) { 5304 return 0; 5305 } else { 5306 return line->w32[lineoff]; 5307 } 5308 } 5309 5310 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) { 5311 OCacheLine* line; 5312 UChar descr; 5313 UWord lineoff, byteoff; 5314 5315 if (UNLIKELY(a & 1)) { 5316 /* Handle misaligned case, slowly. */ 5317 UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 ); 5318 UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 ); 5319 return merge_origins(oLo, oHi); 5320 } 5321 5322 lineoff = oc_line_offset(a); 5323 byteoff = a & 3; /* 0 or 2 */ 5324 5325 if (OC_ENABLE_ASSERTIONS) { 5326 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 5327 } 5328 line = find_OCacheLine( a ); 5329 5330 descr = line->descr[lineoff]; 5331 if (OC_ENABLE_ASSERTIONS) { 5332 tl_assert(descr < 0x10); 5333 } 5334 5335 if (LIKELY(0 == (descr & (3 << byteoff)))) { 5336 return 0; 5337 } else { 5338 return line->w32[lineoff]; 5339 } 5340 } 5341 5342 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) { 5343 OCacheLine* line; 5344 UChar descr; 5345 UWord lineoff; 5346 5347 if (UNLIKELY(a & 3)) { 5348 /* Handle misaligned case, slowly. */ 5349 UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 ); 5350 UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 ); 5351 return merge_origins(oLo, oHi); 5352 } 5353 5354 lineoff = oc_line_offset(a); 5355 if (OC_ENABLE_ASSERTIONS) { 5356 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 5357 } 5358 5359 line = find_OCacheLine( a ); 5360 5361 descr = line->descr[lineoff]; 5362 if (OC_ENABLE_ASSERTIONS) { 5363 tl_assert(descr < 0x10); 5364 } 5365 5366 if (LIKELY(0 == descr)) { 5367 return 0; 5368 } else { 5369 return line->w32[lineoff]; 5370 } 5371 } 5372 5373 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) { 5374 OCacheLine* line; 5375 UChar descrLo, descrHi, descr; 5376 UWord lineoff; 5377 5378 if (UNLIKELY(a & 7)) { 5379 /* Handle misaligned case, slowly. 
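         For example, an 8-byte load at 0x5003 is split into 4-byte
         loads at 0x5003 and 0x5007; each of those recurses further if
         still misaligned, and the resulting origins are combined with
         merge_origins, which just keeps the numerically larger otag.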
*/ 5380 UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 ); 5381 UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 ); 5382 return merge_origins(oLo, oHi); 5383 } 5384 5385 lineoff = oc_line_offset(a); 5386 if (OC_ENABLE_ASSERTIONS) { 5387 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/ 5388 } 5389 5390 line = find_OCacheLine( a ); 5391 5392 descrLo = line->descr[lineoff + 0]; 5393 descrHi = line->descr[lineoff + 1]; 5394 descr = descrLo | descrHi; 5395 if (OC_ENABLE_ASSERTIONS) { 5396 tl_assert(descr < 0x10); 5397 } 5398 5399 if (LIKELY(0 == descr)) { 5400 return 0; /* both 32-bit chunks are defined */ 5401 } else { 5402 UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0]; 5403 UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1]; 5404 return merge_origins(oLo, oHi); 5405 } 5406 } 5407 5408 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) { 5409 UInt oLo = (UInt)MC_(helperc_b_load8)( a + 0 ); 5410 UInt oHi = (UInt)MC_(helperc_b_load8)( a + 8 ); 5411 UInt oBoth = merge_origins(oLo, oHi); 5412 return (UWord)oBoth; 5413 } 5414 5415 5416 /*--------------------------------------------*/ 5417 /*--- Origin tracking: store handlers ---*/ 5418 /*--------------------------------------------*/ 5419 5420 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) { 5421 OCacheLine* line; 5422 UWord lineoff = oc_line_offset(a); 5423 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */ 5424 5425 if (OC_ENABLE_ASSERTIONS) { 5426 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 5427 } 5428 5429 line = find_OCacheLine( a ); 5430 5431 if (d32 == 0) { 5432 line->descr[lineoff] &= ~(1 << byteoff); 5433 } else { 5434 line->descr[lineoff] |= (1 << byteoff); 5435 line->w32[lineoff] = d32; 5436 } 5437 } 5438 5439 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) { 5440 OCacheLine* line; 5441 UWord lineoff, byteoff; 5442 5443 if (UNLIKELY(a & 1)) { 5444 /* Handle misaligned case, slowly. */ 5445 MC_(helperc_b_store1)( a + 0, d32 ); 5446 MC_(helperc_b_store1)( a + 1, d32 ); 5447 return; 5448 } 5449 5450 lineoff = oc_line_offset(a); 5451 byteoff = a & 3; /* 0 or 2 */ 5452 5453 if (OC_ENABLE_ASSERTIONS) { 5454 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 5455 } 5456 5457 line = find_OCacheLine( a ); 5458 5459 if (d32 == 0) { 5460 line->descr[lineoff] &= ~(3 << byteoff); 5461 } else { 5462 line->descr[lineoff] |= (3 << byteoff); 5463 line->w32[lineoff] = d32; 5464 } 5465 } 5466 5467 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) { 5468 OCacheLine* line; 5469 UWord lineoff; 5470 5471 if (UNLIKELY(a & 3)) { 5472 /* Handle misaligned case, slowly. */ 5473 MC_(helperc_b_store2)( a + 0, d32 ); 5474 MC_(helperc_b_store2)( a + 2, d32 ); 5475 return; 5476 } 5477 5478 lineoff = oc_line_offset(a); 5479 if (OC_ENABLE_ASSERTIONS) { 5480 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 5481 } 5482 5483 line = find_OCacheLine( a ); 5484 5485 if (d32 == 0) { 5486 line->descr[lineoff] = 0; 5487 } else { 5488 line->descr[lineoff] = 0xF; 5489 line->w32[lineoff] = d32; 5490 } 5491 } 5492 5493 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) { 5494 OCacheLine* line; 5495 UWord lineoff; 5496 5497 if (UNLIKELY(a & 7)) { 5498 /* Handle misaligned case, slowly. 
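         The same otag d32 is written to both halves: e.g. a store at
         0x5003 becomes 4-byte stores at 0x5003 and 0x5007, which in
         turn degrade to 2- and 1-byte stores as required.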
*/ 5499 MC_(helperc_b_store4)( a + 0, d32 ); 5500 MC_(helperc_b_store4)( a + 4, d32 ); 5501 return; 5502 } 5503 5504 lineoff = oc_line_offset(a); 5505 if (OC_ENABLE_ASSERTIONS) { 5506 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/ 5507 } 5508 5509 line = find_OCacheLine( a ); 5510 5511 if (d32 == 0) { 5512 line->descr[lineoff + 0] = 0; 5513 line->descr[lineoff + 1] = 0; 5514 } else { 5515 line->descr[lineoff + 0] = 0xF; 5516 line->descr[lineoff + 1] = 0xF; 5517 line->w32[lineoff + 0] = d32; 5518 line->w32[lineoff + 1] = d32; 5519 } 5520 } 5521 5522 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) { 5523 MC_(helperc_b_store8)( a + 0, d32 ); 5524 MC_(helperc_b_store8)( a + 8, d32 ); 5525 } 5526 5527 5528 /*--------------------------------------------*/ 5529 /*--- Origin tracking: sarp handlers ---*/ 5530 /*--------------------------------------------*/ 5531 5532 __attribute__((noinline)) 5533 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) { 5534 if ((a & 1) && len >= 1) { 5535 MC_(helperc_b_store1)( a, otag ); 5536 a++; 5537 len--; 5538 } 5539 if ((a & 2) && len >= 2) { 5540 MC_(helperc_b_store2)( a, otag ); 5541 a += 2; 5542 len -= 2; 5543 } 5544 if (len >= 4) 5545 tl_assert(0 == (a & 3)); 5546 while (len >= 4) { 5547 MC_(helperc_b_store4)( a, otag ); 5548 a += 4; 5549 len -= 4; 5550 } 5551 if (len >= 2) { 5552 MC_(helperc_b_store2)( a, otag ); 5553 a += 2; 5554 len -= 2; 5555 } 5556 if (len >= 1) { 5557 MC_(helperc_b_store1)( a, otag ); 5558 //a++; 5559 len--; 5560 } 5561 tl_assert(len == 0); 5562 } 5563 5564 __attribute__((noinline)) 5565 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) { 5566 if ((a & 1) && len >= 1) { 5567 MC_(helperc_b_store1)( a, 0 ); 5568 a++; 5569 len--; 5570 } 5571 if ((a & 2) && len >= 2) { 5572 MC_(helperc_b_store2)( a, 0 ); 5573 a += 2; 5574 len -= 2; 5575 } 5576 if (len >= 4) 5577 tl_assert(0 == (a & 3)); 5578 while (len >= 4) { 5579 MC_(helperc_b_store4)( a, 0 ); 5580 a += 4; 5581 len -= 4; 5582 } 5583 if (len >= 2) { 5584 MC_(helperc_b_store2)( a, 0 ); 5585 a += 2; 5586 len -= 2; 5587 } 5588 if (len >= 1) { 5589 MC_(helperc_b_store1)( a, 0 ); 5590 //a++; 5591 len--; 5592 } 5593 tl_assert(len == 0); 5594 } 5595 5596 5597 /*------------------------------------------------------------*/ 5598 /*--- Setup and finalisation ---*/ 5599 /*------------------------------------------------------------*/ 5600 5601 static void mc_post_clo_init ( void ) 5602 { 5603 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 ); 5604 5605 if (MC_(clo_mc_level) == 3) { 5606 /* We're doing origin tracking. 
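         Use the _w_ECU family of stack handlers.  These are passed an
         ExeContext 'unique' (ECU) identifying the allocation point, so
         that newly-exposed stack memory can be given an origin as well
         as being marked undefined: mc_new_mem_stack_N(addr) just marks
         [addr, addr+N) undefined, whereas the _w_ECU variant also
         records the ECU, converted to an otag, as the origin of those
         bytes.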
*/ 5607 # ifdef PERF_FAST_STACK 5608 VG_(track_new_mem_stack_4_w_ECU) ( mc_new_mem_stack_4_w_ECU ); 5609 VG_(track_new_mem_stack_8_w_ECU) ( mc_new_mem_stack_8_w_ECU ); 5610 VG_(track_new_mem_stack_12_w_ECU) ( mc_new_mem_stack_12_w_ECU ); 5611 VG_(track_new_mem_stack_16_w_ECU) ( mc_new_mem_stack_16_w_ECU ); 5612 VG_(track_new_mem_stack_32_w_ECU) ( mc_new_mem_stack_32_w_ECU ); 5613 VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU ); 5614 VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU ); 5615 VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU ); 5616 VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU ); 5617 # endif 5618 VG_(track_new_mem_stack_w_ECU) ( mc_new_mem_stack_w_ECU ); 5619 } else { 5620 /* Not doing origin tracking */ 5621 # ifdef PERF_FAST_STACK 5622 VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 ); 5623 VG_(track_new_mem_stack_8) ( mc_new_mem_stack_8 ); 5624 VG_(track_new_mem_stack_12) ( mc_new_mem_stack_12 ); 5625 VG_(track_new_mem_stack_16) ( mc_new_mem_stack_16 ); 5626 VG_(track_new_mem_stack_32) ( mc_new_mem_stack_32 ); 5627 VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 ); 5628 VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 ); 5629 VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 ); 5630 VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 ); 5631 # endif 5632 VG_(track_new_mem_stack) ( mc_new_mem_stack ); 5633 } 5634 5635 /* This origin tracking cache is huge (~100M), so only initialise 5636 if we need it. */ 5637 if (MC_(clo_mc_level) >= 3) { 5638 init_OCache(); 5639 tl_assert(ocacheL1 != NULL); 5640 tl_assert(ocacheL2 != NULL); 5641 } else { 5642 tl_assert(ocacheL1 == NULL); 5643 tl_assert(ocacheL2 == NULL); 5644 } 5645 } 5646 5647 static void print_SM_info(char* type, int n_SMs) 5648 { 5649 VG_(message)(Vg_DebugMsg, 5650 " memcheck: SMs: %s = %d (%ldk, %ldM)\n", 5651 type, 5652 n_SMs, 5653 n_SMs * sizeof(SecMap) / 1024UL, 5654 n_SMs * sizeof(SecMap) / (1024 * 1024UL) ); 5655 } 5656 5657 static void mc_fini ( Int exitcode ) 5658 { 5659 MC_(print_malloc_stats)(); 5660 5661 if (MC_(clo_leak_check) != LC_Off) { 5662 MC_(detect_memory_leaks)(1/*bogus ThreadId*/, MC_(clo_leak_check)); 5663 } else { 5664 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) { 5665 VG_(umsg)( 5666 "For a detailed leak analysis, rerun with: --leak-check=full\n" 5667 "\n" 5668 ); 5669 } 5670 } 5671 5672 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) { 5673 VG_(message)(Vg_UserMsg, 5674 "For counts of detected and suppressed errors, rerun with: -v\n"); 5675 } 5676 5677 if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1 5678 && MC_(clo_mc_level) == 2) { 5679 VG_(message)(Vg_UserMsg, 5680 "Use --track-origins=yes to see where " 5681 "uninitialised values come from\n"); 5682 } 5683 5684 done_prof_mem(); 5685 5686 if (VG_(clo_stats)) { 5687 SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB; 5688 5689 VG_(message)(Vg_DebugMsg, 5690 " memcheck: sanity checks: %d cheap, %d expensive\n", 5691 n_sanity_cheap, n_sanity_expensive ); 5692 VG_(message)(Vg_DebugMsg, 5693 " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n", 5694 n_auxmap_L2_nodes, 5695 n_auxmap_L2_nodes * 64, 5696 n_auxmap_L2_nodes / 16 ); 5697 VG_(message)(Vg_DebugMsg, 5698 " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n", 5699 n_auxmap_L1_searches, n_auxmap_L1_cmps, 5700 (10ULL * n_auxmap_L1_cmps) 5701 / (n_auxmap_L1_searches ? 
n_auxmap_L1_searches : 1) 5702 ); 5703 VG_(message)(Vg_DebugMsg, 5704 " memcheck: auxmaps_L2: %lld searches, %lld nodes\n", 5705 n_auxmap_L2_searches, n_auxmap_L2_nodes 5706 ); 5707 5708 print_SM_info("n_issued ", n_issued_SMs); 5709 print_SM_info("n_deissued ", n_deissued_SMs); 5710 print_SM_info("max_noaccess ", max_noaccess_SMs); 5711 print_SM_info("max_undefined", max_undefined_SMs); 5712 print_SM_info("max_defined ", max_defined_SMs); 5713 print_SM_info("max_non_DSM ", max_non_DSM_SMs); 5714 5715 // Three DSMs, plus the non-DSM ones 5716 max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap); 5717 // The 3*sizeof(Word) bytes is the AVL node metadata size. 5718 // The 4*sizeof(Word) bytes is the malloc metadata size. 5719 // Hardwiring these sizes in sucks, but I don't see how else to do it. 5720 max_secVBit_szB = max_secVBit_nodes * 5721 (sizeof(SecVBitNode) + 3*sizeof(Word) + 4*sizeof(Word)); 5722 max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB; 5723 5724 VG_(message)(Vg_DebugMsg, 5725 " memcheck: max sec V bit nodes: %d (%ldk, %ldM)\n", 5726 max_secVBit_nodes, max_secVBit_szB / 1024, 5727 max_secVBit_szB / (1024 * 1024)); 5728 VG_(message)(Vg_DebugMsg, 5729 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n", 5730 sec_vbits_new_nodes + sec_vbits_updates, 5731 sec_vbits_new_nodes, sec_vbits_updates ); 5732 VG_(message)(Vg_DebugMsg, 5733 " memcheck: max shadow mem size: %ldk, %ldM\n", 5734 max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024)); 5735 5736 if (MC_(clo_mc_level) >= 3) { 5737 VG_(message)(Vg_DebugMsg, 5738 " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n", 5739 stats_ocacheL1_find, 5740 stats_ocacheL1_misses, 5741 stats_ocacheL1_lossage ); 5742 VG_(message)(Vg_DebugMsg, 5743 " ocacheL1: %'12lu at 0 %'12lu at 1\n", 5744 stats_ocacheL1_find - stats_ocacheL1_misses 5745 - stats_ocacheL1_found_at_1 5746 - stats_ocacheL1_found_at_N, 5747 stats_ocacheL1_found_at_1 ); 5748 VG_(message)(Vg_DebugMsg, 5749 " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n", 5750 stats_ocacheL1_found_at_N, 5751 stats_ocacheL1_movefwds ); 5752 VG_(message)(Vg_DebugMsg, 5753 " ocacheL1: %'12lu sizeB %'12u useful\n", 5754 (UWord)sizeof(OCache), 5755 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS ); 5756 VG_(message)(Vg_DebugMsg, 5757 " ocacheL2: %'12lu refs %'12lu misses\n", 5758 stats__ocacheL2_refs, 5759 stats__ocacheL2_misses ); 5760 VG_(message)(Vg_DebugMsg, 5761 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n", 5762 stats__ocacheL2_n_nodes_max, 5763 stats__ocacheL2_n_nodes ); 5764 VG_(message)(Vg_DebugMsg, 5765 " niacache: %'12lu refs %'12lu misses\n", 5766 stats__nia_cache_queries, stats__nia_cache_misses); 5767 } else { 5768 tl_assert(ocacheL1 == NULL); 5769 tl_assert(ocacheL2 == NULL); 5770 } 5771 } 5772 5773 if (0) { 5774 VG_(message)(Vg_DebugMsg, 5775 "------ Valgrind's client block stats follow ---------------\n" ); 5776 show_client_block_stats(); 5777 } 5778 } 5779 5780 static void mc_pre_clo_init(void) 5781 { 5782 VG_(details_name) ("Memcheck"); 5783 VG_(details_version) (NULL); 5784 VG_(details_description) ("a memory error detector"); 5785 VG_(details_copyright_author)( 5786 "Copyright (C) 2002-2010, and GNU GPL'd, by Julian Seward et al."); 5787 VG_(details_bug_reports_to) (VG_BUGS_TO); 5788 VG_(details_avg_translation_sizeB) ( 556 ); 5789 5790 VG_(basic_tool_funcs) (mc_post_clo_init, 5791 MC_(instrument), 5792 mc_fini); 5793 5794 VG_(needs_final_IR_tidy_pass) ( MC_(final_tidy) ); 5795 5796 5797 VG_(needs_core_errors) (); 5798 VG_(needs_tool_errors) 
(MC_(eq_Error), 5799 MC_(before_pp_Error), 5800 MC_(pp_Error), 5801 True,/*show TIDs for errors*/ 5802 MC_(update_Error_extra), 5803 MC_(is_recognised_suppression), 5804 MC_(read_extra_suppression_info), 5805 MC_(error_matches_suppression), 5806 MC_(get_error_name), 5807 MC_(get_extra_suppression_info)); 5808 VG_(needs_libc_freeres) (); 5809 VG_(needs_command_line_options)(mc_process_cmd_line_options, 5810 mc_print_usage, 5811 mc_print_debug_usage); 5812 VG_(needs_client_requests) (mc_handle_client_request); 5813 VG_(needs_sanity_checks) (mc_cheap_sanity_check, 5814 mc_expensive_sanity_check); 5815 VG_(needs_malloc_replacement) (MC_(malloc), 5816 MC_(__builtin_new), 5817 MC_(__builtin_vec_new), 5818 MC_(memalign), 5819 MC_(calloc), 5820 MC_(free), 5821 MC_(__builtin_delete), 5822 MC_(__builtin_vec_delete), 5823 MC_(realloc), 5824 MC_(malloc_usable_size), 5825 MC_MALLOC_REDZONE_SZB ); 5826 5827 VG_(needs_xml_output) (); 5828 5829 VG_(track_new_mem_startup) ( mc_new_mem_startup ); 5830 VG_(track_new_mem_stack_signal)( make_mem_undefined_w_tid ); 5831 // We assume that brk()/sbrk() does not initialise new memory. Is this 5832 // accurate? John Reiser says: 5833 // 5834 // 0) sbrk() can *decrease* process address space. No zero fill is done 5835 // for a decrease, not even the fragment on the high end of the last page 5836 // that is beyond the new highest address. For maximum safety and 5837 // portability, then the bytes in the last page that reside above [the 5838 // new] sbrk(0) should be considered to be uninitialized, but in practice 5839 // it is exceedingly likely that they will retain their previous 5840 // contents. 5841 // 5842 // 1) If an increase is large enough to require new whole pages, then 5843 // those new whole pages (like all new pages) are zero-filled by the 5844 // operating system. So if sbrk(0) already is page aligned, then 5845 // sbrk(PAGE_SIZE) *does* zero-fill the new memory. 5846 // 5847 // 2) Any increase that lies within an existing allocated page is not 5848 // changed. So if (x = sbrk(0)) is not page aligned, then 5849 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their 5850 // existing contents, and an additional PAGE_SIZE bytes which are zeroed. 5851 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest 5852 // of them come along for the ride because the operating system deals 5853 // only in whole pages. Again, for maximum safety and portability, then 5854 // anything that lives above [the new] sbrk(0) should be considered 5855 // uninitialized, but in practice will retain previous contents [zero in 5856 // this case.]" 5857 // 5858 // In short: 5859 // 5860 // A key property of sbrk/brk is that new whole pages that are supplied 5861 // by the operating system *do* get initialized to zero. 5862 // 5863 // As for the portability of all this: 5864 // 5865 // sbrk and brk are not POSIX. However, any system that is a derivative 5866 // of *nix has sbrk and brk because there are too many softwares (such as 5867 // the Bourne shell) which rely on the traditional memory map (.text, 5868 // .data+.bss, stack) and the existence of sbrk/brk. 5869 // 5870 // So we should arguably observe all this. However: 5871 // - The current inaccuracy has caused maybe one complaint in seven years(?) 5872 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I 5873 // doubt most programmers know the above information. 5874 // So I'm not terribly unhappy with marking it as undefined. --njn. 
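   // (A concrete instance of (2): with PAGE_SIZE == 4096 and
   // x = sbrk(0) == 0x804aff0, an sbrk(4096) first hands back the
   // ((4096-1) & -x) == 16 bytes up to the end of the current page, which
   // keep their old contents; the OS then supplies one fresh zeroed page,
   // of which ((4096-1) & x) == 4080 bytes lie below the new break and the
   // remaining 16 zeroed bytes lie above it.)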
5875 // 5876 // [More: I think most of what John said only applies to sbrk(). It seems 5877 // that brk() always deals in whole pages. And since this event deals 5878 // directly with brk(), not with sbrk(), perhaps it would be reasonable to 5879 // just mark all memory it allocates as defined.] 5880 // 5881 VG_(track_new_mem_brk) ( make_mem_undefined_w_tid ); 5882 5883 // Handling of mmap and mprotect isn't simple (well, it is simple, 5884 // but the justification isn't.) See comments above, just prior to 5885 // mc_new_mem_mmap. 5886 VG_(track_new_mem_mmap) ( mc_new_mem_mmap ); 5887 VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect ); 5888 5889 VG_(track_copy_mem_remap) ( MC_(copy_address_range_state) ); 5890 5891 VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) ); 5892 VG_(track_die_mem_brk) ( MC_(make_mem_noaccess) ); 5893 VG_(track_die_mem_munmap) ( MC_(make_mem_noaccess) ); 5894 5895 /* Defer the specification of the new_mem_stack functions to the 5896 post_clo_init function, since we need to first parse the command 5897 line before deciding which set to use. */ 5898 5899 # ifdef PERF_FAST_STACK 5900 VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 ); 5901 VG_(track_die_mem_stack_8) ( mc_die_mem_stack_8 ); 5902 VG_(track_die_mem_stack_12) ( mc_die_mem_stack_12 ); 5903 VG_(track_die_mem_stack_16) ( mc_die_mem_stack_16 ); 5904 VG_(track_die_mem_stack_32) ( mc_die_mem_stack_32 ); 5905 VG_(track_die_mem_stack_112) ( mc_die_mem_stack_112 ); 5906 VG_(track_die_mem_stack_128) ( mc_die_mem_stack_128 ); 5907 VG_(track_die_mem_stack_144) ( mc_die_mem_stack_144 ); 5908 VG_(track_die_mem_stack_160) ( mc_die_mem_stack_160 ); 5909 # endif 5910 VG_(track_die_mem_stack) ( mc_die_mem_stack ); 5911 5912 VG_(track_ban_mem_stack) ( MC_(make_mem_noaccess) ); 5913 5914 VG_(track_pre_mem_read) ( check_mem_is_defined ); 5915 VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz ); 5916 VG_(track_pre_mem_write) ( check_mem_is_addressable ); 5917 VG_(track_post_mem_write) ( mc_post_mem_write ); 5918 5919 if (MC_(clo_mc_level) >= 2) 5920 VG_(track_pre_reg_read) ( mc_pre_reg_read ); 5921 5922 VG_(track_post_reg_write) ( mc_post_reg_write ); 5923 VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall ); 5924 5925 init_shadow_memory(); 5926 MC_(malloc_list) = VG_(HT_construct)( "MC_(malloc_list)" ); 5927 MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" ); 5928 init_prof_mem(); 5929 5930 tl_assert( mc_expensive_sanity_check() ); 5931 5932 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true. 5933 tl_assert(sizeof(UWord) == sizeof(Addr)); 5934 // Call me paranoid. I don't care. 5935 tl_assert(sizeof(void*) == sizeof(Addr)); 5936 5937 // BYTES_PER_SEC_VBIT_NODE must be a power of two. 5938 tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE)); 5939 5940 /* This is small. Always initialise it. */ 5941 init_nia_to_ecu_cache(); 5942 5943 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know 5944 if we need to, since the command line args haven't been 5945 processed yet. Hence defer it to mc_post_clo_init. */ 5946 tl_assert(ocacheL1 == NULL); 5947 tl_assert(ocacheL2 == NULL); 5948 5949 /* Check some important stuff. See extensive comments above 5950 re UNALIGNED_OR_HIGH for background. 
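      In effect these assertions pin down MASK(szB) as
      (~MAX_PRIMARY_ADDRESS | (szB-1)), so a load or store of size szB
      at address a takes the fast path only when (a & MASK(szB)) == 0,
      i.e. only when a is szB-aligned and lies within the range covered
      by the primary map.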
*/ 5951 # if VG_WORDSIZE == 4 5952 tl_assert(sizeof(void*) == 4); 5953 tl_assert(sizeof(Addr) == 4); 5954 tl_assert(sizeof(UWord) == 4); 5955 tl_assert(sizeof(Word) == 4); 5956 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL); 5957 tl_assert(MASK(1) == 0UL); 5958 tl_assert(MASK(2) == 1UL); 5959 tl_assert(MASK(4) == 3UL); 5960 tl_assert(MASK(8) == 7UL); 5961 # else 5962 tl_assert(VG_WORDSIZE == 8); 5963 tl_assert(sizeof(void*) == 8); 5964 tl_assert(sizeof(Addr) == 8); 5965 tl_assert(sizeof(UWord) == 8); 5966 tl_assert(sizeof(Word) == 8); 5967 tl_assert(MAX_PRIMARY_ADDRESS == 0x3FFFFFFFFFULL); 5968 tl_assert(MASK(1) == 0xFFFFFFC000000000ULL); 5969 tl_assert(MASK(2) == 0xFFFFFFC000000001ULL); 5970 tl_assert(MASK(4) == 0xFFFFFFC000000003ULL); 5971 tl_assert(MASK(8) == 0xFFFFFFC000000007ULL); 5972 # endif 5973 } 5974 5975 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init) 5976 5977 /*--------------------------------------------------------------------*/ 5978 /*--- end mc_main.c ---*/ 5979 /*--------------------------------------------------------------------*/ 5980