1 2 /*--------------------------------------------------------------------*/ 3 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/ 4 /*--- accessibility (A) and validity (V) status of each byte. ---*/ 5 /*--- mc_main.c ---*/ 6 /*--------------------------------------------------------------------*/ 7 8 /* 9 This file is part of MemCheck, a heavyweight Valgrind tool for 10 detecting memory errors. 11 12 Copyright (C) 2000-2012 Julian Seward 13 jseward (at) acm.org 14 15 This program is free software; you can redistribute it and/or 16 modify it under the terms of the GNU General Public License as 17 published by the Free Software Foundation; either version 2 of the 18 License, or (at your option) any later version. 19 20 This program is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of 22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 General Public License for more details. 24 25 You should have received a copy of the GNU General Public License 26 along with this program; if not, write to the Free Software 27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 28 02111-1307, USA. 29 30 The GNU General Public License is contained in the file COPYING. 31 */ 32 33 #include "pub_tool_basics.h" 34 #include "pub_tool_aspacemgr.h" 35 #include "pub_tool_gdbserver.h" 36 #include "pub_tool_poolalloc.h" 37 #include "pub_tool_hashtable.h" // For mc_include.h 38 #include "pub_tool_libcbase.h" 39 #include "pub_tool_libcassert.h" 40 #include "pub_tool_libcprint.h" 41 #include "pub_tool_machine.h" 42 #include "pub_tool_mallocfree.h" 43 #include "pub_tool_options.h" 44 #include "pub_tool_oset.h" 45 #include "pub_tool_replacemalloc.h" 46 #include "pub_tool_tooliface.h" 47 #include "pub_tool_threadstate.h" 48 49 #include "mc_include.h" 50 #include "memcheck.h" /* for client requests */ 51 52 53 /* Set to 1 to do a little more sanity checking */ 54 #define VG_DEBUG_MEMORY 0 55 56 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args) 57 58 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */ 59 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */ 60 61 62 /*------------------------------------------------------------*/ 63 /*--- Fast-case knobs ---*/ 64 /*------------------------------------------------------------*/ 65 66 // Comment these out to disable the fast cases (don't just set them to zero). 67 68 #define PERF_FAST_LOADV 1 69 #define PERF_FAST_STOREV 1 70 71 #define PERF_FAST_SARP 1 72 73 #define PERF_FAST_STACK 1 74 #define PERF_FAST_STACK2 1 75 76 /* Change this to 1 to enable assertions on origin tracking cache fast 77 paths */ 78 #define OC_ENABLE_ASSERTIONS 0 79 80 81 /*------------------------------------------------------------*/ 82 /*--- Comments on the origin tracking implementation ---*/ 83 /*------------------------------------------------------------*/ 84 85 /* See detailed comment entitled 86 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION 87 which is contained further on in this file. */ 88 89 90 /*------------------------------------------------------------*/ 91 /*--- V bits and A bits ---*/ 92 /*------------------------------------------------------------*/ 93 94 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck 95 thinks the corresponding value bit is defined. And every memory byte 96 has an A bit, which tracks whether Memcheck thinks the program can access 97 it safely (ie. 
it's mapped, and has at least one of the RWX permission bits 98 set). So every N-bit register is shadowed with N V bits, and every memory 99 byte is shadowed with 8 V bits and one A bit. 100 101 In the implementation, we use two forms of compression (compressed V bits 102 and distinguished secondary maps) to avoid the 9-bit-per-byte overhead 103 for memory. 104 105 Memcheck also tracks extra information about each heap block that is 106 allocated, for detecting memory leaks and other purposes. 107 */ 108 109 /*------------------------------------------------------------*/ 110 /*--- Basic A/V bitmap representation. ---*/ 111 /*------------------------------------------------------------*/ 112 113 /* All reads and writes are checked against a memory map (a.k.a. shadow 114 memory), which records the state of all memory in the process. 115 116 On 32-bit machines the memory map is organised as follows. 117 The top 16 bits of an address are used to index into a top-level 118 map table, containing 65536 entries. Each entry is a pointer to a 119 second-level map, which records the accesibililty and validity 120 permissions for the 65536 bytes indexed by the lower 16 bits of the 121 address. Each byte is represented by two bits (details are below). So 122 each second-level map contains 16384 bytes. This two-level arrangement 123 conveniently divides the 4G address space into 64k lumps, each size 64k 124 bytes. 125 126 All entries in the primary (top-level) map must point to a valid 127 secondary (second-level) map. Since many of the 64kB chunks will 128 have the same status for every bit -- ie. noaccess (for unused 129 address space) or entirely addressable and defined (for code segments) -- 130 there are three distinguished secondary maps, which indicate 'noaccess', 131 'undefined' and 'defined'. For these uniform 64kB chunks, the primary 132 map entry points to the relevant distinguished map. In practice, 133 typically more than half of the addressable memory is represented with 134 the 'undefined' or 'defined' distinguished secondary map, so it gives a 135 good saving. It also lets us set the V+A bits of large address regions 136 quickly in set_address_range_perms(). 137 138 On 64-bit machines it's more complicated. If we followed the same basic 139 scheme we'd have a four-level table which would require too many memory 140 accesses. So instead the top-level map table has 2^19 entries (indexed 141 using bits 16..34 of the address); this covers the bottom 32GB. Any 142 accesses above 32GB are handled with a slow, sparse auxiliary table. 143 Valgrind's address space manager tries very hard to keep things below 144 this 32GB barrier so that performance doesn't suffer too much. 145 146 Note that this file has a lot of different functions for reading and 147 writing shadow memory. Only a couple are strictly necessary (eg. 148 get_vabits2 and set_vabits2), most are just specialised for specific 149 common cases to improve performance. 150 151 Aside: the V+A bits are less precise than they could be -- we have no way 152 of marking memory as read-only. It would be great if we could add an 153 extra state VA_BITSn_READONLY. But then we'd have 5 different states, 154 which requires 2.3 bits to hold, and there's no way to do that elegantly 155 -- we'd have to double up to 4 bits of metadata per byte, which doesn't 156 seem worth it. 157 */ 158 159 /* --------------- Basic configuration --------------- */ 160 161 /* Only change this. N_PRIMARY_MAP *must* be a power of 2. 
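// Illustrative sketch only (hypothetical "sketch_" names; simplified to a
// flat 32-bit layout with no auxiliary map): how an address selects its
// secondary map through the primary map, and how all unused 64KB chunks can
// share a single read-only "distinguished" secondary.  The real types and
// lookup routines appear further down in this file.

#include <stdint.h>
#include <stddef.h>

typedef struct { uint8_t vabits8[16384]; } SketchSecMap;

static SketchSecMap  sketch_dist_noaccess;        // shared by every unused chunk
static SketchSecMap* sketch_primary[1u << 16];    // one entry per 64KB chunk

static void sketch_init ( void )
{
   size_t i;
   for (i = 0; i < (size_t)(1u << 16); i++)
      sketch_primary[i] = &sketch_dist_noaccess;  // everything starts as noaccess
}

static uint8_t* sketch_shadow_byte_for ( uint32_t a )
{
   SketchSecMap* sm = sketch_primary[a >> 16];    // top 16 bits: which secondary
   return &sm->vabits8[(a & 0xFFFFu) >> 2];       // bits 15..2: which shadow byte
}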
*/ 162 163 #if VG_WORDSIZE == 4 164 165 /* cover the entire address space */ 166 # define N_PRIMARY_BITS 16 167 168 #else 169 170 /* Just handle the first 32G fast and the rest via auxiliary 171 primaries. If you change this, Memcheck will assert at startup. 172 See the definition of UNALIGNED_OR_HIGH for extensive comments. */ 173 # define N_PRIMARY_BITS 19 174 175 #endif 176 177 178 /* Do not change this. */ 179 #define N_PRIMARY_MAP ( ((UWord)1) << N_PRIMARY_BITS) 180 181 /* Do not change this. */ 182 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1) 183 184 185 /* --------------- Secondary maps --------------- */ 186 187 // Each byte of memory conceptually has an A bit, which indicates its 188 // addressability, and 8 V bits, which indicates its definedness. 189 // 190 // But because very few bytes are partially defined, we can use a nice 191 // compression scheme to reduce the size of shadow memory. Each byte of 192 // memory has 2 bits which indicates its state (ie. V+A bits): 193 // 194 // 00: noaccess (unaddressable but treated as fully defined) 195 // 01: undefined (addressable and fully undefined) 196 // 10: defined (addressable and fully defined) 197 // 11: partdefined (addressable and partially defined) 198 // 199 // In the "partdefined" case, we use a secondary table to store the V bits. 200 // Each entry in the secondary-V-bits table maps a byte address to its 8 V 201 // bits. 202 // 203 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for 204 // four bytes (32 bits) of memory are in each chunk. Hence the name 205 // "vabits8". This lets us get the V+A bits for four bytes at a time 206 // easily (without having to do any shifting and/or masking), and that is a 207 // very common operation. (Note that although each vabits8 chunk 208 // is 8 bits in size, it represents 32 bits of memory.) 209 // 210 // The representation is "inverse" little-endian... each 4 bytes of 211 // memory is represented by a 1 byte value, where: 212 // 213 // - the status of byte (a+0) is held in bits [1..0] 214 // - the status of byte (a+1) is held in bits [3..2] 215 // - the status of byte (a+2) is held in bits [5..4] 216 // - the status of byte (a+3) is held in bits [7..6] 217 // 218 // It's "inverse" because endianness normally describes a mapping from 219 // value bits to memory addresses; in this case the mapping is inverted. 220 // Ie. instead of particular value bits being held in certain addresses, in 221 // this case certain addresses are represented by particular value bits. 222 // See insert_vabits2_into_vabits8() for an example. 223 // 224 // But note that we don't compress the V bits stored in registers; they 225 // need to be explicit to made the shadow operations possible. Therefore 226 // when moving values between registers and memory we need to convert 227 // between the expanded in-register format and the compressed in-memory 228 // format. This isn't so difficult, it just requires careful attention in a 229 // few places. 230 231 // These represent eight bits of memory. 232 #define VA_BITS2_NOACCESS 0x0 // 00b 233 #define VA_BITS2_UNDEFINED 0x1 // 01b 234 #define VA_BITS2_DEFINED 0x2 // 10b 235 #define VA_BITS2_PARTDEFINED 0x3 // 11b 236 237 // These represent 16 bits of memory. 238 #define VA_BITS4_NOACCESS 0x0 // 00_00b 239 #define VA_BITS4_UNDEFINED 0x5 // 01_01b 240 #define VA_BITS4_DEFINED 0xa // 10_10b 241 242 // These represent 32 bits of memory. 
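// A minimal sketch (hypothetical "sketch_" names) of the lane packing just
// described: the state of byte (a+k) within an aligned 4-byte group lives in
// bits [2k+1..2k] of a single vabits8.  The real helpers,
// insert_vabits2_into_vabits8() and extract_vabits2_from_vabits8(), appear
// further below.

#include <stdint.h>

static void sketch_set_lane ( uint32_t a, unsigned vabits2, uint8_t* vabits8 )
{
   unsigned shift = (a & 3u) << 1;                       // 0, 2, 4 or 6
   *vabits8 = (uint8_t)( (*vabits8 & ~(0x3u << shift))   // clear the old 2 bits
                         | (vabits2 << shift) );         // put in the new 2 bits
}

static unsigned sketch_get_lane ( uint32_t a, uint8_t vabits8 )
{
   return (vabits8 >> ((a & 3u) << 1)) & 0x3u;
}

// For example, starting from 0x55 (01_01_01_01b, all four bytes undefined),
// sketch_set_lane(a+2, 0x2 /*defined*/, &v) only changes bits [5..4],
// giving 0x65 (01_10_01_01b).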
243 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b 244 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b 245 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b 246 247 // These represent 64 bits of memory. 248 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2 249 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2 250 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2 251 252 253 #define SM_CHUNKS 16384 254 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2) 255 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3) 256 257 // Paranoia: it's critical for performance that the requested inlining 258 // occurs. So try extra hard. 259 #define INLINE inline __attribute__((always_inline)) 260 261 static INLINE Addr start_of_this_sm ( Addr a ) { 262 return (a & (~SM_MASK)); 263 } 264 static INLINE Bool is_start_of_sm ( Addr a ) { 265 return (start_of_this_sm(a) == a); 266 } 267 268 typedef 269 struct { 270 UChar vabits8[SM_CHUNKS]; 271 } 272 SecMap; 273 274 // 3 distinguished secondary maps, one for no-access, one for 275 // accessible but undefined, and one for accessible and defined. 276 // Distinguished secondaries may never be modified. 277 #define SM_DIST_NOACCESS 0 278 #define SM_DIST_UNDEFINED 1 279 #define SM_DIST_DEFINED 2 280 281 static SecMap sm_distinguished[3]; 282 283 static INLINE Bool is_distinguished_sm ( SecMap* sm ) { 284 return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2]; 285 } 286 287 // Forward declaration 288 static void update_SM_counts(SecMap* oldSM, SecMap* newSM); 289 290 /* dist_sm points to one of our three distinguished secondaries. Make 291 a copy of it so that we can write to it. 292 */ 293 static SecMap* copy_for_writing ( SecMap* dist_sm ) 294 { 295 SecMap* new_sm; 296 tl_assert(dist_sm == &sm_distinguished[0] 297 || dist_sm == &sm_distinguished[1] 298 || dist_sm == &sm_distinguished[2]); 299 300 new_sm = VG_(am_shadow_alloc)(sizeof(SecMap)); 301 if (new_sm == NULL) 302 VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap", 303 sizeof(SecMap) ); 304 VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap)); 305 update_SM_counts(dist_sm, new_sm); 306 return new_sm; 307 } 308 309 /* --------------- Stats --------------- */ 310 311 static Int n_issued_SMs = 0; 312 static Int n_deissued_SMs = 0; 313 static Int n_noaccess_SMs = N_PRIMARY_MAP; // start with many noaccess DSMs 314 static Int n_undefined_SMs = 0; 315 static Int n_defined_SMs = 0; 316 static Int n_non_DSM_SMs = 0; 317 static Int max_noaccess_SMs = 0; 318 static Int max_undefined_SMs = 0; 319 static Int max_defined_SMs = 0; 320 static Int max_non_DSM_SMs = 0; 321 322 /* # searches initiated in auxmap_L1, and # base cmps required */ 323 static ULong n_auxmap_L1_searches = 0; 324 static ULong n_auxmap_L1_cmps = 0; 325 /* # of searches that missed in auxmap_L1 and therefore had to 326 be handed to auxmap_L2. And the number of nodes inserted. 
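// A small sketch (hypothetical "sketch_" names, plain malloc instead of the
// shadow-memory allocator) of how the copy-on-write rule above gets used: a
// write into a 64KB chunk still backed by a distinguished (shared, read-only)
// secondary must first install a private copy; reads never need to.  This
// illustrates the idea only and is not this file's code.

#include <stdlib.h>
#include <string.h>
#include <stdint.h>

typedef struct { uint8_t vabits8[16384]; } SketchSecMap;

static SketchSecMap sketch_dist[3];               // noaccess / undefined / defined

static int sketch_is_distinguished ( SketchSecMap* sm )
{
   return sm >= &sketch_dist[0] && sm <= &sketch_dist[2];
}

// 'slot' is the primary-map entry for the chunk about to be written.
static SketchSecMap* sketch_get_writable ( SketchSecMap** slot )
{
   if (sketch_is_distinguished(*slot)) {
      SketchSecMap* copy = malloc(sizeof *copy);
      if (copy == NULL) abort();                  // the real code reports out-of-memory
      memcpy(copy, *slot, sizeof *copy);          // start from the shared contents
      *slot = copy;                               // install the private, writable copy
   }
   return *slot;
}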
*/ 327 static ULong n_auxmap_L2_searches = 0; 328 static ULong n_auxmap_L2_nodes = 0; 329 330 static Int n_sanity_cheap = 0; 331 static Int n_sanity_expensive = 0; 332 333 static Int n_secVBit_nodes = 0; 334 static Int max_secVBit_nodes = 0; 335 336 static void update_SM_counts(SecMap* oldSM, SecMap* newSM) 337 { 338 if (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --; 339 else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--; 340 else if (oldSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs --; 341 else { n_non_DSM_SMs --; 342 n_deissued_SMs ++; } 343 344 if (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++; 345 else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++; 346 else if (newSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs ++; 347 else { n_non_DSM_SMs ++; 348 n_issued_SMs ++; } 349 350 if (n_noaccess_SMs > max_noaccess_SMs ) max_noaccess_SMs = n_noaccess_SMs; 351 if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs; 352 if (n_defined_SMs > max_defined_SMs ) max_defined_SMs = n_defined_SMs; 353 if (n_non_DSM_SMs > max_non_DSM_SMs ) max_non_DSM_SMs = n_non_DSM_SMs; 354 } 355 356 /* --------------- Primary maps --------------- */ 357 358 /* The main primary map. This covers some initial part of the address 359 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is 360 handled using the auxiliary primary map. 361 */ 362 static SecMap* primary_map[N_PRIMARY_MAP]; 363 364 365 /* An entry in the auxiliary primary map. base must be a 64k-aligned 366 value, and sm points at the relevant secondary map. As with the 367 main primary map, the secondary may be either a real secondary, or 368 one of the three distinguished secondaries. DO NOT CHANGE THIS 369 LAYOUT: the first word has to be the key for OSet fast lookups. 370 */ 371 typedef 372 struct { 373 Addr base; 374 SecMap* sm; 375 } 376 AuxMapEnt; 377 378 /* Tunable parameter: How big is the L1 queue? */ 379 #define N_AUXMAP_L1 24 380 381 /* Tunable parameter: How far along the L1 queue to insert 382 entries resulting from L2 lookups? */ 383 #define AUXMAP_L1_INSERT_IX 12 384 385 static struct { 386 Addr base; 387 AuxMapEnt* ent; // pointer to the matching auxmap_L2 node 388 } 389 auxmap_L1[N_AUXMAP_L1]; 390 391 static OSet* auxmap_L2 = NULL; 392 393 static void init_auxmap_L1_L2 ( void ) 394 { 395 Int i; 396 for (i = 0; i < N_AUXMAP_L1; i++) { 397 auxmap_L1[i].base = 0; 398 auxmap_L1[i].ent = NULL; 399 } 400 401 tl_assert(0 == offsetof(AuxMapEnt,base)); 402 tl_assert(sizeof(Addr) == sizeof(void*)); 403 auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base), 404 /*fastCmp*/ NULL, 405 VG_(malloc), "mc.iaLL.1", VG_(free) ); 406 } 407 408 /* Check representation invariants; if OK return NULL; else a 409 descriptive bit of text. Also return the number of 410 non-distinguished secondary maps referred to from the auxiliary 411 primary maps. */ 412 413 static HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found ) 414 { 415 Word i, j; 416 /* On a 32-bit platform, the L2 and L1 tables should 417 both remain empty forever. 
418 419 On a 64-bit platform: 420 In the L2 table: 421 all .base & 0xFFFF == 0 422 all .base > MAX_PRIMARY_ADDRESS 423 In the L1 table: 424 all .base & 0xFFFF == 0 425 all (.base > MAX_PRIMARY_ADDRESS 426 .base & 0xFFFF == 0 427 and .ent points to an AuxMapEnt with the same .base) 428 or 429 (.base == 0 and .ent == NULL) 430 */ 431 *n_secmaps_found = 0; 432 if (sizeof(void*) == 4) { 433 /* 32-bit platform */ 434 if (VG_(OSetGen_Size)(auxmap_L2) != 0) 435 return "32-bit: auxmap_L2 is non-empty"; 436 for (i = 0; i < N_AUXMAP_L1; i++) 437 if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL) 438 return "32-bit: auxmap_L1 is non-empty"; 439 } else { 440 /* 64-bit platform */ 441 UWord elems_seen = 0; 442 AuxMapEnt *elem, *res; 443 AuxMapEnt key; 444 /* L2 table */ 445 VG_(OSetGen_ResetIter)(auxmap_L2); 446 while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) { 447 elems_seen++; 448 if (0 != (elem->base & (Addr)0xFFFF)) 449 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2"; 450 if (elem->base <= MAX_PRIMARY_ADDRESS) 451 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2"; 452 if (elem->sm == NULL) 453 return "64-bit: .sm in _L2 is NULL"; 454 if (!is_distinguished_sm(elem->sm)) 455 (*n_secmaps_found)++; 456 } 457 if (elems_seen != n_auxmap_L2_nodes) 458 return "64-bit: disagreement on number of elems in _L2"; 459 /* Check L1-L2 correspondence */ 460 for (i = 0; i < N_AUXMAP_L1; i++) { 461 if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL) 462 continue; 463 if (0 != (auxmap_L1[i].base & (Addr)0xFFFF)) 464 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1"; 465 if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS) 466 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1"; 467 if (auxmap_L1[i].ent == NULL) 468 return "64-bit: .ent is NULL in auxmap_L1"; 469 if (auxmap_L1[i].ent->base != auxmap_L1[i].base) 470 return "64-bit: _L1 and _L2 bases are inconsistent"; 471 /* Look it up in auxmap_L2. */ 472 key.base = auxmap_L1[i].base; 473 key.sm = 0; 474 res = VG_(OSetGen_Lookup)(auxmap_L2, &key); 475 if (res == NULL) 476 return "64-bit: _L1 .base not found in _L2"; 477 if (res != auxmap_L1[i].ent) 478 return "64-bit: _L1 .ent disagrees with _L2 entry"; 479 } 480 /* Check L1 contains no duplicates */ 481 for (i = 0; i < N_AUXMAP_L1; i++) { 482 if (auxmap_L1[i].base == 0) 483 continue; 484 for (j = i+1; j < N_AUXMAP_L1; j++) { 485 if (auxmap_L1[j].base == 0) 486 continue; 487 if (auxmap_L1[j].base == auxmap_L1[i].base) 488 return "64-bit: duplicate _L1 .base entries"; 489 } 490 } 491 } 492 return NULL; /* ok */ 493 } 494 495 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent ) 496 { 497 Word i; 498 tl_assert(ent); 499 tl_assert(rank >= 0 && rank < N_AUXMAP_L1); 500 for (i = N_AUXMAP_L1-1; i > rank; i--) 501 auxmap_L1[i] = auxmap_L1[i-1]; 502 auxmap_L1[rank].base = ent->base; 503 auxmap_L1[rank].ent = ent; 504 } 505 506 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a ) 507 { 508 AuxMapEnt key; 509 AuxMapEnt* res; 510 Word i; 511 512 tl_assert(a > MAX_PRIMARY_ADDRESS); 513 a &= ~(Addr)0xFFFF; 514 515 /* First search the front-cache, which is a self-organising 516 list containing the most popular entries. 
*/ 517 518 if (LIKELY(auxmap_L1[0].base == a)) 519 return auxmap_L1[0].ent; 520 if (LIKELY(auxmap_L1[1].base == a)) { 521 Addr t_base = auxmap_L1[0].base; 522 AuxMapEnt* t_ent = auxmap_L1[0].ent; 523 auxmap_L1[0].base = auxmap_L1[1].base; 524 auxmap_L1[0].ent = auxmap_L1[1].ent; 525 auxmap_L1[1].base = t_base; 526 auxmap_L1[1].ent = t_ent; 527 return auxmap_L1[0].ent; 528 } 529 530 n_auxmap_L1_searches++; 531 532 for (i = 0; i < N_AUXMAP_L1; i++) { 533 if (auxmap_L1[i].base == a) { 534 break; 535 } 536 } 537 tl_assert(i >= 0 && i <= N_AUXMAP_L1); 538 539 n_auxmap_L1_cmps += (ULong)(i+1); 540 541 if (i < N_AUXMAP_L1) { 542 if (i > 0) { 543 Addr t_base = auxmap_L1[i-1].base; 544 AuxMapEnt* t_ent = auxmap_L1[i-1].ent; 545 auxmap_L1[i-1].base = auxmap_L1[i-0].base; 546 auxmap_L1[i-1].ent = auxmap_L1[i-0].ent; 547 auxmap_L1[i-0].base = t_base; 548 auxmap_L1[i-0].ent = t_ent; 549 i--; 550 } 551 return auxmap_L1[i].ent; 552 } 553 554 n_auxmap_L2_searches++; 555 556 /* First see if we already have it. */ 557 key.base = a; 558 key.sm = 0; 559 560 res = VG_(OSetGen_Lookup)(auxmap_L2, &key); 561 if (res) 562 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res ); 563 return res; 564 } 565 566 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a ) 567 { 568 AuxMapEnt *nyu, *res; 569 570 /* First see if we already have it. */ 571 res = maybe_find_in_auxmap( a ); 572 if (LIKELY(res)) 573 return res; 574 575 /* Ok, there's no entry in the secondary map, so we'll have 576 to allocate one. */ 577 a &= ~(Addr)0xFFFF; 578 579 nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) ); 580 tl_assert(nyu); 581 nyu->base = a; 582 nyu->sm = &sm_distinguished[SM_DIST_NOACCESS]; 583 VG_(OSetGen_Insert)( auxmap_L2, nyu ); 584 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu ); 585 n_auxmap_L2_nodes++; 586 return nyu; 587 } 588 589 /* --------------- SecMap fundamentals --------------- */ 590 591 // In all these, 'low' means it's definitely in the main primary map, 592 // 'high' means it's definitely in the auxiliary table. 593 594 static INLINE SecMap** get_secmap_low_ptr ( Addr a ) 595 { 596 UWord pm_off = a >> 16; 597 # if VG_DEBUG_MEMORY >= 1 598 tl_assert(pm_off < N_PRIMARY_MAP); 599 # endif 600 return &primary_map[ pm_off ]; 601 } 602 603 static INLINE SecMap** get_secmap_high_ptr ( Addr a ) 604 { 605 AuxMapEnt* am = find_or_alloc_in_auxmap(a); 606 return &am->sm; 607 } 608 609 static SecMap** get_secmap_ptr ( Addr a ) 610 { 611 return ( a <= MAX_PRIMARY_ADDRESS 612 ? get_secmap_low_ptr(a) 613 : get_secmap_high_ptr(a)); 614 } 615 616 static INLINE SecMap* get_secmap_for_reading_low ( Addr a ) 617 { 618 return *get_secmap_low_ptr(a); 619 } 620 621 static INLINE SecMap* get_secmap_for_reading_high ( Addr a ) 622 { 623 return *get_secmap_high_ptr(a); 624 } 625 626 static INLINE SecMap* get_secmap_for_writing_low(Addr a) 627 { 628 SecMap** p = get_secmap_low_ptr(a); 629 if (UNLIKELY(is_distinguished_sm(*p))) 630 *p = copy_for_writing(*p); 631 return *p; 632 } 633 634 static INLINE SecMap* get_secmap_for_writing_high ( Addr a ) 635 { 636 SecMap** p = get_secmap_high_ptr(a); 637 if (UNLIKELY(is_distinguished_sm(*p))) 638 *p = copy_for_writing(*p); 639 return *p; 640 } 641 642 /* Produce the secmap for 'a', either from the primary map or by 643 ensuring there is an entry for it in the aux primary map. The 644 secmap may be a distinguished one as the caller will only want to 645 be able to read it. 
646 */ 647 static INLINE SecMap* get_secmap_for_reading ( Addr a ) 648 { 649 return ( a <= MAX_PRIMARY_ADDRESS 650 ? get_secmap_for_reading_low (a) 651 : get_secmap_for_reading_high(a) ); 652 } 653 654 /* Produce the secmap for 'a', either from the primary map or by 655 ensuring there is an entry for it in the aux primary map. The 656 secmap may not be a distinguished one, since the caller will want 657 to be able to write it. If it is a distinguished secondary, make a 658 writable copy of it, install it, and return the copy instead. (COW 659 semantics). 660 */ 661 static SecMap* get_secmap_for_writing ( Addr a ) 662 { 663 return ( a <= MAX_PRIMARY_ADDRESS 664 ? get_secmap_for_writing_low (a) 665 : get_secmap_for_writing_high(a) ); 666 } 667 668 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't 669 allocate one if one doesn't already exist. This is used by the 670 leak checker. 671 */ 672 static SecMap* maybe_get_secmap_for ( Addr a ) 673 { 674 if (a <= MAX_PRIMARY_ADDRESS) { 675 return get_secmap_for_reading_low(a); 676 } else { 677 AuxMapEnt* am = maybe_find_in_auxmap(a); 678 return am ? am->sm : NULL; 679 } 680 } 681 682 /* --------------- Fundamental functions --------------- */ 683 684 static INLINE 685 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 ) 686 { 687 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6 688 *vabits8 &= ~(0x3 << shift); // mask out the two old bits 689 *vabits8 |= (vabits2 << shift); // mask in the two new bits 690 } 691 692 static INLINE 693 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 ) 694 { 695 UInt shift; 696 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned 697 shift = (a & 2) << 1; // shift by 0 or 4 698 *vabits8 &= ~(0xf << shift); // mask out the four old bits 699 *vabits8 |= (vabits4 << shift); // mask in the four new bits 700 } 701 702 static INLINE 703 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 ) 704 { 705 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6 706 vabits8 >>= shift; // shift the two bits to the bottom 707 return 0x3 & vabits8; // mask out the rest 708 } 709 710 static INLINE 711 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 ) 712 { 713 UInt shift; 714 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned 715 shift = (a & 2) << 1; // shift by 0 or 4 716 vabits8 >>= shift; // shift the four bits to the bottom 717 return 0xf & vabits8; // mask out the rest 718 } 719 720 // Note that these four are only used in slow cases. The fast cases do 721 // clever things like combine the auxmap check (in 722 // get_secmap_{read,writ}able) with alignment checks. 723 724 // *** WARNING! *** 725 // Any time this function is called, if it is possible that vabits2 726 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the 727 // sec-V-bits table must also be set! 728 static INLINE 729 void set_vabits2 ( Addr a, UChar vabits2 ) 730 { 731 SecMap* sm = get_secmap_for_writing(a); 732 UWord sm_off = SM_OFF(a); 733 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) ); 734 } 735 736 static INLINE 737 UChar get_vabits2 ( Addr a ) 738 { 739 SecMap* sm = get_secmap_for_reading(a); 740 UWord sm_off = SM_OFF(a); 741 UChar vabits8 = sm->vabits8[sm_off]; 742 return extract_vabits2_from_vabits8(a, vabits8); 743 } 744 745 // *** WARNING! 
*** 746 // Any time this function is called, if it is possible that any of the 747 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the 748 // corresponding entry(s) in the sec-V-bits table must also be set! 749 static INLINE 750 UChar get_vabits8_for_aligned_word32 ( Addr a ) 751 { 752 SecMap* sm = get_secmap_for_reading(a); 753 UWord sm_off = SM_OFF(a); 754 UChar vabits8 = sm->vabits8[sm_off]; 755 return vabits8; 756 } 757 758 static INLINE 759 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 ) 760 { 761 SecMap* sm = get_secmap_for_writing(a); 762 UWord sm_off = SM_OFF(a); 763 sm->vabits8[sm_off] = vabits8; 764 } 765 766 767 // Forward declarations 768 static UWord get_sec_vbits8(Addr a); 769 static void set_sec_vbits8(Addr a, UWord vbits8); 770 771 // Returns False if there was an addressability error. 772 static INLINE 773 Bool set_vbits8 ( Addr a, UChar vbits8 ) 774 { 775 Bool ok = True; 776 UChar vabits2 = get_vabits2(a); 777 if ( VA_BITS2_NOACCESS != vabits2 ) { 778 // Addressable. Convert in-register format to in-memory format. 779 // Also remove any existing sec V bit entry for the byte if no 780 // longer necessary. 781 if ( V_BITS8_DEFINED == vbits8 ) { vabits2 = VA_BITS2_DEFINED; } 782 else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; } 783 else { vabits2 = VA_BITS2_PARTDEFINED; 784 set_sec_vbits8(a, vbits8); } 785 set_vabits2(a, vabits2); 786 787 } else { 788 // Unaddressable! Do nothing -- when writing to unaddressable 789 // memory it acts as a black hole, and the V bits can never be seen 790 // again. So we don't have to write them at all. 791 ok = False; 792 } 793 return ok; 794 } 795 796 // Returns False if there was an addressability error. In that case, we put 797 // all defined bits into vbits8. 798 static INLINE 799 Bool get_vbits8 ( Addr a, UChar* vbits8 ) 800 { 801 Bool ok = True; 802 UChar vabits2 = get_vabits2(a); 803 804 // Convert the in-memory format to in-register format. 805 if ( VA_BITS2_DEFINED == vabits2 ) { *vbits8 = V_BITS8_DEFINED; } 806 else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; } 807 else if ( VA_BITS2_NOACCESS == vabits2 ) { 808 *vbits8 = V_BITS8_DEFINED; // Make V bits defined! 809 ok = False; 810 } else { 811 tl_assert( VA_BITS2_PARTDEFINED == vabits2 ); 812 *vbits8 = get_sec_vbits8(a); 813 } 814 return ok; 815 } 816 817 818 /* --------------- Secondary V bit table ------------ */ 819 820 // This table holds the full V bit pattern for partially-defined bytes 821 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow 822 // memory. 823 // 824 // Note: the nodes in this table can become stale. Eg. if you write a PDB, 825 // then overwrite the same address with a fully defined byte, the sec-V-bit 826 // node will not necessarily be removed. This is because checking for 827 // whether removal is necessary would slow down the fast paths. 828 // 829 // To avoid the stale nodes building up too much, we periodically (once the 830 // table reaches a certain size) garbage collect (GC) the table by 831 // traversing it and evicting any nodes not having PDB. 832 // If more than a certain proportion of nodes survived, we increase the 833 // table size so that GCs occur less often. 
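// To make the table layout concrete, here is a sketch (hypothetical
// "sketch_" names; the grouping size of 16 matches BYTES_PER_SEC_VBIT_NODE
// below): each node covers one 16-byte-aligned group of addresses and holds
// the full 8 V bits for every byte in that group; the node's key is the
// group's base address.  The real node type and accessors (SecVBitNode,
// get_sec_vbits8, set_sec_vbits8) follow below.

#include <stdint.h>

#define SKETCH_BYTES_PER_NODE 16

typedef struct {
   uint32_t a;                              // group base; a % 16 == 0
   uint8_t  vbits8[SKETCH_BYTES_PER_NODE];  // full V bits, one entry per byte of memory
} SketchSecVBitNode;

// Once the node for (addr rounded down to 16) has been looked up in the
// table, this picks out the V bits of the one byte asked about.
static uint8_t sketch_vbits_for ( const SketchSecVBitNode* n, uint32_t addr )
{
   return n->vbits8[addr % SKETCH_BYTES_PER_NODE];
}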
834 // 835 // This policy is designed to avoid bad table bloat in the worst case where 836 // a program creates huge numbers of stale PDBs -- we would get this bloat 837 // if we had no GC -- while handling well the case where a node becomes 838 // stale but shortly afterwards is rewritten with a PDB and so becomes 839 // non-stale again (which happens quite often, eg. in perf/bz2). If we just 840 // remove all stale nodes as soon as possible, we just end up re-adding a 841 // lot of them in later again. The "sufficiently stale" approach avoids 842 // this. (If a program has many live PDBs, performance will just suck, 843 // there's no way around that.) 844 // 845 // Further comments, JRS 14 Feb 2012. It turns out that the policy of 846 // holding on to stale entries for 2 GCs before discarding them can lead 847 // to massive space leaks. So we're changing to an arrangement where 848 // lines are evicted as soon as they are observed to be stale during a 849 // GC. This also has a side benefit of allowing the sufficiently_stale 850 // field to be removed from the SecVBitNode struct, reducing its size by 851 // 8 bytes, which is a substantial space saving considering that the 852 // struct was previously 32 or so bytes, on a 64 bit target. 853 // 854 // In order to try and mitigate the problem that the "sufficiently stale" 855 // heuristic was designed to avoid, the table size is allowed to drift 856 // up ("DRIFTUP") slowly to 80000, even if the residency is low. This 857 // means that nodes will exist in the table longer on average, and hopefully 858 // will be deleted and re-added less frequently. 859 // 860 // The previous scaling up mechanism (now called STEPUP) is retained: 861 // if residency exceeds 50%, the table is scaled up, although by a 862 // factor sqrt(2) rather than 2 as before. This effectively doubles the 863 // frequency of GCs when there are many PDBs at reduces the tendency of 864 // stale PDBs to reside for long periods in the table. 865 866 static OSet* secVBitTable; 867 868 // Stats 869 static ULong sec_vbits_new_nodes = 0; 870 static ULong sec_vbits_updates = 0; 871 872 // This must be a power of two; this is checked in mc_pre_clo_init(). 873 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover 874 // a larger address range) they take more space but we can get multiple 875 // partially-defined bytes in one if they are close to each other, reducing 876 // the number of total nodes. In practice sometimes they are clustered (eg. 877 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous 878 // row), but often not. So we choose something intermediate. 879 #define BYTES_PER_SEC_VBIT_NODE 16 880 881 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if 882 // more than this many nodes survive a GC. 883 #define STEPUP_SURVIVOR_PROPORTION 0.5 884 #define STEPUP_GROWTH_FACTOR 1.414213562 885 886 // If the above heuristic doesn't apply, then we may make the table 887 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than 888 // this many nodes survive a GC, _and_ the total table size does 889 // not exceed a fixed limit. The numbers are somewhat arbitrary, but 890 // work tolerably well on long Firefox runs. The scaleup ratio of 1.5% 891 // effectively although gradually reduces residency and increases time 892 // between GCs for programs with small numbers of PDBs. 
The 80000 limit 893 // effectively limits the table size to around 2MB for programs with 894 // small numbers of PDBs, whilst giving a reasonably long lifetime to 895 // entries, to try and reduce the costs resulting from deleting and 896 // re-adding of entries. 897 #define DRIFTUP_SURVIVOR_PROPORTION 0.15 898 #define DRIFTUP_GROWTH_FACTOR 1.015 899 #define DRIFTUP_MAX_SIZE 80000 900 901 // We GC the table when it gets this many nodes in it, ie. it's effectively 902 // the table size. It can change. 903 static Int secVBitLimit = 1000; 904 905 // The number of GCs done, used to age sec-V-bit nodes for eviction. 906 // Because it's unsigned, wrapping doesn't matter -- the right answer will 907 // come out anyway. 908 static UInt GCs_done = 0; 909 910 typedef 911 struct { 912 Addr a; 913 UChar vbits8[BYTES_PER_SEC_VBIT_NODE]; 914 } 915 SecVBitNode; 916 917 static OSet* createSecVBitTable(void) 918 { 919 OSet* newSecVBitTable; 920 newSecVBitTable = VG_(OSetGen_Create_With_Pool) 921 ( offsetof(SecVBitNode, a), 922 NULL, // use fast comparisons 923 VG_(malloc), "mc.cSVT.1 (sec VBit table)", 924 VG_(free), 925 1000, 926 sizeof(SecVBitNode)); 927 return newSecVBitTable; 928 } 929 930 static void gcSecVBitTable(void) 931 { 932 OSet* secVBitTable2; 933 SecVBitNode* n; 934 Int i, n_nodes = 0, n_survivors = 0; 935 936 GCs_done++; 937 938 // Create the new table. 939 secVBitTable2 = createSecVBitTable(); 940 941 // Traverse the table, moving fresh nodes into the new table. 942 VG_(OSetGen_ResetIter)(secVBitTable); 943 while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) { 944 // Keep node if any of its bytes are non-stale. Using 945 // get_vabits2() for the lookup is not very efficient, but I don't 946 // think it matters. 947 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) { 948 if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) { 949 // Found a non-stale byte, so keep => 950 // Insert a copy of the node into the new table. 951 SecVBitNode* n2 = 952 VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode)); 953 *n2 = *n; 954 VG_(OSetGen_Insert)(secVBitTable2, n2); 955 break; 956 } 957 } 958 } 959 960 // Get the before and after sizes. 961 n_nodes = VG_(OSetGen_Size)(secVBitTable); 962 n_survivors = VG_(OSetGen_Size)(secVBitTable2); 963 964 // Destroy the old table, and put the new one in its place. 965 VG_(OSetGen_Destroy)(secVBitTable); 966 secVBitTable = secVBitTable2; 967 968 if (VG_(clo_verbosity) > 1) { 969 Char percbuf[7]; 970 VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf); 971 VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n", 972 n_nodes, n_survivors, percbuf); 973 } 974 975 // Increase table size if necessary. 
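// Worked example of the two growth rules below (using the constants defined
// above; illustration only): with secVBitLimit = 1000, a GC leaving more
// than 500 survivors (the 50% STEPUP threshold) raises the limit to
// 1000 * 1.414... ~= 1414; otherwise, if more than 150 survivors (15%)
// remain and the limit is still under 80000, it drifts up to
// 1000 * 1.015 = 1015.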
976 if ((Double)n_survivors 977 > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) { 978 secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR); 979 if (VG_(clo_verbosity) > 1) 980 VG_(message)(Vg_DebugMsg, 981 "memcheck GC: %d new table size (stepup)\n", 982 secVBitLimit); 983 } 984 else 985 if (secVBitLimit < DRIFTUP_MAX_SIZE 986 && (Double)n_survivors 987 > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) { 988 secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR); 989 if (VG_(clo_verbosity) > 1) 990 VG_(message)(Vg_DebugMsg, 991 "memcheck GC: %d new table size (driftup)\n", 992 secVBitLimit); 993 } 994 } 995 996 static UWord get_sec_vbits8(Addr a) 997 { 998 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE); 999 Int amod = a % BYTES_PER_SEC_VBIT_NODE; 1000 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned); 1001 UChar vbits8; 1002 tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a); 1003 // Shouldn't be fully defined or fully undefined -- those cases shouldn't 1004 // make it to the secondary V bits table. 1005 vbits8 = n->vbits8[amod]; 1006 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8); 1007 return vbits8; 1008 } 1009 1010 static void set_sec_vbits8(Addr a, UWord vbits8) 1011 { 1012 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE); 1013 Int i, amod = a % BYTES_PER_SEC_VBIT_NODE; 1014 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned); 1015 // Shouldn't be fully defined or fully undefined -- those cases shouldn't 1016 // make it to the secondary V bits table. 1017 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8); 1018 if (n) { 1019 n->vbits8[amod] = vbits8; // update 1020 sec_vbits_updates++; 1021 } else { 1022 // Do a table GC if necessary. Nb: do this before creating and 1023 // inserting the new node, to avoid erroneously GC'ing the new node. 1024 if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) { 1025 gcSecVBitTable(); 1026 } 1027 1028 // New node: assign the specific byte, make the rest invalid (they 1029 // should never be read as-is, but be cautious). 1030 n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode)); 1031 n->a = aAligned; 1032 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) { 1033 n->vbits8[i] = V_BITS8_UNDEFINED; 1034 } 1035 n->vbits8[amod] = vbits8; 1036 1037 // Insert the new node. 1038 VG_(OSetGen_Insert)(secVBitTable, n); 1039 sec_vbits_new_nodes++; 1040 1041 n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable); 1042 if (n_secVBit_nodes > max_secVBit_nodes) 1043 max_secVBit_nodes = n_secVBit_nodes; 1044 } 1045 } 1046 1047 /* --------------- Endianness helpers --------------- */ 1048 1049 /* Returns the offset in memory of the byteno-th most significant byte 1050 in a wordszB-sized word, given the specified endianness. */ 1051 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian, 1052 UWord byteno ) { 1053 return bigendian ? 
(wordszB-1-byteno) : byteno; 1054 } 1055 1056 1057 /* --------------- Ignored address ranges --------------- */ 1058 1059 #define M_IGNORE_RANGES 4 1060 1061 typedef 1062 struct { 1063 Int used; 1064 Addr start[M_IGNORE_RANGES]; 1065 Addr end[M_IGNORE_RANGES]; 1066 } 1067 IgnoreRanges; 1068 1069 static IgnoreRanges ignoreRanges; 1070 1071 INLINE Bool MC_(in_ignored_range) ( Addr a ) 1072 { 1073 Int i; 1074 if (LIKELY(ignoreRanges.used == 0)) 1075 return False; 1076 for (i = 0; i < ignoreRanges.used; i++) { 1077 if (a >= ignoreRanges.start[i] && a < ignoreRanges.end[i]) 1078 return True; 1079 } 1080 return False; 1081 } 1082 1083 /* Parse two Addr separated by a dash, or fail. */ 1084 1085 static Bool parse_range ( UChar** ppc, Addr* result1, Addr* result2 ) 1086 { 1087 Bool ok = VG_(parse_Addr) (ppc, result1); 1088 if (!ok) 1089 return False; 1090 if (**ppc != '-') 1091 return False; 1092 (*ppc)++; 1093 ok = VG_(parse_Addr) (ppc, result2); 1094 if (!ok) 1095 return False; 1096 return True; 1097 } 1098 1099 /* Parse a set of ranges separated by commas into 'ignoreRanges', or 1100 fail. */ 1101 1102 static Bool parse_ignore_ranges ( UChar* str0 ) 1103 { 1104 Addr start, end; 1105 Bool ok; 1106 UChar* str = str0; 1107 UChar** ppc = &str; 1108 ignoreRanges.used = 0; 1109 while (1) { 1110 ok = parse_range(ppc, &start, &end); 1111 if (!ok) 1112 return False; 1113 if (ignoreRanges.used >= M_IGNORE_RANGES) 1114 return False; 1115 ignoreRanges.start[ignoreRanges.used] = start; 1116 ignoreRanges.end[ignoreRanges.used] = end; 1117 ignoreRanges.used++; 1118 if (**ppc == 0) 1119 return True; 1120 if (**ppc != ',') 1121 return False; 1122 (*ppc)++; 1123 } 1124 /*NOTREACHED*/ 1125 return False; 1126 } 1127 1128 1129 /* --------------- Load/store slow cases. --------------- */ 1130 1131 static 1132 __attribute__((noinline)) 1133 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian ) 1134 { 1135 PROF_EVENT(30, "mc_LOADVn_slow"); 1136 1137 /* ------------ BEGIN semi-fast cases ------------ */ 1138 /* These deal quickly-ish with the common auxiliary primary map 1139 cases on 64-bit platforms. Are merely a speedup hack; can be 1140 omitted without loss of correctness/functionality. Note that in 1141 both cases the "sizeof(void*) == 8" causes these cases to be 1142 folded out by compilers on 32-bit platforms. These are derived 1143 from LOADV64 and LOADV32. 1144 */ 1145 if (LIKELY(sizeof(void*) == 8 1146 && nBits == 64 && VG_IS_8_ALIGNED(a))) { 1147 SecMap* sm = get_secmap_for_reading(a); 1148 UWord sm_off16 = SM_OFF_16(a); 1149 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16]; 1150 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) 1151 return V_BITS64_DEFINED; 1152 if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) 1153 return V_BITS64_UNDEFINED; 1154 /* else fall into the slow case */ 1155 } 1156 if (LIKELY(sizeof(void*) == 8 1157 && nBits == 32 && VG_IS_4_ALIGNED(a))) { 1158 SecMap* sm = get_secmap_for_reading(a); 1159 UWord sm_off = SM_OFF(a); 1160 UWord vabits8 = sm->vabits8[sm_off]; 1161 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) 1162 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED); 1163 if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) 1164 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED); 1165 /* else fall into slow case */ 1166 } 1167 /* ------------ END semi-fast cases ------------ */ 1168 1169 ULong vbits64 = V_BITS64_UNDEFINED; /* result */ 1170 ULong pessim64 = V_BITS64_DEFINED; /* only used when p-l-ok=yes */ 1171 SSizeT szB = nBits / 8; 1172 SSizeT i; /* Must be signed. 
*/ 1173 SizeT n_addrs_bad = 0; 1174 Addr ai; 1175 UChar vbits8; 1176 Bool ok; 1177 1178 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8); 1179 1180 /* Make up a 64-bit result V word, which contains the loaded data 1181 for valid addresses and Defined for invalid addresses. Iterate 1182 over the bytes in the word, from the most significant down to 1183 the least. The vbits to return are calculated into vbits64. 1184 Also compute the pessimising value to be used when 1185 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant 1186 info can be gleaned from pessim64) but is used as a 1187 cross-check. */ 1188 for (i = szB-1; i >= 0; i--) { 1189 PROF_EVENT(31, "mc_LOADVn_slow(loop)"); 1190 ai = a + byte_offset_w(szB, bigendian, i); 1191 ok = get_vbits8(ai, &vbits8); 1192 vbits64 <<= 8; 1193 vbits64 |= vbits8; 1194 if (!ok) n_addrs_bad++; 1195 pessim64 <<= 8; 1196 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED); 1197 } 1198 1199 /* In the common case, all the addresses involved are valid, so we 1200 just return the computed V bits and have done. */ 1201 if (LIKELY(n_addrs_bad == 0)) 1202 return vbits64; 1203 1204 /* If there's no possibility of getting a partial-loads-ok 1205 exemption, report the error and quit. */ 1206 if (!MC_(clo_partial_loads_ok)) { 1207 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False ); 1208 return vbits64; 1209 } 1210 1211 /* The partial-loads-ok excemption might apply. Find out if it 1212 does. If so, don't report an addressing error, but do return 1213 Undefined for the bytes that are out of range, so as to avoid 1214 false negatives. If it doesn't apply, just report an addressing 1215 error in the usual way. */ 1216 1217 /* Some code steps along byte strings in aligned word-sized chunks 1218 even when there is only a partially defined word at the end (eg, 1219 optimised strlen). This is allowed by the memory model of 1220 modern machines, since an aligned load cannot span two pages and 1221 thus cannot "partially fault". Despite such behaviour being 1222 declared undefined by ANSI C/C++. 1223 1224 Therefore, a load from a partially-addressible place is allowed 1225 if all of the following hold: 1226 - the command-line flag is set [by default, it isn't] 1227 - it's a word-sized, word-aligned load 1228 - at least one of the addresses in the word *is* valid 1229 1230 Since this suppresses the addressing error, we avoid false 1231 negatives by marking bytes undefined when they come from an 1232 invalid address. 1233 */ 1234 1235 /* "at least one of the addresses is invalid" */ 1236 tl_assert(pessim64 != V_BITS64_DEFINED); 1237 1238 if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a) 1239 && n_addrs_bad < VG_WORDSIZE) { 1240 /* Exemption applies. Use the previously computed pessimising 1241 value for vbits64 and return the combined result, but don't 1242 flag an addressing error. The pessimising value is Defined 1243 for valid addresses and Undefined for invalid addresses. */ 1244 /* for assumption that doing bitwise or implements UifU */ 1245 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0); 1246 /* (really need "UifU" here...) 1247 vbits64 UifU= pessim64 (is pessimised by it, iow) */ 1248 vbits64 |= pessim64; 1249 return vbits64; 1250 } 1251 1252 /* Exemption doesn't apply. Flag an addressing error in the normal 1253 way. 
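(For contrast, an illustration of a case where the exemption above would fire, assuming --partial-loads-ok=yes: a word-aligned, word-sized load whose first two bytes are addressable and whose last two are not. The addressable bytes keep whatever V bits they held, the unaddressable bytes are forced to Undefined by the OR with pessim64, and no address error is reported.)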
*/ 1254 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False ); 1255 1256 return vbits64; 1257 } 1258 1259 1260 static 1261 __attribute__((noinline)) 1262 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian ) 1263 { 1264 SizeT szB = nBits / 8; 1265 SizeT i, n_addrs_bad = 0; 1266 UChar vbits8; 1267 Addr ai; 1268 Bool ok; 1269 1270 PROF_EVENT(35, "mc_STOREVn_slow"); 1271 1272 /* ------------ BEGIN semi-fast cases ------------ */ 1273 /* These deal quickly-ish with the common auxiliary primary map 1274 cases on 64-bit platforms. Are merely a speedup hack; can be 1275 omitted without loss of correctness/functionality. Note that in 1276 both cases the "sizeof(void*) == 8" causes these cases to be 1277 folded out by compilers on 32-bit platforms. These are derived 1278 from STOREV64 and STOREV32. 1279 */ 1280 if (LIKELY(sizeof(void*) == 8 1281 && nBits == 64 && VG_IS_8_ALIGNED(a))) { 1282 SecMap* sm = get_secmap_for_reading(a); 1283 UWord sm_off16 = SM_OFF_16(a); 1284 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16]; 1285 if (LIKELY( !is_distinguished_sm(sm) && 1286 (VA_BITS16_DEFINED == vabits16 || 1287 VA_BITS16_UNDEFINED == vabits16) )) { 1288 /* Handle common case quickly: a is suitably aligned, */ 1289 /* is mapped, and is addressible. */ 1290 // Convert full V-bits in register to compact 2-bit form. 1291 if (LIKELY(V_BITS64_DEFINED == vbytes)) { 1292 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED; 1293 return; 1294 } else if (V_BITS64_UNDEFINED == vbytes) { 1295 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED; 1296 return; 1297 } 1298 /* else fall into the slow case */ 1299 } 1300 /* else fall into the slow case */ 1301 } 1302 if (LIKELY(sizeof(void*) == 8 1303 && nBits == 32 && VG_IS_4_ALIGNED(a))) { 1304 SecMap* sm = get_secmap_for_reading(a); 1305 UWord sm_off = SM_OFF(a); 1306 UWord vabits8 = sm->vabits8[sm_off]; 1307 if (LIKELY( !is_distinguished_sm(sm) && 1308 (VA_BITS8_DEFINED == vabits8 || 1309 VA_BITS8_UNDEFINED == vabits8) )) { 1310 /* Handle common case quickly: a is suitably aligned, */ 1311 /* is mapped, and is addressible. */ 1312 // Convert full V-bits in register to compact 2-bit form. 1313 if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) { 1314 sm->vabits8[sm_off] = VA_BITS8_DEFINED; 1315 return; 1316 } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) { 1317 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED; 1318 return; 1319 } 1320 /* else fall into the slow case */ 1321 } 1322 /* else fall into the slow case */ 1323 } 1324 /* ------------ END semi-fast cases ------------ */ 1325 1326 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8); 1327 1328 /* Dump vbytes in memory, iterating from least to most significant 1329 byte. At the same time establish addressibility of the location. */ 1330 for (i = 0; i < szB; i++) { 1331 PROF_EVENT(36, "mc_STOREVn_slow(loop)"); 1332 ai = a + byte_offset_w(szB, bigendian, i); 1333 vbits8 = vbytes & 0xff; 1334 ok = set_vbits8(ai, vbits8); 1335 if (!ok) n_addrs_bad++; 1336 vbytes >>= 8; 1337 } 1338 1339 /* If an address error has happened, report it. */ 1340 if (n_addrs_bad > 0) 1341 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True ); 1342 } 1343 1344 1345 /*------------------------------------------------------------*/ 1346 /*--- Setting permissions over address ranges. 
---*/ 1347 /*------------------------------------------------------------*/ 1348 1349 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16, 1350 UWord dsm_num ) 1351 { 1352 UWord sm_off, sm_off16; 1353 UWord vabits2 = vabits16 & 0x3; 1354 SizeT lenA, lenB, len_to_next_secmap; 1355 Addr aNext; 1356 SecMap* sm; 1357 SecMap** sm_ptr; 1358 SecMap* example_dsm; 1359 1360 PROF_EVENT(150, "set_address_range_perms"); 1361 1362 /* Check the V+A bits make sense. */ 1363 tl_assert(VA_BITS16_NOACCESS == vabits16 || 1364 VA_BITS16_UNDEFINED == vabits16 || 1365 VA_BITS16_DEFINED == vabits16); 1366 1367 // This code should never write PDBs; ensure this. (See comment above 1368 // set_vabits2().) 1369 tl_assert(VA_BITS2_PARTDEFINED != vabits2); 1370 1371 if (lenT == 0) 1372 return; 1373 1374 if (lenT > 256 * 1024 * 1024) { 1375 if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) { 1376 Char* s = "unknown???"; 1377 if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess"; 1378 if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined"; 1379 if (vabits16 == VA_BITS16_DEFINED ) s = "defined"; 1380 VG_(message)(Vg_UserMsg, "Warning: set address range perms: " 1381 "large range [0x%lx, 0x%lx) (%s)\n", 1382 a, a + lenT, s); 1383 } 1384 } 1385 1386 #ifndef PERF_FAST_SARP 1387 /*------------------ debug-only case ------------------ */ 1388 { 1389 // Endianness doesn't matter here because all bytes are being set to 1390 // the same value. 1391 // Nb: We don't have to worry about updating the sec-V-bits table 1392 // after these set_vabits2() calls because this code never writes 1393 // VA_BITS2_PARTDEFINED values. 1394 SizeT i; 1395 for (i = 0; i < lenT; i++) { 1396 set_vabits2(a + i, vabits2); 1397 } 1398 return; 1399 } 1400 #endif 1401 1402 /*------------------ standard handling ------------------ */ 1403 1404 /* Get the distinguished secondary that we might want 1405 to use (part of the space-compression scheme). */ 1406 example_dsm = &sm_distinguished[dsm_num]; 1407 1408 // We have to handle ranges covering various combinations of partial and 1409 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case. 1410 // Cases marked with a '*' are common. 1411 // 1412 // TYPE PARTS USED 1413 // ---- ---------- 1414 // * one partial sec-map (p) 1 1415 // - one whole sec-map (P) 2 1416 // 1417 // * two partial sec-maps (pp) 1,3 1418 // - one partial, one whole sec-map (pP) 1,2 1419 // - one whole, one partial sec-map (Pp) 2,3 1420 // - two whole sec-maps (PP) 2,2 1421 // 1422 // * one partial, one whole, one partial (pPp) 1,2,3 1423 // - one partial, two whole (pPP) 1,2,2 1424 // - two whole, one partial (PPp) 2,2,3 1425 // - three whole (PPP) 2,2,2 1426 // 1427 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3 1428 // - one partial, N-1 whole (pP...PP) 1,2...2,2 1429 // - N-1 whole, one partial (PP...Pp) 2,2...2,3 1430 // - N whole (PP...PP) 2,2...2,3 1431 1432 // Break up total length (lenT) into two parts: length in the first 1433 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB. 1434 aNext = start_of_this_sm(a) + SM_SIZE; 1435 len_to_next_secmap = aNext - a; 1436 if ( lenT <= len_to_next_secmap ) { 1437 // Range entirely within one sec-map. Covers almost all cases. 1438 PROF_EVENT(151, "set_address_range_perms-single-secmap"); 1439 lenA = lenT; 1440 lenB = 0; 1441 } else if (is_start_of_sm(a)) { 1442 // Range spans at least one whole sec-map, and starts at the beginning 1443 // of a sec-map; skip to Part 2. 
1444 PROF_EVENT(152, "set_address_range_perms-startof-secmap"); 1445 lenA = 0; 1446 lenB = lenT; 1447 goto part2; 1448 } else { 1449 // Range spans two or more sec-maps, first one is partial. 1450 PROF_EVENT(153, "set_address_range_perms-multiple-secmaps"); 1451 lenA = len_to_next_secmap; 1452 lenB = lenT - lenA; 1453 } 1454 1455 //------------------------------------------------------------------------ 1456 // Part 1: Deal with the first sec_map. Most of the time the range will be 1457 // entirely within a sec_map and this part alone will suffice. Also, 1458 // doing it this way lets us avoid repeatedly testing for the crossing of 1459 // a sec-map boundary within these loops. 1460 //------------------------------------------------------------------------ 1461 1462 // If it's distinguished, make it undistinguished if necessary. 1463 sm_ptr = get_secmap_ptr(a); 1464 if (is_distinguished_sm(*sm_ptr)) { 1465 if (*sm_ptr == example_dsm) { 1466 // Sec-map already has the V+A bits that we want, so skip. 1467 PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick"); 1468 a = aNext; 1469 lenA = 0; 1470 } else { 1471 PROF_EVENT(155, "set_address_range_perms-dist-sm1"); 1472 *sm_ptr = copy_for_writing(*sm_ptr); 1473 } 1474 } 1475 sm = *sm_ptr; 1476 1477 // 1 byte steps 1478 while (True) { 1479 if (VG_IS_8_ALIGNED(a)) break; 1480 if (lenA < 1) break; 1481 PROF_EVENT(156, "set_address_range_perms-loop1a"); 1482 sm_off = SM_OFF(a); 1483 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) ); 1484 a += 1; 1485 lenA -= 1; 1486 } 1487 // 8-aligned, 8 byte steps 1488 while (True) { 1489 if (lenA < 8) break; 1490 PROF_EVENT(157, "set_address_range_perms-loop8a"); 1491 sm_off16 = SM_OFF_16(a); 1492 ((UShort*)(sm->vabits8))[sm_off16] = vabits16; 1493 a += 8; 1494 lenA -= 8; 1495 } 1496 // 1 byte steps 1497 while (True) { 1498 if (lenA < 1) break; 1499 PROF_EVENT(158, "set_address_range_perms-loop1b"); 1500 sm_off = SM_OFF(a); 1501 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) ); 1502 a += 1; 1503 lenA -= 1; 1504 } 1505 1506 // We've finished the first sec-map. Is that it? 1507 if (lenB == 0) 1508 return; 1509 1510 //------------------------------------------------------------------------ 1511 // Part 2: Fast-set entire sec-maps at a time. 1512 //------------------------------------------------------------------------ 1513 part2: 1514 // 64KB-aligned, 64KB steps. 1515 // Nb: we can reach here with lenB < SM_SIZE 1516 tl_assert(0 == lenA); 1517 while (True) { 1518 if (lenB < SM_SIZE) break; 1519 tl_assert(is_start_of_sm(a)); 1520 PROF_EVENT(159, "set_address_range_perms-loop64K"); 1521 sm_ptr = get_secmap_ptr(a); 1522 if (!is_distinguished_sm(*sm_ptr)) { 1523 PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm"); 1524 // Free the non-distinguished sec-map that we're replacing. This 1525 // case happens moderately often, enough to be worthwhile. 1526 SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap)); 1527 tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n"); 1528 } 1529 update_SM_counts(*sm_ptr, example_dsm); 1530 // Make the sec-map entry point to the example DSM 1531 *sm_ptr = example_dsm; 1532 lenB -= SM_SIZE; 1533 a += SM_SIZE; 1534 } 1535 1536 // We've finished the whole sec-maps. Is that it? 1537 if (lenB == 0) 1538 return; 1539 1540 //------------------------------------------------------------------------ 1541 // Part 3: Finish off the final partial sec-map, if necessary. 
1542 //------------------------------------------------------------------------ 1543 1544 tl_assert(is_start_of_sm(a) && lenB < SM_SIZE); 1545 1546 // If it's distinguished, make it undistinguished if necessary. 1547 sm_ptr = get_secmap_ptr(a); 1548 if (is_distinguished_sm(*sm_ptr)) { 1549 if (*sm_ptr == example_dsm) { 1550 // Sec-map already has the V+A bits that we want, so stop. 1551 PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick"); 1552 return; 1553 } else { 1554 PROF_EVENT(162, "set_address_range_perms-dist-sm2"); 1555 *sm_ptr = copy_for_writing(*sm_ptr); 1556 } 1557 } 1558 sm = *sm_ptr; 1559 1560 // 8-aligned, 8 byte steps 1561 while (True) { 1562 if (lenB < 8) break; 1563 PROF_EVENT(163, "set_address_range_perms-loop8b"); 1564 sm_off16 = SM_OFF_16(a); 1565 ((UShort*)(sm->vabits8))[sm_off16] = vabits16; 1566 a += 8; 1567 lenB -= 8; 1568 } 1569 // 1 byte steps 1570 while (True) { 1571 if (lenB < 1) return; 1572 PROF_EVENT(164, "set_address_range_perms-loop1c"); 1573 sm_off = SM_OFF(a); 1574 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) ); 1575 a += 1; 1576 lenB -= 1; 1577 } 1578 } 1579 1580 1581 /* --- Set permissions for arbitrary address ranges --- */ 1582 1583 void MC_(make_mem_noaccess) ( Addr a, SizeT len ) 1584 { 1585 PROF_EVENT(40, "MC_(make_mem_noaccess)"); 1586 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len); 1587 set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS ); 1588 if (UNLIKELY( MC_(clo_mc_level) == 3 )) 1589 ocache_sarp_Clear_Origins ( a, len ); 1590 } 1591 1592 static void make_mem_undefined ( Addr a, SizeT len ) 1593 { 1594 PROF_EVENT(41, "make_mem_undefined"); 1595 DEBUG("make_mem_undefined(%p, %lu)\n", a, len); 1596 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED ); 1597 } 1598 1599 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag ) 1600 { 1601 PROF_EVENT(41, "MC_(make_mem_undefined)"); 1602 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len); 1603 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED ); 1604 if (UNLIKELY( MC_(clo_mc_level) == 3 )) 1605 ocache_sarp_Set_Origins ( a, len, otag ); 1606 } 1607 1608 static 1609 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len, 1610 ThreadId tid, UInt okind ) 1611 { 1612 UInt ecu; 1613 ExeContext* here; 1614 /* VG_(record_ExeContext) checks for validity of tid, and asserts 1615 if it is invalid. So no need to do it here. */ 1616 tl_assert(okind <= 3); 1617 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ ); 1618 tl_assert(here); 1619 ecu = VG_(get_ECU_from_ExeContext)(here); 1620 tl_assert(VG_(is_plausible_ECU)(ecu)); 1621 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind ); 1622 } 1623 1624 static 1625 void make_mem_undefined_w_tid ( Addr a, SizeT len, ThreadId tid ) { 1626 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN ); 1627 } 1628 1629 1630 void MC_(make_mem_defined) ( Addr a, SizeT len ) 1631 { 1632 PROF_EVENT(42, "MC_(make_mem_defined)"); 1633 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len); 1634 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED ); 1635 if (UNLIKELY( MC_(clo_mc_level) == 3 )) 1636 ocache_sarp_Clear_Origins ( a, len ); 1637 } 1638 1639 /* For each byte in [a,a+len), if the byte is addressable, make it be 1640 defined, but if it isn't addressible, leave it alone. In other 1641 words a version of MC_(make_mem_defined) that doesn't mess with 1642 addressibility. Low-performance implementation. 
*/ 1643 static void make_mem_defined_if_addressable ( Addr a, SizeT len ) 1644 { 1645 SizeT i; 1646 UChar vabits2; 1647 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len); 1648 for (i = 0; i < len; i++) { 1649 vabits2 = get_vabits2( a+i ); 1650 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) { 1651 set_vabits2(a+i, VA_BITS2_DEFINED); 1652 if (UNLIKELY(MC_(clo_mc_level) >= 3)) { 1653 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */ 1654 } 1655 } 1656 } 1657 } 1658 1659 /* Similarly (needed for mprotect handling ..) */ 1660 static void make_mem_defined_if_noaccess ( Addr a, SizeT len ) 1661 { 1662 SizeT i; 1663 UChar vabits2; 1664 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len); 1665 for (i = 0; i < len; i++) { 1666 vabits2 = get_vabits2( a+i ); 1667 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) { 1668 set_vabits2(a+i, VA_BITS2_DEFINED); 1669 if (UNLIKELY(MC_(clo_mc_level) >= 3)) { 1670 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */ 1671 } 1672 } 1673 } 1674 } 1675 1676 /* --- Block-copy permissions (needed for implementing realloc() and 1677 sys_mremap). --- */ 1678 1679 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len ) 1680 { 1681 SizeT i, j; 1682 UChar vabits2, vabits8; 1683 Bool aligned, nooverlap; 1684 1685 DEBUG("MC_(copy_address_range_state)\n"); 1686 PROF_EVENT(50, "MC_(copy_address_range_state)"); 1687 1688 if (len == 0 || src == dst) 1689 return; 1690 1691 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst); 1692 nooverlap = src+len <= dst || dst+len <= src; 1693 1694 if (nooverlap && aligned) { 1695 1696 /* Vectorised fast case, when no overlap and suitably aligned */ 1697 /* vector loop */ 1698 i = 0; 1699 while (len >= 4) { 1700 vabits8 = get_vabits8_for_aligned_word32( src+i ); 1701 set_vabits8_for_aligned_word32( dst+i, vabits8 ); 1702 if (LIKELY(VA_BITS8_DEFINED == vabits8 1703 || VA_BITS8_UNDEFINED == vabits8 1704 || VA_BITS8_NOACCESS == vabits8)) { 1705 /* do nothing */ 1706 } else { 1707 /* have to copy secondary map info */ 1708 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 )) 1709 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) ); 1710 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 )) 1711 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) ); 1712 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 )) 1713 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) ); 1714 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 )) 1715 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) ); 1716 } 1717 i += 4; 1718 len -= 4; 1719 } 1720 /* fixup loop */ 1721 while (len >= 1) { 1722 vabits2 = get_vabits2( src+i ); 1723 set_vabits2( dst+i, vabits2 ); 1724 if (VA_BITS2_PARTDEFINED == vabits2) { 1725 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) ); 1726 } 1727 i++; 1728 len--; 1729 } 1730 1731 } else { 1732 1733 /* We have to do things the slow way */ 1734 if (src < dst) { 1735 for (i = 0, j = len-1; i < len; i++, j--) { 1736 PROF_EVENT(51, "MC_(copy_address_range_state)(loop)"); 1737 vabits2 = get_vabits2( src+j ); 1738 set_vabits2( dst+j, vabits2 ); 1739 if (VA_BITS2_PARTDEFINED == vabits2) { 1740 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) ); 1741 } 1742 } 1743 } 1744 1745 if (src > dst) { 1746 for (i = 0; i < len; i++) { 1747 PROF_EVENT(52, "MC_(copy_address_range_state)(loop)"); 1748 vabits2 = get_vabits2( src+i ); 1749 set_vabits2( dst+i, vabits2 ); 1750 if (VA_BITS2_PARTDEFINED == vabits2) { 1751 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) ); 1752 } 1753 } 1754 } 1755 } 1756 1757 } 1758 1759 1760 
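/* Worked example (illustrative only; the addresses are invented): how MC_(copy_address_range_state) above chooses between its fast and slow paths.  Suppose src = 0x1000, dst = 0x1004, len = 16:
      aligned   = VG_IS_4_ALIGNED(0x1000) && VG_IS_4_ALIGNED(0x1004)   -> True
      nooverlap = 0x1000+16 <= 0x1004  ||  0x1004+16 <= 0x1000         -> False
   The ranges overlap, so the vectorised fast case is skipped.  Because src < dst, the slow path copies backwards (j = len-1 .. 0): each write to dst+j lands at src+(j+4), which is either already copied or beyond the end of the source range, so no shadow state is read after it has been overwritten -- the same reasoning as memmove.  For src > dst, copying forwards is the safe direction. */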
/*------------------------------------------------------------*/ 1761 /*--- Origin tracking stuff - cache basics ---*/ 1762 /*------------------------------------------------------------*/ 1763 1764 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION 1765 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1766 1767 Note that this implementation draws inspiration from the "origin 1768 tracking by value piggybacking" scheme described in "Tracking Bad 1769 Apples: Reporting the Origin of Null and Undefined Value Errors" 1770 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer, 1771 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is 1772 implemented completely differently. 1773 1774 Origin tags and ECUs -- about the shadow values 1775 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1776 1777 This implementation tracks the defining point of all uninitialised 1778 values using so-called "origin tags", which are 32-bit integers, 1779 rather than using the values themselves to encode the origins. The 1780 latter, so-called "value piggybacking", is what the OOPSLA07 paper 1781 describes. 1782 1783 Origin tags, as tracked by the machinery below, are 32-bit unsigned 1784 ints (UInts), regardless of the machine's word size. Each tag 1785 comprises an upper 30-bit ECU field and a lower 2-bit 1786 'kind' field. The ECU field is a number given out by m_execontext 1787 and has a 1-1 mapping with ExeContext*s. An ECU can be used 1788 directly as an origin tag (otag), but in fact we want to put 1789 additional information in the 'kind' field to indicate roughly where the 1790 tag came from. This helps print more understandable error messages 1791 for the user -- it has no other purpose. In summary: 1792 1793 * Both ECUs and origin tags are represented as 32-bit words 1794 1795 * m_execontext and the core-tool interface deal purely in ECUs. 1796 They have no knowledge of origin tags - that is a purely 1797 Memcheck-internal matter. 1798 1799 * all valid ECUs have the lowest 2 bits zero and at least 1800 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU)) 1801 1802 * to convert from an ECU to an otag, OR in one of the MC_OKIND_ 1803 constants defined in mc_include.h. 1804 1805 * to convert an otag back to an ECU, AND it with ~3 1806 1807 One important fact is that no valid otag is zero. A zero otag is 1808 used by the implementation to indicate "no origin", which could 1809 mean that either the value is defined, or it is undefined but the 1810 implementation somehow managed to lose the origin. 1811 1812 The ECU used for memory created by malloc etc is derived from the 1813 stack trace at the time the malloc etc happens. This means the 1814 mechanism can show the exact allocation point for heap-created 1815 uninitialised values. 1816 1817 In contrast, it is simply too expensive to create a complete 1818 backtrace for each stack allocation. Therefore we merely use a 1819 depth-1 backtrace for stack allocations, which can be done once at 1820 translation time, rather than N times at run time. The result of 1821 this is that, for stack created uninitialised values, Memcheck can 1822 only show the allocating function, and not what called it. 1823 Furthermore, compilers tend to move the stack pointer just once at 1824 the start of the function, to allocate all locals, and so in fact 1825 the stack origin almost always simply points to the opening brace 1826 of the function. 
Net result is, for stack origins, the mechanism 1827 can tell you in which function the undefined value was created, but 1828 that's all. Users will need to carefully check all locals in the 1829 specified function. 1830 1831 Shadowing registers and memory 1832 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1833 1834 Memory is shadowed using a two level cache structure (ocacheL1 and 1835 ocacheL2). Memory references are first directed to ocacheL1. This 1836 is a traditional 2-way set associative cache with 32-byte lines and 1837 approximate LRU replacement within each set. 1838 1839 A naive implementation would require storing one 32 bit otag for 1840 each byte of memory covered, a 4:1 space overhead. Instead, there 1841 is one otag for every 4 bytes of memory covered, plus a 4-bit mask 1842 that shows which of the 4 bytes have that shadow value and which 1843 have a shadow value of zero (indicating no origin). Hence a lot of 1844 space is saved, but the cost is that only one different origin per 1845 4 bytes of address space can be represented. This is a source of 1846 imprecision, but how much of a problem it really is remains to be 1847 seen. 1848 1849 A cache line that contains all zeroes ("no origins") contains no 1850 useful information, and can be ejected from the L1 cache "for 1851 free", in the sense that a read miss on the L1 causes a line of 1852 zeroes to be installed. However, ejecting a line containing 1853 nonzeroes risks losing origin information permanently. In order to 1854 prevent such lossage, ejected nonzero lines are placed in a 1855 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache 1856 lines. This can grow arbitrarily large, and so should ensure that 1857 Memcheck runs out of memory in preference to losing useful origin 1858 info due to cache size limitations. 1859 1860 Shadowing registers is a bit tricky, because the shadow values are 1861 32 bits, regardless of the size of the register. That gives a 1862 problem for registers smaller than 32 bits. The solution is to 1863 find spaces in the guest state that are unused, and use those to 1864 shadow guest state fragments smaller than 32 bits. For example, on 1865 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the 1866 shadow are allocated for the register's otag, then there are still 1867 12 bytes left over which could be used to shadow 3 other values. 1868 1869 This implies there is some non-obvious mapping from guest state 1870 (start,length) pairs to the relevant shadow offset (for the origin 1871 tags). And it is unfortunately guest-architecture specific. The 1872 mapping is contained in mc_machine.c, which is quite lengthy but 1873 straightforward. 1874 1875 Instrumenting the IR 1876 ~~~~~~~~~~~~~~~~~~~~ 1877 1878 Instrumentation is largely straightforward, and done by the 1879 functions schemeE and schemeS in mc_translate.c. These generate 1880 code for handling the origin tags of expressions (E) and statements 1881 (S) respectively. The rather strange names are a reference to the 1882 "compilation schemes" shown in Simon Peyton Jones' book "The 1883 Implementation of Functional Programming Languages" (Prentice Hall, 1884 1987, see 1885 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm). 1886 1887 schemeS merely arranges to move shadow values around the guest 1888 state to track the incoming IR. schemeE is largely trivial too. 1889 The only significant point is how to compute the otag corresponding 1890 to binary (or ternary, quaternary, etc) operator applications. 
The 1891 rule is simple: just take whichever value is larger (32-bit 1892 unsigned max). Constants get the special value zero. Hence this 1893 rule always propagates a nonzero (known) otag in preference to a 1894 zero (unknown, or more likely, value-is-defined) tag, as we want. 1895 If two different undefined values are inputs to a binary operator 1896 application, then which is propagated is arbitrary, but that 1897 doesn't matter, since the program is erroneous in using either of 1898 the values, and so there's no point in attempting to propagate 1899 both. 1900 1901 Since constants are abstracted to (otag) zero, much of the 1902 instrumentation code can be folded out without difficulty by the 1903 generic post-instrumentation IR cleanup pass, using these rules: 1904 Max32U(0,x) -> x, Max32U(x,0) -> x, Max32(x,y) where x and y are 1905 constants is evaluated at JIT time. And the resulting dead code 1906 removal. In practice this causes surprisingly few Max32Us to 1907 survive through to backend code generation. 1908 1909 Integration with the V-bits machinery 1910 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1911 1912 This is again largely straightforward. Mostly the otag and V bits 1913 stuff are independent. The only point of interaction is when the V 1914 bits instrumenter creates a call to a helper function to report an 1915 uninitialised value error -- in that case it must first use schemeE 1916 to get hold of the origin tag expression for the value, and pass 1917 that to the helper too. 1918 1919 There is the usual stuff to do with setting address range 1920 permissions. When memory is painted undefined, we must also know 1921 the origin tag to paint with, which involves some tedious plumbing, 1922 particularly to do with the fast case stack handlers. When memory 1923 is painted defined or noaccess then the origin tags must be forced 1924 to zero. 1925 1926 One of the goals of the implementation was to ensure that the 1927 non-origin tracking mode isn't slowed down at all. To do this, 1928 various functions to do with memory permissions setting (again, 1929 mostly pertaining to the stack) are duplicated for the with- and 1930 without-otag case. 1931 1932 Dealing with stack redzones, and the NIA cache 1933 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1934 1935 This is one of the few non-obvious parts of the implementation. 1936 1937 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small 1938 reserved area below the stack pointer, that can be used as scratch 1939 space by compiler generated code for functions. In the Memcheck 1940 sources this is referred to as the "stack redzone". The important 1941 thing here is that such redzones are considered volatile across 1942 function calls and returns. So Memcheck takes care to mark them as 1943 undefined for each call and return, on the afflicted platforms. 1944 Past experience shows this is essential in order to get reliable 1945 messages about uninitialised values that come from the stack. 1946 1947 So the question is, when we paint a redzone undefined, what origin 1948 tag should we use for it? Consider a function f() calling g(). If 1949 we paint the redzone using an otag derived from the ExeContext of 1950 the CALL/BL instruction in f, then any errors in g causing it to 1951 use uninitialised values that happen to lie in the redzone, will be 1952 reported as having their origin in f. Which is highly confusing. 
1953 1954 The same applies for returns: if, on a return, we paint the redzone 1955 using a origin tag derived from the ExeContext of the RET/BLR 1956 instruction in g, then any later errors in f causing it to use 1957 uninitialised values in the redzone, will be reported as having 1958 their origin in g. Which is just as confusing. 1959 1960 To do it right, in both cases we need to use an origin tag which 1961 pertains to the instruction which dynamically follows the CALL/BL 1962 or RET/BLR. In short, one derived from the NIA - the "next 1963 instruction address". 1964 1965 To make this work, Memcheck's redzone-painting helper, 1966 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the 1967 NIA. It converts the NIA to a 1-element ExeContext, and uses that 1968 ExeContext's ECU as the basis for the otag used to paint the 1969 redzone. The expensive part of this is converting an NIA into an 1970 ECU, since this happens once for every call and every return. So 1971 we use a simple 511-line, 2-way set associative cache 1972 (nia_to_ecu_cache) to cache the mappings, and that knocks most of 1973 the cost out. 1974 1975 Further background comments 1976 ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1977 1978 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't 1979 > it really just the address of the relevant ExeContext? 1980 1981 Well, it's not the address, but a value which has a 1-1 mapping 1982 with ExeContexts, and is guaranteed not to be zero, since zero 1983 denotes (to memcheck) "unknown origin or defined value". So these 1984 UInts are just numbers starting at 4 and incrementing by 4; each 1985 ExeContext is given a number when it is created. (*** NOTE this 1986 confuses otags and ECUs; see comments above ***). 1987 1988 Making these otags 32-bit regardless of the machine's word size 1989 makes the 64-bit implementation easier (next para). And it doesn't 1990 really limit us in any way, since for the tags to overflow would 1991 require that the program somehow caused 2^30-1 different 1992 ExeContexts to be created, in which case it is probably in deep 1993 trouble. Not to mention V will have soaked up many tens of 1994 gigabytes of memory merely to store them all. 1995 1996 So having 64-bit origins doesn't really buy you anything, and has 1997 the following downsides: 1998 1999 Suppose that instead, an otag is a UWord. This would mean that, on 2000 a 64-bit target, 2001 2002 1. It becomes hard to shadow any element of guest state which is 2003 smaller than 8 bytes. To do so means you'd need to find some 2004 8-byte-sized hole in the guest state which you don't want to 2005 shadow, and use that instead to hold the otag. On ppc64, the 2006 condition code register(s) are split into 20 UChar sized pieces, 2007 all of which need to be tracked (guest_XER_SO .. guest_CR7_0) 2008 and so that would entail finding 160 bytes somewhere else in the 2009 guest state. 2010 2011 Even on x86, I want to track origins for %AH .. %DH (bits 15:8 2012 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of 2013 same) and so I had to look for 4 untracked otag-sized areas in 2014 the guest state to make that possible. 2015 2016 The same problem exists of course when origin tags are only 32 2017 bits, but it's less extreme. 2018 2019 2. (More compelling) it doubles the size of the origin shadow 2020 memory. Given that the shadow memory is organised as a fixed 2021 size cache, and that accuracy of tracking is limited by origins 2022 falling out the cache due to space conflicts, this isn't good. 
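   As a small illustration of the ECU <-> otag relationship described
   in the "Origin tags and ECUs" section above (the numbers here are
   invented; MC_OKIND_STACK is one of the MC_OKIND_ constants from
   mc_include.h):

      UInt ecu  = 8;                     // a plausible ECU: low 2 bits zero
      UInt otag = ecu | MC_OKIND_STACK;  // add the 2-bit 'kind'
      UInt back = otag & ~3;             // recover the ECU: back == ecu

   and, since no valid ECU is zero, no valid otag is zero either.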
2023 2024 > Another question: is the origin tracking perfect, or are there 2025 > cases where it fails to determine an origin? 2026 2027 It is imperfect for at least the following reasons, and 2028 probably more: 2029 2030 * Insufficient capacity in the origin cache. When a line is 2031 evicted from the cache it is gone forever, and so subsequent 2032 queries for the line produce zero, indicating no origin 2033 information. Interestingly, a line containing all zeroes can be 2034 evicted "free" from the cache, since it contains no useful 2035 information, so there is scope perhaps for some cleverer cache 2036 management schemes. (*** NOTE, with the introduction of the 2037 second level origin tag cache, ocacheL2, this is no longer a 2038 problem. ***) 2039 2040 * The origin cache only stores one otag per 32-bits of address 2041 space, plus 4 bits indicating which of the 4 bytes has that tag 2042 and which are considered defined. The result is that if two 2043 undefined bytes in the same word are stored in memory, the first 2044 stored byte's origin will be lost and replaced by the origin for 2045 the second byte. 2046 2047 * Nonzero origin tags for defined values. Consider a binary 2048 operator application op(x,y). Suppose y is undefined (and so has 2049 a valid nonzero origin tag), and x is defined, but erroneously 2050 has a nonzero origin tag (defined values should have tag zero). 2051 If the erroneous tag has a numeric value greater than y's tag, 2052 then the rule for propagating origin tags through binary 2053 operations, which is simply to take the unsigned max of the two 2054 tags, will erroneously propagate x's tag rather than y's. 2055 2056 * Some obscure uses of x86/amd64 byte registers can cause lossage 2057 or confusion of origins. %AH .. %DH are treated as different 2058 from, and unrelated to, their parent registers, %EAX .. %EDX. 2059 So some weird sequences like 2060 2061 movb undefined-value, %AH 2062 movb defined-value, %AL 2063 .. use %AX or %EAX .. 2064 2065 will cause the origin attributed to %AH to be ignored, since %AL, 2066 %AX, %EAX are treated as the same register, and %AH as a 2067 completely separate one. 2068 2069 But having said all that, it actually seems to work fairly well in 2070 practice. 
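   As a concrete (invented) instance of the propagation rule from the
   "Instrumenting the IR" section above, for a binary operation
   op(x,y):

      UInt otag_x = 0;         // x is defined: "no origin"
      UInt otag_y = 0x1230;    // y is undefined, carrying some nonzero otag
      UInt otag_r = otag_x > otag_y ? otag_x : otag_y;   // Max32U
      // otag_r == 0x1230: a known origin always beats tag zero

   which is also why a spurious nonzero tag on a defined value can win
   the max and misattribute the origin, as described above.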
2071 */ 2072 2073 static UWord stats_ocacheL1_find = 0; 2074 static UWord stats_ocacheL1_found_at_1 = 0; 2075 static UWord stats_ocacheL1_found_at_N = 0; 2076 static UWord stats_ocacheL1_misses = 0; 2077 static UWord stats_ocacheL1_lossage = 0; 2078 static UWord stats_ocacheL1_movefwds = 0; 2079 2080 static UWord stats__ocacheL2_refs = 0; 2081 static UWord stats__ocacheL2_misses = 0; 2082 static UWord stats__ocacheL2_n_nodes_max = 0; 2083 2084 /* Cache of 32-bit values, one every 32 bits of address space */ 2085 2086 #define OC_BITS_PER_LINE 5 2087 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2)) 2088 2089 static INLINE UWord oc_line_offset ( Addr a ) { 2090 return (a >> 2) & (OC_W32S_PER_LINE - 1); 2091 } 2092 static INLINE Bool is_valid_oc_tag ( Addr tag ) { 2093 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1)); 2094 } 2095 2096 #define OC_LINES_PER_SET 2 2097 2098 #define OC_N_SET_BITS 20 2099 #define OC_N_SETS (1 << OC_N_SET_BITS) 2100 2101 /* These settings give: 2102 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful 2103 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful 2104 */ 2105 2106 #define OC_MOVE_FORWARDS_EVERY_BITS 7 2107 2108 2109 typedef 2110 struct { 2111 Addr tag; 2112 UInt w32[OC_W32S_PER_LINE]; 2113 UChar descr[OC_W32S_PER_LINE]; 2114 } 2115 OCacheLine; 2116 2117 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not 2118 in use, 'n' (nonzero) if it contains at least one valid origin tag, 2119 and 'z' if all the represented tags are zero. */ 2120 static UChar classify_OCacheLine ( OCacheLine* line ) 2121 { 2122 UWord i; 2123 if (line->tag == 1/*invalid*/) 2124 return 'e'; /* EMPTY */ 2125 tl_assert(is_valid_oc_tag(line->tag)); 2126 for (i = 0; i < OC_W32S_PER_LINE; i++) { 2127 tl_assert(0 == ((~0xF) & line->descr[i])); 2128 if (line->w32[i] > 0 && line->descr[i] > 0) 2129 return 'n'; /* NONZERO - contains useful info */ 2130 } 2131 return 'z'; /* ZERO - no useful info */ 2132 } 2133 2134 typedef 2135 struct { 2136 OCacheLine line[OC_LINES_PER_SET]; 2137 } 2138 OCacheSet; 2139 2140 typedef 2141 struct { 2142 OCacheSet set[OC_N_SETS]; 2143 } 2144 OCache; 2145 2146 static OCache* ocacheL1 = NULL; 2147 static UWord ocacheL1_event_ctr = 0; 2148 2149 static void init_ocacheL2 ( void ); /* fwds */ 2150 static void init_OCache ( void ) 2151 { 2152 UWord line, set; 2153 tl_assert(MC_(clo_mc_level) >= 3); 2154 tl_assert(ocacheL1 == NULL); 2155 ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache)); 2156 if (ocacheL1 == NULL) { 2157 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1", 2158 sizeof(OCache) ); 2159 } 2160 tl_assert(ocacheL1 != NULL); 2161 for (set = 0; set < OC_N_SETS; set++) { 2162 for (line = 0; line < OC_LINES_PER_SET; line++) { 2163 ocacheL1->set[set].line[line].tag = 1/*invalid*/; 2164 } 2165 } 2166 init_ocacheL2(); 2167 } 2168 2169 static void moveLineForwards ( OCacheSet* set, UWord lineno ) 2170 { 2171 OCacheLine tmp; 2172 stats_ocacheL1_movefwds++; 2173 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET); 2174 tmp = set->line[lineno-1]; 2175 set->line[lineno-1] = set->line[lineno]; 2176 set->line[lineno] = tmp; 2177 } 2178 2179 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) { 2180 UWord i; 2181 for (i = 0; i < OC_W32S_PER_LINE; i++) { 2182 line->w32[i] = 0; /* NO ORIGIN */ 2183 line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! 
*/ 2184 } 2185 line->tag = tag; 2186 } 2187 2188 ////////////////////////////////////////////////////////////// 2189 //// OCache backing store 2190 2191 static OSet* ocacheL2 = NULL; 2192 2193 static void* ocacheL2_malloc ( HChar* cc, SizeT szB ) { 2194 return VG_(malloc)(cc, szB); 2195 } 2196 static void ocacheL2_free ( void* v ) { 2197 VG_(free)( v ); 2198 } 2199 2200 /* Stats: # nodes currently in tree */ 2201 static UWord stats__ocacheL2_n_nodes = 0; 2202 2203 static void init_ocacheL2 ( void ) 2204 { 2205 tl_assert(!ocacheL2); 2206 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */ 2207 tl_assert(0 == offsetof(OCacheLine,tag)); 2208 ocacheL2 2209 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag), 2210 NULL, /* fast cmp */ 2211 ocacheL2_malloc, "mc.ioL2", ocacheL2_free); 2212 tl_assert(ocacheL2); 2213 stats__ocacheL2_n_nodes = 0; 2214 } 2215 2216 /* Find line with the given tag in the tree, or NULL if not found. */ 2217 static OCacheLine* ocacheL2_find_tag ( Addr tag ) 2218 { 2219 OCacheLine* line; 2220 tl_assert(is_valid_oc_tag(tag)); 2221 stats__ocacheL2_refs++; 2222 line = VG_(OSetGen_Lookup)( ocacheL2, &tag ); 2223 return line; 2224 } 2225 2226 /* Delete the line with the given tag from the tree, if it is present, and 2227 free up the associated memory. */ 2228 static void ocacheL2_del_tag ( Addr tag ) 2229 { 2230 OCacheLine* line; 2231 tl_assert(is_valid_oc_tag(tag)); 2232 stats__ocacheL2_refs++; 2233 line = VG_(OSetGen_Remove)( ocacheL2, &tag ); 2234 if (line) { 2235 VG_(OSetGen_FreeNode)(ocacheL2, line); 2236 tl_assert(stats__ocacheL2_n_nodes > 0); 2237 stats__ocacheL2_n_nodes--; 2238 } 2239 } 2240 2241 /* Add a copy of the given line to the tree. It must not already be 2242 present. */ 2243 static void ocacheL2_add_line ( OCacheLine* line ) 2244 { 2245 OCacheLine* copy; 2246 tl_assert(is_valid_oc_tag(line->tag)); 2247 copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) ); 2248 tl_assert(copy); 2249 *copy = *line; 2250 stats__ocacheL2_refs++; 2251 VG_(OSetGen_Insert)( ocacheL2, copy ); 2252 stats__ocacheL2_n_nodes++; 2253 if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max) 2254 stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes; 2255 } 2256 2257 //// 2258 ////////////////////////////////////////////////////////////// 2259 2260 __attribute__((noinline)) 2261 static OCacheLine* find_OCacheLine_SLOW ( Addr a ) 2262 { 2263 OCacheLine *victim, *inL2; 2264 UChar c; 2265 UWord line; 2266 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1); 2267 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1); 2268 UWord tag = a & tagmask; 2269 tl_assert(setno >= 0 && setno < OC_N_SETS); 2270 2271 /* we already tried line == 0; skip therefore. */ 2272 for (line = 1; line < OC_LINES_PER_SET; line++) { 2273 if (ocacheL1->set[setno].line[line].tag == tag) { 2274 if (line == 1) { 2275 stats_ocacheL1_found_at_1++; 2276 } else { 2277 stats_ocacheL1_found_at_N++; 2278 } 2279 if (UNLIKELY(0 == (ocacheL1_event_ctr++ 2280 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) { 2281 moveLineForwards( &ocacheL1->set[setno], line ); 2282 line--; 2283 } 2284 return &ocacheL1->set[setno].line[line]; 2285 } 2286 } 2287 2288 /* A miss. Use the last slot. Implicitly this means we're 2289 ejecting the line in the last slot. */ 2290 stats_ocacheL1_misses++; 2291 tl_assert(line == OC_LINES_PER_SET); 2292 line--; 2293 tl_assert(line > 0); 2294 2295 /* First, move the to-be-ejected line to the L2 cache. 
*/ 2296 victim = &ocacheL1->set[setno].line[line]; 2297 c = classify_OCacheLine(victim); 2298 switch (c) { 2299 case 'e': 2300 /* the line is empty (has invalid tag); ignore it. */ 2301 break; 2302 case 'z': 2303 /* line contains zeroes. We must ensure the backing store is 2304 updated accordingly, either by copying the line there 2305 verbatim, or by ensuring it isn't present there. We 2306 chosse the latter on the basis that it reduces the size of 2307 the backing store. */ 2308 ocacheL2_del_tag( victim->tag ); 2309 break; 2310 case 'n': 2311 /* line contains at least one real, useful origin. Copy it 2312 to the backing store. */ 2313 stats_ocacheL1_lossage++; 2314 inL2 = ocacheL2_find_tag( victim->tag ); 2315 if (inL2) { 2316 *inL2 = *victim; 2317 } else { 2318 ocacheL2_add_line( victim ); 2319 } 2320 break; 2321 default: 2322 tl_assert(0); 2323 } 2324 2325 /* Now we must reload the L1 cache from the backing tree, if 2326 possible. */ 2327 tl_assert(tag != victim->tag); /* stay sane */ 2328 inL2 = ocacheL2_find_tag( tag ); 2329 if (inL2) { 2330 /* We're in luck. It's in the L2. */ 2331 ocacheL1->set[setno].line[line] = *inL2; 2332 } else { 2333 /* Missed at both levels of the cache hierarchy. We have to 2334 declare it as full of zeroes (unknown origins). */ 2335 stats__ocacheL2_misses++; 2336 zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag ); 2337 } 2338 2339 /* Move it one forwards */ 2340 moveLineForwards( &ocacheL1->set[setno], line ); 2341 line--; 2342 2343 return &ocacheL1->set[setno].line[line]; 2344 } 2345 2346 static INLINE OCacheLine* find_OCacheLine ( Addr a ) 2347 { 2348 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1); 2349 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1); 2350 UWord tag = a & tagmask; 2351 2352 stats_ocacheL1_find++; 2353 2354 if (OC_ENABLE_ASSERTIONS) { 2355 tl_assert(setno >= 0 && setno < OC_N_SETS); 2356 tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1))); 2357 } 2358 2359 if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) { 2360 return &ocacheL1->set[setno].line[0]; 2361 } 2362 2363 return find_OCacheLine_SLOW( a ); 2364 } 2365 2366 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag ) 2367 { 2368 //// BEGIN inlined, specialised version of MC_(helperc_b_store8) 2369 //// Set the origins for a+0 .. 
a+7 2370 { OCacheLine* line; 2371 UWord lineoff = oc_line_offset(a); 2372 if (OC_ENABLE_ASSERTIONS) { 2373 tl_assert(lineoff >= 0 2374 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/); 2375 } 2376 line = find_OCacheLine( a ); 2377 line->descr[lineoff+0] = 0xF; 2378 line->descr[lineoff+1] = 0xF; 2379 line->w32[lineoff+0] = otag; 2380 line->w32[lineoff+1] = otag; 2381 } 2382 //// END inlined, specialised version of MC_(helperc_b_store8) 2383 } 2384 2385 2386 /*------------------------------------------------------------*/ 2387 /*--- Aligned fast case permission setters, ---*/ 2388 /*--- for dealing with stacks ---*/ 2389 /*------------------------------------------------------------*/ 2390 2391 /*--------------------- 32-bit ---------------------*/ 2392 2393 /* Nb: by "aligned" here we mean 4-byte aligned */ 2394 2395 static INLINE void make_aligned_word32_undefined ( Addr a ) 2396 { 2397 PROF_EVENT(300, "make_aligned_word32_undefined"); 2398 2399 #ifndef PERF_FAST_STACK2 2400 make_mem_undefined(a, 4); 2401 #else 2402 { 2403 UWord sm_off; 2404 SecMap* sm; 2405 2406 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) { 2407 PROF_EVENT(301, "make_aligned_word32_undefined-slow1"); 2408 make_mem_undefined(a, 4); 2409 return; 2410 } 2411 2412 sm = get_secmap_for_writing_low(a); 2413 sm_off = SM_OFF(a); 2414 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED; 2415 } 2416 #endif 2417 } 2418 2419 static INLINE 2420 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag ) 2421 { 2422 make_aligned_word32_undefined(a); 2423 //// BEGIN inlined, specialised version of MC_(helperc_b_store4) 2424 //// Set the origins for a+0 .. a+3 2425 { OCacheLine* line; 2426 UWord lineoff = oc_line_offset(a); 2427 if (OC_ENABLE_ASSERTIONS) { 2428 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 2429 } 2430 line = find_OCacheLine( a ); 2431 line->descr[lineoff] = 0xF; 2432 line->w32[lineoff] = otag; 2433 } 2434 //// END inlined, specialised version of MC_(helperc_b_store4) 2435 } 2436 2437 static INLINE 2438 void make_aligned_word32_noaccess ( Addr a ) 2439 { 2440 PROF_EVENT(310, "make_aligned_word32_noaccess"); 2441 2442 #ifndef PERF_FAST_STACK2 2443 MC_(make_mem_noaccess)(a, 4); 2444 #else 2445 { 2446 UWord sm_off; 2447 SecMap* sm; 2448 2449 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) { 2450 PROF_EVENT(311, "make_aligned_word32_noaccess-slow1"); 2451 MC_(make_mem_noaccess)(a, 4); 2452 return; 2453 } 2454 2455 sm = get_secmap_for_writing_low(a); 2456 sm_off = SM_OFF(a); 2457 sm->vabits8[sm_off] = VA_BITS8_NOACCESS; 2458 2459 //// BEGIN inlined, specialised version of MC_(helperc_b_store4) 2460 //// Set the origins for a+0 .. a+3. 
2461 if (UNLIKELY( MC_(clo_mc_level) == 3 )) { 2462 OCacheLine* line; 2463 UWord lineoff = oc_line_offset(a); 2464 if (OC_ENABLE_ASSERTIONS) { 2465 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 2466 } 2467 line = find_OCacheLine( a ); 2468 line->descr[lineoff] = 0; 2469 } 2470 //// END inlined, specialised version of MC_(helperc_b_store4) 2471 } 2472 #endif 2473 } 2474 2475 /*--------------------- 64-bit ---------------------*/ 2476 2477 /* Nb: by "aligned" here we mean 8-byte aligned */ 2478 2479 static INLINE void make_aligned_word64_undefined ( Addr a ) 2480 { 2481 PROF_EVENT(320, "make_aligned_word64_undefined"); 2482 2483 #ifndef PERF_FAST_STACK2 2484 make_mem_undefined(a, 8); 2485 #else 2486 { 2487 UWord sm_off16; 2488 SecMap* sm; 2489 2490 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) { 2491 PROF_EVENT(321, "make_aligned_word64_undefined-slow1"); 2492 make_mem_undefined(a, 8); 2493 return; 2494 } 2495 2496 sm = get_secmap_for_writing_low(a); 2497 sm_off16 = SM_OFF_16(a); 2498 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED; 2499 } 2500 #endif 2501 } 2502 2503 static INLINE 2504 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag ) 2505 { 2506 make_aligned_word64_undefined(a); 2507 //// BEGIN inlined, specialised version of MC_(helperc_b_store8) 2508 //// Set the origins for a+0 .. a+7 2509 { OCacheLine* line; 2510 UWord lineoff = oc_line_offset(a); 2511 tl_assert(lineoff >= 0 2512 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/); 2513 line = find_OCacheLine( a ); 2514 line->descr[lineoff+0] = 0xF; 2515 line->descr[lineoff+1] = 0xF; 2516 line->w32[lineoff+0] = otag; 2517 line->w32[lineoff+1] = otag; 2518 } 2519 //// END inlined, specialised version of MC_(helperc_b_store8) 2520 } 2521 2522 static INLINE 2523 void make_aligned_word64_noaccess ( Addr a ) 2524 { 2525 PROF_EVENT(330, "make_aligned_word64_noaccess"); 2526 2527 #ifndef PERF_FAST_STACK2 2528 MC_(make_mem_noaccess)(a, 8); 2529 #else 2530 { 2531 UWord sm_off16; 2532 SecMap* sm; 2533 2534 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) { 2535 PROF_EVENT(331, "make_aligned_word64_noaccess-slow1"); 2536 MC_(make_mem_noaccess)(a, 8); 2537 return; 2538 } 2539 2540 sm = get_secmap_for_writing_low(a); 2541 sm_off16 = SM_OFF_16(a); 2542 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS; 2543 2544 //// BEGIN inlined, specialised version of MC_(helperc_b_store8) 2545 //// Clear the origins for a+0 .. a+7. 
2546 if (UNLIKELY( MC_(clo_mc_level) == 3 )) { 2547 OCacheLine* line; 2548 UWord lineoff = oc_line_offset(a); 2549 tl_assert(lineoff >= 0 2550 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/); 2551 line = find_OCacheLine( a ); 2552 line->descr[lineoff+0] = 0; 2553 line->descr[lineoff+1] = 0; 2554 } 2555 //// END inlined, specialised version of MC_(helperc_b_store8) 2556 } 2557 #endif 2558 } 2559 2560 2561 /*------------------------------------------------------------*/ 2562 /*--- Stack pointer adjustment ---*/ 2563 /*------------------------------------------------------------*/ 2564 2565 #ifdef PERF_FAST_STACK 2566 # define MAYBE_USED 2567 #else 2568 # define MAYBE_USED __attribute__((unused)) 2569 #endif 2570 2571 /*--------------- adjustment by 4 bytes ---------------*/ 2572 2573 MAYBE_USED 2574 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu) 2575 { 2576 UInt otag = ecu | MC_OKIND_STACK; 2577 PROF_EVENT(110, "new_mem_stack_4"); 2578 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2579 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag ); 2580 } else { 2581 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag ); 2582 } 2583 } 2584 2585 MAYBE_USED 2586 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP) 2587 { 2588 PROF_EVENT(110, "new_mem_stack_4"); 2589 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2590 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2591 } else { 2592 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 ); 2593 } 2594 } 2595 2596 MAYBE_USED 2597 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP) 2598 { 2599 PROF_EVENT(120, "die_mem_stack_4"); 2600 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2601 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2602 } else { 2603 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 ); 2604 } 2605 } 2606 2607 /*--------------- adjustment by 8 bytes ---------------*/ 2608 2609 MAYBE_USED 2610 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu) 2611 { 2612 UInt otag = ecu | MC_OKIND_STACK; 2613 PROF_EVENT(111, "new_mem_stack_8"); 2614 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2615 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag ); 2616 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2617 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2618 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag ); 2619 } else { 2620 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag ); 2621 } 2622 } 2623 2624 MAYBE_USED 2625 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP) 2626 { 2627 PROF_EVENT(111, "new_mem_stack_8"); 2628 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2629 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2630 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2631 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2632 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 2633 } else { 2634 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 ); 2635 } 2636 } 2637 2638 MAYBE_USED 2639 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP) 2640 { 2641 PROF_EVENT(121, "die_mem_stack_8"); 2642 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2643 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 2644 } else if 
(VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2645 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 2646 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2647 } else { 2648 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 ); 2649 } 2650 } 2651 2652 /*--------------- adjustment by 12 bytes ---------------*/ 2653 2654 MAYBE_USED 2655 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu) 2656 { 2657 UInt otag = ecu | MC_OKIND_STACK; 2658 PROF_EVENT(112, "new_mem_stack_12"); 2659 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2660 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2661 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag ); 2662 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2663 /* from previous test we don't have 8-alignment at offset +0, 2664 hence must have 8 alignment at offsets +4/-4. Hence safe to 2665 do 4 at +0 and then 8 at +4/. */ 2666 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2667 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag ); 2668 } else { 2669 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag ); 2670 } 2671 } 2672 2673 MAYBE_USED 2674 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP) 2675 { 2676 PROF_EVENT(112, "new_mem_stack_12"); 2677 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2678 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2679 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2680 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2681 /* from previous test we don't have 8-alignment at offset +0, 2682 hence must have 8 alignment at offsets +4/-4. Hence safe to 2683 do 4 at +0 and then 8 at +4/. */ 2684 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2685 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 2686 } else { 2687 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 ); 2688 } 2689 } 2690 2691 MAYBE_USED 2692 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP) 2693 { 2694 PROF_EVENT(122, "die_mem_stack_12"); 2695 /* Note the -12 in the test */ 2696 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) { 2697 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at 2698 -4. */ 2699 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 2700 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2701 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2702 /* We have 4-alignment at +0, but we don't have 8-alignment at 2703 -12. So we must have 8-alignment at -8. Hence do 4 at -12 2704 and then 8 at -8. */ 2705 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 2706 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 2707 } else { 2708 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 ); 2709 } 2710 } 2711 2712 /*--------------- adjustment by 16 bytes ---------------*/ 2713 2714 MAYBE_USED 2715 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu) 2716 { 2717 UInt otag = ecu | MC_OKIND_STACK; 2718 PROF_EVENT(113, "new_mem_stack_16"); 2719 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2720 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. 
*/ 2721 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2722 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag ); 2723 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2724 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4. 2725 Hence do 4 at +0, 8 at +4, 4 at +12. */ 2726 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2727 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag ); 2728 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag ); 2729 } else { 2730 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag ); 2731 } 2732 } 2733 2734 MAYBE_USED 2735 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP) 2736 { 2737 PROF_EVENT(113, "new_mem_stack_16"); 2738 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2739 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */ 2740 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2741 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2742 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2743 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4. 2744 Hence do 4 at +0, 8 at +4, 4 at +12. */ 2745 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2746 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 2747 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 ); 2748 } else { 2749 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 ); 2750 } 2751 } 2752 2753 MAYBE_USED 2754 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP) 2755 { 2756 PROF_EVENT(123, "die_mem_stack_16"); 2757 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2758 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */ 2759 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 2760 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 2761 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2762 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */ 2763 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 2764 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 2765 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2766 } else { 2767 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 ); 2768 } 2769 } 2770 2771 /*--------------- adjustment by 32 bytes ---------------*/ 2772 2773 MAYBE_USED 2774 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu) 2775 { 2776 UInt otag = ecu | MC_OKIND_STACK; 2777 PROF_EVENT(114, "new_mem_stack_32"); 2778 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2779 /* Straightforward */ 2780 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2781 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag ); 2782 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 2783 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 2784 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2785 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at 2786 +0,+28. 
*/ 2787 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2788 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag ); 2789 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag ); 2790 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag ); 2791 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag ); 2792 } else { 2793 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag ); 2794 } 2795 } 2796 2797 MAYBE_USED 2798 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP) 2799 { 2800 PROF_EVENT(114, "new_mem_stack_32"); 2801 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2802 /* Straightforward */ 2803 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2804 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2805 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 2806 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 2807 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2808 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at 2809 +0,+28. */ 2810 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2811 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 2812 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 ); 2813 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 ); 2814 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 ); 2815 } else { 2816 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 ); 2817 } 2818 } 2819 2820 MAYBE_USED 2821 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP) 2822 { 2823 PROF_EVENT(124, "die_mem_stack_32"); 2824 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2825 /* Straightforward */ 2826 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 2827 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 2828 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 2829 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 2830 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2831 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and 2832 4 at -32,-4. 
*/ 2833 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 2834 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 ); 2835 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 ); 2836 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 2837 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2838 } else { 2839 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 ); 2840 } 2841 } 2842 2843 /*--------------- adjustment by 112 bytes ---------------*/ 2844 2845 MAYBE_USED 2846 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu) 2847 { 2848 UInt otag = ecu | MC_OKIND_STACK; 2849 PROF_EVENT(115, "new_mem_stack_112"); 2850 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2851 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2852 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag ); 2853 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 2854 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 2855 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag ); 2856 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag ); 2857 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag ); 2858 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag ); 2859 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag ); 2860 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag ); 2861 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag ); 2862 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag ); 2863 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag ); 2864 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag ); 2865 } else { 2866 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag ); 2867 } 2868 } 2869 2870 MAYBE_USED 2871 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP) 2872 { 2873 PROF_EVENT(115, "new_mem_stack_112"); 2874 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2875 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2876 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2877 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 2878 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 2879 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 ); 2880 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 ); 2881 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 ); 2882 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 ); 2883 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 ); 2884 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 ); 2885 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 ); 2886 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 ); 2887 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 ); 2888 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 ); 2889 } else { 2890 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 ); 2891 } 2892 } 2893 2894 MAYBE_USED 2895 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP) 2896 { 2897 PROF_EVENT(125, 
"die_mem_stack_112"); 2898 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2899 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112); 2900 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104); 2901 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 ); 2902 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 ); 2903 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 ); 2904 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 ); 2905 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 ); 2906 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 ); 2907 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 ); 2908 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 ); 2909 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 2910 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 2911 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 2912 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 2913 } else { 2914 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 ); 2915 } 2916 } 2917 2918 /*--------------- adjustment by 128 bytes ---------------*/ 2919 2920 MAYBE_USED 2921 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu) 2922 { 2923 UInt otag = ecu | MC_OKIND_STACK; 2924 PROF_EVENT(116, "new_mem_stack_128"); 2925 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2926 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2927 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag ); 2928 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 2929 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 2930 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag ); 2931 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag ); 2932 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag ); 2933 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag ); 2934 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag ); 2935 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag ); 2936 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag ); 2937 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag ); 2938 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag ); 2939 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag ); 2940 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag ); 2941 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag ); 2942 } else { 2943 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag ); 2944 } 2945 } 2946 2947 MAYBE_USED 2948 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP) 2949 { 2950 PROF_EVENT(116, "new_mem_stack_128"); 2951 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2952 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2953 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2954 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 2955 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 2956 
make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 ); 2957 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 ); 2958 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 ); 2959 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 ); 2960 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 ); 2961 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 ); 2962 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 ); 2963 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 ); 2964 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 ); 2965 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 ); 2966 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 ); 2967 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 ); 2968 } else { 2969 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 ); 2970 } 2971 } 2972 2973 MAYBE_USED 2974 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP) 2975 { 2976 PROF_EVENT(126, "die_mem_stack_128"); 2977 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2978 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128); 2979 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120); 2980 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112); 2981 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104); 2982 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 ); 2983 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 ); 2984 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 ); 2985 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 ); 2986 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 ); 2987 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 ); 2988 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 ); 2989 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 ); 2990 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 2991 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 2992 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 2993 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 2994 } else { 2995 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 ); 2996 } 2997 } 2998 2999 /*--------------- adjustment by 144 bytes ---------------*/ 3000 3001 MAYBE_USED 3002 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu) 3003 { 3004 UInt otag = ecu | MC_OKIND_STACK; 3005 PROF_EVENT(117, "new_mem_stack_144"); 3006 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3007 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag ); 3008 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag ); 3009 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 3010 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 3011 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag ); 3012 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag ); 3013 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag ); 3014 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag ); 3015 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag ); 3016 
make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag ); 3017 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag ); 3018 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag ); 3019 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag ); 3020 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag ); 3021 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag ); 3022 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag ); 3023 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag ); 3024 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag ); 3025 } else { 3026 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag ); 3027 } 3028 } 3029 3030 MAYBE_USED 3031 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP) 3032 { 3033 PROF_EVENT(117, "new_mem_stack_144"); 3034 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3035 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 3036 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 3037 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 3038 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 3039 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 ); 3040 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 ); 3041 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 ); 3042 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 ); 3043 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 ); 3044 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 ); 3045 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 ); 3046 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 ); 3047 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 ); 3048 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 ); 3049 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 ); 3050 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 ); 3051 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 ); 3052 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 ); 3053 } else { 3054 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 ); 3055 } 3056 } 3057 3058 MAYBE_USED 3059 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP) 3060 { 3061 PROF_EVENT(127, "die_mem_stack_144"); 3062 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3063 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144); 3064 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136); 3065 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128); 3066 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120); 3067 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112); 3068 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104); 3069 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 ); 3070 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 ); 3071 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 ); 3072 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 ); 3073 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 ); 3074 make_aligned_word64_noaccess ( 
-VG_STACK_REDZONE_SZB + new_SP-56 ); 3075 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 ); 3076 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 ); 3077 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 3078 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 3079 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 3080 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 3081 } else { 3082 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 ); 3083 } 3084 } 3085 3086 /*--------------- adjustment by 160 bytes ---------------*/ 3087 3088 MAYBE_USED 3089 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu) 3090 { 3091 UInt otag = ecu | MC_OKIND_STACK; 3092 PROF_EVENT(118, "new_mem_stack_160"); 3093 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3094 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag ); 3095 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag ); 3096 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 3097 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 3098 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag ); 3099 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag ); 3100 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag ); 3101 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag ); 3102 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag ); 3103 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag ); 3104 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag ); 3105 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag ); 3106 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag ); 3107 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag ); 3108 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag ); 3109 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag ); 3110 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag ); 3111 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag ); 3112 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag ); 3113 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag ); 3114 } else { 3115 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag ); 3116 } 3117 } 3118 3119 MAYBE_USED 3120 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP) 3121 { 3122 PROF_EVENT(118, "new_mem_stack_160"); 3123 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3124 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 3125 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 3126 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 3127 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 3128 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 ); 3129 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 ); 3130 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 ); 3131 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 ); 3132 
make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 ); 3133 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 ); 3134 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 ); 3135 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 ); 3136 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 ); 3137 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 ); 3138 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 ); 3139 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 ); 3140 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 ); 3141 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 ); 3142 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 ); 3143 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 ); 3144 } else { 3145 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 ); 3146 } 3147 } 3148 3149 MAYBE_USED 3150 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP) 3151 { 3152 PROF_EVENT(128, "die_mem_stack_160"); 3153 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3154 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160); 3155 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152); 3156 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144); 3157 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136); 3158 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128); 3159 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120); 3160 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112); 3161 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104); 3162 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 ); 3163 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 ); 3164 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 ); 3165 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 ); 3166 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 ); 3167 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 ); 3168 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 ); 3169 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 ); 3170 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 3171 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 3172 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 3173 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 3174 } else { 3175 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 ); 3176 } 3177 } 3178 3179 /*--------------- adjustment by N bytes ---------------*/ 3180 3181 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu ) 3182 { 3183 UInt otag = ecu | MC_OKIND_STACK; 3184 PROF_EVENT(115, "new_mem_stack_w_otag"); 3185 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag ); 3186 } 3187 3188 static void mc_new_mem_stack ( Addr a, SizeT len ) 3189 { 3190 PROF_EVENT(115, "new_mem_stack"); 3191 make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len ); 3192 } 3193 3194 static void mc_die_mem_stack ( Addr a, SizeT len ) 3195 { 3196 PROF_EVENT(125, "die_mem_stack"); 3197 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len ); 3198 } 3199 3200 3201 /* The AMD64 ABI says: 3202 3203 "The 128-byte area beyond the location pointed to by %rsp is considered 3204 to be 
reserved and shall not be modified by signal or interrupt 3205 handlers. Therefore, functions may use this area for temporary data 3206 that is not needed across function calls. In particular, leaf functions 3207 may use this area for their entire stack frame, rather than adjusting 3208 the stack pointer in the prologue and epilogue. This area is known as 3209 red zone [sic]." 3210 3211 So after any call or return we need to mark this redzone as containing 3212 undefined values. 3213 3214 Consider this: we're in function f. f calls g. g moves rsp down 3215 modestly (say 16 bytes) and writes stuff all over the red zone, making it 3216 defined. g returns. f is buggy and reads from parts of the red zone 3217 that it didn't write on. But because g filled that area in, f is going 3218 to be picking up defined V bits and so any errors from reading bits of 3219 the red zone it didn't write, will be missed. The only solution I could 3220 think of was to make the red zone undefined when g returns to f. 3221 3222 This is in accordance with the ABI, which makes it clear the redzone 3223 is volatile across function calls. 3224 3225 The problem occurs the other way round too: f could fill the RZ up 3226 with defined values and g could mistakenly read them. So the RZ 3227 also needs to be nuked on function calls. 3228 */ 3229 3230 3231 /* Here's a simple cache to hold nia -> ECU mappings. It could be 3232 improved so as to have a lower miss rate. */ 3233 3234 static UWord stats__nia_cache_queries = 0; 3235 static UWord stats__nia_cache_misses = 0; 3236 3237 typedef 3238 struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */ 3239 UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */ 3240 WCacheEnt; 3241 3242 #define N_NIA_TO_ECU_CACHE 511 3243 3244 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE]; 3245 3246 static void init_nia_to_ecu_cache ( void ) 3247 { 3248 UWord i; 3249 Addr zero_addr = 0; 3250 ExeContext* zero_ec; 3251 UInt zero_ecu; 3252 /* Fill all the slots with an entry for address zero, and the 3253 relevant otags accordingly. Hence the cache is initially filled 3254 with valid data. 
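      The cache itself is 2-way set-associative: each of the
      N_NIA_TO_ECU_CACHE entries holds two (nia, ecu) pairs, and
      convert_nia_to_ecu() below promotes a hit on the second pair into
      the first slot, giving a cheap approximation of LRU replacement.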
*/ 3255 zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr); 3256 tl_assert(zero_ec); 3257 zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec); 3258 tl_assert(VG_(is_plausible_ECU)(zero_ecu)); 3259 for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) { 3260 nia_to_ecu_cache[i].nia0 = zero_addr; 3261 nia_to_ecu_cache[i].ecu0 = zero_ecu; 3262 nia_to_ecu_cache[i].nia1 = zero_addr; 3263 nia_to_ecu_cache[i].ecu1 = zero_ecu; 3264 } 3265 } 3266 3267 static inline UInt convert_nia_to_ecu ( Addr nia ) 3268 { 3269 UWord i; 3270 UInt ecu; 3271 ExeContext* ec; 3272 3273 tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) ); 3274 3275 stats__nia_cache_queries++; 3276 i = nia % N_NIA_TO_ECU_CACHE; 3277 tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE); 3278 3279 if (LIKELY( nia_to_ecu_cache[i].nia0 == nia )) 3280 return nia_to_ecu_cache[i].ecu0; 3281 3282 if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) { 3283 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; } 3284 SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 ); 3285 SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 ); 3286 # undef SWAP 3287 return nia_to_ecu_cache[i].ecu0; 3288 } 3289 3290 stats__nia_cache_misses++; 3291 ec = VG_(make_depth_1_ExeContext_from_Addr)(nia); 3292 tl_assert(ec); 3293 ecu = VG_(get_ECU_from_ExeContext)(ec); 3294 tl_assert(VG_(is_plausible_ECU)(ecu)); 3295 3296 nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0; 3297 nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0; 3298 3299 nia_to_ecu_cache[i].nia0 = nia; 3300 nia_to_ecu_cache[i].ecu0 = (UWord)ecu; 3301 return ecu; 3302 } 3303 3304 3305 /* Note that this serves both the origin-tracking and 3306 no-origin-tracking modes. We assume that calls to it are 3307 sufficiently infrequent that it isn't worth specialising for the 3308 with/without origin-tracking cases. */ 3309 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia ) 3310 { 3311 UInt otag; 3312 tl_assert(sizeof(UWord) == sizeof(SizeT)); 3313 if (0) 3314 VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n", 3315 base, len, nia ); 3316 3317 if (UNLIKELY( MC_(clo_mc_level) == 3 )) { 3318 UInt ecu = convert_nia_to_ecu ( nia ); 3319 tl_assert(VG_(is_plausible_ECU)(ecu)); 3320 otag = ecu | MC_OKIND_STACK; 3321 } else { 3322 tl_assert(nia == 0); 3323 otag = 0; 3324 } 3325 3326 # if 0 3327 /* Really slow version */ 3328 MC_(make_mem_undefined)(base, len, otag); 3329 # endif 3330 3331 # if 0 3332 /* Slow(ish) version, which is fairly easily seen to be correct. 
3333    */
3334    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3335       make_aligned_word64_undefined(base + 0, otag);
3336       make_aligned_word64_undefined(base + 8, otag);
3337       make_aligned_word64_undefined(base + 16, otag);
3338       make_aligned_word64_undefined(base + 24, otag);
3339
3340       make_aligned_word64_undefined(base + 32, otag);
3341       make_aligned_word64_undefined(base + 40, otag);
3342       make_aligned_word64_undefined(base + 48, otag);
3343       make_aligned_word64_undefined(base + 56, otag);
3344
3345       make_aligned_word64_undefined(base + 64, otag);
3346       make_aligned_word64_undefined(base + 72, otag);
3347       make_aligned_word64_undefined(base + 80, otag);
3348       make_aligned_word64_undefined(base + 88, otag);
3349
3350       make_aligned_word64_undefined(base + 96, otag);
3351       make_aligned_word64_undefined(base + 104, otag);
3352       make_aligned_word64_undefined(base + 112, otag);
3353       make_aligned_word64_undefined(base + 120, otag);
3354    } else {
3355       MC_(make_mem_undefined)(base, len, otag);
3356    }
3357 # endif
3358
3359    /* Idea is: go fast when
3360          * 8-aligned and length is 128
3361          * the sm is available in the main primary map
3362          * the address range falls entirely within a single secondary map
3363       If all those conditions hold, just update the V+A bits by writing
3364       directly into the vabits array.  (If the sm was distinguished, this
3365       will make a copy and then write to it.)
3366    */
3367
3368    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3369       /* Now we know the address range is suitably sized and aligned. */
3370       UWord a_lo = (UWord)(base);
3371       UWord a_hi = (UWord)(base + 128 - 1);
3372       tl_assert(a_lo < a_hi);  // paranoia: detect overflow
3373       if (a_hi <= MAX_PRIMARY_ADDRESS) {
3374          // Now we know the entire range is within the main primary map.
3375          SecMap* sm    = get_secmap_for_writing_low(a_lo);
3376          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3377          /* Now we know that the entire address range falls within a
3378             single secondary map, and that that secondary 'lives' in
3379             the main primary map. */
3380          if (LIKELY(sm == sm_hi)) {
3381             // Finally, we know that the range is entirely within one secmap.
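            /* Each vabits8[] byte packs the 2-bit V+A state of 4 memory bytes,
               so every aligned UShort written below covers 8 bytes of address
               space; the 16 halfword stores that follow therefore mark the
               whole 128-byte range undefined in one pass. */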
3382 UWord v_off = SM_OFF(a_lo); 3383 UShort* p = (UShort*)(&sm->vabits8[v_off]); 3384 p[ 0] = VA_BITS16_UNDEFINED; 3385 p[ 1] = VA_BITS16_UNDEFINED; 3386 p[ 2] = VA_BITS16_UNDEFINED; 3387 p[ 3] = VA_BITS16_UNDEFINED; 3388 p[ 4] = VA_BITS16_UNDEFINED; 3389 p[ 5] = VA_BITS16_UNDEFINED; 3390 p[ 6] = VA_BITS16_UNDEFINED; 3391 p[ 7] = VA_BITS16_UNDEFINED; 3392 p[ 8] = VA_BITS16_UNDEFINED; 3393 p[ 9] = VA_BITS16_UNDEFINED; 3394 p[10] = VA_BITS16_UNDEFINED; 3395 p[11] = VA_BITS16_UNDEFINED; 3396 p[12] = VA_BITS16_UNDEFINED; 3397 p[13] = VA_BITS16_UNDEFINED; 3398 p[14] = VA_BITS16_UNDEFINED; 3399 p[15] = VA_BITS16_UNDEFINED; 3400 if (UNLIKELY( MC_(clo_mc_level) == 3 )) { 3401 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag ); 3402 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag ); 3403 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag ); 3404 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag ); 3405 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag ); 3406 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag ); 3407 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag ); 3408 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag ); 3409 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag ); 3410 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag ); 3411 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag ); 3412 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag ); 3413 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag ); 3414 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag ); 3415 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag ); 3416 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag ); 3417 } 3418 return; 3419 } 3420 } 3421 } 3422 3423 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */ 3424 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) { 3425 /* Now we know the address range is suitably sized and aligned. */ 3426 UWord a_lo = (UWord)(base); 3427 UWord a_hi = (UWord)(base + 288 - 1); 3428 tl_assert(a_lo < a_hi); // paranoia: detect overflow 3429 if (a_hi <= MAX_PRIMARY_ADDRESS) { 3430 // Now we know the entire range is within the main primary map. 3431 SecMap* sm = get_secmap_for_writing_low(a_lo); 3432 SecMap* sm_hi = get_secmap_for_writing_low(a_hi); 3433 /* Now we know that the entire address range falls within a 3434 single secondary map, and that that secondary 'lives' in 3435 the main primary map. */ 3436 if (LIKELY(sm == sm_hi)) { 3437 // Finally, we know that the range is entirely within one secmap. 
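            /* Same trick as the 128-byte case above: each halfword of vabits
               covers 8 bytes, so the 36 stores below mark the whole 288-byte
               save area undefined. */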
3438 UWord v_off = SM_OFF(a_lo); 3439 UShort* p = (UShort*)(&sm->vabits8[v_off]); 3440 p[ 0] = VA_BITS16_UNDEFINED; 3441 p[ 1] = VA_BITS16_UNDEFINED; 3442 p[ 2] = VA_BITS16_UNDEFINED; 3443 p[ 3] = VA_BITS16_UNDEFINED; 3444 p[ 4] = VA_BITS16_UNDEFINED; 3445 p[ 5] = VA_BITS16_UNDEFINED; 3446 p[ 6] = VA_BITS16_UNDEFINED; 3447 p[ 7] = VA_BITS16_UNDEFINED; 3448 p[ 8] = VA_BITS16_UNDEFINED; 3449 p[ 9] = VA_BITS16_UNDEFINED; 3450 p[10] = VA_BITS16_UNDEFINED; 3451 p[11] = VA_BITS16_UNDEFINED; 3452 p[12] = VA_BITS16_UNDEFINED; 3453 p[13] = VA_BITS16_UNDEFINED; 3454 p[14] = VA_BITS16_UNDEFINED; 3455 p[15] = VA_BITS16_UNDEFINED; 3456 p[16] = VA_BITS16_UNDEFINED; 3457 p[17] = VA_BITS16_UNDEFINED; 3458 p[18] = VA_BITS16_UNDEFINED; 3459 p[19] = VA_BITS16_UNDEFINED; 3460 p[20] = VA_BITS16_UNDEFINED; 3461 p[21] = VA_BITS16_UNDEFINED; 3462 p[22] = VA_BITS16_UNDEFINED; 3463 p[23] = VA_BITS16_UNDEFINED; 3464 p[24] = VA_BITS16_UNDEFINED; 3465 p[25] = VA_BITS16_UNDEFINED; 3466 p[26] = VA_BITS16_UNDEFINED; 3467 p[27] = VA_BITS16_UNDEFINED; 3468 p[28] = VA_BITS16_UNDEFINED; 3469 p[29] = VA_BITS16_UNDEFINED; 3470 p[30] = VA_BITS16_UNDEFINED; 3471 p[31] = VA_BITS16_UNDEFINED; 3472 p[32] = VA_BITS16_UNDEFINED; 3473 p[33] = VA_BITS16_UNDEFINED; 3474 p[34] = VA_BITS16_UNDEFINED; 3475 p[35] = VA_BITS16_UNDEFINED; 3476 if (UNLIKELY( MC_(clo_mc_level) == 3 )) { 3477 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag ); 3478 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag ); 3479 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag ); 3480 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag ); 3481 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag ); 3482 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag ); 3483 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag ); 3484 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag ); 3485 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag ); 3486 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag ); 3487 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag ); 3488 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag ); 3489 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag ); 3490 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag ); 3491 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag ); 3492 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag ); 3493 set_aligned_word64_Origin_to_undef( base + 8 * 16, otag ); 3494 set_aligned_word64_Origin_to_undef( base + 8 * 17, otag ); 3495 set_aligned_word64_Origin_to_undef( base + 8 * 18, otag ); 3496 set_aligned_word64_Origin_to_undef( base + 8 * 19, otag ); 3497 set_aligned_word64_Origin_to_undef( base + 8 * 20, otag ); 3498 set_aligned_word64_Origin_to_undef( base + 8 * 21, otag ); 3499 set_aligned_word64_Origin_to_undef( base + 8 * 22, otag ); 3500 set_aligned_word64_Origin_to_undef( base + 8 * 23, otag ); 3501 set_aligned_word64_Origin_to_undef( base + 8 * 24, otag ); 3502 set_aligned_word64_Origin_to_undef( base + 8 * 25, otag ); 3503 set_aligned_word64_Origin_to_undef( base + 8 * 26, otag ); 3504 set_aligned_word64_Origin_to_undef( base + 8 * 27, otag ); 3505 set_aligned_word64_Origin_to_undef( base + 8 * 28, otag ); 3506 set_aligned_word64_Origin_to_undef( base + 8 * 29, otag ); 3507 set_aligned_word64_Origin_to_undef( base + 8 * 30, otag ); 3508 set_aligned_word64_Origin_to_undef( base + 8 * 31, otag ); 3509 set_aligned_word64_Origin_to_undef( base + 8 * 32, otag ); 3510 set_aligned_word64_Origin_to_undef( base + 8 * 33, otag ); 3511 
set_aligned_word64_Origin_to_undef( base + 8 * 34, otag ); 3512 set_aligned_word64_Origin_to_undef( base + 8 * 35, otag ); 3513 } 3514 return; 3515 } 3516 } 3517 } 3518 3519 /* else fall into slow case */ 3520 MC_(make_mem_undefined_w_otag)(base, len, otag); 3521 } 3522 3523 3524 /*------------------------------------------------------------*/ 3525 /*--- Checking memory ---*/ 3526 /*------------------------------------------------------------*/ 3527 3528 typedef 3529 enum { 3530 MC_Ok = 5, 3531 MC_AddrErr = 6, 3532 MC_ValueErr = 7 3533 } 3534 MC_ReadResult; 3535 3536 3537 /* Check permissions for address range. If inadequate permissions 3538 exist, *bad_addr is set to the offending address, so the caller can 3539 know what it is. */ 3540 3541 /* Returns True if [a .. a+len) is not addressible. Otherwise, 3542 returns False, and if bad_addr is non-NULL, sets *bad_addr to 3543 indicate the lowest failing address. Functions below are 3544 similar. */ 3545 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr ) 3546 { 3547 SizeT i; 3548 UWord vabits2; 3549 3550 PROF_EVENT(60, "check_mem_is_noaccess"); 3551 for (i = 0; i < len; i++) { 3552 PROF_EVENT(61, "check_mem_is_noaccess(loop)"); 3553 vabits2 = get_vabits2(a); 3554 if (VA_BITS2_NOACCESS != vabits2) { 3555 if (bad_addr != NULL) *bad_addr = a; 3556 return False; 3557 } 3558 a++; 3559 } 3560 return True; 3561 } 3562 3563 static Bool is_mem_addressable ( Addr a, SizeT len, 3564 /*OUT*/Addr* bad_addr ) 3565 { 3566 SizeT i; 3567 UWord vabits2; 3568 3569 PROF_EVENT(62, "is_mem_addressable"); 3570 for (i = 0; i < len; i++) { 3571 PROF_EVENT(63, "is_mem_addressable(loop)"); 3572 vabits2 = get_vabits2(a); 3573 if (VA_BITS2_NOACCESS == vabits2) { 3574 if (bad_addr != NULL) *bad_addr = a; 3575 return False; 3576 } 3577 a++; 3578 } 3579 return True; 3580 } 3581 3582 static MC_ReadResult is_mem_defined ( Addr a, SizeT len, 3583 /*OUT*/Addr* bad_addr, 3584 /*OUT*/UInt* otag ) 3585 { 3586 SizeT i; 3587 UWord vabits2; 3588 3589 PROF_EVENT(64, "is_mem_defined"); 3590 DEBUG("is_mem_defined\n"); 3591 3592 if (otag) *otag = 0; 3593 if (bad_addr) *bad_addr = 0; 3594 for (i = 0; i < len; i++) { 3595 PROF_EVENT(65, "is_mem_defined(loop)"); 3596 vabits2 = get_vabits2(a); 3597 if (VA_BITS2_DEFINED != vabits2) { 3598 // Error! Nb: Report addressability errors in preference to 3599 // definedness errors. And don't report definedeness errors unless 3600 // --undef-value-errors=yes. 3601 if (bad_addr) { 3602 *bad_addr = a; 3603 } 3604 if (VA_BITS2_NOACCESS == vabits2) { 3605 return MC_AddrErr; 3606 } 3607 if (MC_(clo_mc_level) >= 2) { 3608 if (otag && MC_(clo_mc_level) == 3) { 3609 *otag = MC_(helperc_b_load1)( a ); 3610 } 3611 return MC_ValueErr; 3612 } 3613 } 3614 a++; 3615 } 3616 return MC_Ok; 3617 } 3618 3619 3620 /* Like is_mem_defined but doesn't give up at the first uninitialised 3621 byte -- the entire range is always checked. This is important for 3622 detecting errors in the case where a checked range strays into 3623 invalid memory, but that fact is not detected by the ordinary 3624 is_mem_defined(), because of an undefined section that precedes the 3625 out of range section, possibly as a result of an alignment hole in 3626 the checked data. This version always checks the entire range and 3627 can report both a definedness and an accessbility error, if 3628 necessary. */ 3629 static void is_mem_defined_comprehensive ( 3630 Addr a, SizeT len, 3631 /*OUT*/Bool* errorV, /* is there a definedness err? 
*/ 3632 /*OUT*/Addr* bad_addrV, /* if so where? */ 3633 /*OUT*/UInt* otagV, /* and what's its otag? */ 3634 /*OUT*/Bool* errorA, /* is there an addressability err? */ 3635 /*OUT*/Addr* bad_addrA /* if so where? */ 3636 ) 3637 { 3638 SizeT i; 3639 UWord vabits2; 3640 Bool already_saw_errV = False; 3641 3642 PROF_EVENT(64, "is_mem_defined"); // fixme 3643 DEBUG("is_mem_defined_comprehensive\n"); 3644 3645 tl_assert(!(*errorV || *errorA)); 3646 3647 for (i = 0; i < len; i++) { 3648 PROF_EVENT(65, "is_mem_defined(loop)"); // fixme 3649 vabits2 = get_vabits2(a); 3650 switch (vabits2) { 3651 case VA_BITS2_DEFINED: 3652 a++; 3653 break; 3654 case VA_BITS2_UNDEFINED: 3655 case VA_BITS2_PARTDEFINED: 3656 if (!already_saw_errV) { 3657 *errorV = True; 3658 *bad_addrV = a; 3659 if (MC_(clo_mc_level) == 3) { 3660 *otagV = MC_(helperc_b_load1)( a ); 3661 } else { 3662 *otagV = 0; 3663 } 3664 already_saw_errV = True; 3665 } 3666 a++; /* keep going */ 3667 break; 3668 case VA_BITS2_NOACCESS: 3669 *errorA = True; 3670 *bad_addrA = a; 3671 return; /* give up now. */ 3672 default: 3673 tl_assert(0); 3674 } 3675 } 3676 } 3677 3678 3679 /* Check a zero-terminated ascii string. Tricky -- don't want to 3680 examine the actual bytes, to find the end, until we're sure it is 3681 safe to do so. */ 3682 3683 static Bool mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag ) 3684 { 3685 UWord vabits2; 3686 3687 PROF_EVENT(66, "mc_is_defined_asciiz"); 3688 DEBUG("mc_is_defined_asciiz\n"); 3689 3690 if (otag) *otag = 0; 3691 if (bad_addr) *bad_addr = 0; 3692 while (True) { 3693 PROF_EVENT(67, "mc_is_defined_asciiz(loop)"); 3694 vabits2 = get_vabits2(a); 3695 if (VA_BITS2_DEFINED != vabits2) { 3696 // Error! Nb: Report addressability errors in preference to 3697 // definedness errors. And don't report definedeness errors unless 3698 // --undef-value-errors=yes. 3699 if (bad_addr) { 3700 *bad_addr = a; 3701 } 3702 if (VA_BITS2_NOACCESS == vabits2) { 3703 return MC_AddrErr; 3704 } 3705 if (MC_(clo_mc_level) >= 2) { 3706 if (otag && MC_(clo_mc_level) == 3) { 3707 *otag = MC_(helperc_b_load1)( a ); 3708 } 3709 return MC_ValueErr; 3710 } 3711 } 3712 /* Ok, a is safe to read. */ 3713 if (* ((UChar*)a) == 0) { 3714 return MC_Ok; 3715 } 3716 a++; 3717 } 3718 } 3719 3720 3721 /*------------------------------------------------------------*/ 3722 /*--- Memory event handlers ---*/ 3723 /*------------------------------------------------------------*/ 3724 3725 static 3726 void check_mem_is_addressable ( CorePart part, ThreadId tid, Char* s, 3727 Addr base, SizeT size ) 3728 { 3729 Addr bad_addr; 3730 Bool ok = is_mem_addressable ( base, size, &bad_addr ); 3731 3732 if (!ok) { 3733 switch (part) { 3734 case Vg_CoreSysCall: 3735 MC_(record_memparam_error) ( tid, bad_addr, 3736 /*isAddrErr*/True, s, 0/*otag*/ ); 3737 break; 3738 3739 case Vg_CoreSignal: 3740 MC_(record_core_mem_error)( tid, s ); 3741 break; 3742 3743 default: 3744 VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart"); 3745 } 3746 } 3747 } 3748 3749 static 3750 void check_mem_is_defined ( CorePart part, ThreadId tid, Char* s, 3751 Addr base, SizeT size ) 3752 { 3753 UInt otag = 0; 3754 Addr bad_addr; 3755 MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag ); 3756 3757 if (MC_Ok != res) { 3758 Bool isAddrErr = ( MC_AddrErr == res ? True : False ); 3759 3760 switch (part) { 3761 case Vg_CoreSysCall: 3762 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s, 3763 isAddrErr ? 
0 : otag ); 3764 break; 3765 3766 case Vg_CoreSysCallArgInMem: 3767 MC_(record_regparam_error) ( tid, s, otag ); 3768 break; 3769 3770 /* If we're being asked to jump to a silly address, record an error 3771 message before potentially crashing the entire system. */ 3772 case Vg_CoreTranslate: 3773 MC_(record_jump_error)( tid, bad_addr ); 3774 break; 3775 3776 default: 3777 VG_(tool_panic)("check_mem_is_defined: unexpected CorePart"); 3778 } 3779 } 3780 } 3781 3782 static 3783 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid, 3784 Char* s, Addr str ) 3785 { 3786 MC_ReadResult res; 3787 Addr bad_addr = 0; // shut GCC up 3788 UInt otag = 0; 3789 3790 tl_assert(part == Vg_CoreSysCall); 3791 res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag ); 3792 if (MC_Ok != res) { 3793 Bool isAddrErr = ( MC_AddrErr == res ? True : False ); 3794 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s, 3795 isAddrErr ? 0 : otag ); 3796 } 3797 } 3798 3799 /* Handling of mmap and mprotect is not as simple as it seems. 3800 3801 The underlying semantics are that memory obtained from mmap is 3802 always initialised, but may be inaccessible. And changes to the 3803 protection of memory do not change its contents and hence not its 3804 definedness state. Problem is we can't model 3805 inaccessible-but-with-some-definedness state; once we mark memory 3806 as inaccessible we lose all info about definedness, and so can't 3807 restore that if it is later made accessible again. 3808 3809 One obvious thing to do is this: 3810 3811 mmap/mprotect NONE -> noaccess 3812 mmap/mprotect other -> defined 3813 3814 The problem case here is: taking accessible memory, writing 3815 uninitialised data to it, mprotecting it NONE and later mprotecting 3816 it back to some accessible state causes the undefinedness to be 3817 lost. 3818 3819 A better proposal is: 3820 3821 (1) mmap NONE -> make noaccess 3822 (2) mmap other -> make defined 3823 3824 (3) mprotect NONE -> # no change 3825 (4) mprotect other -> change any "noaccess" to "defined" 3826 3827 (2) is OK because memory newly obtained from mmap really is defined 3828 (zeroed out by the kernel -- doing anything else would 3829 constitute a massive security hole.) 3830 3831 (1) is OK because the only way to make the memory usable is via 3832 (4), in which case we also wind up correctly marking it all as 3833 defined. 3834 3835 (3) is the weak case. We choose not to change memory state. 3836 (presumably the range is in some mixture of "defined" and 3837 "undefined", viz, accessible but with arbitrary V bits). Doing 3838 nothing means we retain the V bits, so that if the memory is 3839 later mprotected "other", the V bits remain unchanged, so there 3840 can be no false negatives. The bad effect is that if there's 3841 an access in the area, then MC cannot warn; but at least we'll 3842 get a SEGV to show, so it's better than nothing. 3843 3844 Consider the sequence (3) followed by (4). Any memory that was 3845 "defined" or "undefined" previously retains its state (as 3846 required). Any memory that was "noaccess" before can only have 3847 been made that way by (1), and so it's OK to change it to 3848 "defined". 
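   As a concrete example of why (3) matters: a page is mmap'd read-write,
   so by (2) it starts out defined; the program then copies uninitialised
   data into it (those shadow bytes become undefined), mprotects the page
   to PROT_NONE, and later mprotects it back to read-write.  By (3) the
   first mprotect changes nothing, and by (4) nothing was noaccess, so the
   second changes nothing either: the bytes are still undefined, and a
   subsequent read of them is still reported, as required.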
3849 3850 See https://bugs.kde.org/show_bug.cgi?id=205541 3851 and https://bugs.kde.org/show_bug.cgi?id=210268 3852 */ 3853 static 3854 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx, 3855 ULong di_handle ) 3856 { 3857 if (rr || ww || xx) { 3858 /* (2) mmap/mprotect other -> defined */ 3859 MC_(make_mem_defined)(a, len); 3860 } else { 3861 /* (1) mmap/mprotect NONE -> noaccess */ 3862 MC_(make_mem_noaccess)(a, len); 3863 } 3864 } 3865 3866 static 3867 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx ) 3868 { 3869 if (rr || ww || xx) { 3870 /* (4) mprotect other -> change any "noaccess" to "defined" */ 3871 make_mem_defined_if_noaccess(a, len); 3872 } else { 3873 /* (3) mprotect NONE -> # no change */ 3874 /* do nothing */ 3875 } 3876 } 3877 3878 3879 static 3880 void mc_new_mem_startup( Addr a, SizeT len, 3881 Bool rr, Bool ww, Bool xx, ULong di_handle ) 3882 { 3883 // Because code is defined, initialised variables get put in the data 3884 // segment and are defined, and uninitialised variables get put in the 3885 // bss segment and are auto-zeroed (and so defined). 3886 // 3887 // It's possible that there will be padding between global variables. 3888 // This will also be auto-zeroed, and marked as defined by Memcheck. If 3889 // a program uses it, Memcheck will not complain. This is arguably a 3890 // false negative, but it's a grey area -- the behaviour is defined (the 3891 // padding is zeroed) but it's probably not what the user intended. And 3892 // we can't avoid it. 3893 // 3894 // Note: we generally ignore RWX permissions, because we can't track them 3895 // without requiring more than one A bit which would slow things down a 3896 // lot. But on Darwin the 0th page is mapped but !R and !W and !X. 3897 // So we mark any such pages as "unaddressable". 3898 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n", 3899 a, (ULong)len, rr, ww, xx); 3900 mc_new_mem_mmap(a, len, rr, ww, xx, di_handle); 3901 } 3902 3903 static 3904 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len) 3905 { 3906 MC_(make_mem_defined)(a, len); 3907 } 3908 3909 3910 /*------------------------------------------------------------*/ 3911 /*--- Register event handlers ---*/ 3912 /*------------------------------------------------------------*/ 3913 3914 /* Try and get a nonzero origin for the guest state section of thread 3915 tid characterised by (offset,size). Return 0 if nothing to show 3916 for it. */ 3917 static UInt mb_get_origin_for_guest_offset ( ThreadId tid, 3918 Int offset, SizeT size ) 3919 { 3920 Int sh2off; 3921 UInt area[3]; 3922 UInt otag; 3923 sh2off = MC_(get_otrack_shadow_offset)( offset, size ); 3924 if (sh2off == -1) 3925 return 0; /* This piece of guest state is not tracked */ 3926 tl_assert(sh2off >= 0); 3927 tl_assert(0 == (sh2off % 4)); 3928 area[0] = 0x31313131; 3929 area[2] = 0x27272727; 3930 VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 ); 3931 tl_assert(area[0] == 0x31313131); 3932 tl_assert(area[2] == 0x27272727); 3933 otag = area[1]; 3934 return otag; 3935 } 3936 3937 3938 /* When some chunk of guest state is written, mark the corresponding 3939 shadow area as valid. This is used to initialise arbitrarily large 3940 chunks of guest state, hence the _SIZE value, which has to be as 3941 big as the biggest guest state. 
3942 */ 3943 static void mc_post_reg_write ( CorePart part, ThreadId tid, 3944 PtrdiffT offset, SizeT size) 3945 { 3946 # define MAX_REG_WRITE_SIZE 1696 3947 UChar area[MAX_REG_WRITE_SIZE]; 3948 tl_assert(size <= MAX_REG_WRITE_SIZE); 3949 VG_(memset)(area, V_BITS8_DEFINED, size); 3950 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area ); 3951 # undef MAX_REG_WRITE_SIZE 3952 } 3953 3954 static 3955 void mc_post_reg_write_clientcall ( ThreadId tid, 3956 PtrdiffT offset, SizeT size, Addr f) 3957 { 3958 mc_post_reg_write(/*dummy*/0, tid, offset, size); 3959 } 3960 3961 /* Look at the definedness of the guest's shadow state for 3962 [offset, offset+len). If any part of that is undefined, record 3963 a parameter error. 3964 */ 3965 static void mc_pre_reg_read ( CorePart part, ThreadId tid, Char* s, 3966 PtrdiffT offset, SizeT size) 3967 { 3968 Int i; 3969 Bool bad; 3970 UInt otag; 3971 3972 UChar area[16]; 3973 tl_assert(size <= 16); 3974 3975 VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size ); 3976 3977 bad = False; 3978 for (i = 0; i < size; i++) { 3979 if (area[i] != V_BITS8_DEFINED) { 3980 bad = True; 3981 break; 3982 } 3983 } 3984 3985 if (!bad) 3986 return; 3987 3988 /* We've found some undefinedness. See if we can also find an 3989 origin for it. */ 3990 otag = mb_get_origin_for_guest_offset( tid, offset, size ); 3991 MC_(record_regparam_error) ( tid, s, otag ); 3992 } 3993 3994 3995 /*------------------------------------------------------------*/ 3996 /*--- Functions called directly from generated code: ---*/ 3997 /*--- Load/store handlers. ---*/ 3998 /*------------------------------------------------------------*/ 3999 4000 /* Types: LOADV32, LOADV16, LOADV8 are: 4001 UWord fn ( Addr a ) 4002 so they return 32-bits on 32-bit machines and 64-bits on 4003 64-bit machines. Addr has the same size as a host word. 4004 4005 LOADV64 is always ULong fn ( Addr a ) 4006 4007 Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits 4008 are a UWord, and for STOREV64 they are a ULong. 4009 */ 4010 4011 /* If any part of '_a' indicated by the mask is 1, either '_a' is not 4012 naturally '_sz/8'-aligned, or it exceeds the range covered by the 4013 primary map. This is all very tricky (and important!), so let's 4014 work through the maths by hand (below), *and* assert for these 4015 values at startup. */ 4016 #define MASK(_szInBytes) \ 4017 ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) ) 4018 4019 /* MASK only exists so as to define this macro. */ 4020 #define UNALIGNED_OR_HIGH(_a,_szInBits) \ 4021 ((_a) & MASK((_szInBits>>3))) 4022 4023 /* On a 32-bit machine: 4024 4025 N_PRIMARY_BITS == 16, so 4026 N_PRIMARY_MAP == 0x10000, so 4027 N_PRIMARY_MAP-1 == 0xFFFF, so 4028 (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so 4029 4030 MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 ) 4031 = ~ ( 0xFFFF | 0xFFFF0000 ) 4032 = ~ 0xFFFF'FFFF 4033 = 0 4034 4035 MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 ) 4036 = ~ ( 0xFFFE | 0xFFFF0000 ) 4037 = ~ 0xFFFF'FFFE 4038 = 1 4039 4040 MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 ) 4041 = ~ ( 0xFFFC | 0xFFFF0000 ) 4042 = ~ 0xFFFF'FFFC 4043 = 3 4044 4045 MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 ) 4046 = ~ ( 0xFFF8 | 0xFFFF0000 ) 4047 = ~ 0xFFFF'FFF8 4048 = 7 4049 4050 Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value 4051 precisely when a is not 1/2/4/8-bytes aligned. And obviously, for 4052 the 1-byte alignment case, it is always a zero value, since MASK(1) 4053 is zero. All as expected. 
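   For instance, a 4-byte access at 0x8000F00D gives
   0x8000F00D & MASK(4) = 0x8000F00D & 3 = 1, which is nonzero, so the
   misaligned access is routed to the slow path; the aligned address
   0x8000F00C gives 0x8000F00C & 3 = 0 and stays on the fast path.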
4054 4055 On a 64-bit machine, it's more complex, since we're testing 4056 simultaneously for misalignment and for the address being at or 4057 above 32G: 4058 4059 N_PRIMARY_BITS == 19, so 4060 N_PRIMARY_MAP == 0x80000, so 4061 N_PRIMARY_MAP-1 == 0x7FFFF, so 4062 (N_PRIMARY_MAP-1) << 16 == 0x7FFFF'0000, and so 4063 4064 MASK(1) = ~ ( (0x10000 - 1) | 0x7FFFF'0000 ) 4065 = ~ ( 0xFFFF | 0x7FFFF'0000 ) 4066 = ~ 0x7FFFF'FFFF 4067 = 0xFFFF'FFF8'0000'0000 4068 4069 MASK(2) = ~ ( (0x10000 - 2) | 0x7FFFF'0000 ) 4070 = ~ ( 0xFFFE | 0x7FFFF'0000 ) 4071 = ~ 0x7FFFF'FFFE 4072 = 0xFFFF'FFF8'0000'0001 4073 4074 MASK(4) = ~ ( (0x10000 - 4) | 0x7FFFF'0000 ) 4075 = ~ ( 0xFFFC | 0x7FFFF'0000 ) 4076 = ~ 0x7FFFF'FFFC 4077 = 0xFFFF'FFF8'0000'0003 4078 4079 MASK(8) = ~ ( (0x10000 - 8) | 0x7FFFF'0000 ) 4080 = ~ ( 0xFFF8 | 0x7FFFF'0000 ) 4081 = ~ 0x7FFFF'FFF8 4082 = 0xFFFF'FFF8'0000'0007 4083 */ 4084 4085 4086 /* ------------------------ Size = 8 ------------------------ */ 4087 4088 static INLINE 4089 ULong mc_LOADV64 ( Addr a, Bool isBigEndian ) 4090 { 4091 PROF_EVENT(200, "mc_LOADV64"); 4092 4093 #ifndef PERF_FAST_LOADV 4094 return mc_LOADVn_slow( a, 64, isBigEndian ); 4095 #else 4096 { 4097 UWord sm_off16, vabits16; 4098 SecMap* sm; 4099 4100 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) { 4101 PROF_EVENT(201, "mc_LOADV64-slow1"); 4102 return (ULong)mc_LOADVn_slow( a, 64, isBigEndian ); 4103 } 4104 4105 sm = get_secmap_for_reading_low(a); 4106 sm_off16 = SM_OFF_16(a); 4107 vabits16 = ((UShort*)(sm->vabits8))[sm_off16]; 4108 4109 // Handle common case quickly: a is suitably aligned, is mapped, and 4110 // addressible. 4111 // Convert V bits from compact memory form to expanded register form. 4112 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) { 4113 return V_BITS64_DEFINED; 4114 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) { 4115 return V_BITS64_UNDEFINED; 4116 } else { 4117 /* Slow case: the 8 bytes are not all-defined or all-undefined. */ 4118 PROF_EVENT(202, "mc_LOADV64-slow2"); 4119 return mc_LOADVn_slow( a, 64, isBigEndian ); 4120 } 4121 } 4122 #endif 4123 } 4124 4125 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a ) 4126 { 4127 return mc_LOADV64(a, True); 4128 } 4129 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a ) 4130 { 4131 return mc_LOADV64(a, False); 4132 } 4133 4134 4135 static INLINE 4136 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian ) 4137 { 4138 PROF_EVENT(210, "mc_STOREV64"); 4139 4140 #ifndef PERF_FAST_STOREV 4141 // XXX: this slow case seems to be marginally faster than the fast case! 4142 // Investigate further. 4143 mc_STOREVn_slow( a, 64, vbits64, isBigEndian ); 4144 #else 4145 { 4146 UWord sm_off16, vabits16; 4147 SecMap* sm; 4148 4149 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) { 4150 PROF_EVENT(211, "mc_STOREV64-slow1"); 4151 mc_STOREVn_slow( a, 64, vbits64, isBigEndian ); 4152 return; 4153 } 4154 4155 sm = get_secmap_for_reading_low(a); 4156 sm_off16 = SM_OFF_16(a); 4157 vabits16 = ((UShort*)(sm->vabits8))[sm_off16]; 4158 4159 if (LIKELY( !is_distinguished_sm(sm) && 4160 (VA_BITS16_DEFINED == vabits16 || 4161 VA_BITS16_UNDEFINED == vabits16) )) 4162 { 4163 /* Handle common case quickly: a is suitably aligned, */ 4164 /* is mapped, and is addressible. */ 4165 // Convert full V-bits in register to compact 2-bit form. 
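         // i.e. the whole aligned 8-byte range can be switched to
         // VA_BITS16_DEFINED or VA_BITS16_UNDEFINED with a single halfword
         // store; any other vbits pattern drops to the byte-at-a-time
         // slow path.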
4166 if (V_BITS64_DEFINED == vbits64) { 4167 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED; 4168 } else if (V_BITS64_UNDEFINED == vbits64) { 4169 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED; 4170 } else { 4171 /* Slow but general case -- writing partially defined bytes. */ 4172 PROF_EVENT(212, "mc_STOREV64-slow2"); 4173 mc_STOREVn_slow( a, 64, vbits64, isBigEndian ); 4174 } 4175 } else { 4176 /* Slow but general case. */ 4177 PROF_EVENT(213, "mc_STOREV64-slow3"); 4178 mc_STOREVn_slow( a, 64, vbits64, isBigEndian ); 4179 } 4180 } 4181 #endif 4182 } 4183 4184 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 ) 4185 { 4186 mc_STOREV64(a, vbits64, True); 4187 } 4188 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 ) 4189 { 4190 mc_STOREV64(a, vbits64, False); 4191 } 4192 4193 4194 /* ------------------------ Size = 4 ------------------------ */ 4195 4196 static INLINE 4197 UWord mc_LOADV32 ( Addr a, Bool isBigEndian ) 4198 { 4199 PROF_EVENT(220, "mc_LOADV32"); 4200 4201 #ifndef PERF_FAST_LOADV 4202 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian ); 4203 #else 4204 { 4205 UWord sm_off, vabits8; 4206 SecMap* sm; 4207 4208 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) { 4209 PROF_EVENT(221, "mc_LOADV32-slow1"); 4210 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian ); 4211 } 4212 4213 sm = get_secmap_for_reading_low(a); 4214 sm_off = SM_OFF(a); 4215 vabits8 = sm->vabits8[sm_off]; 4216 4217 // Handle common case quickly: a is suitably aligned, is mapped, and the 4218 // entire word32 it lives in is addressible. 4219 // Convert V bits from compact memory form to expanded register form. 4220 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined). 4221 // Almost certainly not necessary, but be paranoid. 4222 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) { 4223 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED); 4224 } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { 4225 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED); 4226 } else { 4227 /* Slow case: the 4 bytes are not all-defined or all-undefined. */ 4228 PROF_EVENT(222, "mc_LOADV32-slow2"); 4229 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian ); 4230 } 4231 } 4232 #endif 4233 } 4234 4235 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a ) 4236 { 4237 return mc_LOADV32(a, True); 4238 } 4239 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a ) 4240 { 4241 return mc_LOADV32(a, False); 4242 } 4243 4244 4245 static INLINE 4246 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian ) 4247 { 4248 PROF_EVENT(230, "mc_STOREV32"); 4249 4250 #ifndef PERF_FAST_STOREV 4251 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian ); 4252 #else 4253 { 4254 UWord sm_off, vabits8; 4255 SecMap* sm; 4256 4257 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) { 4258 PROF_EVENT(231, "mc_STOREV32-slow1"); 4259 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian ); 4260 return; 4261 } 4262 4263 sm = get_secmap_for_reading_low(a); 4264 sm_off = SM_OFF(a); 4265 vabits8 = sm->vabits8[sm_off]; 4266 4267 // Cleverness: sometimes we don't have to write the shadow memory at 4268 // all, if we can tell that what we want to write is the same as what is 4269 // already there. The 64/16/8 bit cases also have cleverness at this 4270 // point, but it works a little differently to the code below. 
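      // In particular, when the 4 bytes are already marked entirely defined
      // (or entirely undefined) and the incoming vbits say the same thing,
      // we return without touching sm->vabits8 at all -- which also avoids
      // copying a distinguished secondary map just to rewrite the same value.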
4271 if (V_BITS32_DEFINED == vbits32) { 4272 if (vabits8 == (UInt)VA_BITS8_DEFINED) { 4273 return; 4274 } else if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) { 4275 sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED; 4276 } else { 4277 // not defined/undefined, or distinguished and changing state 4278 PROF_EVENT(232, "mc_STOREV32-slow2"); 4279 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian ); 4280 } 4281 } else if (V_BITS32_UNDEFINED == vbits32) { 4282 if (vabits8 == (UInt)VA_BITS8_UNDEFINED) { 4283 return; 4284 } else if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) { 4285 sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED; 4286 } else { 4287 // not defined/undefined, or distinguished and changing state 4288 PROF_EVENT(233, "mc_STOREV32-slow3"); 4289 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian ); 4290 } 4291 } else { 4292 // Partially defined word 4293 PROF_EVENT(234, "mc_STOREV32-slow4"); 4294 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian ); 4295 } 4296 } 4297 #endif 4298 } 4299 4300 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 ) 4301 { 4302 mc_STOREV32(a, vbits32, True); 4303 } 4304 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 ) 4305 { 4306 mc_STOREV32(a, vbits32, False); 4307 } 4308 4309 4310 /* ------------------------ Size = 2 ------------------------ */ 4311 4312 static INLINE 4313 UWord mc_LOADV16 ( Addr a, Bool isBigEndian ) 4314 { 4315 PROF_EVENT(240, "mc_LOADV16"); 4316 4317 #ifndef PERF_FAST_LOADV 4318 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian ); 4319 #else 4320 { 4321 UWord sm_off, vabits8; 4322 SecMap* sm; 4323 4324 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) { 4325 PROF_EVENT(241, "mc_LOADV16-slow1"); 4326 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian ); 4327 } 4328 4329 sm = get_secmap_for_reading_low(a); 4330 sm_off = SM_OFF(a); 4331 vabits8 = sm->vabits8[sm_off]; 4332 // Handle common case quickly: a is suitably aligned, is mapped, and is 4333 // addressible. 4334 // Convert V bits from compact memory form to expanded register form 4335 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS16_DEFINED; } 4336 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; } 4337 else { 4338 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check 4339 // the two sub-bytes. 4340 UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8); 4341 if (vabits4 == VA_BITS4_DEFINED ) { return V_BITS16_DEFINED; } 4342 else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; } 4343 else { 4344 /* Slow case: the two bytes are not all-defined or all-undefined. 
*/ 4345 PROF_EVENT(242, "mc_LOADV16-slow2"); 4346 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian ); 4347 } 4348 } 4349 } 4350 #endif 4351 } 4352 4353 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a ) 4354 { 4355 return mc_LOADV16(a, True); 4356 } 4357 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a ) 4358 { 4359 return mc_LOADV16(a, False); 4360 } 4361 4362 4363 static INLINE 4364 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian ) 4365 { 4366 PROF_EVENT(250, "mc_STOREV16"); 4367 4368 #ifndef PERF_FAST_STOREV 4369 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian ); 4370 #else 4371 { 4372 UWord sm_off, vabits8; 4373 SecMap* sm; 4374 4375 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) { 4376 PROF_EVENT(251, "mc_STOREV16-slow1"); 4377 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian ); 4378 return; 4379 } 4380 4381 sm = get_secmap_for_reading_low(a); 4382 sm_off = SM_OFF(a); 4383 vabits8 = sm->vabits8[sm_off]; 4384 if (LIKELY( !is_distinguished_sm(sm) && 4385 (VA_BITS8_DEFINED == vabits8 || 4386 VA_BITS8_UNDEFINED == vabits8) )) 4387 { 4388 /* Handle common case quickly: a is suitably aligned, */ 4389 /* is mapped, and is addressible. */ 4390 // Convert full V-bits in register to compact 2-bit form. 4391 if (V_BITS16_DEFINED == vbits16) { 4392 insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED , 4393 &(sm->vabits8[sm_off]) ); 4394 } else if (V_BITS16_UNDEFINED == vbits16) { 4395 insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED, 4396 &(sm->vabits8[sm_off]) ); 4397 } else { 4398 /* Slow but general case -- writing partially defined bytes. */ 4399 PROF_EVENT(252, "mc_STOREV16-slow2"); 4400 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian ); 4401 } 4402 } else { 4403 /* Slow but general case. */ 4404 PROF_EVENT(253, "mc_STOREV16-slow3"); 4405 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian ); 4406 } 4407 } 4408 #endif 4409 } 4410 4411 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 ) 4412 { 4413 mc_STOREV16(a, vbits16, True); 4414 } 4415 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 ) 4416 { 4417 mc_STOREV16(a, vbits16, False); 4418 } 4419 4420 4421 /* ------------------------ Size = 1 ------------------------ */ 4422 /* Note: endianness is irrelevant for size == 1 */ 4423 4424 VG_REGPARM(1) 4425 UWord MC_(helperc_LOADV8) ( Addr a ) 4426 { 4427 PROF_EVENT(260, "mc_LOADV8"); 4428 4429 #ifndef PERF_FAST_LOADV 4430 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ ); 4431 #else 4432 { 4433 UWord sm_off, vabits8; 4434 SecMap* sm; 4435 4436 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) { 4437 PROF_EVENT(261, "mc_LOADV8-slow1"); 4438 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ ); 4439 } 4440 4441 sm = get_secmap_for_reading_low(a); 4442 sm_off = SM_OFF(a); 4443 vabits8 = sm->vabits8[sm_off]; 4444 // Convert V bits from compact memory form to expanded register form 4445 // Handle common case quickly: a is mapped, and the entire 4446 // word32 it lives in is addressible. 4447 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS8_DEFINED; } 4448 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; } 4449 else { 4450 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check 4451 // the single byte. 4452 UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8); 4453 if (vabits2 == VA_BITS2_DEFINED ) { return V_BITS8_DEFINED; } 4454 else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; } 4455 else { 4456 /* Slow case: the byte is not all-defined or all-undefined. 
*/ 4457 PROF_EVENT(262, "mc_LOADV8-slow2"); 4458 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ ); 4459 } 4460 } 4461 } 4462 #endif 4463 } 4464 4465 4466 VG_REGPARM(2) 4467 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 ) 4468 { 4469 PROF_EVENT(270, "mc_STOREV8"); 4470 4471 #ifndef PERF_FAST_STOREV 4472 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ ); 4473 #else 4474 { 4475 UWord sm_off, vabits8; 4476 SecMap* sm; 4477 4478 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) { 4479 PROF_EVENT(271, "mc_STOREV8-slow1"); 4480 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ ); 4481 return; 4482 } 4483 4484 sm = get_secmap_for_reading_low(a); 4485 sm_off = SM_OFF(a); 4486 vabits8 = sm->vabits8[sm_off]; 4487 if (LIKELY 4488 ( !is_distinguished_sm(sm) && 4489 ( (VA_BITS8_DEFINED == vabits8 || VA_BITS8_UNDEFINED == vabits8) 4490 || (VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) 4491 ) 4492 ) 4493 ) 4494 { 4495 /* Handle common case quickly: a is mapped, the entire word32 it 4496 lives in is addressible. */ 4497 // Convert full V-bits in register to compact 2-bit form. 4498 if (V_BITS8_DEFINED == vbits8) { 4499 insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED, 4500 &(sm->vabits8[sm_off]) ); 4501 } else if (V_BITS8_UNDEFINED == vbits8) { 4502 insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED, 4503 &(sm->vabits8[sm_off]) ); 4504 } else { 4505 /* Slow but general case -- writing partially defined bytes. */ 4506 PROF_EVENT(272, "mc_STOREV8-slow2"); 4507 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ ); 4508 } 4509 } else { 4510 /* Slow but general case. */ 4511 PROF_EVENT(273, "mc_STOREV8-slow3"); 4512 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ ); 4513 } 4514 } 4515 #endif 4516 } 4517 4518 4519 /*------------------------------------------------------------*/ 4520 /*--- Functions called directly from generated code: ---*/ 4521 /*--- Value-check failure handlers. ---*/ 4522 /*------------------------------------------------------------*/ 4523 4524 /* Call these ones when an origin is available ... */ 4525 VG_REGPARM(1) 4526 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) { 4527 MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin ); 4528 } 4529 4530 VG_REGPARM(1) 4531 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) { 4532 MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin ); 4533 } 4534 4535 VG_REGPARM(1) 4536 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) { 4537 MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin ); 4538 } 4539 4540 VG_REGPARM(1) 4541 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) { 4542 MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin ); 4543 } 4544 4545 VG_REGPARM(2) 4546 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) { 4547 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin ); 4548 } 4549 4550 /* ... and these when an origin isn't available. 
*/ 4551 4552 VG_REGPARM(0) 4553 void MC_(helperc_value_check0_fail_no_o) ( void ) { 4554 MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ ); 4555 } 4556 4557 VG_REGPARM(0) 4558 void MC_(helperc_value_check1_fail_no_o) ( void ) { 4559 MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ ); 4560 } 4561 4562 VG_REGPARM(0) 4563 void MC_(helperc_value_check4_fail_no_o) ( void ) { 4564 MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ ); 4565 } 4566 4567 VG_REGPARM(0) 4568 void MC_(helperc_value_check8_fail_no_o) ( void ) { 4569 MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ ); 4570 } 4571 4572 VG_REGPARM(1) 4573 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) { 4574 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ ); 4575 } 4576 4577 4578 /*------------------------------------------------------------*/ 4579 /*--- Metadata get/set functions, for client requests. ---*/ 4580 /*------------------------------------------------------------*/ 4581 4582 // Nb: this expands the V+A bits out into register-form V bits, even though 4583 // they're in memory. This is for backward compatibility, and because it's 4584 // probably what the user wants. 4585 4586 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment 4587 error [no longer used], 3 == addressing error. */ 4588 /* Nb: We used to issue various definedness/addressability errors from here, 4589 but we took them out because they ranged from not-very-helpful to 4590 downright annoying, and they complicated the error data structures. */ 4591 static Int mc_get_or_set_vbits_for_client ( 4592 Addr a, 4593 Addr vbits, 4594 SizeT szB, 4595 Bool setting, /* True <=> set vbits, False <=> get vbits */ 4596 Bool is_client_request /* True <=> real user request 4597 False <=> internal call from gdbserver */ 4598 ) 4599 { 4600 SizeT i; 4601 Bool ok; 4602 UChar vbits8; 4603 4604 /* Check that arrays are addressible before doing any getting/setting. 4605 vbits to be checked only for real user request. */ 4606 for (i = 0; i < szB; i++) { 4607 if (VA_BITS2_NOACCESS == get_vabits2(a + i) || 4608 (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) { 4609 return 3; 4610 } 4611 } 4612 4613 /* Do the copy */ 4614 if (setting) { 4615 /* setting */ 4616 for (i = 0; i < szB; i++) { 4617 ok = set_vbits8(a + i, ((UChar*)vbits)[i]); 4618 tl_assert(ok); 4619 } 4620 } else { 4621 /* getting */ 4622 for (i = 0; i < szB; i++) { 4623 ok = get_vbits8(a + i, &vbits8); 4624 tl_assert(ok); 4625 ((UChar*)vbits)[i] = vbits8; 4626 } 4627 if (is_client_request) 4628 // The bytes in vbits[] have now been set, so mark them as such. 4629 MC_(make_mem_defined)(vbits, szB); 4630 } 4631 4632 return 1; 4633 } 4634 4635 4636 /*------------------------------------------------------------*/ 4637 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/ 4638 /*------------------------------------------------------------*/ 4639 4640 /* For the memory leak detector, say whether an entire 64k chunk of 4641 address space is possibly in use, or not. If in doubt return 4642 True. 4643 */ 4644 Bool MC_(is_within_valid_secondary) ( Addr a ) 4645 { 4646 SecMap* sm = maybe_get_secmap_for ( a ); 4647 if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) { 4648 /* Definitely not in use. */ 4649 return False; 4650 } else { 4651 return True; 4652 } 4653 } 4654 4655 4656 /* For the memory leak detector, say whether or not a given word 4657 address is to be regarded as valid. 
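   On 64-bit hosts the word is checked as two aligned 32-bit halves, and
   addresses for which MC_(in_ignored_range) holds are never treated as
   valid.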
*/ 4658 Bool MC_(is_valid_aligned_word) ( Addr a ) 4659 { 4660 tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8); 4661 tl_assert(VG_IS_WORD_ALIGNED(a)); 4662 if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED) 4663 return False; 4664 if (sizeof(UWord) == 8) { 4665 if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED) 4666 return False; 4667 } 4668 if (UNLIKELY(MC_(in_ignored_range)(a))) 4669 return False; 4670 else 4671 return True; 4672 } 4673 4674 4675 /*------------------------------------------------------------*/ 4676 /*--- Initialisation ---*/ 4677 /*------------------------------------------------------------*/ 4678 4679 static void init_shadow_memory ( void ) 4680 { 4681 Int i; 4682 SecMap* sm; 4683 4684 tl_assert(V_BIT_UNDEFINED == 1); 4685 tl_assert(V_BIT_DEFINED == 0); 4686 tl_assert(V_BITS8_UNDEFINED == 0xFF); 4687 tl_assert(V_BITS8_DEFINED == 0); 4688 4689 /* Build the 3 distinguished secondaries */ 4690 sm = &sm_distinguished[SM_DIST_NOACCESS]; 4691 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS; 4692 4693 sm = &sm_distinguished[SM_DIST_UNDEFINED]; 4694 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED; 4695 4696 sm = &sm_distinguished[SM_DIST_DEFINED]; 4697 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED; 4698 4699 /* Set up the primary map. */ 4700 /* These entries gradually get overwritten as the used address 4701 space expands. */ 4702 for (i = 0; i < N_PRIMARY_MAP; i++) 4703 primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS]; 4704 4705 /* Auxiliary primary maps */ 4706 init_auxmap_L1_L2(); 4707 4708 /* auxmap_size = auxmap_used = 0; 4709 no ... these are statically initialised */ 4710 4711 /* Secondary V bit table */ 4712 secVBitTable = createSecVBitTable(); 4713 } 4714 4715 4716 /*------------------------------------------------------------*/ 4717 /*--- Sanity check machinery (permanently engaged) ---*/ 4718 /*------------------------------------------------------------*/ 4719 4720 static Bool mc_cheap_sanity_check ( void ) 4721 { 4722 n_sanity_cheap++; 4723 PROF_EVENT(490, "cheap_sanity_check"); 4724 /* Check for sane operating level */ 4725 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3) 4726 return False; 4727 /* nothing else useful we can rapidly check */ 4728 return True; 4729 } 4730 4731 static Bool mc_expensive_sanity_check ( void ) 4732 { 4733 Int i; 4734 Word n_secmaps_found; 4735 SecMap* sm; 4736 HChar* errmsg; 4737 Bool bad = False; 4738 4739 if (0) VG_(printf)("expensive sanity check\n"); 4740 if (0) return True; 4741 4742 n_sanity_expensive++; 4743 PROF_EVENT(491, "expensive_sanity_check"); 4744 4745 /* Check for sane operating level */ 4746 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3) 4747 return False; 4748 4749 /* Check that the 3 distinguished SMs are still as they should be. */ 4750 4751 /* Check noaccess DSM. */ 4752 sm = &sm_distinguished[SM_DIST_NOACCESS]; 4753 for (i = 0; i < SM_CHUNKS; i++) 4754 if (sm->vabits8[i] != VA_BITS8_NOACCESS) 4755 bad = True; 4756 4757 /* Check undefined DSM. */ 4758 sm = &sm_distinguished[SM_DIST_UNDEFINED]; 4759 for (i = 0; i < SM_CHUNKS; i++) 4760 if (sm->vabits8[i] != VA_BITS8_UNDEFINED) 4761 bad = True; 4762 4763 /* Check defined DSM. 
*/ 4764 sm = &sm_distinguished[SM_DIST_DEFINED]; 4765 for (i = 0; i < SM_CHUNKS; i++) 4766 if (sm->vabits8[i] != VA_BITS8_DEFINED) 4767 bad = True; 4768 4769 if (bad) { 4770 VG_(printf)("memcheck expensive sanity: " 4771 "distinguished_secondaries have changed\n"); 4772 return False; 4773 } 4774 4775 /* If we're not checking for undefined value errors, the secondary V bit 4776 * table should be empty. */ 4777 if (MC_(clo_mc_level) == 1) { 4778 if (0 != VG_(OSetGen_Size)(secVBitTable)) 4779 return False; 4780 } 4781 4782 /* check the auxiliary maps, very thoroughly */ 4783 n_secmaps_found = 0; 4784 errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found ); 4785 if (errmsg) { 4786 VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg); 4787 return False; 4788 } 4789 4790 /* n_secmaps_found is now the number referred to by the auxiliary 4791 primary map. Now add on the ones referred to by the main 4792 primary map. */ 4793 for (i = 0; i < N_PRIMARY_MAP; i++) { 4794 if (primary_map[i] == NULL) { 4795 bad = True; 4796 } else { 4797 if (!is_distinguished_sm(primary_map[i])) 4798 n_secmaps_found++; 4799 } 4800 } 4801 4802 /* check that the number of secmaps issued matches the number that 4803 are reachable (iow, no secmap leaks) */ 4804 if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs)) 4805 bad = True; 4806 4807 if (bad) { 4808 VG_(printf)("memcheck expensive sanity: " 4809 "apparent secmap leakage\n"); 4810 return False; 4811 } 4812 4813 if (bad) { 4814 VG_(printf)("memcheck expensive sanity: " 4815 "auxmap covers wrong address space\n"); 4816 return False; 4817 } 4818 4819 /* there is only one pointer to each secmap (expensive) */ 4820 4821 return True; 4822 } 4823 4824 /*------------------------------------------------------------*/ 4825 /*--- Command line args ---*/ 4826 /*------------------------------------------------------------*/ 4827 4828 Bool MC_(clo_partial_loads_ok) = False; 4829 Long MC_(clo_freelist_vol) = 20*1000*1000LL; 4830 Long MC_(clo_freelist_big_blocks) = 1*1000*1000LL; 4831 LeakCheckMode MC_(clo_leak_check) = LC_Summary; 4832 VgRes MC_(clo_leak_resolution) = Vg_HighRes; 4833 Bool MC_(clo_show_reachable) = False; 4834 Bool MC_(clo_show_possibly_lost) = True; 4835 Bool MC_(clo_workaround_gcc296_bugs) = False; 4836 Int MC_(clo_malloc_fill) = -1; 4837 Int MC_(clo_free_fill) = -1; 4838 Int MC_(clo_mc_level) = 2; 4839 4840 static Bool mc_process_cmd_line_options(Char* arg) 4841 { 4842 Char* tmp_str; 4843 4844 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 ); 4845 4846 /* Set MC_(clo_mc_level): 4847 1 = A bit tracking only 4848 2 = A and V bit tracking, but no V bit origins 4849 3 = A and V bit tracking, and V bit origins 4850 4851 Do this by inspecting --undef-value-errors= and 4852 --track-origins=. Reject the case --undef-value-errors=no 4853 --track-origins=yes as meaningless. 
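   As a quick reference, the logic implemented below amounts to the
   following (with neither flag given, the default level is 2):

      --undef-value-errors   --track-origins      resulting MC_(clo_mc_level)
              no                    no                       1
              yes                   no                       2
              yes                   yes                      3
              no                    yes                 rejected (bad_level)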
4854 */ 4855 if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) { 4856 if (MC_(clo_mc_level) == 3) { 4857 goto bad_level; 4858 } else { 4859 MC_(clo_mc_level) = 1; 4860 return True; 4861 } 4862 } 4863 if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) { 4864 if (MC_(clo_mc_level) == 1) 4865 MC_(clo_mc_level) = 2; 4866 return True; 4867 } 4868 if (0 == VG_(strcmp)(arg, "--track-origins=no")) { 4869 if (MC_(clo_mc_level) == 3) 4870 MC_(clo_mc_level) = 2; 4871 return True; 4872 } 4873 if (0 == VG_(strcmp)(arg, "--track-origins=yes")) { 4874 if (MC_(clo_mc_level) == 1) { 4875 goto bad_level; 4876 } else { 4877 MC_(clo_mc_level) = 3; 4878 return True; 4879 } 4880 } 4881 4882 if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {} 4883 else if VG_BOOL_CLO(arg, "--show-reachable", MC_(clo_show_reachable)) {} 4884 else if VG_BOOL_CLO(arg, "--show-possibly-lost", 4885 MC_(clo_show_possibly_lost)) {} 4886 else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs", 4887 MC_(clo_workaround_gcc296_bugs)) {} 4888 4889 else if VG_BINT_CLO(arg, "--freelist-vol", MC_(clo_freelist_vol), 4890 0, 10*1000*1000*1000LL) {} 4891 4892 else if VG_BINT_CLO(arg, "--freelist-big-blocks", 4893 MC_(clo_freelist_big_blocks), 4894 0, 10*1000*1000*1000LL) {} 4895 4896 else if VG_XACT_CLO(arg, "--leak-check=no", 4897 MC_(clo_leak_check), LC_Off) {} 4898 else if VG_XACT_CLO(arg, "--leak-check=summary", 4899 MC_(clo_leak_check), LC_Summary) {} 4900 else if VG_XACT_CLO(arg, "--leak-check=yes", 4901 MC_(clo_leak_check), LC_Full) {} 4902 else if VG_XACT_CLO(arg, "--leak-check=full", 4903 MC_(clo_leak_check), LC_Full) {} 4904 4905 else if VG_XACT_CLO(arg, "--leak-resolution=low", 4906 MC_(clo_leak_resolution), Vg_LowRes) {} 4907 else if VG_XACT_CLO(arg, "--leak-resolution=med", 4908 MC_(clo_leak_resolution), Vg_MedRes) {} 4909 else if VG_XACT_CLO(arg, "--leak-resolution=high", 4910 MC_(clo_leak_resolution), Vg_HighRes) {} 4911 4912 else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) { 4913 Int i; 4914 Bool ok = parse_ignore_ranges(tmp_str); 4915 if (!ok) 4916 return False; 4917 tl_assert(ignoreRanges.used >= 0); 4918 tl_assert(ignoreRanges.used < M_IGNORE_RANGES); 4919 for (i = 0; i < ignoreRanges.used; i++) { 4920 Addr s = ignoreRanges.start[i]; 4921 Addr e = ignoreRanges.end[i]; 4922 Addr limit = 0x4000000; /* 64M - entirely arbitrary limit */ 4923 if (e <= s) { 4924 VG_(message)(Vg_DebugMsg, 4925 "ERROR: --ignore-ranges: end <= start in range:\n"); 4926 VG_(message)(Vg_DebugMsg, 4927 " 0x%lx-0x%lx\n", s, e); 4928 return False; 4929 } 4930 if (e - s > limit) { 4931 VG_(message)(Vg_DebugMsg, 4932 "ERROR: --ignore-ranges: suspiciously large range:\n"); 4933 VG_(message)(Vg_DebugMsg, 4934 " 0x%lx-0x%lx (size %ld)\n", s, e, (UWord)(e-s)); 4935 return False; 4936 } 4937 } 4938 } 4939 4940 else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {} 4941 else if VG_BHEX_CLO(arg, "--free-fill", MC_(clo_free_fill), 0x00,0xFF) {} 4942 4943 else 4944 return VG_(replacement_malloc_process_cmd_line_option)(arg); 4945 4946 return True; 4947 4948 4949 bad_level: 4950 VG_(fmsg_bad_option)(arg, 4951 "--track-origins=yes has no effect when --undef-value-errors=no.\n"); 4952 } 4953 4954 static void mc_print_usage(void) 4955 { 4956 VG_(printf)( 4957 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n" 4958 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n" 4959 " --show-reachable=no|yes show reachable blocks in leak check? 
[no]\n" 4960 " --show-possibly-lost=no|yes show possibly lost blocks in leak check?\n" 4961 " [yes]\n" 4962 " --undef-value-errors=no|yes check for undefined value errors [yes]\n" 4963 " --track-origins=no|yes show origins of undefined values? [no]\n" 4964 " --partial-loads-ok=no|yes too hard to explain here; see manual [no]\n" 4965 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n" 4966 " --freelist-big-blocks=<number> releases first blocks with size >= [1000000]\n" 4967 " --workaround-gcc296-bugs=no|yes self explanatory [no]\n" 4968 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n" 4969 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n" 4970 " --free-fill=<hexnumber> fill free'd areas with given value\n" 4971 ); 4972 } 4973 4974 static void mc_print_debug_usage(void) 4975 { 4976 VG_(printf)( 4977 " (none)\n" 4978 ); 4979 } 4980 4981 4982 /*------------------------------------------------------------*/ 4983 /*--- Client blocks ---*/ 4984 /*------------------------------------------------------------*/ 4985 4986 /* Client block management: 4987 4988 This is managed as an expanding array of client block descriptors. 4989 Indices of live descriptors are issued to the client, so it can ask 4990 to free them later. Therefore we cannot slide live entries down 4991 over dead ones. Instead we must use free/inuse flags and scan for 4992 an empty slot at allocation time. This in turn means allocation is 4993 relatively expensive, so we hope this does not happen too often. 4994 4995 An unused block has start == size == 0 4996 */ 4997 4998 /* type CGenBlock is defined in mc_include.h */ 4999 5000 /* This subsystem is self-initialising. */ 5001 static UWord cgb_size = 0; 5002 static UWord cgb_used = 0; 5003 static CGenBlock* cgbs = NULL; 5004 5005 /* Stats for this subsystem. */ 5006 static ULong cgb_used_MAX = 0; /* Max in use. */ 5007 static ULong cgb_allocs = 0; /* Number of allocs. */ 5008 static ULong cgb_discards = 0; /* Number of discards. */ 5009 static ULong cgb_search = 0; /* Number of searches. */ 5010 5011 5012 /* Get access to the client block array. */ 5013 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks, 5014 /*OUT*/UWord* nBlocks ) 5015 { 5016 *blocks = cgbs; 5017 *nBlocks = cgb_used; 5018 } 5019 5020 5021 static 5022 Int alloc_client_block ( void ) 5023 { 5024 UWord i, sz_new; 5025 CGenBlock* cgbs_new; 5026 5027 cgb_allocs++; 5028 5029 for (i = 0; i < cgb_used; i++) { 5030 cgb_search++; 5031 if (cgbs[i].start == 0 && cgbs[i].size == 0) 5032 return i; 5033 } 5034 5035 /* Not found. Try to allocate one at the end. */ 5036 if (cgb_used < cgb_size) { 5037 cgb_used++; 5038 return cgb_used-1; 5039 } 5040 5041 /* Ok, we have to allocate a new one. */ 5042 tl_assert(cgb_used == cgb_size); 5043 sz_new = (cgbs == NULL) ? 
10 : (2 * cgb_size);
5044
5045 cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
5046 for (i = 0; i < cgb_used; i++)
5047 cgbs_new[i] = cgbs[i];
5048
5049 if (cgbs != NULL)
5050 VG_(free)( cgbs );
5051 cgbs = cgbs_new;
5052
5053 cgb_size = sz_new;
5054 cgb_used++;
5055 if (cgb_used > cgb_used_MAX)
5056 cgb_used_MAX = cgb_used;
5057 return cgb_used-1;
5058 }
5059
5060
5061 static void show_client_block_stats ( void )
5062 {
5063 VG_(message)(Vg_DebugMsg,
5064 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
5065 cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
5066 );
5067 }
5068 static void print_monitor_help ( void )
5069 {
5070 VG_(gdb_printf)
5071 (
5072 "\n"
5073 "memcheck monitor commands:\n"
5074 " get_vbits <addr> [<len>]\n"
5075 " returns validity bits for <len> (or 1) bytes at <addr>\n"
5076 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
5077 " Example: get_vbits 0x8049c78 10\n"
5078 " make_memory [noaccess|undefined\n"
5079 " |defined|Definedifaddressable] <addr> [<len>]\n"
5080 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
5081 " check_memory [addressable|defined] <addr> [<len>]\n"
5082 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
5083 " and outputs a description of <addr>\n"
5084 " leak_check [full*|summary] [reachable|possibleleak*|definiteleak]\n"
5085 " [increased*|changed|any]\n"
5086 " [unlimited*|limited <max_loss_records_output>]\n"
5087 " * = defaults\n"
5088 " Examples: leak_check\n"
5089 " leak_check summary any\n"
5090 " leak_check full reachable any limited 100\n"
5091 " block_list <loss_record_nr>\n"
5092 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
5093 " who_points_at <addr> [<len>]\n"
5094 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
5095 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
5096 " with len > 1, will also show \"interior pointers\")\n"
5097 "\n");
5098 }
5099
5100 /* return True if request recognised, False otherwise */
5101 static Bool handle_gdb_monitor_command (ThreadId tid, Char *req)
5102 {
5103 Char* wcmd;
5104 Char s[VG_(strlen(req)) + 1]; /* copy for strtok_r (+1 for the trailing '\0') */
5105 Char *ssaveptr;
5106
5107 VG_(strcpy) (s, req);
5108
5109 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
5110 /* NB: if possible, avoid introducing a new command below which
5111 starts with the same first letter(s) as an already existing
5112 command. This ensures a shorter abbreviation for the user. */
5113 switch (VG_(keyword_id)
5114 ("help get_vbits leak_check make_memory check_memory "
5115 "block_list who_points_at",
5116 wcmd, kwd_report_duplicated_matches)) {
5117 case -2: /* multiple matches */
5118 return True;
5119 case -1: /* not found */
5120 return False;
5121 case 0: /* help */
5122 print_monitor_help();
5123 return True;
5124 case 1: { /* get_vbits */
5125 Addr address;
5126 SizeT szB = 1;
5127 VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5128 if (szB != 0) {
5129 UChar vbits;
5130 Int i;
5131 Int unaddressable = 0;
5132 for (i = 0; i < szB; i++) {
5133 Int res = mc_get_or_set_vbits_for_client
5134 (address+i, (Addr) &vbits, 1,
5135 False, /* get them */
5136 False /* is client request */ );
5137 /* we are before the first character on next line, print a \n. */
5138 if ((i % 32) == 0 && i != 0)
5139 VG_(gdb_printf) ("\n");
5140 /* we are before the next block of 4 starts, print a space.
*/ 5141 else if ((i % 4) == 0 && i != 0) 5142 VG_(gdb_printf) (" "); 5143 if (res == 1) { 5144 VG_(gdb_printf) ("%02x", vbits); 5145 } else { 5146 tl_assert(3 == res); 5147 unaddressable++; 5148 VG_(gdb_printf) ("__"); 5149 } 5150 } 5151 VG_(gdb_printf) ("\n"); 5152 if (unaddressable) { 5153 VG_(gdb_printf) 5154 ("Address %p len %ld has %d bytes unaddressable\n", 5155 (void *)address, szB, unaddressable); 5156 } 5157 } 5158 return True; 5159 } 5160 case 2: { /* leak_check */ 5161 Int err = 0; 5162 LeakCheckParams lcp; 5163 Char* kw; 5164 5165 lcp.mode = LC_Full; 5166 lcp.show_reachable = False; 5167 lcp.show_possibly_lost = True; 5168 lcp.deltamode = LCD_Increased; 5169 lcp.max_loss_records_output = 999999999; 5170 lcp.requested_by_monitor_command = True; 5171 5172 for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr); 5173 kw != NULL; 5174 kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) { 5175 switch (VG_(keyword_id) 5176 ("full summary " 5177 "reachable possibleleak definiteleak " 5178 "increased changed any " 5179 "unlimited limited ", 5180 kw, kwd_report_all)) { 5181 case -2: err++; break; 5182 case -1: err++; break; 5183 case 0: /* full */ 5184 lcp.mode = LC_Full; break; 5185 case 1: /* summary */ 5186 lcp.mode = LC_Summary; break; 5187 case 2: /* reachable */ 5188 lcp.show_reachable = True; 5189 lcp.show_possibly_lost = True; break; 5190 case 3: /* possibleleak */ 5191 lcp.show_reachable = False; 5192 lcp.show_possibly_lost = True; break; 5193 case 4: /* definiteleak */ 5194 lcp.show_reachable = False; 5195 lcp.show_possibly_lost = False; break; 5196 case 5: /* increased */ 5197 lcp.deltamode = LCD_Increased; break; 5198 case 6: /* changed */ 5199 lcp.deltamode = LCD_Changed; break; 5200 case 7: /* any */ 5201 lcp.deltamode = LCD_Any; break; 5202 case 8: /* unlimited */ 5203 lcp.max_loss_records_output = 999999999; break; 5204 case 9: { /* limited */ 5205 int int_value; 5206 char* endptr; 5207 5208 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr); 5209 if (wcmd == NULL) { 5210 int_value = 0; 5211 endptr = "empty"; /* to report an error below */ 5212 } else { 5213 int_value = VG_(strtoll10) (wcmd, (Char **)&endptr); 5214 } 5215 if (*endptr != '\0') 5216 VG_(gdb_printf) ("missing or malformed integer value\n"); 5217 else if (int_value > 0) 5218 lcp.max_loss_records_output = (UInt) int_value; 5219 else 5220 VG_(gdb_printf) ("max_loss_records_output must be >= 1, got %d\n", 5221 int_value); 5222 break; 5223 } 5224 default: 5225 tl_assert (0); 5226 } 5227 } 5228 if (!err) 5229 MC_(detect_memory_leaks)(tid, &lcp); 5230 return True; 5231 } 5232 5233 case 3: { /* make_memory */ 5234 Addr address; 5235 SizeT szB = 1; 5236 int kwdid = VG_(keyword_id) 5237 ("noaccess undefined defined Definedifaddressable", 5238 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all); 5239 VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr); 5240 if (address == (Addr) 0 && szB == 0) return True; 5241 switch (kwdid) { 5242 case -2: break; 5243 case -1: break; 5244 case 0: MC_(make_mem_noaccess) (address, szB); break; 5245 case 1: make_mem_undefined_w_tid_and_okind ( address, szB, tid, 5246 MC_OKIND_USER ); break; 5247 case 2: MC_(make_mem_defined) ( address, szB ); break; 5248 case 3: make_mem_defined_if_addressable ( address, szB ); break;; 5249 default: tl_assert(0); 5250 } 5251 return True; 5252 } 5253 5254 case 4: { /* check_memory */ 5255 Addr address; 5256 SizeT szB = 1; 5257 Addr bad_addr; 5258 UInt okind; 5259 char* src; 5260 UInt otag; 5261 UInt ecu; 5262 ExeContext* origin_ec; 5263 MC_ReadResult res; 5264 5265 
int kwdid = VG_(keyword_id)
5266 ("addressable defined",
5267 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5268 VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5269 if (address == (Addr) 0 && szB == 0) return True;
5270 switch (kwdid) {
5271 case -2: break;
5272 case -1: break;
5273 case 0:
5274 if (is_mem_addressable ( address, szB, &bad_addr ))
5275 VG_(gdb_printf) ("Address %p len %ld addressable\n",
5276 (void *)address, szB);
5277 else
5278 VG_(gdb_printf)
5279 ("Address %p len %ld not addressable:\nbad address %p\n",
5280 (void *)address, szB, (void *) bad_addr);
5281 MC_(pp_describe_addr) (address);
5282 break;
5283 case 1: res = is_mem_defined ( address, szB, &bad_addr, &otag );
5284 if (MC_AddrErr == res)
5285 VG_(gdb_printf)
5286 ("Address %p len %ld not addressable:\nbad address %p\n",
5287 (void *)address, szB, (void *) bad_addr);
5288 else if (MC_ValueErr == res) {
5289 okind = otag & 3;
5290 switch (okind) {
5291 case MC_OKIND_STACK:
5292 src = " was created by a stack allocation"; break;
5293 case MC_OKIND_HEAP:
5294 src = " was created by a heap allocation"; break;
5295 case MC_OKIND_USER:
5296 src = " was created by a client request"; break;
5297 case MC_OKIND_UNKNOWN:
5298 src = ""; break;
5299 default: tl_assert(0);
5300 }
5301 VG_(gdb_printf)
5302 ("Address %p len %ld not defined:\n"
5303 "Uninitialised value at %p%s\n",
5304 (void *)address, szB, (void *) bad_addr, src);
5305 ecu = otag & ~3;
5306 if (VG_(is_plausible_ECU)(ecu)) {
5307 origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
5308 VG_(pp_ExeContext)( origin_ec );
5309 }
5310 }
5311 else
5312 VG_(gdb_printf) ("Address %p len %ld defined\n",
5313 (void *)address, szB);
5314 MC_(pp_describe_addr) (address);
5315 break;
5316 default: tl_assert(0);
5317 }
5318 return True;
5319 }
5320
5321 case 5: { /* block_list */
5322 Char* wl;
5323 Char *endptr;
5324 UInt lr_nr = 0;
5325 wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
5326 if (wl != NULL) lr_nr = VG_(strtoull10) (wl, &endptr);
5327 if (wl == NULL || *endptr != '\0') {
5328 VG_(gdb_printf) ("malformed integer\n");
5329 } else {
5330 // lr_nr-1 because the number shown to the user is 1 more than the index in lr_array.
5331 if (lr_nr == 0 || !
MC_(print_block_list) (lr_nr-1)) 5332 VG_(gdb_printf) ("invalid loss record nr\n"); 5333 } 5334 return True; 5335 } 5336 5337 case 6: { /* who_points_at */ 5338 Addr address; 5339 SizeT szB = 1; 5340 5341 VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr); 5342 if (address == (Addr) 0) { 5343 VG_(gdb_printf) ("Cannot search who points at 0x0\n"); 5344 return True; 5345 } 5346 MC_(who_points_at) (address, szB); 5347 return True; 5348 } 5349 5350 default: 5351 tl_assert(0); 5352 return False; 5353 } 5354 } 5355 5356 /*------------------------------------------------------------*/ 5357 /*--- Client requests ---*/ 5358 /*------------------------------------------------------------*/ 5359 5360 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret ) 5361 { 5362 Int i; 5363 Bool ok; 5364 Addr bad_addr; 5365 5366 if (!VG_IS_TOOL_USERREQ('M','C',arg[0]) 5367 && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0] 5368 && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0] 5369 && VG_USERREQ__FREELIKE_BLOCK != arg[0] 5370 && VG_USERREQ__CREATE_MEMPOOL != arg[0] 5371 && VG_USERREQ__DESTROY_MEMPOOL != arg[0] 5372 && VG_USERREQ__MEMPOOL_ALLOC != arg[0] 5373 && VG_USERREQ__MEMPOOL_FREE != arg[0] 5374 && VG_USERREQ__MEMPOOL_TRIM != arg[0] 5375 && VG_USERREQ__MOVE_MEMPOOL != arg[0] 5376 && VG_USERREQ__MEMPOOL_CHANGE != arg[0] 5377 && VG_USERREQ__MEMPOOL_EXISTS != arg[0] 5378 && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0]) 5379 return False; 5380 5381 switch (arg[0]) { 5382 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: 5383 ok = is_mem_addressable ( arg[1], arg[2], &bad_addr ); 5384 if (!ok) 5385 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 ); 5386 *ret = ok ? (UWord)NULL : bad_addr; 5387 break; 5388 5389 case VG_USERREQ__CHECK_MEM_IS_DEFINED: { 5390 Bool errorV = False; 5391 Addr bad_addrV = 0; 5392 UInt otagV = 0; 5393 Bool errorA = False; 5394 Addr bad_addrA = 0; 5395 is_mem_defined_comprehensive( 5396 arg[1], arg[2], 5397 &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA 5398 ); 5399 if (errorV) { 5400 MC_(record_user_error) ( tid, bad_addrV, 5401 /*isAddrErr*/False, otagV ); 5402 } 5403 if (errorA) { 5404 MC_(record_user_error) ( tid, bad_addrA, 5405 /*isAddrErr*/True, 0 ); 5406 } 5407 /* Return the lower of the two erring addresses, if any. */ 5408 *ret = 0; 5409 if (errorV && !errorA) { 5410 *ret = bad_addrV; 5411 } 5412 if (!errorV && errorA) { 5413 *ret = bad_addrA; 5414 } 5415 if (errorV && errorA) { 5416 *ret = bad_addrV < bad_addrA ? 
bad_addrV : bad_addrA; 5417 } 5418 break; 5419 } 5420 5421 case VG_USERREQ__DO_LEAK_CHECK: { 5422 LeakCheckParams lcp; 5423 5424 if (arg[1] == 0) 5425 lcp.mode = LC_Full; 5426 else if (arg[1] == 1) 5427 lcp.mode = LC_Summary; 5428 else { 5429 VG_(message)(Vg_UserMsg, 5430 "Warning: unknown memcheck leak search mode\n"); 5431 lcp.mode = LC_Full; 5432 } 5433 5434 lcp.show_reachable = MC_(clo_show_reachable); 5435 lcp.show_possibly_lost = MC_(clo_show_possibly_lost); 5436 5437 if (arg[2] == 0) 5438 lcp.deltamode = LCD_Any; 5439 else if (arg[2] == 1) 5440 lcp.deltamode = LCD_Increased; 5441 else if (arg[2] == 2) 5442 lcp.deltamode = LCD_Changed; 5443 else { 5444 VG_(message) 5445 (Vg_UserMsg, 5446 "Warning: unknown memcheck leak search deltamode\n"); 5447 lcp.deltamode = LCD_Any; 5448 } 5449 lcp.max_loss_records_output = 999999999; 5450 lcp.requested_by_monitor_command = False; 5451 5452 MC_(detect_memory_leaks)(tid, &lcp); 5453 *ret = 0; /* return value is meaningless */ 5454 break; 5455 } 5456 5457 case VG_USERREQ__MAKE_MEM_NOACCESS: 5458 MC_(make_mem_noaccess) ( arg[1], arg[2] ); 5459 *ret = -1; 5460 break; 5461 5462 case VG_USERREQ__MAKE_MEM_UNDEFINED: 5463 make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid, 5464 MC_OKIND_USER ); 5465 *ret = -1; 5466 break; 5467 5468 case VG_USERREQ__MAKE_MEM_DEFINED: 5469 MC_(make_mem_defined) ( arg[1], arg[2] ); 5470 *ret = -1; 5471 break; 5472 5473 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE: 5474 make_mem_defined_if_addressable ( arg[1], arg[2] ); 5475 *ret = -1; 5476 break; 5477 5478 case VG_USERREQ__CREATE_BLOCK: /* describe a block */ 5479 if (arg[1] != 0 && arg[2] != 0) { 5480 i = alloc_client_block(); 5481 /* VG_(printf)("allocated %d %p\n", i, cgbs); */ 5482 cgbs[i].start = arg[1]; 5483 cgbs[i].size = arg[2]; 5484 cgbs[i].desc = VG_(strdup)("mc.mhcr.1", (Char *)arg[3]); 5485 cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ ); 5486 *ret = i; 5487 } else 5488 *ret = -1; 5489 break; 5490 5491 case VG_USERREQ__DISCARD: /* discard */ 5492 if (cgbs == NULL 5493 || arg[2] >= cgb_used || 5494 (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) { 5495 *ret = 1; 5496 } else { 5497 tl_assert(arg[2] >= 0 && arg[2] < cgb_used); 5498 cgbs[arg[2]].start = cgbs[arg[2]].size = 0; 5499 VG_(free)(cgbs[arg[2]].desc); 5500 cgb_discards++; 5501 *ret = 0; 5502 } 5503 break; 5504 5505 case VG_USERREQ__GET_VBITS: 5506 *ret = mc_get_or_set_vbits_for_client 5507 ( arg[1], arg[2], arg[3], 5508 False /* get them */, 5509 True /* is client request */ ); 5510 break; 5511 5512 case VG_USERREQ__SET_VBITS: 5513 *ret = mc_get_or_set_vbits_for_client 5514 ( arg[1], arg[2], arg[3], 5515 True /* set them */, 5516 True /* is client request */ ); 5517 break; 5518 5519 case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */ 5520 UWord** argp = (UWord**)arg; 5521 // MC_(bytes_leaked) et al were set by the last leak check (or zero 5522 // if no prior leak checks performed). 5523 *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect); 5524 *argp[2] = MC_(bytes_dubious); 5525 *argp[3] = MC_(bytes_reachable); 5526 *argp[4] = MC_(bytes_suppressed); 5527 // there is no argp[5] 5528 //*argp[5] = MC_(bytes_indirect); 5529 // XXX need to make *argp[1-4] defined; currently done in the 5530 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero. 
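      // Illustrative client-side use of this request, via the convenience
      // macro in memcheck.h (the variable names are the caller's own):
      //
      //    unsigned long leaked, dubious, reachable, suppressed;
      //    VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);
      //
      // As noted above, the macro initialises its four arguments to zero,
      // which also makes the words written through argp[1..4] defined.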
5531 *ret = 0; 5532 return True; 5533 } 5534 case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */ 5535 UWord** argp = (UWord**)arg; 5536 // MC_(blocks_leaked) et al were set by the last leak check (or zero 5537 // if no prior leak checks performed). 5538 *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect); 5539 *argp[2] = MC_(blocks_dubious); 5540 *argp[3] = MC_(blocks_reachable); 5541 *argp[4] = MC_(blocks_suppressed); 5542 // there is no argp[5] 5543 //*argp[5] = MC_(blocks_indirect); 5544 // XXX need to make *argp[1-4] defined; currently done in the 5545 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero. 5546 *ret = 0; 5547 return True; 5548 } 5549 case VG_USERREQ__MALLOCLIKE_BLOCK: { 5550 Addr p = (Addr)arg[1]; 5551 SizeT sizeB = arg[2]; 5552 UInt rzB = arg[3]; 5553 Bool is_zeroed = (Bool)arg[4]; 5554 5555 MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed, 5556 MC_AllocCustom, MC_(malloc_list) ); 5557 if (rzB > 0) { 5558 MC_(make_mem_noaccess) ( p - rzB, rzB); 5559 MC_(make_mem_noaccess) ( p + sizeB, rzB); 5560 } 5561 return True; 5562 } 5563 case VG_USERREQ__RESIZEINPLACE_BLOCK: { 5564 Addr p = (Addr)arg[1]; 5565 SizeT oldSizeB = arg[2]; 5566 SizeT newSizeB = arg[3]; 5567 UInt rzB = arg[4]; 5568 5569 MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB ); 5570 return True; 5571 } 5572 case VG_USERREQ__FREELIKE_BLOCK: { 5573 Addr p = (Addr)arg[1]; 5574 UInt rzB = arg[2]; 5575 5576 MC_(handle_free) ( tid, p, rzB, MC_AllocCustom ); 5577 return True; 5578 } 5579 5580 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: { 5581 Char* s = (Char*)arg[1]; 5582 Addr dst = (Addr) arg[2]; 5583 Addr src = (Addr) arg[3]; 5584 SizeT len = (SizeT)arg[4]; 5585 MC_(record_overlap_error)(tid, s, src, dst, len); 5586 return True; 5587 } 5588 5589 case VG_USERREQ__CREATE_MEMPOOL: { 5590 Addr pool = (Addr)arg[1]; 5591 UInt rzB = arg[2]; 5592 Bool is_zeroed = (Bool)arg[3]; 5593 5594 MC_(create_mempool) ( pool, rzB, is_zeroed ); 5595 return True; 5596 } 5597 5598 case VG_USERREQ__DESTROY_MEMPOOL: { 5599 Addr pool = (Addr)arg[1]; 5600 5601 MC_(destroy_mempool) ( pool ); 5602 return True; 5603 } 5604 5605 case VG_USERREQ__MEMPOOL_ALLOC: { 5606 Addr pool = (Addr)arg[1]; 5607 Addr addr = (Addr)arg[2]; 5608 UInt size = arg[3]; 5609 5610 MC_(mempool_alloc) ( tid, pool, addr, size ); 5611 return True; 5612 } 5613 5614 case VG_USERREQ__MEMPOOL_FREE: { 5615 Addr pool = (Addr)arg[1]; 5616 Addr addr = (Addr)arg[2]; 5617 5618 MC_(mempool_free) ( pool, addr ); 5619 return True; 5620 } 5621 5622 case VG_USERREQ__MEMPOOL_TRIM: { 5623 Addr pool = (Addr)arg[1]; 5624 Addr addr = (Addr)arg[2]; 5625 UInt size = arg[3]; 5626 5627 MC_(mempool_trim) ( pool, addr, size ); 5628 return True; 5629 } 5630 5631 case VG_USERREQ__MOVE_MEMPOOL: { 5632 Addr poolA = (Addr)arg[1]; 5633 Addr poolB = (Addr)arg[2]; 5634 5635 MC_(move_mempool) ( poolA, poolB ); 5636 return True; 5637 } 5638 5639 case VG_USERREQ__MEMPOOL_CHANGE: { 5640 Addr pool = (Addr)arg[1]; 5641 Addr addrA = (Addr)arg[2]; 5642 Addr addrB = (Addr)arg[3]; 5643 UInt size = arg[4]; 5644 5645 MC_(mempool_change) ( pool, addrA, addrB, size ); 5646 return True; 5647 } 5648 5649 case VG_USERREQ__MEMPOOL_EXISTS: { 5650 Addr pool = (Addr)arg[1]; 5651 5652 *ret = (UWord) MC_(mempool_exists) ( pool ); 5653 return True; 5654 } 5655 5656 case VG_USERREQ__GDB_MONITOR_COMMAND: { 5657 Bool handled = handle_gdb_monitor_command (tid, (Char*)arg[1]); 5658 if (handled) 5659 *ret = 1; 5660 else 5661 *ret = 0; 5662 return handled; 5663 } 5664 5665 default: 5666 
VG_(message)( 5667 Vg_UserMsg, 5668 "Warning: unknown memcheck client request code %llx\n", 5669 (ULong)arg[0] 5670 ); 5671 return False; 5672 } 5673 return True; 5674 } 5675 5676 5677 /*------------------------------------------------------------*/ 5678 /*--- Crude profiling machinery. ---*/ 5679 /*------------------------------------------------------------*/ 5680 5681 // We track a number of interesting events (using PROF_EVENT) 5682 // if MC_PROFILE_MEMORY is defined. 5683 5684 #ifdef MC_PROFILE_MEMORY 5685 5686 UInt MC_(event_ctr)[N_PROF_EVENTS]; 5687 HChar* MC_(event_ctr_name)[N_PROF_EVENTS]; 5688 5689 static void init_prof_mem ( void ) 5690 { 5691 Int i; 5692 for (i = 0; i < N_PROF_EVENTS; i++) { 5693 MC_(event_ctr)[i] = 0; 5694 MC_(event_ctr_name)[i] = NULL; 5695 } 5696 } 5697 5698 static void done_prof_mem ( void ) 5699 { 5700 Int i; 5701 Bool spaced = False; 5702 for (i = 0; i < N_PROF_EVENTS; i++) { 5703 if (!spaced && (i % 10) == 0) { 5704 VG_(printf)("\n"); 5705 spaced = True; 5706 } 5707 if (MC_(event_ctr)[i] > 0) { 5708 spaced = False; 5709 VG_(printf)( "prof mem event %3d: %9d %s\n", 5710 i, MC_(event_ctr)[i], 5711 MC_(event_ctr_name)[i] 5712 ? MC_(event_ctr_name)[i] : "unnamed"); 5713 } 5714 } 5715 } 5716 5717 #else 5718 5719 static void init_prof_mem ( void ) { } 5720 static void done_prof_mem ( void ) { } 5721 5722 #endif 5723 5724 5725 /*------------------------------------------------------------*/ 5726 /*--- Origin tracking stuff ---*/ 5727 /*------------------------------------------------------------*/ 5728 5729 /*--------------------------------------------*/ 5730 /*--- Origin tracking: load handlers ---*/ 5731 /*--------------------------------------------*/ 5732 5733 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) { 5734 return or1 > or2 ? or1 : or2; 5735 } 5736 5737 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) { 5738 OCacheLine* line; 5739 UChar descr; 5740 UWord lineoff = oc_line_offset(a); 5741 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */ 5742 5743 if (OC_ENABLE_ASSERTIONS) { 5744 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 5745 } 5746 5747 line = find_OCacheLine( a ); 5748 5749 descr = line->descr[lineoff]; 5750 if (OC_ENABLE_ASSERTIONS) { 5751 tl_assert(descr < 0x10); 5752 } 5753 5754 if (LIKELY(0 == (descr & (1 << byteoff)))) { 5755 return 0; 5756 } else { 5757 return line->w32[lineoff]; 5758 } 5759 } 5760 5761 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) { 5762 OCacheLine* line; 5763 UChar descr; 5764 UWord lineoff, byteoff; 5765 5766 if (UNLIKELY(a & 1)) { 5767 /* Handle misaligned case, slowly. */ 5768 UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 ); 5769 UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 ); 5770 return merge_origins(oLo, oHi); 5771 } 5772 5773 lineoff = oc_line_offset(a); 5774 byteoff = a & 3; /* 0 or 2 */ 5775 5776 if (OC_ENABLE_ASSERTIONS) { 5777 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 5778 } 5779 line = find_OCacheLine( a ); 5780 5781 descr = line->descr[lineoff]; 5782 if (OC_ENABLE_ASSERTIONS) { 5783 tl_assert(descr < 0x10); 5784 } 5785 5786 if (LIKELY(0 == (descr & (3 << byteoff)))) { 5787 return 0; 5788 } else { 5789 return line->w32[lineoff]; 5790 } 5791 } 5792 5793 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) { 5794 OCacheLine* line; 5795 UChar descr; 5796 UWord lineoff; 5797 5798 if (UNLIKELY(a & 3)) { 5799 /* Handle misaligned case, slowly. 
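   (The 32-bit load is split into two 16-bit loads, at a and at a+2, and
   the two origin tags are combined with merge_origins, which simply keeps
   the numerically larger of the two.)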
*/ 5800 UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 ); 5801 UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 ); 5802 return merge_origins(oLo, oHi); 5803 } 5804 5805 lineoff = oc_line_offset(a); 5806 if (OC_ENABLE_ASSERTIONS) { 5807 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 5808 } 5809 5810 line = find_OCacheLine( a ); 5811 5812 descr = line->descr[lineoff]; 5813 if (OC_ENABLE_ASSERTIONS) { 5814 tl_assert(descr < 0x10); 5815 } 5816 5817 if (LIKELY(0 == descr)) { 5818 return 0; 5819 } else { 5820 return line->w32[lineoff]; 5821 } 5822 } 5823 5824 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) { 5825 OCacheLine* line; 5826 UChar descrLo, descrHi, descr; 5827 UWord lineoff; 5828 5829 if (UNLIKELY(a & 7)) { 5830 /* Handle misaligned case, slowly. */ 5831 UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 ); 5832 UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 ); 5833 return merge_origins(oLo, oHi); 5834 } 5835 5836 lineoff = oc_line_offset(a); 5837 if (OC_ENABLE_ASSERTIONS) { 5838 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/ 5839 } 5840 5841 line = find_OCacheLine( a ); 5842 5843 descrLo = line->descr[lineoff + 0]; 5844 descrHi = line->descr[lineoff + 1]; 5845 descr = descrLo | descrHi; 5846 if (OC_ENABLE_ASSERTIONS) { 5847 tl_assert(descr < 0x10); 5848 } 5849 5850 if (LIKELY(0 == descr)) { 5851 return 0; /* both 32-bit chunks are defined */ 5852 } else { 5853 UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0]; 5854 UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1]; 5855 return merge_origins(oLo, oHi); 5856 } 5857 } 5858 5859 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) { 5860 UInt oLo = (UInt)MC_(helperc_b_load8)( a + 0 ); 5861 UInt oHi = (UInt)MC_(helperc_b_load8)( a + 8 ); 5862 UInt oBoth = merge_origins(oLo, oHi); 5863 return (UWord)oBoth; 5864 } 5865 5866 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) { 5867 UInt oQ0 = (UInt)MC_(helperc_b_load8)( a + 0 ); 5868 UInt oQ1 = (UInt)MC_(helperc_b_load8)( a + 8 ); 5869 UInt oQ2 = (UInt)MC_(helperc_b_load8)( a + 16 ); 5870 UInt oQ3 = (UInt)MC_(helperc_b_load8)( a + 24 ); 5871 UInt oAll = merge_origins(merge_origins(oQ0, oQ1), 5872 merge_origins(oQ2, oQ3)); 5873 return (UWord)oAll; 5874 } 5875 5876 5877 /*--------------------------------------------*/ 5878 /*--- Origin tracking: store handlers ---*/ 5879 /*--------------------------------------------*/ 5880 5881 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) { 5882 OCacheLine* line; 5883 UWord lineoff = oc_line_offset(a); 5884 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */ 5885 5886 if (OC_ENABLE_ASSERTIONS) { 5887 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 5888 } 5889 5890 line = find_OCacheLine( a ); 5891 5892 if (d32 == 0) { 5893 line->descr[lineoff] &= ~(1 << byteoff); 5894 } else { 5895 line->descr[lineoff] |= (1 << byteoff); 5896 line->w32[lineoff] = d32; 5897 } 5898 } 5899 5900 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) { 5901 OCacheLine* line; 5902 UWord lineoff, byteoff; 5903 5904 if (UNLIKELY(a & 1)) { 5905 /* Handle misaligned case, slowly. 
*/ 5906 MC_(helperc_b_store1)( a + 0, d32 ); 5907 MC_(helperc_b_store1)( a + 1, d32 ); 5908 return; 5909 } 5910 5911 lineoff = oc_line_offset(a); 5912 byteoff = a & 3; /* 0 or 2 */ 5913 5914 if (OC_ENABLE_ASSERTIONS) { 5915 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 5916 } 5917 5918 line = find_OCacheLine( a ); 5919 5920 if (d32 == 0) { 5921 line->descr[lineoff] &= ~(3 << byteoff); 5922 } else { 5923 line->descr[lineoff] |= (3 << byteoff); 5924 line->w32[lineoff] = d32; 5925 } 5926 } 5927 5928 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) { 5929 OCacheLine* line; 5930 UWord lineoff; 5931 5932 if (UNLIKELY(a & 3)) { 5933 /* Handle misaligned case, slowly. */ 5934 MC_(helperc_b_store2)( a + 0, d32 ); 5935 MC_(helperc_b_store2)( a + 2, d32 ); 5936 return; 5937 } 5938 5939 lineoff = oc_line_offset(a); 5940 if (OC_ENABLE_ASSERTIONS) { 5941 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 5942 } 5943 5944 line = find_OCacheLine( a ); 5945 5946 if (d32 == 0) { 5947 line->descr[lineoff] = 0; 5948 } else { 5949 line->descr[lineoff] = 0xF; 5950 line->w32[lineoff] = d32; 5951 } 5952 } 5953 5954 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) { 5955 OCacheLine* line; 5956 UWord lineoff; 5957 5958 if (UNLIKELY(a & 7)) { 5959 /* Handle misaligned case, slowly. */ 5960 MC_(helperc_b_store4)( a + 0, d32 ); 5961 MC_(helperc_b_store4)( a + 4, d32 ); 5962 return; 5963 } 5964 5965 lineoff = oc_line_offset(a); 5966 if (OC_ENABLE_ASSERTIONS) { 5967 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/ 5968 } 5969 5970 line = find_OCacheLine( a ); 5971 5972 if (d32 == 0) { 5973 line->descr[lineoff + 0] = 0; 5974 line->descr[lineoff + 1] = 0; 5975 } else { 5976 line->descr[lineoff + 0] = 0xF; 5977 line->descr[lineoff + 1] = 0xF; 5978 line->w32[lineoff + 0] = d32; 5979 line->w32[lineoff + 1] = d32; 5980 } 5981 } 5982 5983 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) { 5984 MC_(helperc_b_store8)( a + 0, d32 ); 5985 MC_(helperc_b_store8)( a + 8, d32 ); 5986 } 5987 5988 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) { 5989 MC_(helperc_b_store8)( a + 0, d32 ); 5990 MC_(helperc_b_store8)( a + 8, d32 ); 5991 MC_(helperc_b_store8)( a + 16, d32 ); 5992 MC_(helperc_b_store8)( a + 24, d32 ); 5993 } 5994 5995 5996 /*--------------------------------------------*/ 5997 /*--- Origin tracking: sarp handlers ---*/ 5998 /*--------------------------------------------*/ 5999 6000 __attribute__((noinline)) 6001 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) { 6002 if ((a & 1) && len >= 1) { 6003 MC_(helperc_b_store1)( a, otag ); 6004 a++; 6005 len--; 6006 } 6007 if ((a & 2) && len >= 2) { 6008 MC_(helperc_b_store2)( a, otag ); 6009 a += 2; 6010 len -= 2; 6011 } 6012 if (len >= 4) 6013 tl_assert(0 == (a & 3)); 6014 while (len >= 4) { 6015 MC_(helperc_b_store4)( a, otag ); 6016 a += 4; 6017 len -= 4; 6018 } 6019 if (len >= 2) { 6020 MC_(helperc_b_store2)( a, otag ); 6021 a += 2; 6022 len -= 2; 6023 } 6024 if (len >= 1) { 6025 MC_(helperc_b_store1)( a, otag ); 6026 //a++; 6027 len--; 6028 } 6029 tl_assert(len == 0); 6030 } 6031 6032 __attribute__((noinline)) 6033 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) { 6034 if ((a & 1) && len >= 1) { 6035 MC_(helperc_b_store1)( a, 0 ); 6036 a++; 6037 len--; 6038 } 6039 if ((a & 2) && len >= 2) { 6040 MC_(helperc_b_store2)( a, 0 ); 6041 a += 2; 6042 len -= 2; 6043 } 6044 if (len >= 4) 6045 tl_assert(0 == (a & 3)); 6046 while (len >= 4) { 6047 MC_(helperc_b_store4)( 
a, 0 ); 6048 a += 4; 6049 len -= 4; 6050 } 6051 if (len >= 2) { 6052 MC_(helperc_b_store2)( a, 0 ); 6053 a += 2; 6054 len -= 2; 6055 } 6056 if (len >= 1) { 6057 MC_(helperc_b_store1)( a, 0 ); 6058 //a++; 6059 len--; 6060 } 6061 tl_assert(len == 0); 6062 } 6063 6064 6065 /*------------------------------------------------------------*/ 6066 /*--- Setup and finalisation ---*/ 6067 /*------------------------------------------------------------*/ 6068 6069 static void mc_post_clo_init ( void ) 6070 { 6071 /* If we've been asked to emit XML, mash around various other 6072 options so as to constrain the output somewhat. */ 6073 if (VG_(clo_xml)) { 6074 /* Extract as much info as possible from the leak checker. */ 6075 /* MC_(clo_show_reachable) = True; */ 6076 MC_(clo_leak_check) = LC_Full; 6077 } 6078 6079 if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol)) 6080 VG_(message)(Vg_UserMsg, 6081 "Warning: --freelist-big-blocks value %lld has no effect\n" 6082 "as it is >= to --freelist-vol value %lld\n", 6083 MC_(clo_freelist_big_blocks), 6084 MC_(clo_freelist_vol)); 6085 6086 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 ); 6087 6088 if (MC_(clo_mc_level) == 3) { 6089 /* We're doing origin tracking. */ 6090 # ifdef PERF_FAST_STACK 6091 VG_(track_new_mem_stack_4_w_ECU) ( mc_new_mem_stack_4_w_ECU ); 6092 VG_(track_new_mem_stack_8_w_ECU) ( mc_new_mem_stack_8_w_ECU ); 6093 VG_(track_new_mem_stack_12_w_ECU) ( mc_new_mem_stack_12_w_ECU ); 6094 VG_(track_new_mem_stack_16_w_ECU) ( mc_new_mem_stack_16_w_ECU ); 6095 VG_(track_new_mem_stack_32_w_ECU) ( mc_new_mem_stack_32_w_ECU ); 6096 VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU ); 6097 VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU ); 6098 VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU ); 6099 VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU ); 6100 # endif 6101 VG_(track_new_mem_stack_w_ECU) ( mc_new_mem_stack_w_ECU ); 6102 } else { 6103 /* Not doing origin tracking */ 6104 # ifdef PERF_FAST_STACK 6105 VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 ); 6106 VG_(track_new_mem_stack_8) ( mc_new_mem_stack_8 ); 6107 VG_(track_new_mem_stack_12) ( mc_new_mem_stack_12 ); 6108 VG_(track_new_mem_stack_16) ( mc_new_mem_stack_16 ); 6109 VG_(track_new_mem_stack_32) ( mc_new_mem_stack_32 ); 6110 VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 ); 6111 VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 ); 6112 VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 ); 6113 VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 ); 6114 # endif 6115 VG_(track_new_mem_stack) ( mc_new_mem_stack ); 6116 } 6117 6118 /* This origin tracking cache is huge (~100M), so only initialise 6119 if we need it. 
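   In other words, initialise it only when origin tracking was requested:
   MC_(clo_mc_level) is 3 exactly when --track-origins=yes is in effect
   (see mc_process_cmd_line_options).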
*/ 6120 if (MC_(clo_mc_level) >= 3) { 6121 init_OCache(); 6122 tl_assert(ocacheL1 != NULL); 6123 tl_assert(ocacheL2 != NULL); 6124 } else { 6125 tl_assert(ocacheL1 == NULL); 6126 tl_assert(ocacheL2 == NULL); 6127 } 6128 6129 /* Do not check definedness of guest state if --undef-value-errors=no */ 6130 if (MC_(clo_mc_level) >= 2) 6131 VG_(track_pre_reg_read) ( mc_pre_reg_read ); 6132 } 6133 6134 static void print_SM_info(char* type, int n_SMs) 6135 { 6136 VG_(message)(Vg_DebugMsg, 6137 " memcheck: SMs: %s = %d (%ldk, %ldM)\n", 6138 type, 6139 n_SMs, 6140 n_SMs * sizeof(SecMap) / 1024UL, 6141 n_SMs * sizeof(SecMap) / (1024 * 1024UL) ); 6142 } 6143 6144 static void mc_fini ( Int exitcode ) 6145 { 6146 MC_(print_malloc_stats)(); 6147 6148 if (MC_(clo_leak_check) != LC_Off) { 6149 LeakCheckParams lcp; 6150 lcp.mode = MC_(clo_leak_check); 6151 lcp.show_reachable = MC_(clo_show_reachable); 6152 lcp.show_possibly_lost = MC_(clo_show_possibly_lost); 6153 lcp.deltamode = LCD_Any; 6154 lcp.max_loss_records_output = 999999999; 6155 lcp.requested_by_monitor_command = False; 6156 MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp); 6157 } else { 6158 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) { 6159 VG_(umsg)( 6160 "For a detailed leak analysis, rerun with: --leak-check=full\n" 6161 "\n" 6162 ); 6163 } 6164 } 6165 6166 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) { 6167 VG_(message)(Vg_UserMsg, 6168 "For counts of detected and suppressed errors, rerun with: -v\n"); 6169 } 6170 6171 if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1 6172 && MC_(clo_mc_level) == 2) { 6173 VG_(message)(Vg_UserMsg, 6174 "Use --track-origins=yes to see where " 6175 "uninitialised values come from\n"); 6176 } 6177 6178 done_prof_mem(); 6179 6180 if (VG_(clo_stats)) { 6181 SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB; 6182 6183 VG_(message)(Vg_DebugMsg, 6184 " memcheck: sanity checks: %d cheap, %d expensive\n", 6185 n_sanity_cheap, n_sanity_expensive ); 6186 VG_(message)(Vg_DebugMsg, 6187 " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n", 6188 n_auxmap_L2_nodes, 6189 n_auxmap_L2_nodes * 64, 6190 n_auxmap_L2_nodes / 16 ); 6191 VG_(message)(Vg_DebugMsg, 6192 " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n", 6193 n_auxmap_L1_searches, n_auxmap_L1_cmps, 6194 (10ULL * n_auxmap_L1_cmps) 6195 / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1) 6196 ); 6197 VG_(message)(Vg_DebugMsg, 6198 " memcheck: auxmaps_L2: %lld searches, %lld nodes\n", 6199 n_auxmap_L2_searches, n_auxmap_L2_nodes 6200 ); 6201 6202 print_SM_info("n_issued ", n_issued_SMs); 6203 print_SM_info("n_deissued ", n_deissued_SMs); 6204 print_SM_info("max_noaccess ", max_noaccess_SMs); 6205 print_SM_info("max_undefined", max_undefined_SMs); 6206 print_SM_info("max_defined ", max_defined_SMs); 6207 print_SM_info("max_non_DSM ", max_non_DSM_SMs); 6208 6209 // Three DSMs, plus the non-DSM ones 6210 max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap); 6211 // The 3*sizeof(Word) bytes is the AVL node metadata size. 6212 // The VG_ROUNDUP is because the OSet pool allocator will/must align 6213 // the elements on pointer size. 6214 // Note that the pool allocator has some additional small overhead 6215 // which is not counted in the below. 6216 // Hardwiring this logic sucks, but I don't see how else to do it. 
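      // A rough worked example of the formula below (illustrative only):
      // on a 64-bit host, where sizeof(Word) == sizeof(void*) == 8, each
      // node is charged 3*8 = 24 bytes of AVL metadata plus
      // sizeof(SecVBitNode) rounded up to the next multiple of 8.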
6217 max_secVBit_szB = max_secVBit_nodes * 6218 (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*))); 6219 max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB; 6220 6221 VG_(message)(Vg_DebugMsg, 6222 " memcheck: max sec V bit nodes: %d (%ldk, %ldM)\n", 6223 max_secVBit_nodes, max_secVBit_szB / 1024, 6224 max_secVBit_szB / (1024 * 1024)); 6225 VG_(message)(Vg_DebugMsg, 6226 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n", 6227 sec_vbits_new_nodes + sec_vbits_updates, 6228 sec_vbits_new_nodes, sec_vbits_updates ); 6229 VG_(message)(Vg_DebugMsg, 6230 " memcheck: max shadow mem size: %ldk, %ldM\n", 6231 max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024)); 6232 6233 if (MC_(clo_mc_level) >= 3) { 6234 VG_(message)(Vg_DebugMsg, 6235 " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n", 6236 stats_ocacheL1_find, 6237 stats_ocacheL1_misses, 6238 stats_ocacheL1_lossage ); 6239 VG_(message)(Vg_DebugMsg, 6240 " ocacheL1: %'12lu at 0 %'12lu at 1\n", 6241 stats_ocacheL1_find - stats_ocacheL1_misses 6242 - stats_ocacheL1_found_at_1 6243 - stats_ocacheL1_found_at_N, 6244 stats_ocacheL1_found_at_1 ); 6245 VG_(message)(Vg_DebugMsg, 6246 " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n", 6247 stats_ocacheL1_found_at_N, 6248 stats_ocacheL1_movefwds ); 6249 VG_(message)(Vg_DebugMsg, 6250 " ocacheL1: %'12lu sizeB %'12u useful\n", 6251 (UWord)sizeof(OCache), 6252 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS ); 6253 VG_(message)(Vg_DebugMsg, 6254 " ocacheL2: %'12lu refs %'12lu misses\n", 6255 stats__ocacheL2_refs, 6256 stats__ocacheL2_misses ); 6257 VG_(message)(Vg_DebugMsg, 6258 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n", 6259 stats__ocacheL2_n_nodes_max, 6260 stats__ocacheL2_n_nodes ); 6261 VG_(message)(Vg_DebugMsg, 6262 " niacache: %'12lu refs %'12lu misses\n", 6263 stats__nia_cache_queries, stats__nia_cache_misses); 6264 } else { 6265 tl_assert(ocacheL1 == NULL); 6266 tl_assert(ocacheL2 == NULL); 6267 } 6268 } 6269 6270 if (0) { 6271 VG_(message)(Vg_DebugMsg, 6272 "------ Valgrind's client block stats follow ---------------\n" ); 6273 show_client_block_stats(); 6274 } 6275 } 6276 6277 /* mark the given addr/len unaddressable for watchpoint implementation 6278 The PointKind will be handled at access time */ 6279 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert, 6280 Addr addr, SizeT len) 6281 { 6282 /* GDBTD this is somewhat fishy. We might rather have to save the previous 6283 accessibility and definedness in gdbserver so as to allow restoring it 6284 properly. 
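   As it stands, inserting a watchpoint simply marks the range noaccess,
   and removing it marks the range defined again, whatever the range's
   state was beforehand.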
Currently, we assume that the user only watches things 6285 which are properly addressable and defined */ 6286 if (insert) 6287 MC_(make_mem_noaccess) (addr, len); 6288 else 6289 MC_(make_mem_defined) (addr, len); 6290 return True; 6291 } 6292 6293 static void mc_pre_clo_init(void) 6294 { 6295 VG_(details_name) ("Memcheck"); 6296 VG_(details_version) (NULL); 6297 VG_(details_description) ("a memory error detector"); 6298 VG_(details_copyright_author)( 6299 "Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al."); 6300 VG_(details_bug_reports_to) (VG_BUGS_TO); 6301 VG_(details_avg_translation_sizeB) ( 640 ); 6302 6303 VG_(basic_tool_funcs) (mc_post_clo_init, 6304 MC_(instrument), 6305 mc_fini); 6306 6307 VG_(needs_final_IR_tidy_pass) ( MC_(final_tidy) ); 6308 6309 6310 VG_(needs_core_errors) (); 6311 VG_(needs_tool_errors) (MC_(eq_Error), 6312 MC_(before_pp_Error), 6313 MC_(pp_Error), 6314 True,/*show TIDs for errors*/ 6315 MC_(update_Error_extra), 6316 MC_(is_recognised_suppression), 6317 MC_(read_extra_suppression_info), 6318 MC_(error_matches_suppression), 6319 MC_(get_error_name), 6320 MC_(get_extra_suppression_info)); 6321 VG_(needs_libc_freeres) (); 6322 VG_(needs_command_line_options)(mc_process_cmd_line_options, 6323 mc_print_usage, 6324 mc_print_debug_usage); 6325 VG_(needs_client_requests) (mc_handle_client_request); 6326 VG_(needs_sanity_checks) (mc_cheap_sanity_check, 6327 mc_expensive_sanity_check); 6328 VG_(needs_malloc_replacement) (MC_(malloc), 6329 MC_(__builtin_new), 6330 MC_(__builtin_vec_new), 6331 MC_(memalign), 6332 MC_(calloc), 6333 MC_(free), 6334 MC_(__builtin_delete), 6335 MC_(__builtin_vec_delete), 6336 MC_(realloc), 6337 MC_(malloc_usable_size), 6338 MC_MALLOC_DEFAULT_REDZONE_SZB ); 6339 MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)(); 6340 6341 VG_(needs_xml_output) (); 6342 6343 VG_(track_new_mem_startup) ( mc_new_mem_startup ); 6344 VG_(track_new_mem_stack_signal)( make_mem_undefined_w_tid ); 6345 // We assume that brk()/sbrk() does not initialise new memory. Is this 6346 // accurate? John Reiser says: 6347 // 6348 // 0) sbrk() can *decrease* process address space. No zero fill is done 6349 // for a decrease, not even the fragment on the high end of the last page 6350 // that is beyond the new highest address. For maximum safety and 6351 // portability, then the bytes in the last page that reside above [the 6352 // new] sbrk(0) should be considered to be uninitialized, but in practice 6353 // it is exceedingly likely that they will retain their previous 6354 // contents. 6355 // 6356 // 1) If an increase is large enough to require new whole pages, then 6357 // those new whole pages (like all new pages) are zero-filled by the 6358 // operating system. So if sbrk(0) already is page aligned, then 6359 // sbrk(PAGE_SIZE) *does* zero-fill the new memory. 6360 // 6361 // 2) Any increase that lies within an existing allocated page is not 6362 // changed. So if (x = sbrk(0)) is not page aligned, then 6363 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their 6364 // existing contents, and an additional PAGE_SIZE bytes which are zeroed. 6365 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest 6366 // of them come along for the ride because the operating system deals 6367 // only in whole pages. 
Again, for maximum safety and portability, then 6368 // anything that lives above [the new] sbrk(0) should be considered 6369 // uninitialized, but in practice will retain previous contents [zero in 6370 // this case.]" 6371 // 6372 // In short: 6373 // 6374 // A key property of sbrk/brk is that new whole pages that are supplied 6375 // by the operating system *do* get initialized to zero. 6376 // 6377 // As for the portability of all this: 6378 // 6379 // sbrk and brk are not POSIX. However, any system that is a derivative 6380 // of *nix has sbrk and brk because there are too many softwares (such as 6381 // the Bourne shell) which rely on the traditional memory map (.text, 6382 // .data+.bss, stack) and the existence of sbrk/brk. 6383 // 6384 // So we should arguably observe all this. However: 6385 // - The current inaccuracy has caused maybe one complaint in seven years(?) 6386 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I 6387 // doubt most programmers know the above information. 6388 // So I'm not terribly unhappy with marking it as undefined. --njn. 6389 // 6390 // [More: I think most of what John said only applies to sbrk(). It seems 6391 // that brk() always deals in whole pages. And since this event deals 6392 // directly with brk(), not with sbrk(), perhaps it would be reasonable to 6393 // just mark all memory it allocates as defined.] 6394 // 6395 VG_(track_new_mem_brk) ( make_mem_undefined_w_tid ); 6396 6397 // Handling of mmap and mprotect isn't simple (well, it is simple, 6398 // but the justification isn't.) See comments above, just prior to 6399 // mc_new_mem_mmap. 6400 VG_(track_new_mem_mmap) ( mc_new_mem_mmap ); 6401 VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect ); 6402 6403 VG_(track_copy_mem_remap) ( MC_(copy_address_range_state) ); 6404 6405 VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) ); 6406 VG_(track_die_mem_brk) ( MC_(make_mem_noaccess) ); 6407 VG_(track_die_mem_munmap) ( MC_(make_mem_noaccess) ); 6408 6409 /* Defer the specification of the new_mem_stack functions to the 6410 post_clo_init function, since we need to first parse the command 6411 line before deciding which set to use. 
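   (mc_post_clo_init above installs either the mc_new_mem_stack_*_w_ECU
   variants, when MC_(clo_mc_level) == 3 so that new stack areas can be
   tagged with an origin, or the plain mc_new_mem_stack_* variants
   otherwise.)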
*/ 6412 6413 # ifdef PERF_FAST_STACK 6414 VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 ); 6415 VG_(track_die_mem_stack_8) ( mc_die_mem_stack_8 ); 6416 VG_(track_die_mem_stack_12) ( mc_die_mem_stack_12 ); 6417 VG_(track_die_mem_stack_16) ( mc_die_mem_stack_16 ); 6418 VG_(track_die_mem_stack_32) ( mc_die_mem_stack_32 ); 6419 VG_(track_die_mem_stack_112) ( mc_die_mem_stack_112 ); 6420 VG_(track_die_mem_stack_128) ( mc_die_mem_stack_128 ); 6421 VG_(track_die_mem_stack_144) ( mc_die_mem_stack_144 ); 6422 VG_(track_die_mem_stack_160) ( mc_die_mem_stack_160 ); 6423 # endif 6424 VG_(track_die_mem_stack) ( mc_die_mem_stack ); 6425 6426 VG_(track_ban_mem_stack) ( MC_(make_mem_noaccess) ); 6427 6428 VG_(track_pre_mem_read) ( check_mem_is_defined ); 6429 VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz ); 6430 VG_(track_pre_mem_write) ( check_mem_is_addressable ); 6431 VG_(track_post_mem_write) ( mc_post_mem_write ); 6432 6433 VG_(track_post_reg_write) ( mc_post_reg_write ); 6434 VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall ); 6435 6436 VG_(needs_watchpoint) ( mc_mark_unaddressable_for_watchpoint ); 6437 6438 init_shadow_memory(); 6439 MC_(chunk_poolalloc) = VG_(newPA) (sizeof(MC_Chunk), 6440 1000, 6441 VG_(malloc), 6442 "mc.cMC.1 (MC_Chunk pools)", 6443 VG_(free)); 6444 MC_(malloc_list) = VG_(HT_construct)( "MC_(malloc_list)" ); 6445 MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" ); 6446 init_prof_mem(); 6447 6448 tl_assert( mc_expensive_sanity_check() ); 6449 6450 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true. 6451 tl_assert(sizeof(UWord) == sizeof(Addr)); 6452 // Call me paranoid. I don't care. 6453 tl_assert(sizeof(void*) == sizeof(Addr)); 6454 6455 // BYTES_PER_SEC_VBIT_NODE must be a power of two. 6456 tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE)); 6457 6458 /* This is small. Always initialise it. */ 6459 init_nia_to_ecu_cache(); 6460 6461 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know 6462 if we need to, since the command line args haven't been 6463 processed yet. Hence defer it to mc_post_clo_init. */ 6464 tl_assert(ocacheL1 == NULL); 6465 tl_assert(ocacheL2 == NULL); 6466 6467 /* Check some important stuff. See extensive comments above 6468 re UNALIGNED_OR_HIGH for background. */ 6469 # if VG_WORDSIZE == 4 6470 tl_assert(sizeof(void*) == 4); 6471 tl_assert(sizeof(Addr) == 4); 6472 tl_assert(sizeof(UWord) == 4); 6473 tl_assert(sizeof(Word) == 4); 6474 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL); 6475 tl_assert(MASK(1) == 0UL); 6476 tl_assert(MASK(2) == 1UL); 6477 tl_assert(MASK(4) == 3UL); 6478 tl_assert(MASK(8) == 7UL); 6479 # else 6480 tl_assert(VG_WORDSIZE == 8); 6481 tl_assert(sizeof(void*) == 8); 6482 tl_assert(sizeof(Addr) == 8); 6483 tl_assert(sizeof(UWord) == 8); 6484 tl_assert(sizeof(Word) == 8); 6485 tl_assert(MAX_PRIMARY_ADDRESS == 0x7FFFFFFFFULL); 6486 tl_assert(MASK(1) == 0xFFFFFFF800000000ULL); 6487 tl_assert(MASK(2) == 0xFFFFFFF800000001ULL); 6488 tl_assert(MASK(4) == 0xFFFFFFF800000003ULL); 6489 tl_assert(MASK(8) == 0xFFFFFFF800000007ULL); 6490 # endif 6491 } 6492 6493 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init) 6494 6495 /*--------------------------------------------------------------------*/ 6496 /*--- end mc_main.c ---*/ 6497 /*--------------------------------------------------------------------*/ 6498