1 /* -*- mode: C; c-basic-offset: 3; -*- */ 2 3 /*--------------------------------------------------------------------*/ 4 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/ 5 /*--- accessibility (A) and validity (V) status of each byte. ---*/ 6 /*--- mc_main.c ---*/ 7 /*--------------------------------------------------------------------*/ 8 9 /* 10 This file is part of MemCheck, a heavyweight Valgrind tool for 11 detecting memory errors. 12 13 Copyright (C) 2000-2017 Julian Seward 14 jseward (at) acm.org 15 16 This program is free software; you can redistribute it and/or 17 modify it under the terms of the GNU General Public License as 18 published by the Free Software Foundation; either version 2 of the 19 License, or (at your option) any later version. 20 21 This program is distributed in the hope that it will be useful, but 22 WITHOUT ANY WARRANTY; without even the implied warranty of 23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 24 General Public License for more details. 25 26 You should have received a copy of the GNU General Public License 27 along with this program; if not, write to the Free Software 28 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 29 02111-1307, USA. 30 31 The GNU General Public License is contained in the file COPYING. 
32 */ 33 34 #include "pub_tool_basics.h" 35 #include "pub_tool_aspacemgr.h" 36 #include "pub_tool_gdbserver.h" 37 #include "pub_tool_poolalloc.h" 38 #include "pub_tool_hashtable.h" // For mc_include.h 39 #include "pub_tool_libcbase.h" 40 #include "pub_tool_libcassert.h" 41 #include "pub_tool_libcprint.h" 42 #include "pub_tool_machine.h" 43 #include "pub_tool_mallocfree.h" 44 #include "pub_tool_options.h" 45 #include "pub_tool_oset.h" 46 #include "pub_tool_rangemap.h" 47 #include "pub_tool_replacemalloc.h" 48 #include "pub_tool_tooliface.h" 49 #include "pub_tool_threadstate.h" 50 #include "pub_tool_xarray.h" 51 #include "pub_tool_xtree.h" 52 #include "pub_tool_xtmemory.h" 53 54 #include "mc_include.h" 55 #include "memcheck.h" /* for client requests */ 56 57 58 /* Set to 1 to enable handwritten assembly helpers on targets for 59 which it is supported. */ 60 #define ENABLE_ASSEMBLY_HELPERS 1 61 62 /* Set to 1 to do a little more sanity checking */ 63 #define VG_DEBUG_MEMORY 0 64 65 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args) 66 67 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */ 68 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */ 69 70 71 /*------------------------------------------------------------*/ 72 /*--- Fast-case knobs ---*/ 73 /*------------------------------------------------------------*/ 74 75 // Comment these out to disable the fast cases (don't just set them to zero). 
76 77 #define PERF_FAST_LOADV 1 78 #define PERF_FAST_STOREV 1 79 80 #define PERF_FAST_SARP 1 81 82 #define PERF_FAST_STACK 1 83 #define PERF_FAST_STACK2 1 84 85 /* Change this to 1 to enable assertions on origin tracking cache fast 86 paths */ 87 #define OC_ENABLE_ASSERTIONS 0 88 89 90 /*------------------------------------------------------------*/ 91 /*--- Comments on the origin tracking implementation ---*/ 92 /*------------------------------------------------------------*/ 93 94 /* See detailed comment entitled 95 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION 96 which is contained further on in this file. */ 97 98 99 /*------------------------------------------------------------*/ 100 /*--- V bits and A bits ---*/ 101 /*------------------------------------------------------------*/ 102 103 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck 104 thinks the corresponding value bit is defined. And every memory byte 105 has an A bit, which tracks whether Memcheck thinks the program can access 106 it safely (ie. it's mapped, and has at least one of the RWX permission bits 107 set). So every N-bit register is shadowed with N V bits, and every memory 108 byte is shadowed with 8 V bits and one A bit. 109 110 In the implementation, we use two forms of compression (compressed V bits 111 and distinguished secondary maps) to avoid the 9-bit-per-byte overhead 112 for memory. 113 114 Memcheck also tracks extra information about each heap block that is 115 allocated, for detecting memory leaks and other purposes. 116 */ 117 118 /*------------------------------------------------------------*/ 119 /*--- Basic A/V bitmap representation. ---*/ 120 /*------------------------------------------------------------*/ 121 122 /* All reads and writes are checked against a memory map (a.k.a. shadow 123 memory), which records the state of all memory in the process. 124 125 On 32-bit machines the memory map is organised as follows. 
   The top 16 bits of an address are used to index into a top-level
   map table, containing 65536 entries.  Each entry is a pointer to a
   second-level map, which records the accessibility and validity
   permissions for the 65536 bytes indexed by the lower 16 bits of the
   address.  Each byte is represented by two bits (details are below).  So
   each second-level map contains 16384 bytes.  This two-level arrangement
   conveniently divides the 4G address space into 64k lumps, each size 64k
   bytes.

   All entries in the primary (top-level) map must point to a valid
   secondary (second-level) map.  Since many of the 64kB chunks will
   have the same status for every bit -- ie. noaccess (for unused
   address space) or entirely addressable and defined (for code segments) --
   there are three distinguished secondary maps, which indicate 'noaccess',
   'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
   map entry points to the relevant distinguished map.  In practice,
   typically more than half of the addressable memory is represented with
   the 'undefined' or 'defined' distinguished secondary map, so it gives a
   good saving.  It also lets us set the V+A bits of large address regions
   quickly in set_address_range_perms().

   On 64-bit machines it's more complicated.  If we followed the same basic
   scheme we'd have a four-level table which would require too many memory
   accesses.  So instead the top-level map table has 2^21 entries (indexed
   using bits 16..36 of the address); this covers the bottom 128GB.  Any
   accesses above 128GB are handled with a slow, sparse auxiliary table.
   Valgrind's address space manager tries very hard to keep things below
   this 128GB barrier so that performance doesn't suffer too much.

   Note that this file has a lot of different functions for reading and
   writing shadow memory.  Only a couple are strictly necessary (eg.
157 get_vabits2 and set_vabits2), most are just specialised for specific 158 common cases to improve performance. 159 160 Aside: the V+A bits are less precise than they could be -- we have no way 161 of marking memory as read-only. It would be great if we could add an 162 extra state VA_BITSn_READONLY. But then we'd have 5 different states, 163 which requires 2.3 bits to hold, and there's no way to do that elegantly 164 -- we'd have to double up to 4 bits of metadata per byte, which doesn't 165 seem worth it. 166 */ 167 168 /* --------------- Basic configuration --------------- */ 169 170 /* Only change this. N_PRIMARY_MAP *must* be a power of 2. */ 171 172 #if VG_WORDSIZE == 4 173 174 /* cover the entire address space */ 175 # define N_PRIMARY_BITS 16 176 177 #else 178 179 /* Just handle the first 128G fast and the rest via auxiliary 180 primaries. If you change this, Memcheck will assert at startup. 181 See the definition of UNALIGNED_OR_HIGH for extensive comments. */ 182 # define N_PRIMARY_BITS 21 183 184 #endif 185 186 187 /* Do not change this. */ 188 #define N_PRIMARY_MAP ( ((UWord)1) << N_PRIMARY_BITS) 189 190 /* Do not change this. */ 191 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1) 192 193 194 /* --------------- Secondary maps --------------- */ 195 196 // Each byte of memory conceptually has an A bit, which indicates its 197 // addressability, and 8 V bits, which indicates its definedness. 198 // 199 // But because very few bytes are partially defined, we can use a nice 200 // compression scheme to reduce the size of shadow memory. Each byte of 201 // memory has 2 bits which indicates its state (ie. V+A bits): 202 // 203 // 00: noaccess (unaddressable but treated as fully defined) 204 // 01: undefined (addressable and fully undefined) 205 // 10: defined (addressable and fully defined) 206 // 11: partdefined (addressable and partially defined) 207 // 208 // In the "partdefined" case, we use a secondary table to store the V bits. 
// Each entry in the secondary-V-bits table maps a byte address to its 8 V
// bits.
//
// We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
// four bytes (32 bits) of memory are in each chunk.  Hence the name
// "vabits8".  This lets us get the V+A bits for four bytes at a time
// easily (without having to do any shifting and/or masking), and that is a
// very common operation.  (Note that although each vabits8 chunk
// is 8 bits in size, it represents 32 bits of memory.)
//
// The representation is "inverse" little-endian... each 4 bytes of
// memory is represented by a 1 byte value, where:
//
// - the status of byte (a+0) is held in bits [1..0]
// - the status of byte (a+1) is held in bits [3..2]
// - the status of byte (a+2) is held in bits [5..4]
// - the status of byte (a+3) is held in bits [7..6]
//
// It's "inverse" because endianness normally describes a mapping from
// value bits to memory addresses; in this case the mapping is inverted.
// Ie. instead of particular value bits being held in certain addresses, in
// this case certain addresses are represented by particular value bits.
// See insert_vabits2_into_vabits8() for an example.
//
// But note that we don't compress the V bits stored in registers; they
// need to be explicit to make the shadow operations possible.  Therefore
// when moving values between registers and memory we need to convert
// between the expanded in-register format and the compressed in-memory
// format.  This isn't so difficult, it just requires careful attention in a
// few places.

// These represent eight bits of memory (one byte): a single 2-bit state.
#define VA_BITS2_NOACCESS     0x0      // 00b
#define VA_BITS2_UNDEFINED    0x1      // 01b
#define VA_BITS2_DEFINED      0x2      // 10b
#define VA_BITS2_PARTDEFINED  0x3      // 11b

// These represent 16 bits of memory.
247 #define VA_BITS4_NOACCESS 0x0 // 00_00b 248 #define VA_BITS4_UNDEFINED 0x5 // 01_01b 249 #define VA_BITS4_DEFINED 0xa // 10_10b 250 251 // These represent 32 bits of memory. 252 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b 253 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b 254 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b 255 256 // These represent 64 bits of memory. 257 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2 258 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2 259 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2 260 261 // These represent 128 bits of memory. 262 #define VA_BITS32_UNDEFINED 0x55555555 // 01_01_01_01b x 4 263 264 265 #define SM_CHUNKS 16384 // Each SM covers 64k of memory. 266 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2) 267 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3) 268 269 // Paranoia: it's critical for performance that the requested inlining 270 // occurs. So try extra hard. 271 #define INLINE inline __attribute__((always_inline)) 272 273 static INLINE Addr start_of_this_sm ( Addr a ) { 274 return (a & (~SM_MASK)); 275 } 276 static INLINE Bool is_start_of_sm ( Addr a ) { 277 return (start_of_this_sm(a) == a); 278 } 279 280 STATIC_ASSERT(SM_CHUNKS % 2 == 0); 281 282 typedef 283 union { 284 UChar vabits8[SM_CHUNKS]; 285 UShort vabits16[SM_CHUNKS/2]; 286 } 287 SecMap; 288 289 // 3 distinguished secondary maps, one for no-access, one for 290 // accessible but undefined, and one for accessible and defined. 291 // Distinguished secondaries may never be modified. 292 #define SM_DIST_NOACCESS 0 293 #define SM_DIST_UNDEFINED 1 294 #define SM_DIST_DEFINED 2 295 296 static SecMap sm_distinguished[3]; 297 298 static INLINE Bool is_distinguished_sm ( SecMap* sm ) { 299 return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2]; 300 } 301 302 // Forward declaration 303 static void update_SM_counts(SecMap* oldSM, SecMap* newSM); 304 305 /* dist_sm points to one of our three distinguished secondaries. 
Make 306 a copy of it so that we can write to it. 307 */ 308 static SecMap* copy_for_writing ( SecMap* dist_sm ) 309 { 310 SecMap* new_sm; 311 tl_assert(dist_sm == &sm_distinguished[0] 312 || dist_sm == &sm_distinguished[1] 313 || dist_sm == &sm_distinguished[2]); 314 315 new_sm = VG_(am_shadow_alloc)(sizeof(SecMap)); 316 if (new_sm == NULL) 317 VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap", 318 sizeof(SecMap) ); 319 VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap)); 320 update_SM_counts(dist_sm, new_sm); 321 return new_sm; 322 } 323 324 /* --------------- Stats --------------- */ 325 326 static Int n_issued_SMs = 0; 327 static Int n_deissued_SMs = 0; 328 static Int n_noaccess_SMs = N_PRIMARY_MAP; // start with many noaccess DSMs 329 static Int n_undefined_SMs = 0; 330 static Int n_defined_SMs = 0; 331 static Int n_non_DSM_SMs = 0; 332 static Int max_noaccess_SMs = 0; 333 static Int max_undefined_SMs = 0; 334 static Int max_defined_SMs = 0; 335 static Int max_non_DSM_SMs = 0; 336 337 /* # searches initiated in auxmap_L1, and # base cmps required */ 338 static ULong n_auxmap_L1_searches = 0; 339 static ULong n_auxmap_L1_cmps = 0; 340 /* # of searches that missed in auxmap_L1 and therefore had to 341 be handed to auxmap_L2. And the number of nodes inserted. 
*/ 342 static ULong n_auxmap_L2_searches = 0; 343 static ULong n_auxmap_L2_nodes = 0; 344 345 static Int n_sanity_cheap = 0; 346 static Int n_sanity_expensive = 0; 347 348 static Int n_secVBit_nodes = 0; 349 static Int max_secVBit_nodes = 0; 350 351 static void update_SM_counts(SecMap* oldSM, SecMap* newSM) 352 { 353 if (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --; 354 else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--; 355 else if (oldSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs --; 356 else { n_non_DSM_SMs --; 357 n_deissued_SMs ++; } 358 359 if (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++; 360 else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++; 361 else if (newSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs ++; 362 else { n_non_DSM_SMs ++; 363 n_issued_SMs ++; } 364 365 if (n_noaccess_SMs > max_noaccess_SMs ) max_noaccess_SMs = n_noaccess_SMs; 366 if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs; 367 if (n_defined_SMs > max_defined_SMs ) max_defined_SMs = n_defined_SMs; 368 if (n_non_DSM_SMs > max_non_DSM_SMs ) max_non_DSM_SMs = n_non_DSM_SMs; 369 } 370 371 /* --------------- Primary maps --------------- */ 372 373 /* The main primary map. This covers some initial part of the address 374 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is 375 handled using the auxiliary primary map. 376 */ 377 static SecMap* primary_map[N_PRIMARY_MAP]; 378 379 380 /* An entry in the auxiliary primary map. base must be a 64k-aligned 381 value, and sm points at the relevant secondary map. As with the 382 main primary map, the secondary may be either a real secondary, or 383 one of the three distinguished secondaries. DO NOT CHANGE THIS 384 LAYOUT: the first word has to be the key for OSet fast lookups. 
*/
typedef
   struct {
      Addr    base;
      SecMap* sm;
   }
   AuxMapEnt;

/* Tunable parameter: How big is the L1 queue? */
#define N_AUXMAP_L1 24

/* Tunable parameter: How far along the L1 queue to insert
   entries resulting from L2 lookups? */
#define AUXMAP_L1_INSERT_IX 12

/* The L1 front-cache: a small array searched linearly before falling
   back to auxmap_L2.  An unused slot has base == 0 and ent == NULL. */
static struct {
          Addr       base;
          AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
       }
       auxmap_L1[N_AUXMAP_L1];

/* The L2 table: an OSet of AuxMapEnt nodes keyed on .base.  Created by
   init_auxmap_L1_L2(). */
static OSet* auxmap_L2 = NULL;

/* Clear every L1 slot and create the (empty) L2 OSet.  Also asserts the
   layout assumptions the OSet key lookup relies on: .base is the first
   field of AuxMapEnt and an Addr is pointer-sized. */
static void init_auxmap_L1_L2 ( void )
{
   Int i;
   for (i = 0; i < N_AUXMAP_L1; i++) {
      auxmap_L1[i].base = 0;
      auxmap_L1[i].ent  = NULL;
   }

   tl_assert(0 == offsetof(AuxMapEnt,base));
   tl_assert(sizeof(Addr) == sizeof(void*));
   auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
                                    /*fastCmp*/ NULL,
                                    VG_(malloc), "mc.iaLL.1", VG_(free) );
}

/* Check representation invariants; if OK return NULL; else a
   descriptive bit of text.  Also return the number of
   non-distinguished secondary maps referred to from the auxiliary
   primary maps (written to *n_secmaps_found; it is reset to 0 on entry
   and only counts L2 entries visited before any failure is detected). */

static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
{
   Word i, j;
   /* On a 32-bit platform, the L2 and L1 tables should
      both remain empty forever.

      On a 64-bit platform:
      In the L2 table:
         all .base & 0xFFFF == 0
         all .base > MAX_PRIMARY_ADDRESS
         all .sm != NULL
         the number of nodes equals n_auxmap_L2_nodes
      In the L1 table, every slot is either
         (.base > MAX_PRIMARY_ADDRESS
          and .base & 0xFFFF == 0
          and .ent points to the auxmap_L2 node with the same .base)
      or
         (.base == 0 and .ent == NULL)
      and no non-zero .base appears in more than one slot.
   */
   *n_secmaps_found = 0;
   if (sizeof(void*) == 4) {
      /* 32-bit platform */
      if (VG_(OSetGen_Size)(auxmap_L2) != 0)
         return "32-bit: auxmap_L2 is non-empty";
      for (i = 0; i < N_AUXMAP_L1; i++)
         if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
            return "32-bit: auxmap_L1 is non-empty";
   } else {
      /* 64-bit platform */
      UWord elems_seen = 0;
      AuxMapEnt *elem, *res;
      AuxMapEnt key;
      /* L2 table */
      VG_(OSetGen_ResetIter)(auxmap_L2);
      while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
         elems_seen++;
         if (0 != (elem->base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
         if (elem->base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
         if (elem->sm == NULL)
            return "64-bit: .sm in _L2 is NULL";
         if (!is_distinguished_sm(elem->sm))
            (*n_secmaps_found)++;
      }
      if (elems_seen != n_auxmap_L2_nodes)
         return "64-bit: disagreement on number of elems in _L2";
      /* Check L1-L2 correspondence */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         /* Unused slot: nothing to check. */
         if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
            continue;
         if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
         if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
         if (auxmap_L1[i].ent == NULL)
            return "64-bit: .ent is NULL in auxmap_L1";
         if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
            return "64-bit: _L1 and _L2 bases are inconsistent";
         /* Look it up in auxmap_L2.  Only key.base matters for the
            lookup; key.sm is just given a defined value. */
         key.base = auxmap_L1[i].base;
         key.sm   = 0;
         res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
         if (res == NULL)
            return "64-bit: _L1 .base not found in _L2";
         if (res != auxmap_L1[i].ent)
            return "64-bit: _L1 .ent disagrees with _L2 entry";
      }
      /* Check L1 contains no duplicates */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         if (auxmap_L1[i].base == 0)
            continue;
         for (j = i+1; j < N_AUXMAP_L1; j++) {
            if (auxmap_L1[j].base == 0)
               continue;
            if (auxmap_L1[j].base == auxmap_L1[i].base)
               return "64-bit: duplicate _L1 .base entries";
         }
      }
   }
   return NULL; /* ok */
}

/* Insert 'ent' into the L1 queue at position 'rank'.  Entries from
   'rank' onwards are shifted one slot towards the end, so whatever was
   in the last slot is dropped from L1 (it remains in auxmap_L2). */
static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
{
   Word i;
   tl_assert(ent);
   tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
   for (i = N_AUXMAP_L1-1; i > rank; i--)
      auxmap_L1[i] = auxmap_L1[i-1];
   auxmap_L1[rank].base = ent->base;
   auxmap_L1[rank].ent  = ent;
}

/* Find the auxiliary-map entry covering 'a', which must be above
   MAX_PRIMARY_ADDRESS.  Returns NULL if there is none; never allocates
   an entry.  A hit in L1 moves the entry one slot nearer the front; a
   hit in L2 only causes the entry to be inserted into L1 at
   AUXMAP_L1_INSERT_IX. */
static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
{
   AuxMapEnt  key;
   AuxMapEnt* res;
   Word       i;

   tl_assert(a > MAX_PRIMARY_ADDRESS);
   /* Round down to the 64k-aligned base.  The result is still non-zero,
      so it can never match an unused L1 slot (base == 0). */
   a &= ~(Addr)0xFFFF;

   /* First search the front-cache, which is a self-organising
      list containing the most popular entries. */

   /* Fast paths for slots 0 and 1; neither is counted in the L1
      search statistics. */
   if (LIKELY(auxmap_L1[0].base == a))
      return auxmap_L1[0].ent;
   if (LIKELY(auxmap_L1[1].base == a)) {
      /* Swap slots 0 and 1 so the hit is now at the front. */
      Addr       t_base = auxmap_L1[0].base;
      AuxMapEnt* t_ent  = auxmap_L1[0].ent;
      auxmap_L1[0].base = auxmap_L1[1].base;
      auxmap_L1[0].ent  = auxmap_L1[1].ent;
      auxmap_L1[1].base = t_base;
      auxmap_L1[1].ent  = t_ent;
      return auxmap_L1[0].ent;
   }

   n_auxmap_L1_searches++;

   /* Linear scan; i == N_AUXMAP_L1 afterwards means "not in L1". */
   for (i = 0; i < N_AUXMAP_L1; i++) {
      if (auxmap_L1[i].base == a) {
         break;
      }
   }
   tl_assert(i >= 0 && i <= N_AUXMAP_L1);

   n_auxmap_L1_cmps += (ULong)(i+1);

   if (i < N_AUXMAP_L1) {
      if (i > 0) {
         /* Swap the hit with its predecessor, then follow it to its
            new slot. */
         Addr       t_base = auxmap_L1[i-1].base;
         AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
         auxmap_L1[i-1].base = auxmap_L1[i-0].base;
         auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
         auxmap_L1[i-0].base = t_base;
         auxmap_L1[i-0].ent  = t_ent;
         i--;
      }
      return auxmap_L1[i].ent;
   }

   n_auxmap_L2_searches++;

   /* Not in L1: consult the L2 table. */
   key.base = a;
   key.sm   = 0;

   res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
   if (res)
      insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
   return res;
}

/* As maybe_find_in_auxmap, but if no entry exists, create one whose
   secondary is the distinguished no-access map, add it to both L2 and
   L1, and return it.  Hence never returns NULL. */
static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
{
   AuxMapEnt *nyu, *res;

   /* First see if we already have it. */
   res = maybe_find_in_auxmap( a );
   if (LIKELY(res))
      return res;

   /* Ok, there's no entry in the secondary map, so we'll have
      to allocate one. */
   a &= ~(Addr)0xFFFF;

   nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
   nyu->base = a;
   nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
   VG_(OSetGen_Insert)( auxmap_L2, nyu );
   insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
   n_auxmap_L2_nodes++;
   return nyu;
}

/* --------------- SecMap fundamentals --------------- */

// In all these, 'low' means it's definitely in the main primary map,
// 'high' means it's definitely in the auxiliary table.

// Index into primary_map[] for 'a' (one entry per 64k of address space).
static INLINE UWord get_primary_map_low_offset ( Addr a )
{
   UWord pm_off = a >> 16;
   return pm_off;
}

// Address of the primary_map[] slot for 'a'.  The bounds check is only
// compiled in when VG_DEBUG_MEMORY >= 1.
static INLINE SecMap** get_secmap_low_ptr ( Addr a )
{
   UWord pm_off = a >> 16;
#  if VG_DEBUG_MEMORY >= 1
   tl_assert(pm_off < N_PRIMARY_MAP);
#  endif
   return &primary_map[ pm_off ];
}

// Address of the .sm field of the auxmap entry for 'a'.  Note this goes
// via find_or_alloc_in_auxmap, so an entry is created if none exists.
static INLINE SecMap** get_secmap_high_ptr ( Addr a )
{
   AuxMapEnt* am = find_or_alloc_in_auxmap(a);
   return &am->sm;
}

// Address of the SecMap* slot for 'a', wherever it lives.
static INLINE SecMap** get_secmap_ptr ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_low_ptr(a)
          : get_secmap_high_ptr(a));
}

// The secondary currently installed for a 'low' address; may be a
// distinguished one.
static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
{
   return *get_secmap_low_ptr(a);
}

// As above for a 'high' address.  Because it uses get_secmap_high_ptr,
// merely reading can add an entry to the auxiliary map.
static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
{
   return *get_secmap_high_ptr(a);
}

// A secondary for a 'low' address that is safe to modify: if the slot
// currently holds a distinguished secondary, it is first replaced with
// a private copy.
static INLINE SecMap* get_secmap_for_writing_low(Addr a)
{
   SecMap** p = get_secmap_low_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}

// As get_secmap_for_writing_low, for a 'high' address.
static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
{
   SecMap** p = get_secmap_high_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}

/* Produce the secmap for 'a', either from the primary map or by
   ensuring there is an entry for it in the aux primary map.
The 664 secmap may be a distinguished one as the caller will only want to 665 be able to read it. 666 */ 667 static INLINE SecMap* get_secmap_for_reading ( Addr a ) 668 { 669 return ( a <= MAX_PRIMARY_ADDRESS 670 ? get_secmap_for_reading_low (a) 671 : get_secmap_for_reading_high(a) ); 672 } 673 674 /* Produce the secmap for 'a', either from the primary map or by 675 ensuring there is an entry for it in the aux primary map. The 676 secmap may not be a distinguished one, since the caller will want 677 to be able to write it. If it is a distinguished secondary, make a 678 writable copy of it, install it, and return the copy instead. (COW 679 semantics). 680 */ 681 static INLINE SecMap* get_secmap_for_writing ( Addr a ) 682 { 683 return ( a <= MAX_PRIMARY_ADDRESS 684 ? get_secmap_for_writing_low (a) 685 : get_secmap_for_writing_high(a) ); 686 } 687 688 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't 689 allocate one if one doesn't already exist. This is used by the 690 leak checker. 691 */ 692 static SecMap* maybe_get_secmap_for ( Addr a ) 693 { 694 if (a <= MAX_PRIMARY_ADDRESS) { 695 return get_secmap_for_reading_low(a); 696 } else { 697 AuxMapEnt* am = maybe_find_in_auxmap(a); 698 return am ? 
am->sm : NULL; 699 } 700 } 701 702 /* --------------- Fundamental functions --------------- */ 703 704 static INLINE 705 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 ) 706 { 707 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6 708 *vabits8 &= ~(0x3 << shift); // mask out the two old bits 709 *vabits8 |= (vabits2 << shift); // mask in the two new bits 710 } 711 712 static INLINE 713 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 ) 714 { 715 UInt shift; 716 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned 717 shift = (a & 2) << 1; // shift by 0 or 4 718 *vabits8 &= ~(0xf << shift); // mask out the four old bits 719 *vabits8 |= (vabits4 << shift); // mask in the four new bits 720 } 721 722 static INLINE 723 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 ) 724 { 725 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6 726 vabits8 >>= shift; // shift the two bits to the bottom 727 return 0x3 & vabits8; // mask out the rest 728 } 729 730 static INLINE 731 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 ) 732 { 733 UInt shift; 734 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned 735 shift = (a & 2) << 1; // shift by 0 or 4 736 vabits8 >>= shift; // shift the four bits to the bottom 737 return 0xf & vabits8; // mask out the rest 738 } 739 740 // Note that these four are only used in slow cases. The fast cases do 741 // clever things like combine the auxmap check (in 742 // get_secmap_{read,writ}able) with alignment checks. 743 744 // *** WARNING! *** 745 // Any time this function is called, if it is possible that vabits2 746 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the 747 // sec-V-bits table must also be set! 
748 static INLINE 749 void set_vabits2 ( Addr a, UChar vabits2 ) 750 { 751 SecMap* sm = get_secmap_for_writing(a); 752 UWord sm_off = SM_OFF(a); 753 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) ); 754 } 755 756 static INLINE 757 UChar get_vabits2 ( Addr a ) 758 { 759 SecMap* sm = get_secmap_for_reading(a); 760 UWord sm_off = SM_OFF(a); 761 UChar vabits8 = sm->vabits8[sm_off]; 762 return extract_vabits2_from_vabits8(a, vabits8); 763 } 764 765 // *** WARNING! *** 766 // Any time this function is called, if it is possible that any of the 767 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the 768 // corresponding entry(s) in the sec-V-bits table must also be set! 769 static INLINE 770 UChar get_vabits8_for_aligned_word32 ( Addr a ) 771 { 772 SecMap* sm = get_secmap_for_reading(a); 773 UWord sm_off = SM_OFF(a); 774 UChar vabits8 = sm->vabits8[sm_off]; 775 return vabits8; 776 } 777 778 static INLINE 779 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 ) 780 { 781 SecMap* sm = get_secmap_for_writing(a); 782 UWord sm_off = SM_OFF(a); 783 sm->vabits8[sm_off] = vabits8; 784 } 785 786 787 // Forward declarations 788 static UWord get_sec_vbits8(Addr a); 789 static void set_sec_vbits8(Addr a, UWord vbits8); 790 791 // Returns False if there was an addressability error. 792 static INLINE 793 Bool set_vbits8 ( Addr a, UChar vbits8 ) 794 { 795 Bool ok = True; 796 UChar vabits2 = get_vabits2(a); 797 if ( VA_BITS2_NOACCESS != vabits2 ) { 798 // Addressable. Convert in-register format to in-memory format. 799 // Also remove any existing sec V bit entry for the byte if no 800 // longer necessary. 801 if ( V_BITS8_DEFINED == vbits8 ) { vabits2 = VA_BITS2_DEFINED; } 802 else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; } 803 else { vabits2 = VA_BITS2_PARTDEFINED; 804 set_sec_vbits8(a, vbits8); } 805 set_vabits2(a, vabits2); 806 807 } else { 808 // Unaddressable! 
Do nothing -- when writing to unaddressable 809 // memory it acts as a black hole, and the V bits can never be seen 810 // again. So we don't have to write them at all. 811 ok = False; 812 } 813 return ok; 814 } 815 816 // Returns False if there was an addressability error. In that case, we put 817 // all defined bits into vbits8. 818 static INLINE 819 Bool get_vbits8 ( Addr a, UChar* vbits8 ) 820 { 821 Bool ok = True; 822 UChar vabits2 = get_vabits2(a); 823 824 // Convert the in-memory format to in-register format. 825 if ( VA_BITS2_DEFINED == vabits2 ) { *vbits8 = V_BITS8_DEFINED; } 826 else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; } 827 else if ( VA_BITS2_NOACCESS == vabits2 ) { 828 *vbits8 = V_BITS8_DEFINED; // Make V bits defined! 829 ok = False; 830 } else { 831 tl_assert( VA_BITS2_PARTDEFINED == vabits2 ); 832 *vbits8 = get_sec_vbits8(a); 833 } 834 return ok; 835 } 836 837 838 /* --------------- Secondary V bit table ------------ */ 839 840 // This table holds the full V bit pattern for partially-defined bytes 841 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow 842 // memory. 843 // 844 // Note: the nodes in this table can become stale. Eg. if you write a PDB, 845 // then overwrite the same address with a fully defined byte, the sec-V-bit 846 // node will not necessarily be removed. This is because checking for 847 // whether removal is necessary would slow down the fast paths. 848 // 849 // To avoid the stale nodes building up too much, we periodically (once the 850 // table reaches a certain size) garbage collect (GC) the table by 851 // traversing it and evicting any nodes not having PDB. 852 // If more than a certain proportion of nodes survived, we increase the 853 // table size so that GCs occur less often. 
//
// This policy is designed to avoid bad table bloat in the worst case where
// a program creates huge numbers of stale PDBs -- we would get this bloat
// if we had no GC -- while handling well the case where a node becomes
// stale but shortly afterwards is rewritten with a PDB and so becomes
// non-stale again (which happens quite often, eg. in perf/bz2).  If we just
// remove all stale nodes as soon as possible, we just end up re-adding a
// lot of them again later.  The "sufficiently stale" approach avoids
// this.  (If a program has many live PDBs, performance will just suck,
// there's no way around that.)
//
// Further comments, JRS 14 Feb 2012.  It turns out that the policy of
// holding on to stale entries for 2 GCs before discarding them can lead
// to massive space leaks.  So we're changing to an arrangement where
// lines are evicted as soon as they are observed to be stale during a
// GC.  This also has a side benefit of allowing the sufficiently_stale
// field to be removed from the SecVBitNode struct, reducing its size by
// 8 bytes, which is a substantial space saving considering that the
// struct was previously 32 or so bytes, on a 64 bit target.
//
// In order to try and mitigate the problem that the "sufficiently stale"
// heuristic was designed to avoid, the table size is allowed to drift
// up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
// means that nodes will exist in the table longer on average, and hopefully
// will be deleted and re-added less frequently.
//
// The previous scaling up mechanism (now called STEPUP) is retained:
// if residency exceeds 50%, the table is scaled up, although by a
// factor sqrt(2) rather than 2 as before.  This effectively doubles the
// frequency of GCs when there are many PDBs and reduces the tendency of
// stale PDBs to reside for long periods in the table.
// The secondary V bit table itself: an OSet of SecVBitNode, looked up by
// the node's aligned base address.  gcSecVBitTable() replaces it wholesale
// with a freshly built table on every GC.
static OSet* secVBitTable;

// Stats
// sec_vbits_new_nodes counts nodes inserted by set_sec_vbits8();
// sec_vbits_updates counts writes that hit an already-present node.
static ULong sec_vbits_new_nodes = 0;
static ULong sec_vbits_updates   = 0;

// This must be a power of two;  this is checked in mc_pre_clo_init().
// The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
// a larger address range) they take more space but we can get multiple
// partially-defined bytes in one if they are close to each other, reducing
// the number of total nodes.  In practice sometimes they are clustered (eg.
// perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
// row), but often not.  So we choose something intermediate.
#define BYTES_PER_SEC_VBIT_NODE     16

// We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
// more than this many nodes survive a GC.
// (The proportion is measured against the current table limit, and the
// growth factor is approximately sqrt(2).)
#define STEPUP_SURVIVOR_PROPORTION  0.5
#define STEPUP_GROWTH_FACTOR        1.414213562

// If the above heuristic doesn't apply, then we may make the table
// slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
// this many nodes survive a GC, _and_ the total table size does
// not exceed a fixed limit.  The numbers are somewhat arbitrary, but
// work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
// gradually but effectively reduces residency and increases time
// between GCs for programs with small numbers of PDBs.  The 80000 limit
// effectively limits the table size to around 2MB for programs with
// small numbers of PDBs, whilst giving a reasonably long lifetime to
// entries, to try and reduce the costs resulting from deleting and
// re-adding of entries.
#define DRIFTUP_SURVIVOR_PROPORTION 0.15
#define DRIFTUP_GROWTH_FACTOR       1.015
#define DRIFTUP_MAX_SIZE            80000

// We GC the table when it gets this many nodes in it, ie. it's effectively
// the table size.  It can change.
923 static Int secVBitLimit = 1000; 924 925 // The number of GCs done, used to age sec-V-bit nodes for eviction. 926 // Because it's unsigned, wrapping doesn't matter -- the right answer will 927 // come out anyway. 928 static UInt GCs_done = 0; 929 930 typedef 931 struct { 932 Addr a; 933 UChar vbits8[BYTES_PER_SEC_VBIT_NODE]; 934 } 935 SecVBitNode; 936 937 static OSet* createSecVBitTable(void) 938 { 939 OSet* newSecVBitTable; 940 newSecVBitTable = VG_(OSetGen_Create_With_Pool) 941 ( offsetof(SecVBitNode, a), 942 NULL, // use fast comparisons 943 VG_(malloc), "mc.cSVT.1 (sec VBit table)", 944 VG_(free), 945 1000, 946 sizeof(SecVBitNode)); 947 return newSecVBitTable; 948 } 949 950 static void gcSecVBitTable(void) 951 { 952 OSet* secVBitTable2; 953 SecVBitNode* n; 954 Int i, n_nodes = 0, n_survivors = 0; 955 956 GCs_done++; 957 958 // Create the new table. 959 secVBitTable2 = createSecVBitTable(); 960 961 // Traverse the table, moving fresh nodes into the new table. 962 VG_(OSetGen_ResetIter)(secVBitTable); 963 while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) { 964 // Keep node if any of its bytes are non-stale. Using 965 // get_vabits2() for the lookup is not very efficient, but I don't 966 // think it matters. 967 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) { 968 if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) { 969 // Found a non-stale byte, so keep => 970 // Insert a copy of the node into the new table. 971 SecVBitNode* n2 = 972 VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode)); 973 *n2 = *n; 974 VG_(OSetGen_Insert)(secVBitTable2, n2); 975 break; 976 } 977 } 978 } 979 980 // Get the before and after sizes. 981 n_nodes = VG_(OSetGen_Size)(secVBitTable); 982 n_survivors = VG_(OSetGen_Size)(secVBitTable2); 983 984 // Destroy the old table, and put the new one in its place. 
985 VG_(OSetGen_Destroy)(secVBitTable); 986 secVBitTable = secVBitTable2; 987 988 if (VG_(clo_verbosity) > 1 && n_nodes != 0) { 989 VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n", 990 n_nodes, n_survivors, n_survivors * 100.0 / n_nodes); 991 } 992 993 // Increase table size if necessary. 994 if ((Double)n_survivors 995 > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) { 996 secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR); 997 if (VG_(clo_verbosity) > 1) 998 VG_(message)(Vg_DebugMsg, 999 "memcheck GC: %d new table size (stepup)\n", 1000 secVBitLimit); 1001 } 1002 else 1003 if (secVBitLimit < DRIFTUP_MAX_SIZE 1004 && (Double)n_survivors 1005 > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) { 1006 secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR); 1007 if (VG_(clo_verbosity) > 1) 1008 VG_(message)(Vg_DebugMsg, 1009 "memcheck GC: %d new table size (driftup)\n", 1010 secVBitLimit); 1011 } 1012 } 1013 1014 static UWord get_sec_vbits8(Addr a) 1015 { 1016 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE); 1017 Int amod = a % BYTES_PER_SEC_VBIT_NODE; 1018 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned); 1019 UChar vbits8; 1020 tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a); 1021 // Shouldn't be fully defined or fully undefined -- those cases shouldn't 1022 // make it to the secondary V bits table. 1023 vbits8 = n->vbits8[amod]; 1024 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8); 1025 return vbits8; 1026 } 1027 1028 static void set_sec_vbits8(Addr a, UWord vbits8) 1029 { 1030 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE); 1031 Int i, amod = a % BYTES_PER_SEC_VBIT_NODE; 1032 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned); 1033 // Shouldn't be fully defined or fully undefined -- those cases shouldn't 1034 // make it to the secondary V bits table. 
1035 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8); 1036 if (n) { 1037 n->vbits8[amod] = vbits8; // update 1038 sec_vbits_updates++; 1039 } else { 1040 // Do a table GC if necessary. Nb: do this before creating and 1041 // inserting the new node, to avoid erroneously GC'ing the new node. 1042 if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) { 1043 gcSecVBitTable(); 1044 } 1045 1046 // New node: assign the specific byte, make the rest invalid (they 1047 // should never be read as-is, but be cautious). 1048 n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode)); 1049 n->a = aAligned; 1050 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) { 1051 n->vbits8[i] = V_BITS8_UNDEFINED; 1052 } 1053 n->vbits8[amod] = vbits8; 1054 1055 // Insert the new node. 1056 VG_(OSetGen_Insert)(secVBitTable, n); 1057 sec_vbits_new_nodes++; 1058 1059 n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable); 1060 if (n_secVBit_nodes > max_secVBit_nodes) 1061 max_secVBit_nodes = n_secVBit_nodes; 1062 } 1063 } 1064 1065 /* --------------- Endianness helpers --------------- */ 1066 1067 /* Returns the offset in memory of the byteno-th most significant byte 1068 in a wordszB-sized word, given the specified endianness. */ 1069 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian, 1070 UWord byteno ) { 1071 return bigendian ? 
(wordszB-1-byteno) : byteno; 1072 } 1073 1074 1075 /* --------------- Ignored address ranges --------------- */ 1076 1077 /* Denotes the address-error-reportability status for address ranges: 1078 IAR_NotIgnored: the usual case -- report errors in this range 1079 IAR_CommandLine: don't report errors -- from command line setting 1080 IAR_ClientReq: don't report errors -- from client request 1081 */ 1082 typedef 1083 enum { IAR_INVALID=99, 1084 IAR_NotIgnored, 1085 IAR_CommandLine, 1086 IAR_ClientReq } 1087 IARKind; 1088 1089 static const HChar* showIARKind ( IARKind iark ) 1090 { 1091 switch (iark) { 1092 case IAR_INVALID: return "INVALID"; 1093 case IAR_NotIgnored: return "NotIgnored"; 1094 case IAR_CommandLine: return "CommandLine"; 1095 case IAR_ClientReq: return "ClientReq"; 1096 default: return "???"; 1097 } 1098 } 1099 1100 // RangeMap<IARKind> 1101 static RangeMap* gIgnoredAddressRanges = NULL; 1102 1103 static void init_gIgnoredAddressRanges ( void ) 1104 { 1105 if (LIKELY(gIgnoredAddressRanges != NULL)) 1106 return; 1107 gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1", 1108 VG_(free), IAR_NotIgnored ); 1109 } 1110 1111 Bool MC_(in_ignored_range) ( Addr a ) 1112 { 1113 if (LIKELY(gIgnoredAddressRanges == NULL)) 1114 return False; 1115 UWord how = IAR_INVALID; 1116 UWord key_min = ~(UWord)0; 1117 UWord key_max = (UWord)0; 1118 VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a); 1119 tl_assert(key_min <= a && a <= key_max); 1120 switch (how) { 1121 case IAR_NotIgnored: return False; 1122 case IAR_CommandLine: return True; 1123 case IAR_ClientReq: return True; 1124 default: break; /* invalid */ 1125 } 1126 VG_(tool_panic)("MC_(in_ignore_range)"); 1127 /*NOTREACHED*/ 1128 } 1129 1130 Bool MC_(in_ignored_range_below_sp) ( Addr sp, Addr a, UInt szB ) 1131 { 1132 if (LIKELY(!MC_(clo_ignore_range_below_sp))) 1133 return False; 1134 tl_assert(szB >= 1 && szB <= 32); 1135 tl_assert(MC_(clo_ignore_range_below_sp__first_offset) 
1136 > MC_(clo_ignore_range_below_sp__last_offset)); 1137 Addr range_lo = sp - MC_(clo_ignore_range_below_sp__first_offset); 1138 Addr range_hi = sp - MC_(clo_ignore_range_below_sp__last_offset); 1139 if (range_lo >= range_hi) { 1140 /* Bizarre. We have a wraparound situation. What should we do? */ 1141 return False; // Play safe 1142 } else { 1143 /* This is the expected case. */ 1144 if (range_lo <= a && a + szB - 1 <= range_hi) 1145 return True; 1146 else 1147 return False; 1148 } 1149 /*NOTREACHED*/ 1150 tl_assert(0); 1151 } 1152 1153 /* Parse two Addrs (in hex) separated by a dash, or fail. */ 1154 1155 static Bool parse_Addr_pair ( const HChar** ppc, Addr* result1, Addr* result2 ) 1156 { 1157 Bool ok = VG_(parse_Addr) (ppc, result1); 1158 if (!ok) 1159 return False; 1160 if (**ppc != '-') 1161 return False; 1162 (*ppc)++; 1163 ok = VG_(parse_Addr) (ppc, result2); 1164 if (!ok) 1165 return False; 1166 return True; 1167 } 1168 1169 /* Parse two UInts (32 bit unsigned, in decimal) separated by a dash, 1170 or fail. */ 1171 1172 static Bool parse_UInt_pair ( const HChar** ppc, UInt* result1, UInt* result2 ) 1173 { 1174 Bool ok = VG_(parse_UInt) (ppc, result1); 1175 if (!ok) 1176 return False; 1177 if (**ppc != '-') 1178 return False; 1179 (*ppc)++; 1180 ok = VG_(parse_UInt) (ppc, result2); 1181 if (!ok) 1182 return False; 1183 return True; 1184 } 1185 1186 /* Parse a set of ranges separated by commas into 'ignoreRanges', or 1187 fail. If they are valid, add them to the global set of ignored 1188 ranges. 
*/ 1189 static Bool parse_ignore_ranges ( const HChar* str0 ) 1190 { 1191 init_gIgnoredAddressRanges(); 1192 const HChar* str = str0; 1193 const HChar** ppc = &str; 1194 while (1) { 1195 Addr start = ~(Addr)0; 1196 Addr end = (Addr)0; 1197 Bool ok = parse_Addr_pair(ppc, &start, &end); 1198 if (!ok) 1199 return False; 1200 if (start > end) 1201 return False; 1202 VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine ); 1203 if (**ppc == 0) 1204 return True; 1205 if (**ppc != ',') 1206 return False; 1207 (*ppc)++; 1208 } 1209 /*NOTREACHED*/ 1210 return False; 1211 } 1212 1213 /* Add or remove [start, +len) from the set of ignored ranges. */ 1214 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len ) 1215 { 1216 init_gIgnoredAddressRanges(); 1217 const Bool verbose = (VG_(clo_verbosity) > 1); 1218 if (len == 0) { 1219 return False; 1220 } 1221 if (addRange) { 1222 VG_(bindRangeMap)(gIgnoredAddressRanges, 1223 start, start+len-1, IAR_ClientReq); 1224 if (verbose) 1225 VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n", 1226 (void*)start, (void*)(start+len-1)); 1227 } else { 1228 VG_(bindRangeMap)(gIgnoredAddressRanges, 1229 start, start+len-1, IAR_NotIgnored); 1230 if (verbose) 1231 VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n", 1232 (void*)start, (void*)(start+len-1)); 1233 } 1234 if (verbose) { 1235 VG_(dmsg)("memcheck: now have %u ranges:\n", 1236 VG_(sizeRangeMap)(gIgnoredAddressRanges)); 1237 UInt i; 1238 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) { 1239 UWord val = IAR_INVALID; 1240 UWord key_min = ~(UWord)0; 1241 UWord key_max = (UWord)0; 1242 VG_(indexRangeMap)( &key_min, &key_max, &val, 1243 gIgnoredAddressRanges, i ); 1244 VG_(dmsg)("memcheck: [%u] %016lx-%016lx %s\n", 1245 i, key_min, key_max, showIARKind(val)); 1246 } 1247 } 1248 return True; 1249 } 1250 1251 1252 /* --------------- Load/store slow cases. 
--------------- */

/* Slow-path V-bit load for wide (128- or 256-bit) accesses.
   Writes one 64-bit V word per 8 bytes of the nBits-wide load at 'a'
   into res[].  The byte size must be a power of two, at least 8, and
   must fit the local pessim[] array (4 x 64 bits).
   If any byte is unaddressable an address error is recorded, unless
   the --partial-loads-ok exemption applies (see below), in which case
   the V bits of the unaddressable bytes are forced to Undefined
   instead. */
static
__attribute__((noinline))
void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
                                Addr a, SizeT nBits, Bool bigendian )
{
   ULong  pessim[4];     /* only used when p-l-ok=yes */
   SSizeT szB            = nBits / 8;
   SSizeT szL            = szB / 8;  /* Size in Longs (64-bit units) */
   SSizeT i, j;          /* Must be signed. */
   SizeT  n_addrs_bad = 0;
   Addr   ai;
   UChar  vbits8;
   Bool   ok;

   /* Code below assumes load size is a power of two and at least 64
      bits. */
   tl_assert((szB & (szB-1)) == 0 && szL > 0);

   /* If this triggers, you probably just need to increase the size of
      the pessim array. */
   tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));

   for (j = 0; j < szL; j++) {
      pessim[j] = V_BITS64_DEFINED;
      res[j] = V_BITS64_UNDEFINED;
   }

   /* Make up a result V word, which contains the loaded data for
      valid addresses and Defined for invalid addresses.  Iterate over
      the bytes in the word, from the most significant down to the
      least.  The vbits to return are calculated into res[].  Also
      compute the pessimising value to be used when
      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
      info can be gleaned from the pessim array) but is used as a
      cross-check. */
   for (j = szL-1; j >= 0; j--) {
      ULong vbits64    = V_BITS64_UNDEFINED;
      ULong pessim64   = V_BITS64_DEFINED;
      UWord long_index = byte_offset_w(szL, bigendian, j);
      for (i = 8-1; i >= 0; i--) {
         PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
         ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
         ok = get_vbits8(ai, &vbits8);
         vbits64 <<= 8;
         vbits64 |= vbits8;
         if (!ok) n_addrs_bad++;
         pessim64 <<= 8;
         pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
      }
      res[long_index] = vbits64;
      pessim[long_index] = pessim64;
   }

   /* In the common case, all the addresses involved are valid, so we
      just return the computed V bits and have done. */
   if (LIKELY(n_addrs_bad == 0))
      return;

   /* If there's no possibility of getting a partial-loads-ok
      exemption, report the error and quit. */
   if (!MC_(clo_partial_loads_ok)) {
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
      return;
   }

   /* The partial-loads-ok exemption might apply.  Find out if it
      does.  If so, don't report an addressing error, but do return
      Undefined for the bytes that are out of range, so as to avoid
      false negatives.  If it doesn't apply, just report an addressing
      error in the usual way. */

   /* Some code steps along byte strings in aligned chunks
      even when there is only a partially defined word at the end (eg,
      optimised strlen).  This is allowed by the memory model of
      modern machines, since an aligned load cannot span two pages and
      thus cannot "partially fault".

      Therefore, a load from a partially-addressable place is allowed
      if all of the following hold:
      - the command-line flag is set [by default, it isn't]
      - it's an aligned load
      - at least one of the addresses in the word *is* valid

      Since this suppresses the addressing error, we avoid false
      negatives by marking bytes undefined when they come from an
      invalid address.
   */

   /* "at least one of the addresses is invalid" */
   ok = False;
   for (j = 0; j < szL; j++)
      ok |= pessim[j] != V_BITS64_DEFINED;
   tl_assert(ok);

   if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
      /* Exemption applies.  Use the previously computed pessimising
         value and return the combined result, but don't flag an
         addressing error.  The pessimising value is Defined for valid
         addresses and Undefined for invalid addresses. */
      /* for assumption that doing bitwise or implements UifU */
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits[j] UifU= pessim[j]  (is pessimised by it, iow) */
      for (j = szL-1; j >= 0; j--)
         res[j] |= pessim[j];
      return;
   }

   /* Exemption doesn't apply.  Flag an addressing error in the normal
      way. */
   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
}


/* Slow-path V-bit load for 8-, 16-, 32- and 64-bit accesses.
   Returns the V bits for the nBits-wide load at 'a', assembled a byte
   at a time into a ULong that starts out as V_BITS64_UNDEFINED.  If any
   byte is unaddressable an address error is recorded, unless one of
   the --partial-loads-ok exemptions below applies. */
static
__attribute__((noinline))
__attribute__((used))
VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
                 this function may get called from hand written assembly. */
ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
{
   PROF_EVENT(MCPE_LOADVN_SLOW);

   /* ------------ BEGIN semi-fast cases ------------ */
   /* These deal quickly-ish with the common auxiliary primary map
      cases on 64-bit platforms.  Are merely a speedup hack; can be
      omitted without loss of correctness/functionality.  Note that in
      both cases the "sizeof(void*) == 8" causes these cases to be
      folded out by compilers on 32-bit platforms.  These are derived
      from LOADV64 and LOADV32.
   */
   if (LIKELY(sizeof(void*) == 8
                      && nBits == 64 && VG_IS_8_ALIGNED(a))) {
      SecMap* sm       = get_secmap_for_reading(a);
      UWord   sm_off16 = SM_OFF_16(a);
      UWord   vabits16 = sm->vabits16[sm_off16];
      if (LIKELY(vabits16 == VA_BITS16_DEFINED))
         return V_BITS64_DEFINED;
      if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
         return V_BITS64_UNDEFINED;
      /* else fall into the slow case */
   }
   if (LIKELY(sizeof(void*) == 8
                      && nBits == 32 && VG_IS_4_ALIGNED(a))) {
      SecMap* sm = get_secmap_for_reading(a);
      UWord sm_off = SM_OFF(a);
      UWord vabits8 = sm->vabits8[sm_off];
      /* In both returns the upper 32 bits of the result are all ones. */
      if (LIKELY(vabits8 == VA_BITS8_DEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
      if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
      /* else fall into slow case */
   }
   /* ------------ END semi-fast cases ------------ */

   ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
   ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
   SSizeT szB         = nBits / 8;
   SSizeT i;          /* Must be signed. */
   SizeT  n_addrs_bad = 0;
   Addr   ai;
   UChar  vbits8;
   Bool   ok;

   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);

   /* Make up a 64-bit result V word, which contains the loaded data
      for valid addresses and Defined for invalid addresses.  Iterate
      over the bytes in the word, from the most significant down to
      the least.  The vbits to return are calculated into vbits64.
      Also compute the pessimising value to be used when
      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
      info can be gleaned from pessim64) but is used as a
      cross-check. */
   for (i = szB-1; i >= 0; i--) {
      PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
      ai = a + byte_offset_w(szB, bigendian, i);
      ok = get_vbits8(ai, &vbits8);
      vbits64 <<= 8;
      vbits64 |= vbits8;
      if (!ok) n_addrs_bad++;
      pessim64 <<= 8;
      pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   }

   /* In the common case, all the addresses involved are valid, so we
      just return the computed V bits and have done. */
   if (LIKELY(n_addrs_bad == 0))
      return vbits64;

   /* If there's no possibility of getting a partial-loads-ok
      exemption, report the error and quit. */
   if (!MC_(clo_partial_loads_ok)) {
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
      return vbits64;
   }

   /* The partial-loads-ok exemption might apply.  Find out if it
      does.  If so, don't report an addressing error, but do return
      Undefined for the bytes that are out of range, so as to avoid
      false negatives.  If it doesn't apply, just report an addressing
      error in the usual way. */

   /* Some code steps along byte strings in aligned word-sized chunks
      even when there is only a partially defined word at the end (eg,
      optimised strlen).  This is allowed by the memory model of
      modern machines, since an aligned load cannot span two pages and
      thus cannot "partially fault".  Despite such behaviour being
      declared undefined by ANSI C/C++.

      Therefore, a load from a partially-addressable place is allowed
      if all of the following hold:
      - the command-line flag is set [by default, it isn't]
      - it's a word-sized, word-aligned load
      - at least one of the addresses in the word *is* valid

      Since this suppresses the addressing error, we avoid false
      negatives by marking bytes undefined when they come from an
      invalid address.
   */

   /* "at least one of the addresses is invalid" */
   tl_assert(pessim64 != V_BITS64_DEFINED);

   if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
       && n_addrs_bad < VG_WORDSIZE) {
      /* Exemption applies.  Use the previously computed pessimising
         value for vbits64 and return the combined result, but don't
         flag an addressing error.  The pessimising value is Defined
         for valid addresses and Undefined for invalid addresses. */
      /* for assumption that doing bitwise or implements UifU */
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits64 UifU= pessim64  (is pessimised by it, iow) */
      vbits64 |= pessim64;
      return vbits64;
   }

   /* Also, it appears that gcc generates string-stepping code in
      32-bit chunks on 64 bit platforms.  So, also grant an exception
      for this case.  Note that the first clause of the conditional
      (VG_WORDSIZE == 8) is known at compile time, so the whole clause
      will get folded out in 32 bit builds. */
   if (VG_WORDSIZE == 8
       && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits64 UifU= pessim64  (is pessimised by it, iow) */
      vbits64 |= pessim64;
      /* Mark the upper 32 bits as undefined, just to be on the safe
         side. */
      vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
      return vbits64;
   }

   /* Exemption doesn't apply.  Flag an addressing error in the normal
      way. */
   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );

   return vbits64;
}


/* Slow-path V-bit store for 8-, 16-, 32- and 64-bit accesses.
   Writes the low nBits of 'vbytes' as the V bits of the nBits-wide
   location at 'a', a byte at a time via set_vbits8(), and records an
   address error (flagged as a write) if any byte was unaddressable. */
static
__attribute__((noinline))
void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
{
   SizeT szB = nBits / 8;
   SizeT i, n_addrs_bad = 0;
   UChar vbits8;
   Addr  ai;
   Bool  ok;

   PROF_EVENT(MCPE_STOREVN_SLOW);

   /* ------------ BEGIN semi-fast cases ------------ */
   /* These deal quickly-ish with the common auxiliary primary map
      cases on 64-bit platforms.  Are merely a speedup hack; can be
      omitted without loss of correctness/functionality.  Note that in
      both cases the "sizeof(void*) == 8" causes these cases to be
      folded out by compilers on 32-bit platforms.  The logic below
      is somewhat similar to some cases extensively commented in
      MC_(helperc_STOREV8).
   */
   if (LIKELY(sizeof(void*) == 8
                      && nBits == 64 && VG_IS_8_ALIGNED(a))) {
      /* The sec-map is fetched for reading, and is only written
         through below once it is known not to be distinguished. */
      SecMap* sm       = get_secmap_for_reading(a);
      UWord   sm_off16 = SM_OFF_16(a);
      UWord   vabits16 = sm->vabits16[sm_off16];
      if (LIKELY( !is_distinguished_sm(sm) &&
                          (VA_BITS16_DEFINED   == vabits16 ||
                           VA_BITS16_UNDEFINED == vabits16) )) {
         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressable. */
         // Convert full V-bits in register to compact 2-bit form.
         if (LIKELY(V_BITS64_DEFINED == vbytes)) {
            sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
            return;
         } else if (V_BITS64_UNDEFINED == vbytes) {
            sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
            return;
         }
         /* else fall into the slow case */
      }
      /* else fall into the slow case */
   }
   if (LIKELY(sizeof(void*) == 8
                      && nBits == 32 && VG_IS_4_ALIGNED(a))) {
      /* As above: written through only if not distinguished. */
      SecMap* sm      = get_secmap_for_reading(a);
      UWord   sm_off  = SM_OFF(a);
      UWord   vabits8 = sm->vabits8[sm_off];
      if (LIKELY( !is_distinguished_sm(sm) &&
                          (VA_BITS8_DEFINED   == vabits8 ||
                           VA_BITS8_UNDEFINED == vabits8) )) {
         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressable. */
         // Convert full V-bits in register to compact 2-bit form.
         if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
            sm->vabits8[sm_off] = VA_BITS8_DEFINED;
            return;
         } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
            sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
            return;
         }
         /* else fall into the slow case */
      }
      /* else fall into the slow case */
   }
   /* ------------ END semi-fast cases ------------ */

   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);

   /* Dump vbytes in memory, iterating from least to most significant
      byte.  At the same time establish addressability of the location. */
   for (i = 0; i < szB; i++) {
      PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
      ai     = a + byte_offset_w(szB, bigendian, i);
      vbits8 = vbytes & 0xff;
      ok     = set_vbits8(ai, vbits8);
      if (!ok) n_addrs_bad++;
      vbytes >>= 8;
   }

   /* If an address error has happened, report it. */
   if (n_addrs_bad > 0)
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
}


/*------------------------------------------------------------*/
/*--- Setting permissions over address ranges. 
---*/
/*------------------------------------------------------------*/

/* Set the V+A bits of every byte in [a, a+lenT) to one uniform state.
   vabits16 is the 16-bit pattern stored per aligned 8-byte group; it
   must be one of VA_BITS16_NOACCESS, VA_BITS16_UNDEFINED or
   VA_BITS16_DEFINED.  Its low two bits (vabits2) are what single bytes
   get.  dsm_num indexes sm_distinguished[] and selects the
   distinguished sec-map that wholly covered sec-maps are pointed at.
   Emits a warning for ranges larger than 256MB. */
static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
                                      UWord dsm_num )
{
   UWord    sm_off, sm_off16;
   UWord    vabits2 = vabits16 & 0x3;
   SizeT    lenA, lenB, len_to_next_secmap;
   Addr     aNext;
   SecMap*  sm;
   SecMap** sm_ptr;
   SecMap*  example_dsm;

   PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS);

   /* Check the V+A bits make sense. */
   tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
             VA_BITS16_UNDEFINED == vabits16 ||
             VA_BITS16_DEFINED   == vabits16);

   // This code should never write PDBs;  ensure this.  (See comment above
   // set_vabits2().)
   tl_assert(VA_BITS2_PARTDEFINED != vabits2);

   if (lenT == 0)
      return;

   if (lenT > 256 * 1024 * 1024) {
      if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
         const HChar* s = "unknown???";
         if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
         if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
         if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
         VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
                                  "large range [0x%lx, 0x%lx) (%s)\n",
                                  a, a + lenT, s);
      }
   }

#ifndef PERF_FAST_SARP
   /*------------------ debug-only case ------------------ */
   {
      // Endianness doesn't matter here because all bytes are being set to
      // the same value.
      // Nb: We don't have to worry about updating the sec-V-bits table
      // after these set_vabits2() calls because this code never writes
      // VA_BITS2_PARTDEFINED values.
      SizeT i;
      for (i = 0; i < lenT; i++) {
         set_vabits2(a + i, vabits2);
      }
      return;
   }
#endif

   /*------------------ standard handling ------------------ */

   /* Get the distinguished secondary that we might want
      to use (part of the space-compression scheme). */
   example_dsm = &sm_distinguished[dsm_num];

   // We have to handle ranges covering various combinations of partial and
   // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
   // Cases marked with a '*' are common.
   //
   //   TYPE                                             PARTS USED
   //   ----                                             ----------
   // * one partial sec-map                  (p)         1
   // - one whole sec-map                    (P)         2
   //
   // * two partial sec-maps                 (pp)        1,3
   // - one partial, one whole sec-map       (pP)        1,2
   // - one whole, one partial sec-map       (Pp)        2,3
   // - two whole sec-maps                   (PP)        2,2
   //
   // * one partial, one whole, one partial  (pPp)       1,2,3
   // - one partial, two whole               (pPP)       1,2,2
   // - two whole, one partial               (PPp)       2,2,3
   // - three whole                          (PPP)       2,2,2
   //
   // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
   // - one partial, N-1 whole               (pP...PP)   1,2...2,2
   // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
   // - N whole                              (PP...PP)   2,2...2,2

   // Break up total length (lenT) into two parts:  length in the first
   // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
   aNext = start_of_this_sm(a) + SM_SIZE;
   len_to_next_secmap = aNext - a;
   if ( lenT <= len_to_next_secmap ) {
      // Range entirely within one sec-map.  Covers almost all cases.
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP);
      lenA = lenT;
      lenB = 0;
   } else if (is_start_of_sm(a)) {
      // Range spans at least one whole sec-map, and starts at the beginning
      // of a sec-map; skip to Part 2.
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP);
      lenA = 0;
      lenB = lenT;
      goto part2;
   } else {
      // Range spans two or more sec-maps, first one is partial.
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS);
      lenA = len_to_next_secmap;
      lenB = lenT - lenA;
   }

   //------------------------------------------------------------------------
   // Part 1: Deal with the first sec_map.  Most of the time the range will be
   // entirely within a sec_map and this part alone will suffice.  Also,
   // doing it this way lets us avoid repeatedly testing for the crossing of
   // a sec-map boundary within these loops.
   //------------------------------------------------------------------------

   // If it's distinguished, make it undistinguished if necessary.
   sm_ptr = get_secmap_ptr(a);
   if (is_distinguished_sm(*sm_ptr)) {
      if (*sm_ptr == example_dsm) {
         // Sec-map already has the V+A bits that we want, so skip.
         // (Zeroing lenA makes the three loops below do nothing.)
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
         a    = aNext;
         lenA = 0;
      } else {
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
         *sm_ptr = copy_for_writing(*sm_ptr);
      }
   }
   sm = *sm_ptr;

   // 1 byte steps
   // (until 'a' is 8-aligned or this sec-map's share is used up)
   while (True) {
      if (VG_IS_8_ALIGNED(a)) break;
      if (lenA < 1)           break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
      sm_off = SM_OFF(a);
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
      a    += 1;
      lenA -= 1;
   }
   // 8-aligned, 8 byte steps
   while (True) {
      if (lenA < 8) break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
      sm_off16 = SM_OFF_16(a);
      sm->vabits16[sm_off16] = vabits16;
      a    += 8;
      lenA -= 8;
   }
   // 1 byte steps
   // (the fewer-than-8 bytes left over in this sec-map)
   while (True) {
      if (lenA < 1) break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
      sm_off = SM_OFF(a);
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
      a    += 1;
      lenA -= 1;
   }

   // We've finished the first sec-map.  Is that it?
   if (lenB == 0)
      return;

   //------------------------------------------------------------------------
   // Part 2: Fast-set entire sec-maps at a time.
   //------------------------------------------------------------------------
  part2:
   // 64KB-aligned, 64KB steps.
   // Nb: we can reach here with lenB < SM_SIZE
   tl_assert(0 == lenA);
   while (True) {
      if (lenB < SM_SIZE) break;
      tl_assert(is_start_of_sm(a));
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K);
      sm_ptr = get_secmap_ptr(a);
      if (!is_distinguished_sm(*sm_ptr)) {
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM);
         // Free the non-distinguished sec-map that we're replacing.  This
         // case happens moderately often, enough to be worthwhile.
         SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
         tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
      }
      update_SM_counts(*sm_ptr, example_dsm);
      // Make the sec-map entry point to the example DSM
      *sm_ptr = example_dsm;
      lenB -= SM_SIZE;
      a    += SM_SIZE;
   }

   // We've finished the whole sec-maps.  Is that it?
   if (lenB == 0)
      return;

   //------------------------------------------------------------------------
   // Part 3: Finish off the final partial sec-map, if necessary.
   //------------------------------------------------------------------------

   tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);

   // If it's distinguished, make it undistinguished if necessary.
   sm_ptr = get_secmap_ptr(a);
   if (is_distinguished_sm(*sm_ptr)) {
      if (*sm_ptr == example_dsm) {
         // Sec-map already has the V+A bits that we want, so stop.
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK);
         return;
      } else {
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2);
         *sm_ptr = copy_for_writing(*sm_ptr);
      }
   }
   sm = *sm_ptr;

   // 8-aligned, 8 byte steps
   // ('a' is at the start of a sec-map here, as asserted above)
   while (True) {
      if (lenB < 8) break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B);
      sm_off16 = SM_OFF_16(a);
      sm->vabits16[sm_off16] = vabits16;
      a    += 8;
      lenB -= 8;
   }
   // 1 byte steps
   // (this loop is also the function's exit: it returns once lenB is 0)
   while (True) {
      if (lenB < 1) return;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C);
      sm_off = SM_OFF(a);
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
      a    += 1;
      lenB -= 1;
   }
}


/* --- Set permissions for arbitrary address ranges --- */

/* Mark [a, a+len) as unaddressable.  When MC_(clo_mc_level) is 3 the
   range is also passed to ocache_sarp_Clear_Origins. */
void MC_(make_mem_noaccess) ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
   DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
      ocache_sarp_Clear_Origins ( a, len );
}

/* Mark [a, a+len) as addressable but undefined.  Unlike the _w_otag
   variant below, this makes no ocache_sarp_* call. */
static void make_mem_undefined ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
   DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
}

/* Mark [a, a+len) as addressable but undefined and, when
   MC_(clo_mc_level) is 3, pass the range and 'otag' to
   ocache_sarp_Set_Origins. */
void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
{
   PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
   DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
      ocache_sarp_Set_Origins ( a, len, otag );
}

static
void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
                                          ThreadId tid, UInt okind )
{
   UInt        ecu;
   ExeContext* here;
   /* VG_(record_ExeContext) checks for validity of tid, and asserts
if it is invalid. So no need to do it here. */ 1874 tl_assert(okind <= 3); 1875 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ ); 1876 tl_assert(here); 1877 ecu = VG_(get_ECU_from_ExeContext)(here); 1878 tl_assert(VG_(is_plausible_ECU)(ecu)); 1879 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind ); 1880 } 1881 1882 static 1883 void mc_new_mem_w_tid_make_ECU ( Addr a, SizeT len, ThreadId tid ) 1884 { 1885 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN ); 1886 } 1887 1888 static 1889 void mc_new_mem_w_tid_no_ECU ( Addr a, SizeT len, ThreadId tid ) 1890 { 1891 MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN ); 1892 } 1893 1894 void MC_(make_mem_defined) ( Addr a, SizeT len ) 1895 { 1896 PROF_EVENT(MCPE_MAKE_MEM_DEFINED); 1897 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len); 1898 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED ); 1899 if (UNLIKELY( MC_(clo_mc_level) == 3 )) 1900 ocache_sarp_Clear_Origins ( a, len ); 1901 } 1902 1903 __attribute__((unused)) 1904 static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid ) 1905 { 1906 MC_(make_mem_defined)(a, len); 1907 } 1908 1909 /* For each byte in [a,a+len), if the byte is addressable, make it be 1910 defined, but if it isn't addressible, leave it alone. In other 1911 words a version of MC_(make_mem_defined) that doesn't mess with 1912 addressibility. Low-performance implementation. */ 1913 static void make_mem_defined_if_addressable ( Addr a, SizeT len ) 1914 { 1915 SizeT i; 1916 UChar vabits2; 1917 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len); 1918 for (i = 0; i < len; i++) { 1919 vabits2 = get_vabits2( a+i ); 1920 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) { 1921 set_vabits2(a+i, VA_BITS2_DEFINED); 1922 if (UNLIKELY(MC_(clo_mc_level) >= 3)) { 1923 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */ 1924 } 1925 } 1926 } 1927 } 1928 1929 /* Similarly (needed for mprotect handling ..) 
*/ 1930 static void make_mem_defined_if_noaccess ( Addr a, SizeT len ) 1931 { 1932 SizeT i; 1933 UChar vabits2; 1934 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len); 1935 for (i = 0; i < len; i++) { 1936 vabits2 = get_vabits2( a+i ); 1937 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) { 1938 set_vabits2(a+i, VA_BITS2_DEFINED); 1939 if (UNLIKELY(MC_(clo_mc_level) >= 3)) { 1940 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */ 1941 } 1942 } 1943 } 1944 } 1945 1946 /* --- Block-copy permissions (needed for implementing realloc() and 1947 sys_mremap). --- */ 1948 1949 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len ) 1950 { 1951 SizeT i, j; 1952 UChar vabits2, vabits8; 1953 Bool aligned, nooverlap; 1954 1955 DEBUG("MC_(copy_address_range_state)\n"); 1956 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE); 1957 1958 if (len == 0 || src == dst) 1959 return; 1960 1961 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst); 1962 nooverlap = src+len <= dst || dst+len <= src; 1963 1964 if (nooverlap && aligned) { 1965 1966 /* Vectorised fast case, when no overlap and suitably aligned */ 1967 /* vector loop */ 1968 i = 0; 1969 while (len >= 4) { 1970 vabits8 = get_vabits8_for_aligned_word32( src+i ); 1971 set_vabits8_for_aligned_word32( dst+i, vabits8 ); 1972 if (LIKELY(VA_BITS8_DEFINED == vabits8 1973 || VA_BITS8_UNDEFINED == vabits8 1974 || VA_BITS8_NOACCESS == vabits8)) { 1975 /* do nothing */ 1976 } else { 1977 /* have to copy secondary map info */ 1978 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 )) 1979 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) ); 1980 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 )) 1981 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) ); 1982 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 )) 1983 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) ); 1984 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 )) 1985 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) ); 1986 } 1987 i += 4; 1988 len -= 4; 
1989 } 1990 /* fixup loop */ 1991 while (len >= 1) { 1992 vabits2 = get_vabits2( src+i ); 1993 set_vabits2( dst+i, vabits2 ); 1994 if (VA_BITS2_PARTDEFINED == vabits2) { 1995 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) ); 1996 } 1997 i++; 1998 len--; 1999 } 2000 2001 } else { 2002 2003 /* We have to do things the slow way */ 2004 if (src < dst) { 2005 for (i = 0, j = len-1; i < len; i++, j--) { 2006 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1); 2007 vabits2 = get_vabits2( src+j ); 2008 set_vabits2( dst+j, vabits2 ); 2009 if (VA_BITS2_PARTDEFINED == vabits2) { 2010 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) ); 2011 } 2012 } 2013 } 2014 2015 if (src > dst) { 2016 for (i = 0; i < len; i++) { 2017 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2); 2018 vabits2 = get_vabits2( src+i ); 2019 set_vabits2( dst+i, vabits2 ); 2020 if (VA_BITS2_PARTDEFINED == vabits2) { 2021 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) ); 2022 } 2023 } 2024 } 2025 } 2026 2027 } 2028 2029 2030 /*------------------------------------------------------------*/ 2031 /*--- Origin tracking stuff - cache basics ---*/ 2032 /*------------------------------------------------------------*/ 2033 2034 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION 2035 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2036 2037 Note that this implementation draws inspiration from the "origin 2038 tracking by value piggybacking" scheme described in "Tracking Bad 2039 Apples: Reporting the Origin of Null and Undefined Value Errors" 2040 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer, 2041 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is 2042 implemented completely differently. 
2043 2044 Origin tags and ECUs -- about the shadow values 2045 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2046 2047 This implementation tracks the defining point of all uninitialised 2048 values using so called "origin tags", which are 32-bit integers, 2049 rather than using the values themselves to encode the origins. The 2050 latter, so-called "value piggybacking", is what the OOPSLA07 paper 2051 describes. 2052 2053 Origin tags, as tracked by the machinery below, are 32-bit unsigned 2054 ints (UInts), regardless of the machine's word size. Each tag 2055 comprises an upper 30-bit ECU field and a lower 2-bit 2056 'kind' field. The ECU field is a number given out by m_execontext 2057 and has a 1-1 mapping with ExeContext*s. An ECU can be used 2058 directly as an origin tag (otag), but in fact we want to put 2059 additional information in the 'kind' field to indicate roughly where the 2060 tag came from. This helps print more understandable error messages 2061 for the user -- it has no other purpose. In summary: 2062 2063 * Both ECUs and origin tags are represented as 32-bit words 2064 2065 * m_execontext and the core-tool interface deal purely in ECUs. 2066 They have no knowledge of origin tags - that is a purely 2067 Memcheck-internal matter. 2068 2069 * all valid ECUs have the lowest 2 bits zero and at least 2070 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU)) 2071 2072 * to convert from an ECU to an otag, OR in one of the MC_OKIND_ 2073 constants defined in mc_include.h. 2074 2075 * to convert an otag back to an ECU, AND it with ~3 2076 2077 One important fact is that no valid otag is zero. A zero otag is 2078 used by the implementation to indicate "no origin", which could 2079 mean that either the value is defined, or it is undefined but the 2080 implementation somehow managed to lose the origin. 2081 2082 The ECU used for memory created by malloc etc is derived from the 2083 stack trace at the time the malloc etc happens. 
This means the 2084 mechanism can show the exact allocation point for heap-created 2085 uninitialised values. 2086 2087 In contrast, it is simply too expensive to create a complete 2088 backtrace for each stack allocation. Therefore we merely use a 2089 depth-1 backtrace for stack allocations, which can be done once at 2090 translation time, rather than N times at run time. The result of 2091 this is that, for stack created uninitialised values, Memcheck can 2092 only show the allocating function, and not what called it. 2093 Furthermore, compilers tend to move the stack pointer just once at 2094 the start of the function, to allocate all locals, and so in fact 2095 the stack origin almost always simply points to the opening brace 2096 of the function. Net result is, for stack origins, the mechanism 2097 can tell you in which function the undefined value was created, but 2098 that's all. Users will need to carefully check all locals in the 2099 specified function. 2100 2101 Shadowing registers and memory 2102 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2103 2104 Memory is shadowed using a two level cache structure (ocacheL1 and 2105 ocacheL2). Memory references are first directed to ocacheL1. This 2106 is a traditional 2-way set associative cache with 32-byte lines and 2107 approximate LRU replacement within each set. 2108 2109 A naive implementation would require storing one 32 bit otag for 2110 each byte of memory covered, a 4:1 space overhead. Instead, there 2111 is one otag for every 4 bytes of memory covered, plus a 4-bit mask 2112 that shows which of the 4 bytes have that shadow value and which 2113 have a shadow value of zero (indicating no origin). Hence a lot of 2114 space is saved, but the cost is that only one different origin per 2115 4 bytes of address space can be represented. This is a source of 2116 imprecision, but how much of a problem it really is remains to be 2117 seen. 
2118 2119 A cache line that contains all zeroes ("no origins") contains no 2120 useful information, and can be ejected from the L1 cache "for 2121 free", in the sense that a read miss on the L1 causes a line of 2122 zeroes to be installed. However, ejecting a line containing 2123 nonzeroes risks losing origin information permanently. In order to 2124 prevent such lossage, ejected nonzero lines are placed in a 2125 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache 2126 lines. This can grow arbitrarily large, and so should ensure that 2127 Memcheck runs out of memory in preference to losing useful origin 2128 info due to cache size limitations. 2129 2130 Shadowing registers is a bit tricky, because the shadow values are 2131 32 bits, regardless of the size of the register. That gives a 2132 problem for registers smaller than 32 bits. The solution is to 2133 find spaces in the guest state that are unused, and use those to 2134 shadow guest state fragments smaller than 32 bits. For example, on 2135 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the 2136 shadow are allocated for the register's otag, then there are still 2137 12 bytes left over which could be used to shadow 3 other values. 2138 2139 This implies there is some non-obvious mapping from guest state 2140 (start,length) pairs to the relevant shadow offset (for the origin 2141 tags). And it is unfortunately guest-architecture specific. The 2142 mapping is contained in mc_machine.c, which is quite lengthy but 2143 straightforward. 2144 2145 Instrumenting the IR 2146 ~~~~~~~~~~~~~~~~~~~~ 2147 2148 Instrumentation is largely straightforward, and done by the 2149 functions schemeE and schemeS in mc_translate.c. These generate 2150 code for handling the origin tags of expressions (E) and statements 2151 (S) respectively. 
The rather strange names are a reference to the 2152 "compilation schemes" shown in Simon Peyton Jones' book "The 2153 Implementation of Functional Programming Languages" (Prentice Hall, 2154 1987, see 2155 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm). 2156 2157 schemeS merely arranges to move shadow values around the guest 2158 state to track the incoming IR. schemeE is largely trivial too. 2159 The only significant point is how to compute the otag corresponding 2160 to binary (or ternary, quaternary, etc) operator applications. The 2161 rule is simple: just take whichever value is larger (32-bit 2162 unsigned max). Constants get the special value zero. Hence this 2163 rule always propagates a nonzero (known) otag in preference to a 2164 zero (unknown, or more likely, value-is-defined) tag, as we want. 2165 If two different undefined values are inputs to a binary operator 2166 application, then which is propagated is arbitrary, but that 2167 doesn't matter, since the program is erroneous in using either of 2168 the values, and so there's no point in attempting to propagate 2169 both. 2170 2171 Since constants are abstracted to (otag) zero, much of the 2172 instrumentation code can be folded out without difficulty by the 2173 generic post-instrumentation IR cleanup pass, using these rules: 2174 Max32U(0,x) -> x, Max32U(x,0) -> x, Max32U(x,y) where x and y are 2175 constants is evaluated at JIT time. And the resulting dead code 2176 removal. In practice this causes surprisingly few Max32Us to 2177 survive through to backend code generation. 2178 2179 Integration with the V-bits machinery 2180 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2181 2182 This is again largely straightforward. Mostly the otag and V bits 2183 stuff are independent. 
The only point of interaction is when the V 2184 bits instrumenter creates a call to a helper function to report an 2185 uninitialised value error -- in that case it must first use schemeE 2186 to get hold of the origin tag expression for the value, and pass 2187 that to the helper too. 2188 2189 There is the usual stuff to do with setting address range 2190 permissions. When memory is painted undefined, we must also know 2191 the origin tag to paint with, which involves some tedious plumbing, 2192 particularly to do with the fast case stack handlers. When memory 2193 is painted defined or noaccess then the origin tags must be forced 2194 to zero. 2195 2196 One of the goals of the implementation was to ensure that the 2197 non-origin tracking mode isn't slowed down at all. To do this, 2198 various functions to do with memory permissions setting (again, 2199 mostly pertaining to the stack) are duplicated for the with- and 2200 without-otag case. 2201 2202 Dealing with stack redzones, and the NIA cache 2203 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2204 2205 This is one of the few non-obvious parts of the implementation. 2206 2207 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small 2208 reserved area below the stack pointer, that can be used as scratch 2209 space by compiler generated code for functions. In the Memcheck 2210 sources this is referred to as the "stack redzone". The important 2211 thing here is that such redzones are considered volatile across 2212 function calls and returns. So Memcheck takes care to mark them as 2213 undefined for each call and return, on the afflicted platforms. 2214 Past experience shows this is essential in order to get reliable 2215 messages about uninitialised values that come from the stack. 2216 2217 So the question is, when we paint a redzone undefined, what origin 2218 tag should we use for it? Consider a function f() calling g(). 
If 2219 we paint the redzone using an otag derived from the ExeContext of 2220 the CALL/BL instruction in f, then any errors in g causing it to 2221 use uninitialised values that happen to lie in the redzone, will be 2222 reported as having their origin in f. Which is highly confusing. 2223 2224 The same applies for returns: if, on a return, we paint the redzone 2225 using an origin tag derived from the ExeContext of the RET/BLR 2226 instruction in g, then any later errors in f causing it to use 2227 uninitialised values in the redzone, will be reported as having 2228 their origin in g. Which is just as confusing. 2229 2230 To do it right, in both cases we need to use an origin tag which 2231 pertains to the instruction which dynamically follows the CALL/BL 2232 or RET/BLR. In short, one derived from the NIA - the "next 2233 instruction address". 2234 2235 To make this work, Memcheck's redzone-painting helper, 2236 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the 2237 NIA. It converts the NIA to a 1-element ExeContext, and uses that 2238 ExeContext's ECU as the basis for the otag used to paint the 2239 redzone. The expensive part of this is converting an NIA into an 2240 ECU, since this happens once for every call and every return. So 2241 we use a simple 511-line, 2-way set associative cache 2242 (nia_to_ecu_cache) to cache the mappings, and that knocks most of 2243 the cost out. 2244 2245 Further background comments 2246 ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2247 2248 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't 2249 > it really just the address of the relevant ExeContext? 2250 2251 Well, it's not the address, but a value which has a 1-1 mapping 2252 with ExeContexts, and is guaranteed not to be zero, since zero 2253 denotes (to memcheck) "unknown origin or defined value". So these 2254 UInts are just numbers starting at 4 and incrementing by 4; each 2255 ExeContext is given a number when it is created. 
(*** NOTE this 2256 confuses otags and ECUs; see comments above ***). 2257 2258 Making these otags 32-bit regardless of the machine's word size 2259 makes the 64-bit implementation easier (next para). And it doesn't 2260 really limit us in any way, since for the tags to overflow would 2261 require that the program somehow caused 2^30-1 different 2262 ExeContexts to be created, in which case it is probably in deep 2263 trouble. Not to mention V will have soaked up many tens of 2264 gigabytes of memory merely to store them all. 2265 2266 So having 64-bit origins doesn't really buy you anything, and has 2267 the following downsides: 2268 2269 Suppose that instead, an otag is a UWord. This would mean that, on 2270 a 64-bit target, 2271 2272 1. It becomes hard to shadow any element of guest state which is 2273 smaller than 8 bytes. To do so means you'd need to find some 2274 8-byte-sized hole in the guest state which you don't want to 2275 shadow, and use that instead to hold the otag. On ppc64, the 2276 condition code register(s) are split into 20 UChar sized pieces, 2277 all of which need to be tracked (guest_XER_SO .. guest_CR7_0) 2278 and so that would entail finding 160 bytes somewhere else in the 2279 guest state. 2280 2281 Even on x86, I want to track origins for %AH .. %DH (bits 15:8 2282 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of 2283 same) and so I had to look for 4 untracked otag-sized areas in 2284 the guest state to make that possible. 2285 2286 The same problem exists of course when origin tags are only 32 2287 bits, but it's less extreme. 2288 2289 2. (More compelling) it doubles the size of the origin shadow 2290 memory. Given that the shadow memory is organised as a fixed 2291 size cache, and that accuracy of tracking is limited by origins 2292 falling out the cache due to space conflicts, this isn't good. 2293 2294 > Another question: is the origin tracking perfect, or are there 2295 > cases where it fails to determine an origin? 
2296 2297 It is imperfect for at least the following reasons, and 2298 probably more: 2299 2300 * Insufficient capacity in the origin cache. When a line is 2301 evicted from the cache it is gone forever, and so subsequent 2302 queries for the line produce zero, indicating no origin 2303 information. Interestingly, a line containing all zeroes can be 2304 evicted "free" from the cache, since it contains no useful 2305 information, so there is scope perhaps for some cleverer cache 2306 management schemes. (*** NOTE, with the introduction of the 2307 second level origin tag cache, ocacheL2, this is no longer a 2308 problem. ***) 2309 2310 * The origin cache only stores one otag per 32-bits of address 2311 space, plus 4 bits indicating which of the 4 bytes has that tag 2312 and which are considered defined. The result is that if two 2313 undefined bytes in the same word are stored in memory, the first 2314 stored byte's origin will be lost and replaced by the origin for 2315 the second byte. 2316 2317 * Nonzero origin tags for defined values. Consider a binary 2318 operator application op(x,y). Suppose y is undefined (and so has 2319 a valid nonzero origin tag), and x is defined, but erroneously 2320 has a nonzero origin tag (defined values should have tag zero). 2321 If the erroneous tag has a numeric value greater than y's tag, 2322 then the rule for propagating origin tags through binary 2323 operations, which is simply to take the unsigned max of the two 2324 tags, will erroneously propagate x's tag rather than y's. 2325 2326 * Some obscure uses of x86/amd64 byte registers can cause lossage 2327 or confusion of origins. %AH .. %DH are treated as different 2328 from, and unrelated to, their parent registers, %EAX .. %EDX. 2329 So some weird sequences like 2330 2331 movb undefined-value, %AH 2332 movb defined-value, %AL 2333 .. use %AX or %EAX .. 
2334 2335 will cause the origin attributed to %AH to be ignored, since %AL, 2336 %AX, %EAX are treated as the same register, and %AH as a 2337 completely separate one. 2338 2339 But having said all that, it actually seems to work fairly well in 2340 practice. 2341 */ 2342 2343 static UWord stats_ocacheL1_find = 0; 2344 static UWord stats_ocacheL1_found_at_1 = 0; 2345 static UWord stats_ocacheL1_found_at_N = 0; 2346 static UWord stats_ocacheL1_misses = 0; 2347 static UWord stats_ocacheL1_lossage = 0; 2348 static UWord stats_ocacheL1_movefwds = 0; 2349 2350 static UWord stats__ocacheL2_refs = 0; 2351 static UWord stats__ocacheL2_misses = 0; 2352 static UWord stats__ocacheL2_n_nodes_max = 0; 2353 2354 /* Cache of 32-bit values, one every 32 bits of address space */ 2355 2356 #define OC_BITS_PER_LINE 5 2357 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2)) 2358 2359 static INLINE UWord oc_line_offset ( Addr a ) { 2360 return (a >> 2) & (OC_W32S_PER_LINE - 1); 2361 } 2362 static INLINE Bool is_valid_oc_tag ( Addr tag ) { 2363 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1)); 2364 } 2365 2366 #define OC_LINES_PER_SET 2 2367 2368 #define OC_N_SET_BITS 20 2369 #define OC_N_SETS (1 << OC_N_SET_BITS) 2370 2371 /* These settings give: 2372 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful 2373 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful 2374 */ 2375 2376 #define OC_MOVE_FORWARDS_EVERY_BITS 7 2377 2378 2379 typedef 2380 struct { 2381 Addr tag; 2382 UInt w32[OC_W32S_PER_LINE]; 2383 UChar descr[OC_W32S_PER_LINE]; 2384 } 2385 OCacheLine; 2386 2387 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not 2388 in use, 'n' (nonzero) if it contains at least one valid origin tag, 2389 and 'z' if all the represented tags are zero. 
*/ 2390 static UChar classify_OCacheLine ( OCacheLine* line ) 2391 { 2392 UWord i; 2393 if (line->tag == 1/*invalid*/) 2394 return 'e'; /* EMPTY */ 2395 tl_assert(is_valid_oc_tag(line->tag)); 2396 for (i = 0; i < OC_W32S_PER_LINE; i++) { 2397 tl_assert(0 == ((~0xF) & line->descr[i])); 2398 if (line->w32[i] > 0 && line->descr[i] > 0) 2399 return 'n'; /* NONZERO - contains useful info */ 2400 } 2401 return 'z'; /* ZERO - no useful info */ 2402 } 2403 2404 typedef 2405 struct { 2406 OCacheLine line[OC_LINES_PER_SET]; 2407 } 2408 OCacheSet; 2409 2410 typedef 2411 struct { 2412 OCacheSet set[OC_N_SETS]; 2413 } 2414 OCache; 2415 2416 static OCache* ocacheL1 = NULL; 2417 static UWord ocacheL1_event_ctr = 0; 2418 2419 static void init_ocacheL2 ( void ); /* fwds */ 2420 static void init_OCache ( void ) 2421 { 2422 UWord line, set; 2423 tl_assert(MC_(clo_mc_level) >= 3); 2424 tl_assert(ocacheL1 == NULL); 2425 ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache)); 2426 if (ocacheL1 == NULL) { 2427 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1", 2428 sizeof(OCache) ); 2429 } 2430 tl_assert(ocacheL1 != NULL); 2431 for (set = 0; set < OC_N_SETS; set++) { 2432 for (line = 0; line < OC_LINES_PER_SET; line++) { 2433 ocacheL1->set[set].line[line].tag = 1/*invalid*/; 2434 } 2435 } 2436 init_ocacheL2(); 2437 } 2438 2439 static void moveLineForwards ( OCacheSet* set, UWord lineno ) 2440 { 2441 OCacheLine tmp; 2442 stats_ocacheL1_movefwds++; 2443 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET); 2444 tmp = set->line[lineno-1]; 2445 set->line[lineno-1] = set->line[lineno]; 2446 set->line[lineno] = tmp; 2447 } 2448 2449 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) { 2450 UWord i; 2451 for (i = 0; i < OC_W32S_PER_LINE; i++) { 2452 line->w32[i] = 0; /* NO ORIGIN */ 2453 line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! 
*/ 2454 } 2455 line->tag = tag; 2456 } 2457 2458 ////////////////////////////////////////////////////////////// 2459 //// OCache backing store 2460 2461 static OSet* ocacheL2 = NULL; 2462 2463 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) { 2464 return VG_(malloc)(cc, szB); 2465 } 2466 static void ocacheL2_free ( void* v ) { 2467 VG_(free)( v ); 2468 } 2469 2470 /* Stats: # nodes currently in tree */ 2471 static UWord stats__ocacheL2_n_nodes = 0; 2472 2473 static void init_ocacheL2 ( void ) 2474 { 2475 tl_assert(!ocacheL2); 2476 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */ 2477 tl_assert(0 == offsetof(OCacheLine,tag)); 2478 ocacheL2 2479 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag), 2480 NULL, /* fast cmp */ 2481 ocacheL2_malloc, "mc.ioL2", ocacheL2_free); 2482 stats__ocacheL2_n_nodes = 0; 2483 } 2484 2485 /* Find line with the given tag in the tree, or NULL if not found. */ 2486 static OCacheLine* ocacheL2_find_tag ( Addr tag ) 2487 { 2488 OCacheLine* line; 2489 tl_assert(is_valid_oc_tag(tag)); 2490 stats__ocacheL2_refs++; 2491 line = VG_(OSetGen_Lookup)( ocacheL2, &tag ); 2492 return line; 2493 } 2494 2495 /* Delete the line with the given tag from the tree, if it is present, and 2496 free up the associated memory. */ 2497 static void ocacheL2_del_tag ( Addr tag ) 2498 { 2499 OCacheLine* line; 2500 tl_assert(is_valid_oc_tag(tag)); 2501 stats__ocacheL2_refs++; 2502 line = VG_(OSetGen_Remove)( ocacheL2, &tag ); 2503 if (line) { 2504 VG_(OSetGen_FreeNode)(ocacheL2, line); 2505 tl_assert(stats__ocacheL2_n_nodes > 0); 2506 stats__ocacheL2_n_nodes--; 2507 } 2508 } 2509 2510 /* Add a copy of the given line to the tree. It must not already be 2511 present. 
*/
static void ocacheL2_add_line ( OCacheLine* line )
{
   OCacheLine* copy;
   tl_assert(is_valid_oc_tag(line->tag));
   /* The tree owns its own copy; the caller's line is not retained. */
   copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
   *copy = *line;
   stats__ocacheL2_refs++;
   VG_(OSetGen_Insert)( ocacheL2, copy );
   stats__ocacheL2_n_nodes++;
   /* Track the high-water mark of the tree size. */
   if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
      stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
}

////
//////////////////////////////////////////////////////////////

/* Slow path of find_OCacheLine: called when the line for 'a' is not
   in way 0 of its set.  Searches the remaining ways; on a miss,
   ejects the line in the last way (saving it to L2 if it holds
   useful origins), reloads from L2 or installs an all-zero line, and
   moves the new line one way forwards.  Always returns a pointer to
   an L1 line whose tag covers 'a'. */
__attribute__((noinline))
static OCacheLine* find_OCacheLine_SLOW ( Addr a )
{
   OCacheLine *victim, *inL2;
   UChar c;
   UWord line;
   UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   UWord tag     = a & tagmask;
   tl_assert(setno >= 0 && setno < OC_N_SETS);

   /* we already tried line == 0; skip therefore. */
   for (line = 1; line < OC_LINES_PER_SET; line++) {
      if (ocacheL1->set[setno].line[line].tag == tag) {
         if (line == 1) {
            stats_ocacheL1_found_at_1++;
         } else {
            stats_ocacheL1_found_at_N++;
         }
         /* Approximate LRU: once every 2^OC_MOVE_FORWARDS_EVERY_BITS
            such hits, swap the hit line with its predecessor. */
         if (UNLIKELY(0 == (ocacheL1_event_ctr++ 
                            & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
            moveLineForwards( &ocacheL1->set[setno], line );
            line--;
         }
         return &ocacheL1->set[setno].line[line];
      }
   }

   /* A miss.  Use the last slot.  Implicitly this means we're
      ejecting the line in the last slot. */
   stats_ocacheL1_misses++;
   tl_assert(line == OC_LINES_PER_SET);
   line--;
   tl_assert(line > 0);

   /* First, move the to-be-ejected line to the L2 cache. */
   victim = &ocacheL1->set[setno].line[line];
   c = classify_OCacheLine(victim);
   switch (c) {
      case 'e':
         /* the line is empty (has invalid tag); ignore it. */
         break;
      case 'z':
         /* line contains zeroes.  We must ensure the backing store is
            updated accordingly, either by copying the line there
            verbatim, or by ensuring it isn't present there.  We
            choose the latter on the basis that it reduces the size of
            the backing store. */
         ocacheL2_del_tag( victim->tag );
         break;
      case 'n':
         /* line contains at least one real, useful origin.  Copy it
            to the backing store. */
         stats_ocacheL1_lossage++;
         inL2 = ocacheL2_find_tag( victim->tag );
         if (inL2) {
            /* Already has a node: overwrite it in place. */
            *inL2 = *victim;
         } else {
            ocacheL2_add_line( victim );
         }
         break;
      default:
         tl_assert(0);
   }

   /* Now we must reload the L1 cache from the backing tree, if
      possible. */
   tl_assert(tag != victim->tag); /* stay sane */
   inL2 = ocacheL2_find_tag( tag );
   if (inL2) {
      /* We're in luck.  It's in the L2. */
      ocacheL1->set[setno].line[line] = *inL2;
   } else {
      /* Missed at both levels of the cache hierarchy.  We have to
         declare it as full of zeroes (unknown origins). */
      stats__ocacheL2_misses++;
      zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
   }

   /* Move it one forwards */
   moveLineForwards( &ocacheL1->set[setno], line );
   line--;

   return &ocacheL1->set[setno].line[line];
}

/* Return the L1 cache line covering address 'a'.  The inlined fast
   path only checks way 0 of the relevant set; everything else is
   delegated to find_OCacheLine_SLOW. */
static INLINE OCacheLine* find_OCacheLine ( Addr a )
{
   UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   UWord tag     = a & tagmask;

   stats_ocacheL1_find++;

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(setno >= 0 && setno < OC_N_SETS);
      tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
   }

   if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
      return &ocacheL1->set[setno].line[0];
   }

   return find_OCacheLine_SLOW( a );
}

/* Record 'otag' as the origin of all 8 bytes at 'a'.  The checked
   bound on lineoff relies on 'a' being 8-aligned, so that both
   32-bit slots lie in the same cache line. */
static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
{
   //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   //// Set the origins for a+0 .. a+7
   { OCacheLine* line;
     UWord lineoff = oc_line_offset(a);
     if (OC_ENABLE_ASSERTIONS) {
        tl_assert(lineoff >= 0 
                  && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
     }
     line = find_OCacheLine( a );
     /* 0xF: all four bytes of each 32-bit slot carry the otag. */
     line->descr[lineoff+0] = 0xF;
     line->descr[lineoff+1] = 0xF;
     line->w32[lineoff+0]   = otag;
     line->w32[lineoff+1]   = otag;
   }
   //// END inlined, specialised version of MC_(helperc_b_store8)
}


/*------------------------------------------------------------*/
/*--- Aligned fast case permission setters,                ---*/
/*--- for dealing with stacks                              ---*/
/*------------------------------------------------------------*/

/*--------------------- 32-bit ---------------------*/

/* Nb: by "aligned" here we mean 4-byte aligned */

/* Mark the aligned 4-byte word at 'a' as undefined.  With
   PERF_FAST_STACK2 defined, addresses up to MAX_PRIMARY_ADDRESS are
   handled by writing the word's vabits8 entry directly; otherwise
   (or for higher addresses) this falls back to make_mem_undefined. */
static INLINE void make_aligned_word32_undefined ( Addr a )
{
  PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED);

#ifndef PERF_FAST_STACK2
   make_mem_undefined(a, 4);
#else
   {
      UWord   sm_off;
      SecMap* sm;

      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
         PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW);
         make_mem_undefined(a, 4);
         return;
      }

      sm                  = get_secmap_for_writing_low(a);
      sm_off              = SM_OFF(a);
      sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   }
#endif
}

static INLINE
void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
{
   make_aligned_word32_undefined(a);
   //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   //// Set the origins for a+0 ..
a+3 2693 { OCacheLine* line; 2694 UWord lineoff = oc_line_offset(a); 2695 if (OC_ENABLE_ASSERTIONS) { 2696 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 2697 } 2698 line = find_OCacheLine( a ); 2699 line->descr[lineoff] = 0xF; 2700 line->w32[lineoff] = otag; 2701 } 2702 //// END inlined, specialised version of MC_(helperc_b_store4) 2703 } 2704 2705 static INLINE 2706 void make_aligned_word32_noaccess ( Addr a ) 2707 { 2708 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS); 2709 2710 #ifndef PERF_FAST_STACK2 2711 MC_(make_mem_noaccess)(a, 4); 2712 #else 2713 { 2714 UWord sm_off; 2715 SecMap* sm; 2716 2717 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) { 2718 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW); 2719 MC_(make_mem_noaccess)(a, 4); 2720 return; 2721 } 2722 2723 sm = get_secmap_for_writing_low(a); 2724 sm_off = SM_OFF(a); 2725 sm->vabits8[sm_off] = VA_BITS8_NOACCESS; 2726 2727 //// BEGIN inlined, specialised version of MC_(helperc_b_store4) 2728 //// Set the origins for a+0 .. a+3. 
2729 if (UNLIKELY( MC_(clo_mc_level) == 3 )) { 2730 OCacheLine* line; 2731 UWord lineoff = oc_line_offset(a); 2732 if (OC_ENABLE_ASSERTIONS) { 2733 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 2734 } 2735 line = find_OCacheLine( a ); 2736 line->descr[lineoff] = 0; 2737 } 2738 //// END inlined, specialised version of MC_(helperc_b_store4) 2739 } 2740 #endif 2741 } 2742 2743 /*--------------------- 64-bit ---------------------*/ 2744 2745 /* Nb: by "aligned" here we mean 8-byte aligned */ 2746 2747 static INLINE void make_aligned_word64_undefined ( Addr a ) 2748 { 2749 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED); 2750 2751 #ifndef PERF_FAST_STACK2 2752 make_mem_undefined(a, 8); 2753 #else 2754 { 2755 UWord sm_off16; 2756 SecMap* sm; 2757 2758 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) { 2759 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW); 2760 make_mem_undefined(a, 8); 2761 return; 2762 } 2763 2764 sm = get_secmap_for_writing_low(a); 2765 sm_off16 = SM_OFF_16(a); 2766 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED; 2767 } 2768 #endif 2769 } 2770 2771 static INLINE 2772 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag ) 2773 { 2774 make_aligned_word64_undefined(a); 2775 //// BEGIN inlined, specialised version of MC_(helperc_b_store8) 2776 //// Set the origins for a+0 .. 
a+7 2777 { OCacheLine* line; 2778 UWord lineoff = oc_line_offset(a); 2779 tl_assert(lineoff >= 0 2780 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/); 2781 line = find_OCacheLine( a ); 2782 line->descr[lineoff+0] = 0xF; 2783 line->descr[lineoff+1] = 0xF; 2784 line->w32[lineoff+0] = otag; 2785 line->w32[lineoff+1] = otag; 2786 } 2787 //// END inlined, specialised version of MC_(helperc_b_store8) 2788 } 2789 2790 static INLINE 2791 void make_aligned_word64_noaccess ( Addr a ) 2792 { 2793 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS); 2794 2795 #ifndef PERF_FAST_STACK2 2796 MC_(make_mem_noaccess)(a, 8); 2797 #else 2798 { 2799 UWord sm_off16; 2800 SecMap* sm; 2801 2802 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) { 2803 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW); 2804 MC_(make_mem_noaccess)(a, 8); 2805 return; 2806 } 2807 2808 sm = get_secmap_for_writing_low(a); 2809 sm_off16 = SM_OFF_16(a); 2810 sm->vabits16[sm_off16] = VA_BITS16_NOACCESS; 2811 2812 //// BEGIN inlined, specialised version of MC_(helperc_b_store8) 2813 //// Clear the origins for a+0 .. a+7. 
2814 if (UNLIKELY( MC_(clo_mc_level) == 3 )) { 2815 OCacheLine* line; 2816 UWord lineoff = oc_line_offset(a); 2817 tl_assert(lineoff >= 0 2818 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/); 2819 line = find_OCacheLine( a ); 2820 line->descr[lineoff+0] = 0; 2821 line->descr[lineoff+1] = 0; 2822 } 2823 //// END inlined, specialised version of MC_(helperc_b_store8) 2824 } 2825 #endif 2826 } 2827 2828 2829 /*------------------------------------------------------------*/ 2830 /*--- Stack pointer adjustment ---*/ 2831 /*------------------------------------------------------------*/ 2832 2833 #ifdef PERF_FAST_STACK 2834 # define MAYBE_USED 2835 #else 2836 # define MAYBE_USED __attribute__((unused)) 2837 #endif 2838 2839 /*--------------- adjustment by 4 bytes ---------------*/ 2840 2841 MAYBE_USED 2842 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu) 2843 { 2844 UInt otag = ecu | MC_OKIND_STACK; 2845 PROF_EVENT(MCPE_NEW_MEM_STACK_4); 2846 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2847 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag ); 2848 } else { 2849 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag ); 2850 } 2851 } 2852 2853 MAYBE_USED 2854 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP) 2855 { 2856 PROF_EVENT(MCPE_NEW_MEM_STACK_4); 2857 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2858 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2859 } else { 2860 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 ); 2861 } 2862 } 2863 2864 MAYBE_USED 2865 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP) 2866 { 2867 PROF_EVENT(MCPE_DIE_MEM_STACK_4); 2868 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2869 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2870 } else { 2871 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 ); 2872 } 2873 } 2874 2875 /*--------------- adjustment by 8 bytes 
---------------*/ 2876 2877 MAYBE_USED 2878 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu) 2879 { 2880 UInt otag = ecu | MC_OKIND_STACK; 2881 PROF_EVENT(MCPE_NEW_MEM_STACK_8); 2882 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2883 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag ); 2884 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2885 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2886 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag ); 2887 } else { 2888 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag ); 2889 } 2890 } 2891 2892 MAYBE_USED 2893 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP) 2894 { 2895 PROF_EVENT(MCPE_NEW_MEM_STACK_8); 2896 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2897 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2898 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2899 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2900 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 2901 } else { 2902 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 ); 2903 } 2904 } 2905 2906 MAYBE_USED 2907 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP) 2908 { 2909 PROF_EVENT(MCPE_DIE_MEM_STACK_8); 2910 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2911 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 2912 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2913 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 2914 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2915 } else { 2916 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 ); 2917 } 2918 } 2919 2920 /*--------------- adjustment by 12 bytes ---------------*/ 2921 2922 MAYBE_USED 2923 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, 
UInt ecu) 2924 { 2925 UInt otag = ecu | MC_OKIND_STACK; 2926 PROF_EVENT(MCPE_NEW_MEM_STACK_12); 2927 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2928 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2929 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag ); 2930 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2931 /* from previous test we don't have 8-alignment at offset +0, 2932 hence must have 8 alignment at offsets +4/-4. Hence safe to 2933 do 4 at +0 and then 8 at +4/. */ 2934 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2935 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag ); 2936 } else { 2937 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag ); 2938 } 2939 } 2940 2941 MAYBE_USED 2942 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP) 2943 { 2944 PROF_EVENT(MCPE_NEW_MEM_STACK_12); 2945 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2946 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2947 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2948 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2949 /* from previous test we don't have 8-alignment at offset +0, 2950 hence must have 8 alignment at offsets +4/-4. Hence safe to 2951 do 4 at +0 and then 8 at +4/. */ 2952 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2953 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 2954 } else { 2955 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 ); 2956 } 2957 } 2958 2959 MAYBE_USED 2960 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP) 2961 { 2962 PROF_EVENT(MCPE_DIE_MEM_STACK_12); 2963 /* Note the -12 in the test */ 2964 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) { 2965 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at 2966 -4. 
*/ 2967 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 2968 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2969 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2970 /* We have 4-alignment at +0, but we don't have 8-alignment at 2971 -12. So we must have 8-alignment at -8. Hence do 4 at -12 2972 and then 8 at -8. */ 2973 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 2974 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 2975 } else { 2976 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 ); 2977 } 2978 } 2979 2980 /*--------------- adjustment by 16 bytes ---------------*/ 2981 2982 MAYBE_USED 2983 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu) 2984 { 2985 UInt otag = ecu | MC_OKIND_STACK; 2986 PROF_EVENT(MCPE_NEW_MEM_STACK_16); 2987 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2988 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */ 2989 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2990 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag ); 2991 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2992 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4. 2993 Hence do 4 at +0, 8 at +4, 4 at +12. */ 2994 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2995 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag ); 2996 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag ); 2997 } else { 2998 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag ); 2999 } 3000 } 3001 3002 MAYBE_USED 3003 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP) 3004 { 3005 PROF_EVENT(MCPE_NEW_MEM_STACK_16); 3006 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3007 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. 
*/ 3008 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 3009 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 3010 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3011 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4. 3012 Hence do 4 at +0, 8 at +4, 4 at +12. */ 3013 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 3014 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 3015 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 ); 3016 } else { 3017 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 ); 3018 } 3019 } 3020 3021 MAYBE_USED 3022 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP) 3023 { 3024 PROF_EVENT(MCPE_DIE_MEM_STACK_16); 3025 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3026 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */ 3027 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 3028 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 3029 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3030 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. 
*/ 3031 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 3032 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 3033 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 3034 } else { 3035 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 ); 3036 } 3037 } 3038 3039 /*--------------- adjustment by 32 bytes ---------------*/ 3040 3041 MAYBE_USED 3042 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu) 3043 { 3044 UInt otag = ecu | MC_OKIND_STACK; 3045 PROF_EVENT(MCPE_NEW_MEM_STACK_32); 3046 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3047 /* Straightforward */ 3048 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 3049 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag ); 3050 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 3051 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 3052 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3053 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at 3054 +0,+28. 
*/ 3055 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 3056 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag ); 3057 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag ); 3058 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag ); 3059 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag ); 3060 } else { 3061 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag ); 3062 } 3063 } 3064 3065 MAYBE_USED 3066 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP) 3067 { 3068 PROF_EVENT(MCPE_NEW_MEM_STACK_32); 3069 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3070 /* Straightforward */ 3071 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 3072 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 3073 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 3074 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 3075 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3076 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at 3077 +0,+28. 
*/ 3078 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 3079 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 3080 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 ); 3081 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 ); 3082 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 ); 3083 } else { 3084 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 ); 3085 } 3086 } 3087 3088 MAYBE_USED 3089 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP) 3090 { 3091 PROF_EVENT(MCPE_DIE_MEM_STACK_32); 3092 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3093 /* Straightforward */ 3094 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 3095 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 3096 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 3097 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 3098 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3099 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and 3100 4 at -32,-4. 
*/ 3101 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 3102 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 ); 3103 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 ); 3104 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 3105 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 3106 } else { 3107 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 ); 3108 } 3109 } 3110 3111 /*--------------- adjustment by 112 bytes ---------------*/ 3112 3113 MAYBE_USED 3114 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu) 3115 { 3116 UInt otag = ecu | MC_OKIND_STACK; 3117 PROF_EVENT(MCPE_NEW_MEM_STACK_112); 3118 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3119 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 3120 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag ); 3121 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 3122 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 3123 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag ); 3124 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag ); 3125 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag ); 3126 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag ); 3127 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag ); 3128 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag ); 3129 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag ); 3130 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag ); 3131 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag ); 3132 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, 
otag ); 3133 } else { 3134 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag ); 3135 } 3136 } 3137 3138 MAYBE_USED 3139 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP) 3140 { 3141 PROF_EVENT(MCPE_NEW_MEM_STACK_112); 3142 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3143 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 3144 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 3145 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 3146 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 3147 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 ); 3148 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 ); 3149 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 ); 3150 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 ); 3151 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 ); 3152 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 ); 3153 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 ); 3154 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 ); 3155 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 ); 3156 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 ); 3157 } else { 3158 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 ); 3159 } 3160 } 3161 3162 MAYBE_USED 3163 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP) 3164 { 3165 PROF_EVENT(MCPE_DIE_MEM_STACK_112); 3166 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3167 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112); 3168 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104); 3169 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 ); 3170 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 ); 3171 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 
); 3172 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 ); 3173 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 ); 3174 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 ); 3175 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 ); 3176 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 ); 3177 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 3178 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 3179 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 3180 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 3181 } else { 3182 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 ); 3183 } 3184 } 3185 3186 /*--------------- adjustment by 128 bytes ---------------*/ 3187 3188 MAYBE_USED 3189 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu) 3190 { 3191 UInt otag = ecu | MC_OKIND_STACK; 3192 PROF_EVENT(MCPE_NEW_MEM_STACK_128); 3193 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3194 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 3195 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag ); 3196 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 3197 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 3198 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag ); 3199 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag ); 3200 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag ); 3201 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag ); 3202 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag ); 3203 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag ); 3204 make_aligned_word64_undefined_w_otag ( 
-VG_STACK_REDZONE_SZB + new_SP+80, otag ); 3205 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag ); 3206 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag ); 3207 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag ); 3208 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag ); 3209 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag ); 3210 } else { 3211 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag ); 3212 } 3213 } 3214 3215 MAYBE_USED 3216 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP) 3217 { 3218 PROF_EVENT(MCPE_NEW_MEM_STACK_128); 3219 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3220 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 3221 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 3222 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 3223 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 3224 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 ); 3225 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 ); 3226 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 ); 3227 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 ); 3228 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 ); 3229 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 ); 3230 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 ); 3231 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 ); 3232 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 ); 3233 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 ); 3234 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 ); 3235 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 ); 3236 } else { 3237 
make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 ); 3238 } 3239 } 3240 3241 MAYBE_USED 3242 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP) 3243 { 3244 PROF_EVENT(MCPE_DIE_MEM_STACK_128); 3245 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3246 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128); 3247 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120); 3248 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112); 3249 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104); 3250 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 ); 3251 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 ); 3252 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 ); 3253 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 ); 3254 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 ); 3255 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 ); 3256 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 ); 3257 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 ); 3258 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 3259 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 3260 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 3261 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 3262 } else { 3263 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 ); 3264 } 3265 } 3266 3267 /*--------------- adjustment by 144 bytes ---------------*/ 3268 3269 MAYBE_USED 3270 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu) 3271 { 3272 UInt otag = ecu | MC_OKIND_STACK; 3273 PROF_EVENT(MCPE_NEW_MEM_STACK_144); 3274 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3275 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag ); 3276 make_aligned_word64_undefined_w_otag ( 
-VG_STACK_REDZONE_SZB + new_SP+8, otag ); 3277 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 3278 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 3279 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag ); 3280 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag ); 3281 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag ); 3282 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag ); 3283 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag ); 3284 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag ); 3285 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag ); 3286 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag ); 3287 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag ); 3288 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag ); 3289 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag ); 3290 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag ); 3291 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag ); 3292 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag ); 3293 } else { 3294 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag ); 3295 } 3296 } 3297 3298 MAYBE_USED 3299 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP) 3300 { 3301 PROF_EVENT(MCPE_NEW_MEM_STACK_144); 3302 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3303 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 3304 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 3305 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 3306 make_aligned_word64_undefined 
( -VG_STACK_REDZONE_SZB + new_SP+24 ); 3307 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 ); 3308 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 ); 3309 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 ); 3310 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 ); 3311 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 ); 3312 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 ); 3313 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 ); 3314 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 ); 3315 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 ); 3316 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 ); 3317 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 ); 3318 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 ); 3319 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 ); 3320 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 ); 3321 } else { 3322 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 ); 3323 } 3324 } 3325 3326 MAYBE_USED 3327 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP) 3328 { 3329 PROF_EVENT(MCPE_DIE_MEM_STACK_144); 3330 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3331 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144); 3332 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136); 3333 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128); 3334 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120); 3335 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112); 3336 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104); 3337 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 ); 3338 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 ); 3339 make_aligned_word64_noaccess ( 
-VG_STACK_REDZONE_SZB + new_SP-80 ); 3340 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 ); 3341 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 ); 3342 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 ); 3343 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 ); 3344 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 ); 3345 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 3346 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 3347 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 3348 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 3349 } else { 3350 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 ); 3351 } 3352 } 3353 3354 /*--------------- adjustment by 160 bytes ---------------*/ 3355 3356 MAYBE_USED 3357 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu) 3358 { 3359 UInt otag = ecu | MC_OKIND_STACK; 3360 PROF_EVENT(MCPE_NEW_MEM_STACK_160); 3361 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3362 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag ); 3363 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag ); 3364 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 3365 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 3366 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag ); 3367 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag ); 3368 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag ); 3369 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag ); 3370 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag ); 3371 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag ); 3372 
make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag ); 3373 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag ); 3374 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag ); 3375 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag ); 3376 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag ); 3377 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag ); 3378 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag ); 3379 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag ); 3380 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag ); 3381 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag ); 3382 } else { 3383 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag ); 3384 } 3385 } 3386 3387 MAYBE_USED 3388 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP) 3389 { 3390 PROF_EVENT(MCPE_NEW_MEM_STACK_160); 3391 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3392 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 3393 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 3394 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 3395 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 3396 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 ); 3397 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 ); 3398 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 ); 3399 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 ); 3400 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 ); 3401 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 ); 3402 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 ); 3403 
make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 ); 3404 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 ); 3405 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 ); 3406 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 ); 3407 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 ); 3408 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 ); 3409 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 ); 3410 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 ); 3411 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 ); 3412 } else { 3413 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 ); 3414 } 3415 } 3416 3417 MAYBE_USED 3418 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP) 3419 { 3420 PROF_EVENT(MCPE_DIE_MEM_STACK_160); 3421 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 3422 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160); 3423 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152); 3424 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144); 3425 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136); 3426 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128); 3427 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120); 3428 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112); 3429 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104); 3430 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 ); 3431 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 ); 3432 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 ); 3433 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 ); 3434 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 ); 3435 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 ); 3436 
make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 ); 3437 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 ); 3438 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 3439 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 3440 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 3441 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 3442 } else { 3443 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 ); 3444 } 3445 } 3446 3447 /*--------------- adjustment by N bytes ---------------*/ 3448 3449 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu ) 3450 { 3451 UInt otag = ecu | MC_OKIND_STACK; 3452 PROF_EVENT(MCPE_NEW_MEM_STACK); 3453 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag ); 3454 } 3455 3456 static void mc_new_mem_stack ( Addr a, SizeT len ) 3457 { 3458 PROF_EVENT(MCPE_NEW_MEM_STACK); 3459 make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len ); 3460 } 3461 3462 static void mc_die_mem_stack ( Addr a, SizeT len ) 3463 { 3464 PROF_EVENT(MCPE_DIE_MEM_STACK); 3465 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len ); 3466 } 3467 3468 3469 /* The AMD64 ABI says: 3470 3471 "The 128-byte area beyond the location pointed to by %rsp is considered 3472 to be reserved and shall not be modified by signal or interrupt 3473 handlers. Therefore, functions may use this area for temporary data 3474 that is not needed across function calls. In particular, leaf functions 3475 may use this area for their entire stack frame, rather than adjusting 3476 the stack pointer in the prologue and epilogue. This area is known as 3477 red zone [sic]." 3478 3479 So after any call or return we need to mark this redzone as containing 3480 undefined values. 3481 3482 Consider this: we're in function f. f calls g. g moves rsp down 3483 modestly (say 16 bytes) and writes stuff all over the red zone, making it 3484 defined. g returns. 
f is buggy and reads from parts of the red zone 3485 that it didn't write on. But because g filled that area in, f is going 3486 to be picking up defined V bits and so any errors from reading bits of 3487 the red zone it didn't write, will be missed. The only solution I could 3488 think of was to make the red zone undefined when g returns to f. 3489 3490 This is in accordance with the ABI, which makes it clear the redzone 3491 is volatile across function calls. 3492 3493 The problem occurs the other way round too: f could fill the RZ up 3494 with defined values and g could mistakenly read them. So the RZ 3495 also needs to be nuked on function calls. 3496 */ 3497 3498 3499 /* Here's a simple cache to hold nia -> ECU mappings. It could be 3500 improved so as to have a lower miss rate. */ 3501 3502 static UWord stats__nia_cache_queries = 0; 3503 static UWord stats__nia_cache_misses = 0; 3504 3505 typedef 3506 struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */ 3507 UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */ 3508 WCacheEnt; 3509 3510 #define N_NIA_TO_ECU_CACHE 511 3511 3512 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE]; 3513 3514 static void init_nia_to_ecu_cache ( void ) 3515 { 3516 UWord i; 3517 Addr zero_addr = 0; 3518 ExeContext* zero_ec; 3519 UInt zero_ecu; 3520 /* Fill all the slots with an entry for address zero, and the 3521 relevant otags accordingly. Hence the cache is initially filled 3522 with valid data. 
*/ 3523 zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr); 3524 tl_assert(zero_ec); 3525 zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec); 3526 tl_assert(VG_(is_plausible_ECU)(zero_ecu)); 3527 for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) { 3528 nia_to_ecu_cache[i].nia0 = zero_addr; 3529 nia_to_ecu_cache[i].ecu0 = zero_ecu; 3530 nia_to_ecu_cache[i].nia1 = zero_addr; 3531 nia_to_ecu_cache[i].ecu1 = zero_ecu; 3532 } 3533 } 3534 3535 static inline UInt convert_nia_to_ecu ( Addr nia ) 3536 { 3537 UWord i; 3538 UInt ecu; 3539 ExeContext* ec; 3540 3541 tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) ); 3542 3543 stats__nia_cache_queries++; 3544 i = nia % N_NIA_TO_ECU_CACHE; 3545 tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE); 3546 3547 if (LIKELY( nia_to_ecu_cache[i].nia0 == nia )) 3548 return nia_to_ecu_cache[i].ecu0; 3549 3550 if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) { 3551 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; } 3552 SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 ); 3553 SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 ); 3554 # undef SWAP 3555 return nia_to_ecu_cache[i].ecu0; 3556 } 3557 3558 stats__nia_cache_misses++; 3559 ec = VG_(make_depth_1_ExeContext_from_Addr)(nia); 3560 tl_assert(ec); 3561 ecu = VG_(get_ECU_from_ExeContext)(ec); 3562 tl_assert(VG_(is_plausible_ECU)(ecu)); 3563 3564 nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0; 3565 nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0; 3566 3567 nia_to_ecu_cache[i].nia0 = nia; 3568 nia_to_ecu_cache[i].ecu0 = (UWord)ecu; 3569 return ecu; 3570 } 3571 3572 3573 /* This marks the stack as addressible but undefined, after a call or 3574 return for a target that has an ABI defined stack redzone. It 3575 happens quite a lot and needs to be fast. This is the version for 3576 origin tracking. The non-origin-tracking version is below. 
*/ 3577 VG_REGPARM(3) 3578 void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia ) 3579 { 3580 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O); 3581 if (0) 3582 VG_(printf)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n", 3583 base, len, nia ); 3584 3585 UInt ecu = convert_nia_to_ecu ( nia ); 3586 tl_assert(VG_(is_plausible_ECU)(ecu)); 3587 3588 UInt otag = ecu | MC_OKIND_STACK; 3589 3590 # if 0 3591 /* Slow(ish) version, which is fairly easily seen to be correct. 3592 */ 3593 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) { 3594 make_aligned_word64_undefined_w_otag(base + 0, otag); 3595 make_aligned_word64_undefined_w_otag(base + 8, otag); 3596 make_aligned_word64_undefined_w_otag(base + 16, otag); 3597 make_aligned_word64_undefined_w_otag(base + 24, otag); 3598 3599 make_aligned_word64_undefined_w_otag(base + 32, otag); 3600 make_aligned_word64_undefined_w_otag(base + 40, otag); 3601 make_aligned_word64_undefined_w_otag(base + 48, otag); 3602 make_aligned_word64_undefined_w_otag(base + 56, otag); 3603 3604 make_aligned_word64_undefined_w_otag(base + 64, otag); 3605 make_aligned_word64_undefined_w_otag(base + 72, otag); 3606 make_aligned_word64_undefined_w_otag(base + 80, otag); 3607 make_aligned_word64_undefined_w_otag(base + 88, otag); 3608 3609 make_aligned_word64_undefined_w_otag(base + 96, otag); 3610 make_aligned_word64_undefined_w_otag(base + 104, otag); 3611 make_aligned_word64_undefined_w_otag(base + 112, otag); 3612 make_aligned_word64_undefined_w_otag(base + 120, otag); 3613 } else { 3614 MC_(make_mem_undefined_w_otag)(base, len, otag); 3615 } 3616 # endif 3617 3618 /* Idea is: go fast when 3619 * 8-aligned and length is 128 3620 * the sm is available in the main primary map 3621 * the address range falls entirely with a single secondary map 3622 If all those conditions hold, just update the V+A bits by writing 3623 directly into the vabits array. (If the sm was distinguished, this 3624 will make a copy and then write to it.) 
3625 */ 3626 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) { 3627 /* Now we know the address range is suitably sized and aligned. */ 3628 UWord a_lo = (UWord)(base); 3629 UWord a_hi = (UWord)(base + 128 - 1); 3630 tl_assert(a_lo < a_hi); // paranoia: detect overflow 3631 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) { 3632 /* Now we know the entire range is within the main primary map. */ 3633 UWord pm_off_lo = get_primary_map_low_offset(a_lo); 3634 UWord pm_off_hi = get_primary_map_low_offset(a_hi); 3635 if (LIKELY(pm_off_lo == pm_off_hi)) { 3636 /* Now we know that the entire address range falls within a 3637 single secondary map, and that that secondary 'lives' in 3638 the main primary map. */ 3639 SecMap* sm = get_secmap_for_writing_low(a_lo); 3640 UWord v_off16 = SM_OFF_16(a_lo); 3641 UShort* p = &sm->vabits16[v_off16]; 3642 p[ 0] = VA_BITS16_UNDEFINED; 3643 p[ 1] = VA_BITS16_UNDEFINED; 3644 p[ 2] = VA_BITS16_UNDEFINED; 3645 p[ 3] = VA_BITS16_UNDEFINED; 3646 p[ 4] = VA_BITS16_UNDEFINED; 3647 p[ 5] = VA_BITS16_UNDEFINED; 3648 p[ 6] = VA_BITS16_UNDEFINED; 3649 p[ 7] = VA_BITS16_UNDEFINED; 3650 p[ 8] = VA_BITS16_UNDEFINED; 3651 p[ 9] = VA_BITS16_UNDEFINED; 3652 p[10] = VA_BITS16_UNDEFINED; 3653 p[11] = VA_BITS16_UNDEFINED; 3654 p[12] = VA_BITS16_UNDEFINED; 3655 p[13] = VA_BITS16_UNDEFINED; 3656 p[14] = VA_BITS16_UNDEFINED; 3657 p[15] = VA_BITS16_UNDEFINED; 3658 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag ); 3659 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag ); 3660 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag ); 3661 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag ); 3662 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag ); 3663 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag ); 3664 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag ); 3665 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag ); 3666 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag ); 3667 set_aligned_word64_Origin_to_undef( base + 8 * 
9, otag ); 3668 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag ); 3669 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag ); 3670 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag ); 3671 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag ); 3672 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag ); 3673 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag ); 3674 return; 3675 } 3676 } 3677 } 3678 3679 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */ 3680 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) { 3681 /* Now we know the address range is suitably sized and aligned. */ 3682 UWord a_lo = (UWord)(base); 3683 UWord a_hi = (UWord)(base + 288 - 1); 3684 tl_assert(a_lo < a_hi); // paranoia: detect overflow 3685 if (a_hi <= MAX_PRIMARY_ADDRESS) { 3686 UWord pm_off_lo = get_primary_map_low_offset(a_lo); 3687 UWord pm_off_hi = get_primary_map_low_offset(a_hi); 3688 if (LIKELY(pm_off_lo == pm_off_hi)) { 3689 /* Now we know that the entire address range falls within a 3690 single secondary map, and that that secondary 'lives' in 3691 the main primary map. 
*/ 3692 SecMap* sm = get_secmap_for_writing_low(a_lo); 3693 UWord v_off16 = SM_OFF_16(a_lo); 3694 UShort* p = &sm->vabits16[v_off16]; 3695 p[ 0] = VA_BITS16_UNDEFINED; 3696 p[ 1] = VA_BITS16_UNDEFINED; 3697 p[ 2] = VA_BITS16_UNDEFINED; 3698 p[ 3] = VA_BITS16_UNDEFINED; 3699 p[ 4] = VA_BITS16_UNDEFINED; 3700 p[ 5] = VA_BITS16_UNDEFINED; 3701 p[ 6] = VA_BITS16_UNDEFINED; 3702 p[ 7] = VA_BITS16_UNDEFINED; 3703 p[ 8] = VA_BITS16_UNDEFINED; 3704 p[ 9] = VA_BITS16_UNDEFINED; 3705 p[10] = VA_BITS16_UNDEFINED; 3706 p[11] = VA_BITS16_UNDEFINED; 3707 p[12] = VA_BITS16_UNDEFINED; 3708 p[13] = VA_BITS16_UNDEFINED; 3709 p[14] = VA_BITS16_UNDEFINED; 3710 p[15] = VA_BITS16_UNDEFINED; 3711 p[16] = VA_BITS16_UNDEFINED; 3712 p[17] = VA_BITS16_UNDEFINED; 3713 p[18] = VA_BITS16_UNDEFINED; 3714 p[19] = VA_BITS16_UNDEFINED; 3715 p[20] = VA_BITS16_UNDEFINED; 3716 p[21] = VA_BITS16_UNDEFINED; 3717 p[22] = VA_BITS16_UNDEFINED; 3718 p[23] = VA_BITS16_UNDEFINED; 3719 p[24] = VA_BITS16_UNDEFINED; 3720 p[25] = VA_BITS16_UNDEFINED; 3721 p[26] = VA_BITS16_UNDEFINED; 3722 p[27] = VA_BITS16_UNDEFINED; 3723 p[28] = VA_BITS16_UNDEFINED; 3724 p[29] = VA_BITS16_UNDEFINED; 3725 p[30] = VA_BITS16_UNDEFINED; 3726 p[31] = VA_BITS16_UNDEFINED; 3727 p[32] = VA_BITS16_UNDEFINED; 3728 p[33] = VA_BITS16_UNDEFINED; 3729 p[34] = VA_BITS16_UNDEFINED; 3730 p[35] = VA_BITS16_UNDEFINED; 3731 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag ); 3732 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag ); 3733 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag ); 3734 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag ); 3735 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag ); 3736 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag ); 3737 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag ); 3738 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag ); 3739 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag ); 3740 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag ); 3741 
set_aligned_word64_Origin_to_undef( base + 8 * 10, otag ); 3742 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag ); 3743 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag ); 3744 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag ); 3745 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag ); 3746 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag ); 3747 set_aligned_word64_Origin_to_undef( base + 8 * 16, otag ); 3748 set_aligned_word64_Origin_to_undef( base + 8 * 17, otag ); 3749 set_aligned_word64_Origin_to_undef( base + 8 * 18, otag ); 3750 set_aligned_word64_Origin_to_undef( base + 8 * 19, otag ); 3751 set_aligned_word64_Origin_to_undef( base + 8 * 20, otag ); 3752 set_aligned_word64_Origin_to_undef( base + 8 * 21, otag ); 3753 set_aligned_word64_Origin_to_undef( base + 8 * 22, otag ); 3754 set_aligned_word64_Origin_to_undef( base + 8 * 23, otag ); 3755 set_aligned_word64_Origin_to_undef( base + 8 * 24, otag ); 3756 set_aligned_word64_Origin_to_undef( base + 8 * 25, otag ); 3757 set_aligned_word64_Origin_to_undef( base + 8 * 26, otag ); 3758 set_aligned_word64_Origin_to_undef( base + 8 * 27, otag ); 3759 set_aligned_word64_Origin_to_undef( base + 8 * 28, otag ); 3760 set_aligned_word64_Origin_to_undef( base + 8 * 29, otag ); 3761 set_aligned_word64_Origin_to_undef( base + 8 * 30, otag ); 3762 set_aligned_word64_Origin_to_undef( base + 8 * 31, otag ); 3763 set_aligned_word64_Origin_to_undef( base + 8 * 32, otag ); 3764 set_aligned_word64_Origin_to_undef( base + 8 * 33, otag ); 3765 set_aligned_word64_Origin_to_undef( base + 8 * 34, otag ); 3766 set_aligned_word64_Origin_to_undef( base + 8 * 35, otag ); 3767 return; 3768 } 3769 } 3770 } 3771 3772 /* else fall into slow case */ 3773 MC_(make_mem_undefined_w_otag)(base, len, otag); 3774 } 3775 3776 3777 /* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is 3778 specialised for the non-origin-tracking case. 
*/ 3779 VG_REGPARM(2) 3780 void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len ) 3781 { 3782 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O); 3783 if (0) 3784 VG_(printf)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n", 3785 base, len ); 3786 3787 # if 0 3788 /* Slow(ish) version, which is fairly easily seen to be correct. 3789 */ 3790 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) { 3791 make_aligned_word64_undefined(base + 0); 3792 make_aligned_word64_undefined(base + 8); 3793 make_aligned_word64_undefined(base + 16); 3794 make_aligned_word64_undefined(base + 24); 3795 3796 make_aligned_word64_undefined(base + 32); 3797 make_aligned_word64_undefined(base + 40); 3798 make_aligned_word64_undefined(base + 48); 3799 make_aligned_word64_undefined(base + 56); 3800 3801 make_aligned_word64_undefined(base + 64); 3802 make_aligned_word64_undefined(base + 72); 3803 make_aligned_word64_undefined(base + 80); 3804 make_aligned_word64_undefined(base + 88); 3805 3806 make_aligned_word64_undefined(base + 96); 3807 make_aligned_word64_undefined(base + 104); 3808 make_aligned_word64_undefined(base + 112); 3809 make_aligned_word64_undefined(base + 120); 3810 } else { 3811 make_mem_undefined(base, len); 3812 } 3813 # endif 3814 3815 /* Idea is: go fast when 3816 * 8-aligned and length is 128 3817 * the sm is available in the main primary map 3818 * the address range falls entirely with a single secondary map 3819 If all those conditions hold, just update the V+A bits by writing 3820 directly into the vabits array. (If the sm was distinguished, this 3821 will make a copy and then write to it.) 3822 */ 3823 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) { 3824 /* Now we know the address range is suitably sized and aligned. */ 3825 UWord a_lo = (UWord)(base); 3826 UWord a_hi = (UWord)(base + 128 - 1); 3827 tl_assert(a_lo < a_hi); // paranoia: detect overflow 3828 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) { 3829 /* Now we know the entire range is within the main primary map. 
*/ 3830 UWord pm_off_lo = get_primary_map_low_offset(a_lo); 3831 UWord pm_off_hi = get_primary_map_low_offset(a_hi); 3832 if (LIKELY(pm_off_lo == pm_off_hi)) { 3833 /* Now we know that the entire address range falls within a 3834 single secondary map, and that that secondary 'lives' in 3835 the main primary map. */ 3836 SecMap* sm = get_secmap_for_writing_low(a_lo); 3837 UWord v_off16 = SM_OFF_16(a_lo); 3838 UShort* p = &sm->vabits16[v_off16]; 3839 p[ 0] = VA_BITS16_UNDEFINED; 3840 p[ 1] = VA_BITS16_UNDEFINED; 3841 p[ 2] = VA_BITS16_UNDEFINED; 3842 p[ 3] = VA_BITS16_UNDEFINED; 3843 p[ 4] = VA_BITS16_UNDEFINED; 3844 p[ 5] = VA_BITS16_UNDEFINED; 3845 p[ 6] = VA_BITS16_UNDEFINED; 3846 p[ 7] = VA_BITS16_UNDEFINED; 3847 p[ 8] = VA_BITS16_UNDEFINED; 3848 p[ 9] = VA_BITS16_UNDEFINED; 3849 p[10] = VA_BITS16_UNDEFINED; 3850 p[11] = VA_BITS16_UNDEFINED; 3851 p[12] = VA_BITS16_UNDEFINED; 3852 p[13] = VA_BITS16_UNDEFINED; 3853 p[14] = VA_BITS16_UNDEFINED; 3854 p[15] = VA_BITS16_UNDEFINED; 3855 return; 3856 } 3857 } 3858 } 3859 3860 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */ 3861 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) { 3862 /* Now we know the address range is suitably sized and aligned. */ 3863 UWord a_lo = (UWord)(base); 3864 UWord a_hi = (UWord)(base + 288 - 1); 3865 tl_assert(a_lo < a_hi); // paranoia: detect overflow 3866 if (a_hi <= MAX_PRIMARY_ADDRESS) { 3867 UWord pm_off_lo = get_primary_map_low_offset(a_lo); 3868 UWord pm_off_hi = get_primary_map_low_offset(a_hi); 3869 if (LIKELY(pm_off_lo == pm_off_hi)) { 3870 /* Now we know that the entire address range falls within a 3871 single secondary map, and that that secondary 'lives' in 3872 the main primary map. 
*/ 3873 SecMap* sm = get_secmap_for_writing_low(a_lo); 3874 UWord v_off16 = SM_OFF_16(a_lo); 3875 UShort* p = &sm->vabits16[v_off16]; 3876 p[ 0] = VA_BITS16_UNDEFINED; 3877 p[ 1] = VA_BITS16_UNDEFINED; 3878 p[ 2] = VA_BITS16_UNDEFINED; 3879 p[ 3] = VA_BITS16_UNDEFINED; 3880 p[ 4] = VA_BITS16_UNDEFINED; 3881 p[ 5] = VA_BITS16_UNDEFINED; 3882 p[ 6] = VA_BITS16_UNDEFINED; 3883 p[ 7] = VA_BITS16_UNDEFINED; 3884 p[ 8] = VA_BITS16_UNDEFINED; 3885 p[ 9] = VA_BITS16_UNDEFINED; 3886 p[10] = VA_BITS16_UNDEFINED; 3887 p[11] = VA_BITS16_UNDEFINED; 3888 p[12] = VA_BITS16_UNDEFINED; 3889 p[13] = VA_BITS16_UNDEFINED; 3890 p[14] = VA_BITS16_UNDEFINED; 3891 p[15] = VA_BITS16_UNDEFINED; 3892 p[16] = VA_BITS16_UNDEFINED; 3893 p[17] = VA_BITS16_UNDEFINED; 3894 p[18] = VA_BITS16_UNDEFINED; 3895 p[19] = VA_BITS16_UNDEFINED; 3896 p[20] = VA_BITS16_UNDEFINED; 3897 p[21] = VA_BITS16_UNDEFINED; 3898 p[22] = VA_BITS16_UNDEFINED; 3899 p[23] = VA_BITS16_UNDEFINED; 3900 p[24] = VA_BITS16_UNDEFINED; 3901 p[25] = VA_BITS16_UNDEFINED; 3902 p[26] = VA_BITS16_UNDEFINED; 3903 p[27] = VA_BITS16_UNDEFINED; 3904 p[28] = VA_BITS16_UNDEFINED; 3905 p[29] = VA_BITS16_UNDEFINED; 3906 p[30] = VA_BITS16_UNDEFINED; 3907 p[31] = VA_BITS16_UNDEFINED; 3908 p[32] = VA_BITS16_UNDEFINED; 3909 p[33] = VA_BITS16_UNDEFINED; 3910 p[34] = VA_BITS16_UNDEFINED; 3911 p[35] = VA_BITS16_UNDEFINED; 3912 return; 3913 } 3914 } 3915 } 3916 3917 /* else fall into slow case */ 3918 make_mem_undefined(base, len); 3919 } 3920 3921 3922 /* And this is an even more specialised case, for the case where there 3923 is no origin tracking, and the length is 128. */ 3924 VG_REGPARM(1) 3925 void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base ) 3926 { 3927 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O); 3928 if (0) 3929 VG_(printf)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base ); 3930 3931 # if 0 3932 /* Slow(ish) version, which is fairly easily seen to be correct. 
3933 */ 3934 if (LIKELY( VG_IS_8_ALIGNED(base) )) { 3935 make_aligned_word64_undefined(base + 0); 3936 make_aligned_word64_undefined(base + 8); 3937 make_aligned_word64_undefined(base + 16); 3938 make_aligned_word64_undefined(base + 24); 3939 3940 make_aligned_word64_undefined(base + 32); 3941 make_aligned_word64_undefined(base + 40); 3942 make_aligned_word64_undefined(base + 48); 3943 make_aligned_word64_undefined(base + 56); 3944 3945 make_aligned_word64_undefined(base + 64); 3946 make_aligned_word64_undefined(base + 72); 3947 make_aligned_word64_undefined(base + 80); 3948 make_aligned_word64_undefined(base + 88); 3949 3950 make_aligned_word64_undefined(base + 96); 3951 make_aligned_word64_undefined(base + 104); 3952 make_aligned_word64_undefined(base + 112); 3953 make_aligned_word64_undefined(base + 120); 3954 } else { 3955 make_mem_undefined(base, 128); 3956 } 3957 # endif 3958 3959 /* Idea is: go fast when 3960 * 16-aligned and length is 128 3961 * the sm is available in the main primary map 3962 * the address range falls entirely with a single secondary map 3963 If all those conditions hold, just update the V+A bits by writing 3964 directly into the vabits array. (If the sm was distinguished, this 3965 will make a copy and then write to it.) 3966 3967 Typically this applies to amd64 'ret' instructions, since RSP is 3968 16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI). 3969 */ 3970 if (LIKELY( VG_IS_16_ALIGNED(base) )) { 3971 /* Now we know the address range is suitably sized and aligned. */ 3972 UWord a_lo = (UWord)(base); 3973 UWord a_hi = (UWord)(base + 128 - 1); 3974 /* FIXME: come up with a sane story on the wraparound case 3975 (which of course cnanot happen, but still..) */ 3976 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow 3977 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) { 3978 /* Now we know the entire range is within the main primary map. 
*/ 3979 UWord pm_off_lo = get_primary_map_low_offset(a_lo); 3980 UWord pm_off_hi = get_primary_map_low_offset(a_hi); 3981 if (LIKELY(pm_off_lo == pm_off_hi)) { 3982 /* Now we know that the entire address range falls within a 3983 single secondary map, and that that secondary 'lives' in 3984 the main primary map. */ 3985 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16); 3986 SecMap* sm = get_secmap_for_writing_low(a_lo); 3987 UWord v_off = SM_OFF(a_lo); 3988 UInt* w32 = ASSUME_ALIGNED(UInt*, &sm->vabits8[v_off]); 3989 w32[ 0] = VA_BITS32_UNDEFINED; 3990 w32[ 1] = VA_BITS32_UNDEFINED; 3991 w32[ 2] = VA_BITS32_UNDEFINED; 3992 w32[ 3] = VA_BITS32_UNDEFINED; 3993 w32[ 4] = VA_BITS32_UNDEFINED; 3994 w32[ 5] = VA_BITS32_UNDEFINED; 3995 w32[ 6] = VA_BITS32_UNDEFINED; 3996 w32[ 7] = VA_BITS32_UNDEFINED; 3997 return; 3998 } 3999 } 4000 } 4001 4002 /* The same, but for when base is 8 % 16, which is the situation 4003 with RSP for amd64-ELF immediately after call instructions. 4004 */ 4005 if (LIKELY( VG_IS_16_ALIGNED(base+8) )) { // restricts to 8 aligned 4006 /* Now we know the address range is suitably sized and aligned. */ 4007 UWord a_lo = (UWord)(base); 4008 UWord a_hi = (UWord)(base + 128 - 1); 4009 /* FIXME: come up with a sane story on the wraparound case 4010 (which of course cnanot happen, but still..) */ 4011 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow 4012 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) { 4013 /* Now we know the entire range is within the main primary map. */ 4014 UWord pm_off_lo = get_primary_map_low_offset(a_lo); 4015 UWord pm_off_hi = get_primary_map_low_offset(a_hi); 4016 if (LIKELY(pm_off_lo == pm_off_hi)) { 4017 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8); 4018 /* Now we know that the entire address range falls within a 4019 single secondary map, and that that secondary 'lives' in 4020 the main primary map. 
*/ 4021 SecMap* sm = get_secmap_for_writing_low(a_lo); 4022 UWord v_off16 = SM_OFF_16(a_lo); 4023 UShort* w16 = &sm->vabits16[v_off16]; 4024 UInt* w32 = ASSUME_ALIGNED(UInt*, &w16[1]); 4025 /* The following assertion is commented out for obvious 4026 performance reasons, but was verified as valid when 4027 running the entire testsuite and also Firefox. */ 4028 /* tl_assert(VG_IS_4_ALIGNED(w32)); */ 4029 w16[ 0] = VA_BITS16_UNDEFINED; // w16[0] 4030 w32[ 0] = VA_BITS32_UNDEFINED; // w16[1,2] 4031 w32[ 1] = VA_BITS32_UNDEFINED; // w16[3,4] 4032 w32[ 2] = VA_BITS32_UNDEFINED; // w16[5,6] 4033 w32[ 3] = VA_BITS32_UNDEFINED; // w16[7,8] 4034 w32[ 4] = VA_BITS32_UNDEFINED; // w16[9,10] 4035 w32[ 5] = VA_BITS32_UNDEFINED; // w16[11,12] 4036 w32[ 6] = VA_BITS32_UNDEFINED; // w16[13,14] 4037 w16[15] = VA_BITS16_UNDEFINED; // w16[15] 4038 return; 4039 } 4040 } 4041 } 4042 4043 /* else fall into slow case */ 4044 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE); 4045 make_mem_undefined(base, 128); 4046 } 4047 4048 4049 /*------------------------------------------------------------*/ 4050 /*--- Checking memory ---*/ 4051 /*------------------------------------------------------------*/ 4052 4053 typedef 4054 enum { 4055 MC_Ok = 5, 4056 MC_AddrErr = 6, 4057 MC_ValueErr = 7 4058 } 4059 MC_ReadResult; 4060 4061 4062 /* Check permissions for address range. If inadequate permissions 4063 exist, *bad_addr is set to the offending address, so the caller can 4064 know what it is. */ 4065 4066 /* Returns True if [a .. a+len) is not addressible. Otherwise, 4067 returns False, and if bad_addr is non-NULL, sets *bad_addr to 4068 indicate the lowest failing address. Functions below are 4069 similar. 
*/ 4070 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr ) 4071 { 4072 SizeT i; 4073 UWord vabits2; 4074 4075 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS); 4076 for (i = 0; i < len; i++) { 4077 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP); 4078 vabits2 = get_vabits2(a); 4079 if (VA_BITS2_NOACCESS != vabits2) { 4080 if (bad_addr != NULL) *bad_addr = a; 4081 return False; 4082 } 4083 a++; 4084 } 4085 return True; 4086 } 4087 4088 static Bool is_mem_addressable ( Addr a, SizeT len, 4089 /*OUT*/Addr* bad_addr ) 4090 { 4091 SizeT i; 4092 UWord vabits2; 4093 4094 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE); 4095 for (i = 0; i < len; i++) { 4096 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP); 4097 vabits2 = get_vabits2(a); 4098 if (VA_BITS2_NOACCESS == vabits2) { 4099 if (bad_addr != NULL) *bad_addr = a; 4100 return False; 4101 } 4102 a++; 4103 } 4104 return True; 4105 } 4106 4107 static MC_ReadResult is_mem_defined ( Addr a, SizeT len, 4108 /*OUT*/Addr* bad_addr, 4109 /*OUT*/UInt* otag ) 4110 { 4111 SizeT i; 4112 UWord vabits2; 4113 4114 PROF_EVENT(MCPE_IS_MEM_DEFINED); 4115 DEBUG("is_mem_defined\n"); 4116 4117 if (otag) *otag = 0; 4118 if (bad_addr) *bad_addr = 0; 4119 for (i = 0; i < len; i++) { 4120 PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP); 4121 vabits2 = get_vabits2(a); 4122 if (VA_BITS2_DEFINED != vabits2) { 4123 // Error! Nb: Report addressability errors in preference to 4124 // definedness errors. And don't report definedeness errors unless 4125 // --undef-value-errors=yes. 4126 if (bad_addr) { 4127 *bad_addr = a; 4128 } 4129 if (VA_BITS2_NOACCESS == vabits2) { 4130 return MC_AddrErr; 4131 } 4132 if (MC_(clo_mc_level) >= 2) { 4133 if (otag && MC_(clo_mc_level) == 3) { 4134 *otag = MC_(helperc_b_load1)( a ); 4135 } 4136 return MC_ValueErr; 4137 } 4138 } 4139 a++; 4140 } 4141 return MC_Ok; 4142 } 4143 4144 4145 /* Like is_mem_defined but doesn't give up at the first uninitialised 4146 byte -- the entire range is always checked. 
This is important for 4147 detecting errors in the case where a checked range strays into 4148 invalid memory, but that fact is not detected by the ordinary 4149 is_mem_defined(), because of an undefined section that precedes the 4150 out of range section, possibly as a result of an alignment hole in 4151 the checked data. This version always checks the entire range and 4152 can report both a definedness and an accessbility error, if 4153 necessary. */ 4154 static void is_mem_defined_comprehensive ( 4155 Addr a, SizeT len, 4156 /*OUT*/Bool* errorV, /* is there a definedness err? */ 4157 /*OUT*/Addr* bad_addrV, /* if so where? */ 4158 /*OUT*/UInt* otagV, /* and what's its otag? */ 4159 /*OUT*/Bool* errorA, /* is there an addressability err? */ 4160 /*OUT*/Addr* bad_addrA /* if so where? */ 4161 ) 4162 { 4163 SizeT i; 4164 UWord vabits2; 4165 Bool already_saw_errV = False; 4166 4167 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE); 4168 DEBUG("is_mem_defined_comprehensive\n"); 4169 4170 tl_assert(!(*errorV || *errorA)); 4171 4172 for (i = 0; i < len; i++) { 4173 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP); 4174 vabits2 = get_vabits2(a); 4175 switch (vabits2) { 4176 case VA_BITS2_DEFINED: 4177 a++; 4178 break; 4179 case VA_BITS2_UNDEFINED: 4180 case VA_BITS2_PARTDEFINED: 4181 if (!already_saw_errV) { 4182 *errorV = True; 4183 *bad_addrV = a; 4184 if (MC_(clo_mc_level) == 3) { 4185 *otagV = MC_(helperc_b_load1)( a ); 4186 } else { 4187 *otagV = 0; 4188 } 4189 already_saw_errV = True; 4190 } 4191 a++; /* keep going */ 4192 break; 4193 case VA_BITS2_NOACCESS: 4194 *errorA = True; 4195 *bad_addrA = a; 4196 return; /* give up now. */ 4197 default: 4198 tl_assert(0); 4199 } 4200 } 4201 } 4202 4203 4204 /* Check a zero-terminated ascii string. Tricky -- don't want to 4205 examine the actual bytes, to find the end, until we're sure it is 4206 safe to do so. 
*/ 4207 4208 static Bool mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag ) 4209 { 4210 UWord vabits2; 4211 4212 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ); 4213 DEBUG("mc_is_defined_asciiz\n"); 4214 4215 if (otag) *otag = 0; 4216 if (bad_addr) *bad_addr = 0; 4217 while (True) { 4218 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP); 4219 vabits2 = get_vabits2(a); 4220 if (VA_BITS2_DEFINED != vabits2) { 4221 // Error! Nb: Report addressability errors in preference to 4222 // definedness errors. And don't report definedeness errors unless 4223 // --undef-value-errors=yes. 4224 if (bad_addr) { 4225 *bad_addr = a; 4226 } 4227 if (VA_BITS2_NOACCESS == vabits2) { 4228 return MC_AddrErr; 4229 } 4230 if (MC_(clo_mc_level) >= 2) { 4231 if (otag && MC_(clo_mc_level) == 3) { 4232 *otag = MC_(helperc_b_load1)( a ); 4233 } 4234 return MC_ValueErr; 4235 } 4236 } 4237 /* Ok, a is safe to read. */ 4238 if (* ((UChar*)a) == 0) { 4239 return MC_Ok; 4240 } 4241 a++; 4242 } 4243 } 4244 4245 4246 /*------------------------------------------------------------*/ 4247 /*--- Memory event handlers ---*/ 4248 /*------------------------------------------------------------*/ 4249 4250 static 4251 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s, 4252 Addr base, SizeT size ) 4253 { 4254 Addr bad_addr; 4255 Bool ok = is_mem_addressable ( base, size, &bad_addr ); 4256 4257 if (!ok) { 4258 switch (part) { 4259 case Vg_CoreSysCall: 4260 MC_(record_memparam_error) ( tid, bad_addr, 4261 /*isAddrErr*/True, s, 0/*otag*/ ); 4262 break; 4263 4264 case Vg_CoreSignal: 4265 MC_(record_core_mem_error)( tid, s ); 4266 break; 4267 4268 default: 4269 VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart"); 4270 } 4271 } 4272 } 4273 4274 static 4275 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s, 4276 Addr base, SizeT size ) 4277 { 4278 UInt otag = 0; 4279 Addr bad_addr; 4280 MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag ); 4281 4282 if 
(MC_Ok != res) { 4283 Bool isAddrErr = ( MC_AddrErr == res ? True : False ); 4284 4285 switch (part) { 4286 case Vg_CoreSysCall: 4287 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s, 4288 isAddrErr ? 0 : otag ); 4289 break; 4290 4291 case Vg_CoreSysCallArgInMem: 4292 MC_(record_regparam_error) ( tid, s, otag ); 4293 break; 4294 4295 /* If we're being asked to jump to a silly address, record an error 4296 message before potentially crashing the entire system. */ 4297 case Vg_CoreTranslate: 4298 MC_(record_jump_error)( tid, bad_addr ); 4299 break; 4300 4301 default: 4302 VG_(tool_panic)("check_mem_is_defined: unexpected CorePart"); 4303 } 4304 } 4305 } 4306 4307 static 4308 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid, 4309 const HChar* s, Addr str ) 4310 { 4311 MC_ReadResult res; 4312 Addr bad_addr = 0; // shut GCC up 4313 UInt otag = 0; 4314 4315 tl_assert(part == Vg_CoreSysCall); 4316 res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag ); 4317 if (MC_Ok != res) { 4318 Bool isAddrErr = ( MC_AddrErr == res ? True : False ); 4319 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s, 4320 isAddrErr ? 0 : otag ); 4321 } 4322 } 4323 4324 /* Handling of mmap and mprotect is not as simple as it seems. 4325 4326 The underlying semantics are that memory obtained from mmap is 4327 always initialised, but may be inaccessible. And changes to the 4328 protection of memory do not change its contents and hence not its 4329 definedness state. Problem is we can't model 4330 inaccessible-but-with-some-definedness state; once we mark memory 4331 as inaccessible we lose all info about definedness, and so can't 4332 restore that if it is later made accessible again. 
4333 4334 One obvious thing to do is this: 4335 4336 mmap/mprotect NONE -> noaccess 4337 mmap/mprotect other -> defined 4338 4339 The problem case here is: taking accessible memory, writing 4340 uninitialised data to it, mprotecting it NONE and later mprotecting 4341 it back to some accessible state causes the undefinedness to be 4342 lost. 4343 4344 A better proposal is: 4345 4346 (1) mmap NONE -> make noaccess 4347 (2) mmap other -> make defined 4348 4349 (3) mprotect NONE -> # no change 4350 (4) mprotect other -> change any "noaccess" to "defined" 4351 4352 (2) is OK because memory newly obtained from mmap really is defined 4353 (zeroed out by the kernel -- doing anything else would 4354 constitute a massive security hole.) 4355 4356 (1) is OK because the only way to make the memory usable is via 4357 (4), in which case we also wind up correctly marking it all as 4358 defined. 4359 4360 (3) is the weak case. We choose not to change memory state. 4361 (presumably the range is in some mixture of "defined" and 4362 "undefined", viz, accessible but with arbitrary V bits). Doing 4363 nothing means we retain the V bits, so that if the memory is 4364 later mprotected "other", the V bits remain unchanged, so there 4365 can be no false negatives. The bad effect is that if there's 4366 an access in the area, then MC cannot warn; but at least we'll 4367 get a SEGV to show, so it's better than nothing. 4368 4369 Consider the sequence (3) followed by (4). Any memory that was 4370 "defined" or "undefined" previously retains its state (as 4371 required). Any memory that was "noaccess" before can only have 4372 been made that way by (1), and so it's OK to change it to 4373 "defined". 
4374 4375 See https://bugs.kde.org/show_bug.cgi?id=205541 4376 and https://bugs.kde.org/show_bug.cgi?id=210268 4377 */ 4378 static 4379 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx, 4380 ULong di_handle ) 4381 { 4382 if (rr || ww || xx) { 4383 /* (2) mmap/mprotect other -> defined */ 4384 MC_(make_mem_defined)(a, len); 4385 } else { 4386 /* (1) mmap/mprotect NONE -> noaccess */ 4387 MC_(make_mem_noaccess)(a, len); 4388 } 4389 } 4390 4391 static 4392 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx ) 4393 { 4394 if (rr || ww || xx) { 4395 /* (4) mprotect other -> change any "noaccess" to "defined" */ 4396 make_mem_defined_if_noaccess(a, len); 4397 } else { 4398 /* (3) mprotect NONE -> # no change */ 4399 /* do nothing */ 4400 } 4401 } 4402 4403 4404 static 4405 void mc_new_mem_startup( Addr a, SizeT len, 4406 Bool rr, Bool ww, Bool xx, ULong di_handle ) 4407 { 4408 // Because code is defined, initialised variables get put in the data 4409 // segment and are defined, and uninitialised variables get put in the 4410 // bss segment and are auto-zeroed (and so defined). 4411 // 4412 // It's possible that there will be padding between global variables. 4413 // This will also be auto-zeroed, and marked as defined by Memcheck. If 4414 // a program uses it, Memcheck will not complain. This is arguably a 4415 // false negative, but it's a grey area -- the behaviour is defined (the 4416 // padding is zeroed) but it's probably not what the user intended. And 4417 // we can't avoid it. 4418 // 4419 // Note: we generally ignore RWX permissions, because we can't track them 4420 // without requiring more than one A bit which would slow things down a 4421 // lot. But on Darwin the 0th page is mapped but !R and !W and !X. 4422 // So we mark any such pages as "unaddressable". 
4423 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n", 4424 a, (ULong)len, rr, ww, xx); 4425 mc_new_mem_mmap(a, len, rr, ww, xx, di_handle); 4426 } 4427 4428 static 4429 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len) 4430 { 4431 MC_(make_mem_defined)(a, len); 4432 } 4433 4434 4435 /*------------------------------------------------------------*/ 4436 /*--- Register event handlers ---*/ 4437 /*------------------------------------------------------------*/ 4438 4439 /* Try and get a nonzero origin for the guest state section of thread 4440 tid characterised by (offset,size). Return 0 if nothing to show 4441 for it. */ 4442 static UInt mb_get_origin_for_guest_offset ( ThreadId tid, 4443 Int offset, SizeT size ) 4444 { 4445 Int sh2off; 4446 UInt area[3]; 4447 UInt otag; 4448 sh2off = MC_(get_otrack_shadow_offset)( offset, size ); 4449 if (sh2off == -1) 4450 return 0; /* This piece of guest state is not tracked */ 4451 tl_assert(sh2off >= 0); 4452 tl_assert(0 == (sh2off % 4)); 4453 area[0] = 0x31313131; 4454 area[2] = 0x27272727; 4455 VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 ); 4456 tl_assert(area[0] == 0x31313131); 4457 tl_assert(area[2] == 0x27272727); 4458 otag = area[1]; 4459 return otag; 4460 } 4461 4462 4463 /* When some chunk of guest state is written, mark the corresponding 4464 shadow area as valid. This is used to initialise arbitrarily large 4465 chunks of guest state, hence the _SIZE value, which has to be as 4466 big as the biggest guest state. 
4467 */ 4468 static void mc_post_reg_write ( CorePart part, ThreadId tid, 4469 PtrdiffT offset, SizeT size) 4470 { 4471 # define MAX_REG_WRITE_SIZE 1728 4472 UChar area[MAX_REG_WRITE_SIZE]; 4473 tl_assert(size <= MAX_REG_WRITE_SIZE); 4474 VG_(memset)(area, V_BITS8_DEFINED, size); 4475 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area ); 4476 # undef MAX_REG_WRITE_SIZE 4477 } 4478 4479 static 4480 void mc_post_reg_write_clientcall ( ThreadId tid, 4481 PtrdiffT offset, SizeT size, Addr f) 4482 { 4483 mc_post_reg_write(/*dummy*/0, tid, offset, size); 4484 } 4485 4486 /* Look at the definedness of the guest's shadow state for 4487 [offset, offset+len). If any part of that is undefined, record 4488 a parameter error. 4489 */ 4490 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s, 4491 PtrdiffT offset, SizeT size) 4492 { 4493 Int i; 4494 Bool bad; 4495 UInt otag; 4496 4497 UChar area[16]; 4498 tl_assert(size <= 16); 4499 4500 VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size ); 4501 4502 bad = False; 4503 for (i = 0; i < size; i++) { 4504 if (area[i] != V_BITS8_DEFINED) { 4505 bad = True; 4506 break; 4507 } 4508 } 4509 4510 if (!bad) 4511 return; 4512 4513 /* We've found some undefinedness. See if we can also find an 4514 origin for it. */ 4515 otag = mb_get_origin_for_guest_offset( tid, offset, size ); 4516 MC_(record_regparam_error) ( tid, s, otag ); 4517 } 4518 4519 4520 /*------------------------------------------------------------*/ 4521 /*--- Register-memory event handlers ---*/ 4522 /*------------------------------------------------------------*/ 4523 4524 static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a, 4525 PtrdiffT guest_state_offset, SizeT size ) 4526 { 4527 SizeT i; 4528 UChar vbits8; 4529 Int offset; 4530 UInt d32; 4531 4532 /* Slow loop. 
*/ 4533 for (i = 0; i < size; i++) { 4534 get_vbits8( a+i, &vbits8 ); 4535 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i, 4536 1, &vbits8 ); 4537 } 4538 4539 if (MC_(clo_mc_level) != 3) 4540 return; 4541 4542 /* Track origins. */ 4543 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size ); 4544 if (offset == -1) 4545 return; 4546 4547 switch (size) { 4548 case 1: 4549 d32 = MC_(helperc_b_load1)( a ); 4550 break; 4551 case 2: 4552 d32 = MC_(helperc_b_load2)( a ); 4553 break; 4554 case 4: 4555 d32 = MC_(helperc_b_load4)( a ); 4556 break; 4557 case 8: 4558 d32 = MC_(helperc_b_load8)( a ); 4559 break; 4560 case 16: 4561 d32 = MC_(helperc_b_load16)( a ); 4562 break; 4563 case 32: 4564 d32 = MC_(helperc_b_load32)( a ); 4565 break; 4566 default: 4567 tl_assert(0); 4568 } 4569 4570 VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 ); 4571 } 4572 4573 static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid, 4574 PtrdiffT guest_state_offset, Addr a, 4575 SizeT size ) 4576 { 4577 SizeT i; 4578 UChar vbits8; 4579 Int offset; 4580 UInt d32; 4581 4582 /* Slow loop. */ 4583 for (i = 0; i < size; i++) { 4584 VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/, 4585 guest_state_offset+i, 1 ); 4586 set_vbits8( a+i, vbits8 ); 4587 } 4588 4589 if (MC_(clo_mc_level) != 3) 4590 return; 4591 4592 /* Track origins. 
*/ 4593 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size ); 4594 if (offset == -1) 4595 return; 4596 4597 VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 ); 4598 switch (size) { 4599 case 1: 4600 MC_(helperc_b_store1)( a, d32 ); 4601 break; 4602 case 2: 4603 MC_(helperc_b_store2)( a, d32 ); 4604 break; 4605 case 4: 4606 MC_(helperc_b_store4)( a, d32 ); 4607 break; 4608 case 8: 4609 MC_(helperc_b_store8)( a, d32 ); 4610 break; 4611 case 16: 4612 MC_(helperc_b_store16)( a, d32 ); 4613 break; 4614 case 32: 4615 MC_(helperc_b_store32)( a, d32 ); 4616 break; 4617 default: 4618 tl_assert(0); 4619 } 4620 } 4621 4622 4623 /*------------------------------------------------------------*/ 4624 /*--- Some static assertions ---*/ 4625 /*------------------------------------------------------------*/ 4626 4627 /* The handwritten assembly helpers below have baked-in assumptions 4628 about various constant values. These assertions attempt to make 4629 that a bit safer by checking those values and flagging changes that 4630 would make the assembly invalid. Not perfect but it's better than 4631 nothing. */ 4632 4633 STATIC_ASSERT(SM_CHUNKS * 4 == 65536); 4634 4635 STATIC_ASSERT(VA_BITS8_DEFINED == 0xAA); 4636 STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55); 4637 4638 STATIC_ASSERT(V_BITS32_DEFINED == 0x00000000); 4639 STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF); 4640 4641 STATIC_ASSERT(VA_BITS4_DEFINED == 0xA); 4642 STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5); 4643 4644 STATIC_ASSERT(V_BITS16_DEFINED == 0x0000); 4645 STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF); 4646 4647 STATIC_ASSERT(VA_BITS2_DEFINED == 2); 4648 STATIC_ASSERT(VA_BITS2_UNDEFINED == 1); 4649 4650 STATIC_ASSERT(V_BITS8_DEFINED == 0x00); 4651 STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF); 4652 4653 4654 /*------------------------------------------------------------*/ 4655 /*--- Functions called directly from generated code: ---*/ 4656 /*--- Load/store handlers. 
---*/
/*------------------------------------------------------------*/

/* Types:  LOADV32, LOADV16, LOADV8 are:
               UWord fn ( Addr a )
   so they return 32-bits on 32-bit machines and 64-bits on
   64-bit machines.  Addr has the same size as a host word.

   LOADV64 is always  ULong fn ( Addr a )

   Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
   are a UWord, and for STOREV64 they are a ULong.
*/

/* MASK(_szInBytes) builds a bit mask for an access of _szInBytes
   bytes.  If any part of '_a' indicated by the mask is 1, either '_a'
   is not naturally '_szInBytes'-aligned, or it exceeds the range
   covered by the primary map.  This is all very tricky (and
   important!), so let's work through the maths by hand (below), *and*
   assert for these values at startup. */
#define MASK(_szInBytes) \
   ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )

/* MASK only exists so as to define this macro.  Note that the size
   is given in bits here and converted to bytes (>>3) for MASK.  The
   result is nonzero iff '_a' has any of the masked bits set. */
#define UNALIGNED_OR_HIGH(_a,_szInBits) \
   ((_a) & MASK((_szInBits>>3)))

/* On a 32-bit machine:

   N_PRIMARY_BITS          == 16, so
   N_PRIMARY_MAP           == 0x10000, so
   N_PRIMARY_MAP-1         == 0xFFFF, so
   (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so

   MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
           = ~ ( 0xFFFF | 0xFFFF0000 )
           = ~ 0xFFFF'FFFF
           = 0

   MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
           = ~ ( 0xFFFE | 0xFFFF0000 )
           = ~ 0xFFFF'FFFE
           = 1

   MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
           = ~ ( 0xFFFC | 0xFFFF0000 )
           = ~ 0xFFFF'FFFC
           = 3

   MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
           = ~ ( 0xFFF8 | 0xFFFF0000 )
           = ~ 0xFFFF'FFF8
           = 7

   Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
   precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
   the 1-byte alignment case, it is always a zero value, since MASK(1)
   is zero.  All as expected.

   On a 64-bit machine, it's more complex, since we're testing
   simultaneously for misalignment and for the address being at or
   above 64G:

   N_PRIMARY_BITS          == 20, so
   N_PRIMARY_MAP           == 0x100000, so
   N_PRIMARY_MAP-1         == 0xFFFFF, so
   (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so

   MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFF | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFF
           = 0xFFFF'FFF0'0000'0000

   MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFE | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFE
           = 0xFFFF'FFF0'0000'0001

   MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFC | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFC
           = 0xFFFF'FFF0'0000'0003

   MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
           = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFF8
           = 0xFFFF'FFF0'0000'0007
*/

/*------------------------------------------------------------*/
/*--- LOADV256 and LOADV128                                ---*/
/*------------------------------------------------------------*/

/* Produce the V bits for an nBits-wide load from 'a', written as
   nBits/64 ULongs into res[].  The fast path (PERF_FAST_LOADV) handles
   each 8-byte chunk whose compact state is entirely defined or
   entirely undefined.  If 'a' fails UNALIGNED_OR_HIGH, or any chunk is
   in some other state, the whole load is redone by
   mc_LOADV_128_or_256_slow. */
static INLINE
void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
                           Addr a, SizeT nBits, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV_128_OR_256);

#ifndef PERF_FAST_LOADV
   mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   return;
#else
   {
      UWord sm_off16, vabits16, j;
      UWord nBytes = nBits / 8;
      UWord nULongs = nBytes / 8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
         PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
         mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
         return;
      }

      /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
         suitably aligned, is mapped, and addressable. */
      for (j = 0; j < nULongs; j++) {
         sm = get_secmap_for_reading_low(a + 8*j);
         sm_off16 = SM_OFF_16(a + 8*j);
         vabits16 = sm->vabits16[sm_off16];

         // Convert V bits from compact memory form to expanded
         // register form.
         if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
            res[j] = V_BITS64_DEFINED;
         } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
            res[j] = V_BITS64_UNDEFINED;
         } else {
            /* Slow case: some block of 8 bytes are not all-defined or
               all-undefined. */
            PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
            mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
            return;
         }
      }
      return;
   }
#endif
}

/* Entry points for 256-bit and 128-bit loads, in big-endian (be) and
   little-endian (le) flavours.  Each forwards to mc_LOADV_128_or_256
   with the matching width and endianness flag. */
VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
}
VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
}

VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
}
VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
}

/*------------------------------------------------------------*/
/*--- LOADV64                                              ---*/
/*------------------------------------------------------------*/

/* Return the V bits for a 64-bit load from 'a'.  The fast path
   returns V_BITS64_DEFINED or V_BITS64_UNDEFINED when the 16-bit
   compact entry for these 8 bytes is entirely defined or entirely
   undefined.  Everything else, including a failed UNALIGNED_OR_HIGH
   check, goes to mc_LOADVn_slow. */
static INLINE
ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV64);

#ifndef PERF_FAST_LOADV
   return mc_LOADVn_slow( a, 64, isBigEndian );
#else
   {
      UWord sm_off16, vabits16;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
         PROF_EVENT(MCPE_LOADV64_SLOW1);
         return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
      }

      sm = get_secmap_for_reading_low(a);
      sm_off16 = SM_OFF_16(a);
      vabits16 = sm->vabits16[sm_off16];

      // Handle common case quickly: a is suitably aligned, is mapped, and
      // addressable.
      // Convert V bits from compact memory form to expanded register form.
      if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
         return V_BITS64_DEFINED;
      } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
         return V_BITS64_UNDEFINED;
      } else {
         /* Slow case: the 8 bytes are not all-defined or all-undefined. */
         PROF_EVENT(MCPE_LOADV64_SLOW2);
         return mc_LOADVn_slow( a, 64, isBigEndian );
      }
   }
#endif
}

// Generic for all platforms
VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
{
   return mc_LOADV64(a, True);
}

// The little-endian entry point has three alternative definitions:
// handwritten assembly for arm32-linux, handwritten assembly for
// x86-{linux,solaris}, and the generic C version.  Both assembly
// versions fall back to mc_LOADVn_slow with size 64 and a zero third
// argument.

// Non-generic assembly for arm32-linux
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && defined(VGP_arm_linux)
__asm__( /* Derived from the 32 bit assembly helper */
".text \n"
".align 2 \n"
".global vgMemCheck_helperc_LOADV64le \n"
".type vgMemCheck_helperc_LOADV64le, %function \n"
"vgMemCheck_helperc_LOADV64le: \n"
" tst r0, #7 \n"
" movw r3, #:lower16:primary_map \n"
" bne .LLV64LEc4 \n" // if misaligned
" lsr r2, r0, #16 \n"
" movt r3, #:upper16:primary_map \n"
" ldr r2, [r3, r2, lsl #2] \n"
" uxth r1, r0 \n" // r1 is 0-(16)-0 X-(13)-X 000
" movw r3, #0xAAAA \n"
" lsr r1, r1, #2 \n" // r1 is 0-(16)-0 00 X-(13)-X 0
" ldrh r1, [r2, r1] \n"
" cmp r1, r3 \n" // 0xAAAA == VA_BITS16_DEFINED
" bne .LLV64LEc0 \n" // if !all_defined
" mov r1, #0x0 \n" // 0x0 == V_BITS32_DEFINED
" mov r0, #0x0 \n" // 0x0 == V_BITS32_DEFINED
" bx lr \n"
".LLV64LEc0: \n"
" movw r3, #0x5555 \n"
" cmp r1, r3 \n" // 0x5555 == VA_BITS16_UNDEFINED
" bne .LLV64LEc4 \n" // if !all_undefined
" mov r1, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
" mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
" bx lr \n"
".LLV64LEc4: \n"
" push {r4, lr} \n"
" mov r2, #0 \n"
" mov r1, #64 \n"
" bl mc_LOADVn_slow \n"
" pop {r4, pc} \n"
".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n"
".previous\n"
);

#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
__asm__(
".text\n"
".align 16\n"
".global vgMemCheck_helperc_LOADV64le\n"
".type vgMemCheck_helperc_LOADV64le, @function\n"
"vgMemCheck_helperc_LOADV64le:\n"
" test $0x7, %eax\n"
" jne .LLV64LE2\n" /* jump if not aligned */
" mov %eax, %ecx\n"
" movzwl %ax, %edx\n"
" shr $0x10, %ecx\n"
" mov primary_map(,%ecx,4), %ecx\n"
" shr $0x3, %edx\n"
" movzwl (%ecx,%edx,2), %edx\n"
" cmp $0xaaaa, %edx\n"
" jne .LLV64LE1\n" /* jump if not all defined */
" xor %eax, %eax\n" /* return 0 in edx:eax */
" xor %edx, %edx\n"
" ret\n"
".LLV64LE1:\n"
" cmp $0x5555, %edx\n"
" jne .LLV64LE2\n" /* jump if not all undefined */
" or $0xffffffff, %eax\n" /* else return all bits set in edx:eax */
" or $0xffffffff, %edx\n"
" ret\n"
".LLV64LE2:\n"
" xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 64, 0) */
" mov $64, %edx\n"
" jmp mc_LOADVn_slow\n"
".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le\n"
".previous\n"
);

#else
// Generic for all platforms except {arm32,x86}-linux and x86-solaris
VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
{
   return mc_LOADV64(a, False);
}
#endif

/*------------------------------------------------------------*/
/*--- STOREV64                                             ---*/
/*------------------------------------------------------------*/

/* Record vbits64 as the V bits of the 8 bytes at 'a'.  On the fast
   path (PERF_FAST_STOREV), when vbits64 is entirely defined or
   entirely undefined there are two quick outcomes.  If the compact
   entry already has that state, nothing is done.  If it has the
   opposite uniform state and the secondary map is not a distinguished
   one, the entry is overwritten in place.  Every other combination is
   handed to mc_STOREVn_slow. */
static INLINE
void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
{
   PROF_EVENT(MCPE_STOREV64);

#ifndef PERF_FAST_STOREV
   // XXX: this slow case seems to be marginally faster than the fast case!
   // Investigate further.
   mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
#else
   {
      UWord sm_off16, vabits16;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
         PROF_EVENT(MCPE_STOREV64_SLOW1);
         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
         return;
      }

      sm = get_secmap_for_reading_low(a);
      sm_off16 = SM_OFF_16(a);
      vabits16 = sm->vabits16[sm_off16];

      // To understand the below cleverness, see the extensive comments
      // in MC_(helperc_STOREV8).
      if (LIKELY(V_BITS64_DEFINED == vbits64)) {
         if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
            sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV64_SLOW2);
         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
         return;
      }
      if (V_BITS64_UNDEFINED == vbits64) {
         if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
            sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV64_SLOW3);
         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
         return;
      }

      PROF_EVENT(MCPE_STOREV64_SLOW4);
      mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   }
#endif
}

VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
{
   mc_STOREV64(a, vbits64, True);
}
VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
{
   mc_STOREV64(a, vbits64, False);
}

/*------------------------------------------------------------*/
/*--- LOADV32                                              ---*/
/*------------------------------------------------------------*/

/* Return the V bits for a 32-bit load from 'a'.  The fast path
   decides from the 8-bit compact entry for these 4 bytes.  The
   all-defined and all-undefined cases are answered directly, with the
   upper 32 bits of the UWord constant set.  Everything else,
   including a failed UNALIGNED_OR_HIGH check, goes to
   mc_LOADVn_slow. */
static INLINE
UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV32);

#ifndef PERF_FAST_LOADV
   return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
#else
   {
      UWord sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
         PROF_EVENT(MCPE_LOADV32_SLOW1);
         return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
      }

      sm = get_secmap_for_reading_low(a);
      sm_off = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];

      // Handle common case quickly: a is suitably aligned, is mapped, and the
      // entire word32 it lives in is addressable.
      // Convert V bits from compact memory form to expanded register form.
      // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
      // Almost certainly not necessary, but be paranoid.
      if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
      } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
      } else {
         /* Slow case: the 4 bytes are not all-defined or all-undefined. */
         PROF_EVENT(MCPE_LOADV32_SLOW2);
         return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
      }
   }
#endif
}

// Generic for all platforms
VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
{
   return mc_LOADV32(a, True);
}

// As for LOADV64le, the little-endian entry point has arm32 and x86
// assembly versions plus a generic C version.  The assembly versions
// fall back to mc_LOADVn_slow with size 32 and a zero third argument.

// Non-generic assembly for arm32-linux
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && defined(VGP_arm_linux)
__asm__( /* Derived from NCode template */
".text \n"
".align 2 \n"
".global vgMemCheck_helperc_LOADV32le \n"
".type vgMemCheck_helperc_LOADV32le, %function \n"
"vgMemCheck_helperc_LOADV32le: \n"
" tst r0, #3 \n" // 1
" movw r3, #:lower16:primary_map \n" // 1
" bne .LLV32LEc4 \n" // 2 if misaligned
" lsr r2, r0, #16 \n" // 3
" movt r3, #:upper16:primary_map \n" // 3
" ldr r2, [r3, r2, lsl #2] \n" // 4
" uxth r1, r0 \n" // 4
" ldrb r1, [r2, r1, lsr #2] \n" // 5
" cmp r1, #0xAA \n" // 6 0xAA == VA_BITS8_DEFINED
" bne .LLV32LEc0 \n" // 7 if !all_defined
" mov r0, #0x0 \n" // 8 0x0 == V_BITS32_DEFINED
" bx lr \n" // 9
".LLV32LEc0: \n"
" cmp r1, #0x55 \n" // 0x55 == VA_BITS8_UNDEFINED
" bne .LLV32LEc4 \n" // if !all_undefined
" mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
" bx lr \n"
".LLV32LEc4: \n"
" push {r4, lr} \n"
" mov r2, #0 \n"
" mov r1, #32 \n"
" bl mc_LOADVn_slow \n"
" pop {r4, pc} \n"
".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n"
".previous\n"
);

#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
__asm__(
".text\n"
".align 16\n"
".global vgMemCheck_helperc_LOADV32le\n"
".type vgMemCheck_helperc_LOADV32le, @function\n"
"vgMemCheck_helperc_LOADV32le:\n"
" test $0x3, %eax\n"
" jnz .LLV32LE2\n" /* jump if misaligned */
" mov %eax, %edx\n"
" shr $16, %edx\n"
" mov primary_map(,%edx,4), %ecx\n"
" movzwl %ax, %edx\n"
" shr $2, %edx\n"
" movzbl (%ecx,%edx,1), %edx\n"
" cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
" jne .LLV32LE1\n" /* jump if not completely defined */
" xor %eax, %eax\n" /* else return V_BITS32_DEFINED */
" ret\n"
".LLV32LE1:\n"
" cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
" jne .LLV32LE2\n" /* jump if not completely undefined */
" or $0xffffffff, %eax\n" /* else return V_BITS32_UNDEFINED */
" ret\n"
".LLV32LE2:\n"
" xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 32, 0) */
" mov $32, %edx\n"
" jmp mc_LOADVn_slow\n"
".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le\n"
".previous\n"
);

#else
// Generic for all platforms except {arm32,x86}-linux and x86-solaris
VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
{
   return mc_LOADV32(a, False);
}
#endif

/*------------------------------------------------------------*/
/*--- STOREV32                                             ---*/
/*------------------------------------------------------------*/

/* Record vbits32 as the V bits of the 4 bytes at 'a'.  Same scheme as
   mc_STOREV64, but working on the 8-bit compact entry.  When vbits32
   is entirely defined or entirely undefined, an entry already in that
   state is left alone.  An entry in the opposite uniform state is
   overwritten, provided the secondary map is not a distinguished one.
   Every other combination is handed to mc_STOREVn_slow. */
static INLINE
void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
{
   PROF_EVENT(MCPE_STOREV32);

#ifndef PERF_FAST_STOREV
   mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
#else
   {
      UWord sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
         PROF_EVENT(MCPE_STOREV32_SLOW1);
         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
         return;
      }

      sm = get_secmap_for_reading_low(a);
      sm_off = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];

      // To understand the below cleverness, see the extensive comments
      // in MC_(helperc_STOREV8).
      if (LIKELY(V_BITS32_DEFINED == vbits32)) {
         if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
            sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV32_SLOW2);
         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
         return;
      }
      if (V_BITS32_UNDEFINED == vbits32) {
         if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
            sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV32_SLOW3);
         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
         return;
      }

      PROF_EVENT(MCPE_STOREV32_SLOW4);
      mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   }
#endif
}

VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
{
   mc_STOREV32(a, vbits32, True);
}
VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
{
   mc_STOREV32(a, vbits32, False);
}

/*------------------------------------------------------------*/
/*--- LOADV16                                              ---*/
/*------------------------------------------------------------*/

/* Return the V bits for a 16-bit load from 'a'.  The fast path first
   tests the 8-bit compact entry covering the enclosing 4 bytes.  If
   that is not uniform, it tests the 4-bit part extracted for 'a'.
   Only if that is not uniform either is mc_LOADVn_slow used.  A
   failed UNALIGNED_OR_HIGH check also goes to mc_LOADVn_slow. */
static INLINE
UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV16);

#ifndef PERF_FAST_LOADV
   return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
#else
   {
      UWord sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
         PROF_EVENT(MCPE_LOADV16_SLOW1);
         return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
      }

      sm = get_secmap_for_reading_low(a);
      sm_off = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];
      // Handle common case quickly: a is suitably aligned, is mapped, and is
      // addressable.
      // Convert V bits from compact memory form to expanded register form
      if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS16_DEFINED; }
      else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
      else {
         // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
         // the two sub-bytes.
         UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
         if (vabits4 == VA_BITS4_DEFINED ) { return V_BITS16_DEFINED; }
         else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
         else {
            /* Slow case: the two bytes are not all-defined or all-undefined. */
            PROF_EVENT(MCPE_LOADV16_SLOW2);
            return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
         }
      }
   }
#endif
}

// Generic for all platforms
VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
{
   return mc_LOADV16(a, True);
}

// Non-generic assembly for arm32-linux
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && defined(VGP_arm_linux)
__asm__( /* Derived from NCode template */
".text \n"
".align 2 \n"
".global vgMemCheck_helperc_LOADV16le \n"
".type vgMemCheck_helperc_LOADV16le, %function \n"
"vgMemCheck_helperc_LOADV16le: \n" //
" tst r0, #1 \n" //
" bne .LLV16LEc12 \n" // if misaligned
" lsr r2, r0, #16 \n" // r2 = pri-map-ix
" movw r3, #:lower16:primary_map \n" //
" uxth r1, r0 \n" // r1 = sec-map-offB
" movt r3, #:upper16:primary_map \n" //
" ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
" ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
" cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5278 " bne .LLV16LEc0 \n" // no, goto .LLV16LEc0 5279 ".LLV16LEh9: \n" // 5280 " mov r0, #0xFFFFFFFF \n" // 5281 " lsl r0, r0, #16 \n" // V_BITS16_DEFINED | top16safe 5282 " bx lr \n" // 5283 ".LLV16LEc0: \n" // 5284 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED 5285 " bne .LLV16LEc4 \n" // 5286 ".LLV16LEc2: \n" // 5287 " mov r0, #0xFFFFFFFF \n" // V_BITS16_UNDEFINED | top16safe 5288 " bx lr \n" // 5289 ".LLV16LEc4: \n" // 5290 // r1 holds sec-map-VABITS8. r0 holds the address and is 2-aligned. 5291 // Extract the relevant 4 bits and inspect. 5292 " and r2, r0, #2 \n" // addr & 2 5293 " add r2, r2, r2 \n" // 2 * (addr & 2) 5294 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 2)) 5295 " and r1, r1, #15 \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15 5296 5297 " cmp r1, #0xA \n" // VA_BITS4_DEFINED 5298 " beq .LLV16LEh9 \n" // 5299 5300 " cmp r1, #0x5 \n" // VA_BITS4_UNDEFINED 5301 " beq .LLV16LEc2 \n" // 5302 5303 ".LLV16LEc12: \n" // 5304 " push {r4, lr} \n" // 5305 " mov r2, #0 \n" // 5306 " mov r1, #16 \n" // 5307 " bl mc_LOADVn_slow \n" // 5308 " pop {r4, pc} \n" // 5309 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n" 5310 ".previous\n" 5311 ); 5312 5313 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \ 5314 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris)) 5315 __asm__( 5316 ".text\n" 5317 ".align 16\n" 5318 ".global vgMemCheck_helperc_LOADV16le\n" 5319 ".type vgMemCheck_helperc_LOADV16le, @function\n" 5320 "vgMemCheck_helperc_LOADV16le:\n" 5321 " test $0x1, %eax\n" 5322 " jne .LLV16LE5\n" /* jump if not aligned */ 5323 " mov %eax, %edx\n" 5324 " shr $0x10, %edx\n" 5325 " mov primary_map(,%edx,4), %ecx\n" 5326 " movzwl %ax, %edx\n" 5327 " shr $0x2, %edx\n" 5328 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */ 5329 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */ 5330 " jne .LLV16LE2\n" /* jump if not all 32bits defined */ 5331 ".LLV16LE1:\n" 5332 " mov $0xffff0000,%eax\n" /* V_BITS16_DEFINED | 
top16safe */ 5333 " ret\n" 5334 ".LLV16LE2:\n" 5335 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */ 5336 " jne .LLV16LE4\n" /* jump if not all 32bits undefined */ 5337 ".LLV16LE3:\n" 5338 " or $0xffffffff,%eax\n" /* V_BITS16_UNDEFINED | top16safe */ 5339 " ret\n" 5340 ".LLV16LE4:\n" 5341 " mov %eax, %ecx\n" 5342 " and $0x2, %ecx\n" 5343 " add %ecx, %ecx\n" 5344 " sar %cl, %edx\n" 5345 " and $0xf, %edx\n" 5346 " cmp $0xa, %edx\n" 5347 " je .LLV16LE1\n" /* jump if all 16bits are defined */ 5348 " cmp $0x5, %edx\n" 5349 " je .LLV16LE3\n" /* jump if all 16bits are undefined */ 5350 ".LLV16LE5:\n" 5351 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 16, 0) */ 5352 " mov $16, %edx\n" 5353 " jmp mc_LOADVn_slow\n" 5354 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n" 5355 ".previous\n" 5356 ); 5357 5358 #else 5359 // Generic for all platforms except {arm32,x86}-linux and x86-solaris 5360 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a ) 5361 { 5362 return mc_LOADV16(a, False); 5363 } 5364 #endif 5365 5366 /*------------------------------------------------------------*/ 5367 /*--- STOREV16 ---*/ 5368 /*------------------------------------------------------------*/ 5369 5370 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. 
*/ 5371 static INLINE 5372 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 ) 5373 { 5374 UInt shift; 5375 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned 5376 shift = (a & 2) << 1; // shift by 0 or 4 5377 vabits8 >>= shift; // shift the four bits to the bottom 5378 // check 2 x vabits2 != VA_BITS2_NOACCESS 5379 return ((0x3 & vabits8) != VA_BITS2_NOACCESS) 5380 && ((0xc & vabits8) != VA_BITS2_NOACCESS << 2); 5381 } 5382 5383 static INLINE 5384 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian ) 5385 { 5386 PROF_EVENT(MCPE_STOREV16); 5387 5388 #ifndef PERF_FAST_STOREV 5389 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian ); 5390 #else 5391 { 5392 UWord sm_off, vabits8; 5393 SecMap* sm; 5394 5395 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) { 5396 PROF_EVENT(MCPE_STOREV16_SLOW1); 5397 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian ); 5398 return; 5399 } 5400 5401 sm = get_secmap_for_reading_low(a); 5402 sm_off = SM_OFF(a); 5403 vabits8 = sm->vabits8[sm_off]; 5404 5405 // To understand the below cleverness, see the extensive comments 5406 // in MC_(helperc_STOREV8). 
5407 if (LIKELY(V_BITS16_DEFINED == vbits16)) { 5408 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) { 5409 return; 5410 } 5411 if (!is_distinguished_sm(sm) 5412 && accessible_vabits4_in_vabits8(a, vabits8)) { 5413 insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED, 5414 &(sm->vabits8[sm_off]) ); 5415 return; 5416 } 5417 PROF_EVENT(MCPE_STOREV16_SLOW2); 5418 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian ); 5419 } 5420 if (V_BITS16_UNDEFINED == vbits16) { 5421 if (vabits8 == VA_BITS8_UNDEFINED) { 5422 return; 5423 } 5424 if (!is_distinguished_sm(sm) 5425 && accessible_vabits4_in_vabits8(a, vabits8)) { 5426 insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED, 5427 &(sm->vabits8[sm_off]) ); 5428 return; 5429 } 5430 PROF_EVENT(MCPE_STOREV16_SLOW3); 5431 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian ); 5432 return; 5433 } 5434 5435 PROF_EVENT(MCPE_STOREV16_SLOW4); 5436 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian ); 5437 } 5438 #endif 5439 } 5440 5441 5442 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 ) 5443 { 5444 mc_STOREV16(a, vbits16, True); 5445 } 5446 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 ) 5447 { 5448 mc_STOREV16(a, vbits16, False); 5449 } 5450 5451 /*------------------------------------------------------------*/ 5452 /*--- LOADV8 ---*/ 5453 /*------------------------------------------------------------*/ 5454 5455 /* Note: endianness is irrelevant for size == 1 */ 5456 5457 // Non-generic assembly for arm32-linux 5458 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \ 5459 && defined(VGP_arm_linux) 5460 __asm__( /* Derived from NCode template */ 5461 ".text \n" 5462 ".align 2 \n" 5463 ".global vgMemCheck_helperc_LOADV8 \n" 5464 ".type vgMemCheck_helperc_LOADV8, %function \n" 5465 "vgMemCheck_helperc_LOADV8: \n" // 5466 " lsr r2, r0, #16 \n" // r2 = pri-map-ix 5467 " movw r3, #:lower16:primary_map \n" // 5468 " uxth r1, r0 \n" // r1 = sec-map-offB 5469 " movt r3, #:upper16:primary_map \n" // 5470 " 
ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map 5471 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8 5472 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED? 5473 " bne .LLV8c0 \n" // no, goto .LLV8c0 5474 ".LLV8h9: \n" // 5475 " mov r0, #0xFFFFFF00 \n" // V_BITS8_DEFINED | top24safe 5476 " bx lr \n" // 5477 ".LLV8c0: \n" // 5478 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED 5479 " bne .LLV8c4 \n" // 5480 ".LLV8c2: \n" // 5481 " mov r0, #0xFFFFFFFF \n" // V_BITS8_UNDEFINED | top24safe 5482 " bx lr \n" // 5483 ".LLV8c4: \n" // 5484 // r1 holds sec-map-VABITS8 5485 // r0 holds the address. Extract the relevant 2 bits and inspect. 5486 " and r2, r0, #3 \n" // addr & 3 5487 " add r2, r2, r2 \n" // 2 * (addr & 3) 5488 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 3)) 5489 " and r1, r1, #3 \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3 5490 5491 " cmp r1, #2 \n" // VA_BITS2_DEFINED 5492 " beq .LLV8h9 \n" // 5493 5494 " cmp r1, #1 \n" // VA_BITS2_UNDEFINED 5495 " beq .LLV8c2 \n" // 5496 5497 " push {r4, lr} \n" // 5498 " mov r2, #0 \n" // 5499 " mov r1, #8 \n" // 5500 " bl mc_LOADVn_slow \n" // 5501 " pop {r4, pc} \n" // 5502 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n" 5503 ".previous\n" 5504 ); 5505 5506 /* Non-generic assembly for x86-linux */ 5507 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \ 5508 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris)) 5509 __asm__( 5510 ".text\n" 5511 ".align 16\n" 5512 ".global vgMemCheck_helperc_LOADV8\n" 5513 ".type vgMemCheck_helperc_LOADV8, @function\n" 5514 "vgMemCheck_helperc_LOADV8:\n" 5515 " mov %eax, %edx\n" 5516 " shr $0x10, %edx\n" 5517 " mov primary_map(,%edx,4), %ecx\n" 5518 " movzwl %ax, %edx\n" 5519 " shr $0x2, %edx\n" 5520 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */ 5521 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED? 
*/ 5522 " jne .LLV8LE2\n" /* jump if not defined */ 5523 ".LLV8LE1:\n" 5524 " mov $0xffffff00, %eax\n" /* V_BITS8_DEFINED | top24safe */ 5525 " ret\n" 5526 ".LLV8LE2:\n" 5527 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */ 5528 " jne .LLV8LE4\n" /* jump if not all 32bits are undefined */ 5529 ".LLV8LE3:\n" 5530 " or $0xffffffff, %eax\n" /* V_BITS8_UNDEFINED | top24safe */ 5531 " ret\n" 5532 ".LLV8LE4:\n" 5533 " mov %eax, %ecx\n" 5534 " and $0x3, %ecx\n" 5535 " add %ecx, %ecx\n" 5536 " sar %cl, %edx\n" 5537 " and $0x3, %edx\n" 5538 " cmp $0x2, %edx\n" 5539 " je .LLV8LE1\n" /* jump if all 8bits are defined */ 5540 " cmp $0x1, %edx\n" 5541 " je .LLV8LE3\n" /* jump if all 8bits are undefined */ 5542 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 8, 0) */ 5543 " mov $0x8, %edx\n" 5544 " jmp mc_LOADVn_slow\n" 5545 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n" 5546 ".previous\n" 5547 ); 5548 5549 #else 5550 // Generic for all platforms except {arm32,x86}-linux and x86-solaris 5551 VG_REGPARM(1) 5552 UWord MC_(helperc_LOADV8) ( Addr a ) 5553 { 5554 PROF_EVENT(MCPE_LOADV8); 5555 5556 #ifndef PERF_FAST_LOADV 5557 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ ); 5558 #else 5559 { 5560 UWord sm_off, vabits8; 5561 SecMap* sm; 5562 5563 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) { 5564 PROF_EVENT(MCPE_LOADV8_SLOW1); 5565 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ ); 5566 } 5567 5568 sm = get_secmap_for_reading_low(a); 5569 sm_off = SM_OFF(a); 5570 vabits8 = sm->vabits8[sm_off]; 5571 // Convert V bits from compact memory form to expanded register form 5572 // Handle common case quickly: a is mapped, and the entire 5573 // word32 it lives in is addressible. 5574 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS8_DEFINED; } 5575 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; } 5576 else { 5577 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check 5578 // the single byte. 
5579 UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8); 5580 if (vabits2 == VA_BITS2_DEFINED ) { return V_BITS8_DEFINED; } 5581 else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; } 5582 else { 5583 /* Slow case: the byte is not all-defined or all-undefined. */ 5584 PROF_EVENT(MCPE_LOADV8_SLOW2); 5585 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ ); 5586 } 5587 } 5588 } 5589 #endif 5590 } 5591 #endif 5592 5593 /*------------------------------------------------------------*/ 5594 /*--- STOREV8 ---*/ 5595 /*------------------------------------------------------------*/ 5596 5597 VG_REGPARM(2) 5598 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 ) 5599 { 5600 PROF_EVENT(MCPE_STOREV8); 5601 5602 #ifndef PERF_FAST_STOREV 5603 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ ); 5604 #else 5605 { 5606 UWord sm_off, vabits8; 5607 SecMap* sm; 5608 5609 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) { 5610 PROF_EVENT(MCPE_STOREV8_SLOW1); 5611 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ ); 5612 return; 5613 } 5614 5615 sm = get_secmap_for_reading_low(a); 5616 sm_off = SM_OFF(a); 5617 vabits8 = sm->vabits8[sm_off]; 5618 5619 // Clevernesses to speed up storing V bits. 5620 // The 64/32/16 bit cases also have similar clevernesses, but it 5621 // works a little differently to the code below. 5622 // 5623 // Cleverness 1: sometimes we don't have to write the shadow memory at 5624 // all, if we can tell that what we want to write is the same as what is 5625 // already there. These cases are marked below as "defined on defined" and 5626 // "undefined on undefined". 5627 // 5628 // Cleverness 2: 5629 // We also avoid to call mc_STOREVn_slow if the V bits can directly 5630 // be written in the secondary map. 
V bits can be directly written 5631 // if 4 conditions are respected: 5632 // * The address for which V bits are written is naturally aligned 5633 // on 1 byte for STOREV8 (this is always true) 5634 // on 2 bytes for STOREV16 5635 // on 4 bytes for STOREV32 5636 // on 8 bytes for STOREV64. 5637 // * V bits being written are either fully defined or fully undefined. 5638 // (for partially defined V bits, V bits cannot be directly written, 5639 // as the secondary vbits table must be maintained). 5640 // * the secmap is not distinguished (distinguished maps cannot be 5641 // modified). 5642 // * the memory corresponding to the V bits being written is 5643 // accessible (if one or more bytes are not accessible, 5644 // we must call mc_STOREVn_slow in order to report accessibility 5645 // errors). 5646 // Note that for STOREV32 and STOREV64, it is too expensive 5647 // to verify the accessibility of each byte for the benefit it 5648 // brings. Instead, a quicker check is done by comparing to 5649 // VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility, 5650 // but misses some opportunity of direct modifications. 5651 // Checking each byte accessibility was measured for 5652 // STOREV32+perf tests and was slowing down all perf tests. 5653 // The cases corresponding to cleverness 2 are marked below as 5654 // "direct mod". 
5655 if (LIKELY(V_BITS8_DEFINED == vbits8)) { 5656 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) { 5657 return; // defined on defined 5658 } 5659 if (!is_distinguished_sm(sm) 5660 && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) { 5661 // direct mod 5662 insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED, 5663 &(sm->vabits8[sm_off]) ); 5664 return; 5665 } 5666 PROF_EVENT(MCPE_STOREV8_SLOW2); 5667 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ ); 5668 return; 5669 } 5670 if (V_BITS8_UNDEFINED == vbits8) { 5671 if (vabits8 == VA_BITS8_UNDEFINED) { 5672 return; // undefined on undefined 5673 } 5674 if (!is_distinguished_sm(sm) 5675 && (VA_BITS2_NOACCESS 5676 != extract_vabits2_from_vabits8(a, vabits8))) { 5677 // direct mod 5678 insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED, 5679 &(sm->vabits8[sm_off]) ); 5680 return; 5681 } 5682 PROF_EVENT(MCPE_STOREV8_SLOW3); 5683 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ ); 5684 return; 5685 } 5686 5687 // Partially defined word 5688 PROF_EVENT(MCPE_STOREV8_SLOW4); 5689 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ ); 5690 } 5691 #endif 5692 } 5693 5694 5695 /*------------------------------------------------------------*/ 5696 /*--- Functions called directly from generated code: ---*/ 5697 /*--- Value-check failure handlers. ---*/ 5698 /*------------------------------------------------------------*/ 5699 5700 /* Call these ones when an origin is available ... 
*/ 5701 VG_REGPARM(1) 5702 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) { 5703 MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin ); 5704 } 5705 5706 VG_REGPARM(1) 5707 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) { 5708 MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin ); 5709 } 5710 5711 VG_REGPARM(1) 5712 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) { 5713 MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin ); 5714 } 5715 5716 VG_REGPARM(1) 5717 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) { 5718 MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin ); 5719 } 5720 5721 VG_REGPARM(2) 5722 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) { 5723 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin ); 5724 } 5725 5726 /* ... and these when an origin isn't available. */ 5727 5728 VG_REGPARM(0) 5729 void MC_(helperc_value_check0_fail_no_o) ( void ) { 5730 MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ ); 5731 } 5732 5733 VG_REGPARM(0) 5734 void MC_(helperc_value_check1_fail_no_o) ( void ) { 5735 MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ ); 5736 } 5737 5738 VG_REGPARM(0) 5739 void MC_(helperc_value_check4_fail_no_o) ( void ) { 5740 MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ ); 5741 } 5742 5743 VG_REGPARM(0) 5744 void MC_(helperc_value_check8_fail_no_o) ( void ) { 5745 MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ ); 5746 } 5747 5748 VG_REGPARM(1) 5749 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) { 5750 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ ); 5751 } 5752 5753 5754 /*------------------------------------------------------------*/ 5755 /*--- Metadata get/set functions, for client requests. 
---*/ 5756 /*------------------------------------------------------------*/ 5757 5758 // Nb: this expands the V+A bits out into register-form V bits, even though 5759 // they're in memory. This is for backward compatibility, and because it's 5760 // probably what the user wants. 5761 5762 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment 5763 error [no longer used], 3 == addressing error. */ 5764 /* Nb: We used to issue various definedness/addressability errors from here, 5765 but we took them out because they ranged from not-very-helpful to 5766 downright annoying, and they complicated the error data structures. */ 5767 static Int mc_get_or_set_vbits_for_client ( 5768 Addr a, 5769 Addr vbits, 5770 SizeT szB, 5771 Bool setting, /* True <=> set vbits, False <=> get vbits */ 5772 Bool is_client_request /* True <=> real user request 5773 False <=> internal call from gdbserver */ 5774 ) 5775 { 5776 SizeT i; 5777 Bool ok; 5778 UChar vbits8; 5779 5780 /* Check that arrays are addressible before doing any getting/setting. 5781 vbits to be checked only for real user request. */ 5782 for (i = 0; i < szB; i++) { 5783 if (VA_BITS2_NOACCESS == get_vabits2(a + i) || 5784 (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) { 5785 return 3; 5786 } 5787 } 5788 5789 /* Do the copy */ 5790 if (setting) { 5791 /* setting */ 5792 for (i = 0; i < szB; i++) { 5793 ok = set_vbits8(a + i, ((UChar*)vbits)[i]); 5794 tl_assert(ok); 5795 } 5796 } else { 5797 /* getting */ 5798 for (i = 0; i < szB; i++) { 5799 ok = get_vbits8(a + i, &vbits8); 5800 tl_assert(ok); 5801 ((UChar*)vbits)[i] = vbits8; 5802 } 5803 if (is_client_request) 5804 // The bytes in vbits[] have now been set, so mark them as such. 5805 MC_(make_mem_defined)(vbits, szB); 5806 } 5807 5808 return 1; 5809 } 5810 5811 5812 /*------------------------------------------------------------*/ 5813 /*--- Detecting leaked (unreachable) malloc'd blocks. 
---*/ 5814 /*------------------------------------------------------------*/ 5815 5816 /* For the memory leak detector, say whether an entire 64k chunk of 5817 address space is possibly in use, or not. If in doubt return 5818 True. 5819 */ 5820 Bool MC_(is_within_valid_secondary) ( Addr a ) 5821 { 5822 SecMap* sm = maybe_get_secmap_for ( a ); 5823 if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) { 5824 /* Definitely not in use. */ 5825 return False; 5826 } else { 5827 return True; 5828 } 5829 } 5830 5831 5832 /* For the memory leak detector, say whether or not a given word 5833 address is to be regarded as valid. */ 5834 Bool MC_(is_valid_aligned_word) ( Addr a ) 5835 { 5836 tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8); 5837 tl_assert(VG_IS_WORD_ALIGNED(a)); 5838 if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED) 5839 return False; 5840 if (sizeof(UWord) == 8) { 5841 if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED) 5842 return False; 5843 } 5844 if (UNLIKELY(MC_(in_ignored_range)(a))) 5845 return False; 5846 else 5847 return True; 5848 } 5849 5850 5851 /*------------------------------------------------------------*/ 5852 /*--- Initialisation ---*/ 5853 /*------------------------------------------------------------*/ 5854 5855 static void init_shadow_memory ( void ) 5856 { 5857 Int i; 5858 SecMap* sm; 5859 5860 tl_assert(V_BIT_UNDEFINED == 1); 5861 tl_assert(V_BIT_DEFINED == 0); 5862 tl_assert(V_BITS8_UNDEFINED == 0xFF); 5863 tl_assert(V_BITS8_DEFINED == 0); 5864 5865 /* Build the 3 distinguished secondaries */ 5866 sm = &sm_distinguished[SM_DIST_NOACCESS]; 5867 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS; 5868 5869 sm = &sm_distinguished[SM_DIST_UNDEFINED]; 5870 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED; 5871 5872 sm = &sm_distinguished[SM_DIST_DEFINED]; 5873 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED; 5874 5875 /* Set up the primary map. 
*/ 5876 /* These entries gradually get overwritten as the used address 5877 space expands. */ 5878 for (i = 0; i < N_PRIMARY_MAP; i++) 5879 primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS]; 5880 5881 /* Auxiliary primary maps */ 5882 init_auxmap_L1_L2(); 5883 5884 /* auxmap_size = auxmap_used = 0; 5885 no ... these are statically initialised */ 5886 5887 /* Secondary V bit table */ 5888 secVBitTable = createSecVBitTable(); 5889 } 5890 5891 5892 /*------------------------------------------------------------*/ 5893 /*--- Sanity check machinery (permanently engaged) ---*/ 5894 /*------------------------------------------------------------*/ 5895 5896 static Bool mc_cheap_sanity_check ( void ) 5897 { 5898 n_sanity_cheap++; 5899 PROF_EVENT(MCPE_CHEAP_SANITY_CHECK); 5900 /* Check for sane operating level */ 5901 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3) 5902 return False; 5903 /* nothing else useful we can rapidly check */ 5904 return True; 5905 } 5906 5907 static Bool mc_expensive_sanity_check ( void ) 5908 { 5909 Int i; 5910 Word n_secmaps_found; 5911 SecMap* sm; 5912 const HChar* errmsg; 5913 Bool bad = False; 5914 5915 if (0) VG_(printf)("expensive sanity check\n"); 5916 if (0) return True; 5917 5918 n_sanity_expensive++; 5919 PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK); 5920 5921 /* Check for sane operating level */ 5922 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3) 5923 return False; 5924 5925 /* Check that the 3 distinguished SMs are still as they should be. */ 5926 5927 /* Check noaccess DSM. */ 5928 sm = &sm_distinguished[SM_DIST_NOACCESS]; 5929 for (i = 0; i < SM_CHUNKS; i++) 5930 if (sm->vabits8[i] != VA_BITS8_NOACCESS) 5931 bad = True; 5932 5933 /* Check undefined DSM. */ 5934 sm = &sm_distinguished[SM_DIST_UNDEFINED]; 5935 for (i = 0; i < SM_CHUNKS; i++) 5936 if (sm->vabits8[i] != VA_BITS8_UNDEFINED) 5937 bad = True; 5938 5939 /* Check defined DSM. 
*/ 5940 sm = &sm_distinguished[SM_DIST_DEFINED]; 5941 for (i = 0; i < SM_CHUNKS; i++) 5942 if (sm->vabits8[i] != VA_BITS8_DEFINED) 5943 bad = True; 5944 5945 if (bad) { 5946 VG_(printf)("memcheck expensive sanity: " 5947 "distinguished_secondaries have changed\n"); 5948 return False; 5949 } 5950 5951 /* If we're not checking for undefined value errors, the secondary V bit 5952 * table should be empty. */ 5953 if (MC_(clo_mc_level) == 1) { 5954 if (0 != VG_(OSetGen_Size)(secVBitTable)) 5955 return False; 5956 } 5957 5958 /* check the auxiliary maps, very thoroughly */ 5959 n_secmaps_found = 0; 5960 errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found ); 5961 if (errmsg) { 5962 VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg); 5963 return False; 5964 } 5965 5966 /* n_secmaps_found is now the number referred to by the auxiliary 5967 primary map. Now add on the ones referred to by the main 5968 primary map. */ 5969 for (i = 0; i < N_PRIMARY_MAP; i++) { 5970 if (primary_map[i] == NULL) { 5971 bad = True; 5972 } else { 5973 if (!is_distinguished_sm(primary_map[i])) 5974 n_secmaps_found++; 5975 } 5976 } 5977 5978 /* check that the number of secmaps issued matches the number that 5979 are reachable (iow, no secmap leaks) */ 5980 if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs)) 5981 bad = True; 5982 5983 if (bad) { 5984 VG_(printf)("memcheck expensive sanity: " 5985 "apparent secmap leakage\n"); 5986 return False; 5987 } 5988 5989 if (bad) { 5990 VG_(printf)("memcheck expensive sanity: " 5991 "auxmap covers wrong address space\n"); 5992 return False; 5993 } 5994 5995 /* there is only one pointer to each secmap (expensive) */ 5996 5997 return True; 5998 } 5999 6000 /*------------------------------------------------------------*/ 6001 /*--- Command line args ---*/ 6002 /*------------------------------------------------------------*/ 6003 6004 /* 31 Aug 2015: Vectorised code is now so widespread that 6005 --partial-loads-ok needs to be enabled by default 
on all platforms.
   Not doing so causes lots of false errors. */

/* Memcheck command line option values, initialised to their defaults.
   They are updated by mc_process_cmd_line_options() below. */
Bool          MC_(clo_partial_loads_ok)       = True;
Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
Long          MC_(clo_freelist_big_blocks)    = 1*1000*1000LL;
LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
UInt          MC_(clo_show_leak_kinds)        = R2S(Possible) | R2S(Unreached);
UInt          MC_(clo_error_for_leak_kinds)   = R2S(Possible) | R2S(Unreached);
UInt          MC_(clo_leak_check_heuristics)  = H2S(LchStdString)
                                                | H2S( LchLength64)
                                                | H2S( LchNewArray)
                                                | H2S( LchMultipleInheritance);
Bool          MC_(clo_xtree_leak)             = False;
const HChar*  MC_(clo_xtree_leak_file)        = "xtleak.kcg.%p";
Bool          MC_(clo_workaround_gcc296_bugs) = False;
Int           MC_(clo_malloc_fill)            = -1;
Int           MC_(clo_free_fill)              = -1;
KeepStacktraces MC_(clo_keep_stacktraces)     = KS_alloc_and_free;
/* 1, 2 or 3; see the comment at the top of mc_process_cmd_line_options. */
Int           MC_(clo_mc_level)               = 2;
Bool          MC_(clo_show_mismatched_frees)  = True;
Bool          MC_(clo_expensive_definedness_checks) = False;
Bool          MC_(clo_ignore_range_below_sp)               = False;
UInt          MC_(clo_ignore_range_below_sp__first_offset) = 0;
UInt          MC_(clo_ignore_range_below_sp__last_offset)  = 0;

/* Keywords accepted by --leak-check-heuristics= and by the
   'heuristics' argument of the monitor commands. */
static const HChar * MC_(parse_leak_heuristics_tokens) =
   "-,stdstring,length64,newarray,multipleinheritance";
/* The first heuristic value (LchNone) has no keyword, as this is
   a fake heuristic used to collect the blocks found without any
   heuristic. */

/* Process one command line option 'arg'.

   Returns True if the option was recognised and accepted here.
   Returns False if --ignore-ranges or --ignore-range-below-sp was
   given an unacceptable value (an error message is printed first).
   Any option not recognised here is passed on to
   VG_(replacement_malloc_process_cmd_line_option), whose result is
   returned.  The meaningless combination --undef-value-errors=no
   --track-origins=yes is reported via VG_(fmsg_bad_option). */
static Bool mc_process_cmd_line_options(const HChar* arg)
{
   const HChar* tmp_str;
   Int   tmp_show;

   tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );

   /* Set MC_(clo_mc_level):
         1 = A bit tracking only
         2 = A and V bit tracking, but no V bit origins
         3 = A and V bit tracking, and V bit origins

      Do this by inspecting --undef-value-errors= and
      --track-origins=.  Reject the case --undef-value-errors=no
      --track-origins=yes as meaningless.
   */
   if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
      if (MC_(clo_mc_level) == 3) {
         goto bad_level;
      } else {
         MC_(clo_mc_level) = 1;
         return True;
      }
   }
   if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
      if (MC_(clo_mc_level) == 1)
         MC_(clo_mc_level) = 2;
      return True;
   }
   if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
      if (MC_(clo_mc_level) == 3)
         MC_(clo_mc_level) = 2;
      return True;
   }
   if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
      if (MC_(clo_mc_level) == 1) {
         goto bad_level;
      } else {
         MC_(clo_mc_level) = 3;
         return True;
      }
   }

   /* The remaining options form one long if / else-if chain built from
      the VG_*_CLO matching macros.  An empty {} body means the macro
      has already stored the parsed value in the given variable. */
   if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
   else if VG_USET_CLO(arg, "--errors-for-leak-kinds",
                       MC_(parse_leak_kinds_tokens),
                       MC_(clo_error_for_leak_kinds)) {}
   else if VG_USET_CLO(arg, "--show-leak-kinds",
                       MC_(parse_leak_kinds_tokens),
                       MC_(clo_show_leak_kinds)) {}
   else if VG_USET_CLO(arg, "--leak-check-heuristics",
                       MC_(parse_leak_heuristics_tokens),
                       MC_(clo_leak_check_heuristics)) {}
   /* --show-reachable and --show-possibly-lost are expressed in terms
      of the show_leak_kinds set (see mc_print_usage). */
   else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
      if (tmp_show) {
         MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
      } else {
         MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
      }
   }
   else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
      if (tmp_show) {
         MC_(clo_show_leak_kinds) |= R2S(Possible);
      } else {
         MC_(clo_show_leak_kinds) &= ~R2S(Possible);
      }
   }
   else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
                       MC_(clo_workaround_gcc296_bugs)) {}

   else if VG_BINT_CLO(arg, "--freelist-vol", MC_(clo_freelist_vol),
                       0, 10*1000*1000*1000LL) {}

   else if VG_BINT_CLO(arg, "--freelist-big-blocks",
                       MC_(clo_freelist_big_blocks),
                       0, 10*1000*1000*1000LL) {}

   else if VG_XACT_CLO(arg, "--leak-check=no",
                       MC_(clo_leak_check), LC_Off) {}
   else if VG_XACT_CLO(arg, "--leak-check=summary",
                       MC_(clo_leak_check), LC_Summary) {}
   else if VG_XACT_CLO(arg, "--leak-check=yes",
                       MC_(clo_leak_check), LC_Full) {}
   else if VG_XACT_CLO(arg, "--leak-check=full",
                       MC_(clo_leak_check), LC_Full) {}

   else if VG_XACT_CLO(arg, "--leak-resolution=low",
                       MC_(clo_leak_resolution), Vg_LowRes) {}
   else if VG_XACT_CLO(arg, "--leak-resolution=med",
                       MC_(clo_leak_resolution), Vg_MedRes) {}
   else if VG_XACT_CLO(arg, "--leak-resolution=high",
                       MC_(clo_leak_resolution), Vg_HighRes) {}

   else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
      Bool ok = parse_ignore_ranges(tmp_str);
      if (!ok) {
         VG_(message)(Vg_DebugMsg,
            "ERROR: --ignore-ranges: "
            "invalid syntax, or end <= start in range\n");
         return False;
      }
      /* Sanity check: refuse any command-line supplied range that is
         larger than 'limit' bytes. */
      if (gIgnoredAddressRanges) {
         UInt i;
         for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
            UWord val     = IAR_INVALID;
            UWord key_min = ~(UWord)0;
            UWord key_max = (UWord)0;
            VG_(indexRangeMap)( &key_min, &key_max, &val,
                                gIgnoredAddressRanges, i );
            tl_assert(key_min <= key_max);
            UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
            if (key_max - key_min > limit && val == IAR_CommandLine) {
               VG_(message)(Vg_DebugMsg,
                  "ERROR: --ignore-ranges: suspiciously large range:\n");
               VG_(message)(Vg_DebugMsg,
                   " 0x%lx-0x%lx (size %lu)\n", key_min, key_max,
                   key_max - key_min + 1);
               return False;
            }
         }
      }
   }

   else if VG_STR_CLO(arg, "--ignore-range-below-sp", tmp_str) {
      /* This seems at first a bit weird, but: in order to imply
         a non-wrapped-around address range, the first offset needs to be
         larger than the second one.  For example
            --ignore-range-below-sp=8192-8189
         would cause accesses in the range [SP-8192, SP-8189] to be
         ignored. */
      UInt offs1 = 0, offs2 = 0;
      Bool ok = parse_UInt_pair(&tmp_str, &offs1, &offs2);
      // Ensure we used all the text after the '=' sign.
      if (ok && *tmp_str != 0) ok = False;
      if (!ok) {
         VG_(message)(Vg_DebugMsg,
                      "ERROR: --ignore-range-below-sp: invalid syntax. "
                      " Expected \"...=decimalnumber-decimalnumber\".\n");
         return False;
      }
      if (offs1 > 1000*1000 /*arbitrary*/ || offs2 > 1000*1000 /*ditto*/) {
         VG_(message)(Vg_DebugMsg,
                      "ERROR: --ignore-range-below-sp: suspiciously large "
                      "offset(s): %u and %u\n", offs1, offs2);
         return False;
      }
      if (offs1 <= offs2) {
         VG_(message)(Vg_DebugMsg,
                      "ERROR: --ignore-range-below-sp: invalid offsets "
                      "(the first must be larger): %u and %u\n", offs1, offs2);
         return False;
      }
      tl_assert(offs1 > offs2);
      if (offs1 - offs2 > 4096 /*arbitrary*/) {
         VG_(message)(Vg_DebugMsg,
                      "ERROR: --ignore-range-below-sp: suspiciously large "
                      "range: %u-%u (size %u)\n", offs1, offs2, offs1 - offs2);
         return False;
      }
      MC_(clo_ignore_range_below_sp) = True;
      MC_(clo_ignore_range_below_sp__first_offset) = offs1;
      MC_(clo_ignore_range_below_sp__last_offset)  = offs2;
      return True;
   }

   else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
   else if VG_BHEX_CLO(arg, "--free-fill", MC_(clo_free_fill), 0x00,0xFF) {}

   else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
                       MC_(clo_keep_stacktraces), KS_alloc) {}
   else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
                       MC_(clo_keep_stacktraces), KS_free) {}
   else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
                       MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
   else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
                       MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
   else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
                       MC_(clo_keep_stacktraces), KS_none) {}

   else if VG_BOOL_CLO(arg, "--show-mismatched-frees",
                       MC_(clo_show_mismatched_frees)) {}
   else if VG_BOOL_CLO(arg, "--expensive-definedness-checks",
                       MC_(clo_expensive_definedness_checks)) {}

   else if VG_BOOL_CLO(arg, "--xtree-leak",
                       MC_(clo_xtree_leak)) {}
   else if VG_STR_CLO (arg, "--xtree-leak-file",
                       MC_(clo_xtree_leak_file)) {}

   else
      /* Not one of ours: let the malloc-replacement layer have a go. */
      return VG_(replacement_malloc_process_cmd_line_option)(arg);

   return True;


  bad_level:
   /* NOTE(review): there is no return statement after this call;
      presumably VG_(fmsg_bad_option) does not return -- confirm. */
   VG_(fmsg_bad_option)(arg,
      "--track-origins=yes has no effect when --undef-value-errors=no.\n");
}

/* Print the user-level help text for the Memcheck options handled by
   mc_process_cmd_line_options. */
static void mc_print_usage(void)
{
   VG_(printf)(
" --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
" --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
" --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
" [definite,possible]\n"
" --errors-for-leak-kinds=kind1,kind2,.. which leak kinds are errors?\n"
" [definite,possible]\n"
" where kind is one of:\n"
" definite indirect possible reachable all none\n"
" --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
" improving leak search false positive [all]\n"
" where heur is one of:\n"
" stdstring length64 newarray multipleinheritance all none\n"
" --show-reachable=yes same as --show-leak-kinds=all\n"
" --show-reachable=no --show-possibly-lost=yes\n"
" same as --show-leak-kinds=definite,possible\n"
" --show-reachable=no --show-possibly-lost=no\n"
" same as --show-leak-kinds=definite\n"
" --xtree-leak=no|yes output leak result in xtree format? [no]\n"
" --xtree-leak-file=<file> xtree leak report file [xtleak.kcg.%%p]\n"
" --undef-value-errors=no|yes check for undefined value errors [yes]\n"
" --track-origins=no|yes show origins of undefined values? [no]\n"
" --partial-loads-ok=no|yes too hard to explain here; see manual [yes]\n"
" --expensive-definedness-checks=no|yes\n"
" Use extra-precise definedness tracking [no]\n"
" --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
" --freelist-big-blocks=<number> releases first blocks with size>= [1000000]\n"
" --workaround-gcc296-bugs=no|yes self explanatory [no]. Deprecated.\n"
" Use --ignore-range-below-sp instead.\n"
" --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
" --ignore-range-below-sp=<number>-<number> do not report errors for\n"
" accesses at the given offsets below SP\n"
" --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
" --free-fill=<hexnumber> fill free'd areas with given value\n"
" --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
" stack trace(s) to keep for malloc'd/free'd areas [alloc-and-free]\n"
" --show-mismatched-frees=no|yes show frees that don't match the allocator? [yes]\n"
   );
}

/* Print the help text for debugging options; there are none. */
static void mc_print_debug_usage(void)
{
   VG_(printf)(
" (none)\n"
   );
}


/*------------------------------------------------------------*/
/*--- Client blocks                                        ---*/
/*------------------------------------------------------------*/

/* Client block management:

   This is managed as an expanding array of client block descriptors.
   Indices of live descriptors are issued to the client, so it can ask
   to free them later.  Therefore we cannot slide live entries down
   over dead ones.  Instead we must use free/inuse flags and scan for
   an empty slot at allocation time.
This in turn means allocation is 6298 relatively expensive, so we hope this does not happen too often. 6299 6300 An unused block has start == size == 0 6301 */ 6302 6303 /* type CGenBlock is defined in mc_include.h */ 6304 6305 /* This subsystem is self-initialising. */ 6306 static UWord cgb_size = 0; 6307 static UWord cgb_used = 0; 6308 static CGenBlock* cgbs = NULL; 6309 6310 /* Stats for this subsystem. */ 6311 static ULong cgb_used_MAX = 0; /* Max in use. */ 6312 static ULong cgb_allocs = 0; /* Number of allocs. */ 6313 static ULong cgb_discards = 0; /* Number of discards. */ 6314 static ULong cgb_search = 0; /* Number of searches. */ 6315 6316 6317 /* Get access to the client block array. */ 6318 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks, 6319 /*OUT*/UWord* nBlocks ) 6320 { 6321 *blocks = cgbs; 6322 *nBlocks = cgb_used; 6323 } 6324 6325 6326 static 6327 Int alloc_client_block ( void ) 6328 { 6329 UWord i, sz_new; 6330 CGenBlock* cgbs_new; 6331 6332 cgb_allocs++; 6333 6334 for (i = 0; i < cgb_used; i++) { 6335 cgb_search++; 6336 if (cgbs[i].start == 0 && cgbs[i].size == 0) 6337 return i; 6338 } 6339 6340 /* Not found. Try to allocate one at the end. */ 6341 if (cgb_used < cgb_size) { 6342 cgb_used++; 6343 return cgb_used-1; 6344 } 6345 6346 /* Ok, we have to allocate a new one. */ 6347 tl_assert(cgb_used == cgb_size); 6348 sz_new = (cgbs == NULL) ? 
10 : (2 * cgb_size); 6349 6350 cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) ); 6351 for (i = 0; i < cgb_used; i++) 6352 cgbs_new[i] = cgbs[i]; 6353 6354 if (cgbs != NULL) 6355 VG_(free)( cgbs ); 6356 cgbs = cgbs_new; 6357 6358 cgb_size = sz_new; 6359 cgb_used++; 6360 if (cgb_used > cgb_used_MAX) 6361 cgb_used_MAX = cgb_used; 6362 return cgb_used-1; 6363 } 6364 6365 6366 static void show_client_block_stats ( void ) 6367 { 6368 VG_(message)(Vg_DebugMsg, 6369 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n", 6370 cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search 6371 ); 6372 } 6373 static void print_monitor_help ( void ) 6374 { 6375 VG_(gdb_printf) 6376 ( 6377 "\n" 6378 "memcheck monitor commands:\n" 6379 " xb <addr> [<len>]\n" 6380 " prints validity bits for <len> (or 1) bytes at <addr>\n" 6381 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n" 6382 " Then prints the bytes values below the corresponding validity bits\n" 6383 " in a layout similar to the gdb command 'x /<len>xb <addr>'\n" 6384 " Example: xb 0x8049c78 10\n" 6385 " get_vbits <addr> [<len>]\n" 6386 " Similar to xb, but only prints the validity bytes by group of 4.\n" 6387 " make_memory [noaccess|undefined\n" 6388 " |defined|Definedifaddressable] <addr> [<len>]\n" 6389 " mark <len> (or 1) bytes at <addr> with the given accessibility\n" 6390 " check_memory [addressable|defined] <addr> [<len>]\n" 6391 " check that <len> (or 1) bytes at <addr> have the given accessibility\n" 6392 " and outputs a description of <addr>\n" 6393 " leak_check [full*|summary|xtleak]\n" 6394 " [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n" 6395 " [heuristics heur1,heur2,...]\n" 6396 " [increased*|changed|any]\n" 6397 " [unlimited*|limited <max_loss_records_output>]\n" 6398 " * = defaults\n" 6399 " xtleak produces an xtree full leak result in xtleak.kcg.%%p.%%n\n" 6400 " where kind is one of:\n" 6401 " definite indirect possible reachable all none\n" 6402 " 
where heur is one of:\n" 6403 " stdstring length64 newarray multipleinheritance all none*\n" 6404 " Examples: leak_check\n" 6405 " leak_check summary any\n" 6406 " leak_check full kinds indirect,possible\n" 6407 " leak_check full reachable any limited 100\n" 6408 " block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n" 6409 " [unlimited*|limited <max_blocks>]\n" 6410 " [heuristics heur1,heur2,...]\n" 6411 " after a leak search, shows the list of blocks of <loss_record_nr>\n" 6412 " (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n" 6413 " With heuristics, only shows the blocks found via heur1,heur2,...\n" 6414 " * = defaults\n" 6415 " who_points_at <addr> [<len>]\n" 6416 " shows places pointing inside <len> (default 1) bytes at <addr>\n" 6417 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n" 6418 " with len > 1, will also show \"interior pointers\")\n" 6419 " xtmemory [<filename>]\n" 6420 " dump xtree memory profile in <filename> (default xtmemory.kcg.%%p.%%n)\n" 6421 "\n"); 6422 } 6423 6424 /* Print szB bytes at address, with a format similar to the gdb command 6425 x /<szB>xb address. 6426 res[i] == 1 indicates the corresponding byte is addressable. */ 6427 static void gdb_xb (Addr address, SizeT szB, Int res[]) 6428 { 6429 UInt i; 6430 6431 for (i = 0; i < szB; i++) { 6432 UInt bnr = i % 8; 6433 if (bnr == 0) { 6434 if (i != 0) 6435 VG_(printf) ("\n"); // Terminate previous line 6436 VG_(printf) ("%p:", (void*)(address+i)); 6437 } 6438 if (res[i] == 1) 6439 VG_(printf) ("\t0x%02x", *(UChar*)(address+i)); 6440 else 6441 VG_(printf) ("\t0x??"); 6442 } 6443 VG_(printf) ("\n"); // Terminate previous line 6444 } 6445 6446 6447 /* Returns the address of the next non space character, 6448 or address of the string terminator. */ 6449 static HChar* next_non_space (HChar *s) 6450 { 6451 while (*s && *s == ' ') 6452 s++; 6453 return s; 6454 } 6455 6456 /* Parse an integer slice, i.e. 
a single integer or a range of integer. 6457 Syntax is: 6458 <integer>[..<integer> ] 6459 (spaces are allowed before and/or after ..). 6460 Return True if range correctly parsed, False otherwise. */ 6461 static Bool VG_(parse_slice) (HChar* s, HChar** saveptr, 6462 UInt *from, UInt *to) 6463 { 6464 HChar* wl; 6465 HChar *endptr; 6466 endptr = NULL;//// 6467 wl = VG_(strtok_r) (s, " ", saveptr); 6468 6469 /* slice must start with an integer. */ 6470 if (wl == NULL) { 6471 VG_(gdb_printf) ("expecting integer or slice <from>..<to>\n"); 6472 return False; 6473 } 6474 *from = VG_(strtoull10) (wl, &endptr); 6475 if (endptr == wl) { 6476 VG_(gdb_printf) ("invalid integer or slice <from>..<to>\n"); 6477 return False; 6478 } 6479 6480 if (*endptr == '\0' && *next_non_space(*saveptr) != '.') { 6481 /* wl token is an integer terminating the string 6482 or else next token does not start with . 6483 In both cases, the slice is a single integer. */ 6484 *to = *from; 6485 return True; 6486 } 6487 6488 if (*endptr == '\0') { 6489 // iii .. => get the next token 6490 wl = VG_(strtok_r) (NULL, " .", saveptr); 6491 } else { 6492 // It must be iii.. 6493 if (*endptr != '.' && *(endptr+1) != '.') { 6494 VG_(gdb_printf) ("expecting slice <from>..<to>\n"); 6495 return False; 6496 } 6497 if ( *(endptr+2) == ' ') { 6498 // It must be iii.. 
jjj => get the next token 6499 wl = VG_(strtok_r) (NULL, " .", saveptr); 6500 } else { 6501 // It must be iii..jjj 6502 wl = endptr+2; 6503 } 6504 } 6505 6506 *to = VG_(strtoull10) (wl, &endptr); 6507 if (*endptr != '\0') { 6508 VG_(gdb_printf) ("missing/wrong 'to' of slice <from>..<to>\n"); 6509 return False; 6510 } 6511 6512 if (*from > *to) { 6513 VG_(gdb_printf) ("<from> cannot be bigger than <to> " 6514 "in slice <from>..<to>\n"); 6515 return False; 6516 } 6517 6518 return True; 6519 } 6520 6521 /* return True if request recognised, False otherwise */ 6522 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req) 6523 { 6524 HChar* wcmd; 6525 HChar s[VG_(strlen)(req) + 1]; /* copy for strtok_r */ 6526 HChar *ssaveptr; 6527 6528 VG_(strcpy) (s, req); 6529 6530 wcmd = VG_(strtok_r) (s, " ", &ssaveptr); 6531 /* NB: if possible, avoid introducing a new command below which 6532 starts with the same first letter(s) as an already existing 6533 command. This ensures a shorter abbreviation for the user. */ 6534 switch (VG_(keyword_id) 6535 ("help get_vbits leak_check make_memory check_memory " 6536 "block_list who_points_at xb xtmemory", 6537 wcmd, kwd_report_duplicated_matches)) { 6538 case -2: /* multiple matches */ 6539 return True; 6540 case -1: /* not found */ 6541 return False; 6542 case 0: /* help */ 6543 print_monitor_help(); 6544 return True; 6545 case 1: { /* get_vbits */ 6546 Addr address; 6547 SizeT szB = 1; 6548 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) { 6549 UChar vbits; 6550 Int i; 6551 Int unaddressable = 0; 6552 for (i = 0; i < szB; i++) { 6553 Int res = mc_get_or_set_vbits_for_client 6554 (address+i, (Addr) &vbits, 1, 6555 False, /* get them */ 6556 False /* is client request */ ); 6557 /* we are before the first character on next line, print a \n. */ 6558 if ((i % 32) == 0 && i != 0) 6559 VG_(printf) ("\n"); 6560 /* we are before the next block of 4 starts, print a space. 
*/ 6561 else if ((i % 4) == 0 && i != 0) 6562 VG_(printf) (" "); 6563 if (res == 1) { 6564 VG_(printf) ("%02x", vbits); 6565 } else { 6566 tl_assert(3 == res); 6567 unaddressable++; 6568 VG_(printf) ("__"); 6569 } 6570 } 6571 VG_(printf) ("\n"); 6572 if (unaddressable) { 6573 VG_(printf) 6574 ("Address %p len %lu has %d bytes unaddressable\n", 6575 (void *)address, szB, unaddressable); 6576 } 6577 } 6578 return True; 6579 } 6580 case 2: { /* leak_check */ 6581 Int err = 0; 6582 LeakCheckParams lcp; 6583 HChar* xt_filename = NULL; 6584 HChar* kw; 6585 6586 lcp.mode = LC_Full; 6587 lcp.show_leak_kinds = R2S(Possible) | R2S(Unreached); 6588 lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search. 6589 lcp.heuristics = 0; 6590 lcp.deltamode = LCD_Increased; 6591 lcp.max_loss_records_output = 999999999; 6592 lcp.requested_by_monitor_command = True; 6593 lcp.xt_filename = NULL; 6594 6595 for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr); 6596 kw != NULL; 6597 kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) { 6598 switch (VG_(keyword_id) 6599 ("full summary xtleak " 6600 "kinds reachable possibleleak definiteleak " 6601 "heuristics " 6602 "increased changed any " 6603 "unlimited limited ", 6604 kw, kwd_report_all)) { 6605 case -2: err++; break; 6606 case -1: err++; break; 6607 case 0: /* full */ 6608 lcp.mode = LC_Full; break; 6609 case 1: /* summary */ 6610 lcp.mode = LC_Summary; break; 6611 case 2: /* xtleak */ 6612 lcp.mode = LC_Full; 6613 xt_filename 6614 = VG_(expand_file_name)("--xtleak-mc_main.c", 6615 "xtleak.kcg.%p.%n"); 6616 lcp.xt_filename = xt_filename; 6617 break; 6618 case 3: { /* kinds */ 6619 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr); 6620 if (wcmd == NULL 6621 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens), 6622 True/*allow_all*/, 6623 wcmd, 6624 &lcp.show_leak_kinds)) { 6625 VG_(gdb_printf) ("missing or malformed leak kinds set\n"); 6626 err++; 6627 } 6628 break; 6629 } 6630 case 4: /* reachable */ 6631 lcp.show_leak_kinds = 
MC_(all_Reachedness)(); 6632 break; 6633 case 5: /* possibleleak */ 6634 lcp.show_leak_kinds 6635 = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached); 6636 break; 6637 case 6: /* definiteleak */ 6638 lcp.show_leak_kinds = R2S(Unreached); 6639 break; 6640 case 7: { /* heuristics */ 6641 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr); 6642 if (wcmd == NULL 6643 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens), 6644 True,/*allow_all*/ 6645 wcmd, 6646 &lcp.heuristics)) { 6647 VG_(gdb_printf) ("missing or malformed heuristics set\n"); 6648 err++; 6649 } 6650 break; 6651 } 6652 case 8: /* increased */ 6653 lcp.deltamode = LCD_Increased; break; 6654 case 9: /* changed */ 6655 lcp.deltamode = LCD_Changed; break; 6656 case 10: /* any */ 6657 lcp.deltamode = LCD_Any; break; 6658 case 11: /* unlimited */ 6659 lcp.max_loss_records_output = 999999999; break; 6660 case 12: { /* limited */ 6661 Int int_value; 6662 const HChar* endptr; 6663 6664 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr); 6665 if (wcmd == NULL) { 6666 int_value = 0; 6667 endptr = "empty"; /* to report an error below */ 6668 } else { 6669 HChar *the_end; 6670 int_value = VG_(strtoll10) (wcmd, &the_end); 6671 endptr = the_end; 6672 } 6673 if (*endptr != '\0') 6674 VG_(gdb_printf) ("missing or malformed integer value\n"); 6675 else if (int_value > 0) 6676 lcp.max_loss_records_output = (UInt) int_value; 6677 else 6678 VG_(gdb_printf) ("max_loss_records_output must be >= 1," 6679 " got %d\n", int_value); 6680 break; 6681 } 6682 default: 6683 tl_assert (0); 6684 } 6685 } 6686 if (!err) 6687 MC_(detect_memory_leaks)(tid, &lcp); 6688 if (xt_filename != NULL) 6689 VG_(free)(xt_filename); 6690 return True; 6691 } 6692 6693 case 3: { /* make_memory */ 6694 Addr address; 6695 SizeT szB = 1; 6696 Int kwdid = VG_(keyword_id) 6697 ("noaccess undefined defined Definedifaddressable", 6698 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all); 6699 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) 6700 
return True; 6701 switch (kwdid) { 6702 case -2: break; 6703 case -1: break; 6704 case 0: MC_(make_mem_noaccess) (address, szB); break; 6705 case 1: make_mem_undefined_w_tid_and_okind ( address, szB, tid, 6706 MC_OKIND_USER ); break; 6707 case 2: MC_(make_mem_defined) ( address, szB ); break; 6708 case 3: make_mem_defined_if_addressable ( address, szB ); break;; 6709 default: tl_assert(0); 6710 } 6711 return True; 6712 } 6713 6714 case 4: { /* check_memory */ 6715 Addr address; 6716 SizeT szB = 1; 6717 Addr bad_addr; 6718 UInt okind; 6719 const HChar* src; 6720 UInt otag; 6721 UInt ecu; 6722 ExeContext* origin_ec; 6723 MC_ReadResult res; 6724 6725 Int kwdid = VG_(keyword_id) 6726 ("addressable defined", 6727 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all); 6728 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) 6729 return True; 6730 switch (kwdid) { 6731 case -2: break; 6732 case -1: break; 6733 case 0: /* addressable */ 6734 if (is_mem_addressable ( address, szB, &bad_addr )) 6735 VG_(printf) ("Address %p len %lu addressable\n", 6736 (void *)address, szB); 6737 else 6738 VG_(printf) 6739 ("Address %p len %lu not addressable:\nbad address %p\n", 6740 (void *)address, szB, (void *) bad_addr); 6741 MC_(pp_describe_addr) (address); 6742 break; 6743 case 1: /* defined */ 6744 res = is_mem_defined ( address, szB, &bad_addr, &otag ); 6745 if (MC_AddrErr == res) 6746 VG_(printf) 6747 ("Address %p len %lu not addressable:\nbad address %p\n", 6748 (void *)address, szB, (void *) bad_addr); 6749 else if (MC_ValueErr == res) { 6750 okind = otag & 3; 6751 switch (okind) { 6752 case MC_OKIND_STACK: 6753 src = " was created by a stack allocation"; break; 6754 case MC_OKIND_HEAP: 6755 src = " was created by a heap allocation"; break; 6756 case MC_OKIND_USER: 6757 src = " was created by a client request"; break; 6758 case MC_OKIND_UNKNOWN: 6759 src = ""; break; 6760 default: tl_assert(0); 6761 } 6762 VG_(printf) 6763 ("Address %p len %lu not defined:\n" 6764 
"Uninitialised value at %p%s\n", 6765 (void *)address, szB, (void *) bad_addr, src); 6766 ecu = otag & ~3; 6767 if (VG_(is_plausible_ECU)(ecu)) { 6768 origin_ec = VG_(get_ExeContext_from_ECU)( ecu ); 6769 VG_(pp_ExeContext)( origin_ec ); 6770 } 6771 } 6772 else 6773 VG_(printf) ("Address %p len %lu defined\n", 6774 (void *)address, szB); 6775 MC_(pp_describe_addr) (address); 6776 break; 6777 default: tl_assert(0); 6778 } 6779 return True; 6780 } 6781 6782 case 5: { /* block_list */ 6783 HChar* wl; 6784 HChar *the_end; 6785 UInt lr_nr_from = 0; 6786 UInt lr_nr_to = 0; 6787 6788 if (VG_(parse_slice) (NULL, &ssaveptr, &lr_nr_from, &lr_nr_to)) { 6789 UInt limit_blocks = 999999999; 6790 Int int_value; 6791 UInt heuristics = 0; 6792 6793 for (wl = VG_(strtok_r) (NULL, " ", &ssaveptr); 6794 wl != NULL; 6795 wl = VG_(strtok_r) (NULL, " ", &ssaveptr)) { 6796 switch (VG_(keyword_id) ("unlimited limited heuristics ", 6797 wl, kwd_report_all)) { 6798 case -2: return True; 6799 case -1: return True; 6800 case 0: /* unlimited */ 6801 limit_blocks = 999999999; break; 6802 case 1: /* limited */ 6803 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr); 6804 if (wcmd == NULL) { 6805 VG_(gdb_printf) ("missing integer value\n"); 6806 return True; 6807 } 6808 int_value = VG_(strtoll10) (wcmd, &the_end); 6809 if (*the_end != '\0') { 6810 VG_(gdb_printf) ("malformed integer value\n"); 6811 return True; 6812 } 6813 if (int_value <= 0) { 6814 VG_(gdb_printf) ("max_blocks must be >= 1," 6815 " got %d\n", int_value); 6816 return True; 6817 } 6818 limit_blocks = (UInt) int_value; 6819 break; 6820 case 2: /* heuristics */ 6821 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr); 6822 if (wcmd == NULL 6823 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens), 6824 True,/*allow_all*/ 6825 wcmd, 6826 &heuristics)) { 6827 VG_(gdb_printf) ("missing or malformed heuristics set\n"); 6828 return True; 6829 } 6830 break; 6831 default: 6832 tl_assert (0); 6833 } 6834 } 6835 /* substract 1 from lr_nr_from/lr_nr_to 
as what is shown to the user 6836 is 1 more than the index in lr_array. */ 6837 if (lr_nr_from == 0 || ! MC_(print_block_list) (lr_nr_from-1, 6838 lr_nr_to-1, 6839 limit_blocks, 6840 heuristics)) 6841 VG_(gdb_printf) ("invalid loss record nr\n"); 6842 } 6843 return True; 6844 } 6845 6846 case 6: { /* who_points_at */ 6847 Addr address; 6848 SizeT szB = 1; 6849 6850 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) 6851 return True; 6852 if (address == (Addr) 0) { 6853 VG_(gdb_printf) ("Cannot search who points at 0x0\n"); 6854 return True; 6855 } 6856 MC_(who_points_at) (address, szB); 6857 return True; 6858 } 6859 6860 case 7: { /* xb */ 6861 Addr address; 6862 SizeT szB = 1; 6863 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) { 6864 UChar vbits[8]; 6865 Int res[8]; 6866 Int i; 6867 Int unaddressable = 0; 6868 for (i = 0; i < szB; i++) { 6869 Int bnr = i % 8; 6870 res[bnr] = mc_get_or_set_vbits_for_client 6871 (address+i, (Addr) &vbits[bnr], 1, 6872 False, /* get them */ 6873 False /* is client request */ ); 6874 /* We going to print the first vabits of a new line. 6875 Terminate the previous line if needed: prints a line with the 6876 address and the data. 
*/ 6877 if (bnr == 0) { 6878 if (i != 0) { 6879 VG_(printf) ("\n"); 6880 gdb_xb (address + i - 8, 8, res); 6881 } 6882 VG_(printf) ("\t"); // To align VABITS with gdb_xb layout 6883 } 6884 if (res[bnr] == 1) { 6885 VG_(printf) ("\t %02x", vbits[bnr]); 6886 } else { 6887 tl_assert(3 == res[bnr]); 6888 unaddressable++; 6889 VG_(printf) ("\t __"); 6890 } 6891 } 6892 VG_(printf) ("\n"); 6893 if (szB % 8 == 0 && szB > 0) 6894 gdb_xb (address + szB - 8, 8, res); 6895 else 6896 gdb_xb (address + szB - szB % 8, szB % 8, res); 6897 if (unaddressable) { 6898 VG_(printf) 6899 ("Address %p len %lu has %d bytes unaddressable\n", 6900 (void *)address, szB, unaddressable); 6901 } 6902 } 6903 return True; 6904 } 6905 6906 case 8: { /* xtmemory */ 6907 HChar* filename; 6908 filename = VG_(strtok_r) (NULL, " ", &ssaveptr); 6909 MC_(xtmemory_report)(filename, False); 6910 return True; 6911 } 6912 6913 default: 6914 tl_assert(0); 6915 return False; 6916 } 6917 } 6918 6919 /*------------------------------------------------------------*/ 6920 /*--- Client requests ---*/ 6921 /*------------------------------------------------------------*/ 6922 6923 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret ) 6924 { 6925 Int i; 6926 Addr bad_addr; 6927 6928 if (!VG_IS_TOOL_USERREQ('M','C',arg[0]) 6929 && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0] 6930 && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0] 6931 && VG_USERREQ__FREELIKE_BLOCK != arg[0] 6932 && VG_USERREQ__CREATE_MEMPOOL != arg[0] 6933 && VG_USERREQ__DESTROY_MEMPOOL != arg[0] 6934 && VG_USERREQ__MEMPOOL_ALLOC != arg[0] 6935 && VG_USERREQ__MEMPOOL_FREE != arg[0] 6936 && VG_USERREQ__MEMPOOL_TRIM != arg[0] 6937 && VG_USERREQ__MOVE_MEMPOOL != arg[0] 6938 && VG_USERREQ__MEMPOOL_CHANGE != arg[0] 6939 && VG_USERREQ__MEMPOOL_EXISTS != arg[0] 6940 && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0] 6941 && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0] 6942 && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]) 6943 
return False; 6944 6945 switch (arg[0]) { 6946 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: { 6947 Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr ); 6948 if (!ok) 6949 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 ); 6950 *ret = ok ? (UWord)NULL : bad_addr; 6951 break; 6952 } 6953 6954 case VG_USERREQ__CHECK_MEM_IS_DEFINED: { 6955 Bool errorV = False; 6956 Addr bad_addrV = 0; 6957 UInt otagV = 0; 6958 Bool errorA = False; 6959 Addr bad_addrA = 0; 6960 is_mem_defined_comprehensive( 6961 arg[1], arg[2], 6962 &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA 6963 ); 6964 if (errorV) { 6965 MC_(record_user_error) ( tid, bad_addrV, 6966 /*isAddrErr*/False, otagV ); 6967 } 6968 if (errorA) { 6969 MC_(record_user_error) ( tid, bad_addrA, 6970 /*isAddrErr*/True, 0 ); 6971 } 6972 /* Return the lower of the two erring addresses, if any. */ 6973 *ret = 0; 6974 if (errorV && !errorA) { 6975 *ret = bad_addrV; 6976 } 6977 if (!errorV && errorA) { 6978 *ret = bad_addrA; 6979 } 6980 if (errorV && errorA) { 6981 *ret = bad_addrV < bad_addrA ? 
bad_addrV : bad_addrA; 6982 } 6983 break; 6984 } 6985 6986 case VG_USERREQ__DO_LEAK_CHECK: { 6987 LeakCheckParams lcp; 6988 6989 if (arg[1] == 0) 6990 lcp.mode = LC_Full; 6991 else if (arg[1] == 1) 6992 lcp.mode = LC_Summary; 6993 else { 6994 VG_(message)(Vg_UserMsg, 6995 "Warning: unknown memcheck leak search mode\n"); 6996 lcp.mode = LC_Full; 6997 } 6998 6999 lcp.show_leak_kinds = MC_(clo_show_leak_kinds); 7000 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds); 7001 lcp.heuristics = MC_(clo_leak_check_heuristics); 7002 7003 if (arg[2] == 0) 7004 lcp.deltamode = LCD_Any; 7005 else if (arg[2] == 1) 7006 lcp.deltamode = LCD_Increased; 7007 else if (arg[2] == 2) 7008 lcp.deltamode = LCD_Changed; 7009 else { 7010 VG_(message) 7011 (Vg_UserMsg, 7012 "Warning: unknown memcheck leak search deltamode\n"); 7013 lcp.deltamode = LCD_Any; 7014 } 7015 lcp.max_loss_records_output = 999999999; 7016 lcp.requested_by_monitor_command = False; 7017 lcp.xt_filename = NULL; 7018 7019 MC_(detect_memory_leaks)(tid, &lcp); 7020 *ret = 0; /* return value is meaningless */ 7021 break; 7022 } 7023 7024 case VG_USERREQ__MAKE_MEM_NOACCESS: 7025 MC_(make_mem_noaccess) ( arg[1], arg[2] ); 7026 *ret = -1; 7027 break; 7028 7029 case VG_USERREQ__MAKE_MEM_UNDEFINED: 7030 make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid, 7031 MC_OKIND_USER ); 7032 *ret = -1; 7033 break; 7034 7035 case VG_USERREQ__MAKE_MEM_DEFINED: 7036 MC_(make_mem_defined) ( arg[1], arg[2] ); 7037 *ret = -1; 7038 break; 7039 7040 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE: 7041 make_mem_defined_if_addressable ( arg[1], arg[2] ); 7042 *ret = -1; 7043 break; 7044 7045 case VG_USERREQ__CREATE_BLOCK: /* describe a block */ 7046 if (arg[1] != 0 && arg[2] != 0) { 7047 i = alloc_client_block(); 7048 /* VG_(printf)("allocated %d %p\n", i, cgbs); */ 7049 cgbs[i].start = arg[1]; 7050 cgbs[i].size = arg[2]; 7051 cgbs[i].desc = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]); 7052 cgbs[i].where = VG_(record_ExeContext) ( tid, 
0/*first_ip_delta*/ ); 7053 *ret = i; 7054 } else 7055 *ret = -1; 7056 break; 7057 7058 case VG_USERREQ__DISCARD: /* discard */ 7059 if (cgbs == NULL 7060 || arg[2] >= cgb_used || 7061 (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) { 7062 *ret = 1; 7063 } else { 7064 tl_assert(arg[2] >= 0 && arg[2] < cgb_used); 7065 cgbs[arg[2]].start = cgbs[arg[2]].size = 0; 7066 VG_(free)(cgbs[arg[2]].desc); 7067 cgb_discards++; 7068 *ret = 0; 7069 } 7070 break; 7071 7072 case VG_USERREQ__GET_VBITS: 7073 *ret = mc_get_or_set_vbits_for_client 7074 ( arg[1], arg[2], arg[3], 7075 False /* get them */, 7076 True /* is client request */ ); 7077 break; 7078 7079 case VG_USERREQ__SET_VBITS: 7080 *ret = mc_get_or_set_vbits_for_client 7081 ( arg[1], arg[2], arg[3], 7082 True /* set them */, 7083 True /* is client request */ ); 7084 break; 7085 7086 case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */ 7087 UWord** argp = (UWord**)arg; 7088 // MC_(bytes_leaked) et al were set by the last leak check (or zero 7089 // if no prior leak checks performed). 7090 *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect); 7091 *argp[2] = MC_(bytes_dubious); 7092 *argp[3] = MC_(bytes_reachable); 7093 *argp[4] = MC_(bytes_suppressed); 7094 // there is no argp[5] 7095 //*argp[5] = MC_(bytes_indirect); 7096 // XXX need to make *argp[1-4] defined; currently done in the 7097 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero. 7098 *ret = 0; 7099 return True; 7100 } 7101 case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */ 7102 UWord** argp = (UWord**)arg; 7103 // MC_(blocks_leaked) et al were set by the last leak check (or zero 7104 // if no prior leak checks performed). 
7105 *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect); 7106 *argp[2] = MC_(blocks_dubious); 7107 *argp[3] = MC_(blocks_reachable); 7108 *argp[4] = MC_(blocks_suppressed); 7109 // there is no argp[5] 7110 //*argp[5] = MC_(blocks_indirect); 7111 // XXX need to make *argp[1-4] defined; currently done in the 7112 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero. 7113 *ret = 0; 7114 return True; 7115 } 7116 case VG_USERREQ__MALLOCLIKE_BLOCK: { 7117 Addr p = (Addr)arg[1]; 7118 SizeT sizeB = arg[2]; 7119 UInt rzB = arg[3]; 7120 Bool is_zeroed = (Bool)arg[4]; 7121 7122 MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed, 7123 MC_AllocCustom, MC_(malloc_list) ); 7124 if (rzB > 0) { 7125 MC_(make_mem_noaccess) ( p - rzB, rzB); 7126 MC_(make_mem_noaccess) ( p + sizeB, rzB); 7127 } 7128 return True; 7129 } 7130 case VG_USERREQ__RESIZEINPLACE_BLOCK: { 7131 Addr p = (Addr)arg[1]; 7132 SizeT oldSizeB = arg[2]; 7133 SizeT newSizeB = arg[3]; 7134 UInt rzB = arg[4]; 7135 7136 MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB ); 7137 return True; 7138 } 7139 case VG_USERREQ__FREELIKE_BLOCK: { 7140 Addr p = (Addr)arg[1]; 7141 UInt rzB = arg[2]; 7142 7143 MC_(handle_free) ( tid, p, rzB, MC_AllocCustom ); 7144 return True; 7145 } 7146 7147 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: { 7148 HChar* s = (HChar*)arg[1]; 7149 Addr dst = (Addr) arg[2]; 7150 Addr src = (Addr) arg[3]; 7151 SizeT len = (SizeT)arg[4]; 7152 MC_(record_overlap_error)(tid, s, src, dst, len); 7153 return True; 7154 } 7155 7156 case VG_USERREQ__CREATE_MEMPOOL: { 7157 Addr pool = (Addr)arg[1]; 7158 UInt rzB = arg[2]; 7159 Bool is_zeroed = (Bool)arg[3]; 7160 UInt flags = arg[4]; 7161 7162 // The create_mempool function does not know these mempool flags, 7163 // pass as booleans. 
7164 MC_(create_mempool) ( pool, rzB, is_zeroed, 7165 (flags & VALGRIND_MEMPOOL_AUTO_FREE), 7166 (flags & VALGRIND_MEMPOOL_METAPOOL) ); 7167 return True; 7168 } 7169 7170 case VG_USERREQ__DESTROY_MEMPOOL: { 7171 Addr pool = (Addr)arg[1]; 7172 7173 MC_(destroy_mempool) ( pool ); 7174 return True; 7175 } 7176 7177 case VG_USERREQ__MEMPOOL_ALLOC: { 7178 Addr pool = (Addr)arg[1]; 7179 Addr addr = (Addr)arg[2]; 7180 UInt size = arg[3]; 7181 7182 MC_(mempool_alloc) ( tid, pool, addr, size ); 7183 return True; 7184 } 7185 7186 case VG_USERREQ__MEMPOOL_FREE: { 7187 Addr pool = (Addr)arg[1]; 7188 Addr addr = (Addr)arg[2]; 7189 7190 MC_(mempool_free) ( pool, addr ); 7191 return True; 7192 } 7193 7194 case VG_USERREQ__MEMPOOL_TRIM: { 7195 Addr pool = (Addr)arg[1]; 7196 Addr addr = (Addr)arg[2]; 7197 UInt size = arg[3]; 7198 7199 MC_(mempool_trim) ( pool, addr, size ); 7200 return True; 7201 } 7202 7203 case VG_USERREQ__MOVE_MEMPOOL: { 7204 Addr poolA = (Addr)arg[1]; 7205 Addr poolB = (Addr)arg[2]; 7206 7207 MC_(move_mempool) ( poolA, poolB ); 7208 return True; 7209 } 7210 7211 case VG_USERREQ__MEMPOOL_CHANGE: { 7212 Addr pool = (Addr)arg[1]; 7213 Addr addrA = (Addr)arg[2]; 7214 Addr addrB = (Addr)arg[3]; 7215 UInt size = arg[4]; 7216 7217 MC_(mempool_change) ( pool, addrA, addrB, size ); 7218 return True; 7219 } 7220 7221 case VG_USERREQ__MEMPOOL_EXISTS: { 7222 Addr pool = (Addr)arg[1]; 7223 7224 *ret = (UWord) MC_(mempool_exists) ( pool ); 7225 return True; 7226 } 7227 7228 case VG_USERREQ__GDB_MONITOR_COMMAND: { 7229 Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]); 7230 if (handled) 7231 *ret = 1; 7232 else 7233 *ret = 0; 7234 return handled; 7235 } 7236 7237 case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE: 7238 case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: { 7239 Bool addRange 7240 = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE; 7241 Bool ok 7242 = modify_ignore_ranges(addRange, arg[1], arg[2]); 7243 *ret = ok ? 
1 : 0;
         return True;
      }

      default:
         /* Unknown request code: emit a user-visible warning and
            return False. */
         VG_(message)(
            Vg_UserMsg,
            "Warning: unknown memcheck client request code %llx\n",
            (ULong)arg[0]
         );
         return False;
   }
   return True;
}


/*------------------------------------------------------------*/
/*--- Crude profiling machinery.                           ---*/
/*------------------------------------------------------------*/

// We track a number of interesting events (using PROF_EVENT)
// if MC_PROFILE_MEMORY is defined.

#ifdef MC_PROFILE_MEMORY

/* One counter per profiling event, indexed by MCPE_* event number.
   Zeroed by init_prof_mem() and printed by done_prof_mem(). */
ULong MC_(event_ctr)[MCPE_LAST];

/* Event counter names, indexed by MCPE_* event number.  Use the name
   of the function that increases the event counter.  Drop any MC_()
   and mc_ prefixes.  Designated initialisers are used, so an event
   without an entry here is left NULL; init_prof_mem() asserts that
   there is no such event. */
static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
   [MCPE_LOADVN_SLOW] = "LOADVn_slow",
   [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
   [MCPE_STOREVN_SLOW] = "STOREVn_slow",
   [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
   [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
   [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
      "make_aligned_word32_undefined_slow",
   [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
   [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
      "make_aligned_word64_undefined_slow",
   [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
   [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
      "make_aligned_word32_noaccess_slow",
   [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
   [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
      "make_aligned_word64_noaccess_slow",
   [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
   [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
   [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
   [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
   [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
   [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
   [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
   [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
   [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
   [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
   [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
   [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
   [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
   [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
   [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
   [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
   [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
      "is_mem_defined_comprehensive(loop)",
   [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
   [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
   [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
   [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
   [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
   [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
      "set_address_range_perms(single-secmap)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
      "set_address_range_perms(startof-secmap)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
      "set_address_range_perms(multiple-secmaps)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
      "set_address_range_perms(dist-sm1)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
      "set_address_range_perms(dist-sm2)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
      "set_address_range_perms(dist-sm1-quick)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
      "set_address_range_perms(dist-sm2-quick)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
      "set_address_range_perms(loop64K-free-dist-sm)",
   [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
   [MCPE_LOADV_128_OR_256] = "LOADV_128_or_256",
   [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
   [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
   [MCPE_LOADV64] = "LOADV64",
   [MCPE_LOADV64_SLOW1] = "LOADV64-slow1",
   [MCPE_LOADV64_SLOW2] = "LOADV64-slow2",
   [MCPE_STOREV64] = "STOREV64",
   [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
   [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
   [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
   [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
   [MCPE_LOADV32] = "LOADV32",
   [MCPE_LOADV32_SLOW1] = "LOADV32-slow1",
   [MCPE_LOADV32_SLOW2] = "LOADV32-slow2",
   [MCPE_STOREV32] = "STOREV32",
   [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
   [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
   [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
   [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
   [MCPE_LOADV16] = "LOADV16",
   [MCPE_LOADV16_SLOW1] = "LOADV16-slow1",
   [MCPE_LOADV16_SLOW2] = "LOADV16-slow2",
   [MCPE_STOREV16] = "STOREV16",
   [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
   [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
   [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
   [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
   [MCPE_LOADV8] = "LOADV8",
   [MCPE_LOADV8_SLOW1] = "LOADV8-slow1",
   [MCPE_LOADV8_SLOW2] = "LOADV8-slow2",
   [MCPE_STOREV8] = "STOREV8",
   [MCPE_STOREV8_SLOW1] = "STOREV8-slow1",
   [MCPE_STOREV8_SLOW2] = "STOREV8-slow2",
   [MCPE_STOREV8_SLOW3] = "STOREV8-slow3",
   [MCPE_STOREV8_SLOW4] = "STOREV8-slow4",
   [MCPE_NEW_MEM_STACK_4] = "new_mem_stack_4",
   [MCPE_NEW_MEM_STACK_8] = "new_mem_stack_8",
   [MCPE_NEW_MEM_STACK_12] = "new_mem_stack_12",
   [MCPE_NEW_MEM_STACK_16] = "new_mem_stack_16",
   [MCPE_NEW_MEM_STACK_32] = "new_mem_stack_32",
   [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
   [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
   [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
   [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
   [MCPE_DIE_MEM_STACK_4] = "die_mem_stack_4",
   [MCPE_DIE_MEM_STACK_8] = "die_mem_stack_8",
   [MCPE_DIE_MEM_STACK_12] = "die_mem_stack_12",
   [MCPE_DIE_MEM_STACK_16] = "die_mem_stack_16",
   [MCPE_DIE_MEM_STACK_32] = "die_mem_stack_32",
   [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
   [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
   [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
   [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
   [MCPE_NEW_MEM_STACK] = "new_mem_stack",
   [MCPE_DIE_MEM_STACK] = "die_mem_stack",
   [MCPE_MAKE_STACK_UNINIT_W_O] = "MAKE_STACK_UNINIT_w_o",
   [MCPE_MAKE_STACK_UNINIT_NO_O] = "MAKE_STACK_UNINIT_no_o",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O] = "MAKE_STACK_UNINIT_128_no_o",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16]
      = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8]
      = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE]
      = "MAKE_STACK_UNINIT_128_no_o_slowcase",
};

/* Reset every profiling counter to zero and check that the name table
   above is complete. */
static void init_prof_mem ( void )
{
   Int i, name_count = 0;

   for (i = 0; i < MCPE_LAST; i++) {
      MC_(event_ctr)[i] = 0;
      if (MC_(event_ctr_name)[i] != NULL)
         ++name_count;
   }

   /* Make sure every profiling event has a name */
   tl_assert(name_count == MCPE_LAST);
}

/* Print every counter that is non-zero, one per line.  A newline is
   emitted (once) whenever the number of entries printed so far is a
   multiple of ten, which includes zero. */
static void done_prof_mem ( void )
{
   Int i, n;
   Bool spaced = False;
   for (i = n = 0; i < MCPE_LAST; i++) {
      if
(!spaced && (n % 10) == 0) { 7421 VG_(printf)("\n"); 7422 spaced = True; 7423 } 7424 if (MC_(event_ctr)[i] > 0) { 7425 spaced = False; 7426 ++n; 7427 VG_(printf)( "prof mem event %3d: %11llu %s\n", 7428 i, MC_(event_ctr)[i], 7429 MC_(event_ctr_name)[i]); 7430 } 7431 } 7432 } 7433 7434 #else 7435 7436 static void init_prof_mem ( void ) { } 7437 static void done_prof_mem ( void ) { } 7438 7439 #endif 7440 7441 7442 /*------------------------------------------------------------*/ 7443 /*--- Origin tracking stuff ---*/ 7444 /*------------------------------------------------------------*/ 7445 7446 /*--------------------------------------------*/ 7447 /*--- Origin tracking: load handlers ---*/ 7448 /*--------------------------------------------*/ 7449 7450 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) { 7451 return or1 > or2 ? or1 : or2; 7452 } 7453 7454 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) { 7455 OCacheLine* line; 7456 UChar descr; 7457 UWord lineoff = oc_line_offset(a); 7458 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */ 7459 7460 if (OC_ENABLE_ASSERTIONS) { 7461 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 7462 } 7463 7464 line = find_OCacheLine( a ); 7465 7466 descr = line->descr[lineoff]; 7467 if (OC_ENABLE_ASSERTIONS) { 7468 tl_assert(descr < 0x10); 7469 } 7470 7471 if (LIKELY(0 == (descr & (1 << byteoff)))) { 7472 return 0; 7473 } else { 7474 return line->w32[lineoff]; 7475 } 7476 } 7477 7478 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) { 7479 OCacheLine* line; 7480 UChar descr; 7481 UWord lineoff, byteoff; 7482 7483 if (UNLIKELY(a & 1)) { 7484 /* Handle misaligned case, slowly. 
*/ 7485 UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 ); 7486 UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 ); 7487 return merge_origins(oLo, oHi); 7488 } 7489 7490 lineoff = oc_line_offset(a); 7491 byteoff = a & 3; /* 0 or 2 */ 7492 7493 if (OC_ENABLE_ASSERTIONS) { 7494 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 7495 } 7496 line = find_OCacheLine( a ); 7497 7498 descr = line->descr[lineoff]; 7499 if (OC_ENABLE_ASSERTIONS) { 7500 tl_assert(descr < 0x10); 7501 } 7502 7503 if (LIKELY(0 == (descr & (3 << byteoff)))) { 7504 return 0; 7505 } else { 7506 return line->w32[lineoff]; 7507 } 7508 } 7509 7510 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) { 7511 OCacheLine* line; 7512 UChar descr; 7513 UWord lineoff; 7514 7515 if (UNLIKELY(a & 3)) { 7516 /* Handle misaligned case, slowly. */ 7517 UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 ); 7518 UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 ); 7519 return merge_origins(oLo, oHi); 7520 } 7521 7522 lineoff = oc_line_offset(a); 7523 if (OC_ENABLE_ASSERTIONS) { 7524 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 7525 } 7526 7527 line = find_OCacheLine( a ); 7528 7529 descr = line->descr[lineoff]; 7530 if (OC_ENABLE_ASSERTIONS) { 7531 tl_assert(descr < 0x10); 7532 } 7533 7534 if (LIKELY(0 == descr)) { 7535 return 0; 7536 } else { 7537 return line->w32[lineoff]; 7538 } 7539 } 7540 7541 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) { 7542 OCacheLine* line; 7543 UChar descrLo, descrHi, descr; 7544 UWord lineoff; 7545 7546 if (UNLIKELY(a & 7)) { 7547 /* Handle misaligned case, slowly. 
*/ 7548 UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 ); 7549 UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 ); 7550 return merge_origins(oLo, oHi); 7551 } 7552 7553 lineoff = oc_line_offset(a); 7554 if (OC_ENABLE_ASSERTIONS) { 7555 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/ 7556 } 7557 7558 line = find_OCacheLine( a ); 7559 7560 descrLo = line->descr[lineoff + 0]; 7561 descrHi = line->descr[lineoff + 1]; 7562 descr = descrLo | descrHi; 7563 if (OC_ENABLE_ASSERTIONS) { 7564 tl_assert(descr < 0x10); 7565 } 7566 7567 if (LIKELY(0 == descr)) { 7568 return 0; /* both 32-bit chunks are defined */ 7569 } else { 7570 UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0]; 7571 UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1]; 7572 return merge_origins(oLo, oHi); 7573 } 7574 } 7575 7576 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) { 7577 UInt oLo = (UInt)MC_(helperc_b_load8)( a + 0 ); 7578 UInt oHi = (UInt)MC_(helperc_b_load8)( a + 8 ); 7579 UInt oBoth = merge_origins(oLo, oHi); 7580 return (UWord)oBoth; 7581 } 7582 7583 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) { 7584 UInt oQ0 = (UInt)MC_(helperc_b_load8)( a + 0 ); 7585 UInt oQ1 = (UInt)MC_(helperc_b_load8)( a + 8 ); 7586 UInt oQ2 = (UInt)MC_(helperc_b_load8)( a + 16 ); 7587 UInt oQ3 = (UInt)MC_(helperc_b_load8)( a + 24 ); 7588 UInt oAll = merge_origins(merge_origins(oQ0, oQ1), 7589 merge_origins(oQ2, oQ3)); 7590 return (UWord)oAll; 7591 } 7592 7593 7594 /*--------------------------------------------*/ 7595 /*--- Origin tracking: store handlers ---*/ 7596 /*--------------------------------------------*/ 7597 7598 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) { 7599 OCacheLine* line; 7600 UWord lineoff = oc_line_offset(a); 7601 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */ 7602 7603 if (OC_ENABLE_ASSERTIONS) { 7604 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 7605 } 7606 7607 line = find_OCacheLine( a ); 7608 7609 if (d32 == 0) { 7610 line->descr[lineoff] &= 
~(1 << byteoff); 7611 } else { 7612 line->descr[lineoff] |= (1 << byteoff); 7613 line->w32[lineoff] = d32; 7614 } 7615 } 7616 7617 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) { 7618 OCacheLine* line; 7619 UWord lineoff, byteoff; 7620 7621 if (UNLIKELY(a & 1)) { 7622 /* Handle misaligned case, slowly. */ 7623 MC_(helperc_b_store1)( a + 0, d32 ); 7624 MC_(helperc_b_store1)( a + 1, d32 ); 7625 return; 7626 } 7627 7628 lineoff = oc_line_offset(a); 7629 byteoff = a & 3; /* 0 or 2 */ 7630 7631 if (OC_ENABLE_ASSERTIONS) { 7632 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 7633 } 7634 7635 line = find_OCacheLine( a ); 7636 7637 if (d32 == 0) { 7638 line->descr[lineoff] &= ~(3 << byteoff); 7639 } else { 7640 line->descr[lineoff] |= (3 << byteoff); 7641 line->w32[lineoff] = d32; 7642 } 7643 } 7644 7645 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) { 7646 OCacheLine* line; 7647 UWord lineoff; 7648 7649 if (UNLIKELY(a & 3)) { 7650 /* Handle misaligned case, slowly. */ 7651 MC_(helperc_b_store2)( a + 0, d32 ); 7652 MC_(helperc_b_store2)( a + 2, d32 ); 7653 return; 7654 } 7655 7656 lineoff = oc_line_offset(a); 7657 if (OC_ENABLE_ASSERTIONS) { 7658 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 7659 } 7660 7661 line = find_OCacheLine( a ); 7662 7663 if (d32 == 0) { 7664 line->descr[lineoff] = 0; 7665 } else { 7666 line->descr[lineoff] = 0xF; 7667 line->w32[lineoff] = d32; 7668 } 7669 } 7670 7671 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) { 7672 OCacheLine* line; 7673 UWord lineoff; 7674 7675 if (UNLIKELY(a & 7)) { 7676 /* Handle misaligned case, slowly. 
*/ 7677 MC_(helperc_b_store4)( a + 0, d32 ); 7678 MC_(helperc_b_store4)( a + 4, d32 ); 7679 return; 7680 } 7681 7682 lineoff = oc_line_offset(a); 7683 if (OC_ENABLE_ASSERTIONS) { 7684 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/ 7685 } 7686 7687 line = find_OCacheLine( a ); 7688 7689 if (d32 == 0) { 7690 line->descr[lineoff + 0] = 0; 7691 line->descr[lineoff + 1] = 0; 7692 } else { 7693 line->descr[lineoff + 0] = 0xF; 7694 line->descr[lineoff + 1] = 0xF; 7695 line->w32[lineoff + 0] = d32; 7696 line->w32[lineoff + 1] = d32; 7697 } 7698 } 7699 7700 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) { 7701 MC_(helperc_b_store8)( a + 0, d32 ); 7702 MC_(helperc_b_store8)( a + 8, d32 ); 7703 } 7704 7705 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) { 7706 MC_(helperc_b_store8)( a + 0, d32 ); 7707 MC_(helperc_b_store8)( a + 8, d32 ); 7708 MC_(helperc_b_store8)( a + 16, d32 ); 7709 MC_(helperc_b_store8)( a + 24, d32 ); 7710 } 7711 7712 7713 /*--------------------------------------------*/ 7714 /*--- Origin tracking: sarp handlers ---*/ 7715 /*--------------------------------------------*/ 7716 7717 __attribute__((noinline)) 7718 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) { 7719 if ((a & 1) && len >= 1) { 7720 MC_(helperc_b_store1)( a, otag ); 7721 a++; 7722 len--; 7723 } 7724 if ((a & 2) && len >= 2) { 7725 MC_(helperc_b_store2)( a, otag ); 7726 a += 2; 7727 len -= 2; 7728 } 7729 if (len >= 4) 7730 tl_assert(0 == (a & 3)); 7731 while (len >= 4) { 7732 MC_(helperc_b_store4)( a, otag ); 7733 a += 4; 7734 len -= 4; 7735 } 7736 if (len >= 2) { 7737 MC_(helperc_b_store2)( a, otag ); 7738 a += 2; 7739 len -= 2; 7740 } 7741 if (len >= 1) { 7742 MC_(helperc_b_store1)( a, otag ); 7743 //a++; 7744 len--; 7745 } 7746 tl_assert(len == 0); 7747 } 7748 7749 __attribute__((noinline)) 7750 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) { 7751 if ((a & 1) && len >= 1) { 7752 MC_(helperc_b_store1)( a, 
0 ); 7753 a++; 7754 len--; 7755 } 7756 if ((a & 2) && len >= 2) { 7757 MC_(helperc_b_store2)( a, 0 ); 7758 a += 2; 7759 len -= 2; 7760 } 7761 if (len >= 4) 7762 tl_assert(0 == (a & 3)); 7763 while (len >= 4) { 7764 MC_(helperc_b_store4)( a, 0 ); 7765 a += 4; 7766 len -= 4; 7767 } 7768 if (len >= 2) { 7769 MC_(helperc_b_store2)( a, 0 ); 7770 a += 2; 7771 len -= 2; 7772 } 7773 if (len >= 1) { 7774 MC_(helperc_b_store1)( a, 0 ); 7775 //a++; 7776 len--; 7777 } 7778 tl_assert(len == 0); 7779 } 7780 7781 7782 /*------------------------------------------------------------*/ 7783 /*--- Setup and finalisation ---*/ 7784 /*------------------------------------------------------------*/ 7785 7786 static void mc_post_clo_init ( void ) 7787 { 7788 /* If we've been asked to emit XML, mash around various other 7789 options so as to constrain the output somewhat. */ 7790 if (VG_(clo_xml)) { 7791 /* Extract as much info as possible from the leak checker. */ 7792 MC_(clo_leak_check) = LC_Full; 7793 } 7794 7795 if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol) 7796 && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) { 7797 VG_(message)(Vg_UserMsg, 7798 "Warning: --freelist-big-blocks value %lld has no effect\n" 7799 "as it is >= to --freelist-vol value %lld\n", 7800 MC_(clo_freelist_big_blocks), 7801 MC_(clo_freelist_vol)); 7802 } 7803 7804 if (MC_(clo_workaround_gcc296_bugs) 7805 && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) { 7806 VG_(umsg)( 7807 "Warning: --workaround-gcc296-bugs=yes is deprecated.\n" 7808 "Warning: Instead use: --ignore-range-below-sp=1024-1\n" 7809 "\n" 7810 ); 7811 } 7812 7813 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 ); 7814 7815 if (MC_(clo_mc_level) == 3) { 7816 /* We're doing origin tracking. 
*/
      /* Register the stack-growth handlers that take an ECU argument.
         The fixed-size variants are only registered when
         PERF_FAST_STACK is defined. */
#     ifdef PERF_FAST_STACK
      VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
      VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
      VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
      VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
      VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
      VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
      VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
      VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
      VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
#     endif
      VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
      VG_(track_new_mem_stack_signal)    ( mc_new_mem_w_tid_make_ECU  );
   } else {
      /* Not doing origin tracking */
#     ifdef PERF_FAST_STACK
      VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
      VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
      VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
      VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
      VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
      VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
      VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
      VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
      VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
#     endif
      VG_(track_new_mem_stack)        ( mc_new_mem_stack        );
      VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
   }

   // We assume that brk()/sbrk() does not initialise new memory.  Is this
   // accurate?  John Reiser says:
   //
   // "0) sbrk() can *decrease* process address space.  No zero fill is done
   // for a decrease, not even the fragment on the high end of the last page
   // that is beyond the new highest address.  For maximum safety and
   // portability, then the bytes in the last page that reside above [the
   // new] sbrk(0) should be considered to be uninitialized, but in practice
   // it is exceedingly likely that they will retain their previous
   // contents.
   //
   // 1) If an increase is large enough to require new whole pages, then
   // those new whole pages (like all new pages) are zero-filled by the
   // operating system.  So if sbrk(0) already is page aligned, then
   // sbrk(PAGE_SIZE) *does* zero-fill the new memory.
   //
   // 2) Any increase that lies within an existing allocated page is not
   // changed.  So if (x = sbrk(0)) is not page aligned, then
   // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
   // existing contents, and an additional PAGE_SIZE bytes which are zeroed.
   // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
   // of them come along for the ride because the operating system deals
   // only in whole pages.  Again, for maximum safety and portability, then
   // anything that lives above [the new] sbrk(0) should be considered
   // uninitialized, but in practice will retain previous contents [zero in
   // this case.]"
   //
   // In short:
   //
   // A key property of sbrk/brk is that new whole pages that are supplied
   // by the operating system *do* get initialized to zero.
   //
   // As for the portability of all this:
   //
   // sbrk and brk are not POSIX.  However, any system that is a derivative
   // of *nix has sbrk and brk because there are too many software (such as
   // the Bourne shell) which rely on the traditional memory map (.text,
   // .data+.bss, stack) and the existence of sbrk/brk.
   //
   // So we should arguably observe all this.  However:
   // - The current inaccuracy has caused maybe one complaint in seven years(?)
   // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
   //   doubt most programmers know the above information.
   // So I'm not terribly unhappy with marking it as undefined. --njn.
   //
   // [More: I think most of what John said only applies to sbrk().  It seems
   // that brk() always deals in whole pages.  And since this event deals
   // directly with brk(), not with sbrk(), perhaps it would be reasonable to
   // just mark all memory it allocates as defined.]
   //
   // Net effect of the code below: outside Solaris, new brk memory is
   // handled by the same w_tid handlers used above for signal stacks (the
   // make_ECU variant when origin tracking is on); on Solaris it is
   // marked as defined.
#  if !defined(VGO_solaris)
   if (MC_(clo_mc_level) == 3)
      VG_(track_new_mem_brk) ( mc_new_mem_w_tid_make_ECU );
   else
      VG_(track_new_mem_brk) ( mc_new_mem_w_tid_no_ECU );
#  else
   // On Solaris, brk memory has to be marked as defined, otherwise we get
   // many false positives.
   VG_(track_new_mem_brk) ( make_mem_defined_w_tid );
#  endif

   /* This origin tracking cache is huge (~100M), so only initialise
      if we need it.  Either way, check that the two cache pointers
      are in the state we expect. */
   if (MC_(clo_mc_level) >= 3) {
      init_OCache();
      tl_assert(ocacheL1 != NULL);
      tl_assert(ocacheL2 != NULL);
   } else {
      tl_assert(ocacheL1 == NULL);
      tl_assert(ocacheL2 == NULL);
   }

   /* Pool allocator for MC_Chunk records.  Each element is sized to
      hold an MC_Chunk followed by MC_(n_where_pointers)() ExeContext
      pointers.  NOTE(review): the 1000 is presumably the number of
      elements per pool -- confirm against VG_(newPA)'s declaration. */
   MC_(chunk_poolalloc) = VG_(newPA)
      (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
       1000,
       VG_(malloc),
       "mc.cMC.1 (MC_Chunk pools)",
       VG_(free));

   /* Do not check definedness of guest state if --undef-value-errors=no */
   if (MC_(clo_mc_level) >= 2)
      VG_(track_pre_reg_read) ( mc_pre_reg_read );

   if (VG_(clo_xtree_memory) == Vg_XTMemory_Full) {
      /* Full xtree memory profiling is rejected as a bad option
         combination unless allocation stack traces are being kept. */
      if (MC_(clo_keep_stacktraces) == KS_none
          || MC_(clo_keep_stacktraces) == KS_free)
         VG_(fmsg_bad_option)("--keep-stacktraces",
                              "To use --xtree-memory=full, you must"
                              " keep at least the alloc stacktrace\n");
      // Activate full xtree memory profiling.
7937 VG_(XTMemory_Full_init)(VG_(XT_filter_1top_and_maybe_below_main)); 7938 } 7939 7940 } 7941 7942 static void print_SM_info(const HChar* type, Int n_SMs) 7943 { 7944 VG_(message)(Vg_DebugMsg, 7945 " memcheck: SMs: %s = %d (%luk, %luM)\n", 7946 type, 7947 n_SMs, 7948 n_SMs * sizeof(SecMap) / 1024UL, 7949 n_SMs * sizeof(SecMap) / (1024 * 1024UL) ); 7950 } 7951 7952 static void mc_print_stats (void) 7953 { 7954 SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB; 7955 7956 VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n", 7957 VG_(free_queue_volume), VG_(free_queue_length)); 7958 VG_(message)(Vg_DebugMsg, 7959 " memcheck: sanity checks: %d cheap, %d expensive\n", 7960 n_sanity_cheap, n_sanity_expensive ); 7961 VG_(message)(Vg_DebugMsg, 7962 " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n", 7963 n_auxmap_L2_nodes, 7964 n_auxmap_L2_nodes * 64, 7965 n_auxmap_L2_nodes / 16 ); 7966 VG_(message)(Vg_DebugMsg, 7967 " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n", 7968 n_auxmap_L1_searches, n_auxmap_L1_cmps, 7969 (10ULL * n_auxmap_L1_cmps) 7970 / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1) 7971 ); 7972 VG_(message)(Vg_DebugMsg, 7973 " memcheck: auxmaps_L2: %llu searches, %llu nodes\n", 7974 n_auxmap_L2_searches, n_auxmap_L2_nodes 7975 ); 7976 7977 print_SM_info("n_issued ", n_issued_SMs); 7978 print_SM_info("n_deissued ", n_deissued_SMs); 7979 print_SM_info("max_noaccess ", max_noaccess_SMs); 7980 print_SM_info("max_undefined", max_undefined_SMs); 7981 print_SM_info("max_defined ", max_defined_SMs); 7982 print_SM_info("max_non_DSM ", max_non_DSM_SMs); 7983 7984 // Three DSMs, plus the non-DSM ones 7985 max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap); 7986 // The 3*sizeof(Word) bytes is the AVL node metadata size. 7987 // The VG_ROUNDUP is because the OSet pool allocator will/must align 7988 // the elements on pointer size. 
7989 // Note that the pool allocator has some additional small overhead 7990 // which is not counted in the below. 7991 // Hardwiring this logic sucks, but I don't see how else to do it. 7992 max_secVBit_szB = max_secVBit_nodes * 7993 (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*))); 7994 max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB; 7995 7996 VG_(message)(Vg_DebugMsg, 7997 " memcheck: max sec V bit nodes: %d (%luk, %luM)\n", 7998 max_secVBit_nodes, max_secVBit_szB / 1024, 7999 max_secVBit_szB / (1024 * 1024)); 8000 VG_(message)(Vg_DebugMsg, 8001 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n", 8002 sec_vbits_new_nodes + sec_vbits_updates, 8003 sec_vbits_new_nodes, sec_vbits_updates ); 8004 VG_(message)(Vg_DebugMsg, 8005 " memcheck: max shadow mem size: %luk, %luM\n", 8006 max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024)); 8007 8008 if (MC_(clo_mc_level) >= 3) { 8009 VG_(message)(Vg_DebugMsg, 8010 " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n", 8011 stats_ocacheL1_find, 8012 stats_ocacheL1_misses, 8013 stats_ocacheL1_lossage ); 8014 VG_(message)(Vg_DebugMsg, 8015 " ocacheL1: %'12lu at 0 %'12lu at 1\n", 8016 stats_ocacheL1_find - stats_ocacheL1_misses 8017 - stats_ocacheL1_found_at_1 8018 - stats_ocacheL1_found_at_N, 8019 stats_ocacheL1_found_at_1 ); 8020 VG_(message)(Vg_DebugMsg, 8021 " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n", 8022 stats_ocacheL1_found_at_N, 8023 stats_ocacheL1_movefwds ); 8024 VG_(message)(Vg_DebugMsg, 8025 " ocacheL1: %'12lu sizeB %'12d useful\n", 8026 (SizeT)sizeof(OCache), 8027 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS ); 8028 VG_(message)(Vg_DebugMsg, 8029 " ocacheL2: %'12lu refs %'12lu misses\n", 8030 stats__ocacheL2_refs, 8031 stats__ocacheL2_misses ); 8032 VG_(message)(Vg_DebugMsg, 8033 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n", 8034 stats__ocacheL2_n_nodes_max, 8035 stats__ocacheL2_n_nodes ); 8036 VG_(message)(Vg_DebugMsg, 8037 " niacache: %'12lu 
refs %'12lu misses\n", 8038 stats__nia_cache_queries, stats__nia_cache_misses); 8039 } else { 8040 tl_assert(ocacheL1 == NULL); 8041 tl_assert(ocacheL2 == NULL); 8042 } 8043 } 8044 8045 8046 static void mc_fini ( Int exitcode ) 8047 { 8048 MC_(xtmemory_report) (VG_(clo_xtree_memory_file), True); 8049 MC_(print_malloc_stats)(); 8050 8051 if (MC_(clo_leak_check) != LC_Off) { 8052 LeakCheckParams lcp; 8053 HChar* xt_filename = NULL; 8054 lcp.mode = MC_(clo_leak_check); 8055 lcp.show_leak_kinds = MC_(clo_show_leak_kinds); 8056 lcp.heuristics = MC_(clo_leak_check_heuristics); 8057 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds); 8058 lcp.deltamode = LCD_Any; 8059 lcp.max_loss_records_output = 999999999; 8060 lcp.requested_by_monitor_command = False; 8061 if (MC_(clo_xtree_leak)) { 8062 xt_filename = VG_(expand_file_name)("--xtree-leak-file", 8063 MC_(clo_xtree_leak_file)); 8064 lcp.xt_filename = xt_filename; 8065 lcp.mode = LC_Full; 8066 } 8067 else 8068 lcp.xt_filename = NULL; 8069 MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp); 8070 if (MC_(clo_xtree_leak)) 8071 VG_(free)(xt_filename); 8072 } else { 8073 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) { 8074 VG_(umsg)( 8075 "For a detailed leak analysis, rerun with: --leak-check=full\n" 8076 "\n" 8077 ); 8078 } 8079 } 8080 8081 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) { 8082 VG_(message)(Vg_UserMsg, 8083 "For counts of detected and suppressed errors, rerun with: -v\n"); 8084 } 8085 8086 if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1 8087 && MC_(clo_mc_level) == 2) { 8088 VG_(message)(Vg_UserMsg, 8089 "Use --track-origins=yes to see where " 8090 "uninitialised values come from\n"); 8091 } 8092 8093 /* Print a warning if any client-request generated ignore-ranges 8094 still exist. It would be reasonable to expect that a properly 8095 written program would remove any such ranges before exiting, and 8096 since they are a bit on the dangerous side, let's comment. 
By 8097 contrast ranges which are specified on the command line normally 8098 pertain to hardware mapped into the address space, and so we 8099 can't expect the client to have got rid of them. */ 8100 if (gIgnoredAddressRanges) { 8101 UInt i, nBad = 0; 8102 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) { 8103 UWord val = IAR_INVALID; 8104 UWord key_min = ~(UWord)0; 8105 UWord key_max = (UWord)0; 8106 VG_(indexRangeMap)( &key_min, &key_max, &val, 8107 gIgnoredAddressRanges, i ); 8108 if (val != IAR_ClientReq) 8109 continue; 8110 /* Print the offending range. Also, if it is the first, 8111 print a banner before it. */ 8112 nBad++; 8113 if (nBad == 1) { 8114 VG_(umsg)( 8115 "WARNING: exiting program has the following client-requested\n" 8116 "WARNING: address error disablement range(s) still in force,\n" 8117 "WARNING: " 8118 "possibly as a result of some mistake in the use of the\n" 8119 "WARNING: " 8120 "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n" 8121 ); 8122 } 8123 VG_(umsg)(" [%u] 0x%016lx-0x%016lx %s\n", 8124 i, key_min, key_max, showIARKind(val)); 8125 } 8126 } 8127 8128 done_prof_mem(); 8129 8130 if (VG_(clo_stats)) 8131 mc_print_stats(); 8132 8133 if (0) { 8134 VG_(message)(Vg_DebugMsg, 8135 "------ Valgrind's client block stats follow ---------------\n" ); 8136 show_client_block_stats(); 8137 } 8138 } 8139 8140 /* mark the given addr/len unaddressable for watchpoint implementation 8141 The PointKind will be handled at access time */ 8142 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert, 8143 Addr addr, SizeT len) 8144 { 8145 /* GDBTD this is somewhat fishy. We might rather have to save the previous 8146 accessibility and definedness in gdbserver so as to allow restoring it 8147 properly. 
Currently, we assume that the user only watches things 8148 which are properly addressable and defined */ 8149 if (insert) 8150 MC_(make_mem_noaccess) (addr, len); 8151 else 8152 MC_(make_mem_defined) (addr, len); 8153 return True; 8154 } 8155 8156 static void mc_pre_clo_init(void) 8157 { 8158 VG_(details_name) ("Memcheck"); 8159 VG_(details_version) (NULL); 8160 VG_(details_description) ("a memory error detector"); 8161 VG_(details_copyright_author)( 8162 "Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al."); 8163 VG_(details_bug_reports_to) (VG_BUGS_TO); 8164 VG_(details_avg_translation_sizeB) ( 640 ); 8165 8166 VG_(basic_tool_funcs) (mc_post_clo_init, 8167 MC_(instrument), 8168 mc_fini); 8169 8170 VG_(needs_final_IR_tidy_pass) ( MC_(final_tidy) ); 8171 8172 8173 VG_(needs_core_errors) (); 8174 VG_(needs_tool_errors) (MC_(eq_Error), 8175 MC_(before_pp_Error), 8176 MC_(pp_Error), 8177 True,/*show TIDs for errors*/ 8178 MC_(update_Error_extra), 8179 MC_(is_recognised_suppression), 8180 MC_(read_extra_suppression_info), 8181 MC_(error_matches_suppression), 8182 MC_(get_error_name), 8183 MC_(get_extra_suppression_info), 8184 MC_(print_extra_suppression_use), 8185 MC_(update_extra_suppression_use)); 8186 VG_(needs_libc_freeres) (); 8187 VG_(needs_cxx_freeres) (); 8188 VG_(needs_command_line_options)(mc_process_cmd_line_options, 8189 mc_print_usage, 8190 mc_print_debug_usage); 8191 VG_(needs_client_requests) (mc_handle_client_request); 8192 VG_(needs_sanity_checks) (mc_cheap_sanity_check, 8193 mc_expensive_sanity_check); 8194 VG_(needs_print_stats) (mc_print_stats); 8195 VG_(needs_info_location) (MC_(pp_describe_addr)); 8196 VG_(needs_malloc_replacement) (MC_(malloc), 8197 MC_(__builtin_new), 8198 MC_(__builtin_vec_new), 8199 MC_(memalign), 8200 MC_(calloc), 8201 MC_(free), 8202 MC_(__builtin_delete), 8203 MC_(__builtin_vec_delete), 8204 MC_(realloc), 8205 MC_(malloc_usable_size), 8206 MC_MALLOC_DEFAULT_REDZONE_SZB ); 8207 MC_(Malloc_Redzone_SzB) = 
VG_(malloc_effective_client_redzone_size)(); 8208 8209 VG_(needs_xml_output) (); 8210 8211 VG_(track_new_mem_startup) ( mc_new_mem_startup ); 8212 8213 // Handling of mmap and mprotect isn't simple (well, it is simple, 8214 // but the justification isn't.) See comments above, just prior to 8215 // mc_new_mem_mmap. 8216 VG_(track_new_mem_mmap) ( mc_new_mem_mmap ); 8217 VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect ); 8218 8219 VG_(track_copy_mem_remap) ( MC_(copy_address_range_state) ); 8220 8221 VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) ); 8222 VG_(track_die_mem_brk) ( MC_(make_mem_noaccess) ); 8223 VG_(track_die_mem_munmap) ( MC_(make_mem_noaccess) ); 8224 8225 /* Defer the specification of the new_mem_stack functions to the 8226 post_clo_init function, since we need to first parse the command 8227 line before deciding which set to use. */ 8228 8229 # ifdef PERF_FAST_STACK 8230 VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 ); 8231 VG_(track_die_mem_stack_8) ( mc_die_mem_stack_8 ); 8232 VG_(track_die_mem_stack_12) ( mc_die_mem_stack_12 ); 8233 VG_(track_die_mem_stack_16) ( mc_die_mem_stack_16 ); 8234 VG_(track_die_mem_stack_32) ( mc_die_mem_stack_32 ); 8235 VG_(track_die_mem_stack_112) ( mc_die_mem_stack_112 ); 8236 VG_(track_die_mem_stack_128) ( mc_die_mem_stack_128 ); 8237 VG_(track_die_mem_stack_144) ( mc_die_mem_stack_144 ); 8238 VG_(track_die_mem_stack_160) ( mc_die_mem_stack_160 ); 8239 # endif 8240 VG_(track_die_mem_stack) ( mc_die_mem_stack ); 8241 8242 VG_(track_ban_mem_stack) ( MC_(make_mem_noaccess) ); 8243 8244 VG_(track_pre_mem_read) ( check_mem_is_defined ); 8245 VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz ); 8246 VG_(track_pre_mem_write) ( check_mem_is_addressable ); 8247 VG_(track_post_mem_write) ( mc_post_mem_write ); 8248 8249 VG_(track_post_reg_write) ( mc_post_reg_write ); 8250 VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall ); 8251 8252 if (MC_(clo_mc_level) >= 2) { 8253 
VG_(track_copy_mem_to_reg) ( mc_copy_mem_to_reg ); 8254 VG_(track_copy_reg_to_mem) ( mc_copy_reg_to_mem ); 8255 } 8256 8257 VG_(needs_watchpoint) ( mc_mark_unaddressable_for_watchpoint ); 8258 8259 init_shadow_memory(); 8260 // MC_(chunk_poolalloc) must be allocated in post_clo_init 8261 tl_assert(MC_(chunk_poolalloc) == NULL); 8262 MC_(malloc_list) = VG_(HT_construct)( "MC_(malloc_list)" ); 8263 MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" ); 8264 init_prof_mem(); 8265 8266 tl_assert( mc_expensive_sanity_check() ); 8267 8268 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true. 8269 tl_assert(sizeof(UWord) == sizeof(Addr)); 8270 // Call me paranoid. I don't care. 8271 tl_assert(sizeof(void*) == sizeof(Addr)); 8272 8273 // BYTES_PER_SEC_VBIT_NODE must be a power of two. 8274 tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE)); 8275 8276 /* This is small. Always initialise it. */ 8277 init_nia_to_ecu_cache(); 8278 8279 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know 8280 if we need to, since the command line args haven't been 8281 processed yet. Hence defer it to mc_post_clo_init. */ 8282 tl_assert(ocacheL1 == NULL); 8283 tl_assert(ocacheL2 == NULL); 8284 8285 /* Check some important stuff. See extensive comments above 8286 re UNALIGNED_OR_HIGH for background. 
*/ 8287 # if VG_WORDSIZE == 4 8288 tl_assert(sizeof(void*) == 4); 8289 tl_assert(sizeof(Addr) == 4); 8290 tl_assert(sizeof(UWord) == 4); 8291 tl_assert(sizeof(Word) == 4); 8292 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL); 8293 tl_assert(MASK(1) == 0UL); 8294 tl_assert(MASK(2) == 1UL); 8295 tl_assert(MASK(4) == 3UL); 8296 tl_assert(MASK(8) == 7UL); 8297 # else 8298 tl_assert(VG_WORDSIZE == 8); 8299 tl_assert(sizeof(void*) == 8); 8300 tl_assert(sizeof(Addr) == 8); 8301 tl_assert(sizeof(UWord) == 8); 8302 tl_assert(sizeof(Word) == 8); 8303 tl_assert(MAX_PRIMARY_ADDRESS == 0x1FFFFFFFFFULL); 8304 tl_assert(MASK(1) == 0xFFFFFFE000000000ULL); 8305 tl_assert(MASK(2) == 0xFFFFFFE000000001ULL); 8306 tl_assert(MASK(4) == 0xFFFFFFE000000003ULL); 8307 tl_assert(MASK(8) == 0xFFFFFFE000000007ULL); 8308 # endif 8309 8310 /* Check some assertions to do with the instrumentation machinery. */ 8311 MC_(do_instrumentation_startup_checks)(); 8312 } 8313 8314 STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT)); 8315 8316 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init) 8317 8318 /*--------------------------------------------------------------------*/ 8319 /*--- end mc_main.c ---*/ 8320 /*--------------------------------------------------------------------*/ 8321