1 2 /*--------------------------------------------------------------------*/ 3 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/ 4 /*--- accessibility (A) and validity (V) status of each byte. ---*/ 5 /*--- mc_main.c ---*/ 6 /*--------------------------------------------------------------------*/ 7 8 /* 9 This file is part of MemCheck, a heavyweight Valgrind tool for 10 detecting memory errors. 11 12 Copyright (C) 2000-2012 Julian Seward 13 jseward (at) acm.org 14 15 This program is free software; you can redistribute it and/or 16 modify it under the terms of the GNU General Public License as 17 published by the Free Software Foundation; either version 2 of the 18 License, or (at your option) any later version. 19 20 This program is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of 22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 General Public License for more details. 24 25 You should have received a copy of the GNU General Public License 26 along with this program; if not, write to the Free Software 27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 28 02111-1307, USA. 29 30 The GNU General Public License is contained in the file COPYING. 31 */ 32 33 #include "pub_tool_basics.h" 34 #include "pub_tool_aspacemgr.h" 35 #include "pub_tool_gdbserver.h" 36 #include "pub_tool_poolalloc.h" 37 #include "pub_tool_hashtable.h" // For mc_include.h 38 #include "pub_tool_libcbase.h" 39 #include "pub_tool_libcassert.h" 40 #include "pub_tool_libcprint.h" 41 #include "pub_tool_machine.h" 42 #include "pub_tool_mallocfree.h" 43 #include "pub_tool_options.h" 44 #include "pub_tool_oset.h" 45 #include "pub_tool_replacemalloc.h" 46 #include "pub_tool_tooliface.h" 47 #include "pub_tool_threadstate.h" 48 49 #include "mc_include.h" 50 #include "memcheck.h" /* for client requests */ 51 52 53 /* Set to 1 to do a little more sanity checking */ 54 #define VG_DEBUG_MEMORY 0 55 56 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args) 57 58 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */ 59 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */ 60 61 62 /*------------------------------------------------------------*/ 63 /*--- Fast-case knobs ---*/ 64 /*------------------------------------------------------------*/ 65 66 // Comment these out to disable the fast cases (don't just set them to zero). 67 68 #define PERF_FAST_LOADV 1 69 #define PERF_FAST_STOREV 1 70 71 #define PERF_FAST_SARP 1 72 73 #define PERF_FAST_STACK 1 74 #define PERF_FAST_STACK2 1 75 76 /* Change this to 1 to enable assertions on origin tracking cache fast 77 paths */ 78 #define OC_ENABLE_ASSERTIONS 0 79 80 81 /*------------------------------------------------------------*/ 82 /*--- Comments on the origin tracking implementation ---*/ 83 /*------------------------------------------------------------*/ 84 85 /* See detailed comment entitled 86 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION 87 which is contained further on in this file. */ 88 89 90 /*------------------------------------------------------------*/ 91 /*--- V bits and A bits ---*/ 92 /*------------------------------------------------------------*/ 93 94 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck 95 thinks the corresponding value bit is defined. And every memory byte 96 has an A bit, which tracks whether Memcheck thinks the program can access 97 it safely (ie. 
it's mapped, and has at least one of the RWX permission bits 98 set). So every N-bit register is shadowed with N V bits, and every memory 99 byte is shadowed with 8 V bits and one A bit. 100 101 In the implementation, we use two forms of compression (compressed V bits 102 and distinguished secondary maps) to avoid the 9-bit-per-byte overhead 103 for memory. 104 105 Memcheck also tracks extra information about each heap block that is 106 allocated, for detecting memory leaks and other purposes. 107 */ 108 109 /*------------------------------------------------------------*/ 110 /*--- Basic A/V bitmap representation. ---*/ 111 /*------------------------------------------------------------*/ 112 113 /* All reads and writes are checked against a memory map (a.k.a. shadow 114 memory), which records the state of all memory in the process. 115 116 On 32-bit machines the memory map is organised as follows. 117 The top 16 bits of an address are used to index into a top-level 118 map table, containing 65536 entries. Each entry is a pointer to a 119 second-level map, which records the accessibility and validity 120 permissions for the 65536 bytes indexed by the lower 16 bits of the 121 address. Each byte is represented by two bits (details are below). So 122 each second-level map contains 16384 bytes. This two-level arrangement 123 conveniently divides the 4G address space into 64k lumps, each of size 64k 124 bytes. 125 126 All entries in the primary (top-level) map must point to a valid 127 secondary (second-level) map. Since many of the 64kB chunks will 128 have the same status for every bit -- ie. noaccess (for unused 129 address space) or entirely addressable and defined (for code segments) -- 130 there are three distinguished secondary maps, which indicate 'noaccess', 131 'undefined' and 'defined'. For these uniform 64kB chunks, the primary 132 map entry points to the relevant distinguished map. In practice, 133 typically more than half of the addressable memory is represented with 134 the 'undefined' or 'defined' distinguished secondary map, so it gives a 135 good saving. It also lets us set the V+A bits of large address regions 136 quickly in set_address_range_perms(). 137 138 On 64-bit machines it's more complicated. If we followed the same basic 139 scheme we'd have a four-level table which would require too many memory 140 accesses. So instead the top-level map table has 2^19 entries (indexed 141 using bits 16..34 of the address); this covers the bottom 32GB. Any 142 accesses above 32GB are handled with a slow, sparse auxiliary table. 143 Valgrind's address space manager tries very hard to keep things below 144 this 32GB barrier so that performance doesn't suffer too much. 145 146 Note that this file has a lot of different functions for reading and 147 writing shadow memory. Only a couple are strictly necessary (eg. 148 get_vabits2 and set_vabits2); most are just specialised for specific 149 common cases to improve performance. 150 151 Aside: the V+A bits are less precise than they could be -- we have no way 152 of marking memory as read-only. It would be great if we could add an 153 extra state VA_BITSn_READONLY. But then we'd have 5 different states, 154 which requires 2.3 bits to hold, and there's no way to do that elegantly 155 -- we'd have to double up to 4 bits of metadata per byte, which doesn't 156 seem worth it. 157 */ 158 159 /* --------------- Basic configuration --------------- */ 160 161 /* Only change this. N_PRIMARY_MAP *must* be a power of 2.
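Illustrative arithmetic: each primary map entry covers one 64kB secondary map, so N_PRIMARY_BITS == 16 covers 2^16 * 64kB = 4GB, the whole 32-bit address space, while N_PRIMARY_BITS == 19 covers 2^19 * 64kB = 32GB, which is where MAX_PRIMARY_ADDRESS below comes from.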
*/ 162 163 #if VG_WORDSIZE == 4 164 165 /* cover the entire address space */ 166 # define N_PRIMARY_BITS 16 167 168 #else 169 170 /* Just handle the first 32G fast and the rest via auxiliary 171 primaries. If you change this, Memcheck will assert at startup. 172 See the definition of UNALIGNED_OR_HIGH for extensive comments. */ 173 # define N_PRIMARY_BITS 19 174 175 #endif 176 177 178 /* Do not change this. */ 179 #define N_PRIMARY_MAP ( ((UWord)1) << N_PRIMARY_BITS) 180 181 /* Do not change this. */ 182 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1) 183 184 185 /* --------------- Secondary maps --------------- */ 186 187 // Each byte of memory conceptually has an A bit, which indicates its 188 // addressability, and 8 V bits, which indicate its definedness. 189 // 190 // But because very few bytes are partially defined, we can use a nice 191 // compression scheme to reduce the size of shadow memory. Each byte of 192 // memory has 2 bits which indicate its state (ie. V+A bits): 193 // 194 // 00: noaccess (unaddressable but treated as fully defined) 195 // 01: undefined (addressable and fully undefined) 196 // 10: defined (addressable and fully defined) 197 // 11: partdefined (addressable and partially defined) 198 // 199 // In the "partdefined" case, we use a secondary table to store the V bits. 200 // Each entry in the secondary-V-bits table maps a byte address to its 8 V 201 // bits. 202 // 203 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for 204 // four bytes (32 bits) of memory are in each chunk. Hence the name 205 // "vabits8". This lets us get the V+A bits for four bytes at a time 206 // easily (without having to do any shifting and/or masking), and that is a 207 // very common operation. (Note that although each vabits8 chunk 208 // is 8 bits in size, it represents 32 bits of memory.) 209 // 210 // The representation is "inverse" little-endian... each 4 bytes of 211 // memory is represented by a 1 byte value, where: 212 // 213 // - the status of byte (a+0) is held in bits [1..0] 214 // - the status of byte (a+1) is held in bits [3..2] 215 // - the status of byte (a+2) is held in bits [5..4] 216 // - the status of byte (a+3) is held in bits [7..6] 217 // 218 // It's "inverse" because endianness normally describes a mapping from 219 // value bits to memory addresses; in this case the mapping is inverted. 220 // Ie. instead of particular value bits being held in certain addresses, in 221 // this case certain addresses are represented by particular value bits. 222 // See insert_vabits2_into_vabits8() for an example. 223 // 224 // But note that we don't compress the V bits stored in registers; they 225 // need to be explicit to make the shadow operations possible. Therefore 226 // when moving values between registers and memory we need to convert 227 // between the expanded in-register format and the compressed in-memory 228 // format. This isn't so difficult; it just requires careful attention in a 229 // few places. 230 231 // These represent eight bits of memory. 232 #define VA_BITS2_NOACCESS 0x0 // 00b 233 #define VA_BITS2_UNDEFINED 0x1 // 01b 234 #define VA_BITS2_DEFINED 0x2 // 10b 235 #define VA_BITS2_PARTDEFINED 0x3 // 11b 236 237 // These represent 16 bits of memory. 238 #define VA_BITS4_NOACCESS 0x0 // 00_00b 239 #define VA_BITS4_UNDEFINED 0x5 // 01_01b 240 #define VA_BITS4_DEFINED 0xa // 10_10b 241 242 // These represent 32 bits of memory.
243 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b 244 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b 245 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b 246 247 // These represent 64 bits of memory. 248 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2 249 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2 250 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2 251 252 253 #define SM_CHUNKS 16384 254 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2) 255 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3) 256 257 // Paranoia: it's critical for performance that the requested inlining 258 // occurs. So try extra hard. 259 #define INLINE inline __attribute__((always_inline)) 260 261 static INLINE Addr start_of_this_sm ( Addr a ) { 262 return (a & (~SM_MASK)); 263 } 264 static INLINE Bool is_start_of_sm ( Addr a ) { 265 return (start_of_this_sm(a) == a); 266 } 267 268 typedef 269 struct { 270 UChar vabits8[SM_CHUNKS]; 271 } 272 SecMap; 273 274 // 3 distinguished secondary maps, one for no-access, one for 275 // accessible but undefined, and one for accessible and defined. 276 // Distinguished secondaries may never be modified. 277 #define SM_DIST_NOACCESS 0 278 #define SM_DIST_UNDEFINED 1 279 #define SM_DIST_DEFINED 2 280 281 static SecMap sm_distinguished[3]; 282 283 static INLINE Bool is_distinguished_sm ( SecMap* sm ) { 284 return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2]; 285 } 286 287 // Forward declaration 288 static void update_SM_counts(SecMap* oldSM, SecMap* newSM); 289 290 /* dist_sm points to one of our three distinguished secondaries. Make 291 a copy of it so that we can write to it. 292 */ 293 static SecMap* copy_for_writing ( SecMap* dist_sm ) 294 { 295 SecMap* new_sm; 296 tl_assert(dist_sm == &sm_distinguished[0] 297 || dist_sm == &sm_distinguished[1] 298 || dist_sm == &sm_distinguished[2]); 299 300 new_sm = VG_(am_shadow_alloc)(sizeof(SecMap)); 301 if (new_sm == NULL) 302 VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap", 303 sizeof(SecMap) ); 304 VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap)); 305 update_SM_counts(dist_sm, new_sm); 306 return new_sm; 307 } 308 309 /* --------------- Stats --------------- */ 310 311 static Int n_issued_SMs = 0; 312 static Int n_deissued_SMs = 0; 313 static Int n_noaccess_SMs = N_PRIMARY_MAP; // start with many noaccess DSMs 314 static Int n_undefined_SMs = 0; 315 static Int n_defined_SMs = 0; 316 static Int n_non_DSM_SMs = 0; 317 static Int max_noaccess_SMs = 0; 318 static Int max_undefined_SMs = 0; 319 static Int max_defined_SMs = 0; 320 static Int max_non_DSM_SMs = 0; 321 322 /* # searches initiated in auxmap_L1, and # base cmps required */ 323 static ULong n_auxmap_L1_searches = 0; 324 static ULong n_auxmap_L1_cmps = 0; 325 /* # of searches that missed in auxmap_L1 and therefore had to 326 be handed to auxmap_L2. And the number of nodes inserted. 
*/ 327 static ULong n_auxmap_L2_searches = 0; 328 static ULong n_auxmap_L2_nodes = 0; 329 330 static Int n_sanity_cheap = 0; 331 static Int n_sanity_expensive = 0; 332 333 static Int n_secVBit_nodes = 0; 334 static Int max_secVBit_nodes = 0; 335 336 static void update_SM_counts(SecMap* oldSM, SecMap* newSM) 337 { 338 if (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --; 339 else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--; 340 else if (oldSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs --; 341 else { n_non_DSM_SMs --; 342 n_deissued_SMs ++; } 343 344 if (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++; 345 else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++; 346 else if (newSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs ++; 347 else { n_non_DSM_SMs ++; 348 n_issued_SMs ++; } 349 350 if (n_noaccess_SMs > max_noaccess_SMs ) max_noaccess_SMs = n_noaccess_SMs; 351 if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs; 352 if (n_defined_SMs > max_defined_SMs ) max_defined_SMs = n_defined_SMs; 353 if (n_non_DSM_SMs > max_non_DSM_SMs ) max_non_DSM_SMs = n_non_DSM_SMs; 354 } 355 356 /* --------------- Primary maps --------------- */ 357 358 /* The main primary map. This covers some initial part of the address 359 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is 360 handled using the auxiliary primary map. 361 */ 362 static SecMap* primary_map[N_PRIMARY_MAP]; 363 364 365 /* An entry in the auxiliary primary map. base must be a 64k-aligned 366 value, and sm points at the relevant secondary map. As with the 367 main primary map, the secondary may be either a real secondary, or 368 one of the three distinguished secondaries. DO NOT CHANGE THIS 369 LAYOUT: the first word has to be the key for OSet fast lookups. 370 */ 371 typedef 372 struct { 373 Addr base; 374 SecMap* sm; 375 } 376 AuxMapEnt; 377 378 /* Tunable parameter: How big is the L1 queue? */ 379 #define N_AUXMAP_L1 24 380 381 /* Tunable parameter: How far along the L1 queue to insert 382 entries resulting from L2 lookups? */ 383 #define AUXMAP_L1_INSERT_IX 12 384 385 static struct { 386 Addr base; 387 AuxMapEnt* ent; // pointer to the matching auxmap_L2 node 388 } 389 auxmap_L1[N_AUXMAP_L1]; 390 391 static OSet* auxmap_L2 = NULL; 392 393 static void init_auxmap_L1_L2 ( void ) 394 { 395 Int i; 396 for (i = 0; i < N_AUXMAP_L1; i++) { 397 auxmap_L1[i].base = 0; 398 auxmap_L1[i].ent = NULL; 399 } 400 401 tl_assert(0 == offsetof(AuxMapEnt,base)); 402 tl_assert(sizeof(Addr) == sizeof(void*)); 403 auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base), 404 /*fastCmp*/ NULL, 405 VG_(malloc), "mc.iaLL.1", VG_(free) ); 406 } 407 408 /* Check representation invariants; if OK return NULL; else a 409 descriptive bit of text. Also return the number of 410 non-distinguished secondary maps referred to from the auxiliary 411 primary maps. */ 412 413 static HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found ) 414 { 415 Word i, j; 416 /* On a 32-bit platform, the L2 and L1 tables should 417 both remain empty forever. 
418 419 On a 64-bit platform: 420 In the L2 table: 421 all .base & 0xFFFF == 0 422 all .base > MAX_PRIMARY_ADDRESS 423 In the L1 table: 424 all .base & 0xFFFF == 0 425 all (.base > MAX_PRIMARY_ADDRESS 426 .base & 0xFFFF == 0 427 and .ent points to an AuxMapEnt with the same .base) 428 or 429 (.base == 0 and .ent == NULL) 430 */ 431 *n_secmaps_found = 0; 432 if (sizeof(void*) == 4) { 433 /* 32-bit platform */ 434 if (VG_(OSetGen_Size)(auxmap_L2) != 0) 435 return "32-bit: auxmap_L2 is non-empty"; 436 for (i = 0; i < N_AUXMAP_L1; i++) 437 if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL) 438 return "32-bit: auxmap_L1 is non-empty"; 439 } else { 440 /* 64-bit platform */ 441 UWord elems_seen = 0; 442 AuxMapEnt *elem, *res; 443 AuxMapEnt key; 444 /* L2 table */ 445 VG_(OSetGen_ResetIter)(auxmap_L2); 446 while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) { 447 elems_seen++; 448 if (0 != (elem->base & (Addr)0xFFFF)) 449 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2"; 450 if (elem->base <= MAX_PRIMARY_ADDRESS) 451 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2"; 452 if (elem->sm == NULL) 453 return "64-bit: .sm in _L2 is NULL"; 454 if (!is_distinguished_sm(elem->sm)) 455 (*n_secmaps_found)++; 456 } 457 if (elems_seen != n_auxmap_L2_nodes) 458 return "64-bit: disagreement on number of elems in _L2"; 459 /* Check L1-L2 correspondence */ 460 for (i = 0; i < N_AUXMAP_L1; i++) { 461 if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL) 462 continue; 463 if (0 != (auxmap_L1[i].base & (Addr)0xFFFF)) 464 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1"; 465 if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS) 466 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1"; 467 if (auxmap_L1[i].ent == NULL) 468 return "64-bit: .ent is NULL in auxmap_L1"; 469 if (auxmap_L1[i].ent->base != auxmap_L1[i].base) 470 return "64-bit: _L1 and _L2 bases are inconsistent"; 471 /* Look it up in auxmap_L2. */ 472 key.base = auxmap_L1[i].base; 473 key.sm = 0; 474 res = VG_(OSetGen_Lookup)(auxmap_L2, &key); 475 if (res == NULL) 476 return "64-bit: _L1 .base not found in _L2"; 477 if (res != auxmap_L1[i].ent) 478 return "64-bit: _L1 .ent disagrees with _L2 entry"; 479 } 480 /* Check L1 contains no duplicates */ 481 for (i = 0; i < N_AUXMAP_L1; i++) { 482 if (auxmap_L1[i].base == 0) 483 continue; 484 for (j = i+1; j < N_AUXMAP_L1; j++) { 485 if (auxmap_L1[j].base == 0) 486 continue; 487 if (auxmap_L1[j].base == auxmap_L1[i].base) 488 return "64-bit: duplicate _L1 .base entries"; 489 } 490 } 491 } 492 return NULL; /* ok */ 493 } 494 495 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent ) 496 { 497 Word i; 498 tl_assert(ent); 499 tl_assert(rank >= 0 && rank < N_AUXMAP_L1); 500 for (i = N_AUXMAP_L1-1; i > rank; i--) 501 auxmap_L1[i] = auxmap_L1[i-1]; 502 auxmap_L1[rank].base = ent->base; 503 auxmap_L1[rank].ent = ent; 504 } 505 506 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a ) 507 { 508 AuxMapEnt key; 509 AuxMapEnt* res; 510 Word i; 511 512 tl_assert(a > MAX_PRIMARY_ADDRESS); 513 a &= ~(Addr)0xFFFF; 514 515 /* First search the front-cache, which is a self-organising 516 list containing the most popular entries. 
*/ 517 518 if (LIKELY(auxmap_L1[0].base == a)) 519 return auxmap_L1[0].ent; 520 if (LIKELY(auxmap_L1[1].base == a)) { 521 Addr t_base = auxmap_L1[0].base; 522 AuxMapEnt* t_ent = auxmap_L1[0].ent; 523 auxmap_L1[0].base = auxmap_L1[1].base; 524 auxmap_L1[0].ent = auxmap_L1[1].ent; 525 auxmap_L1[1].base = t_base; 526 auxmap_L1[1].ent = t_ent; 527 return auxmap_L1[0].ent; 528 } 529 530 n_auxmap_L1_searches++; 531 532 for (i = 0; i < N_AUXMAP_L1; i++) { 533 if (auxmap_L1[i].base == a) { 534 break; 535 } 536 } 537 tl_assert(i >= 0 && i <= N_AUXMAP_L1); 538 539 n_auxmap_L1_cmps += (ULong)(i+1); 540 541 if (i < N_AUXMAP_L1) { 542 if (i > 0) { 543 Addr t_base = auxmap_L1[i-1].base; 544 AuxMapEnt* t_ent = auxmap_L1[i-1].ent; 545 auxmap_L1[i-1].base = auxmap_L1[i-0].base; 546 auxmap_L1[i-1].ent = auxmap_L1[i-0].ent; 547 auxmap_L1[i-0].base = t_base; 548 auxmap_L1[i-0].ent = t_ent; 549 i--; 550 } 551 return auxmap_L1[i].ent; 552 } 553 554 n_auxmap_L2_searches++; 555 556 /* First see if we already have it. */ 557 key.base = a; 558 key.sm = 0; 559 560 res = VG_(OSetGen_Lookup)(auxmap_L2, &key); 561 if (res) 562 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res ); 563 return res; 564 } 565 566 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a ) 567 { 568 AuxMapEnt *nyu, *res; 569 570 /* First see if we already have it. */ 571 res = maybe_find_in_auxmap( a ); 572 if (LIKELY(res)) 573 return res; 574 575 /* Ok, there's no entry in the secondary map, so we'll have 576 to allocate one. */ 577 a &= ~(Addr)0xFFFF; 578 579 nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) ); 580 tl_assert(nyu); 581 nyu->base = a; 582 nyu->sm = &sm_distinguished[SM_DIST_NOACCESS]; 583 VG_(OSetGen_Insert)( auxmap_L2, nyu ); 584 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu ); 585 n_auxmap_L2_nodes++; 586 return nyu; 587 } 588 589 /* --------------- SecMap fundamentals --------------- */ 590 591 // In all these, 'low' means it's definitely in the main primary map, 592 // 'high' means it's definitely in the auxiliary table. 593 594 static INLINE SecMap** get_secmap_low_ptr ( Addr a ) 595 { 596 UWord pm_off = a >> 16; 597 # if VG_DEBUG_MEMORY >= 1 598 tl_assert(pm_off < N_PRIMARY_MAP); 599 # endif 600 return &primary_map[ pm_off ]; 601 } 602 603 static INLINE SecMap** get_secmap_high_ptr ( Addr a ) 604 { 605 AuxMapEnt* am = find_or_alloc_in_auxmap(a); 606 return &am->sm; 607 } 608 609 static SecMap** get_secmap_ptr ( Addr a ) 610 { 611 return ( a <= MAX_PRIMARY_ADDRESS 612 ? get_secmap_low_ptr(a) 613 : get_secmap_high_ptr(a)); 614 } 615 616 static INLINE SecMap* get_secmap_for_reading_low ( Addr a ) 617 { 618 return *get_secmap_low_ptr(a); 619 } 620 621 static INLINE SecMap* get_secmap_for_reading_high ( Addr a ) 622 { 623 return *get_secmap_high_ptr(a); 624 } 625 626 static INLINE SecMap* get_secmap_for_writing_low(Addr a) 627 { 628 SecMap** p = get_secmap_low_ptr(a); 629 if (UNLIKELY(is_distinguished_sm(*p))) 630 *p = copy_for_writing(*p); 631 return *p; 632 } 633 634 static INLINE SecMap* get_secmap_for_writing_high ( Addr a ) 635 { 636 SecMap** p = get_secmap_high_ptr(a); 637 if (UNLIKELY(is_distinguished_sm(*p))) 638 *p = copy_for_writing(*p); 639 return *p; 640 } 641 642 /* Produce the secmap for 'a', either from the primary map or by 643 ensuring there is an entry for it in the aux primary map. The 644 secmap may be a distinguished one as the caller will only want to 645 be able to read it. 
646 */ 647 static INLINE SecMap* get_secmap_for_reading ( Addr a ) 648 { 649 return ( a <= MAX_PRIMARY_ADDRESS 650 ? get_secmap_for_reading_low (a) 651 : get_secmap_for_reading_high(a) ); 652 } 653 654 /* Produce the secmap for 'a', either from the primary map or by 655 ensuring there is an entry for it in the aux primary map. The 656 secmap may not be a distinguished one, since the caller will want 657 to be able to write it. If it is a distinguished secondary, make a 658 writable copy of it, install it, and return the copy instead. (COW 659 semantics). 660 */ 661 static SecMap* get_secmap_for_writing ( Addr a ) 662 { 663 return ( a <= MAX_PRIMARY_ADDRESS 664 ? get_secmap_for_writing_low (a) 665 : get_secmap_for_writing_high(a) ); 666 } 667 668 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't 669 allocate one if one doesn't already exist. This is used by the 670 leak checker. 671 */ 672 static SecMap* maybe_get_secmap_for ( Addr a ) 673 { 674 if (a <= MAX_PRIMARY_ADDRESS) { 675 return get_secmap_for_reading_low(a); 676 } else { 677 AuxMapEnt* am = maybe_find_in_auxmap(a); 678 return am ? am->sm : NULL; 679 } 680 } 681 682 /* --------------- Fundamental functions --------------- */ 683 684 static INLINE 685 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 ) 686 { 687 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6 688 *vabits8 &= ~(0x3 << shift); // mask out the two old bits 689 *vabits8 |= (vabits2 << shift); // mask in the two new bits 690 } 691 692 static INLINE 693 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 ) 694 { 695 UInt shift; 696 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned 697 shift = (a & 2) << 1; // shift by 0 or 4 698 *vabits8 &= ~(0xf << shift); // mask out the four old bits 699 *vabits8 |= (vabits4 << shift); // mask in the four new bits 700 } 701 702 static INLINE 703 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 ) 704 { 705 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6 706 vabits8 >>= shift; // shift the two bits to the bottom 707 return 0x3 & vabits8; // mask out the rest 708 } 709 710 static INLINE 711 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 ) 712 { 713 UInt shift; 714 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned 715 shift = (a & 2) << 1; // shift by 0 or 4 716 vabits8 >>= shift; // shift the four bits to the bottom 717 return 0xf & vabits8; // mask out the rest 718 } 719 720 // Note that these four are only used in slow cases. The fast cases do 721 // clever things like combine the auxmap check (in 722 // get_secmap_{read,writ}able) with alignment checks. 723 724 // *** WARNING! *** 725 // Any time this function is called, if it is possible that vabits2 726 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the 727 // sec-V-bits table must also be set! 728 static INLINE 729 void set_vabits2 ( Addr a, UChar vabits2 ) 730 { 731 SecMap* sm = get_secmap_for_writing(a); 732 UWord sm_off = SM_OFF(a); 733 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) ); 734 } 735 736 static INLINE 737 UChar get_vabits2 ( Addr a ) 738 { 739 SecMap* sm = get_secmap_for_reading(a); 740 UWord sm_off = SM_OFF(a); 741 UChar vabits8 = sm->vabits8[sm_off]; 742 return extract_vabits2_from_vabits8(a, vabits8); 743 } 744 745 // *** WARNING! 
*** 746 // Any time this function is called, if it is possible that any of the 747 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the 748 // corresponding entry(s) in the sec-V-bits table must also be set! 749 static INLINE 750 UChar get_vabits8_for_aligned_word32 ( Addr a ) 751 { 752 SecMap* sm = get_secmap_for_reading(a); 753 UWord sm_off = SM_OFF(a); 754 UChar vabits8 = sm->vabits8[sm_off]; 755 return vabits8; 756 } 757 758 static INLINE 759 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 ) 760 { 761 SecMap* sm = get_secmap_for_writing(a); 762 UWord sm_off = SM_OFF(a); 763 sm->vabits8[sm_off] = vabits8; 764 } 765 766 767 // Forward declarations 768 static UWord get_sec_vbits8(Addr a); 769 static void set_sec_vbits8(Addr a, UWord vbits8); 770 771 // Returns False if there was an addressability error. 772 static INLINE 773 Bool set_vbits8 ( Addr a, UChar vbits8 ) 774 { 775 Bool ok = True; 776 UChar vabits2 = get_vabits2(a); 777 if ( VA_BITS2_NOACCESS != vabits2 ) { 778 // Addressable. Convert in-register format to in-memory format. 779 // Also remove any existing sec V bit entry for the byte if no 780 // longer necessary. 781 if ( V_BITS8_DEFINED == vbits8 ) { vabits2 = VA_BITS2_DEFINED; } 782 else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; } 783 else { vabits2 = VA_BITS2_PARTDEFINED; 784 set_sec_vbits8(a, vbits8); } 785 set_vabits2(a, vabits2); 786 787 } else { 788 // Unaddressable! Do nothing -- when writing to unaddressable 789 // memory it acts as a black hole, and the V bits can never be seen 790 // again. So we don't have to write them at all. 791 ok = False; 792 } 793 return ok; 794 } 795 796 // Returns False if there was an addressability error. In that case, we put 797 // all defined bits into vbits8. 798 static INLINE 799 Bool get_vbits8 ( Addr a, UChar* vbits8 ) 800 { 801 Bool ok = True; 802 UChar vabits2 = get_vabits2(a); 803 804 // Convert the in-memory format to in-register format. 805 if ( VA_BITS2_DEFINED == vabits2 ) { *vbits8 = V_BITS8_DEFINED; } 806 else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; } 807 else if ( VA_BITS2_NOACCESS == vabits2 ) { 808 *vbits8 = V_BITS8_DEFINED; // Make V bits defined! 809 ok = False; 810 } else { 811 tl_assert( VA_BITS2_PARTDEFINED == vabits2 ); 812 *vbits8 = get_sec_vbits8(a); 813 } 814 return ok; 815 } 816 817 818 /* --------------- Secondary V bit table ------------ */ 819 820 // This table holds the full V bit pattern for partially-defined bytes 821 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow 822 // memory. 823 // 824 // Note: the nodes in this table can become stale. Eg. if you write a PDB, 825 // then overwrite the same address with a fully defined byte, the sec-V-bit 826 // node will not necessarily be removed. This is because checking for 827 // whether removal is necessary would slow down the fast paths. 828 // 829 // To avoid the stale nodes building up too much, we periodically (once the 830 // table reaches a certain size) garbage collect (GC) the table by 831 // traversing it and evicting any nodes not having PDB. 832 // If more than a certain proportion of nodes survived, we increase the 833 // table size so that GCs occur less often. 
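//
// (Illustrative example, assuming a typical little-endian bitfield layout:
// a PDB usually arises when only some bits of a byte are ever written, eg.
//
//    struct { UChar lo:4, hi:4; } s;  // one byte, never initialised
//    s.lo = 3;  // compiled as a read-modify-write of the whole byte
//
// After the store the byte's low 4 V bits are defined but its high 4 are
// still undefined, so its state becomes VA_BITS2_PARTDEFINED and its exact
// V bits have to be parked in this table.)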
834 // 835 // This policy is designed to avoid bad table bloat in the worst case where 836 // a program creates huge numbers of stale PDBs -- we would get this bloat 837 // if we had no GC -- while handling well the case where a node becomes 838 // stale but shortly afterwards is rewritten with a PDB and so becomes 839 // non-stale again (which happens quite often, eg. in perf/bz2). If we just 840 // remove all stale nodes as soon as possible, we just end up re-adding a 841 // lot of them in later again. The "sufficiently stale" approach avoids 842 // this. (If a program has many live PDBs, performance will just suck, 843 // there's no way around that.) 844 // 845 // Further comments, JRS 14 Feb 2012. It turns out that the policy of 846 // holding on to stale entries for 2 GCs before discarding them can lead 847 // to massive space leaks. So we're changing to an arrangement where 848 // lines are evicted as soon as they are observed to be stale during a 849 // GC. This also has a side benefit of allowing the sufficiently_stale 850 // field to be removed from the SecVBitNode struct, reducing its size by 851 // 8 bytes, which is a substantial space saving considering that the 852 // struct was previously 32 or so bytes, on a 64 bit target. 853 // 854 // In order to try and mitigate the problem that the "sufficiently stale" 855 // heuristic was designed to avoid, the table size is allowed to drift 856 // up ("DRIFTUP") slowly to 80000, even if the residency is low. This 857 // means that nodes will exist in the table longer on average, and hopefully 858 // will be deleted and re-added less frequently. 859 // 860 // The previous scaling up mechanism (now called STEPUP) is retained: 861 // if residency exceeds 50%, the table is scaled up, although by a 862 // factor sqrt(2) rather than 2 as before. This effectively doubles the 863 // frequency of GCs when there are many PDBs at reduces the tendency of 864 // stale PDBs to reside for long periods in the table. 865 866 static OSet* secVBitTable; 867 868 // Stats 869 static ULong sec_vbits_new_nodes = 0; 870 static ULong sec_vbits_updates = 0; 871 872 // This must be a power of two; this is checked in mc_pre_clo_init(). 873 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover 874 // a larger address range) they take more space but we can get multiple 875 // partially-defined bytes in one if they are close to each other, reducing 876 // the number of total nodes. In practice sometimes they are clustered (eg. 877 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous 878 // row), but often not. So we choose something intermediate. 879 #define BYTES_PER_SEC_VBIT_NODE 16 880 881 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if 882 // more than this many nodes survive a GC. 883 #define STEPUP_SURVIVOR_PROPORTION 0.5 884 #define STEPUP_GROWTH_FACTOR 1.414213562 885 886 // If the above heuristic doesn't apply, then we may make the table 887 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than 888 // this many nodes survive a GC, _and_ the total table size does 889 // not exceed a fixed limit. The numbers are somewhat arbitrary, but 890 // work tolerably well on long Firefox runs. The scaleup ratio of 1.5% 891 // effectively although gradually reduces residency and increases time 892 // between GCs for programs with small numbers of PDBs. 
The 80000 limit 893 // effectively limits the table size to around 2MB for programs with 894 // small numbers of PDBs, whilst giving a reasonably long lifetime to 895 // entries, to try and reduce the costs resulting from deleting and 896 // re-adding of entries. 897 #define DRIFTUP_SURVIVOR_PROPORTION 0.15 898 #define DRIFTUP_GROWTH_FACTOR 1.015 899 #define DRIFTUP_MAX_SIZE 80000 900 901 // We GC the table when it gets this many nodes in it, ie. it's effectively 902 // the table size. It can change. 903 static Int secVBitLimit = 1000; 904 905 // The number of GCs done, used to age sec-V-bit nodes for eviction. 906 // Because it's unsigned, wrapping doesn't matter -- the right answer will 907 // come out anyway. 908 static UInt GCs_done = 0; 909 910 typedef 911 struct { 912 Addr a; 913 UChar vbits8[BYTES_PER_SEC_VBIT_NODE]; 914 } 915 SecVBitNode; 916 917 static OSet* createSecVBitTable(void) 918 { 919 OSet* newSecVBitTable; 920 newSecVBitTable = VG_(OSetGen_Create_With_Pool) 921 ( offsetof(SecVBitNode, a), 922 NULL, // use fast comparisons 923 VG_(malloc), "mc.cSVT.1 (sec VBit table)", 924 VG_(free), 925 1000, 926 sizeof(SecVBitNode)); 927 return newSecVBitTable; 928 } 929 930 static void gcSecVBitTable(void) 931 { 932 OSet* secVBitTable2; 933 SecVBitNode* n; 934 Int i, n_nodes = 0, n_survivors = 0; 935 936 GCs_done++; 937 938 // Create the new table. 939 secVBitTable2 = createSecVBitTable(); 940 941 // Traverse the table, moving fresh nodes into the new table. 942 VG_(OSetGen_ResetIter)(secVBitTable); 943 while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) { 944 // Keep node if any of its bytes are non-stale. Using 945 // get_vabits2() for the lookup is not very efficient, but I don't 946 // think it matters. 947 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) { 948 if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) { 949 // Found a non-stale byte, so keep => 950 // Insert a copy of the node into the new table. 951 SecVBitNode* n2 = 952 VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode)); 953 *n2 = *n; 954 VG_(OSetGen_Insert)(secVBitTable2, n2); 955 break; 956 } 957 } 958 } 959 960 // Get the before and after sizes. 961 n_nodes = VG_(OSetGen_Size)(secVBitTable); 962 n_survivors = VG_(OSetGen_Size)(secVBitTable2); 963 964 // Destroy the old table, and put the new one in its place. 965 VG_(OSetGen_Destroy)(secVBitTable); 966 secVBitTable = secVBitTable2; 967 968 if (VG_(clo_verbosity) > 1) { 969 Char percbuf[7]; 970 VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf); 971 VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n", 972 n_nodes, n_survivors, percbuf); 973 } 974 975 // Increase table size if necessary. 
976 if ((Double)n_survivors 977 > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) { 978 secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR); 979 if (VG_(clo_verbosity) > 1) 980 VG_(message)(Vg_DebugMsg, 981 "memcheck GC: %d new table size (stepup)\n", 982 secVBitLimit); 983 } 984 else 985 if (secVBitLimit < DRIFTUP_MAX_SIZE 986 && (Double)n_survivors 987 > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) { 988 secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR); 989 if (VG_(clo_verbosity) > 1) 990 VG_(message)(Vg_DebugMsg, 991 "memcheck GC: %d new table size (driftup)\n", 992 secVBitLimit); 993 } 994 } 995 996 static UWord get_sec_vbits8(Addr a) 997 { 998 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE); 999 Int amod = a % BYTES_PER_SEC_VBIT_NODE; 1000 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned); 1001 UChar vbits8; 1002 tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a); 1003 // Shouldn't be fully defined or fully undefined -- those cases shouldn't 1004 // make it to the secondary V bits table. 1005 vbits8 = n->vbits8[amod]; 1006 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8); 1007 return vbits8; 1008 } 1009 1010 static void set_sec_vbits8(Addr a, UWord vbits8) 1011 { 1012 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE); 1013 Int i, amod = a % BYTES_PER_SEC_VBIT_NODE; 1014 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned); 1015 // Shouldn't be fully defined or fully undefined -- those cases shouldn't 1016 // make it to the secondary V bits table. 1017 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8); 1018 if (n) { 1019 n->vbits8[amod] = vbits8; // update 1020 sec_vbits_updates++; 1021 } else { 1022 // Do a table GC if necessary. Nb: do this before creating and 1023 // inserting the new node, to avoid erroneously GC'ing the new node. 1024 if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) { 1025 gcSecVBitTable(); 1026 } 1027 1028 // New node: assign the specific byte, make the rest invalid (they 1029 // should never be read as-is, but be cautious). 1030 n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode)); 1031 n->a = aAligned; 1032 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) { 1033 n->vbits8[i] = V_BITS8_UNDEFINED; 1034 } 1035 n->vbits8[amod] = vbits8; 1036 1037 // Insert the new node. 1038 VG_(OSetGen_Insert)(secVBitTable, n); 1039 sec_vbits_new_nodes++; 1040 1041 n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable); 1042 if (n_secVBit_nodes > max_secVBit_nodes) 1043 max_secVBit_nodes = n_secVBit_nodes; 1044 } 1045 } 1046 1047 /* --------------- Endianness helpers --------------- */ 1048 1049 /* Returns the offset in memory of the byteno-th most significant byte 1050 in a wordszB-sized word, given the specified endianness. */ 1051 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian, 1052 UWord byteno ) { 1053 return bigendian ? 
(wordszB-1-byteno) : byteno; 1054 } 1055 1056 1057 /* --------------- Ignored address ranges --------------- */ 1058 1059 #define M_IGNORE_RANGES 4 1060 1061 typedef 1062 struct { 1063 Int used; 1064 Addr start[M_IGNORE_RANGES]; 1065 Addr end[M_IGNORE_RANGES]; 1066 } 1067 IgnoreRanges; 1068 1069 static IgnoreRanges ignoreRanges; 1070 1071 INLINE Bool MC_(in_ignored_range) ( Addr a ) 1072 { 1073 Int i; 1074 if (LIKELY(ignoreRanges.used == 0)) 1075 return False; 1076 for (i = 0; i < ignoreRanges.used; i++) { 1077 if (a >= ignoreRanges.start[i] && a < ignoreRanges.end[i]) 1078 return True; 1079 } 1080 return False; 1081 } 1082 1083 /* Parse two Addr separated by a dash, or fail. */ 1084 1085 static Bool parse_range ( UChar** ppc, Addr* result1, Addr* result2 ) 1086 { 1087 Bool ok = VG_(parse_Addr) (ppc, result1); 1088 if (!ok) 1089 return False; 1090 if (**ppc != '-') 1091 return False; 1092 (*ppc)++; 1093 ok = VG_(parse_Addr) (ppc, result2); 1094 if (!ok) 1095 return False; 1096 return True; 1097 } 1098 1099 /* Parse a set of ranges separated by commas into 'ignoreRanges', or 1100 fail. */ 1101 1102 static Bool parse_ignore_ranges ( UChar* str0 ) 1103 { 1104 Addr start, end; 1105 Bool ok; 1106 UChar* str = str0; 1107 UChar** ppc = &str; 1108 ignoreRanges.used = 0; 1109 while (1) { 1110 ok = parse_range(ppc, &start, &end); 1111 if (!ok) 1112 return False; 1113 if (ignoreRanges.used >= M_IGNORE_RANGES) 1114 return False; 1115 ignoreRanges.start[ignoreRanges.used] = start; 1116 ignoreRanges.end[ignoreRanges.used] = end; 1117 ignoreRanges.used++; 1118 if (**ppc == 0) 1119 return True; 1120 if (**ppc != ',') 1121 return False; 1122 (*ppc)++; 1123 } 1124 /*NOTREACHED*/ 1125 return False; 1126 } 1127 1128 1129 /* --------------- Load/store slow cases. --------------- */ 1130 1131 static 1132 __attribute__((noinline)) 1133 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian ) 1134 { 1135 PROF_EVENT(30, "mc_LOADVn_slow"); 1136 1137 /* ------------ BEGIN semi-fast cases ------------ */ 1138 /* These deal quickly-ish with the common auxiliary primary map 1139 cases on 64-bit platforms. Are merely a speedup hack; can be 1140 omitted without loss of correctness/functionality. Note that in 1141 both cases the "sizeof(void*) == 8" causes these cases to be 1142 folded out by compilers on 32-bit platforms. These are derived 1143 from LOADV64 and LOADV32. 1144 */ 1145 if (LIKELY(sizeof(void*) == 8 1146 && nBits == 64 && VG_IS_8_ALIGNED(a))) { 1147 SecMap* sm = get_secmap_for_reading(a); 1148 UWord sm_off16 = SM_OFF_16(a); 1149 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16]; 1150 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) 1151 return V_BITS64_DEFINED; 1152 if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) 1153 return V_BITS64_UNDEFINED; 1154 /* else fall into the slow case */ 1155 } 1156 if (LIKELY(sizeof(void*) == 8 1157 && nBits == 32 && VG_IS_4_ALIGNED(a))) { 1158 SecMap* sm = get_secmap_for_reading(a); 1159 UWord sm_off = SM_OFF(a); 1160 UWord vabits8 = sm->vabits8[sm_off]; 1161 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) 1162 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED); 1163 if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) 1164 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED); 1165 /* else fall into slow case */ 1166 } 1167 /* ------------ END semi-fast cases ------------ */ 1168 1169 ULong vbits64 = V_BITS64_UNDEFINED; /* result */ 1170 ULong pessim64 = V_BITS64_DEFINED; /* only used when p-l-ok=yes */ 1171 SSizeT szB = nBits / 8; 1172 SSizeT i; /* Must be signed. 
*/ 1173 SizeT n_addrs_bad = 0; 1174 Addr ai; 1175 UChar vbits8; 1176 Bool ok; 1177 1178 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8); 1179 1180 /* Make up a 64-bit result V word, which contains the loaded data 1181 for valid addresses and Defined for invalid addresses. Iterate 1182 over the bytes in the word, from the most significant down to 1183 the least. The vbits to return are calculated into vbits64. 1184 Also compute the pessimising value to be used when 1185 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant 1186 info can be gleaned from pessim64) but is used as a 1187 cross-check. */ 1188 for (i = szB-1; i >= 0; i--) { 1189 PROF_EVENT(31, "mc_LOADVn_slow(loop)"); 1190 ai = a + byte_offset_w(szB, bigendian, i); 1191 ok = get_vbits8(ai, &vbits8); 1192 vbits64 <<= 8; 1193 vbits64 |= vbits8; 1194 if (!ok) n_addrs_bad++; 1195 pessim64 <<= 8; 1196 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED); 1197 } 1198 1199 /* In the common case, all the addresses involved are valid, so we 1200 just return the computed V bits and have done. */ 1201 if (LIKELY(n_addrs_bad == 0)) 1202 return vbits64; 1203 1204 /* If there's no possibility of getting a partial-loads-ok 1205 exemption, report the error and quit. */ 1206 if (!MC_(clo_partial_loads_ok)) { 1207 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False ); 1208 return vbits64; 1209 } 1210 1211 /* The partial-loads-ok exemption might apply. Find out if it 1212 does. If so, don't report an addressing error, but do return 1213 Undefined for the bytes that are out of range, so as to avoid 1214 false negatives. If it doesn't apply, just report an addressing 1215 error in the usual way. */ 1216 1217 /* Some code steps along byte strings in aligned word-sized chunks 1218 even when there is only a partially defined word at the end (eg, 1219 optimised strlen). This is allowed by the memory model of 1220 modern machines, since an aligned load cannot span two pages and 1221 thus cannot "partially fault", despite such behaviour being 1222 declared undefined by ANSI C/C++. 1223 1224 Therefore, a load from a partially-addressible place is allowed 1225 if all of the following hold: 1226 - the command-line flag is set [by default, it isn't] 1227 - it's a word-sized, word-aligned load 1228 - at least one of the addresses in the word *is* valid 1229 1230 Since this suppresses the addressing error, we avoid false 1231 negatives by marking bytes undefined when they come from an 1232 invalid address. 1233 */ 1234 1235 /* "at least one of the addresses is invalid" */ 1236 tl_assert(pessim64 != V_BITS64_DEFINED); 1237 1238 if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a) 1239 && n_addrs_bad < VG_WORDSIZE) { 1240 /* Exemption applies. Use the previously computed pessimising 1241 value for vbits64 and return the combined result, but don't 1242 flag an addressing error. The pessimising value is Defined 1243 for valid addresses and Undefined for invalid addresses. */ 1244 /* for assumption that doing bitwise or implements UifU */ 1245 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0); 1246 /* (really need "UifU" here...) 1247 vbits64 UifU= pessim64 (is pessimised by it, iow) */ 1248 vbits64 |= pessim64; 1249 return vbits64; 1250 } 1251 1252 /* Exemption doesn't apply. Flag an addressing error in the normal 1253 way.
*/ 1254 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False ); 1255 1256 return vbits64; 1257 } 1258 1259 1260 static 1261 __attribute__((noinline)) 1262 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian ) 1263 { 1264 SizeT szB = nBits / 8; 1265 SizeT i, n_addrs_bad = 0; 1266 UChar vbits8; 1267 Addr ai; 1268 Bool ok; 1269 1270 PROF_EVENT(35, "mc_STOREVn_slow"); 1271 1272 /* ------------ BEGIN semi-fast cases ------------ */ 1273 /* These deal quickly-ish with the common auxiliary primary map 1274 cases on 64-bit platforms. Are merely a speedup hack; can be 1275 omitted without loss of correctness/functionality. Note that in 1276 both cases the "sizeof(void*) == 8" causes these cases to be 1277 folded out by compilers on 32-bit platforms. These are derived 1278 from STOREV64 and STOREV32. 1279 */ 1280 if (LIKELY(sizeof(void*) == 8 1281 && nBits == 64 && VG_IS_8_ALIGNED(a))) { 1282 SecMap* sm = get_secmap_for_reading(a); 1283 UWord sm_off16 = SM_OFF_16(a); 1284 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16]; 1285 if (LIKELY( !is_distinguished_sm(sm) && 1286 (VA_BITS16_DEFINED == vabits16 || 1287 VA_BITS16_UNDEFINED == vabits16) )) { 1288 /* Handle common case quickly: a is suitably aligned, */ 1289 /* is mapped, and is addressible. */ 1290 // Convert full V-bits in register to compact 2-bit form. 1291 if (LIKELY(V_BITS64_DEFINED == vbytes)) { 1292 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED; 1293 return; 1294 } else if (V_BITS64_UNDEFINED == vbytes) { 1295 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED; 1296 return; 1297 } 1298 /* else fall into the slow case */ 1299 } 1300 /* else fall into the slow case */ 1301 } 1302 if (LIKELY(sizeof(void*) == 8 1303 && nBits == 32 && VG_IS_4_ALIGNED(a))) { 1304 SecMap* sm = get_secmap_for_reading(a); 1305 UWord sm_off = SM_OFF(a); 1306 UWord vabits8 = sm->vabits8[sm_off]; 1307 if (LIKELY( !is_distinguished_sm(sm) && 1308 (VA_BITS8_DEFINED == vabits8 || 1309 VA_BITS8_UNDEFINED == vabits8) )) { 1310 /* Handle common case quickly: a is suitably aligned, */ 1311 /* is mapped, and is addressible. */ 1312 // Convert full V-bits in register to compact 2-bit form. 1313 if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) { 1314 sm->vabits8[sm_off] = VA_BITS8_DEFINED; 1315 return; 1316 } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) { 1317 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED; 1318 return; 1319 } 1320 /* else fall into the slow case */ 1321 } 1322 /* else fall into the slow case */ 1323 } 1324 /* ------------ END semi-fast cases ------------ */ 1325 1326 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8); 1327 1328 /* Dump vbytes in memory, iterating from least to most significant 1329 byte. At the same time establish addressibility of the location. */ 1330 for (i = 0; i < szB; i++) { 1331 PROF_EVENT(36, "mc_STOREVn_slow(loop)"); 1332 ai = a + byte_offset_w(szB, bigendian, i); 1333 vbits8 = vbytes & 0xff; 1334 ok = set_vbits8(ai, vbits8); 1335 if (!ok) n_addrs_bad++; 1336 vbytes >>= 8; 1337 } 1338 1339 /* If an address error has happened, report it. */ 1340 if (n_addrs_bad > 0) 1341 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True ); 1342 } 1343 1344 1345 /*------------------------------------------------------------*/ 1346 /*--- Setting permissions over address ranges. 
---*/ 1347 /*------------------------------------------------------------*/ 1348 1349 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16, 1350 UWord dsm_num ) 1351 { 1352 UWord sm_off, sm_off16; 1353 UWord vabits2 = vabits16 & 0x3; 1354 SizeT lenA, lenB, len_to_next_secmap; 1355 Addr aNext; 1356 SecMap* sm; 1357 SecMap** sm_ptr; 1358 SecMap* example_dsm; 1359 1360 PROF_EVENT(150, "set_address_range_perms"); 1361 1362 /* Check the V+A bits make sense. */ 1363 tl_assert(VA_BITS16_NOACCESS == vabits16 || 1364 VA_BITS16_UNDEFINED == vabits16 || 1365 VA_BITS16_DEFINED == vabits16); 1366 1367 // This code should never write PDBs; ensure this. (See comment above 1368 // set_vabits2().) 1369 tl_assert(VA_BITS2_PARTDEFINED != vabits2); 1370 1371 if (lenT == 0) 1372 return; 1373 1374 if (lenT > 256 * 1024 * 1024) { 1375 if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) { 1376 Char* s = "unknown???"; 1377 if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess"; 1378 if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined"; 1379 if (vabits16 == VA_BITS16_DEFINED ) s = "defined"; 1380 VG_(message)(Vg_UserMsg, "Warning: set address range perms: " 1381 "large range [0x%lx, 0x%lx) (%s)\n", 1382 a, a + lenT, s); 1383 } 1384 } 1385 1386 #ifndef PERF_FAST_SARP 1387 /*------------------ debug-only case ------------------ */ 1388 { 1389 // Endianness doesn't matter here because all bytes are being set to 1390 // the same value. 1391 // Nb: We don't have to worry about updating the sec-V-bits table 1392 // after these set_vabits2() calls because this code never writes 1393 // VA_BITS2_PARTDEFINED values. 1394 SizeT i; 1395 for (i = 0; i < lenT; i++) { 1396 set_vabits2(a + i, vabits2); 1397 } 1398 return; 1399 } 1400 #endif 1401 1402 /*------------------ standard handling ------------------ */ 1403 1404 /* Get the distinguished secondary that we might want 1405 to use (part of the space-compression scheme). */ 1406 example_dsm = &sm_distinguished[dsm_num]; 1407 1408 // We have to handle ranges covering various combinations of partial and 1409 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case. 1410 // Cases marked with a '*' are common. 1411 // 1412 // TYPE PARTS USED 1413 // ---- ---------- 1414 // * one partial sec-map (p) 1 1415 // - one whole sec-map (P) 2 1416 // 1417 // * two partial sec-maps (pp) 1,3 1418 // - one partial, one whole sec-map (pP) 1,2 1419 // - one whole, one partial sec-map (Pp) 2,3 1420 // - two whole sec-maps (PP) 2,2 1421 // 1422 // * one partial, one whole, one partial (pPp) 1,2,3 1423 // - one partial, two whole (pPP) 1,2,2 1424 // - two whole, one partial (PPp) 2,2,3 1425 // - three whole (PPP) 2,2,2 1426 // 1427 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3 1428 // - one partial, N-1 whole (pP...PP) 1,2...2,2 1429 // - N-1 whole, one partial (PP...Pp) 2,2...2,3 1430 // - N whole (PP...PP) 2,2...2,3 1431 1432 // Break up total length (lenT) into two parts: length in the first 1433 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB. 1434 aNext = start_of_this_sm(a) + SM_SIZE; 1435 len_to_next_secmap = aNext - a; 1436 if ( lenT <= len_to_next_secmap ) { 1437 // Range entirely within one sec-map. Covers almost all cases. 1438 PROF_EVENT(151, "set_address_range_perms-single-secmap"); 1439 lenA = lenT; 1440 lenB = 0; 1441 } else if (is_start_of_sm(a)) { 1442 // Range spans at least one whole sec-map, and starts at the beginning 1443 // of a sec-map; skip to Part 2. 
1444 PROF_EVENT(152, "set_address_range_perms-startof-secmap"); 1445 lenA = 0; 1446 lenB = lenT; 1447 goto part2; 1448 } else { 1449 // Range spans two or more sec-maps, first one is partial. 1450 PROF_EVENT(153, "set_address_range_perms-multiple-secmaps"); 1451 lenA = len_to_next_secmap; 1452 lenB = lenT - lenA; 1453 } 1454 1455 //------------------------------------------------------------------------ 1456 // Part 1: Deal with the first sec_map. Most of the time the range will be 1457 // entirely within a sec_map and this part alone will suffice. Also, 1458 // doing it this way lets us avoid repeatedly testing for the crossing of 1459 // a sec-map boundary within these loops. 1460 //------------------------------------------------------------------------ 1461 1462 // If it's distinguished, make it undistinguished if necessary. 1463 sm_ptr = get_secmap_ptr(a); 1464 if (is_distinguished_sm(*sm_ptr)) { 1465 if (*sm_ptr == example_dsm) { 1466 // Sec-map already has the V+A bits that we want, so skip. 1467 PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick"); 1468 a = aNext; 1469 lenA = 0; 1470 } else { 1471 PROF_EVENT(155, "set_address_range_perms-dist-sm1"); 1472 *sm_ptr = copy_for_writing(*sm_ptr); 1473 } 1474 } 1475 sm = *sm_ptr; 1476 1477 // 1 byte steps 1478 while (True) { 1479 if (VG_IS_8_ALIGNED(a)) break; 1480 if (lenA < 1) break; 1481 PROF_EVENT(156, "set_address_range_perms-loop1a"); 1482 sm_off = SM_OFF(a); 1483 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) ); 1484 a += 1; 1485 lenA -= 1; 1486 } 1487 // 8-aligned, 8 byte steps 1488 while (True) { 1489 if (lenA < 8) break; 1490 PROF_EVENT(157, "set_address_range_perms-loop8a"); 1491 sm_off16 = SM_OFF_16(a); 1492 ((UShort*)(sm->vabits8))[sm_off16] = vabits16; 1493 a += 8; 1494 lenA -= 8; 1495 } 1496 // 1 byte steps 1497 while (True) { 1498 if (lenA < 1) break; 1499 PROF_EVENT(158, "set_address_range_perms-loop1b"); 1500 sm_off = SM_OFF(a); 1501 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) ); 1502 a += 1; 1503 lenA -= 1; 1504 } 1505 1506 // We've finished the first sec-map. Is that it? 1507 if (lenB == 0) 1508 return; 1509 1510 //------------------------------------------------------------------------ 1511 // Part 2: Fast-set entire sec-maps at a time. 1512 //------------------------------------------------------------------------ 1513 part2: 1514 // 64KB-aligned, 64KB steps. 1515 // Nb: we can reach here with lenB < SM_SIZE 1516 tl_assert(0 == lenA); 1517 while (True) { 1518 if (lenB < SM_SIZE) break; 1519 tl_assert(is_start_of_sm(a)); 1520 PROF_EVENT(159, "set_address_range_perms-loop64K"); 1521 sm_ptr = get_secmap_ptr(a); 1522 if (!is_distinguished_sm(*sm_ptr)) { 1523 PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm"); 1524 // Free the non-distinguished sec-map that we're replacing. This 1525 // case happens moderately often, enough to be worthwhile. 1526 SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap)); 1527 tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n"); 1528 } 1529 update_SM_counts(*sm_ptr, example_dsm); 1530 // Make the sec-map entry point to the example DSM 1531 *sm_ptr = example_dsm; 1532 lenB -= SM_SIZE; 1533 a += SM_SIZE; 1534 } 1535 1536 // We've finished the whole sec-maps. Is that it? 1537 if (lenB == 0) 1538 return; 1539 1540 //------------------------------------------------------------------------ 1541 // Part 3: Finish off the final partial sec-map, if necessary. 
1542 //------------------------------------------------------------------------ 1543 1544 tl_assert(is_start_of_sm(a) && lenB < SM_SIZE); 1545 1546 // If it's distinguished, make it undistinguished if necessary. 1547 sm_ptr = get_secmap_ptr(a); 1548 if (is_distinguished_sm(*sm_ptr)) { 1549 if (*sm_ptr == example_dsm) { 1550 // Sec-map already has the V+A bits that we want, so stop. 1551 PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick"); 1552 return; 1553 } else { 1554 PROF_EVENT(162, "set_address_range_perms-dist-sm2"); 1555 *sm_ptr = copy_for_writing(*sm_ptr); 1556 } 1557 } 1558 sm = *sm_ptr; 1559 1560 // 8-aligned, 8 byte steps 1561 while (True) { 1562 if (lenB < 8) break; 1563 PROF_EVENT(163, "set_address_range_perms-loop8b"); 1564 sm_off16 = SM_OFF_16(a); 1565 ((UShort*)(sm->vabits8))[sm_off16] = vabits16; 1566 a += 8; 1567 lenB -= 8; 1568 } 1569 // 1 byte steps 1570 while (True) { 1571 if (lenB < 1) return; 1572 PROF_EVENT(164, "set_address_range_perms-loop1c"); 1573 sm_off = SM_OFF(a); 1574 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) ); 1575 a += 1; 1576 lenB -= 1; 1577 } 1578 } 1579 1580 1581 /* --- Set permissions for arbitrary address ranges --- */ 1582 1583 void MC_(make_mem_noaccess) ( Addr a, SizeT len ) 1584 { 1585 PROF_EVENT(40, "MC_(make_mem_noaccess)"); 1586 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len); 1587 set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS ); 1588 if (UNLIKELY( MC_(clo_mc_level) == 3 )) 1589 ocache_sarp_Clear_Origins ( a, len ); 1590 } 1591 1592 static void make_mem_undefined ( Addr a, SizeT len ) 1593 { 1594 PROF_EVENT(41, "make_mem_undefined"); 1595 DEBUG("make_mem_undefined(%p, %lu)\n", a, len); 1596 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED ); 1597 } 1598 1599 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag ) 1600 { 1601 PROF_EVENT(41, "MC_(make_mem_undefined)"); 1602 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len); 1603 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED ); 1604 if (UNLIKELY( MC_(clo_mc_level) == 3 )) 1605 ocache_sarp_Set_Origins ( a, len, otag ); 1606 } 1607 1608 static 1609 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len, 1610 ThreadId tid, UInt okind ) 1611 { 1612 UInt ecu; 1613 ExeContext* here; 1614 /* VG_(record_ExeContext) checks for validity of tid, and asserts 1615 if it is invalid. So no need to do it here. */ 1616 tl_assert(okind <= 3); 1617 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ ); 1618 tl_assert(here); 1619 ecu = VG_(get_ECU_from_ExeContext)(here); 1620 tl_assert(VG_(is_plausible_ECU)(ecu)); 1621 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind ); 1622 } 1623 1624 static 1625 void make_mem_undefined_w_tid ( Addr a, SizeT len, ThreadId tid ) { 1626 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN ); 1627 } 1628 1629 1630 void MC_(make_mem_defined) ( Addr a, SizeT len ) 1631 { 1632 PROF_EVENT(42, "MC_(make_mem_defined)"); 1633 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len); 1634 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED ); 1635 if (UNLIKELY( MC_(clo_mc_level) == 3 )) 1636 ocache_sarp_Clear_Origins ( a, len ); 1637 } 1638 1639 /* For each byte in [a,a+len), if the byte is addressable, make it be 1640 defined, but if it isn't addressible, leave it alone. In other 1641 words a version of MC_(make_mem_defined) that doesn't mess with 1642 addressibility. Low-performance implementation. 
*/ 1643 static void make_mem_defined_if_addressable ( Addr a, SizeT len ) 1644 { 1645 SizeT i; 1646 UChar vabits2; 1647 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len); 1648 for (i = 0; i < len; i++) { 1649 vabits2 = get_vabits2( a+i ); 1650 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) { 1651 set_vabits2(a+i, VA_BITS2_DEFINED); 1652 if (UNLIKELY(MC_(clo_mc_level) >= 3)) { 1653 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */ 1654 } 1655 } 1656 } 1657 } 1658 1659 /* Similarly (needed for mprotect handling ..) */ 1660 static void make_mem_defined_if_noaccess ( Addr a, SizeT len ) 1661 { 1662 SizeT i; 1663 UChar vabits2; 1664 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len); 1665 for (i = 0; i < len; i++) { 1666 vabits2 = get_vabits2( a+i ); 1667 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) { 1668 set_vabits2(a+i, VA_BITS2_DEFINED); 1669 if (UNLIKELY(MC_(clo_mc_level) >= 3)) { 1670 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */ 1671 } 1672 } 1673 } 1674 } 1675 1676 /* --- Block-copy permissions (needed for implementing realloc() and 1677 sys_mremap). --- */ 1678 1679 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len ) 1680 { 1681 SizeT i, j; 1682 UChar vabits2, vabits8; 1683 Bool aligned, nooverlap; 1684 1685 DEBUG("MC_(copy_address_range_state)\n"); 1686 PROF_EVENT(50, "MC_(copy_address_range_state)"); 1687 1688 if (len == 0 || src == dst) 1689 return; 1690 1691 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst); 1692 nooverlap = src+len <= dst || dst+len <= src; 1693 1694 if (nooverlap && aligned) { 1695 1696 /* Vectorised fast case, when no overlap and suitably aligned */ 1697 /* vector loop */ 1698 i = 0; 1699 while (len >= 4) { 1700 vabits8 = get_vabits8_for_aligned_word32( src+i ); 1701 set_vabits8_for_aligned_word32( dst+i, vabits8 ); 1702 if (LIKELY(VA_BITS8_DEFINED == vabits8 1703 || VA_BITS8_UNDEFINED == vabits8 1704 || VA_BITS8_NOACCESS == vabits8)) { 1705 /* do nothing */ 1706 } else { 1707 /* have to copy secondary map info */ 1708 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 )) 1709 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) ); 1710 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 )) 1711 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) ); 1712 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 )) 1713 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) ); 1714 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 )) 1715 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) ); 1716 } 1717 i += 4; 1718 len -= 4; 1719 } 1720 /* fixup loop */ 1721 while (len >= 1) { 1722 vabits2 = get_vabits2( src+i ); 1723 set_vabits2( dst+i, vabits2 ); 1724 if (VA_BITS2_PARTDEFINED == vabits2) { 1725 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) ); 1726 } 1727 i++; 1728 len--; 1729 } 1730 1731 } else { 1732 1733 /* We have to do things the slow way */ 1734 if (src < dst) { 1735 for (i = 0, j = len-1; i < len; i++, j--) { 1736 PROF_EVENT(51, "MC_(copy_address_range_state)(loop)"); 1737 vabits2 = get_vabits2( src+j ); 1738 set_vabits2( dst+j, vabits2 ); 1739 if (VA_BITS2_PARTDEFINED == vabits2) { 1740 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) ); 1741 } 1742 } 1743 } 1744 1745 if (src > dst) { 1746 for (i = 0; i < len; i++) { 1747 PROF_EVENT(52, "MC_(copy_address_range_state)(loop)"); 1748 vabits2 = get_vabits2( src+i ); 1749 set_vabits2( dst+i, vabits2 ); 1750 if (VA_BITS2_PARTDEFINED == vabits2) { 1751 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) ); 1752 } 1753 } 1754 } 1755 } 1756 1757 } 1758 1759 1760 
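/* Illustrative sketch only, not part of the implementation: a typical
   client of MC_(copy_address_range_state) is a realloc-style handler
   that moves a heap block and then marks any newly added tail as
   undefined.  Assuming hypothetical variables old_a, new_a, old_szB,
   new_szB and an already-computed origin tag 'otag', the shadow-state
   part of such a handler would look roughly like:

      MC_(copy_address_range_state)( old_a, new_a, old_szB );
      if (new_szB > old_szB)
         MC_(make_mem_undefined_w_otag)( new_a + old_szB,
                                         new_szB - old_szB, otag );

   The copy preserves per-byte V+A state, including the secondary V
   bits of any PARTDEFINED bytes, so the caller need not recompute
   them. */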
/*------------------------------------------------------------*/ 1761 /*--- Origin tracking stuff - cache basics ---*/ 1762 /*------------------------------------------------------------*/ 1763 1764 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION 1765 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1766 1767 Note that this implementation draws inspiration from the "origin 1768 tracking by value piggybacking" scheme described in "Tracking Bad 1769 Apples: Reporting the Origin of Null and Undefined Value Errors" 1770 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer, 1771 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is 1772 implemented completely differently. 1773 1774 Origin tags and ECUs -- about the shadow values 1775 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1776 1777 This implementation tracks the defining point of all uninitialised 1778 values using so-called "origin tags", which are 32-bit integers, 1779 rather than using the values themselves to encode the origins. The 1780 latter, so-called "value piggybacking", is what the OOPSLA07 paper 1781 describes. 1782 1783 Origin tags, as tracked by the machinery below, are 32-bit unsigned 1784 ints (UInts), regardless of the machine's word size. Each tag 1785 comprises an upper 30-bit ECU field and a lower 2-bit 1786 'kind' field. The ECU field is a number given out by m_execontext 1787 and has a 1-1 mapping with ExeContext*s. An ECU can be used 1788 directly as an origin tag (otag), but in fact we want to put 1789 additional information in the 'kind' field to indicate roughly where the 1790 tag came from. This helps print more understandable error messages 1791 for the user -- it has no other purpose. In summary: 1792 1793 * Both ECUs and origin tags are represented as 32-bit words 1794 1795 * m_execontext and the core-tool interface deal purely in ECUs. 1796 They have no knowledge of origin tags - that is a purely 1797 Memcheck-internal matter. 1798 1799 * all valid ECUs have the lowest 2 bits zero and at least 1800 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU)) 1801 1802 * to convert from an ECU to an otag, OR in one of the MC_OKIND_ 1803 constants defined in mc_include.h. 1804 1805 * to convert an otag back to an ECU, AND it with ~3 (a short illustrative sketch of these conversions appears at the end of this comment) 1806 1807 One important fact is that no valid otag is zero. A zero otag is 1808 used by the implementation to indicate "no origin", which could 1809 mean that either the value is defined, or it is undefined but the 1810 implementation somehow managed to lose the origin. 1811 1812 The ECU used for memory created by malloc etc is derived from the 1813 stack trace at the time the malloc etc happens. This means the 1814 mechanism can show the exact allocation point for heap-created 1815 uninitialised values. 1816 1817 In contrast, it is simply too expensive to create a complete 1818 backtrace for each stack allocation. Therefore we merely use a 1819 depth-1 backtrace for stack allocations, which can be done once at 1820 translation time, rather than N times at run time. The result of 1821 this is that, for stack created uninitialised values, Memcheck can 1822 only show the allocating function, and not what called it. 1823 Furthermore, compilers tend to move the stack pointer just once at 1824 the start of the function, to allocate all locals, and so in fact 1825 the stack origin almost always simply points to the opening brace 1826 of the function.
Net result is, for stack origins, the mechanism 1827 can tell you in which function the undefined value was created, but 1828 that's all. Users will need to carefully check all locals in the 1829 specified function. 1830 1831 Shadowing registers and memory 1832 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1833 1834 Memory is shadowed using a two level cache structure (ocacheL1 and 1835 ocacheL2). Memory references are first directed to ocacheL1. This 1836 is a traditional 2-way set associative cache with 32-byte lines and 1837 approximate LRU replacement within each set. 1838 1839 A naive implementation would require storing one 32 bit otag for 1840 each byte of memory covered, a 4:1 space overhead. Instead, there 1841 is one otag for every 4 bytes of memory covered, plus a 4-bit mask 1842 that shows which of the 4 bytes have that shadow value and which 1843 have a shadow value of zero (indicating no origin). Hence a lot of 1844 space is saved, but the cost is that only one different origin per 1845 4 bytes of address space can be represented. This is a source of 1846 imprecision, but how much of a problem it really is remains to be 1847 seen. 1848 1849 A cache line that contains all zeroes ("no origins") contains no 1850 useful information, and can be ejected from the L1 cache "for 1851 free", in the sense that a read miss on the L1 causes a line of 1852 zeroes to be installed. However, ejecting a line containing 1853 nonzeroes risks losing origin information permanently. In order to 1854 prevent such lossage, ejected nonzero lines are placed in a 1855 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache 1856 lines. This can grow arbitrarily large, and so should ensure that 1857 Memcheck runs out of memory in preference to losing useful origin 1858 info due to cache size limitations. 1859 1860 Shadowing registers is a bit tricky, because the shadow values are 1861 32 bits, regardless of the size of the register. That gives a 1862 problem for registers smaller than 32 bits. The solution is to 1863 find spaces in the guest state that are unused, and use those to 1864 shadow guest state fragments smaller than 32 bits. For example, on 1865 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the 1866 shadow are allocated for the register's otag, then there are still 1867 12 bytes left over which could be used to shadow 3 other values. 1868 1869 This implies there is some non-obvious mapping from guest state 1870 (start,length) pairs to the relevant shadow offset (for the origin 1871 tags). And it is unfortunately guest-architecture specific. The 1872 mapping is contained in mc_machine.c, which is quite lengthy but 1873 straightforward. 1874 1875 Instrumenting the IR 1876 ~~~~~~~~~~~~~~~~~~~~ 1877 1878 Instrumentation is largely straightforward, and done by the 1879 functions schemeE and schemeS in mc_translate.c. These generate 1880 code for handling the origin tags of expressions (E) and statements 1881 (S) respectively. The rather strange names are a reference to the 1882 "compilation schemes" shown in Simon Peyton Jones' book "The 1883 Implementation of Functional Programming Languages" (Prentice Hall, 1884 1987, see 1885 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm). 1886 1887 schemeS merely arranges to move shadow values around the guest 1888 state to track the incoming IR. schemeE is largely trivial too. 1889 The only significant point is how to compute the otag corresponding 1890 to binary (or ternary, quaternary, etc) operator applications. 
The 1891 rule is simple: just take whichever value is larger (32-bit 1892 unsigned max). Constants get the special value zero. Hence this 1893 rule always propagates a nonzero (known) otag in preference to a 1894 zero (unknown, or more likely, value-is-defined) tag, as we want. 1895 If two different undefined values are inputs to a binary operator 1896 application, then which is propagated is arbitrary, but that 1897 doesn't matter, since the program is erroneous in using either of 1898 the values, and so there's no point in attempting to propagate 1899 both. 1900 1901 Since constants are abstracted to (otag) zero, much of the 1902 instrumentation code can be folded out without difficulty by the 1903 generic post-instrumentation IR cleanup pass, using these rules: 1904 Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y are 1905 constants is evaluated at JIT time, followed by the resulting dead code 1906 removal. In practice this causes surprisingly few Max32Us to 1907 survive through to backend code generation. 1908 1909 Integration with the V-bits machinery 1910 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1911 1912 This is again largely straightforward. Mostly the otag and V bits 1913 stuff are independent. The only point of interaction is when the V 1914 bits instrumenter creates a call to a helper function to report an 1915 uninitialised value error -- in that case it must first use schemeE 1916 to get hold of the origin tag expression for the value, and pass 1917 that to the helper too. 1918 1919 There is the usual stuff to do with setting address range 1920 permissions. When memory is painted undefined, we must also know 1921 the origin tag to paint with, which involves some tedious plumbing, 1922 particularly to do with the fast case stack handlers. When memory 1923 is painted defined or noaccess then the origin tags must be forced 1924 to zero. 1925 1926 One of the goals of the implementation was to ensure that the 1927 non-origin tracking mode isn't slowed down at all. To do this, 1928 various functions to do with memory permissions setting (again, 1929 mostly pertaining to the stack) are duplicated for the with- and 1930 without-otag case. 1931 1932 Dealing with stack redzones, and the NIA cache 1933 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1934 1935 This is one of the few non-obvious parts of the implementation. 1936 1937 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small 1938 reserved area below the stack pointer, that can be used as scratch 1939 space by compiler generated code for functions. In the Memcheck 1940 sources this is referred to as the "stack redzone". The important 1941 thing here is that such redzones are considered volatile across 1942 function calls and returns. So Memcheck takes care to mark them as 1943 undefined for each call and return, on the afflicted platforms. 1944 Past experience shows this is essential in order to get reliable 1945 messages about uninitialised values that come from the stack. 1946 1947 So the question is, when we paint a redzone undefined, what origin 1948 tag should we use for it? Consider a function f() calling g(). If 1949 we paint the redzone using an otag derived from the ExeContext of 1950 the CALL/BL instruction in f, then any errors in g causing it to 1951 use uninitialised values that happen to lie in the redzone, will be 1952 reported as having their origin in f. Which is highly confusing.
1953 1954 The same applies for returns: if, on a return, we paint the redzone 1955 using a origin tag derived from the ExeContext of the RET/BLR 1956 instruction in g, then any later errors in f causing it to use 1957 uninitialised values in the redzone, will be reported as having 1958 their origin in g. Which is just as confusing. 1959 1960 To do it right, in both cases we need to use an origin tag which 1961 pertains to the instruction which dynamically follows the CALL/BL 1962 or RET/BLR. In short, one derived from the NIA - the "next 1963 instruction address". 1964 1965 To make this work, Memcheck's redzone-painting helper, 1966 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the 1967 NIA. It converts the NIA to a 1-element ExeContext, and uses that 1968 ExeContext's ECU as the basis for the otag used to paint the 1969 redzone. The expensive part of this is converting an NIA into an 1970 ECU, since this happens once for every call and every return. So 1971 we use a simple 511-line, 2-way set associative cache 1972 (nia_to_ecu_cache) to cache the mappings, and that knocks most of 1973 the cost out. 1974 1975 Further background comments 1976 ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1977 1978 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't 1979 > it really just the address of the relevant ExeContext? 1980 1981 Well, it's not the address, but a value which has a 1-1 mapping 1982 with ExeContexts, and is guaranteed not to be zero, since zero 1983 denotes (to memcheck) "unknown origin or defined value". So these 1984 UInts are just numbers starting at 4 and incrementing by 4; each 1985 ExeContext is given a number when it is created. (*** NOTE this 1986 confuses otags and ECUs; see comments above ***). 1987 1988 Making these otags 32-bit regardless of the machine's word size 1989 makes the 64-bit implementation easier (next para). And it doesn't 1990 really limit us in any way, since for the tags to overflow would 1991 require that the program somehow caused 2^30-1 different 1992 ExeContexts to be created, in which case it is probably in deep 1993 trouble. Not to mention V will have soaked up many tens of 1994 gigabytes of memory merely to store them all. 1995 1996 So having 64-bit origins doesn't really buy you anything, and has 1997 the following downsides: 1998 1999 Suppose that instead, an otag is a UWord. This would mean that, on 2000 a 64-bit target, 2001 2002 1. It becomes hard to shadow any element of guest state which is 2003 smaller than 8 bytes. To do so means you'd need to find some 2004 8-byte-sized hole in the guest state which you don't want to 2005 shadow, and use that instead to hold the otag. On ppc64, the 2006 condition code register(s) are split into 20 UChar sized pieces, 2007 all of which need to be tracked (guest_XER_SO .. guest_CR7_0) 2008 and so that would entail finding 160 bytes somewhere else in the 2009 guest state. 2010 2011 Even on x86, I want to track origins for %AH .. %DH (bits 15:8 2012 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of 2013 same) and so I had to look for 4 untracked otag-sized areas in 2014 the guest state to make that possible. 2015 2016 The same problem exists of course when origin tags are only 32 2017 bits, but it's less extreme. 2018 2019 2. (More compelling) it doubles the size of the origin shadow 2020 memory. Given that the shadow memory is organised as a fixed 2021 size cache, and that accuracy of tracking is limited by origins 2022 falling out the cache due to space conflicts, this isn't good. 
2023 2024 > Another question: is the origin tracking perfect, or are there 2025 > cases where it fails to determine an origin? 2026 2027 It is imperfect for at least the following reasons, and 2028 probably more: 2029 2030 * Insufficient capacity in the origin cache. When a line is 2031 evicted from the cache it is gone forever, and so subsequent 2032 queries for the line produce zero, indicating no origin 2033 information. Interestingly, a line containing all zeroes can be 2034 evicted "free" from the cache, since it contains no useful 2035 information, so there is scope perhaps for some cleverer cache 2036 management schemes. (*** NOTE, with the introduction of the 2037 second level origin tag cache, ocacheL2, this is no longer a 2038 problem. ***) 2039 2040 * The origin cache only stores one otag per 32-bits of address 2041 space, plus 4 bits indicating which of the 4 bytes has that tag 2042 and which are considered defined. The result is that if two 2043 undefined bytes in the same word are stored in memory, the first 2044 stored byte's origin will be lost and replaced by the origin for 2045 the second byte. 2046 2047 * Nonzero origin tags for defined values. Consider a binary 2048 operator application op(x,y). Suppose y is undefined (and so has 2049 a valid nonzero origin tag), and x is defined, but erroneously 2050 has a nonzero origin tag (defined values should have tag zero). 2051 If the erroneous tag has a numeric value greater than y's tag, 2052 then the rule for propagating origin tags through binary 2053 operations, which is simply to take the unsigned max of the two 2054 tags, will erroneously propagate x's tag rather than y's. 2055 2056 * Some obscure uses of x86/amd64 byte registers can cause lossage 2057 or confusion of origins. %AH .. %DH are treated as different 2058 from, and unrelated to, their parent registers, %EAX .. %EDX. 2059 So some weird sequences like 2060 2061 movb undefined-value, %AH 2062 movb defined-value, %AL 2063 .. use %AX or %EAX .. 2064 2065 will cause the origin attributed to %AH to be ignored, since %AL, 2066 %AX, %EAX are treated as the same register, and %AH as a 2067 completely separate one. 2068 2069 But having said all that, it actually seems to work fairly well in 2070 practice.
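   A short illustrative sketch (the helper names here are hypothetical,
   purely for illustration) of the ECU <-> otag conversions and the
   propagation rule described above, with MC_OKIND_* being the 'kind'
   constants from mc_include.h:

      UInt example_ecu_to_otag ( UInt ecu )  { return ecu | MC_OKIND_STACK; }
      UInt example_otag_to_ecu ( UInt otag ) { return otag & ~3u; }
      UInt example_merge_otags ( UInt t1, UInt t2 )
         { return t1 > t2 ? t1 : t2; }

   example_merge_otags is just the 32-bit unsigned max used when
   propagating otags through operator applications; since defined
   values carry otag zero, merging a defined value's otag with an
   undefined value's otag correctly yields the undefined value's otag.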
2071 */ 2072 2073 static UWord stats_ocacheL1_find = 0; 2074 static UWord stats_ocacheL1_found_at_1 = 0; 2075 static UWord stats_ocacheL1_found_at_N = 0; 2076 static UWord stats_ocacheL1_misses = 0; 2077 static UWord stats_ocacheL1_lossage = 0; 2078 static UWord stats_ocacheL1_movefwds = 0; 2079 2080 static UWord stats__ocacheL2_refs = 0; 2081 static UWord stats__ocacheL2_misses = 0; 2082 static UWord stats__ocacheL2_n_nodes_max = 0; 2083 2084 /* Cache of 32-bit values, one every 32 bits of address space */ 2085 2086 #define OC_BITS_PER_LINE 5 2087 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2)) 2088 2089 static INLINE UWord oc_line_offset ( Addr a ) { 2090 return (a >> 2) & (OC_W32S_PER_LINE - 1); 2091 } 2092 static INLINE Bool is_valid_oc_tag ( Addr tag ) { 2093 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1)); 2094 } 2095 2096 #define OC_LINES_PER_SET 2 2097 2098 #define OC_N_SET_BITS 20 2099 #define OC_N_SETS (1 << OC_N_SET_BITS) 2100 2101 /* These settings give: 2102 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful 2103 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful 2104 */ 2105 2106 #define OC_MOVE_FORWARDS_EVERY_BITS 7 2107 2108 2109 typedef 2110 struct { 2111 Addr tag; 2112 UInt w32[OC_W32S_PER_LINE]; 2113 UChar descr[OC_W32S_PER_LINE]; 2114 } 2115 OCacheLine; 2116 2117 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not 2118 in use, 'n' (nonzero) if it contains at least one valid origin tag, 2119 and 'z' if all the represented tags are zero. */ 2120 static UChar classify_OCacheLine ( OCacheLine* line ) 2121 { 2122 UWord i; 2123 if (line->tag == 1/*invalid*/) 2124 return 'e'; /* EMPTY */ 2125 tl_assert(is_valid_oc_tag(line->tag)); 2126 for (i = 0; i < OC_W32S_PER_LINE; i++) { 2127 tl_assert(0 == ((~0xF) & line->descr[i])); 2128 if (line->w32[i] > 0 && line->descr[i] > 0) 2129 return 'n'; /* NONZERO - contains useful info */ 2130 } 2131 return 'z'; /* ZERO - no useful info */ 2132 } 2133 2134 typedef 2135 struct { 2136 OCacheLine line[OC_LINES_PER_SET]; 2137 } 2138 OCacheSet; 2139 2140 typedef 2141 struct { 2142 OCacheSet set[OC_N_SETS]; 2143 } 2144 OCache; 2145 2146 static OCache* ocacheL1 = NULL; 2147 static UWord ocacheL1_event_ctr = 0; 2148 2149 static void init_ocacheL2 ( void ); /* fwds */ 2150 static void init_OCache ( void ) 2151 { 2152 UWord line, set; 2153 tl_assert(MC_(clo_mc_level) >= 3); 2154 tl_assert(ocacheL1 == NULL); 2155 ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache)); 2156 if (ocacheL1 == NULL) { 2157 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1", 2158 sizeof(OCache) ); 2159 } 2160 tl_assert(ocacheL1 != NULL); 2161 for (set = 0; set < OC_N_SETS; set++) { 2162 for (line = 0; line < OC_LINES_PER_SET; line++) { 2163 ocacheL1->set[set].line[line].tag = 1/*invalid*/; 2164 } 2165 } 2166 init_ocacheL2(); 2167 } 2168 2169 static void moveLineForwards ( OCacheSet* set, UWord lineno ) 2170 { 2171 OCacheLine tmp; 2172 stats_ocacheL1_movefwds++; 2173 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET); 2174 tmp = set->line[lineno-1]; 2175 set->line[lineno-1] = set->line[lineno]; 2176 set->line[lineno] = tmp; 2177 } 2178 2179 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) { 2180 UWord i; 2181 for (i = 0; i < OC_W32S_PER_LINE; i++) { 2182 line->w32[i] = 0; /* NO ORIGIN */ 2183 line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! 
*/ 2184 } 2185 line->tag = tag; 2186 } 2187 2188 ////////////////////////////////////////////////////////////// 2189 //// OCache backing store 2190 2191 static OSet* ocacheL2 = NULL; 2192 2193 static void* ocacheL2_malloc ( HChar* cc, SizeT szB ) { 2194 return VG_(malloc)(cc, szB); 2195 } 2196 static void ocacheL2_free ( void* v ) { 2197 VG_(free)( v ); 2198 } 2199 2200 /* Stats: # nodes currently in tree */ 2201 static UWord stats__ocacheL2_n_nodes = 0; 2202 2203 static void init_ocacheL2 ( void ) 2204 { 2205 tl_assert(!ocacheL2); 2206 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */ 2207 tl_assert(0 == offsetof(OCacheLine,tag)); 2208 ocacheL2 2209 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag), 2210 NULL, /* fast cmp */ 2211 ocacheL2_malloc, "mc.ioL2", ocacheL2_free); 2212 tl_assert(ocacheL2); 2213 stats__ocacheL2_n_nodes = 0; 2214 } 2215 2216 /* Find line with the given tag in the tree, or NULL if not found. */ 2217 static OCacheLine* ocacheL2_find_tag ( Addr tag ) 2218 { 2219 OCacheLine* line; 2220 tl_assert(is_valid_oc_tag(tag)); 2221 stats__ocacheL2_refs++; 2222 line = VG_(OSetGen_Lookup)( ocacheL2, &tag ); 2223 return line; 2224 } 2225 2226 /* Delete the line with the given tag from the tree, if it is present, and 2227 free up the associated memory. */ 2228 static void ocacheL2_del_tag ( Addr tag ) 2229 { 2230 OCacheLine* line; 2231 tl_assert(is_valid_oc_tag(tag)); 2232 stats__ocacheL2_refs++; 2233 line = VG_(OSetGen_Remove)( ocacheL2, &tag ); 2234 if (line) { 2235 VG_(OSetGen_FreeNode)(ocacheL2, line); 2236 tl_assert(stats__ocacheL2_n_nodes > 0); 2237 stats__ocacheL2_n_nodes--; 2238 } 2239 } 2240 2241 /* Add a copy of the given line to the tree. It must not already be 2242 present. */ 2243 static void ocacheL2_add_line ( OCacheLine* line ) 2244 { 2245 OCacheLine* copy; 2246 tl_assert(is_valid_oc_tag(line->tag)); 2247 copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) ); 2248 tl_assert(copy); 2249 *copy = *line; 2250 stats__ocacheL2_refs++; 2251 VG_(OSetGen_Insert)( ocacheL2, copy ); 2252 stats__ocacheL2_n_nodes++; 2253 if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max) 2254 stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes; 2255 } 2256 2257 //// 2258 ////////////////////////////////////////////////////////////// 2259 2260 __attribute__((noinline)) 2261 static OCacheLine* find_OCacheLine_SLOW ( Addr a ) 2262 { 2263 OCacheLine *victim, *inL2; 2264 UChar c; 2265 UWord line; 2266 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1); 2267 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1); 2268 UWord tag = a & tagmask; 2269 tl_assert(setno >= 0 && setno < OC_N_SETS); 2270 2271 /* we already tried line == 0; skip therefore. */ 2272 for (line = 1; line < OC_LINES_PER_SET; line++) { 2273 if (ocacheL1->set[setno].line[line].tag == tag) { 2274 if (line == 1) { 2275 stats_ocacheL1_found_at_1++; 2276 } else { 2277 stats_ocacheL1_found_at_N++; 2278 } 2279 if (UNLIKELY(0 == (ocacheL1_event_ctr++ 2280 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) { 2281 moveLineForwards( &ocacheL1->set[setno], line ); 2282 line--; 2283 } 2284 return &ocacheL1->set[setno].line[line]; 2285 } 2286 } 2287 2288 /* A miss. Use the last slot. Implicitly this means we're 2289 ejecting the line in the last slot. */ 2290 stats_ocacheL1_misses++; 2291 tl_assert(line == OC_LINES_PER_SET); 2292 line--; 2293 tl_assert(line > 0); 2294 2295 /* First, move the to-be-ejected line to the L2 cache. 
*/ 2296 victim = &ocacheL1->set[setno].line[line]; 2297 c = classify_OCacheLine(victim); 2298 switch (c) { 2299 case 'e': 2300 /* the line is empty (has invalid tag); ignore it. */ 2301 break; 2302 case 'z': 2303 /* line contains zeroes. We must ensure the backing store is 2304 updated accordingly, either by copying the line there 2305 verbatim, or by ensuring it isn't present there. We 2306 choose the latter on the basis that it reduces the size of 2307 the backing store. */ 2308 ocacheL2_del_tag( victim->tag ); 2309 break; 2310 case 'n': 2311 /* line contains at least one real, useful origin. Copy it 2312 to the backing store. */ 2313 stats_ocacheL1_lossage++; 2314 inL2 = ocacheL2_find_tag( victim->tag ); 2315 if (inL2) { 2316 *inL2 = *victim; 2317 } else { 2318 ocacheL2_add_line( victim ); 2319 } 2320 break; 2321 default: 2322 tl_assert(0); 2323 } 2324 2325 /* Now we must reload the L1 cache from the backing tree, if 2326 possible. */ 2327 tl_assert(tag != victim->tag); /* stay sane */ 2328 inL2 = ocacheL2_find_tag( tag ); 2329 if (inL2) { 2330 /* We're in luck. It's in the L2. */ 2331 ocacheL1->set[setno].line[line] = *inL2; 2332 } else { 2333 /* Missed at both levels of the cache hierarchy. We have to 2334 declare it as full of zeroes (unknown origins). */ 2335 stats__ocacheL2_misses++; 2336 zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag ); 2337 } 2338 2339 /* Move it one forwards */ 2340 moveLineForwards( &ocacheL1->set[setno], line ); 2341 line--; 2342 2343 return &ocacheL1->set[setno].line[line]; 2344 } 2345 2346 static INLINE OCacheLine* find_OCacheLine ( Addr a ) 2347 { 2348 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1); 2349 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1); 2350 UWord tag = a & tagmask; 2351 2352 stats_ocacheL1_find++; 2353 2354 if (OC_ENABLE_ASSERTIONS) { 2355 tl_assert(setno >= 0 && setno < OC_N_SETS); 2356 tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1))); 2357 } 2358 2359 if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) { 2360 return &ocacheL1->set[setno].line[0]; 2361 } 2362 2363 return find_OCacheLine_SLOW( a ); 2364 } 2365 2366 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag ) 2367 { 2368 //// BEGIN inlined, specialised version of MC_(helperc_b_store8) 2369 //// Set the origins for a+0 ..
a+7 2370 { OCacheLine* line; 2371 UWord lineoff = oc_line_offset(a); 2372 if (OC_ENABLE_ASSERTIONS) { 2373 tl_assert(lineoff >= 0 2374 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/); 2375 } 2376 line = find_OCacheLine( a ); 2377 line->descr[lineoff+0] = 0xF; 2378 line->descr[lineoff+1] = 0xF; 2379 line->w32[lineoff+0] = otag; 2380 line->w32[lineoff+1] = otag; 2381 } 2382 //// END inlined, specialised version of MC_(helperc_b_store8) 2383 } 2384 2385 2386 /*------------------------------------------------------------*/ 2387 /*--- Aligned fast case permission setters, ---*/ 2388 /*--- for dealing with stacks ---*/ 2389 /*------------------------------------------------------------*/ 2390 2391 /*--------------------- 32-bit ---------------------*/ 2392 2393 /* Nb: by "aligned" here we mean 4-byte aligned */ 2394 2395 static INLINE void make_aligned_word32_undefined ( Addr a ) 2396 { 2397 PROF_EVENT(300, "make_aligned_word32_undefined"); 2398 2399 #ifndef PERF_FAST_STACK2 2400 make_mem_undefined(a, 4); 2401 #else 2402 { 2403 UWord sm_off; 2404 SecMap* sm; 2405 2406 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) { 2407 PROF_EVENT(301, "make_aligned_word32_undefined-slow1"); 2408 make_mem_undefined(a, 4); 2409 return; 2410 } 2411 2412 sm = get_secmap_for_writing_low(a); 2413 sm_off = SM_OFF(a); 2414 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED; 2415 } 2416 #endif 2417 } 2418 2419 static INLINE 2420 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag ) 2421 { 2422 make_aligned_word32_undefined(a); 2423 //// BEGIN inlined, specialised version of MC_(helperc_b_store4) 2424 //// Set the origins for a+0 .. a+3 2425 { OCacheLine* line; 2426 UWord lineoff = oc_line_offset(a); 2427 if (OC_ENABLE_ASSERTIONS) { 2428 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 2429 } 2430 line = find_OCacheLine( a ); 2431 line->descr[lineoff] = 0xF; 2432 line->w32[lineoff] = otag; 2433 } 2434 //// END inlined, specialised version of MC_(helperc_b_store4) 2435 } 2436 2437 static INLINE 2438 void make_aligned_word32_noaccess ( Addr a ) 2439 { 2440 PROF_EVENT(310, "make_aligned_word32_noaccess"); 2441 2442 #ifndef PERF_FAST_STACK2 2443 MC_(make_mem_noaccess)(a, 4); 2444 #else 2445 { 2446 UWord sm_off; 2447 SecMap* sm; 2448 2449 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) { 2450 PROF_EVENT(311, "make_aligned_word32_noaccess-slow1"); 2451 MC_(make_mem_noaccess)(a, 4); 2452 return; 2453 } 2454 2455 sm = get_secmap_for_writing_low(a); 2456 sm_off = SM_OFF(a); 2457 sm->vabits8[sm_off] = VA_BITS8_NOACCESS; 2458 2459 //// BEGIN inlined, specialised version of MC_(helperc_b_store4) 2460 //// Set the origins for a+0 .. a+3. 
2461 if (UNLIKELY( MC_(clo_mc_level) == 3 )) { 2462 OCacheLine* line; 2463 UWord lineoff = oc_line_offset(a); 2464 if (OC_ENABLE_ASSERTIONS) { 2465 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); 2466 } 2467 line = find_OCacheLine( a ); 2468 line->descr[lineoff] = 0; 2469 } 2470 //// END inlined, specialised version of MC_(helperc_b_store4) 2471 } 2472 #endif 2473 } 2474 2475 /*--------------------- 64-bit ---------------------*/ 2476 2477 /* Nb: by "aligned" here we mean 8-byte aligned */ 2478 2479 static INLINE void make_aligned_word64_undefined ( Addr a ) 2480 { 2481 PROF_EVENT(320, "make_aligned_word64_undefined"); 2482 2483 #ifndef PERF_FAST_STACK2 2484 make_mem_undefined(a, 8); 2485 #else 2486 { 2487 UWord sm_off16; 2488 SecMap* sm; 2489 2490 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) { 2491 PROF_EVENT(321, "make_aligned_word64_undefined-slow1"); 2492 make_mem_undefined(a, 8); 2493 return; 2494 } 2495 2496 sm = get_secmap_for_writing_low(a); 2497 sm_off16 = SM_OFF_16(a); 2498 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED; 2499 } 2500 #endif 2501 } 2502 2503 static INLINE 2504 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag ) 2505 { 2506 make_aligned_word64_undefined(a); 2507 //// BEGIN inlined, specialised version of MC_(helperc_b_store8) 2508 //// Set the origins for a+0 .. a+7 2509 { OCacheLine* line; 2510 UWord lineoff = oc_line_offset(a); 2511 tl_assert(lineoff >= 0 2512 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/); 2513 line = find_OCacheLine( a ); 2514 line->descr[lineoff+0] = 0xF; 2515 line->descr[lineoff+1] = 0xF; 2516 line->w32[lineoff+0] = otag; 2517 line->w32[lineoff+1] = otag; 2518 } 2519 //// END inlined, specialised version of MC_(helperc_b_store8) 2520 } 2521 2522 static INLINE 2523 void make_aligned_word64_noaccess ( Addr a ) 2524 { 2525 PROF_EVENT(330, "make_aligned_word64_noaccess"); 2526 2527 #ifndef PERF_FAST_STACK2 2528 MC_(make_mem_noaccess)(a, 8); 2529 #else 2530 { 2531 UWord sm_off16; 2532 SecMap* sm; 2533 2534 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) { 2535 PROF_EVENT(331, "make_aligned_word64_noaccess-slow1"); 2536 MC_(make_mem_noaccess)(a, 8); 2537 return; 2538 } 2539 2540 sm = get_secmap_for_writing_low(a); 2541 sm_off16 = SM_OFF_16(a); 2542 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS; 2543 2544 //// BEGIN inlined, specialised version of MC_(helperc_b_store8) 2545 //// Clear the origins for a+0 .. a+7. 
2546 if (UNLIKELY( MC_(clo_mc_level) == 3 )) { 2547 OCacheLine* line; 2548 UWord lineoff = oc_line_offset(a); 2549 tl_assert(lineoff >= 0 2550 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/); 2551 line = find_OCacheLine( a ); 2552 line->descr[lineoff+0] = 0; 2553 line->descr[lineoff+1] = 0; 2554 } 2555 //// END inlined, specialised version of MC_(helperc_b_store8) 2556 } 2557 #endif 2558 } 2559 2560 2561 /*------------------------------------------------------------*/ 2562 /*--- Stack pointer adjustment ---*/ 2563 /*------------------------------------------------------------*/ 2564 2565 #ifdef PERF_FAST_STACK 2566 # define MAYBE_USED 2567 #else 2568 # define MAYBE_USED __attribute__((unused)) 2569 #endif 2570 2571 /*--------------- adjustment by 4 bytes ---------------*/ 2572 2573 MAYBE_USED 2574 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu) 2575 { 2576 UInt otag = ecu | MC_OKIND_STACK; 2577 PROF_EVENT(110, "new_mem_stack_4"); 2578 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2579 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag ); 2580 } else { 2581 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag ); 2582 } 2583 } 2584 2585 MAYBE_USED 2586 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP) 2587 { 2588 PROF_EVENT(110, "new_mem_stack_4"); 2589 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2590 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2591 } else { 2592 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 ); 2593 } 2594 } 2595 2596 MAYBE_USED 2597 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP) 2598 { 2599 PROF_EVENT(120, "die_mem_stack_4"); 2600 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2601 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2602 } else { 2603 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 ); 2604 } 2605 } 2606 2607 /*--------------- adjustment by 8 bytes ---------------*/ 2608 2609 MAYBE_USED 2610 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu) 2611 { 2612 UInt otag = ecu | MC_OKIND_STACK; 2613 PROF_EVENT(111, "new_mem_stack_8"); 2614 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2615 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag ); 2616 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2617 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2618 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag ); 2619 } else { 2620 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag ); 2621 } 2622 } 2623 2624 MAYBE_USED 2625 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP) 2626 { 2627 PROF_EVENT(111, "new_mem_stack_8"); 2628 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2629 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2630 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2631 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2632 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 2633 } else { 2634 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 ); 2635 } 2636 } 2637 2638 MAYBE_USED 2639 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP) 2640 { 2641 PROF_EVENT(121, "die_mem_stack_8"); 2642 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2643 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 2644 } else if 
(VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2645 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 2646 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2647 } else { 2648 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 ); 2649 } 2650 } 2651 2652 /*--------------- adjustment by 12 bytes ---------------*/ 2653 2654 MAYBE_USED 2655 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu) 2656 { 2657 UInt otag = ecu | MC_OKIND_STACK; 2658 PROF_EVENT(112, "new_mem_stack_12"); 2659 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2660 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2661 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag ); 2662 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2663 /* from previous test we don't have 8-alignment at offset +0, 2664 hence must have 8 alignment at offsets +4/-4. Hence safe to 2665 do 4 at +0 and then 8 at +4/. */ 2666 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2667 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag ); 2668 } else { 2669 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag ); 2670 } 2671 } 2672 2673 MAYBE_USED 2674 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP) 2675 { 2676 PROF_EVENT(112, "new_mem_stack_12"); 2677 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2678 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2679 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2680 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2681 /* from previous test we don't have 8-alignment at offset +0, 2682 hence must have 8 alignment at offsets +4/-4. Hence safe to 2683 do 4 at +0 and then 8 at +4/. */ 2684 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2685 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 2686 } else { 2687 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 ); 2688 } 2689 } 2690 2691 MAYBE_USED 2692 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP) 2693 { 2694 PROF_EVENT(122, "die_mem_stack_12"); 2695 /* Note the -12 in the test */ 2696 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) { 2697 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at 2698 -4. */ 2699 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 2700 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2701 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2702 /* We have 4-alignment at +0, but we don't have 8-alignment at 2703 -12. So we must have 8-alignment at -8. Hence do 4 at -12 2704 and then 8 at -8. */ 2705 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 2706 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 2707 } else { 2708 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 ); 2709 } 2710 } 2711 2712 /*--------------- adjustment by 16 bytes ---------------*/ 2713 2714 MAYBE_USED 2715 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu) 2716 { 2717 UInt otag = ecu | MC_OKIND_STACK; 2718 PROF_EVENT(113, "new_mem_stack_16"); 2719 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2720 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. 
*/ 2721 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2722 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag ); 2723 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2724 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4. 2725 Hence do 4 at +0, 8 at +4, 4 at +12. */ 2726 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2727 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag ); 2728 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag ); 2729 } else { 2730 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag ); 2731 } 2732 } 2733 2734 MAYBE_USED 2735 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP) 2736 { 2737 PROF_EVENT(113, "new_mem_stack_16"); 2738 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2739 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */ 2740 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2741 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2742 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2743 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4. 2744 Hence do 4 at +0, 8 at +4, 4 at +12. */ 2745 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2746 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 2747 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 ); 2748 } else { 2749 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 ); 2750 } 2751 } 2752 2753 MAYBE_USED 2754 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP) 2755 { 2756 PROF_EVENT(123, "die_mem_stack_16"); 2757 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2758 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */ 2759 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 2760 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 ); 2761 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2762 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */ 2763 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 2764 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 2765 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2766 } else { 2767 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 ); 2768 } 2769 } 2770 2771 /*--------------- adjustment by 32 bytes ---------------*/ 2772 2773 MAYBE_USED 2774 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu) 2775 { 2776 UInt otag = ecu | MC_OKIND_STACK; 2777 PROF_EVENT(114, "new_mem_stack_32"); 2778 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2779 /* Straightforward */ 2780 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2781 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag ); 2782 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 2783 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 2784 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2785 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at 2786 +0,+28. 
*/ 2787 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2788 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag ); 2789 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag ); 2790 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag ); 2791 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag ); 2792 } else { 2793 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag ); 2794 } 2795 } 2796 2797 MAYBE_USED 2798 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP) 2799 { 2800 PROF_EVENT(114, "new_mem_stack_32"); 2801 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2802 /* Straightforward */ 2803 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2804 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2805 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 2806 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 2807 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2808 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at 2809 +0,+28. */ 2810 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2811 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 ); 2812 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 ); 2813 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 ); 2814 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 ); 2815 } else { 2816 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 ); 2817 } 2818 } 2819 2820 MAYBE_USED 2821 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP) 2822 { 2823 PROF_EVENT(124, "die_mem_stack_32"); 2824 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2825 /* Straightforward */ 2826 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 2827 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 2828 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 2829 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 2830 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2831 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and 2832 4 at -32,-4. 
*/ 2833 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 2834 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 ); 2835 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 ); 2836 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 ); 2837 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 ); 2838 } else { 2839 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 ); 2840 } 2841 } 2842 2843 /*--------------- adjustment by 112 bytes ---------------*/ 2844 2845 MAYBE_USED 2846 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu) 2847 { 2848 UInt otag = ecu | MC_OKIND_STACK; 2849 PROF_EVENT(115, "new_mem_stack_112"); 2850 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2851 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2852 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag ); 2853 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 2854 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 2855 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag ); 2856 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag ); 2857 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag ); 2858 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag ); 2859 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag ); 2860 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag ); 2861 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag ); 2862 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag ); 2863 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag ); 2864 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag ); 2865 } else { 2866 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag ); 2867 } 2868 } 2869 2870 MAYBE_USED 2871 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP) 2872 { 2873 PROF_EVENT(115, "new_mem_stack_112"); 2874 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2875 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2876 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2877 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 2878 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 2879 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 ); 2880 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 ); 2881 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 ); 2882 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 ); 2883 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 ); 2884 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 ); 2885 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 ); 2886 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 ); 2887 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 ); 2888 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 ); 2889 } else { 2890 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 ); 2891 } 2892 } 2893 2894 MAYBE_USED 2895 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP) 2896 { 2897 PROF_EVENT(125, 
"die_mem_stack_112"); 2898 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2899 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112); 2900 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104); 2901 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 ); 2902 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 ); 2903 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 ); 2904 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 ); 2905 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 ); 2906 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 ); 2907 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 ); 2908 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 ); 2909 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 ); 2910 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 ); 2911 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 ); 2912 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 ); 2913 } else { 2914 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 ); 2915 } 2916 } 2917 2918 /*--------------- adjustment by 128 bytes ---------------*/ 2919 2920 MAYBE_USED 2921 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu) 2922 { 2923 UInt otag = ecu | MC_OKIND_STACK; 2924 PROF_EVENT(116, "new_mem_stack_128"); 2925 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2926 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag ); 2927 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag ); 2928 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag ); 2929 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag ); 2930 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag ); 2931 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag ); 2932 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag ); 2933 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag ); 2934 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag ); 2935 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag ); 2936 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag ); 2937 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag ); 2938 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag ); 2939 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag ); 2940 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag ); 2941 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag ); 2942 } else { 2943 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag ); 2944 } 2945 } 2946 2947 MAYBE_USED 2948 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP) 2949 { 2950 PROF_EVENT(116, "new_mem_stack_128"); 2951 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) { 2952 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP ); 2953 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 ); 2954 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 ); 2955 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 ); 2956 
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
{
   PROF_EVENT(126, "die_mem_stack_128");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
   }
}

/*--------------- adjustment by 144 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(117, "new_mem_stack_144");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
{
   PROF_EVENT(117, "new_mem_stack_144");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
{
   PROF_EVENT(127, "die_mem_stack_144");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
   }
}

/*--------------- adjustment by 160 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(118, "new_mem_stack_160");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
{
   PROF_EVENT(118, "new_mem_stack_160");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
{
   PROF_EVENT(128, "die_mem_stack_160");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
   }
}

/*--------------- adjustment by N bytes ---------------*/

static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(115, "new_mem_stack_w_otag");
   MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
}

static void mc_new_mem_stack ( Addr a, SizeT len )
{
   PROF_EVENT(115, "new_mem_stack");
   make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
}

static void mc_die_mem_stack ( Addr a, SizeT len )
{
   PROF_EVENT(125, "die_mem_stack");
   MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
}


/* The AMD64 ABI says:

   "The 128-byte area beyond the location pointed to by %rsp is considered
    to be reserved and shall not be modified by signal or interrupt
    handlers. Therefore, functions may use this area for temporary data
    that is not needed across function calls. In particular, leaf functions
    may use this area for their entire stack frame, rather than adjusting
    the stack pointer in the prologue and epilogue. This area is known as
    red zone [sic]."

   So after any call or return we need to mark this redzone as containing
   undefined values.

   Consider this: we're in function f. f calls g. g moves rsp down
   modestly (say 16 bytes) and writes stuff all over the red zone, making it
   defined. g returns. f is buggy and reads from parts of the red zone
   that it didn't write on. But because g filled that area in, f is going
   to be picking up defined V bits and so any errors from reading bits of
   the red zone it didn't write will be missed. The only solution I could
   think of was to make the red zone undefined when g returns to f.

   This is in accordance with the ABI, which makes it clear the redzone
   is volatile across function calls.

   The problem occurs the other way round too: f could fill the RZ up
   with defined values and g could mistakenly read them. So the RZ
   also needs to be nuked on function calls.
*/


/* Here's a simple cache to hold nia -> ECU mappings. It could be
   improved so as to have a lower miss rate. */

static UWord stats__nia_cache_queries = 0;
static UWord stats__nia_cache_misses  = 0;

typedef
   struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */
            UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
   WCacheEnt;

#define N_NIA_TO_ECU_CACHE 511

static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];

static void init_nia_to_ecu_cache ( void )
{
   UWord i;
   Addr zero_addr = 0;
   ExeContext* zero_ec;
   UInt zero_ecu;
   /* Fill all the slots with an entry for address zero, and the
      relevant otags accordingly. Hence the cache is initially filled
      with valid data. */
   zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
   tl_assert(zero_ec);
   zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
   tl_assert(VG_(is_plausible_ECU)(zero_ecu));
   for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
      nia_to_ecu_cache[i].nia0 = zero_addr;
      nia_to_ecu_cache[i].ecu0 = zero_ecu;
      nia_to_ecu_cache[i].nia1 = zero_addr;
      nia_to_ecu_cache[i].ecu1 = zero_ecu;
   }
}

static inline UInt convert_nia_to_ecu ( Addr nia )
{
   UWord i;
   UInt ecu;
   ExeContext* ec;

   tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );

   stats__nia_cache_queries++;
   i = nia % N_NIA_TO_ECU_CACHE;
   tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);

   if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
      return nia_to_ecu_cache[i].ecu0;

   if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
#     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
      SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
      SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
#     undef SWAP
      return nia_to_ecu_cache[i].ecu0;
   }

   stats__nia_cache_misses++;
   ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
   tl_assert(ec);
   ecu = VG_(get_ECU_from_ExeContext)(ec);
   tl_assert(VG_(is_plausible_ECU)(ecu));

   nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
   nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;

   nia_to_ecu_cache[i].nia0 = nia;
   nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
   return ecu;
}


/* Note that this serves both the origin-tracking and
   no-origin-tracking modes. We assume that calls to it are
   sufficiently infrequent that it isn't worth specialising for the
   with/without origin-tracking cases. */
void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
{
   UInt otag;
   tl_assert(sizeof(UWord) == sizeof(SizeT));
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
                  base, len, nia );

   if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
      UInt ecu = convert_nia_to_ecu ( nia );
      tl_assert(VG_(is_plausible_ECU)(ecu));
      otag = ecu | MC_OKIND_STACK;
   } else {
      tl_assert(nia == 0);
      otag = 0;
   }

#  if 0
   /* Really slow version */
   MC_(make_mem_undefined)(base, len, otag);
#  endif

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
      make_aligned_word64_undefined(base + 0, otag);
      make_aligned_word64_undefined(base + 8, otag);
      make_aligned_word64_undefined(base + 16, otag);
      make_aligned_word64_undefined(base + 24, otag);

      make_aligned_word64_undefined(base + 32, otag);
      make_aligned_word64_undefined(base + 40, otag);
      make_aligned_word64_undefined(base + 48, otag);
      make_aligned_word64_undefined(base + 56, otag);

      make_aligned_word64_undefined(base + 64, otag);
      make_aligned_word64_undefined(base + 72, otag);
      make_aligned_word64_undefined(base + 80, otag);
      make_aligned_word64_undefined(base + 88, otag);

      make_aligned_word64_undefined(base + 96, otag);
      make_aligned_word64_undefined(base + 104, otag);
      make_aligned_word64_undefined(base + 112, otag);
      make_aligned_word64_undefined(base + 120, otag);
   } else {
      MC_(make_mem_undefined)(base, len, otag);
   }
#  endif

   /* Idea is: go fast when
         * 8-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely within a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array. (If the sm was distinguished, this
      will make a copy and then write to it.)
   */

   if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      tl_assert(a_lo < a_hi); // paranoia: detect overflow
      if (a_hi <= MAX_PRIMARY_ADDRESS) {
         // Now we know the entire range is within the main primary map.
         SecMap* sm    = get_secmap_for_writing_low(a_lo);
         SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
         /* Now we know that the entire address range falls within a
            single secondary map, and that that secondary 'lives' in
            the main primary map. */
         if (LIKELY(sm == sm_hi)) {
            // Finally, we know that the range is entirely within one secmap.
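            /* Each vabits8[] byte holds the compressed V+A state of 4 client
               bytes (2 bits per byte), so each 16-bit store below covers 8
               client bytes; 16 such stores mark the whole 128-byte block as
               addressable but undefined. */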
            UWord   v_off = SM_OFF(a_lo);
            UShort* p     = (UShort*)(&sm->vabits8[v_off]);
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
               set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
            }
            return;
         }
      }
   }

   /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 288 - 1);
      tl_assert(a_lo < a_hi); // paranoia: detect overflow
      if (a_hi <= MAX_PRIMARY_ADDRESS) {
         // Now we know the entire range is within the main primary map.
         SecMap* sm    = get_secmap_for_writing_low(a_lo);
         SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
         /* Now we know that the entire address range falls within a
            single secondary map, and that that secondary 'lives' in
            the main primary map. */
         if (LIKELY(sm == sm_hi)) {
            // Finally, we know that the range is entirely within one secmap.
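            /* As above: 36 16-bit stores of VA_BITS16_UNDEFINED, each
               covering 8 client bytes, mark the whole 288-byte area as
               addressable but undefined. */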
            UWord   v_off = SM_OFF(a_lo);
            UShort* p     = (UShort*)(&sm->vabits8[v_off]);
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            p[16] = VA_BITS16_UNDEFINED;
            p[17] = VA_BITS16_UNDEFINED;
            p[18] = VA_BITS16_UNDEFINED;
            p[19] = VA_BITS16_UNDEFINED;
            p[20] = VA_BITS16_UNDEFINED;
            p[21] = VA_BITS16_UNDEFINED;
            p[22] = VA_BITS16_UNDEFINED;
            p[23] = VA_BITS16_UNDEFINED;
            p[24] = VA_BITS16_UNDEFINED;
            p[25] = VA_BITS16_UNDEFINED;
            p[26] = VA_BITS16_UNDEFINED;
            p[27] = VA_BITS16_UNDEFINED;
            p[28] = VA_BITS16_UNDEFINED;
            p[29] = VA_BITS16_UNDEFINED;
            p[30] = VA_BITS16_UNDEFINED;
            p[31] = VA_BITS16_UNDEFINED;
            p[32] = VA_BITS16_UNDEFINED;
            p[33] = VA_BITS16_UNDEFINED;
            p[34] = VA_BITS16_UNDEFINED;
            p[35] = VA_BITS16_UNDEFINED;
            if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
               set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
            }
            return;
         }
      }
   }