1 /* -*- mode: C; c-basic-offset: 3; -*- */ 2 3 /*--------------------------------------------------------------------*/ 4 /*--- Segment name management aspacemgr-segnames.c ---*/ 5 /*--------------------------------------------------------------------*/ 6 7 /* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2015-2015 Florian Krohm 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 */ 30 31 /* Segment names are stored in a string table. 32 33 The string table is organised into slots of varying length. Slots are 34 adjacent and there are no holes between slots. 35 A slot consists of two parts: 36 37 (1) a fixed size overhead of length 4 bytes 38 (2) a variable size payload of up to 65535 bytes 39 40 The segment name is stored in the payload area. Therefore: 41 a segment name cannot be longer than 65535 bytes including the '\0' 42 terminator. This looks like a reasonable limitation. 43 44 Overall slot layout: 45 46 | 4 bytes | max 65535 bytes | 47 +-----------------------------+-------------------------+ 48 | overhead | payload | 49 +-----------------------------+-------------------------+ 50 ^ ^ 51 | | 52 -4 +----- seg->fnIdx 53 54 Each slot is uniquely identified by an index which points to the first 55 byte of the payload area. It is this value that is stored in seg->fnIdx. 56 Note, that this value is at least 4. 57 58 A slot either holds a string or it is free. The status of a slot is 59 identified by the leftmost bit in the overhead field, the so called F-bit. 60 F-bit == 1 means that slot is free; otherwise it is occupied and holds a 61 string. 62 63 Slot containing a string (segment name): 64 65 bits | 1 | 15 | 16 | 66 +---+--------------+----------+-------------------------+ 67 | 0 | refcount | slotsize | the string including \0 | 68 +---+--------------+----------+-------------------------+ 69 ^ ^ ^ 70 | | | 71 -4 -2 +----- seg->fnIdx 72 73 Segment names are reference counted. 15 bits are available which allows 74 for up to 32767 references. If the string is referenced more than 32767 75 times, the reference count will be frozen and the slot can never 76 become free. I'm not unduly concerned. 77 Two bytes are reserved to hold the size of the slot. Well, it's actually 78 the size of the payload aread (i.e. the size of the slot minus the 79 overhead). Ah well -- the name sticks. 80 With two bytes to store the size, the payload area can be at most 65535 81 bytes large. 82 83 A free slot looks like this: 84 85 bits | 1 | 31 | 16 | 86 +---+-------------------------+----------+--------------+ 87 | 1 | index of next free slot | slotsize | .. unused .. | 88 +---+-------------------------+----------+--------------+ 89 ^ ^ 90 | | 91 -4 +----- seg->fnIdx 92 93 Free slots are chained together in a singly linked list. An index of 94 zero indicates the end of the chain. Note that zero cannot conflict 95 with an index into the string table as the minumum index is at least 96 four (see above). 97 98 The typical way to traverse the segment names is: 99 100 for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) { 101 if (is_freeslot(ix)) 102 do this 103 else 104 do that 105 } 106 107 Important detail: there is a sentinel at the end of the list, namely a 108 slot with a zero-sized payload area. 109 110 Whenever a new segment name needs to be stashed away, the list of free 111 slots is traversed and the first slot which is large enough is being taken 112 (first fit). There will be no splitting of slots, as that complicates 113 matters and without slot coalescing would lead to memory fragmentation. 114 So we leave it as is until a use case comes up that needs something better. 115 */ 116 117 #include "pub_core_basics.h" // types 118 #include "priv_aspacemgr.h" 119 120 // A few constants. 121 enum { 122 refcount_size = sizeof(UShort), 123 slotsize_size = sizeof(UShort), 124 overhead = refcount_size + slotsize_size, 125 max_refcount = 0x7fff, // 2 bytes - F-bit 126 max_slotsize = 0xffff, // 2 bytes 127 max_slotindex = 0x7fffffff, // 4 bytes - F-bit 128 fbit_mask = 0x80, 129 end_of_chain = 0 130 }; 131 132 /* The old segname implementation allowed for 1000 names on Android and 133 6000 names on other platforms. Each name was allowed to be 1000 characters 134 long. That was very wasteful. */ 135 #define VG_TABLE_SIZE 1000000 136 137 /* String table for segment names */ 138 139 static HChar segnames[VG_TABLE_SIZE]; /* her majesty, the string table */ 140 static SizeT segnames_used = 0; /* number of bytes used */ 141 static UInt num_segnames = 0; /* number of names in string table */ 142 static UInt num_slots = 0; /* number of slots in string table */ 143 static UInt freeslot_chain = end_of_chain; 144 145 static Bool 146 is_freeslot(UInt ix) 147 { 148 aspacem_assert(ix >= overhead && ix <= segnames_used); 149 return (segnames[ix - 4] & fbit_mask) != 0; 150 } 151 152 static void 153 put_slotindex(UInt ix, UInt slotindex) 154 { 155 aspacem_assert(ix >= overhead && ix <= segnames_used); 156 if (slotindex != 0) 157 aspacem_assert(slotindex >= overhead && slotindex <= segnames_used); 158 159 slotindex |= fbit_mask << 24; 160 segnames[ix - 1] = slotindex & 0xFF; slotindex >>= 8; 161 segnames[ix - 2] = slotindex & 0xFF; slotindex >>= 8; 162 segnames[ix - 3] = slotindex & 0xFF; slotindex >>= 8; 163 segnames[ix - 4] = slotindex & 0xFF; 164 } 165 166 static UInt 167 get_slotindex(UInt ix) 168 { 169 aspacem_assert(ix >= overhead && ix <= segnames_used); 170 aspacem_assert(is_freeslot(ix)); 171 172 // Avoid unexpected sign extension 173 const UChar *unames = (const UChar *)segnames; 174 175 UInt slotindex = 0; 176 slotindex |= unames[ix - 4]; slotindex <<= 8; 177 slotindex |= unames[ix - 3]; slotindex <<= 8; 178 slotindex |= unames[ix - 2]; slotindex <<= 8; 179 slotindex |= unames[ix - 1]; 180 181 return slotindex & max_slotindex; // removes the F-bit 182 } 183 184 static void 185 put_slotsize(UInt ix, UInt size) 186 { 187 aspacem_assert(ix >= overhead && ix <= segnames_used); 188 aspacem_assert(size <= max_slotsize); 189 segnames[ix - 1] = size & 0xff; 190 segnames[ix - 2] = size >> 8; 191 } 192 193 static UInt 194 get_slotsize(UInt ix) 195 { 196 aspacem_assert(ix >= overhead && ix <= segnames_used); 197 198 // Avoid unexpected sign extension 199 const UChar *unames = (const UChar *)segnames; 200 if (is_freeslot(ix)) 201 return (unames[ix] << 8) | unames[ix+1]; 202 else 203 return (unames[ix - 2] << 8) | unames[ix - 1]; 204 } 205 206 static void 207 put_refcount(UInt ix, UInt rc) 208 { 209 aspacem_assert(ix >= overhead && ix <= segnames_used); 210 aspacem_assert(rc <= max_refcount); 211 // rc <= max_refcount ensures that the F-bit is zero 212 segnames[ix - 3] = rc & 0xff; 213 segnames[ix - 4] = rc >> 8; 214 } 215 216 static UInt 217 get_refcount(UInt ix) 218 { 219 aspacem_assert(ix >= overhead && ix <= segnames_used); 220 // must not be a free slot 221 aspacem_assert(! is_freeslot(ix)); 222 223 // Avoid unexpected sign extension 224 const UChar *unames = (const UChar *)segnames; 225 return (unames[ix - 4] << 8) | unames[ix - 3]; 226 } 227 228 static void 229 inc_refcount(UInt ix) 230 { 231 aspacem_assert(ix >= overhead && ix <= segnames_used); 232 UInt rc = get_refcount(ix); 233 if (rc != max_refcount) 234 put_refcount(ix, rc + 1); 235 } 236 237 static void 238 dec_refcount(UInt ix) 239 { 240 aspacem_assert(ix >= overhead && ix <= segnames_used); 241 UInt rc = get_refcount(ix); 242 aspacem_assert(rc > 0); 243 if (rc != max_refcount) { 244 --rc; 245 if (rc != 0) { 246 put_refcount(ix, rc); 247 } else { 248 UInt size = get_slotsize(ix); 249 /* Chain this slot in the freelist */ 250 put_slotindex(ix, freeslot_chain); 251 get_slotindex(ix); 252 put_slotsize(ix + slotsize_size, size); 253 get_slotindex(ix); 254 freeslot_chain = ix; 255 --num_segnames; 256 if (0) VG_(am_show_nsegments)(0, "AFTER DECREASE rc -> 0"); 257 } 258 } 259 } 260 261 static void 262 put_sentinel(UInt ix) 263 { 264 aspacem_assert(ix >= overhead && ix <= segnames_used); 265 266 put_refcount(ix, 0); 267 put_slotsize(ix, 0); 268 } 269 270 271 /* Searches the string table to find an index for the given name. 272 If none is found, an index is allocated and the name stored. 273 If running ouf of memory, return -1. */ 274 Int 275 ML_(am_allocate_segname)(const HChar *name) 276 { 277 UInt len, ix, size, next_freeslot; 278 279 aspacem_assert(name); 280 281 if (0) VG_(debugLog)(0, "aspacem", "allocate_segname %s\n", name); 282 283 len = VG_(strlen)(name); 284 285 /* First see if we already have the name. */ 286 for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) { 287 if (is_freeslot(ix)) continue; 288 if (VG_(strcmp)(name, segnames + ix) == 0) { 289 inc_refcount(ix); 290 return ix; 291 } 292 } 293 294 /* Is there a free slot in the string table from a previously "freed" 295 segment name ? */ 296 Int prev; 297 for (prev = -1, ix = freeslot_chain; ix != end_of_chain; 298 prev = ix, ix = next_freeslot) { 299 next_freeslot = get_slotindex(ix); // next in chain 300 size = get_slotsize(ix); 301 302 if (size >= len + 1) { 303 /* Note, if the size of the slot is a lot larger than the length 304 of the string we're about to store in it, we could split the 305 slot into two. But that complicates matters and as we're not 306 doing any coalescing of adjacent free slots this could lead to 307 fragmentation. */ 308 if (prev == -1) 309 freeslot_chain = next_freeslot; 310 else 311 put_slotindex(prev, next_freeslot); 312 put_refcount(ix, 1); 313 put_slotsize(ix, size); 314 VG_(strcpy)(segnames + ix, name); 315 ++num_segnames; 316 return ix; 317 } 318 } 319 320 /* We need to add a new name. */ 321 322 /* Note, that we need at least two bytes in the payload. The reason is 323 that the payload area will be used to store the size of the slot when 324 the slot is on the freelist. */ 325 if (len == 0) len = 1; 326 327 /* Is there enough room in the string table? The OVERHEAD is for the 328 sentinel following the payload of new slot. */ 329 SizeT need = len + 1 + overhead; 330 if (need > (sizeof segnames) - segnames_used) { 331 return -1; 332 } 333 334 ++num_segnames; 335 ++num_slots; 336 337 /* copy it in */ 338 ix = segnames_used; 339 put_refcount(ix, 1); 340 put_slotsize(ix, len + 1); 341 VG_(strcpy)(segnames + ix, name); 342 segnames_used += need; 343 344 /* Add sentinel at end of segment name list */ 345 put_sentinel(segnames_used); 346 347 return ix; 348 } 349 350 /* Debugging output */ 351 void 352 ML_(am_show_segnames)(Int logLevel, const HChar *prefix) 353 { 354 UInt size, ix, i; 355 356 VG_(debugLog)(logLevel, "aspacem", "%u segment names in %u slots\n", 357 num_segnames, num_slots); 358 359 if (freeslot_chain == end_of_chain) 360 VG_(debugLog)(logLevel, "aspacem", "freelist is empty\n"); 361 else 362 VG_(debugLog)(logLevel, "aspacem", "freelist begins at %u\n", 363 freeslot_chain); 364 for (i = 0, ix = overhead; (size = get_slotsize(ix)) != 0; 365 ix += size + overhead, ++i) { 366 if (is_freeslot(ix)) 367 VG_(debugLog)(logLevel, "aspacem", 368 "(%u,%u,0) [free slot: size=%u next=%u]\n", i, ix, 369 get_slotsize(ix), get_slotindex(ix)); 370 else 371 VG_(debugLog)(logLevel, "aspacem", 372 "(%u,%u,%u) %s\n", i, ix, get_refcount(ix), 373 segnames + ix); 374 } 375 } 376 377 /* Returns a sequence number for the fnIdx position in segnames. 378 Used in aspacemgr debug output to associate a segment with 379 a segment name. */ 380 Int 381 ML_(am_segname_get_seqnr)(Int fnIdx) 382 { 383 SizeT ix, size; 384 Int seqnr = -1; 385 386 if (fnIdx == -1) return -1; // shortcut 387 388 for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) { 389 seqnr++; 390 if (ix == fnIdx) 391 return seqnr; 392 } 393 394 // We should always find the given index; something's busted 395 aspacem_assert(0); 396 return -1; 397 } 398 399 /* Initialise the string table for segment names. It contains an empty 400 string which is not referenced. */ 401 void 402 ML_(am_segnames_init)(void) 403 { 404 aspacem_assert(sizeof segnames >= overhead); 405 406 segnames_used = overhead; 407 put_sentinel(segnames_used); 408 } 409 410 /* Increase reference count of segment name identified by IX. */ 411 void 412 ML_(am_inc_refcount)(Int ix) 413 { 414 if (ix != -1) 415 inc_refcount(ix); 416 } 417 418 /* Decrease reference count of segment name identified by IX. */ 419 void 420 ML_(am_dec_refcount)(Int ix) 421 { 422 if (ix != -1) 423 dec_refcount(ix); 424 } 425 426 Bool 427 ML_(am_sane_segname)(Int ix) 428 { 429 return ix == -1 || (ix >= overhead && ix < segnames_used); 430 } 431 432 const HChar * 433 ML_(am_get_segname)(Int ix) 434 { 435 return (ix == -1) ? NULL : segnames + ix; 436 } 437 438 /*--------------------------------------------------------------------*/ 439 /*--- end ---*/ 440 /*--------------------------------------------------------------------*/ 441