1 /* -*- mode: C; c-basic-offset: 3; -*- */ 2 3 /*--------------------------------------------------------------------*/ 4 /*--- Segment name management aspacemgr-segnames.c ---*/ 5 /*--------------------------------------------------------------------*/ 6 7 /* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2015-2015 Florian Krohm 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 */ 30 31 /* Segment names are stored in a string table. 32 33 The string table is organised into slots of varying length. Slots are 34 adjacent and there are no holes between slots. 35 A slot consists of two parts: 36 37 (1) a fixed size overhead of length 4 bytes 38 (2) a variable size payload of up to 65535 bytes 39 40 The segment name is stored in the payload area. Therefore: 41 a segment name cannot be longer than 65535 bytes including the '\0' 42 terminator. This looks like a reasonable limitation. 43 44 Overall slot layout: 45 46 | 4 bytes | max 65535 bytes | 47 +-----------------------------+-------------------------+ 48 | overhead | payload | 49 +-----------------------------+-------------------------+ 50 ^ ^ 51 | | 52 -4 +----- seg->fnIdx 53 54 Each slot is uniquely identified by an index which points to the first 55 byte of the payload area. It is this value that is stored in seg->fnIdx. 56 Note, that this value is at least 4. 57 58 A slot either holds a string or it is free. The status of a slot is 59 identified by the leftmost bit in the overhead field, the so called F-bit. 60 F-bit == 1 means that slot is free; otherwise it is occupied and holds a 61 string. 62 63 Slot containing a string (segment name): 64 65 bits | 1 | 15 | 16 | 66 +---+--------------+----------+-------------------------+ 67 | 0 | refcount | slotsize | the string including \0 | 68 +---+--------------+----------+-------------------------+ 69 ^ ^ ^ 70 | | | 71 -4 -2 +----- seg->fnIdx 72 73 Segment names are reference counted. 15 bits are available which allows 74 for up to 32767 references. If the string is referenced more than 32767 75 times, the reference count will be frozen and the slot can never 76 become free. I'm not unduly concerned. 77 Two bytes are reserved to hold the size of the slot. Well, it's actually 78 the size of the payload aread (i.e. the size of the slot minus the 79 overhead). Ah well -- the name sticks. 80 With two bytes to store the size, the payload area can be at most 65535 81 bytes large. 82 83 A free slot looks like this: 84 85 bits | 1 | 31 | 16 | 86 +---+-------------------------+----------+--------------+ 87 | 1 | index of next free slot | slotsize | .. unused .. | 88 +---+-------------------------+----------+--------------+ 89 ^ ^ 90 | | 91 -4 +----- seg->fnIdx 92 93 Free slots are chained together in a singly linked list. An index of 94 zero indicates the end of the chain. Note that zero cannot conflict 95 with an index into the string table as the minimum index is at least 96 four (see above). 97 98 The typical way to traverse the segment names is: 99 100 for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) { 101 if (is_freeslot(ix)) 102 do this 103 else 104 do that 105 } 106 107 Important detail: there is a sentinel at the end of the list, namely a 108 slot with a zero-sized payload area. 109 110 Whenever a new segment name needs to be stashed away, the list of free 111 slots is traversed and the first slot which is large enough is being taken 112 (first fit). There will be no splitting of slots, as that complicates 113 matters and without slot coalescing would lead to memory fragmentation. 114 So we leave it as is until a use case comes up that needs something better. 115 */ 116 117 #include "pub_core_basics.h" // types 118 #include "priv_aspacemgr.h" 119 120 // A few constants. 121 enum { 122 refcount_size = sizeof(UShort), 123 slotsize_size = sizeof(UShort), 124 overhead = refcount_size + slotsize_size, 125 max_refcount = 0x7fff, // 2 bytes - F-bit 126 max_slotsize = 0xffff, // 2 bytes 127 max_slotindex = 0x7fffffff, // 4 bytes - F-bit 128 fbit_mask_value = 0x80, 129 end_of_chain = 0 130 }; 131 132 static const UInt fbit_mask = fbit_mask_value; 133 134 /* The old segname implementation allowed for 1000 names on Android and 135 6000 names on other platforms. Each name was allowed to be 1000 characters 136 long. That was very wasteful. */ 137 #define VG_TABLE_SIZE 1000000 138 139 /* String table for segment names */ 140 141 static HChar segnames[VG_TABLE_SIZE]; /* her majesty, the string table */ 142 static SizeT segnames_used = 0; /* number of bytes used */ 143 static UInt num_segnames = 0; /* number of names in string table */ 144 static UInt num_slots = 0; /* number of slots in string table */ 145 static UInt freeslot_chain = end_of_chain; 146 147 static Bool 148 is_freeslot(UInt ix) 149 { 150 aspacem_assert(ix >= overhead && ix <= segnames_used); 151 return (segnames[ix - 4] & fbit_mask) != 0; 152 } 153 154 static void 155 put_slotindex(UInt ix, UInt slotindex) 156 { 157 aspacem_assert(ix >= overhead && ix <= segnames_used); 158 if (slotindex != 0) 159 aspacem_assert(slotindex >= overhead && slotindex <= segnames_used); 160 161 slotindex |= fbit_mask << 24; 162 segnames[ix - 1] = slotindex & 0xFF; slotindex >>= 8; 163 segnames[ix - 2] = slotindex & 0xFF; slotindex >>= 8; 164 segnames[ix - 3] = slotindex & 0xFF; slotindex >>= 8; 165 segnames[ix - 4] = slotindex & 0xFF; 166 } 167 168 static UInt 169 get_slotindex(UInt ix) 170 { 171 aspacem_assert(ix >= overhead && ix <= segnames_used); 172 aspacem_assert(is_freeslot(ix)); 173 174 // Avoid unexpected sign extension 175 const UChar *unames = (const UChar *)segnames; 176 177 UInt slotindex = 0; 178 slotindex |= unames[ix - 4]; slotindex <<= 8; 179 slotindex |= unames[ix - 3]; slotindex <<= 8; 180 slotindex |= unames[ix - 2]; slotindex <<= 8; 181 slotindex |= unames[ix - 1]; 182 183 return slotindex & max_slotindex; // removes the F-bit 184 } 185 186 static void 187 put_slotsize(UInt ix, UInt size) 188 { 189 aspacem_assert(ix >= overhead && ix <= segnames_used); 190 aspacem_assert(size <= max_slotsize); 191 segnames[ix - 1] = size & 0xff; 192 segnames[ix - 2] = size >> 8; 193 } 194 195 static UInt 196 get_slotsize(UInt ix) 197 { 198 aspacem_assert(ix >= overhead && ix <= segnames_used); 199 200 // Avoid unexpected sign extension 201 const UChar *unames = (const UChar *)segnames; 202 if (is_freeslot(ix)) 203 return (unames[ix] << 8) | unames[ix+1]; 204 else 205 return (unames[ix - 2] << 8) | unames[ix - 1]; 206 } 207 208 static void 209 put_refcount(UInt ix, UInt rc) 210 { 211 aspacem_assert(ix >= overhead && ix <= segnames_used); 212 aspacem_assert(rc <= max_refcount); 213 // rc <= max_refcount ensures that the F-bit is zero 214 segnames[ix - 3] = rc & 0xff; 215 segnames[ix - 4] = rc >> 8; 216 } 217 218 static UInt 219 get_refcount(UInt ix) 220 { 221 aspacem_assert(ix >= overhead && ix <= segnames_used); 222 // must not be a free slot 223 aspacem_assert(! is_freeslot(ix)); 224 225 // Avoid unexpected sign extension 226 const UChar *unames = (const UChar *)segnames; 227 return (unames[ix - 4] << 8) | unames[ix - 3]; 228 } 229 230 static void 231 inc_refcount(UInt ix) 232 { 233 aspacem_assert(ix >= overhead && ix <= segnames_used); 234 UInt rc = get_refcount(ix); 235 if (rc != max_refcount) 236 put_refcount(ix, rc + 1); 237 } 238 239 static void 240 dec_refcount(UInt ix) 241 { 242 aspacem_assert(ix >= overhead && ix <= segnames_used); 243 UInt rc = get_refcount(ix); 244 aspacem_assert(rc > 0); 245 if (rc != max_refcount) { 246 --rc; 247 if (rc != 0) { 248 put_refcount(ix, rc); 249 } else { 250 UInt size = get_slotsize(ix); 251 /* Chain this slot in the freelist */ 252 put_slotindex(ix, freeslot_chain); 253 get_slotindex(ix); 254 put_slotsize(ix + slotsize_size, size); 255 get_slotindex(ix); 256 freeslot_chain = ix; 257 --num_segnames; 258 if (0) VG_(am_show_nsegments)(0, "AFTER DECREASE rc -> 0"); 259 } 260 } 261 } 262 263 static void 264 put_sentinel(UInt ix) 265 { 266 aspacem_assert(ix >= overhead && ix <= segnames_used); 267 268 put_refcount(ix, 0); 269 put_slotsize(ix, 0); 270 } 271 272 273 /* Searches the string table to find an index for the given name. 274 If none is found, an index is allocated and the name stored. 275 If running ouf of memory, return -1. */ 276 Int 277 ML_(am_allocate_segname)(const HChar *name) 278 { 279 UInt len, ix, size, next_freeslot; 280 281 aspacem_assert(name); 282 283 if (0) VG_(debugLog)(0, "aspacem", "allocate_segname %s\n", name); 284 285 len = VG_(strlen)(name); 286 287 /* First see if we already have the name. */ 288 for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) { 289 if (is_freeslot(ix)) continue; 290 if (VG_(strcmp)(name, segnames + ix) == 0) { 291 inc_refcount(ix); 292 return ix; 293 } 294 } 295 296 /* Is there a free slot in the string table from a previously "freed" 297 segment name ? */ 298 Int prev; 299 for (prev = -1, ix = freeslot_chain; ix != end_of_chain; 300 prev = ix, ix = next_freeslot) { 301 next_freeslot = get_slotindex(ix); // next in chain 302 size = get_slotsize(ix); 303 304 if (size >= len + 1) { 305 /* Note, if the size of the slot is a lot larger than the length 306 of the string we're about to store in it, we could split the 307 slot into two. But that complicates matters and as we're not 308 doing any coalescing of adjacent free slots this could lead to 309 fragmentation. */ 310 if (prev == -1) 311 freeslot_chain = next_freeslot; 312 else 313 put_slotindex(prev, next_freeslot); 314 put_refcount(ix, 1); 315 put_slotsize(ix, size); 316 VG_(strcpy)(segnames + ix, name); 317 ++num_segnames; 318 return ix; 319 } 320 } 321 322 /* We need to add a new name. */ 323 324 /* Note, that we need at least two bytes in the payload. The reason is 325 that the payload area will be used to store the size of the slot when 326 the slot is on the freelist. */ 327 if (len == 0) len = 1; 328 329 /* Is there enough room in the string table? The OVERHEAD is for the 330 sentinel following the payload of new slot. */ 331 SizeT need = len + 1 + overhead; 332 if (need > (sizeof segnames) - segnames_used) { 333 return -1; 334 } 335 336 ++num_segnames; 337 ++num_slots; 338 339 /* copy it in */ 340 ix = segnames_used; 341 put_refcount(ix, 1); 342 put_slotsize(ix, len + 1); 343 VG_(strcpy)(segnames + ix, name); 344 segnames_used += need; 345 346 /* Add sentinel at end of segment name list */ 347 put_sentinel(segnames_used); 348 349 return ix; 350 } 351 352 /* Debugging output */ 353 void 354 ML_(am_show_segnames)(Int logLevel, const HChar *prefix) 355 { 356 UInt size, ix, i; 357 358 VG_(debugLog)(logLevel, "aspacem", "%u segment names in %u slots\n", 359 num_segnames, num_slots); 360 361 if (freeslot_chain == end_of_chain) 362 VG_(debugLog)(logLevel, "aspacem", "freelist is empty\n"); 363 else 364 VG_(debugLog)(logLevel, "aspacem", "freelist begins at %u\n", 365 freeslot_chain); 366 for (i = 0, ix = overhead; (size = get_slotsize(ix)) != 0; 367 ix += size + overhead, ++i) { 368 if (is_freeslot(ix)) 369 VG_(debugLog)(logLevel, "aspacem", 370 "(%u,%u,0) [free slot: size=%u next=%u]\n", i, ix, 371 get_slotsize(ix), get_slotindex(ix)); 372 else 373 VG_(debugLog)(logLevel, "aspacem", 374 "(%u,%u,%u) %s\n", i, ix, get_refcount(ix), 375 segnames + ix); 376 } 377 } 378 379 /* Returns a sequence number for the fnIdx position in segnames. 380 Used in aspacemgr debug output to associate a segment with 381 a segment name. */ 382 Int 383 ML_(am_segname_get_seqnr)(Int fnIdx) 384 { 385 SizeT ix, size; 386 Int seqnr = -1; 387 388 if (fnIdx == -1) return -1; // shortcut 389 390 for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) { 391 seqnr++; 392 if (ix == fnIdx) 393 return seqnr; 394 } 395 396 // We should always find the given index; something's busted 397 aspacem_assert(0); 398 return -1; 399 } 400 401 /* Initialise the string table for segment names. It contains an empty 402 string which is not referenced. */ 403 void 404 ML_(am_segnames_init)(void) 405 { 406 aspacem_assert(sizeof segnames >= overhead); 407 408 segnames_used = overhead; 409 put_sentinel(segnames_used); 410 } 411 412 /* Increase reference count of segment name identified by IX. */ 413 void 414 ML_(am_inc_refcount)(Int ix) 415 { 416 if (ix != -1) 417 inc_refcount(ix); 418 } 419 420 /* Decrease reference count of segment name identified by IX. */ 421 void 422 ML_(am_dec_refcount)(Int ix) 423 { 424 if (ix != -1) 425 dec_refcount(ix); 426 } 427 428 Bool 429 ML_(am_sane_segname)(Int ix) 430 { 431 return ix == -1 || (ix >= overhead && ix < segnames_used); 432 } 433 434 const HChar * 435 ML_(am_get_segname)(Int ix) 436 { 437 return (ix == -1) ? NULL : segnames + ix; 438 } 439 440 /*--------------------------------------------------------------------*/ 441 /*--- end ---*/ 442 /*--------------------------------------------------------------------*/ 443