Home | History | Annotate | Download | only in m_aspacemgr
      1 /* -*- mode: C; c-basic-offset: 3; -*- */
      2 
      3 /*--------------------------------------------------------------------*/
      4 /*--- Segment name management                 aspacemgr-segnames.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2015-2015  Florian Krohm
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 /* Segment names are stored in a string table.
     32 
     33    The string table is organised into slots of varying length. Slots are
     34    adjacent and there are no holes between slots.
     35    A slot consists of two parts:
     36 
     37    (1) a fixed size overhead of length 4 bytes
     38    (2) a variable size payload of up to 65535 bytes
     39 
     40    The segment name is stored in the payload area. Therefore:
     41    a segment name cannot be longer than 65535 bytes including the '\0'
     42    terminator. This looks like a reasonable limitation.
     43 
     44    Overall slot layout:
     45 
     46        |          4 bytes            |    max 65535 bytes      |
     47        +-----------------------------+-------------------------+
     48        |          overhead           |         payload         |
     49        +-----------------------------+-------------------------+
     50        ^                             ^
     51        |                             |
     52       -4                             +----- seg->fnIdx
     53 
     54    Each slot is uniquely identified by an index which points to the first
     55    byte of the payload area. It is this value that is stored in seg->fnIdx.
     56    Note, that this value is at least 4.
     57 
     58    A slot either holds a string or it is free. The status of a slot is
     59    identified by the leftmost bit in the overhead field, the so called F-bit.
     60    F-bit == 1 means that slot is free; otherwise it is occupied and holds a
     61    string.
     62 
     63    Slot containing a string (segment name):
     64 
     65   bits | 1 |      15      |    16    |
     66        +---+--------------+----------+-------------------------+
     67        | 0 |   refcount   | slotsize | the string including \0 |
     68        +---+--------------+----------+-------------------------+
     69        ^                  ^          ^
     70        |                  |          |
     71       -4                 -2          +----- seg->fnIdx
     72 
     73    Segment names are reference counted. 15 bits are available which allows
     74    for up to 32767 references. If the string is referenced more than 32767
     75    times, the reference count will be frozen and the slot can never
     76    become free. I'm not unduly concerned.
     77    Two bytes are reserved to hold the size of the slot. Well, it's actually
     78    the size of the payload aread (i.e. the size of the slot minus the
     79    overhead). Ah well -- the name sticks.
     80    With two bytes to store the size, the payload area can be at most 65535
     81    bytes large.
     82 
     83    A free slot looks like this:
     84 
     85   bits | 1 |           31            |    16    |
     86        +---+-------------------------+----------+--------------+
     87        | 1 | index of next free slot | slotsize | .. unused .. |
     88        +---+-------------------------+----------+--------------+
     89        ^                             ^
     90        |                             |
     91       -4                             +----- seg->fnIdx
     92 
     93    Free slots are chained together in a singly linked list. An index of
     94    zero indicates the end of the chain. Note that zero cannot conflict
     95    with an index into the string table as the minimum index is at least
     96    four (see above).
     97 
     98    The typical way to traverse the segment names is:
     99 
    100    for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
    101       if (is_freeslot(ix))
    102          do this
    103       else
    104          do that
    105    }
    106 
    107    Important detail: there is a sentinel at the end of the list, namely a
    108    slot with a zero-sized payload area.
    109 
    110    Whenever a new segment name needs to be stashed away, the list of free
    111    slots is traversed and the first slot which is large enough is being taken
    112    (first fit). There will be no splitting of slots, as that complicates
    113    matters and without slot coalescing would lead to memory fragmentation.
    114    So we leave it as is until a use case comes up that needs something better.
    115 */
    116 
    117 #include "pub_core_basics.h"     // types
    118 #include "priv_aspacemgr.h"
    119 
    120 // A few constants.
    121 enum {
    122    refcount_size = sizeof(UShort),
    123    slotsize_size = sizeof(UShort),
    124    overhead = refcount_size + slotsize_size,
    125    max_refcount  = 0x7fff,      // 2 bytes - F-bit
    126    max_slotsize  = 0xffff,      // 2 bytes
    127    max_slotindex = 0x7fffffff,  // 4 bytes - F-bit
    128    fbit_mask_value = 0x80,
    129    end_of_chain = 0
    130 };
    131 
    132 static const UInt fbit_mask = fbit_mask_value;
    133 
    134 /* The old segname implementation allowed for 1000 names on Android and
    135    6000 names on other platforms. Each name was allowed to be 1000 characters
    136    long. That was very wasteful. */
    137 #define VG_TABLE_SIZE 1000000
    138 
    139 /* String table for segment names */
    140 
    141 static HChar segnames[VG_TABLE_SIZE];  /* her majesty, the string table */
    142 static SizeT segnames_used = 0;        /* number of bytes used */
    143 static UInt  num_segnames = 0;         /* number of names in string table */
    144 static UInt  num_slots = 0;            /* number of slots in string table */
    145 static UInt  freeslot_chain = end_of_chain;
    146 
    147 static Bool
    148 is_freeslot(UInt ix)
    149 {
    150    aspacem_assert(ix >= overhead && ix <= segnames_used);
    151    return (segnames[ix - 4] & fbit_mask) != 0;
    152 }
    153 
    154 static void
    155 put_slotindex(UInt ix, UInt slotindex)
    156 {
    157    aspacem_assert(ix >= overhead && ix <= segnames_used);
    158    if (slotindex != 0)
    159       aspacem_assert(slotindex >= overhead && slotindex <= segnames_used);
    160 
    161    slotindex |= fbit_mask << 24;
    162    segnames[ix - 1] = slotindex & 0xFF;   slotindex >>= 8;
    163    segnames[ix - 2] = slotindex & 0xFF;   slotindex >>= 8;
    164    segnames[ix - 3] = slotindex & 0xFF;   slotindex >>= 8;
    165    segnames[ix - 4] = slotindex & 0xFF;
    166 }
    167 
    168 static UInt
    169 get_slotindex(UInt ix)
    170 {
    171    aspacem_assert(ix >= overhead && ix <= segnames_used);
    172    aspacem_assert(is_freeslot(ix));
    173 
    174    // Avoid unexpected sign extension
    175    const UChar *unames = (const UChar *)segnames;
    176 
    177    UInt slotindex = 0;
    178    slotindex |= unames[ix - 4];   slotindex <<= 8;
    179    slotindex |= unames[ix - 3];   slotindex <<= 8;
    180    slotindex |= unames[ix - 2];   slotindex <<= 8;
    181    slotindex |= unames[ix - 1];
    182 
    183    return slotindex & max_slotindex;   // removes the F-bit
    184 }
    185 
    186 static void
    187 put_slotsize(UInt ix, UInt size)
    188 {
    189    aspacem_assert(ix >= overhead && ix <= segnames_used);
    190    aspacem_assert(size <= max_slotsize);
    191    segnames[ix - 1] = size & 0xff;
    192    segnames[ix - 2] = size >> 8;
    193 }
    194 
    195 static UInt
    196 get_slotsize(UInt ix)
    197 {
    198    aspacem_assert(ix >= overhead && ix <= segnames_used);
    199 
    200    // Avoid unexpected sign extension
    201    const UChar *unames = (const UChar *)segnames;
    202    if (is_freeslot(ix))
    203       return (unames[ix] << 8) | unames[ix+1];
    204    else
    205       return (unames[ix - 2] << 8) | unames[ix - 1];
    206 }
    207 
    208 static void
    209 put_refcount(UInt ix, UInt rc)
    210 {
    211    aspacem_assert(ix >= overhead && ix <= segnames_used);
    212    aspacem_assert(rc <= max_refcount);
    213    // rc <= max_refcount ensures that the F-bit is zero
    214    segnames[ix - 3] = rc & 0xff;
    215    segnames[ix - 4] = rc >> 8;
    216 }
    217 
    218 static UInt
    219 get_refcount(UInt ix)
    220 {
    221    aspacem_assert(ix >= overhead && ix <= segnames_used);
    222    // must not be a free slot
    223    aspacem_assert(! is_freeslot(ix));
    224 
    225    // Avoid unexpected sign extension
    226    const UChar *unames = (const UChar *)segnames;
    227    return (unames[ix - 4] << 8) | unames[ix - 3];
    228 }
    229 
    230 static void
    231 inc_refcount(UInt ix)
    232 {
    233    aspacem_assert(ix >= overhead && ix <= segnames_used);
    234    UInt rc = get_refcount(ix);
    235    if (rc != max_refcount)
    236       put_refcount(ix, rc + 1);
    237 }
    238 
    239 static void
    240 dec_refcount(UInt ix)
    241 {
    242    aspacem_assert(ix >= overhead && ix <= segnames_used);
    243    UInt rc = get_refcount(ix);
    244    aspacem_assert(rc > 0);
    245    if (rc != max_refcount) {
    246       --rc;
    247       if (rc != 0) {
    248          put_refcount(ix, rc);
    249       } else {
    250          UInt size = get_slotsize(ix);
    251          /* Chain this slot in the freelist */
    252          put_slotindex(ix, freeslot_chain);
    253          get_slotindex(ix);
    254          put_slotsize(ix + slotsize_size, size);
    255          get_slotindex(ix);
    256          freeslot_chain = ix;
    257          --num_segnames;
    258          if (0) VG_(am_show_nsegments)(0, "AFTER DECREASE rc -> 0");
    259       }
    260    }
    261 }
    262 
    263 static void
    264 put_sentinel(UInt ix)
    265 {
    266    aspacem_assert(ix >= overhead && ix <= segnames_used);
    267 
    268    put_refcount(ix, 0);
    269    put_slotsize(ix, 0);
    270 }
    271 
    272 
    273 /* Searches the string table to find an index for the given name.
    274    If none is found, an index is allocated and the name stored.
    275    If running ouf of memory, return -1. */
    276 Int
    277 ML_(am_allocate_segname)(const HChar *name)
    278 {
    279    UInt len, ix, size, next_freeslot;
    280 
    281    aspacem_assert(name);
    282 
    283    if (0) VG_(debugLog)(0, "aspacem", "allocate_segname %s\n", name);
    284 
    285    len = VG_(strlen)(name);
    286 
    287    /* First see if we already have the name. */
    288    for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
    289       if (is_freeslot(ix)) continue;
    290       if (VG_(strcmp)(name, segnames + ix) == 0) {
    291          inc_refcount(ix);
    292          return ix;
    293       }
    294    }
    295 
    296    /* Is there a free slot in the string table from a previously "freed"
    297       segment name ? */
    298    Int prev;
    299    for (prev = -1, ix = freeslot_chain; ix != end_of_chain;
    300         prev = ix, ix = next_freeslot) {
    301       next_freeslot = get_slotindex(ix);  // next in chain
    302       size = get_slotsize(ix);
    303 
    304       if (size >= len + 1) {
    305          /* Note, if the size of the slot is a lot larger than the length
    306             of the string we're about to store in it, we could split the
    307             slot into two. But that complicates matters and as we're not
    308             doing any coalescing of adjacent free slots this could lead to
    309             fragmentation. */
    310          if (prev == -1)
    311             freeslot_chain = next_freeslot;
    312          else
    313             put_slotindex(prev, next_freeslot);
    314          put_refcount(ix, 1);
    315          put_slotsize(ix, size);
    316          VG_(strcpy)(segnames + ix, name);
    317          ++num_segnames;
    318          return ix;
    319       }
    320    }
    321 
    322    /* We need to add a new name. */
    323 
    324    /* Note, that we need at least two bytes in the payload. The reason is
    325       that the payload area will be used to store the size of the slot when
    326       the slot is on the freelist. */
    327    if (len == 0) len = 1;
    328 
    329    /* Is there enough room in the string table? The OVERHEAD is for the
    330       sentinel following the payload of new slot. */
    331    SizeT need = len + 1 + overhead;
    332    if (need > (sizeof segnames) - segnames_used) {
    333       return -1;
    334    }
    335 
    336    ++num_segnames;
    337    ++num_slots;
    338 
    339    /* copy it in */
    340    ix = segnames_used;
    341    put_refcount(ix, 1);
    342    put_slotsize(ix, len + 1);
    343    VG_(strcpy)(segnames + ix, name);
    344    segnames_used += need;
    345 
    346    /* Add sentinel at end of segment name list */
    347    put_sentinel(segnames_used);
    348 
    349    return ix;
    350 }
    351 
    352 /* Debugging output */
    353 void
    354 ML_(am_show_segnames)(Int logLevel, const HChar *prefix)
    355 {
    356    UInt size, ix, i;
    357 
    358    VG_(debugLog)(logLevel, "aspacem", "%u segment names in %u slots\n",
    359                  num_segnames, num_slots);
    360 
    361    if (freeslot_chain == end_of_chain)
    362       VG_(debugLog)(logLevel, "aspacem", "freelist is empty\n");
    363    else
    364       VG_(debugLog)(logLevel, "aspacem", "freelist begins at %u\n",
    365                     freeslot_chain);
    366    for (i = 0, ix = overhead; (size = get_slotsize(ix)) != 0;
    367         ix += size + overhead, ++i) {
    368       if (is_freeslot(ix))
    369          VG_(debugLog)(logLevel, "aspacem",
    370                        "(%u,%u,0) [free slot: size=%u  next=%u]\n", i, ix,
    371                        get_slotsize(ix), get_slotindex(ix));
    372       else
    373          VG_(debugLog)(logLevel, "aspacem",
    374                        "(%u,%u,%u) %s\n", i, ix, get_refcount(ix),
    375                        segnames + ix);
    376    }
    377 }
    378 
    379 /* Returns a sequence number for the fnIdx position in segnames.
    380    Used in aspacemgr debug output to associate a segment with
    381    a segment name. */
    382 Int
    383 ML_(am_segname_get_seqnr)(Int fnIdx)
    384 {
    385    SizeT ix, size;
    386    Int seqnr = -1;
    387 
    388    if (fnIdx == -1) return -1;   // shortcut
    389 
    390    for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
    391       seqnr++;
    392       if (ix == fnIdx)
    393          return seqnr;
    394    }
    395 
    396    // We should always find the given index; something's busted
    397    aspacem_assert(0);
    398    return -1;
    399 }
    400 
    401 /* Initialise the string table for segment names. It contains an empty
    402    string which is not referenced. */
    403 void
    404 ML_(am_segnames_init)(void)
    405 {
    406    aspacem_assert(sizeof segnames >= overhead);
    407 
    408    segnames_used = overhead;
    409    put_sentinel(segnames_used);
    410 }
    411 
    412 /* Increase reference count of segment name identified by IX. */
    413 void
    414 ML_(am_inc_refcount)(Int ix)
    415 {
    416    if (ix != -1)
    417       inc_refcount(ix);
    418 }
    419 
    420 /* Decrease reference count of segment name identified by IX. */
    421 void
    422 ML_(am_dec_refcount)(Int ix)
    423 {
    424    if (ix != -1)
    425       dec_refcount(ix);
    426 }
    427 
    428 Bool
    429 ML_(am_sane_segname)(Int ix)
    430 {
    431    return ix == -1 || (ix >= overhead && ix < segnames_used);
    432 }
    433 
    434 const HChar *
    435 ML_(am_get_segname)(Int ix)
    436 {
    437    return (ix == -1) ? NULL : segnames + ix;
    438 }
    439 
    440 /*--------------------------------------------------------------------*/
    441 /*--- end                                                          ---*/
    442 /*--------------------------------------------------------------------*/
    443