Home | History | Annotate | Download | only in m_aspacemgr
      1 /* -*- mode: C; c-basic-offset: 3; -*- */
      2 
      3 /*--------------------------------------------------------------------*/
      4 /*--- Segment name management                 aspacemgr-segnames.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2015-2015  Florian Krohm
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 /* Segment names are stored in a string table.
     32 
     33    The string table is organised into slots of varying length. Slots are
     34    adjacent and there are no holes between slots.
     35    A slot consists of two parts:
     36 
     37    (1) a fixed size overhead of length 4 bytes
     38    (2) a variable size payload of up to 65535 bytes
     39 
     40    The segment name is stored in the payload area. Therefore:
     41    a segment name cannot be longer than 65535 bytes including the '\0'
     42    terminator. This looks like a reasonable limitation.
     43 
     44    Overall slot layout:
     45 
     46        |          4 bytes            |    max 65535 bytes      |
     47        +-----------------------------+-------------------------+
     48        |          overhead           |         payload         |
     49        +-----------------------------+-------------------------+
     50        ^                             ^
     51        |                             |
     52       -4                             +----- seg->fnIdx
     53 
     54    Each slot is uniquely identified by an index which points to the first
     55    byte of the payload area. It is this value that is stored in seg->fnIdx.
     56    Note, that this value is at least 4.
     57 
     58    A slot either holds a string or it is free. The status of a slot is
     59    identified by the leftmost bit in the overhead field, the so called F-bit.
     60    F-bit == 1 means that slot is free; otherwise it is occupied and holds a
     61    string.
     62 
     63    Slot containing a string (segment name):
     64 
     65   bits | 1 |      15      |    16    |
     66        +---+--------------+----------+-------------------------+
     67        | 0 |   refcount   | slotsize | the string including \0 |
     68        +---+--------------+----------+-------------------------+
     69        ^                  ^          ^
     70        |                  |          |
     71       -4                 -2          +----- seg->fnIdx
     72 
     73    Segment names are reference counted. 15 bits are available which allows
     74    for up to 32767 references. If the string is referenced more than 32767
     75    times, the reference count will be frozen and the slot can never
     76    become free. I'm not unduly concerned.
     77    Two bytes are reserved to hold the size of the slot. Well, it's actually
     78    the size of the payload aread (i.e. the size of the slot minus the
     79    overhead). Ah well -- the name sticks.
     80    With two bytes to store the size, the payload area can be at most 65535
     81    bytes large.
     82 
     83    A free slot looks like this:
     84 
     85   bits | 1 |           31            |    16    |
     86        +---+-------------------------+----------+--------------+
     87        | 1 | index of next free slot | slotsize | .. unused .. |
     88        +---+-------------------------+----------+--------------+
     89        ^                             ^
     90        |                             |
     91       -4                             +----- seg->fnIdx
     92 
     93    Free slots are chained together in a singly linked list. An index of
     94    zero indicates the end of the chain. Note that zero cannot conflict
     95    with an index into the string table as the minumum index is at least
     96    four (see above).
     97 
     98    The typical way to traverse the segment names is:
     99 
    100    for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
    101       if (is_freeslot(ix))
    102          do this
    103       else
    104          do that
    105    }
    106 
    107    Important detail: there is a sentinel at the end of the list, namely a
    108    slot with a zero-sized payload area.
    109 
    110    Whenever a new segment name needs to be stashed away, the list of free
    111    slots is traversed and the first slot which is large enough is being taken
    112    (first fit). There will be no splitting of slots, as that complicates
    113    matters and without slot coalescing would lead to memory fragmentation.
    114    So we leave it as is until a use case comes up that needs something better.
    115 */
    116 
    117 #include "pub_core_basics.h"     // types
    118 #include "priv_aspacemgr.h"
    119 
    120 // A few constants.
    121 enum {
    122    refcount_size = sizeof(UShort),
    123    slotsize_size = sizeof(UShort),
    124    overhead = refcount_size + slotsize_size,
    125    max_refcount  = 0x7fff,      // 2 bytes - F-bit
    126    max_slotsize  = 0xffff,      // 2 bytes
    127    max_slotindex = 0x7fffffff,  // 4 bytes - F-bit
    128    fbit_mask = 0x80,
    129    end_of_chain = 0
    130 };
    131 
    132 /* The old segname implementation allowed for 1000 names on Android and
    133    6000 names on other platforms. Each name was allowed to be 1000 characters
    134    long. That was very wasteful. */
    135 #define VG_TABLE_SIZE 1000000
    136 
    137 /* String table for segment names */
    138 
    139 static HChar segnames[VG_TABLE_SIZE];  /* her majesty, the string table */
    140 static SizeT segnames_used = 0;        /* number of bytes used */
    141 static UInt  num_segnames = 0;         /* number of names in string table */
    142 static UInt  num_slots = 0;            /* number of slots in string table */
    143 static UInt  freeslot_chain = end_of_chain;
    144 
    145 static Bool
    146 is_freeslot(UInt ix)
    147 {
    148    aspacem_assert(ix >= overhead && ix <= segnames_used);
    149    return (segnames[ix - 4] & fbit_mask) != 0;
    150 }
    151 
    152 static void
    153 put_slotindex(UInt ix, UInt slotindex)
    154 {
    155    aspacem_assert(ix >= overhead && ix <= segnames_used);
    156    if (slotindex != 0)
    157       aspacem_assert(slotindex >= overhead && slotindex <= segnames_used);
    158 
    159    slotindex |= fbit_mask << 24;
    160    segnames[ix - 1] = slotindex & 0xFF;   slotindex >>= 8;
    161    segnames[ix - 2] = slotindex & 0xFF;   slotindex >>= 8;
    162    segnames[ix - 3] = slotindex & 0xFF;   slotindex >>= 8;
    163    segnames[ix - 4] = slotindex & 0xFF;
    164 }
    165 
    166 static UInt
    167 get_slotindex(UInt ix)
    168 {
    169    aspacem_assert(ix >= overhead && ix <= segnames_used);
    170    aspacem_assert(is_freeslot(ix));
    171 
    172    // Avoid unexpected sign extension
    173    const UChar *unames = (const UChar *)segnames;
    174 
    175    UInt slotindex = 0;
    176    slotindex |= unames[ix - 4];   slotindex <<= 8;
    177    slotindex |= unames[ix - 3];   slotindex <<= 8;
    178    slotindex |= unames[ix - 2];   slotindex <<= 8;
    179    slotindex |= unames[ix - 1];
    180 
    181    return slotindex & max_slotindex;   // removes the F-bit
    182 }
    183 
    184 static void
    185 put_slotsize(UInt ix, UInt size)
    186 {
    187    aspacem_assert(ix >= overhead && ix <= segnames_used);
    188    aspacem_assert(size <= max_slotsize);
    189    segnames[ix - 1] = size & 0xff;
    190    segnames[ix - 2] = size >> 8;
    191 }
    192 
    193 static UInt
    194 get_slotsize(UInt ix)
    195 {
    196    aspacem_assert(ix >= overhead && ix <= segnames_used);
    197 
    198    // Avoid unexpected sign extension
    199    const UChar *unames = (const UChar *)segnames;
    200    if (is_freeslot(ix))
    201       return (unames[ix] << 8) | unames[ix+1];
    202    else
    203       return (unames[ix - 2] << 8) | unames[ix - 1];
    204 }
    205 
    206 static void
    207 put_refcount(UInt ix, UInt rc)
    208 {
    209    aspacem_assert(ix >= overhead && ix <= segnames_used);
    210    aspacem_assert(rc <= max_refcount);
    211    // rc <= max_refcount ensures that the F-bit is zero
    212    segnames[ix - 3] = rc & 0xff;
    213    segnames[ix - 4] = rc >> 8;
    214 }
    215 
    216 static UInt
    217 get_refcount(UInt ix)
    218 {
    219    aspacem_assert(ix >= overhead && ix <= segnames_used);
    220    // must not be a free slot
    221    aspacem_assert(! is_freeslot(ix));
    222 
    223    // Avoid unexpected sign extension
    224    const UChar *unames = (const UChar *)segnames;
    225    return (unames[ix - 4] << 8) | unames[ix - 3];
    226 }
    227 
    228 static void
    229 inc_refcount(UInt ix)
    230 {
    231    aspacem_assert(ix >= overhead && ix <= segnames_used);
    232    UInt rc = get_refcount(ix);
    233    if (rc != max_refcount)
    234       put_refcount(ix, rc + 1);
    235 }
    236 
    237 static void
    238 dec_refcount(UInt ix)
    239 {
    240    aspacem_assert(ix >= overhead && ix <= segnames_used);
    241    UInt rc = get_refcount(ix);
    242    aspacem_assert(rc > 0);
    243    if (rc != max_refcount) {
    244       --rc;
    245       if (rc != 0) {
    246          put_refcount(ix, rc);
    247       } else {
    248          UInt size = get_slotsize(ix);
    249          /* Chain this slot in the freelist */
    250          put_slotindex(ix, freeslot_chain);
    251          get_slotindex(ix);
    252          put_slotsize(ix + slotsize_size, size);
    253          get_slotindex(ix);
    254          freeslot_chain = ix;
    255          --num_segnames;
    256          if (0) VG_(am_show_nsegments)(0, "AFTER DECREASE rc -> 0");
    257       }
    258    }
    259 }
    260 
    261 static void
    262 put_sentinel(UInt ix)
    263 {
    264    aspacem_assert(ix >= overhead && ix <= segnames_used);
    265 
    266    put_refcount(ix, 0);
    267    put_slotsize(ix, 0);
    268 }
    269 
    270 
    271 /* Searches the string table to find an index for the given name.
    272    If none is found, an index is allocated and the name stored.
    273    If running ouf of memory, return -1. */
    274 Int
    275 ML_(am_allocate_segname)(const HChar *name)
    276 {
    277    UInt len, ix, size, next_freeslot;
    278 
    279    aspacem_assert(name);
    280 
    281    if (0) VG_(debugLog)(0, "aspacem", "allocate_segname %s\n", name);
    282 
    283    len = VG_(strlen)(name);
    284 
    285    /* First see if we already have the name. */
    286    for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
    287       if (is_freeslot(ix)) continue;
    288       if (VG_(strcmp)(name, segnames + ix) == 0) {
    289          inc_refcount(ix);
    290          return ix;
    291       }
    292    }
    293 
    294    /* Is there a free slot in the string table from a previously "freed"
    295       segment name ? */
    296    Int prev;
    297    for (prev = -1, ix = freeslot_chain; ix != end_of_chain;
    298         prev = ix, ix = next_freeslot) {
    299       next_freeslot = get_slotindex(ix);  // next in chain
    300       size = get_slotsize(ix);
    301 
    302       if (size >= len + 1) {
    303          /* Note, if the size of the slot is a lot larger than the length
    304             of the string we're about to store in it, we could split the
    305             slot into two. But that complicates matters and as we're not
    306             doing any coalescing of adjacent free slots this could lead to
    307             fragmentation. */
    308          if (prev == -1)
    309             freeslot_chain = next_freeslot;
    310          else
    311             put_slotindex(prev, next_freeslot);
    312          put_refcount(ix, 1);
    313          put_slotsize(ix, size);
    314          VG_(strcpy)(segnames + ix, name);
    315          ++num_segnames;
    316          return ix;
    317       }
    318    }
    319 
    320    /* We need to add a new name. */
    321 
    322    /* Note, that we need at least two bytes in the payload. The reason is
    323       that the payload area will be used to store the size of the slot when
    324       the slot is on the freelist. */
    325    if (len == 0) len = 1;
    326 
    327    /* Is there enough room in the string table? The OVERHEAD is for the
    328       sentinel following the payload of new slot. */
    329    SizeT need = len + 1 + overhead;
    330    if (need > (sizeof segnames) - segnames_used) {
    331       return -1;
    332    }
    333 
    334    ++num_segnames;
    335    ++num_slots;
    336 
    337    /* copy it in */
    338    ix = segnames_used;
    339    put_refcount(ix, 1);
    340    put_slotsize(ix, len + 1);
    341    VG_(strcpy)(segnames + ix, name);
    342    segnames_used += need;
    343 
    344    /* Add sentinel at end of segment name list */
    345    put_sentinel(segnames_used);
    346 
    347    return ix;
    348 }
    349 
    350 /* Debugging output */
    351 void
    352 ML_(am_show_segnames)(Int logLevel, const HChar *prefix)
    353 {
    354    UInt size, ix, i;
    355 
    356    VG_(debugLog)(logLevel, "aspacem", "%u segment names in %u slots\n",
    357                  num_segnames, num_slots);
    358 
    359    if (freeslot_chain == end_of_chain)
    360       VG_(debugLog)(logLevel, "aspacem", "freelist is empty\n");
    361    else
    362       VG_(debugLog)(logLevel, "aspacem", "freelist begins at %u\n",
    363                     freeslot_chain);
    364    for (i = 0, ix = overhead; (size = get_slotsize(ix)) != 0;
    365         ix += size + overhead, ++i) {
    366       if (is_freeslot(ix))
    367          VG_(debugLog)(logLevel, "aspacem",
    368                        "(%u,%u,0) [free slot: size=%u  next=%u]\n", i, ix,
    369                        get_slotsize(ix), get_slotindex(ix));
    370       else
    371          VG_(debugLog)(logLevel, "aspacem",
    372                        "(%u,%u,%u) %s\n", i, ix, get_refcount(ix),
    373                        segnames + ix);
    374    }
    375 }
    376 
    377 /* Returns a sequence number for the fnIdx position in segnames.
    378    Used in aspacemgr debug output to associate a segment with
    379    a segment name. */
    380 Int
    381 ML_(am_segname_get_seqnr)(Int fnIdx)
    382 {
    383    SizeT ix, size;
    384    Int seqnr = -1;
    385 
    386    if (fnIdx == -1) return -1;   // shortcut
    387 
    388    for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
    389       seqnr++;
    390       if (ix == fnIdx)
    391          return seqnr;
    392    }
    393 
    394    // We should always find the given index; something's busted
    395    aspacem_assert(0);
    396    return -1;
    397 }
    398 
    399 /* Initialise the string table for segment names. It contains an empty
    400    string which is not referenced. */
    401 void
    402 ML_(am_segnames_init)(void)
    403 {
    404    aspacem_assert(sizeof segnames >= overhead);
    405 
    406    segnames_used = overhead;
    407    put_sentinel(segnames_used);
    408 }
    409 
    410 /* Increase reference count of segment name identified by IX. */
    411 void
    412 ML_(am_inc_refcount)(Int ix)
    413 {
    414    if (ix != -1)
    415       inc_refcount(ix);
    416 }
    417 
    418 /* Decrease reference count of segment name identified by IX. */
    419 void
    420 ML_(am_dec_refcount)(Int ix)
    421 {
    422    if (ix != -1)
    423       dec_refcount(ix);
    424 }
    425 
    426 Bool
    427 ML_(am_sane_segname)(Int ix)
    428 {
    429    return ix == -1 || (ix >= overhead && ix < segnames_used);
    430 }
    431 
    432 const HChar *
    433 ML_(am_get_segname)(Int ix)
    434 {
    435    return (ix == -1) ? NULL : segnames + ix;
    436 }
    437 
    438 /*--------------------------------------------------------------------*/
    439 /*--- end                                                          ---*/
    440 /*--------------------------------------------------------------------*/
    441