Home | History | Annotate | Download | only in libdb
      1 /**
      2  * @file odb.h
      3  * This file contains various definitions and interface for management
      4  * of in-memory, through mmaped file, growable hash table, that stores
      5  * sample files.
      6  *
      7  * @remark Copyright 2002 OProfile authors
      8  * @remark Read the file COPYING
      9  *
     10  * @author Philippe Elie
     11  */
     12 
     13 #ifndef ODB_HASH_H
     14 #define ODB_HASH_H
     15 
     16 #include <stddef.h>
     17 #include <stdint.h>
     18 
     19 #include "op_list.h"
     20 
     21 /** the type of key. 64-bit because CG needs 32-bit pair {from,to} */
     22 typedef uint64_t odb_key_t;
     23 /** the type of an information in the database */
     24 typedef unsigned int odb_value_t;
     25 /** the type of index (node number), list are implemented through index */
     26 typedef unsigned int odb_index_t;
     27 /** the type store node number */
     28 typedef odb_index_t odb_node_nr_t;
     29 /** store the hash mask, hash table size are always power of two */
     30 typedef odb_index_t odb_hash_mask_t;
     31 
     32 /* there is (bucket factor * nr node) entry in hash table, this can seem
     33  * excessive but hash coding eip don't give a good distributions and our
     34  * goal is to get a O(1) amortized insert time. bucket factor must be a
     35  * power of two. FIXME: see big comment in odb_hash_add_node, you must
     36  * re-enable zeroing hash table if BUCKET_FACTOR > 2 (roughly exact, you
     37  * want to read the comment in odb_hash_add_node() if you tune this define)
     38  */
     39 #define BUCKET_FACTOR 1
     40 
     41 /** a db hash node */
     42 typedef struct {
     43 	odb_key_t key;			/**< eip */
     44 	odb_value_t value;		/**< samples count */
     45 	odb_index_t next;		/**< next entry for this bucket */
     46 } odb_node_t;
     47 
     48 /** the minimal information which must be stored in the file to reload
     49  * properly the data base, following this header is the node array then
     50  * the hash table (when growing we avoid to copy node array)
     51  */
     52 typedef struct {
     53 	odb_node_nr_t size;		/**< in node nr (power of two) */
     54 	odb_node_nr_t current_size;	/**< nr used node + 1, node 0 unused */
     55 	int padding[6];			/**< for padding and future use */
     56 } odb_descr_t;
     57 
     58 /** a "database". this is an in memory only description.
     59  *
     60  * We allow to manage a database inside a mapped file with an "header" of
     61  * unknown size so odb_open get a parameter to specify the size of this header.
     62  * A typical use is:
     63  *
     64  * struct header { int etc; ... };
     65  * odb_open(&hash, filename, ODB_RW, sizeof(header));
     66  * so on this library have no dependency on the header type.
     67  *
     68  * the internal memory layout from base_memory is:
     69  *  the unknown header (sizeof_header)
     70  *  odb_descr_t
     71  *  the node array: (descr->size * sizeof(odb_node_t) entries
     72  *  the hash table: array of odb_index_t indexing the node array
     73  *    (descr->size * BUCKET_FACTOR) entries
     74  */
     75 typedef struct odb_data {
     76 	odb_node_t * node_base;		/**< base memory area of the page */
     77 	odb_index_t * hash_base;	/**< base memory of hash table */
     78 	odb_descr_t * descr;		/**< the current state of database */
     79 	odb_hash_mask_t hash_mask;	/**< == descr->size - 1 */
     80 	unsigned int sizeof_header;	/**< from base_memory to odb header */
     81 	unsigned int offset_node;	/**< from base_memory to node array */
     82 	void * base_memory;		/**< base memory of the maped memory */
     83 	int fd;				/**< mmaped memory file descriptor */
     84 	char * filename;                /**< full path name of sample file */
     85 	int ref_count;                  /**< reference count */
     86 	struct list_head list;          /**< hash bucket list */
     87 } odb_data_t;
     88 
     89 typedef struct {
     90 	odb_data_t * data;
     91 } odb_t;
     92 
     93 #ifdef __cplusplus
     94 extern "C" {
     95 #endif
     96 
     97 /* db_manage.c */
     98 
     99 /** how to open the DB file */
    100 enum odb_rw {
    101 	ODB_RDONLY = 0,	/**< open for read only */
    102 	ODB_RDWR = 1	/**< open for read and/or write */
    103 };
    104 
    105 /**
    106  * odb_init - initialize a DB file
    107  * @param odb the DB file to init
    108  */
    109 void odb_init(odb_t * odb);
    110 
    111 /**
    112  * odb_open - open a DB file
    113  * @param odb the data base object to setup
    114  * @param filename the filename where go the maped memory
    115  * @param rw \enum ODB_RW if opening for writing, else \enum ODB_RDONLY
    116  * @param sizeof_header size of the file header if any
    117  *
    118  * The sizeof_header parameter allows the data file to have a header
    119  * at the start of the file which is skipped.
    120  * odb_open() always preallocate a few number of pages.
    121  * returns 0 on success, errno on failure
    122  */
    123 int odb_open(odb_t * odb, char const * filename,
    124              enum odb_rw rw, size_t sizeof_header);
    125 
    126 /** Close the given ODB file */
    127 void odb_close(odb_t * odb);
    128 
    129 /** return the number of times this sample file is open */
    130 int odb_open_count(odb_t const * odb);
    131 
    132 /** return the start of the mapped data */
    133 void * odb_get_data(odb_t * odb);
    134 
    135 /** issue a msync on the used size of the mmaped file */
    136 void odb_sync(odb_t const * odb);
    137 
    138 /**
    139  * grow the hashtable in such way current_size is the index of the first free
    140  * node. Take care all node pointer can be invalidated by this call.
    141  *
    142  * Node allocation is done in a two step way 1st) ensure a free node exist
    143  * eventually, caller can setup it, 2nd) commit the node allocation with
    144  * odb_commit_reservation().
    145  * This is done in this way to ensure node setup is visible from another
    146  * process like pp tools in an atomic way.
    147  *
    148  * returns 0 on success, non zero on failure in this case this function do
    149  * nothing and errno is set by the first libc call failure allowing to retry
    150  * after cleanup some program resource.
    151  */
    152 int odb_grow_hashtable(odb_data_t * data);
    153 /**
    154  * commit a previously successfull node reservation. This can't fail.
    155  */
    156 static __inline void odb_commit_reservation(odb_data_t * data)
    157 {
    158 	++data->descr->current_size;
    159 }
    160 
    161 /** "immpossible" node number to indicate an error from odb_hash_add_node() */
    162 #define ODB_NODE_NR_INVALID ((odb_node_nr_t)-1)
    163 
    164 /* db_debug.c */
    165 /** check that the hash is well built */
    166 int odb_check_hash(odb_t const * odb);
    167 
    168 /* db_stat.c */
    169 typedef struct odb_hash_stat_t odb_hash_stat_t;
    170 odb_hash_stat_t * odb_hash_stat(odb_t const * odb);
    171 void odb_hash_display_stat(odb_hash_stat_t const * stats);
    172 void odb_hash_free_stat(odb_hash_stat_t * stats);
    173 
    174 /* db_insert.c */
    175 /** update info at key by incrementing its associated value by one,
    176  * if the key does not exist a new node is created and the value associated
    177  * is set to one.
    178  *
    179  * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
    180  */
    181 int odb_update_node(odb_t * odb, odb_key_t key);
    182 
    183 /**
    184  * odb_update_node_with_offset
    185  * @param odb the data base object to setup
    186  * @param key the hash key
    187  * @param offset the offset to be added
    188  *
    189  * update info at key by adding the specified offset to its associated value,
    190  * if the key does not exist a new node is created and the value associated
    191  * is set to offset.
    192  *
    193  * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
    194  */
    195 int odb_update_node_with_offset(odb_t * odb,
    196 				odb_key_t key,
    197 				unsigned long int offset);
    198 
    199 /** Add a new node w/o regarding if a node with the same key already exists
    200  *
    201  * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
    202  */
    203 int odb_add_node(odb_t * odb, odb_key_t key, odb_value_t value);
    204 
    205 /* db_travel.c */
    206 /**
    207  * return a base pointer to the node array and number of node in this array
    208  * caller then will iterate through:
    209  *
    210  * odb_node_nr_t node_nr, pos;
    211  * odb_node_t * node = odb_get_iterator(odb, &node_nr);
    212  *	for ( pos = 0 ; pos < node_nr ; ++pos)
    213  *		// do something
    214  *
    215  *  note than caller does not need to filter nil key as it's a valid key,
    216  * The returned range is all valid (i.e. should never contain zero value).
    217  */
    218 odb_node_t * odb_get_iterator(odb_t const * odb, odb_node_nr_t * nr);
    219 
    220 static __inline unsigned int
    221 odb_do_hash(odb_data_t const * data, odb_key_t value)
    222 {
    223 	/* FIXME: better hash for eip value, needs to instrument code
    224 	 * and do a lot of tests ... */
    225 	/* trying to combine high order bits his a no-op: inside a binary image
    226 	 * high order bits don't vary a lot, hash table start with 7 bits mask
    227 	 * so this hash coding use bits 0-7, 8-15. Hash table is stored in
    228 	 * files avoiding to rebuilding them at profiling re-start so
    229 	 * on changing do_hash() change the file format!
    230 	 */
    231 	uint32_t temp = (value >> 32) ^ value;
    232 	return ((temp << 0) ^ (temp >> 8)) & data->hash_mask;
    233 }
    234 
    235 #ifdef __cplusplus
    236 }
    237 #endif
    238 
    239 #endif /* !ODB_H */
    240