Home | History | Annotate | Download | only in lib
      1 /*
      2  * lib/prio_tree.c - priority search tree
      3  *
      4  * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh (at) umich.edu>
      5  *
      6  * This file is released under the GPL v2.
      7  *
      8  * Based on the radix priority search tree proposed by Edward M. McCreight
      9  * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985
     10  *
     11  * 02Feb2004	Initial version
     12  */
     13 
     14 #include <stdlib.h>
     15 #include <limits.h>
     16 
     17 #include "../compiler/compiler.h"
     18 #include "prio_tree.h"
     19 
     20 #define ARRAY_SIZE(x)    (sizeof((x)) / (sizeof((x)[0])))
     21 
     22 /*
     23  * A clever mix of heap and radix trees forms a radix priority search tree (PST)
     24  * which is useful for storing intervals, e.g, we can consider a vma as a closed
     25  * interval of file pages [offset_begin, offset_end], and store all vmas that
     26  * map a file in a PST. Then, using the PST, we can answer a stabbing query,
     27  * i.e., selecting a set of stored intervals (vmas) that overlap with (map) a
     28  * given input interval X (a set of consecutive file pages), in "O(log n + m)"
     29  * time where 'log n' is the height of the PST, and 'm' is the number of stored
     30  * intervals (vmas) that overlap (map) with the input interval X (the set of
     31  * consecutive file pages).
     32  *
     33  * In our implementation, we store closed intervals of the form [radix_index,
     34  * heap_index]. We assume that always radix_index <= heap_index. McCreight's PST
     35  * is designed for storing intervals with unique radix indices, i.e., each
     36  * interval have different radix_index. However, this limitation can be easily
     37  * overcome by using the size, i.e., heap_index - radix_index, as part of the
     38  * index, so we index the tree using [(radix_index,size), heap_index].
     39  *
     40  * When the above-mentioned indexing scheme is used, theoretically, in a 32 bit
     41  * machine, the maximum height of a PST can be 64. We can use a balanced version
     42  * of the priority search tree to optimize the tree height, but the balanced
     43  * tree proposed by McCreight is too complex and memory-hungry for our purpose.
     44  */
     45 
     46 static void get_index(const struct prio_tree_node *node,
     47 		      unsigned long *radix, unsigned long *heap)
     48 {
     49 	*radix = node->start;
     50 	*heap = node->last;
     51 }
     52 
     53 static unsigned long index_bits_to_maxindex[BITS_PER_LONG];
     54 
     55 static void fio_init prio_tree_init(void)
     56 {
     57 	unsigned int i;
     58 
     59 	for (i = 0; i < ARRAY_SIZE(index_bits_to_maxindex) - 1; i++)
     60 		index_bits_to_maxindex[i] = (1UL << (i + 1)) - 1;
     61 	index_bits_to_maxindex[ARRAY_SIZE(index_bits_to_maxindex) - 1] = ~0UL;
     62 }
     63 
     64 /*
     65  * Maximum heap_index that can be stored in a PST with index_bits bits
     66  */
     67 static inline unsigned long prio_tree_maxindex(unsigned int bits)
     68 {
     69 	return index_bits_to_maxindex[bits - 1];
     70 }
     71 
     72 /*
     73  * Extend a priority search tree so that it can store a node with heap_index
     74  * max_heap_index. In the worst case, this algorithm takes O((log n)^2).
     75  * However, this function is used rarely and the common case performance is
     76  * not bad.
     77  */
     78 static struct prio_tree_node *prio_tree_expand(struct prio_tree_root *root,
     79 		struct prio_tree_node *node, unsigned long max_heap_index)
     80 {
     81 	struct prio_tree_node *first = NULL, *prev, *last = NULL;
     82 
     83 	if (max_heap_index > prio_tree_maxindex(root->index_bits))
     84 		root->index_bits++;
     85 
     86 	while (max_heap_index > prio_tree_maxindex(root->index_bits)) {
     87 		root->index_bits++;
     88 
     89 		if (prio_tree_empty(root))
     90 			continue;
     91 
     92 		if (first == NULL) {
     93 			first = root->prio_tree_node;
     94 			prio_tree_remove(root, root->prio_tree_node);
     95 			INIT_PRIO_TREE_NODE(first);
     96 			last = first;
     97 		} else {
     98 			prev = last;
     99 			last = root->prio_tree_node;
    100 			prio_tree_remove(root, root->prio_tree_node);
    101 			INIT_PRIO_TREE_NODE(last);
    102 			prev->left = last;
    103 			last->parent = prev;
    104 		}
    105 	}
    106 
    107 	INIT_PRIO_TREE_NODE(node);
    108 
    109 	if (first) {
    110 		node->left = first;
    111 		first->parent = node;
    112 	} else
    113 		last = node;
    114 
    115 	if (!prio_tree_empty(root)) {
    116 		last->left = root->prio_tree_node;
    117 		last->left->parent = last;
    118 	}
    119 
    120 	root->prio_tree_node = node;
    121 	return node;
    122 }
    123 
    124 /*
    125  * Replace a prio_tree_node with a new node and return the old node
    126  */
    127 struct prio_tree_node *prio_tree_replace(struct prio_tree_root *root,
    128 		struct prio_tree_node *old, struct prio_tree_node *node)
    129 {
    130 	INIT_PRIO_TREE_NODE(node);
    131 
    132 	if (prio_tree_root(old)) {
    133 		assert(root->prio_tree_node == old);
    134 		/*
    135 		 * We can reduce root->index_bits here. However, it is complex
    136 		 * and does not help much to improve performance (IMO).
    137 		 */
    138 		node->parent = node;
    139 		root->prio_tree_node = node;
    140 	} else {
    141 		node->parent = old->parent;
    142 		if (old->parent->left == old)
    143 			old->parent->left = node;
    144 		else
    145 			old->parent->right = node;
    146 	}
    147 
    148 	if (!prio_tree_left_empty(old)) {
    149 		node->left = old->left;
    150 		old->left->parent = node;
    151 	}
    152 
    153 	if (!prio_tree_right_empty(old)) {
    154 		node->right = old->right;
    155 		old->right->parent = node;
    156 	}
    157 
    158 	return old;
    159 }
    160 
    161 /*
    162  * Insert a prio_tree_node @node into a radix priority search tree @root. The
    163  * algorithm typically takes O(log n) time where 'log n' is the number of bits
    164  * required to represent the maximum heap_index. In the worst case, the algo
    165  * can take O((log n)^2) - check prio_tree_expand.
    166  *
    167  * If a prior node with same radix_index and heap_index is already found in
    168  * the tree, then returns the address of the prior node. Otherwise, inserts
    169  * @node into the tree and returns @node.
    170  */
    171 struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root,
    172 		struct prio_tree_node *node)
    173 {
    174 	struct prio_tree_node *cur, *res = node;
    175 	unsigned long radix_index, heap_index;
    176 	unsigned long r_index, h_index, index, mask;
    177 	int size_flag = 0;
    178 
    179 	get_index(node, &radix_index, &heap_index);
    180 
    181 	if (prio_tree_empty(root) ||
    182 			heap_index > prio_tree_maxindex(root->index_bits))
    183 		return prio_tree_expand(root, node, heap_index);
    184 
    185 	cur = root->prio_tree_node;
    186 	mask = 1UL << (root->index_bits - 1);
    187 
    188 	while (mask) {
    189 		get_index(cur, &r_index, &h_index);
    190 
    191 		if (r_index == radix_index && h_index == heap_index)
    192 			return cur;
    193 
    194                 if (h_index < heap_index ||
    195 		    (h_index == heap_index && r_index > radix_index)) {
    196 			struct prio_tree_node *tmp = node;
    197 			node = prio_tree_replace(root, cur, node);
    198 			cur = tmp;
    199 			/* swap indices */
    200 			index = r_index;
    201 			r_index = radix_index;
    202 			radix_index = index;
    203 			index = h_index;
    204 			h_index = heap_index;
    205 			heap_index = index;
    206 		}
    207 
    208 		if (size_flag)
    209 			index = heap_index - radix_index;
    210 		else
    211 			index = radix_index;
    212 
    213 		if (index & mask) {
    214 			if (prio_tree_right_empty(cur)) {
    215 				INIT_PRIO_TREE_NODE(node);
    216 				cur->right = node;
    217 				node->parent = cur;
    218 				return res;
    219 			} else
    220 				cur = cur->right;
    221 		} else {
    222 			if (prio_tree_left_empty(cur)) {
    223 				INIT_PRIO_TREE_NODE(node);
    224 				cur->left = node;
    225 				node->parent = cur;
    226 				return res;
    227 			} else
    228 				cur = cur->left;
    229 		}
    230 
    231 		mask >>= 1;
    232 
    233 		if (!mask) {
    234 			mask = 1UL << (BITS_PER_LONG - 1);
    235 			size_flag = 1;
    236 		}
    237 	}
    238 	/* Should not reach here */
    239 	assert(0);
    240 	return NULL;
    241 }
    242 
    243 /*
    244  * Remove a prio_tree_node @node from a radix priority search tree @root. The
    245  * algorithm takes O(log n) time where 'log n' is the number of bits required
    246  * to represent the maximum heap_index.
    247  */
    248 void prio_tree_remove(struct prio_tree_root *root, struct prio_tree_node *node)
    249 {
    250 	struct prio_tree_node *cur;
    251 	unsigned long r_index, h_index_right, h_index_left;
    252 
    253 	cur = node;
    254 
    255 	while (!prio_tree_left_empty(cur) || !prio_tree_right_empty(cur)) {
    256 		if (!prio_tree_left_empty(cur))
    257 			get_index(cur->left, &r_index, &h_index_left);
    258 		else {
    259 			cur = cur->right;
    260 			continue;
    261 		}
    262 
    263 		if (!prio_tree_right_empty(cur))
    264 			get_index(cur->right, &r_index, &h_index_right);
    265 		else {
    266 			cur = cur->left;
    267 			continue;
    268 		}
    269 
    270 		/* both h_index_left and h_index_right cannot be 0 */
    271 		if (h_index_left >= h_index_right)
    272 			cur = cur->left;
    273 		else
    274 			cur = cur->right;
    275 	}
    276 
    277 	if (prio_tree_root(cur)) {
    278 		assert(root->prio_tree_node == cur);
    279 		INIT_PRIO_TREE_ROOT(root);
    280 		return;
    281 	}
    282 
    283 	if (cur->parent->right == cur)
    284 		cur->parent->right = cur->parent;
    285 	else
    286 		cur->parent->left = cur->parent;
    287 
    288 	while (cur != node)
    289 		cur = prio_tree_replace(root, cur->parent, cur);
    290 }
    291 
    292 /*
    293  * Following functions help to enumerate all prio_tree_nodes in the tree that
    294  * overlap with the input interval X [radix_index, heap_index]. The enumeration
    295  * takes O(log n + m) time where 'log n' is the height of the tree (which is
    296  * proportional to # of bits required to represent the maximum heap_index) and
    297  * 'm' is the number of prio_tree_nodes that overlap the interval X.
    298  */
    299 
    300 static struct prio_tree_node *prio_tree_left(struct prio_tree_iter *iter,
    301 		unsigned long *r_index, unsigned long *h_index)
    302 {
    303 	if (prio_tree_left_empty(iter->cur))
    304 		return NULL;
    305 
    306 	get_index(iter->cur->left, r_index, h_index);
    307 
    308 	if (iter->r_index <= *h_index) {
    309 		iter->cur = iter->cur->left;
    310 		iter->mask >>= 1;
    311 		if (iter->mask) {
    312 			if (iter->size_level)
    313 				iter->size_level++;
    314 		} else {
    315 			if (iter->size_level) {
    316 				assert(prio_tree_left_empty(iter->cur));
    317 				assert(prio_tree_right_empty(iter->cur));
    318 				iter->size_level++;
    319 				iter->mask = ULONG_MAX;
    320 			} else {
    321 				iter->size_level = 1;
    322 				iter->mask = 1UL << (BITS_PER_LONG - 1);
    323 			}
    324 		}
    325 		return iter->cur;
    326 	}
    327 
    328 	return NULL;
    329 }
    330 
    331 static struct prio_tree_node *prio_tree_right(struct prio_tree_iter *iter,
    332 		unsigned long *r_index, unsigned long *h_index)
    333 {
    334 	unsigned long value;
    335 
    336 	if (prio_tree_right_empty(iter->cur))
    337 		return NULL;
    338 
    339 	if (iter->size_level)
    340 		value = iter->value;
    341 	else
    342 		value = iter->value | iter->mask;
    343 
    344 	if (iter->h_index < value)
    345 		return NULL;
    346 
    347 	get_index(iter->cur->right, r_index, h_index);
    348 
    349 	if (iter->r_index <= *h_index) {
    350 		iter->cur = iter->cur->right;
    351 		iter->mask >>= 1;
    352 		iter->value = value;
    353 		if (iter->mask) {
    354 			if (iter->size_level)
    355 				iter->size_level++;
    356 		} else {
    357 			if (iter->size_level) {
    358 				assert(prio_tree_left_empty(iter->cur));
    359 				assert(prio_tree_right_empty(iter->cur));
    360 				iter->size_level++;
    361 				iter->mask = ULONG_MAX;
    362 			} else {
    363 				iter->size_level = 1;
    364 				iter->mask = 1UL << (BITS_PER_LONG - 1);
    365 			}
    366 		}
    367 		return iter->cur;
    368 	}
    369 
    370 	return NULL;
    371 }
    372 
    373 static struct prio_tree_node *prio_tree_parent(struct prio_tree_iter *iter)
    374 {
    375 	iter->cur = iter->cur->parent;
    376 	if (iter->mask == ULONG_MAX)
    377 		iter->mask = 1UL;
    378 	else if (iter->size_level == 1)
    379 		iter->mask = 1UL;
    380 	else
    381 		iter->mask <<= 1;
    382 	if (iter->size_level)
    383 		iter->size_level--;
    384 	if (!iter->size_level && (iter->value & iter->mask))
    385 		iter->value ^= iter->mask;
    386 	return iter->cur;
    387 }
    388 
    389 static inline int overlap(struct prio_tree_iter *iter,
    390 		unsigned long r_index, unsigned long h_index)
    391 {
    392 	return iter->h_index >= r_index && iter->r_index <= h_index;
    393 }
    394 
    395 /*
    396  * prio_tree_first:
    397  *
    398  * Get the first prio_tree_node that overlaps with the interval [radix_index,
    399  * heap_index]. Note that always radix_index <= heap_index. We do a pre-order
    400  * traversal of the tree.
    401  */
    402 static struct prio_tree_node *prio_tree_first(struct prio_tree_iter *iter)
    403 {
    404 	struct prio_tree_root *root;
    405 	unsigned long r_index, h_index;
    406 
    407 	INIT_PRIO_TREE_ITER(iter);
    408 
    409 	root = iter->root;
    410 	if (prio_tree_empty(root))
    411 		return NULL;
    412 
    413 	get_index(root->prio_tree_node, &r_index, &h_index);
    414 
    415 	if (iter->r_index > h_index)
    416 		return NULL;
    417 
    418 	iter->mask = 1UL << (root->index_bits - 1);
    419 	iter->cur = root->prio_tree_node;
    420 
    421 	while (1) {
    422 		if (overlap(iter, r_index, h_index))
    423 			return iter->cur;
    424 
    425 		if (prio_tree_left(iter, &r_index, &h_index))
    426 			continue;
    427 
    428 		if (prio_tree_right(iter, &r_index, &h_index))
    429 			continue;
    430 
    431 		break;
    432 	}
    433 	return NULL;
    434 }
    435 
    436 /*
    437  * prio_tree_next:
    438  *
    439  * Get the next prio_tree_node that overlaps with the input interval in iter
    440  */
    441 struct prio_tree_node *prio_tree_next(struct prio_tree_iter *iter)
    442 {
    443 	unsigned long r_index, h_index;
    444 
    445 	if (iter->cur == NULL)
    446 		return prio_tree_first(iter);
    447 
    448 repeat:
    449 	while (prio_tree_left(iter, &r_index, &h_index))
    450 		if (overlap(iter, r_index, h_index))
    451 			return iter->cur;
    452 
    453 	while (!prio_tree_right(iter, &r_index, &h_index)) {
    454 	    	while (!prio_tree_root(iter->cur) &&
    455 				iter->cur->parent->right == iter->cur)
    456 			prio_tree_parent(iter);
    457 
    458 		if (prio_tree_root(iter->cur))
    459 			return NULL;
    460 
    461 		prio_tree_parent(iter);
    462 	}
    463 
    464 	if (overlap(iter, r_index, h_index))
    465 		return iter->cur;
    466 
    467 	goto repeat;
    468 }
    469