Home | History | Annotate | Download | only in lib
      1 /*
      2  * lib/prio_tree.c - priority search tree
      3  *
      4  * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh (at) umich.edu>
      5  *
      6  * This file is released under the GPL v2.
      7  *
      8  * Based on the radix priority search tree proposed by Edward M. McCreight
      9  * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985
     10  *
     11  * 02Feb2004	Initial version
     12  */
     13 
     14 #include <stdlib.h>
     15 #include <limits.h>
     16 #include "../fio.h"
     17 #include "prio_tree.h"
     18 
     19 /*
     20  * A clever mix of heap and radix trees forms a radix priority search tree (PST)
     21  * which is useful for storing intervals, e.g, we can consider a vma as a closed
     22  * interval of file pages [offset_begin, offset_end], and store all vmas that
     23  * map a file in a PST. Then, using the PST, we can answer a stabbing query,
     24  * i.e., selecting a set of stored intervals (vmas) that overlap with (map) a
     25  * given input interval X (a set of consecutive file pages), in "O(log n + m)"
     26  * time where 'log n' is the height of the PST, and 'm' is the number of stored
     27  * intervals (vmas) that overlap (map) with the input interval X (the set of
     28  * consecutive file pages).
     29  *
     30  * In our implementation, we store closed intervals of the form [radix_index,
     31  * heap_index]. We assume that always radix_index <= heap_index. McCreight's PST
     32  * is designed for storing intervals with unique radix indices, i.e., each
     33  * interval have different radix_index. However, this limitation can be easily
     34  * overcome by using the size, i.e., heap_index - radix_index, as part of the
     35  * index, so we index the tree using [(radix_index,size), heap_index].
     36  *
     37  * When the above-mentioned indexing scheme is used, theoretically, in a 32 bit
     38  * machine, the maximum height of a PST can be 64. We can use a balanced version
     39  * of the priority search tree to optimize the tree height, but the balanced
     40  * tree proposed by McCreight is too complex and memory-hungry for our purpose.
     41  */
     42 
     43 static void get_index(const struct prio_tree_node *node,
     44 		      unsigned long *radix, unsigned long *heap)
     45 {
     46 	*radix = node->start;
     47 	*heap = node->last;
     48 }
     49 
     50 static unsigned long index_bits_to_maxindex[BITS_PER_LONG];
     51 
     52 static void fio_init prio_tree_init(void)
     53 {
     54 	unsigned int i;
     55 
     56 	for (i = 0; i < ARRAY_SIZE(index_bits_to_maxindex) - 1; i++)
     57 		index_bits_to_maxindex[i] = (1UL << (i + 1)) - 1;
     58 	index_bits_to_maxindex[ARRAY_SIZE(index_bits_to_maxindex) - 1] = ~0UL;
     59 }
     60 
     61 /*
     62  * Maximum heap_index that can be stored in a PST with index_bits bits
     63  */
     64 static inline unsigned long prio_tree_maxindex(unsigned int bits)
     65 {
     66 	return index_bits_to_maxindex[bits - 1];
     67 }
     68 
     69 /*
     70  * Extend a priority search tree so that it can store a node with heap_index
     71  * max_heap_index. In the worst case, this algorithm takes O((log n)^2).
     72  * However, this function is used rarely and the common case performance is
     73  * not bad.
     74  */
     75 static struct prio_tree_node *prio_tree_expand(struct prio_tree_root *root,
     76 		struct prio_tree_node *node, unsigned long max_heap_index)
     77 {
     78 	struct prio_tree_node *first = NULL, *prev, *last = NULL;
     79 
     80 	if (max_heap_index > prio_tree_maxindex(root->index_bits))
     81 		root->index_bits++;
     82 
     83 	while (max_heap_index > prio_tree_maxindex(root->index_bits)) {
     84 		root->index_bits++;
     85 
     86 		if (prio_tree_empty(root))
     87 			continue;
     88 
     89 		if (first == NULL) {
     90 			first = root->prio_tree_node;
     91 			prio_tree_remove(root, root->prio_tree_node);
     92 			INIT_PRIO_TREE_NODE(first);
     93 			last = first;
     94 		} else {
     95 			prev = last;
     96 			last = root->prio_tree_node;
     97 			prio_tree_remove(root, root->prio_tree_node);
     98 			INIT_PRIO_TREE_NODE(last);
     99 			prev->left = last;
    100 			last->parent = prev;
    101 		}
    102 	}
    103 
    104 	INIT_PRIO_TREE_NODE(node);
    105 
    106 	if (first) {
    107 		node->left = first;
    108 		first->parent = node;
    109 	} else
    110 		last = node;
    111 
    112 	if (!prio_tree_empty(root)) {
    113 		last->left = root->prio_tree_node;
    114 		last->left->parent = last;
    115 	}
    116 
    117 	root->prio_tree_node = node;
    118 	return node;
    119 }
    120 
    121 /*
    122  * Replace a prio_tree_node with a new node and return the old node
    123  */
    124 struct prio_tree_node *prio_tree_replace(struct prio_tree_root *root,
    125 		struct prio_tree_node *old, struct prio_tree_node *node)
    126 {
    127 	INIT_PRIO_TREE_NODE(node);
    128 
    129 	if (prio_tree_root(old)) {
    130 		assert(root->prio_tree_node == old);
    131 		/*
    132 		 * We can reduce root->index_bits here. However, it is complex
    133 		 * and does not help much to improve performance (IMO).
    134 		 */
    135 		node->parent = node;
    136 		root->prio_tree_node = node;
    137 	} else {
    138 		node->parent = old->parent;
    139 		if (old->parent->left == old)
    140 			old->parent->left = node;
    141 		else
    142 			old->parent->right = node;
    143 	}
    144 
    145 	if (!prio_tree_left_empty(old)) {
    146 		node->left = old->left;
    147 		old->left->parent = node;
    148 	}
    149 
    150 	if (!prio_tree_right_empty(old)) {
    151 		node->right = old->right;
    152 		old->right->parent = node;
    153 	}
    154 
    155 	return old;
    156 }
    157 
    158 /*
    159  * Insert a prio_tree_node @node into a radix priority search tree @root. The
    160  * algorithm typically takes O(log n) time where 'log n' is the number of bits
    161  * required to represent the maximum heap_index. In the worst case, the algo
    162  * can take O((log n)^2) - check prio_tree_expand.
    163  *
    164  * If a prior node with same radix_index and heap_index is already found in
    165  * the tree, then returns the address of the prior node. Otherwise, inserts
    166  * @node into the tree and returns @node.
    167  */
    168 struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root,
    169 		struct prio_tree_node *node)
    170 {
    171 	struct prio_tree_node *cur, *res = node;
    172 	unsigned long radix_index, heap_index;
    173 	unsigned long r_index, h_index, index, mask;
    174 	int size_flag = 0;
    175 
    176 	get_index(node, &radix_index, &heap_index);
    177 
    178 	if (prio_tree_empty(root) ||
    179 			heap_index > prio_tree_maxindex(root->index_bits))
    180 		return prio_tree_expand(root, node, heap_index);
    181 
    182 	cur = root->prio_tree_node;
    183 	mask = 1UL << (root->index_bits - 1);
    184 
    185 	while (mask) {
    186 		get_index(cur, &r_index, &h_index);
    187 
    188 		if (r_index == radix_index && h_index == heap_index)
    189 			return cur;
    190 
    191                 if (h_index < heap_index ||
    192 		    (h_index == heap_index && r_index > radix_index)) {
    193 			struct prio_tree_node *tmp = node;
    194 			node = prio_tree_replace(root, cur, node);
    195 			cur = tmp;
    196 			/* swap indices */
    197 			index = r_index;
    198 			r_index = radix_index;
    199 			radix_index = index;
    200 			index = h_index;
    201 			h_index = heap_index;
    202 			heap_index = index;
    203 		}
    204 
    205 		if (size_flag)
    206 			index = heap_index - radix_index;
    207 		else
    208 			index = radix_index;
    209 
    210 		if (index & mask) {
    211 			if (prio_tree_right_empty(cur)) {
    212 				INIT_PRIO_TREE_NODE(node);
    213 				cur->right = node;
    214 				node->parent = cur;
    215 				return res;
    216 			} else
    217 				cur = cur->right;
    218 		} else {
    219 			if (prio_tree_left_empty(cur)) {
    220 				INIT_PRIO_TREE_NODE(node);
    221 				cur->left = node;
    222 				node->parent = cur;
    223 				return res;
    224 			} else
    225 				cur = cur->left;
    226 		}
    227 
    228 		mask >>= 1;
    229 
    230 		if (!mask) {
    231 			mask = 1UL << (BITS_PER_LONG - 1);
    232 			size_flag = 1;
    233 		}
    234 	}
    235 	/* Should not reach here */
    236 	assert(0);
    237 	return NULL;
    238 }
    239 
    240 /*
    241  * Remove a prio_tree_node @node from a radix priority search tree @root. The
    242  * algorithm takes O(log n) time where 'log n' is the number of bits required
    243  * to represent the maximum heap_index.
    244  */
    245 void prio_tree_remove(struct prio_tree_root *root, struct prio_tree_node *node)
    246 {
    247 	struct prio_tree_node *cur;
    248 	unsigned long r_index, h_index_right, h_index_left;
    249 
    250 	cur = node;
    251 
    252 	while (!prio_tree_left_empty(cur) || !prio_tree_right_empty(cur)) {
    253 		if (!prio_tree_left_empty(cur))
    254 			get_index(cur->left, &r_index, &h_index_left);
    255 		else {
    256 			cur = cur->right;
    257 			continue;
    258 		}
    259 
    260 		if (!prio_tree_right_empty(cur))
    261 			get_index(cur->right, &r_index, &h_index_right);
    262 		else {
    263 			cur = cur->left;
    264 			continue;
    265 		}
    266 
    267 		/* both h_index_left and h_index_right cannot be 0 */
    268 		if (h_index_left >= h_index_right)
    269 			cur = cur->left;
    270 		else
    271 			cur = cur->right;
    272 	}
    273 
    274 	if (prio_tree_root(cur)) {
    275 		assert(root->prio_tree_node == cur);
    276 		INIT_PRIO_TREE_ROOT(root);
    277 		return;
    278 	}
    279 
    280 	if (cur->parent->right == cur)
    281 		cur->parent->right = cur->parent;
    282 	else
    283 		cur->parent->left = cur->parent;
    284 
    285 	while (cur != node)
    286 		cur = prio_tree_replace(root, cur->parent, cur);
    287 }
    288 
    289 /*
    290  * Following functions help to enumerate all prio_tree_nodes in the tree that
    291  * overlap with the input interval X [radix_index, heap_index]. The enumeration
    292  * takes O(log n + m) time where 'log n' is the height of the tree (which is
    293  * proportional to # of bits required to represent the maximum heap_index) and
    294  * 'm' is the number of prio_tree_nodes that overlap the interval X.
    295  */
    296 
    297 static struct prio_tree_node *prio_tree_left(struct prio_tree_iter *iter,
    298 		unsigned long *r_index, unsigned long *h_index)
    299 {
    300 	if (prio_tree_left_empty(iter->cur))
    301 		return NULL;
    302 
    303 	get_index(iter->cur->left, r_index, h_index);
    304 
    305 	if (iter->r_index <= *h_index) {
    306 		iter->cur = iter->cur->left;
    307 		iter->mask >>= 1;
    308 		if (iter->mask) {
    309 			if (iter->size_level)
    310 				iter->size_level++;
    311 		} else {
    312 			if (iter->size_level) {
    313 				assert(prio_tree_left_empty(iter->cur));
    314 				assert(prio_tree_right_empty(iter->cur));
    315 				iter->size_level++;
    316 				iter->mask = ULONG_MAX;
    317 			} else {
    318 				iter->size_level = 1;
    319 				iter->mask = 1UL << (BITS_PER_LONG - 1);
    320 			}
    321 		}
    322 		return iter->cur;
    323 	}
    324 
    325 	return NULL;
    326 }
    327 
    328 static struct prio_tree_node *prio_tree_right(struct prio_tree_iter *iter,
    329 		unsigned long *r_index, unsigned long *h_index)
    330 {
    331 	unsigned long value;
    332 
    333 	if (prio_tree_right_empty(iter->cur))
    334 		return NULL;
    335 
    336 	if (iter->size_level)
    337 		value = iter->value;
    338 	else
    339 		value = iter->value | iter->mask;
    340 
    341 	if (iter->h_index < value)
    342 		return NULL;
    343 
    344 	get_index(iter->cur->right, r_index, h_index);
    345 
    346 	if (iter->r_index <= *h_index) {
    347 		iter->cur = iter->cur->right;
    348 		iter->mask >>= 1;
    349 		iter->value = value;
    350 		if (iter->mask) {
    351 			if (iter->size_level)
    352 				iter->size_level++;
    353 		} else {
    354 			if (iter->size_level) {
    355 				assert(prio_tree_left_empty(iter->cur));
    356 				assert(prio_tree_right_empty(iter->cur));
    357 				iter->size_level++;
    358 				iter->mask = ULONG_MAX;
    359 			} else {
    360 				iter->size_level = 1;
    361 				iter->mask = 1UL << (BITS_PER_LONG - 1);
    362 			}
    363 		}
    364 		return iter->cur;
    365 	}
    366 
    367 	return NULL;
    368 }
    369 
    370 static struct prio_tree_node *prio_tree_parent(struct prio_tree_iter *iter)
    371 {
    372 	iter->cur = iter->cur->parent;
    373 	if (iter->mask == ULONG_MAX)
    374 		iter->mask = 1UL;
    375 	else if (iter->size_level == 1)
    376 		iter->mask = 1UL;
    377 	else
    378 		iter->mask <<= 1;
    379 	if (iter->size_level)
    380 		iter->size_level--;
    381 	if (!iter->size_level && (iter->value & iter->mask))
    382 		iter->value ^= iter->mask;
    383 	return iter->cur;
    384 }
    385 
    386 static inline int overlap(struct prio_tree_iter *iter,
    387 		unsigned long r_index, unsigned long h_index)
    388 {
    389 	return iter->h_index >= r_index && iter->r_index <= h_index;
    390 }
    391 
    392 /*
    393  * prio_tree_first:
    394  *
    395  * Get the first prio_tree_node that overlaps with the interval [radix_index,
    396  * heap_index]. Note that always radix_index <= heap_index. We do a pre-order
    397  * traversal of the tree.
    398  */
    399 static struct prio_tree_node *prio_tree_first(struct prio_tree_iter *iter)
    400 {
    401 	struct prio_tree_root *root;
    402 	unsigned long r_index, h_index;
    403 
    404 	INIT_PRIO_TREE_ITER(iter);
    405 
    406 	root = iter->root;
    407 	if (prio_tree_empty(root))
    408 		return NULL;
    409 
    410 	get_index(root->prio_tree_node, &r_index, &h_index);
    411 
    412 	if (iter->r_index > h_index)
    413 		return NULL;
    414 
    415 	iter->mask = 1UL << (root->index_bits - 1);
    416 	iter->cur = root->prio_tree_node;
    417 
    418 	while (1) {
    419 		if (overlap(iter, r_index, h_index))
    420 			return iter->cur;
    421 
    422 		if (prio_tree_left(iter, &r_index, &h_index))
    423 			continue;
    424 
    425 		if (prio_tree_right(iter, &r_index, &h_index))
    426 			continue;
    427 
    428 		break;
    429 	}
    430 	return NULL;
    431 }
    432 
    433 /*
    434  * prio_tree_next:
    435  *
    436  * Get the next prio_tree_node that overlaps with the input interval in iter
    437  */
    438 struct prio_tree_node *prio_tree_next(struct prio_tree_iter *iter)
    439 {
    440 	unsigned long r_index, h_index;
    441 
    442 	if (iter->cur == NULL)
    443 		return prio_tree_first(iter);
    444 
    445 repeat:
    446 	while (prio_tree_left(iter, &r_index, &h_index))
    447 		if (overlap(iter, r_index, h_index))
    448 			return iter->cur;
    449 
    450 	while (!prio_tree_right(iter, &r_index, &h_index)) {
    451 	    	while (!prio_tree_root(iter->cur) &&
    452 				iter->cur->parent->right == iter->cur)
    453 			prio_tree_parent(iter);
    454 
    455 		if (prio_tree_root(iter->cur))
    456 			return NULL;
    457 
    458 		prio_tree_parent(iter);
    459 	}
    460 
    461 	if (overlap(iter, r_index, h_index))
    462 		return iter->cur;
    463 
    464 	goto repeat;
    465 }
    466