Home | History | Annotate | Download | only in ext2fs
      1 /*
      2  * fallocate.c -- Allocate large chunks of file.
      3  *
      4  * Copyright (C) 2014 Oracle.
      5  *
      6  * %Begin-Header%
      7  * This file may be redistributed under the terms of the GNU Library
      8  * General Public License, version 2.
      9  * %End-Header%
     10  */
     11 
     12 #include "config.h"
     13 
     14 #if HAVE_SYS_TYPES_H
     15 #include <sys/types.h>
     16 #endif
     17 
     18 #include "ext2_fs.h"
     19 #include "ext2fs.h"
     20 #define min(a, b) ((a) < (b) ? (a) : (b))
     21 
     22 #undef DEBUG
     23 
     24 #ifdef DEBUG
     25 # define dbg_printf(f, a...)  do {printf(f, ## a); fflush(stdout); } while (0)
     26 #else
     27 # define dbg_printf(f, a...)
     28 #endif
     29 
     30 /*
     31  * Extent-based fallocate code.
     32  *
     33  * Find runs of unmapped logical blocks by starting at start and walking the
     34  * extents until we reach the end of the range we want.
     35  *
     36  * For each run of unmapped blocks, try to find the extents on either side of
     37  * the range.  If there's a left extent that can grow by at least a cluster and
     38  * there are lblocks between start and the next lcluster after start, see if
     39  * there's an implied cluster allocation; if so, zero the blocks (if the left
     40  * extent is initialized) and adjust the extent.  Ditto for the blocks between
     41  * the end of the last full lcluster and end, if there's a right extent.
     42  *
     43  * Try to attach as much as we can to the left extent, then try to attach as
     44  * much as we can to the right extent.  For the remainder, try to allocate the
     45  * whole range; map in whatever we get; and repeat until we're done.
     46  *
     47  * To attach to a left extent, figure out the maximum amount we can add to the
     48  * extent and try to allocate that much, and append if successful.  To attach
     49  * to a right extent, figure out the max we can add to the extent, try to
     50  * allocate that much, and prepend if successful.
     51  *
     52  * We need an alloc_range function that tells us how much we can allocate given
     53  * a maximum length and one of a suggested start, a fixed start, or a fixed end
     54  * point.
     55  *
     56  * Every time we modify the extent tree we also need to update the block stats.
     57  *
     58  * At the end, update i_blocks and i_size appropriately.
     59  */
     60 
     61 static void dbg_print_extent(const char *desc EXT2FS_ATTR((unused)),
     62 		const struct ext2fs_extent *extent EXT2FS_ATTR((unused)))
     63 {
     64 #ifdef DEBUG
     65 	if (desc)
     66 		printf("%s: ", desc);
     67 	printf("extent: lblk %llu--%llu, len %u, pblk %llu, flags: ",
     68 	       extent->e_lblk, extent->e_lblk + extent->e_len - 1,
     69 	       extent->e_len, extent->e_pblk);
     70 	if (extent->e_flags & EXT2_EXTENT_FLAGS_LEAF)
     71 		fputs("LEAF ", stdout);
     72 	if (extent->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
     73 		fputs("UNINIT ", stdout);
     74 	if (extent->e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT)
     75 		fputs("2ND_VISIT ", stdout);
     76 	if (!extent->e_flags)
     77 		fputs("(none)", stdout);
     78 	fputc('\n', stdout);
     79 	fflush(stdout);
     80 #endif
     81 }
     82 
     83 static errcode_t claim_range(ext2_filsys fs, struct ext2_inode *inode,
     84 			     blk64_t blk, blk64_t len)
     85 {
     86 	blk64_t	clusters;
     87 
     88 	clusters = (len + EXT2FS_CLUSTER_RATIO(fs) - 1) /
     89 		   EXT2FS_CLUSTER_RATIO(fs);
     90 	ext2fs_block_alloc_stats_range(fs, blk,
     91 			clusters * EXT2FS_CLUSTER_RATIO(fs), +1);
     92 	return ext2fs_iblk_add_blocks(fs, inode, clusters);
     93 }
     94 
     95 static errcode_t ext_falloc_helper(ext2_filsys fs,
     96 				   int flags,
     97 				   ext2_ino_t ino,
     98 				   struct ext2_inode *inode,
     99 				   ext2_extent_handle_t handle,
    100 				   struct ext2fs_extent *left_ext,
    101 				   struct ext2fs_extent *right_ext,
    102 				   blk64_t range_start, blk64_t range_len,
    103 				   blk64_t alloc_goal)
    104 {
    105 	struct ext2fs_extent	newex, ex;
    106 	int			op;
    107 	blk64_t			fillable, pblk, plen, x, y;
    108 	blk64_t			eof_blk = 0, cluster_fill = 0;
    109 	errcode_t		err;
    110 	blk_t			max_extent_len, max_uninit_len, max_init_len;
    111 
    112 #ifdef DEBUG
    113 	printf("%s: ", __func__);
    114 	if (left_ext)
    115 		printf("left_ext=%llu--%llu, ", left_ext->e_lblk,
    116 		       left_ext->e_lblk + left_ext->e_len - 1);
    117 	if (right_ext)
    118 		printf("right_ext=%llu--%llu, ", right_ext->e_lblk,
    119 		       right_ext->e_lblk + right_ext->e_len - 1);
    120 	printf("start=%llu len=%llu, goal=%llu\n", range_start, range_len,
    121 	       alloc_goal);
    122 	fflush(stdout);
    123 #endif
    124 	/* Can't create initialized extents past EOF? */
    125 	if (!(flags & EXT2_FALLOCATE_INIT_BEYOND_EOF))
    126 		eof_blk = EXT2_I_SIZE(inode) / fs->blocksize;
    127 
    128 	/* The allocation goal must be as far into a cluster as range_start. */
    129 	alloc_goal = (alloc_goal & ~EXT2FS_CLUSTER_MASK(fs)) |
    130 		     (range_start & EXT2FS_CLUSTER_MASK(fs));
    131 
    132 	max_uninit_len = EXT_UNINIT_MAX_LEN & ~EXT2FS_CLUSTER_MASK(fs);
    133 	max_init_len = EXT_INIT_MAX_LEN & ~EXT2FS_CLUSTER_MASK(fs);
    134 
    135 	/* We must lengthen the left extent to the end of the cluster */
    136 	if (left_ext && EXT2FS_CLUSTER_RATIO(fs) > 1) {
    137 		/* How many more blocks can be attached to left_ext? */
    138 		if (left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
    139 			fillable = max_uninit_len - left_ext->e_len;
    140 		else
    141 			fillable = max_init_len - left_ext->e_len;
    142 
    143 		if (fillable > range_len)
    144 			fillable = range_len;
    145 		if (fillable == 0)
    146 			goto expand_right;
    147 
    148 		/*
    149 		 * If range_start isn't on a cluster boundary, try an
    150 		 * implied cluster allocation for left_ext.
    151 		 */
    152 		cluster_fill = EXT2FS_CLUSTER_RATIO(fs) -
    153 			       (range_start & EXT2FS_CLUSTER_MASK(fs));
    154 		cluster_fill &= EXT2FS_CLUSTER_MASK(fs);
    155 		if (cluster_fill == 0)
    156 			goto expand_right;
    157 
    158 		if (cluster_fill > fillable)
    159 			cluster_fill = fillable;
    160 
    161 		/* Don't expand an initialized left_ext beyond EOF */
    162 		if (!(flags & EXT2_FALLOCATE_INIT_BEYOND_EOF)) {
    163 			x = left_ext->e_lblk + left_ext->e_len - 1;
    164 			dbg_printf("%s: lend=%llu newlend=%llu eofblk=%llu\n",
    165 				   __func__, x, x + cluster_fill, eof_blk);
    166 			if (eof_blk >= x && eof_blk <= x + cluster_fill)
    167 				cluster_fill = eof_blk - x;
    168 			if (cluster_fill == 0)
    169 				goto expand_right;
    170 		}
    171 
    172 		err = ext2fs_extent_goto(handle, left_ext->e_lblk);
    173 		if (err)
    174 			goto expand_right;
    175 		left_ext->e_len += cluster_fill;
    176 		range_start += cluster_fill;
    177 		range_len -= cluster_fill;
    178 		alloc_goal += cluster_fill;
    179 
    180 		dbg_print_extent("ext_falloc clus left+", left_ext);
    181 		err = ext2fs_extent_replace(handle, 0, left_ext);
    182 		if (err)
    183 			goto out;
    184 		err = ext2fs_extent_fix_parents(handle);
    185 		if (err)
    186 			goto out;
    187 
    188 		/* Zero blocks */
    189 		if (!(left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)) {
    190 			err = ext2fs_zero_blocks2(fs, left_ext->e_pblk +
    191 						  left_ext->e_len -
    192 						  cluster_fill, cluster_fill,
    193 						  NULL, NULL);
    194 			if (err)
    195 				goto out;
    196 		}
    197 	}
    198 
    199 expand_right:
    200 	/* We must lengthen the right extent to the beginning of the cluster */
    201 	if (right_ext && EXT2FS_CLUSTER_RATIO(fs) > 1) {
    202 		/* How much can we attach to right_ext? */
    203 		if (right_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
    204 			fillable = max_uninit_len - right_ext->e_len;
    205 		else
    206 			fillable = max_init_len - right_ext->e_len;
    207 
    208 		if (fillable > range_len)
    209 			fillable = range_len;
    210 		if (fillable == 0)
    211 			goto try_merge;
    212 
    213 		/*
    214 		 * If range_end isn't on a cluster boundary, try an implied
    215 		 * cluster allocation for right_ext.
    216 		 */
    217 		cluster_fill = right_ext->e_lblk & EXT2FS_CLUSTER_MASK(fs);
    218 		if (cluster_fill == 0)
    219 			goto try_merge;
    220 
    221 		err = ext2fs_extent_goto(handle, right_ext->e_lblk);
    222 		if (err)
    223 			goto out;
    224 
    225 		if (cluster_fill > fillable)
    226 			cluster_fill = fillable;
    227 		right_ext->e_lblk -= cluster_fill;
    228 		right_ext->e_pblk -= cluster_fill;
    229 		right_ext->e_len += cluster_fill;
    230 		range_len -= cluster_fill;
    231 
    232 		dbg_print_extent("ext_falloc clus right+", right_ext);
    233 		err = ext2fs_extent_replace(handle, 0, right_ext);
    234 		if (err)
    235 			goto out;
    236 		err = ext2fs_extent_fix_parents(handle);
    237 		if (err)
    238 			goto out;
    239 
    240 		/* Zero blocks if necessary */
    241 		if (!(right_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)) {
    242 			err = ext2fs_zero_blocks2(fs, right_ext->e_pblk,
    243 						  cluster_fill, NULL, NULL);
    244 			if (err)
    245 				goto out;
    246 		}
    247 	}
    248 
    249 try_merge:
    250 	/* Merge both extents together, perhaps? */
    251 	if (left_ext && right_ext) {
    252 		/* Are the two extents mergeable? */
    253 		if ((left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT) !=
    254 		    (right_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT))
    255 			goto try_left;
    256 
    257 		/* User requires init/uninit but extent is uninit/init. */
    258 		if (((flags & EXT2_FALLOCATE_FORCE_INIT) &&
    259 		     (left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)) ||
    260 		    ((flags & EXT2_FALLOCATE_FORCE_UNINIT) &&
    261 		     !(left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)))
    262 			goto try_left;
    263 
    264 		/*
    265 		 * Skip initialized extent unless user wants to zero blocks
    266 		 * or requires init extent.
    267 		 */
    268 		if (!(left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
    269 		    (!(flags & EXT2_FALLOCATE_ZERO_BLOCKS) ||
    270 		     !(flags & EXT2_FALLOCATE_FORCE_INIT)))
    271 			goto try_left;
    272 
    273 		/* Will it even fit? */
    274 		x = left_ext->e_len + range_len + right_ext->e_len;
    275 		if (x > (left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT ?
    276 				max_uninit_len : max_init_len))
    277 			goto try_left;
    278 
    279 		err = ext2fs_extent_goto(handle, left_ext->e_lblk);
    280 		if (err)
    281 			goto try_left;
    282 
    283 		/* Allocate blocks */
    284 		y = left_ext->e_pblk + left_ext->e_len;
    285 		err = ext2fs_new_range(fs, EXT2_NEWRANGE_FIXED_GOAL |
    286 				       EXT2_NEWRANGE_MIN_LENGTH, y,
    287 				       right_ext->e_pblk - y + 1, NULL,
    288 				       &pblk, &plen);
    289 		if (err)
    290 			goto try_left;
    291 		if (pblk + plen != right_ext->e_pblk)
    292 			goto try_left;
    293 		err = claim_range(fs, inode, pblk, plen);
    294 		if (err)
    295 			goto out;
    296 
    297 		/* Modify extents */
    298 		left_ext->e_len = x;
    299 		dbg_print_extent("ext_falloc merge", left_ext);
    300 		err = ext2fs_extent_replace(handle, 0, left_ext);
    301 		if (err)
    302 			goto out;
    303 		err = ext2fs_extent_fix_parents(handle);
    304 		if (err)
    305 			goto out;
    306 		err = ext2fs_extent_get(handle, EXT2_EXTENT_NEXT_LEAF, &newex);
    307 		if (err)
    308 			goto out;
    309 		err = ext2fs_extent_delete(handle, 0);
    310 		if (err)
    311 			goto out;
    312 		err = ext2fs_extent_fix_parents(handle);
    313 		if (err)
    314 			goto out;
    315 		*right_ext = *left_ext;
    316 
    317 		/* Zero blocks */
    318 		if (!(left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
    319 		    (flags & EXT2_FALLOCATE_ZERO_BLOCKS)) {
    320 			err = ext2fs_zero_blocks2(fs, range_start, range_len,
    321 						  NULL, NULL);
    322 			if (err)
    323 				goto out;
    324 		}
    325 
    326 		return 0;
    327 	}
    328 
    329 try_left:
    330 	/* Extend the left extent */
    331 	if (left_ext) {
    332 		/* How many more blocks can be attached to left_ext? */
    333 		if (left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
    334 			fillable = max_uninit_len - left_ext->e_len;
    335 		else if (flags & EXT2_FALLOCATE_ZERO_BLOCKS)
    336 			fillable = max_init_len - left_ext->e_len;
    337 		else
    338 			fillable = 0;
    339 
    340 		/* User requires init/uninit but extent is uninit/init. */
    341 		if (((flags & EXT2_FALLOCATE_FORCE_INIT) &&
    342 		     (left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)) ||
    343 		    ((flags & EXT2_FALLOCATE_FORCE_UNINIT) &&
    344 		     !(left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)))
    345 			goto try_right;
    346 
    347 		if (fillable > range_len)
    348 			fillable = range_len;
    349 
    350 		/* Don't expand an initialized left_ext beyond EOF */
    351 		x = left_ext->e_lblk + left_ext->e_len - 1;
    352 		if (!(flags & EXT2_FALLOCATE_INIT_BEYOND_EOF)) {
    353 			dbg_printf("%s: lend=%llu newlend=%llu eofblk=%llu\n",
    354 				   __func__, x, x + fillable, eof_blk);
    355 			if (eof_blk >= x && eof_blk <= x + fillable)
    356 				fillable = eof_blk - x;
    357 		}
    358 
    359 		if (fillable == 0)
    360 			goto try_right;
    361 
    362 		/* Test if the right edge of the range is already mapped? */
    363 		if (EXT2FS_CLUSTER_RATIO(fs) > 1) {
    364 			err = ext2fs_map_cluster_block(fs, ino, inode,
    365 					x + fillable, &pblk);
    366 			if (err)
    367 				goto out;
    368 			if (pblk)
    369 				fillable -= 1 + ((x + fillable)
    370 						 & EXT2FS_CLUSTER_MASK(fs));
    371 			if (fillable == 0)
    372 				goto try_right;
    373 		}
    374 
    375 		/* Allocate range of blocks */
    376 		x = left_ext->e_pblk + left_ext->e_len;
    377 		err = ext2fs_new_range(fs, EXT2_NEWRANGE_FIXED_GOAL |
    378 				EXT2_NEWRANGE_MIN_LENGTH,
    379 				x, fillable, NULL, &pblk, &plen);
    380 		if (err)
    381 			goto try_right;
    382 		err = claim_range(fs, inode, pblk, plen);
    383 		if (err)
    384 			goto out;
    385 
    386 		/* Modify left_ext */
    387 		err = ext2fs_extent_goto(handle, left_ext->e_lblk);
    388 		if (err)
    389 			goto out;
    390 		range_start += plen;
    391 		range_len -= plen;
    392 		left_ext->e_len += plen;
    393 		dbg_print_extent("ext_falloc left+", left_ext);
    394 		err = ext2fs_extent_replace(handle, 0, left_ext);
    395 		if (err)
    396 			goto out;
    397 		err = ext2fs_extent_fix_parents(handle);
    398 		if (err)
    399 			goto out;
    400 
    401 		/* Zero blocks if necessary */
    402 		if (!(left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
    403 		    (flags & EXT2_FALLOCATE_ZERO_BLOCKS)) {
    404 			err = ext2fs_zero_blocks2(fs, pblk, plen, NULL, NULL);
    405 			if (err)
    406 				goto out;
    407 		}
    408 	}
    409 
    410 try_right:
    411 	/* Extend the right extent */
    412 	if (right_ext) {
    413 		/* How much can we attach to right_ext? */
    414 		if (right_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
    415 			fillable = max_uninit_len - right_ext->e_len;
    416 		else if (flags & EXT2_FALLOCATE_ZERO_BLOCKS)
    417 			fillable = max_init_len - right_ext->e_len;
    418 		else
    419 			fillable = 0;
    420 
    421 		/* User requires init/uninit but extent is uninit/init. */
    422 		if (((flags & EXT2_FALLOCATE_FORCE_INIT) &&
    423 		     (right_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)) ||
    424 		    ((flags & EXT2_FALLOCATE_FORCE_UNINIT) &&
    425 		     !(right_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)))
    426 			goto try_anywhere;
    427 
    428 		if (fillable > range_len)
    429 			fillable = range_len;
    430 		if (fillable == 0)
    431 			goto try_anywhere;
    432 
    433 		/* Test if the left edge of the range is already mapped? */
    434 		if (EXT2FS_CLUSTER_RATIO(fs) > 1) {
    435 			err = ext2fs_map_cluster_block(fs, ino, inode,
    436 					right_ext->e_lblk - fillable, &pblk);
    437 			if (err)
    438 				goto out;
    439 			if (pblk)
    440 				fillable -= EXT2FS_CLUSTER_RATIO(fs) -
    441 						((right_ext->e_lblk - fillable)
    442 						 & EXT2FS_CLUSTER_MASK(fs));
    443 			if (fillable == 0)
    444 				goto try_anywhere;
    445 		}
    446 
    447 		/*
    448 		 * FIXME: It would be nice if we could handle allocating a
    449 		 * variable range from a fixed end point instead of just
    450 		 * skipping to the general allocator if the whole range is
    451 		 * unavailable.
    452 		 */
    453 		err = ext2fs_new_range(fs, EXT2_NEWRANGE_FIXED_GOAL |
    454 				EXT2_NEWRANGE_MIN_LENGTH,
    455 				right_ext->e_pblk - fillable,
    456 				fillable, NULL, &pblk, &plen);
    457 		if (err)
    458 			goto try_anywhere;
    459 		err = claim_range(fs, inode,
    460 			      pblk & ~EXT2FS_CLUSTER_MASK(fs),
    461 			      plen + (pblk & EXT2FS_CLUSTER_MASK(fs)));
    462 		if (err)
    463 			goto out;
    464 
    465 		/* Modify right_ext */
    466 		err = ext2fs_extent_goto(handle, right_ext->e_lblk);
    467 		if (err)
    468 			goto out;
    469 		range_len -= plen;
    470 		right_ext->e_lblk -= plen;
    471 		right_ext->e_pblk -= plen;
    472 		right_ext->e_len += plen;
    473 		dbg_print_extent("ext_falloc right+", right_ext);
    474 		err = ext2fs_extent_replace(handle, 0, right_ext);
    475 		if (err)
    476 			goto out;
    477 		err = ext2fs_extent_fix_parents(handle);
    478 		if (err)
    479 			goto out;
    480 
    481 		/* Zero blocks if necessary */
    482 		if (!(right_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
    483 		    (flags & EXT2_FALLOCATE_ZERO_BLOCKS)) {
    484 			err = ext2fs_zero_blocks2(fs, pblk,
    485 					plen + cluster_fill, NULL, NULL);
    486 			if (err)
    487 				goto out;
    488 		}
    489 	}
    490 
    491 try_anywhere:
    492 	/* Try implied cluster alloc on the left and right ends */
    493 	if (range_len > 0 && (range_start & EXT2FS_CLUSTER_MASK(fs))) {
    494 		cluster_fill = EXT2FS_CLUSTER_RATIO(fs) -
    495 			       (range_start & EXT2FS_CLUSTER_MASK(fs));
    496 		cluster_fill &= EXT2FS_CLUSTER_MASK(fs);
    497 		if (cluster_fill > range_len)
    498 			cluster_fill = range_len;
    499 		newex.e_lblk = range_start;
    500 		err = ext2fs_map_cluster_block(fs, ino, inode, newex.e_lblk,
    501 					       &pblk);
    502 		if (err)
    503 			goto out;
    504 		if (pblk == 0)
    505 			goto try_right_implied;
    506 		newex.e_pblk = pblk;
    507 		newex.e_len = cluster_fill;
    508 		newex.e_flags = (flags & EXT2_FALLOCATE_FORCE_INIT ? 0 :
    509 				 EXT2_EXTENT_FLAGS_UNINIT);
    510 		dbg_print_extent("ext_falloc iclus left+", &newex);
    511 		ext2fs_extent_goto(handle, newex.e_lblk);
    512 		err = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT,
    513 					&ex);
    514 		if (err == EXT2_ET_NO_CURRENT_NODE)
    515 			ex.e_lblk = 0;
    516 		else if (err)
    517 			goto out;
    518 
    519 		if (ex.e_lblk > newex.e_lblk)
    520 			op = 0; /* insert before */
    521 		else
    522 			op = EXT2_EXTENT_INSERT_AFTER;
    523 		dbg_printf("%s: inserting %s lblk %llu newex=%llu\n",
    524 			   __func__, op ? "after" : "before", ex.e_lblk,
    525 			   newex.e_lblk);
    526 		err = ext2fs_extent_insert(handle, op, &newex);
    527 		if (err)
    528 			goto out;
    529 		err = ext2fs_extent_fix_parents(handle);
    530 		if (err)
    531 			goto out;
    532 
    533 		if (!(newex.e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
    534 		    (flags & EXT2_FALLOCATE_ZERO_BLOCKS)) {
    535 			err = ext2fs_zero_blocks2(fs, newex.e_pblk,
    536 						  newex.e_len, NULL, NULL);
    537 			if (err)
    538 				goto out;
    539 		}
    540 
    541 		range_start += cluster_fill;
    542 		range_len -= cluster_fill;
    543 	}
    544 
    545 try_right_implied:
    546 	y = range_start + range_len;
    547 	if (range_len > 0 && (y & EXT2FS_CLUSTER_MASK(fs))) {
    548 		cluster_fill = y & EXT2FS_CLUSTER_MASK(fs);
    549 		if (cluster_fill > range_len)
    550 			cluster_fill = range_len;
    551 		newex.e_lblk = y & ~EXT2FS_CLUSTER_MASK(fs);
    552 		err = ext2fs_map_cluster_block(fs, ino, inode, newex.e_lblk,
    553 					       &pblk);
    554 		if (err)
    555 			goto out;
    556 		if (pblk == 0)
    557 			goto no_implied;
    558 		newex.e_pblk = pblk;
    559 		newex.e_len = cluster_fill;
    560 		newex.e_flags = (flags & EXT2_FALLOCATE_FORCE_INIT ? 0 :
    561 				 EXT2_EXTENT_FLAGS_UNINIT);
    562 		dbg_print_extent("ext_falloc iclus right+", &newex);
    563 		ext2fs_extent_goto(handle, newex.e_lblk);
    564 		err = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT,
    565 					&ex);
    566 		if (err == EXT2_ET_NO_CURRENT_NODE)
    567 			ex.e_lblk = 0;
    568 		else if (err)
    569 			goto out;
    570 
    571 		if (ex.e_lblk > newex.e_lblk)
    572 			op = 0; /* insert before */
    573 		else
    574 			op = EXT2_EXTENT_INSERT_AFTER;
    575 		dbg_printf("%s: inserting %s lblk %llu newex=%llu\n",
    576 			   __func__, op ? "after" : "before", ex.e_lblk,
    577 			   newex.e_lblk);
    578 		err = ext2fs_extent_insert(handle, op, &newex);
    579 		if (err)
    580 			goto out;
    581 		err = ext2fs_extent_fix_parents(handle);
    582 		if (err)
    583 			goto out;
    584 
    585 		if (!(newex.e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
    586 		    (flags & EXT2_FALLOCATE_ZERO_BLOCKS)) {
    587 			err = ext2fs_zero_blocks2(fs, newex.e_pblk,
    588 						  newex.e_len, NULL, NULL);
    589 			if (err)
    590 				goto out;
    591 		}
    592 
    593 		range_len -= cluster_fill;
    594 	}
    595 
    596 no_implied:
    597 	if (range_len == 0)
    598 		return 0;
    599 
    600 	newex.e_lblk = range_start;
    601 	if (flags & EXT2_FALLOCATE_FORCE_INIT) {
    602 		max_extent_len = max_init_len;
    603 		newex.e_flags = 0;
    604 	} else {
    605 		max_extent_len = max_uninit_len;
    606 		newex.e_flags = EXT2_EXTENT_FLAGS_UNINIT;
    607 	}
    608 	pblk = alloc_goal;
    609 	y = range_len;
    610 	for (x = 0; x < y;) {
    611 		cluster_fill = newex.e_lblk & EXT2FS_CLUSTER_MASK(fs);
    612 		fillable = min(range_len + cluster_fill, max_extent_len);
    613 		err = ext2fs_new_range(fs, 0, pblk & ~EXT2FS_CLUSTER_MASK(fs),
    614 				       fillable,
    615 				       NULL, &pblk, &plen);
    616 		if (err)
    617 			goto out;
    618 		err = claim_range(fs, inode, pblk, plen);
    619 		if (err)
    620 			goto out;
    621 
    622 		/* Create extent */
    623 		newex.e_pblk = pblk + cluster_fill;
    624 		newex.e_len = plen - cluster_fill;
    625 		dbg_print_extent("ext_falloc create", &newex);
    626 		ext2fs_extent_goto(handle, newex.e_lblk);
    627 		err = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT,
    628 					&ex);
    629 		if (err == EXT2_ET_NO_CURRENT_NODE)
    630 			ex.e_lblk = 0;
    631 		else if (err)
    632 			goto out;
    633 
    634 		if (ex.e_lblk > newex.e_lblk)
    635 			op = 0; /* insert before */
    636 		else
    637 			op = EXT2_EXTENT_INSERT_AFTER;
    638 		dbg_printf("%s: inserting %s lblk %llu newex=%llu\n",
    639 			   __func__, op ? "after" : "before", ex.e_lblk,
    640 			   newex.e_lblk);
    641 		err = ext2fs_extent_insert(handle, op, &newex);
    642 		if (err)
    643 			goto out;
    644 		err = ext2fs_extent_fix_parents(handle);
    645 		if (err)
    646 			goto out;
    647 
    648 		if (!(newex.e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
    649 		    (flags & EXT2_FALLOCATE_ZERO_BLOCKS)) {
    650 			err = ext2fs_zero_blocks2(fs, pblk, plen, NULL, NULL);
    651 			if (err)
    652 				goto out;
    653 		}
    654 
    655 		/* Update variables at end of loop */
    656 		x += plen - cluster_fill;
    657 		range_len -= plen - cluster_fill;
    658 		newex.e_lblk += plen - cluster_fill;
    659 		pblk += plen - cluster_fill;
    660 		if (pblk >= ext2fs_blocks_count(fs->super))
    661 			pblk = fs->super->s_first_data_block;
    662 	}
    663 
    664 out:
    665 	return err;
    666 }
    667 
    668 static errcode_t extent_fallocate(ext2_filsys fs, int flags, ext2_ino_t ino,
    669 				      struct ext2_inode *inode, blk64_t goal,
    670 				      blk64_t start, blk64_t len)
    671 {
    672 	ext2_extent_handle_t	handle;
    673 	struct ext2fs_extent	left_extent, right_extent;
    674 	struct ext2fs_extent	*left_adjacent, *right_adjacent;
    675 	errcode_t		err;
    676 	blk64_t			range_start, range_end = 0, end, next;
    677 	blk64_t			count, goal_distance;
    678 
    679 	end = start + len - 1;
    680 	err = ext2fs_extent_open2(fs, ino, inode, &handle);
    681 	if (err)
    682 		return err;
    683 
    684 	/*
    685 	 * Find the extent closest to the start of the alloc range.  We don't
    686 	 * check the return value because _goto() sets the current node to the
    687 	 * next-lowest extent if 'start' is in a hole; or the next-highest
    688 	 * extent if there aren't any lower ones; or doesn't set a current node
    689 	 * if there was a real error reading the extent tree.  In that case,
    690 	 * _get() will error out.
    691 	 */
    692 start_again:
    693 	ext2fs_extent_goto(handle, start);
    694 	err = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT, &left_extent);
    695 	if (err == EXT2_ET_NO_CURRENT_NODE) {
    696 		blk64_t max_blocks = ext2fs_blocks_count(fs->super);
    697 
    698 		if (goal == ~0ULL)
    699 			goal = ext2fs_find_inode_goal(fs, ino, inode, start);
    700 		err = ext2fs_find_first_zero_block_bitmap2(fs->block_map,
    701 						goal, max_blocks - 1, &goal);
    702 		goal += start;
    703 		err = ext_falloc_helper(fs, flags, ino, inode, handle, NULL,
    704 					NULL, start, len, goal);
    705 		goto errout;
    706 	} else if (err)
    707 		goto errout;
    708 
    709 	dbg_print_extent("ext_falloc initial", &left_extent);
    710 	next = left_extent.e_lblk + left_extent.e_len;
    711 	if (left_extent.e_lblk > start) {
    712 		/* The nearest extent we found was beyond start??? */
    713 		goal = left_extent.e_pblk - (left_extent.e_lblk - start);
    714 		err = ext_falloc_helper(fs, flags, ino, inode, handle, NULL,
    715 					&left_extent, start,
    716 					left_extent.e_lblk - start, goal);
    717 		if (err)
    718 			goto errout;
    719 
    720 		goto start_again;
    721 	} else if (next >= start) {
    722 		range_start = next;
    723 		left_adjacent = &left_extent;
    724 	} else {
    725 		range_start = start;
    726 		left_adjacent = NULL;
    727 	}
    728 	goal = left_extent.e_pblk + (range_start - left_extent.e_lblk);
    729 	goal_distance = range_start - next;
    730 
    731 	do {
    732 		err = ext2fs_extent_get(handle, EXT2_EXTENT_NEXT_LEAF,
    733 					   &right_extent);
    734 		dbg_printf("%s: ino=%d get next =%d\n", __func__, ino,
    735 			   (int)err);
    736 		dbg_print_extent("ext_falloc next", &right_extent);
    737 		/* Stop if we've seen this extent before */
    738 		if (!err && right_extent.e_lblk <= left_extent.e_lblk)
    739 			err = EXT2_ET_EXTENT_NO_NEXT;
    740 
    741 		if (err && err != EXT2_ET_EXTENT_NO_NEXT)
    742 			goto errout;
    743 		if (err == EXT2_ET_EXTENT_NO_NEXT ||
    744 		    right_extent.e_lblk > end + 1) {
    745 			range_end = end;
    746 			right_adjacent = NULL;
    747 		} else {
    748 			/* Handle right_extent.e_lblk <= end */
    749 			range_end = right_extent.e_lblk - 1;
    750 			right_adjacent = &right_extent;
    751 		}
    752 		if (err != EXT2_ET_EXTENT_NO_NEXT &&
    753 		    goal_distance > (range_end - right_extent.e_lblk)) {
    754 			goal = right_extent.e_pblk -
    755 					(right_extent.e_lblk - range_start);
    756 			goal_distance = range_end - right_extent.e_lblk;
    757 		}
    758 
    759 		dbg_printf("%s: ino=%d rstart=%llu rend=%llu\n", __func__, ino,
    760 			   range_start, range_end);
    761 		err = 0;
    762 		if (range_start <= range_end) {
    763 			count = range_end - range_start + 1;
    764 			err = ext_falloc_helper(fs, flags, ino, inode, handle,
    765 						left_adjacent, right_adjacent,
    766 						range_start, count, goal);
    767 			if (err)
    768 				goto errout;
    769 		}
    770 
    771 		if (range_end == end)
    772 			break;
    773 
    774 		err = ext2fs_extent_goto(handle, right_extent.e_lblk);
    775 		if (err)
    776 			goto errout;
    777 		next = right_extent.e_lblk + right_extent.e_len;
    778 		left_extent = right_extent;
    779 		left_adjacent = &left_extent;
    780 		range_start = next;
    781 		goal = left_extent.e_pblk + (range_start - left_extent.e_lblk);
    782 		goal_distance = range_start - next;
    783 	} while (range_end < end);
    784 
    785 errout:
    786 	ext2fs_extent_free(handle);
    787 	return err;
    788 }
    789 
    790 /*
    791  * Map physical blocks to a range of logical blocks within a file.  The range
    792  * of logical blocks are (start, start + len).  If there are already extents,
    793  * the mappings will try to extend the mappings; otherwise, it will try to map
    794  * start as if logical block 0 points to goal.  If goal is ~0ULL, then the goal
    795  * is calculated based on the inode group.
    796  *
    797  * Flags:
    798  * - EXT2_FALLOCATE_ZERO_BLOCKS: Zero the blocks that are allocated.
    799  * - EXT2_FALLOCATE_FORCE_INIT: Create only initialized extents.
    800  * - EXT2_FALLOCATE_FORCE_UNINIT: Create only uninitialized extents.
    801  * - EXT2_FALLOCATE_INIT_BEYOND_EOF: Create extents beyond EOF.
    802  *
    803  * If neither FORCE_INIT nor FORCE_UNINIT are specified, this function will
    804  * try to expand any extents it finds, zeroing blocks as necessary.
    805  */
    806 errcode_t ext2fs_fallocate(ext2_filsys fs, int flags, ext2_ino_t ino,
    807 			   struct ext2_inode *inode, blk64_t goal,
    808 			   blk64_t start, blk64_t len)
    809 {
    810 	struct ext2_inode	inode_buf;
    811 	blk64_t			blk, x;
    812 	errcode_t		err;
    813 
    814 	if (((flags & EXT2_FALLOCATE_FORCE_INIT) &&
    815 	    (flags & EXT2_FALLOCATE_FORCE_UNINIT)) ||
    816 	   (flags & ~EXT2_FALLOCATE_ALL_FLAGS))
    817 		return EXT2_ET_INVALID_ARGUMENT;
    818 
    819 	if (len > ext2fs_blocks_count(fs->super))
    820 		return EXT2_ET_BLOCK_ALLOC_FAIL;
    821 	else if (len == 0)
    822 		return 0;
    823 
    824 	/* Read inode structure if necessary */
    825 	if (!inode) {
    826 		err = ext2fs_read_inode(fs, ino, &inode_buf);
    827 		if (err)
    828 			return err;
    829 		inode = &inode_buf;
    830 	}
    831 	dbg_printf("%s: ino=%d start=%llu len=%llu goal=%llu\n", __func__, ino,
    832 		   start, len, goal);
    833 
    834 	if (inode->i_flags & EXT4_EXTENTS_FL) {
    835 		err = extent_fallocate(fs, flags, ino, inode, goal, start, len);
    836 		goto out;
    837 	}
    838 
    839 	/* XXX: Allocate a bunch of blocks the slow way */
    840 	for (blk = start; blk < start + len; blk++) {
    841 		err = ext2fs_bmap2(fs, ino, inode, NULL, 0, blk, 0, &x);
    842 		if (err)
    843 			return err;
    844 		if (x)
    845 			continue;
    846 
    847 		err = ext2fs_bmap2(fs, ino, inode, NULL,
    848 				   BMAP_ALLOC | BMAP_UNINIT | BMAP_ZERO, blk,
    849 				   0, &x);
    850 		if (err)
    851 			return err;
    852 	}
    853 
    854 out:
    855 	if (inode == &inode_buf)
    856 		ext2fs_write_inode(fs, ino, inode);
    857 	return err;
    858 }
    859