Home | History | Annotate | Download | only in ext2fs
      1 /*
      2  * punch.c --- deallocate blocks allocated to an inode
      3  *
      4  * Copyright (C) 2010 Theodore Ts'o.
      5  *
      6  * %Begin-Header%
      7  * This file may be redistributed under the terms of the GNU Library
      8  * General Public License, version 2.
      9  * %End-Header%
     10  */
     11 
     12 #include "config.h"
     13 #include <stdio.h>
     14 #include <string.h>
     15 #if HAVE_UNISTD_H
     16 #include <unistd.h>
     17 #endif
     18 #include <errno.h>
     19 
     20 #include "ext2_fs.h"
     21 #include "ext2fs.h"
     22 #include "ext2fsP.h"
     23 
     24 #undef PUNCH_DEBUG
     25 
     26 /*
     27  * This function returns 1 if the specified block is all zeros
     28  */
     29 static int check_zero_block(char *buf, int blocksize)
     30 {
     31 	char	*cp = buf;
     32 	int	left = blocksize;
     33 
     34 	while (left > 0) {
     35 		if (*cp++)
     36 			return 0;
     37 		left--;
     38 	}
     39 	return 1;
     40 }
     41 
     42 /*
     43  * This clever recursive function handles i_blocks[] as well as
     44  * indirect, double indirect, and triple indirect blocks.  It iterates
     45  * over the entries in the i_blocks array or indirect blocks, and for
     46  * each one, will recursively handle any indirect blocks and then
     47  * frees and deallocates the blocks.
     48  */
     49 static errcode_t ind_punch(ext2_filsys fs, struct ext2_inode *inode,
     50 			   char *block_buf, blk_t *p, int level,
     51 			   blk64_t start, blk64_t count, int max)
     52 {
     53 	errcode_t	retval;
     54 	blk_t		b;
     55 	int		i;
     56 	blk64_t		offset, incr;
     57 	int		freed = 0;
     58 
     59 #ifdef PUNCH_DEBUG
     60 	printf("Entering ind_punch, level %d, start %llu, count %llu, "
     61 	       "max %d\n", level, start, count, max);
     62 #endif
     63 	incr = 1ULL << ((EXT2_BLOCK_SIZE_BITS(fs->super) - 2) * level);
     64 	for (i = 0, offset = 0; i < max; i++, p++, offset += incr) {
     65 		if (offset >= start + count)
     66 			break;
     67 		if (*p == 0 || (offset+incr) <= start)
     68 			continue;
     69 		b = *p;
     70 		if (level > 0) {
     71 			blk_t start2;
     72 #ifdef PUNCH_DEBUG
     73 			printf("Reading indirect block %u\n", b);
     74 #endif
     75 			retval = ext2fs_read_ind_block(fs, b, block_buf);
     76 			if (retval)
     77 				return retval;
     78 			start2 = (start > offset) ? start - offset : 0;
     79 			retval = ind_punch(fs, inode, block_buf + fs->blocksize,
     80 					   (blk_t *) block_buf, level - 1,
     81 					   start2, count - offset,
     82 					   fs->blocksize >> 2);
     83 			if (retval)
     84 				return retval;
     85 			retval = ext2fs_write_ind_block(fs, b, block_buf);
     86 			if (retval)
     87 				return retval;
     88 			if (!check_zero_block(block_buf, fs->blocksize))
     89 				continue;
     90 		}
     91 #ifdef PUNCH_DEBUG
     92 		printf("Freeing block %u (offset %llu)\n", b, offset);
     93 #endif
     94 		ext2fs_block_alloc_stats(fs, b, -1);
     95 		*p = 0;
     96 		freed++;
     97 	}
     98 #ifdef PUNCH_DEBUG
     99 	printf("Freed %d blocks\n", freed);
    100 #endif
    101 	return ext2fs_iblk_sub_blocks(fs, inode, freed);
    102 }
    103 
    104 #define BLK_T_MAX ((blk_t)~0ULL)
    105 static errcode_t ext2fs_punch_ind(ext2_filsys fs, struct ext2_inode *inode,
    106 				  char *block_buf, blk64_t start, blk64_t end)
    107 {
    108 	errcode_t		retval;
    109 	char			*buf = 0;
    110 	int			level;
    111 	int			num = EXT2_NDIR_BLOCKS;
    112 	blk_t			*bp = inode->i_block;
    113 	blk_t			addr_per_block;
    114 	blk64_t			max = EXT2_NDIR_BLOCKS;
    115 	blk_t			count;
    116 
    117 	/* Check start/end don't overflow the 2^32-1 indirect block limit */
    118 	if (start > BLK_T_MAX)
    119 		return 0;
    120 	if (end >= BLK_T_MAX || end - start + 1 >= BLK_T_MAX)
    121 		count = BLK_T_MAX - start;
    122 	else
    123 		count = end - start + 1;
    124 
    125 	if (!block_buf) {
    126 		retval = ext2fs_get_array(3, fs->blocksize, &buf);
    127 		if (retval)
    128 			return retval;
    129 		block_buf = buf;
    130 	}
    131 
    132 	addr_per_block = (blk_t)fs->blocksize >> 2;
    133 
    134 	for (level = 0; level < 4; level++, max *= (blk64_t)addr_per_block) {
    135 #ifdef PUNCH_DEBUG
    136 		printf("Main loop level %d, start %llu count %u "
    137 		       "max %llu num %d\n", level, start, count, max, num);
    138 #endif
    139 		if (start < max) {
    140 			retval = ind_punch(fs, inode, block_buf, bp, level,
    141 					   start, count, num);
    142 			if (retval)
    143 				goto errout;
    144 			if (count > max)
    145 				count -= max - start;
    146 			else
    147 				break;
    148 			start = 0;
    149 		} else
    150 			start -= max;
    151 		bp += num;
    152 		if (level == 0) {
    153 			num = 1;
    154 			max = 1;
    155 		}
    156 	}
    157 	retval = 0;
    158 errout:
    159 	if (buf)
    160 		ext2fs_free_mem(&buf);
    161 	return retval;
    162 }
    163 #undef BLK_T_MAX
    164 
    165 #ifdef PUNCH_DEBUG
    166 
    167 #define dbg_printf(f, a...)  printf(f, ## a)
    168 
    169 static void dbg_print_extent(char *desc, struct ext2fs_extent *extent)
    170 {
    171 	if (desc)
    172 		printf("%s: ", desc);
    173 	printf("extent: lblk %llu--%llu, len %u, pblk %llu, flags: ",
    174 	       extent->e_lblk, extent->e_lblk + extent->e_len - 1,
    175 	       extent->e_len, extent->e_pblk);
    176 	if (extent->e_flags & EXT2_EXTENT_FLAGS_LEAF)
    177 		fputs("LEAF ", stdout);
    178 	if (extent->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
    179 		fputs("UNINIT ", stdout);
    180 	if (extent->e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT)
    181 		fputs("2ND_VISIT ", stdout);
    182 	if (!extent->e_flags)
    183 		fputs("(none)", stdout);
    184 	fputc('\n', stdout);
    185 
    186 }
    187 #else
    188 #define dbg_print_extent(desc, ex)	do { } while (0)
    189 #define dbg_printf(f, a...)		do { } while (0)
    190 #endif
    191 
    192 /* Free a range of blocks, respecting cluster boundaries */
    193 static errcode_t punch_extent_blocks(ext2_filsys fs, ext2_ino_t ino,
    194 				     struct ext2_inode *inode,
    195 				     blk64_t lfree_start, blk64_t free_start,
    196 				     __u32 free_count, int *freed)
    197 {
    198 	blk64_t		pblk;
    199 	int		freed_now = 0;
    200 	__u32		cluster_freed;
    201 	errcode_t	retval = 0;
    202 
    203 	/* No bigalloc?  Just free each block. */
    204 	if (EXT2FS_CLUSTER_RATIO(fs) == 1) {
    205 		*freed += free_count;
    206 		while (free_count-- > 0)
    207 			ext2fs_block_alloc_stats2(fs, free_start++, -1);
    208 		return retval;
    209 	}
    210 
    211 	/*
    212 	 * Try to free up to the next cluster boundary.  We assume that all
    213 	 * blocks in a logical cluster map to blocks from the same physical
    214 	 * cluster, and that the offsets within the [pl]clusters match.
    215 	 */
    216 	if (free_start & EXT2FS_CLUSTER_MASK(fs)) {
    217 		retval = ext2fs_map_cluster_block(fs, ino, inode,
    218 						  lfree_start, &pblk);
    219 		if (retval)
    220 			goto errout;
    221 		if (!pblk) {
    222 			ext2fs_block_alloc_stats2(fs, free_start, -1);
    223 			freed_now++;
    224 		}
    225 		cluster_freed = EXT2FS_CLUSTER_RATIO(fs) -
    226 			(free_start & EXT2FS_CLUSTER_MASK(fs));
    227 		if (cluster_freed > free_count)
    228 			cluster_freed = free_count;
    229 		free_count -= cluster_freed;
    230 		free_start += cluster_freed;
    231 		lfree_start += cluster_freed;
    232 	}
    233 
    234 	/* Free whole clusters from the middle of the range. */
    235 	while (free_count > 0 && free_count >= (unsigned) EXT2FS_CLUSTER_RATIO(fs)) {
    236 		ext2fs_block_alloc_stats2(fs, free_start, -1);
    237 		freed_now++;
    238 		cluster_freed = EXT2FS_CLUSTER_RATIO(fs);
    239 		free_count -= cluster_freed;
    240 		free_start += cluster_freed;
    241 		lfree_start += cluster_freed;
    242 	}
    243 
    244 	/* Try to free the last cluster. */
    245 	if (free_count > 0) {
    246 		retval = ext2fs_map_cluster_block(fs, ino, inode,
    247 						  lfree_start, &pblk);
    248 		if (retval)
    249 			goto errout;
    250 		if (!pblk) {
    251 			ext2fs_block_alloc_stats2(fs, free_start, -1);
    252 			freed_now++;
    253 		}
    254 	}
    255 
    256 errout:
    257 	*freed += freed_now;
    258 	return retval;
    259 }
    260 
    261 static errcode_t ext2fs_punch_extent(ext2_filsys fs, ext2_ino_t ino,
    262 				     struct ext2_inode *inode,
    263 				     blk64_t start, blk64_t end)
    264 {
    265 	ext2_extent_handle_t	handle = 0;
    266 	struct ext2fs_extent	extent;
    267 	errcode_t		retval;
    268 	blk64_t			free_start, next, lfree_start;
    269 	__u32			free_count, newlen;
    270 	int			freed = 0;
    271 	int			op;
    272 
    273 	retval = ext2fs_extent_open2(fs, ino, inode, &handle);
    274 	if (retval)
    275 		return retval;
    276 	/*
    277 	 * Find the extent closest to the start of the punch range.  We don't
    278 	 * check the return value because _goto() sets the current node to the
    279 	 * next-lowest extent if 'start' is in a hole, and doesn't set a
    280 	 * current node if there was a real error reading the extent tree.
    281 	 * In that case, _get() will error out.
    282 	 *
    283 	 * Note: If _get() returns 'no current node', that simply means that
    284 	 * there aren't any blocks mapped past this point in the file, so we're
    285 	 * done.
    286 	 */
    287 	ext2fs_extent_goto(handle, start);
    288 	retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT, &extent);
    289 	if (retval == EXT2_ET_NO_CURRENT_NODE) {
    290 		retval = 0;
    291 		goto errout;
    292 	} else if (retval)
    293 		goto errout;
    294 	while (1) {
    295 		op = EXT2_EXTENT_NEXT_LEAF;
    296 		dbg_print_extent("main loop", &extent);
    297 		next = extent.e_lblk + extent.e_len;
    298 		dbg_printf("start %llu, end %llu, next %llu\n",
    299 			   (unsigned long long) start,
    300 			   (unsigned long long) end,
    301 			   (unsigned long long) next);
    302 		if (start <= extent.e_lblk) {
    303 			/*
    304 			 * Have we iterated past the end of the punch region?
    305 			 * If so, we can stop.
    306 			 */
    307 			if (end < extent.e_lblk)
    308 				break;
    309 			dbg_printf("Case #%d\n", 1);
    310 			/* Start of deleted region before extent;
    311 			   adjust beginning of extent */
    312 			free_start = extent.e_pblk;
    313 			lfree_start = extent.e_lblk;
    314 			if (next > end)
    315 				free_count = end - extent.e_lblk + 1;
    316 			else
    317 				free_count = extent.e_len;
    318 			extent.e_len -= free_count;
    319 			extent.e_lblk += free_count;
    320 			extent.e_pblk += free_count;
    321 		} else if (end >= next-1) {
    322 			/*
    323 			 * Is the punch region beyond this extent?  This can
    324 			 * happen if start is already inside a hole.  Try to
    325 			 * advance to the next extent if this is the case.
    326 			 */
    327 			if (start >= next)
    328 				goto next_extent;
    329 			/* End of deleted region after extent;
    330 			   adjust end of extent */
    331 			dbg_printf("Case #%d\n", 2);
    332 			newlen = start - extent.e_lblk;
    333 			free_start = extent.e_pblk + newlen;
    334 			lfree_start = extent.e_lblk + newlen;
    335 			free_count = extent.e_len - newlen;
    336 			extent.e_len = newlen;
    337 		} else {
    338 			struct ext2fs_extent	newex;
    339 
    340 			dbg_printf("Case #%d\n", 3);
    341 			/* The hard case; we need to split the extent */
    342 			newex.e_pblk = extent.e_pblk +
    343 				(end + 1 - extent.e_lblk);
    344 			newex.e_lblk = end + 1;
    345 			newex.e_len = next - end - 1;
    346 			newex.e_flags = extent.e_flags;
    347 
    348 			extent.e_len = start - extent.e_lblk;
    349 			free_start = extent.e_pblk + extent.e_len;
    350 			lfree_start = extent.e_lblk + extent.e_len;
    351 			free_count = end - start + 1;
    352 
    353 			dbg_print_extent("inserting", &newex);
    354 			retval = ext2fs_extent_insert(handle,
    355 					EXT2_EXTENT_INSERT_AFTER, &newex);
    356 			if (retval)
    357 				goto errout;
    358 			retval = ext2fs_extent_fix_parents(handle);
    359 			if (retval)
    360 				goto errout;
    361 			/*
    362 			 * Now pointing at inserted extent; so go back.
    363 			 *
    364 			 * We cannot use EXT2_EXTENT_PREV to go back; note the
    365 			 * subtlety in the comment for fix_parents().
    366 			 */
    367 			retval = ext2fs_extent_goto(handle, extent.e_lblk);
    368 			if (retval)
    369 				goto errout;
    370 		}
    371 		if (extent.e_len) {
    372 			dbg_print_extent("replacing", &extent);
    373 			retval = ext2fs_extent_replace(handle, 0, &extent);
    374 			if (retval)
    375 				goto errout;
    376 			retval = ext2fs_extent_fix_parents(handle);
    377 		} else {
    378 			struct ext2fs_extent	newex;
    379 			blk64_t			old_lblk, next_lblk;
    380 			dbg_printf("deleting current extent%s\n", "");
    381 
    382 			/*
    383 			 * Save the location of the next leaf, then slip
    384 			 * back to the current extent.
    385 			 */
    386 			retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT,
    387 						   &newex);
    388 			if (retval)
    389 				goto errout;
    390 			old_lblk = newex.e_lblk;
    391 
    392 			retval = ext2fs_extent_get(handle,
    393 						   EXT2_EXTENT_NEXT_LEAF,
    394 						   &newex);
    395 			if (retval == EXT2_ET_EXTENT_NO_NEXT)
    396 				next_lblk = old_lblk;
    397 			else if (retval)
    398 				goto errout;
    399 			else
    400 				next_lblk = newex.e_lblk;
    401 
    402 			retval = ext2fs_extent_goto(handle, old_lblk);
    403 			if (retval)
    404 				goto errout;
    405 
    406 			/* Now delete the extent. */
    407 			retval = ext2fs_extent_delete(handle, 0);
    408 			if (retval)
    409 				goto errout;
    410 
    411 			retval = ext2fs_extent_fix_parents(handle);
    412 			if (retval && retval != EXT2_ET_NO_CURRENT_NODE)
    413 				goto errout;
    414 			retval = 0;
    415 
    416 			/*
    417 			 * Jump forward to the next extent.  If there are
    418 			 * errors, the ext2fs_extent_get down below will
    419 			 * capture them for us.
    420 			 */
    421 			(void)ext2fs_extent_goto(handle, next_lblk);
    422 			op = EXT2_EXTENT_CURRENT;
    423 		}
    424 		if (retval)
    425 			goto errout;
    426 		dbg_printf("Free start %llu, free count = %u\n",
    427 		       free_start, free_count);
    428 		retval = punch_extent_blocks(fs, ino, inode, lfree_start,
    429 					     free_start, free_count, &freed);
    430 		if (retval)
    431 			goto errout;
    432 	next_extent:
    433 		retval = ext2fs_extent_get(handle, op,
    434 					   &extent);
    435 		if (retval == EXT2_ET_EXTENT_NO_NEXT ||
    436 		    retval == EXT2_ET_NO_CURRENT_NODE)
    437 			break;
    438 		if (retval)
    439 			goto errout;
    440 	}
    441 	dbg_printf("Freed %d blocks\n", freed);
    442 	retval = ext2fs_iblk_sub_blocks(fs, inode, freed);
    443 errout:
    444 	ext2fs_extent_free(handle);
    445 	return retval;
    446 }
    447 
    448 static errcode_t ext2fs_punch_inline_data(ext2_filsys fs, ext2_ino_t ino,
    449 					  struct ext2_inode *inode,
    450 					  blk64_t start,
    451 					  blk64_t end EXT2FS_ATTR((unused)))
    452 {
    453 	errcode_t retval;
    454 
    455 	/*
    456 	 * In libext2fs ext2fs_punch is based on block unit.  So that
    457 	 * means that if start > 0 we don't need to do nothing.  Due
    458 	 * to this we will remove all inline data in ext2fs_punch()
    459 	 * now.
    460 	 */
    461 	if (start > 0)
    462 		return 0;
    463 
    464 	memset((char *)inode->i_block, 0, EXT4_MIN_INLINE_DATA_SIZE);
    465 	inode->i_size = 0;
    466 	retval = ext2fs_write_inode(fs, ino, inode);
    467 	if (retval)
    468 		return retval;
    469 
    470 	return ext2fs_inline_data_ea_remove(fs, ino);
    471 }
    472 
    473 /*
    474  * Deallocate all logical _blocks_ starting at start to end, inclusive.
    475  * If end is ~0ULL, then this is effectively truncate.
    476  */
    477 errcode_t ext2fs_punch(ext2_filsys fs, ext2_ino_t ino,
    478 		       struct ext2_inode *inode,
    479 		       char *block_buf, blk64_t start,
    480 		       blk64_t end)
    481 {
    482 	errcode_t		retval;
    483 	struct ext2_inode	inode_buf;
    484 
    485 	if (start > end)
    486 		return EINVAL;
    487 
    488 	/* Read inode structure if necessary */
    489 	if (!inode) {
    490 		retval = ext2fs_read_inode(fs, ino, &inode_buf);
    491 		if (retval)
    492 			return retval;
    493 		inode = &inode_buf;
    494 	}
    495 	if (inode->i_flags & EXT4_INLINE_DATA_FL)
    496 		return ext2fs_punch_inline_data(fs, ino, inode, start, end);
    497 	else if (inode->i_flags & EXT4_EXTENTS_FL)
    498 		retval = ext2fs_punch_extent(fs, ino, inode, start, end);
    499 	else
    500 		retval = ext2fs_punch_ind(fs, inode, block_buf, start, end);
    501 	if (retval)
    502 		return retval;
    503 
    504 #ifdef PUNCH_DEBUG
    505 	printf("%u: write inode size now %u blocks %u\n",
    506 		ino, inode->i_size, inode->i_blocks);
    507 #endif
    508 	return ext2fs_write_inode(fs, ino, inode);
    509 }
    510