Home | History | Annotate | Download | only in ext2fs
      1 /*
      2  * undo_io.c --- This is the undo io manager that copies the old data
      3  * being overwritten into a tdb database
      4  *
      5  * Copyright IBM Corporation, 2007
      6  * Author Aneesh Kumar K.V <aneesh.kumar (at) linux.vnet.ibm.com>
      7  *
      8  * %Begin-Header%
      9  * This file may be redistributed under the terms of the GNU Library
     10  * General Public License, version 2.
     11  * %End-Header%
     12  */
     13 
     14 #ifndef _LARGEFILE_SOURCE
     15 #define _LARGEFILE_SOURCE
     16 #endif
     17 #ifndef _LARGEFILE64_SOURCE
     18 #define _LARGEFILE64_SOURCE
     19 #endif
     20 
     21 #include "config.h"
     22 #include <stdio.h>
     23 #include <string.h>
     24 #if HAVE_UNISTD_H
     25 #include <unistd.h>
     26 #endif
     27 #if HAVE_ERRNO_H
     28 #include <errno.h>
     29 #endif
     30 #include <fcntl.h>
     31 #include <time.h>
     32 #ifdef __linux__
     33 #include <sys/utsname.h>
     34 #endif
     35 #if HAVE_SYS_STAT_H
     36 #include <sys/stat.h>
     37 #endif
     38 #if HAVE_SYS_TYPES_H
     39 #include <sys/types.h>
     40 #endif
     41 #if HAVE_SYS_RESOURCE_H
     42 #include <sys/resource.h>
     43 #endif
     44 #include <limits.h>
     45 
     46 #include "ext2_fs.h"
     47 #include "ext2fs.h"
     48 #include "ext2fsP.h"
     49 
/*
 * ATTR(x) expands to __attribute__(x) when __GNUC__ is defined and to
 * nothing otherwise.  It is used below as ATTR((unused)) on parameters.
 */
#ifdef __GNUC__
#define ATTR(x) __attribute__(x)
#else
#define ATTR(x)
#endif
     55 
/* Debug tracing is compiled out here; DEBUG must be defined to enable it. */
#undef DEBUG

/*
 * dbg_printf() prints to stdout and flushes immediately when DEBUG is
 * defined.  Otherwise it expands to nothing, so its arguments are not
 * evaluated.
 */
#ifdef DEBUG
# define dbg_printf(f, a...)  do {printf(f, ## a); fflush(stdout); } while (0)
#else
# define dbg_printf(f, a...)
#endif
     63 
     64 /*
     65  * For checking structure magic numbers...
     66  */
     67 
     68 #define EXT2_CHECK_MAGIC(struct, code) \
     69 	  if ((struct)->magic != (code)) return (code)
     70 /*
     71  * Undo file format: The file is cut up into undo_header.block_size blocks.
     72  * The first block contains the header.
     73  * The second block contains the superblock.
     74  * There is then a repeating series of blocks as follows:
     75  *   A key block, which contains undo_keys to map the following data blocks.
     76  *   Data blocks
     77  * (Note that there are pointers to the first key block and the sb, so this
     78  * order isn't strictly necessary.)
     79  */
/* Copied (8 bytes, no NUL terminator) into undo_header.magic */
#define E2UNDO_MAGIC "E2UNDO02"
/* Stored in undo_key_block.magic and checked when key blocks are re-read */
#define KEYBLOCK_MAGIC 0xCADECADE

#define E2UNDO_STATE_FINISHED	0x1	/* undo file is complete */

#define E2UNDO_MIN_BLOCK_SIZE	1024	/* undo blocks are no less than 1KB */
#define E2UNDO_MAX_BLOCK_SIZE	1048576	/* undo blocks are no more than 1MB */
     87 
/*
 * Header of the undo file, written at block 0 by write_undo_indexes().
 * Multi-byte fields are stored little-endian.  The declared field sizes
 * sum to 512 bytes; header_crc covers everything before it.
 */
struct undo_header {
	char magic[8];		/* "E2UNDO02" */
	__le64 num_keys;	/* how many keys? */
	__le64 super_offset;	/* where in the file is the superblock copy? */
	__le64 key_offset;	/* where do the key/data block chunks start? */
	__le32 block_size;	/* block size of the undo file */
	__le32 fs_block_size;	/* block size of the target device */
	__le32 sb_crc;		/* crc32c of the superblock */
	__le32 state;		/* e2undo state flags */
	__le32 f_compat;	/* compatible features */
	__le32 f_incompat;	/* incompatible features (none so far) */
	__le32 f_rocompat;	/* ro compatible features (none so far) */
	__le32 pad32;		/* padding for fs_offset */
	__le64 fs_offset;	/* filesystem offset */
	__u8 padding[436];	/* padding */
	__le32 header_crc;	/* crc32c of this header (but not this field) */
};
    105 
/*
 * Upper bound used by undo_write_tdb() when deciding whether an existing
 * key may be extended to cover one more block.
 */
#define E2UNDO_MAX_EXTENT_BLOCKS	512	/* max extent size, in blocks */

/*
 * One entry of a key block: describes a run of saved data and where it
 * belongs in the filesystem.  Fields are stored little-endian.
 */
struct undo_key {
	__le64 fsblk;		/* where in the fs does the block go */
	__le32 blk_crc;		/* crc32c of the block */
	__le32 size;		/* how many bytes in this block? */
};
    113 
/*
 * Layout of a key block in the undo file: a small header followed by an
 * array of undo_key entries filling the rest of the block.  The crc is
 * computed over the whole block with the crc field set to zero.
 */
struct undo_key_block {
	__le32 magic;		/* KEYBLOCK_MAGIC number */
	__le32 crc;		/* block checksum */
	__le64 reserved;	/* zero */

	/* C99 flexible array member when available, zero-length array otherwise */
#if __STDC_VERSION__ >= 199901L
	struct undo_key keys[];		/* keys, which come immediately after */
#else
	struct undo_key keys[0];	/* keys, which come immediately after */
#endif
};
    125 
/*
 * Per-channel state of the undo I/O manager, stored in the io_channel's
 * private_data by undo_open().
 */
struct undo_private_data {
	int	magic;		/* set to EXT2_ET_MAGIC_UNIX_IO_CHANNEL by undo_open() */

	/* the undo file io channel */
	io_channel undo_file;
	blk64_t undo_blk_num;			/* next free block */
	blk64_t key_blk_num;			/* current key block location */
	blk64_t super_blk_num;			/* superblock location */
	blk64_t first_key_blk;			/* first key block location */
	struct undo_key_block *keyb;		/* in-memory copy of the current key block */
	size_t num_keys, keys_in_block;		/* total keys / keys used in keyb */

	/* The backing io channel */
	io_channel real;

	unsigned long long tdb_data_size;	/* block size of the undo file, in bytes */
	int tdb_written;			/* set to 1 once undo_setup_tdb() has run */

	/* to support offset in unix I/O manager */
	ext2_loff_t offset;

	ext2fs_block_bitmap written_block_map;	/* undo-sized blocks already saved */
	struct struct_ext2_filsys fake_fs;	/* only used to allocate written_block_map */
	char *tdb_file;				/* private copy of the undo file name */
	struct undo_header hdr;			/* header image written by write_undo_indexes() */
};
/*
 * Number of undo_key entries usable in one key block.  NOTE(review): the
 * "- 1" presumably reserves room for the undo_key_block header, which has
 * the same declared size as one undo_key -- confirm.
 */
#define KEYS_PER_BLOCK(d) (((d)->tdb_data_size / sizeof(struct undo_key)) - 1)
    153 
    154 #define E2UNDO_FEATURE_COMPAT_FS_OFFSET 0x1	/* the filesystem offset */
    155 
    156 static inline void e2undo_set_feature_fs_offset(struct undo_header *header) {
    157 	header->f_compat |= ext2fs_le32_to_cpu(E2UNDO_FEATURE_COMPAT_FS_OFFSET);
    158 }
    159 
    160 static inline void e2undo_clear_feature_fs_offset(struct undo_header *header) {
    161 	header->f_compat &= ~ext2fs_le32_to_cpu(E2UNDO_FEATURE_COMPAT_FS_OFFSET);
    162 }
    163 
/* I/O manager used for both the real device and the undo file; see undo_open() */
static io_manager undo_io_backing_manager;
/* Undo file name recorded by set_undo_io_backup_file() */
static char *tdb_file;
/* Byte count reported by the last read-error callback (undo_io_read_error) */
static int actual_size;
    167 
    168 errcode_t set_undo_io_backing_manager(io_manager manager)
    169 {
    170 	/*
    171 	 * We may want to do some validation later
    172 	 */
    173 	undo_io_backing_manager = manager;
    174 	return 0;
    175 }
    176 
    177 errcode_t set_undo_io_backup_file(char *file_name)
    178 {
    179 	tdb_file = strdup(file_name);
    180 
    181 	if (tdb_file == NULL) {
    182 		return EXT2_ET_NO_MEMORY;
    183 	}
    184 
    185 	return 0;
    186 }
    187 
    188 static errcode_t write_undo_indexes(struct undo_private_data *data, int flush)
    189 {
    190 	errcode_t retval;
    191 	struct ext2_super_block super;
    192 	io_channel channel;
    193 	int block_size;
    194 	__u32 sb_crc, hdr_crc;
    195 
    196 	/* Spit out a key block, if there's any data */
    197 	if (data->keys_in_block) {
    198 		data->keyb->magic = ext2fs_cpu_to_le32(KEYBLOCK_MAGIC);
    199 		data->keyb->crc = 0;
    200 		data->keyb->crc = ext2fs_cpu_to_le32(
    201 					 ext2fs_crc32c_le(~0,
    202 					 (unsigned char *)data->keyb,
    203 					 data->tdb_data_size));
    204 		dbg_printf("Writing keyblock to blk %llu\n", data->key_blk_num);
    205 		retval = io_channel_write_blk64(data->undo_file,
    206 						data->key_blk_num,
    207 						1, data->keyb);
    208 		if (retval)
    209 			return retval;
    210 		/* Move on to the next key block if it's full. */
    211 		if (data->keys_in_block == KEYS_PER_BLOCK(data)) {
    212 			memset(data->keyb, 0, data->tdb_data_size);
    213 			data->keys_in_block = 0;
    214 			data->key_blk_num = data->undo_blk_num;
    215 			data->undo_blk_num++;
    216 		}
    217 	}
    218 
    219 	/* Prepare superblock for write */
    220 	channel = data->real;
    221 	block_size = channel->block_size;
    222 
    223 	io_channel_set_blksize(channel, SUPERBLOCK_OFFSET);
    224 	retval = io_channel_read_blk64(channel, 1, -SUPERBLOCK_SIZE, &super);
    225 	if (retval)
    226 		goto err_out;
    227 	sb_crc = ext2fs_crc32c_le(~0, (unsigned char *)&super, SUPERBLOCK_SIZE);
    228 	super.s_magic = ~super.s_magic;
    229 
    230 	/* Write the undo header to disk. */
    231 	memcpy(data->hdr.magic, E2UNDO_MAGIC, sizeof(data->hdr.magic));
    232 	data->hdr.num_keys = ext2fs_cpu_to_le64(data->num_keys);
    233 	data->hdr.super_offset = ext2fs_cpu_to_le64(data->super_blk_num);
    234 	data->hdr.key_offset = ext2fs_cpu_to_le64(data->first_key_blk);
    235 	data->hdr.fs_block_size = ext2fs_cpu_to_le32(block_size);
    236 	data->hdr.sb_crc = ext2fs_cpu_to_le32(sb_crc);
    237 	data->hdr.fs_offset = ext2fs_cpu_to_le64(data->offset);
    238 	if (data->offset)
    239 		e2undo_set_feature_fs_offset(&data->hdr);
    240 	else
    241 		e2undo_clear_feature_fs_offset(&data->hdr);
    242 	hdr_crc = ext2fs_crc32c_le(~0, (unsigned char *)&data->hdr,
    243 				   sizeof(data->hdr) -
    244 				   sizeof(data->hdr.header_crc));
    245 	data->hdr.header_crc = ext2fs_cpu_to_le32(hdr_crc);
    246 	retval = io_channel_write_blk64(data->undo_file, 0,
    247 					-(int)sizeof(data->hdr),
    248 					&data->hdr);
    249 	if (retval)
    250 		goto err_out;
    251 
    252 	/*
    253 	 * Record the entire superblock (in FS byte order) so that we can't
    254 	 * apply e2undo files to the wrong FS or out of order.
    255 	 */
    256 	dbg_printf("Writing superblock to block %llu\n", data->super_blk_num);
    257 	retval = io_channel_write_blk64(data->undo_file, data->super_blk_num,
    258 					-SUPERBLOCK_SIZE, &super);
    259 	if (retval)
    260 		goto err_out;
    261 
    262 	if (flush)
    263 		retval = io_channel_flush(data->undo_file);
    264 err_out:
    265 	io_channel_set_blksize(channel, block_size);
    266 	return retval;
    267 }
    268 
    269 static errcode_t undo_setup_tdb(struct undo_private_data *data)
    270 {
    271 	int i;
    272 	errcode_t retval;
    273 
    274 	if (data->tdb_written == 1)
    275 		return 0;
    276 
    277 	data->tdb_written = 1;
    278 
    279 	/* Make a bitmap to track what we've written */
    280 	memset(&data->fake_fs, 0, sizeof(data->fake_fs));
    281 	data->fake_fs.blocksize = data->tdb_data_size;
    282 	retval = ext2fs_alloc_generic_bmap(&data->fake_fs,
    283 				EXT2_ET_MAGIC_BLOCK_BITMAP64,
    284 				EXT2FS_BMAP64_RBTREE,
    285 				0, ~1ULL, ~1ULL,
    286 				"undo block map", &data->written_block_map);
    287 	if (retval)
    288 		return retval;
    289 
    290 	/* Allocate key block */
    291 	retval = ext2fs_get_mem(data->tdb_data_size, &data->keyb);
    292 	if (retval)
    293 		return retval;
    294 	data->key_blk_num = data->first_key_blk;
    295 
    296 	/* Record block size */
    297 	dbg_printf("Undo block size %llu\n", data->tdb_data_size);
    298 	dbg_printf("Keys per block %llu\n", KEYS_PER_BLOCK(data));
    299 	data->hdr.block_size = ext2fs_cpu_to_le32(data->tdb_data_size);
    300 	io_channel_set_blksize(data->undo_file, data->tdb_data_size);
    301 
    302 	/* Ensure that we have space for header blocks */
    303 	for (i = 0; i <= 2; i++) {
    304 		retval = io_channel_read_blk64(data->undo_file, i, 1,
    305 					       data->keyb);
    306 		if (retval)
    307 			memset(data->keyb, 0, data->tdb_data_size);
    308 		retval = io_channel_write_blk64(data->undo_file, i, 1,
    309 						data->keyb);
    310 		if (retval)
    311 			return retval;
    312 		retval = io_channel_flush(data->undo_file);
    313 		if (retval)
    314 			return retval;
    315 	}
    316 	memset(data->keyb, 0, data->tdb_data_size);
    317 	return 0;
    318 }
    319 
    320 static errcode_t undo_write_tdb(io_channel channel,
    321 				unsigned long long block, int count)
    322 
    323 {
    324 	int size, sz;
    325 	unsigned long long block_num, backing_blk_num;
    326 	errcode_t retval = 0;
    327 	ext2_loff_t offset;
    328 	struct undo_private_data *data;
    329 	unsigned char *read_ptr;
    330 	unsigned long long end_block;
    331 	unsigned long long data_size;
    332 	struct undo_key *key;
    333 	__u32 blk_crc;
    334 
    335 	data = (struct undo_private_data *) channel->private_data;
    336 
    337 	if (data->undo_file == NULL) {
    338 		/*
    339 		 * Transaction database not initialized
    340 		 */
    341 		return 0;
    342 	}
    343 
    344 	if (count == 1)
    345 		size = channel->block_size;
    346 	else {
    347 		if (count < 0)
    348 			size = -count;
    349 		else
    350 			size = count * channel->block_size;
    351 	}
    352 
    353 	retval = undo_setup_tdb(data);
    354 	if (retval)
    355 		return retval;
    356 	/*
    357 	 * Data is stored in tdb database as blocks of tdb_data_size size
    358 	 * This helps in efficient lookup further.
    359 	 *
    360 	 * We divide the disk to blocks of tdb_data_size.
    361 	 */
    362 	offset = (block * channel->block_size) + data->offset ;
    363 	block_num = offset / data->tdb_data_size;
    364 	end_block = (offset + size - 1) / data->tdb_data_size;
    365 
    366 	while (block_num <= end_block) {
    367 		__u32 keysz;
    368 
    369 		/*
    370 		 * Check if we have the record already
    371 		 */
    372 		if (ext2fs_test_block_bitmap2(data->written_block_map,
    373 						   block_num)) {
    374 			/* Try the next block */
    375 			block_num++;
    376 			continue;
    377 		}
    378 		ext2fs_mark_block_bitmap2(data->written_block_map, block_num);
    379 
    380 		/*
    381 		 * Read one block using the backing I/O manager
    382 		 * The backing I/O manager block size may be
    383 		 * different from the tdb_data_size.
    384 		 * Also we need to recalcuate the block number with respect
    385 		 * to the backing I/O manager.
    386 		 */
    387 		offset = block_num * data->tdb_data_size +
    388 				(data->offset % data->tdb_data_size);
    389 		backing_blk_num = (offset - data->offset) / channel->block_size;
    390 
    391 		retval = ext2fs_get_mem(data->tdb_data_size, &read_ptr);
    392 		if (retval) {
    393 			return retval;
    394 		}
    395 
    396 		memset(read_ptr, 0, data->tdb_data_size);
    397 		actual_size = 0;
    398 		if ((data->tdb_data_size % channel->block_size) == 0)
    399 			sz = data->tdb_data_size / channel->block_size;
    400 		else
    401 			sz = -data->tdb_data_size;
    402 		retval = io_channel_read_blk64(data->real, backing_blk_num,
    403 					     sz, read_ptr);
    404 		if (retval) {
    405 			if (retval != EXT2_ET_SHORT_READ) {
    406 				free(read_ptr);
    407 				return retval;
    408 			}
    409 			/*
    410 			 * short read so update the record size
    411 			 * accordingly
    412 			 */
    413 			data_size = actual_size;
    414 		} else {
    415 			data_size = data->tdb_data_size;
    416 		}
    417 		if (data_size == 0) {
    418 			free(read_ptr);
    419 			block_num++;
    420 			continue;
    421 		}
    422 		dbg_printf("Read %llu bytes from FS block %llu (blk=%llu cnt=%llu)\n",
    423 		       data_size, backing_blk_num, block, data->tdb_data_size);
    424 		if ((data_size % data->undo_file->block_size) == 0)
    425 			sz = data_size / data->undo_file->block_size;
    426 		else
    427 			sz = -data_size;;
    428 		/* extend this key? */
    429 		if (data->keys_in_block) {
    430 			key = data->keyb->keys + data->keys_in_block - 1;
    431 			keysz = ext2fs_le32_to_cpu(key->size);
    432 		} else {
    433 			key = NULL;
    434 			keysz = 0;
    435 		}
    436 		if (key != NULL &&
    437 		    (ext2fs_le64_to_cpu(key->fsblk) * channel->block_size +
    438 		     channel->block_size - 1 +
    439 		     keysz) / channel->block_size == backing_blk_num &&
    440 		    E2UNDO_MAX_EXTENT_BLOCKS * data->tdb_data_size >
    441 		    keysz + data_size) {
    442 			blk_crc = ext2fs_le32_to_cpu(key->blk_crc);
    443 			blk_crc = ext2fs_crc32c_le(blk_crc, read_ptr, data_size);
    444 			key->blk_crc = ext2fs_cpu_to_le32(blk_crc);
    445 			key->size = ext2fs_cpu_to_le32(keysz + data_size);
    446 		} else {
    447 			data->num_keys++;
    448 			key = data->keyb->keys + data->keys_in_block;
    449 			data->keys_in_block++;
    450 			key->fsblk = ext2fs_cpu_to_le64(backing_blk_num);
    451 			blk_crc = ext2fs_crc32c_le(~0, read_ptr, data_size);
    452 			key->blk_crc = ext2fs_cpu_to_le32(blk_crc);
    453 			key->size = ext2fs_cpu_to_le32(data_size);
    454 		}
    455 		dbg_printf("Writing block %llu to offset %llu size %d key %zu\n",
    456 		       block_num,
    457 		       data->undo_blk_num,
    458 		       sz, data->num_keys - 1);
    459 		retval = io_channel_write_blk64(data->undo_file,
    460 					data->undo_blk_num, sz, read_ptr);
    461 		if (retval) {
    462 			free(read_ptr);
    463 			return retval;
    464 		}
    465 		data->undo_blk_num++;
    466 		free(read_ptr);
    467 
    468 		/* Write out the key block */
    469 		retval = write_undo_indexes(data, 0);
    470 		if (retval)
    471 			return retval;
    472 
    473 		/* Next block */
    474 		block_num++;
    475 	}
    476 
    477 	return retval;
    478 }
    479 
    480 static errcode_t undo_io_read_error(io_channel channel ATTR((unused)),
    481 				    unsigned long block ATTR((unused)),
    482 				    int count ATTR((unused)),
    483 				    void *data ATTR((unused)),
    484 				    size_t size ATTR((unused)),
    485 				    int actual,
    486 				    errcode_t error ATTR((unused)))
    487 {
    488 	actual_size = actual;
    489 	return error;
    490 }
    491 
    492 static void undo_err_handler_init(io_channel channel)
    493 {
    494 	channel->read_error = undo_io_read_error;
    495 }
    496 
    497 static int check_filesystem(struct undo_header *hdr, io_channel undo_file,
    498 			    unsigned int blocksize, blk64_t super_block,
    499 			    io_channel channel)
    500 {
    501 	struct ext2_super_block super, *sb;
    502 	char *buf;
    503 	__u32 sb_crc;
    504 	errcode_t retval;
    505 
    506 	io_channel_set_blksize(channel, SUPERBLOCK_OFFSET);
    507 	retval = io_channel_read_blk64(channel, 1, -SUPERBLOCK_SIZE, &super);
    508 	if (retval)
    509 		return retval;
    510 
    511 	/*
    512 	 * Compare the FS and the undo file superblock so that we don't
    513 	 * append to something that doesn't match this FS.
    514 	 */
    515 	retval = ext2fs_get_mem(blocksize, &buf);
    516 	if (retval)
    517 		return retval;
    518 	retval = io_channel_read_blk64(undo_file, super_block,
    519 				       -SUPERBLOCK_SIZE, buf);
    520 	if (retval)
    521 		goto out;
    522 	sb = (struct ext2_super_block *)buf;
    523 	sb->s_magic = ~sb->s_magic;
    524 	if (memcmp(&super, buf, sizeof(super))) {
    525 		retval = -1;
    526 		goto out;
    527 	}
    528 	sb_crc = ext2fs_crc32c_le(~0, (unsigned char *)buf, SUPERBLOCK_SIZE);
    529 	if (ext2fs_le32_to_cpu(hdr->sb_crc) != sb_crc) {
    530 		retval = -1;
    531 		goto out;
    532 	}
    533 
    534 out:
    535 	ext2fs_free_mem(&buf);
    536 	return retval;
    537 }
    538 
    539 /*
    540  * Try to re-open the undo file, so that we can resume where we left off.
    541  * That way, the user can pass the same undo file to various programs as
    542  * part of an FS upgrade instead of having to create multiple files and
    543  * then apply them in correct order.
    544  */
    545 static errcode_t try_reopen_undo_file(int undo_fd,
    546 				      struct undo_private_data *data)
    547 {
    548 	struct undo_header hdr;
    549 	struct undo_key *dkey;
    550 	ext2fs_struct_stat statbuf;
    551 	unsigned int blocksize, fs_blocksize;
    552 	blk64_t super_block, lblk;
    553 	size_t num_keys, keys_per_block, i;
    554 	__u32 hdr_crc, key_crc;
    555 	errcode_t retval;
    556 
    557 	/* Zero size already? */
    558 	retval = ext2fs_fstat(undo_fd, &statbuf);
    559 	if (retval)
    560 		goto bad_file;
    561 	if (statbuf.st_size == 0)
    562 		goto out;
    563 
    564 	/* check the file header */
    565 	retval = io_channel_read_blk64(data->undo_file, 0, -(int)sizeof(hdr),
    566 				       &hdr);
    567 	if (retval)
    568 		goto bad_file;
    569 
    570 	if (memcmp(hdr.magic, E2UNDO_MAGIC,
    571 		    sizeof(hdr.magic)))
    572 		goto bad_file;
    573 	hdr_crc = ext2fs_crc32c_le(~0, (unsigned char *)&hdr,
    574 				   sizeof(struct undo_header) -
    575 				   sizeof(__u32));
    576 	if (ext2fs_le32_to_cpu(hdr.header_crc) != hdr_crc)
    577 		goto bad_file;
    578 	blocksize = ext2fs_le32_to_cpu(hdr.block_size);
    579 	fs_blocksize = ext2fs_le32_to_cpu(hdr.fs_block_size);
    580 	if (blocksize > E2UNDO_MAX_BLOCK_SIZE ||
    581 	    blocksize < E2UNDO_MIN_BLOCK_SIZE ||
    582 	    !blocksize || !fs_blocksize)
    583 		goto bad_file;
    584 	super_block = ext2fs_le64_to_cpu(hdr.super_offset);
    585 	num_keys = ext2fs_le64_to_cpu(hdr.num_keys);
    586 	io_channel_set_blksize(data->undo_file, blocksize);
    587 	/*
    588 	 * Do not compare hdr.f_compat with the available compatible
    589 	 * features set, because a "missing" compatible feature should
    590 	 * not cause any problems.
    591 	 */
    592 	if (hdr.f_incompat || hdr.f_rocompat)
    593 		goto bad_file;
    594 
    595 	/* Superblock matches this FS? */
    596 	if (check_filesystem(&hdr, data->undo_file, blocksize, super_block,
    597 			     data->real) != 0) {
    598 		retval = EXT2_ET_UNDO_FILE_WRONG;
    599 		goto out;
    600 	}
    601 
    602 	/* Try to set ourselves up */
    603 	data->tdb_data_size = blocksize;
    604 	retval = undo_setup_tdb(data);
    605 	if (retval)
    606 		goto bad_file;
    607 	data->num_keys = num_keys;
    608 	data->super_blk_num = super_block;
    609 	data->first_key_blk = ext2fs_le64_to_cpu(hdr.key_offset);
    610 
    611 	/* load the written block map */
    612 	keys_per_block = KEYS_PER_BLOCK(data);
    613 	lblk = data->first_key_blk;
    614 	dbg_printf("nr_keys=%lu, kpb=%zu, blksz=%u\n",
    615 		   num_keys, keys_per_block, blocksize);
    616 	for (i = 0; i < num_keys; i += keys_per_block) {
    617 		size_t j, max_j;
    618 		__le32 crc;
    619 
    620 		data->key_blk_num = lblk;
    621 		retval = io_channel_read_blk64(data->undo_file,
    622 					       lblk, 1, data->keyb);
    623 		if (retval)
    624 			goto bad_key_replay;
    625 
    626 		/* check keys */
    627 		if (ext2fs_le32_to_cpu(data->keyb->magic) != KEYBLOCK_MAGIC) {
    628 			retval = EXT2_ET_UNDO_FILE_CORRUPT;
    629 			goto bad_key_replay;
    630 		}
    631 		crc = data->keyb->crc;
    632 		data->keyb->crc = 0;
    633 		key_crc = ext2fs_crc32c_le(~0, (unsigned char *)data->keyb,
    634 					   blocksize);
    635 		if (ext2fs_le32_to_cpu(crc) != key_crc) {
    636 			retval = EXT2_ET_UNDO_FILE_CORRUPT;
    637 			goto bad_key_replay;
    638 		}
    639 
    640 		/* load keys from key block */
    641 		lblk++;
    642 		max_j = data->num_keys - i;
    643 		if (max_j > keys_per_block)
    644 			max_j = keys_per_block;
    645 		for (j = 0, dkey = data->keyb->keys;
    646 		     j < max_j;
    647 		     j++, dkey++) {
    648 			blk64_t fsblk = ext2fs_le64_to_cpu(dkey->fsblk);
    649 			blk64_t undo_blk = fsblk * fs_blocksize / blocksize;
    650 			size_t size = ext2fs_le32_to_cpu(dkey->size);
    651 
    652 			ext2fs_mark_block_bitmap_range2(data->written_block_map,
    653 					 undo_blk,
    654 					(size + blocksize - 1) / blocksize);
    655 			lblk += (size + blocksize - 1) / blocksize;
    656 			data->undo_blk_num = lblk;
    657 			data->keys_in_block = j + 1;
    658 		}
    659 	}
    660 	dbg_printf("Reopen undo, keyblk=%llu undoblk=%llu nrkeys=%zu kib=%zu\n",
    661 		   data->key_blk_num, data->undo_blk_num, data->num_keys,
    662 		   data->keys_in_block);
    663 
    664 	data->hdr.state = hdr.state & ~E2UNDO_STATE_FINISHED;
    665 	data->hdr.f_compat = hdr.f_compat;
    666 	data->hdr.f_incompat = hdr.f_incompat;
    667 	data->hdr.f_rocompat = hdr.f_rocompat;
    668 	return retval;
    669 
    670 bad_key_replay:
    671 	data->key_blk_num = data->undo_blk_num = 0;
    672 	data->keys_in_block = 0;
    673 	ext2fs_free_mem(&data->keyb);
    674 	ext2fs_free_generic_bitmap(data->written_block_map);
    675 	data->tdb_written = 0;
    676 	goto out;
    677 bad_file:
    678 	retval = EXT2_ET_UNDO_FILE_CORRUPT;
    679 out:
    680 	return retval;
    681 }
    682 
    683 static void undo_atexit(void *p)
    684 {
    685 	struct undo_private_data *data = p;
    686 	errcode_t err;
    687 
    688 	err = write_undo_indexes(data, 1);
    689 	io_channel_close(data->undo_file);
    690 
    691 	com_err(data->tdb_file, err, "while force-closing undo file");
    692 }
    693 
    694 static errcode_t undo_open(const char *name, int flags, io_channel *channel)
    695 {
    696 	io_channel	io = NULL;
    697 	struct undo_private_data *data = NULL;
    698 	int		undo_fd = -1;
    699 	errcode_t	retval;
    700 
    701 	if (name == 0)
    702 		return EXT2_ET_BAD_DEVICE_NAME;
    703 	retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
    704 	if (retval)
    705 		goto cleanup;
    706 	memset(io, 0, sizeof(struct struct_io_channel));
    707 	io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
    708 	retval = ext2fs_get_mem(sizeof(struct undo_private_data), &data);
    709 	if (retval)
    710 		goto cleanup;
    711 
    712 	io->manager = undo_io_manager;
    713 	retval = ext2fs_get_mem(strlen(name)+1, &io->name);
    714 	if (retval)
    715 		goto cleanup;
    716 
    717 	strcpy(io->name, name);
    718 	io->private_data = data;
    719 	io->block_size = 1024;
    720 	io->read_error = 0;
    721 	io->write_error = 0;
    722 	io->refcount = 1;
    723 
    724 	memset(data, 0, sizeof(struct undo_private_data));
    725 	data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
    726 	data->super_blk_num = 1;
    727 	data->first_key_blk = 2;
    728 	data->undo_blk_num = 3;
    729 
    730 	if (undo_io_backing_manager) {
    731 		retval = undo_io_backing_manager->open(name, flags,
    732 						       &data->real);
    733 		if (retval)
    734 			goto cleanup;
    735 
    736 		data->tdb_file = strdup(tdb_file);
    737 		if (data->tdb_file == NULL)
    738 			goto cleanup;
    739 		undo_fd = ext2fs_open_file(data->tdb_file, O_RDWR | O_CREAT,
    740 					   0600);
    741 		if (undo_fd < 0)
    742 			goto cleanup;
    743 
    744 		retval = undo_io_backing_manager->open(data->tdb_file,
    745 						       IO_FLAG_RW,
    746 						       &data->undo_file);
    747 		if (retval)
    748 			goto cleanup;
    749 	} else {
    750 		data->real = NULL;
    751 		data->undo_file = NULL;
    752 	}
    753 
    754 	if (data->real)
    755 		io->flags = (io->flags & ~CHANNEL_FLAGS_DISCARD_ZEROES) |
    756 			    (data->real->flags & CHANNEL_FLAGS_DISCARD_ZEROES);
    757 
    758 	/*
    759 	 * setup err handler for read so that we know
    760 	 * when the backing manager fails do short read
    761 	 */
    762 	if (data->real)
    763 		undo_err_handler_init(data->real);
    764 
    765 	if (data->undo_file) {
    766 		retval = try_reopen_undo_file(undo_fd, data);
    767 		if (retval)
    768 			goto cleanup;
    769 	}
    770 	retval = ext2fs_add_exit_fn(undo_atexit, data);
    771 	if (retval)
    772 		goto cleanup;
    773 
    774 	*channel = io;
    775 	if (undo_fd >= 0)
    776 		close(undo_fd);
    777 	return retval;
    778 
    779 cleanup:
    780 	ext2fs_remove_exit_fn(undo_atexit, data);
    781 	if (undo_fd >= 0)
    782 		close(undo_fd);
    783 	if (data && data->undo_file)
    784 		io_channel_close(data->undo_file);
    785 	if (data && data->tdb_file)
    786 		free(data->tdb_file);
    787 	if (data && data->real)
    788 		io_channel_close(data->real);
    789 	if (data)
    790 		ext2fs_free_mem(&data);
    791 	if (io)
    792 		ext2fs_free_mem(&io);
    793 	return retval;
    794 }
    795 
    796 static errcode_t undo_close(io_channel channel)
    797 {
    798 	struct undo_private_data *data;
    799 	errcode_t	err, retval = 0;
    800 
    801 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    802 	data = (struct undo_private_data *) channel->private_data;
    803 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    804 
    805 	if (--channel->refcount > 0)
    806 		return 0;
    807 	/* Before closing write the file system identity */
    808 	if (!getenv("UNDO_IO_SIMULATE_UNFINISHED"))
    809 		data->hdr.state = ext2fs_cpu_to_le32(E2UNDO_STATE_FINISHED);
    810 	err = write_undo_indexes(data, 1);
    811 	ext2fs_remove_exit_fn(undo_atexit, data);
    812 	if (data->real)
    813 		retval = io_channel_close(data->real);
    814 	if (data->tdb_file)
    815 		free(data->tdb_file);
    816 	if (data->undo_file)
    817 		io_channel_close(data->undo_file);
    818 	ext2fs_free_mem(&data->keyb);
    819 	if (data->written_block_map)
    820 		ext2fs_free_generic_bitmap(data->written_block_map);
    821 	ext2fs_free_mem(&channel->private_data);
    822 	if (channel->name)
    823 		ext2fs_free_mem(&channel->name);
    824 	ext2fs_free_mem(&channel);
    825 
    826 	if (err)
    827 		return err;
    828 	return retval;
    829 }
    830 
    831 static errcode_t undo_set_blksize(io_channel channel, int blksize)
    832 {
    833 	struct undo_private_data *data;
    834 	errcode_t		retval = 0;
    835 
    836 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    837 	data = (struct undo_private_data *) channel->private_data;
    838 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    839 
    840 	if (blksize > E2UNDO_MAX_BLOCK_SIZE || blksize < E2UNDO_MIN_BLOCK_SIZE)
    841 		return EXT2_ET_INVALID_ARGUMENT;
    842 
    843 	if (data->real)
    844 		retval = io_channel_set_blksize(data->real, blksize);
    845 	/*
    846 	 * Set the block size used for tdb
    847 	 */
    848 	if (!data->tdb_data_size || !data->tdb_written)
    849 		data->tdb_data_size = blksize;
    850 	channel->block_size = blksize;
    851 	return retval;
    852 }
    853 
    854 static errcode_t undo_read_blk64(io_channel channel, unsigned long long block,
    855 			       int count, void *buf)
    856 {
    857 	errcode_t	retval = 0;
    858 	struct undo_private_data *data;
    859 
    860 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    861 	data = (struct undo_private_data *) channel->private_data;
    862 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    863 
    864 	if (data->real)
    865 		retval = io_channel_read_blk64(data->real, block, count, buf);
    866 
    867 	return retval;
    868 }
    869 
    870 static errcode_t undo_read_blk(io_channel channel, unsigned long block,
    871 			       int count, void *buf)
    872 {
    873 	return undo_read_blk64(channel, block, count, buf);
    874 }
    875 
    876 static errcode_t undo_write_blk64(io_channel channel, unsigned long long block,
    877 				int count, const void *buf)
    878 {
    879 	struct undo_private_data *data;
    880 	errcode_t	retval = 0;
    881 
    882 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    883 	data = (struct undo_private_data *) channel->private_data;
    884 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    885 	/*
    886 	 * First write the existing content into database
    887 	 */
    888 	retval = undo_write_tdb(channel, block, count);
    889 	if (retval)
    890 		 return retval;
    891 	if (data->real)
    892 		retval = io_channel_write_blk64(data->real, block, count, buf);
    893 
    894 	return retval;
    895 }
    896 
    897 static errcode_t undo_write_blk(io_channel channel, unsigned long block,
    898 				int count, const void *buf)
    899 {
    900 	return undo_write_blk64(channel, block, count, buf);
    901 }
    902 
    903 static errcode_t undo_write_byte(io_channel channel, unsigned long offset,
    904 				 int size, const void *buf)
    905 {
    906 	struct undo_private_data *data;
    907 	errcode_t	retval = 0;
    908 	ext2_loff_t	location;
    909 	unsigned long blk_num, count;;
    910 
    911 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    912 	data = (struct undo_private_data *) channel->private_data;
    913 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    914 
    915 	location = offset + data->offset;
    916 	blk_num = location/channel->block_size;
    917 	/*
    918 	 * the size specified may spread across multiple blocks
    919 	 * also make sure we account for the fact that block start
    920 	 * offset for tdb is different from the backing I/O manager
    921 	 * due to possible different block size
    922 	 */
    923 	count = (size + (location % channel->block_size) +
    924 			channel->block_size  -1)/channel->block_size;
    925 	retval = undo_write_tdb(channel, blk_num, count);
    926 	if (retval)
    927 		return retval;
    928 	if (data->real && data->real->manager->write_byte)
    929 		retval = io_channel_write_byte(data->real, offset, size, buf);
    930 
    931 	return retval;
    932 }
    933 
    934 static errcode_t undo_discard(io_channel channel, unsigned long long block,
    935 			      unsigned long long count)
    936 {
    937 	struct undo_private_data *data;
    938 	errcode_t	retval = 0;
    939 	int icount;
    940 
    941 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    942 	data = (struct undo_private_data *) channel->private_data;
    943 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    944 
    945 	if (count > INT_MAX)
    946 		return EXT2_ET_UNIMPLEMENTED;
    947 	icount = count;
    948 
    949 	/*
    950 	 * First write the existing content into database
    951 	 */
    952 	retval = undo_write_tdb(channel, block, icount);
    953 	if (retval)
    954 		return retval;
    955 	if (data->real)
    956 		retval = io_channel_discard(data->real, block, count);
    957 
    958 	return retval;
    959 }
    960 
    961 static errcode_t undo_zeroout(io_channel channel, unsigned long long block,
    962 			      unsigned long long count)
    963 {
    964 	struct undo_private_data *data;
    965 	errcode_t	retval = 0;
    966 	int icount;
    967 
    968 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    969 	data = (struct undo_private_data *) channel->private_data;
    970 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    971 
    972 	if (count > INT_MAX)
    973 		return EXT2_ET_UNIMPLEMENTED;
    974 	icount = count;
    975 
    976 	/*
    977 	 * First write the existing content into database
    978 	 */
    979 	retval = undo_write_tdb(channel, block, icount);
    980 	if (retval)
    981 		return retval;
    982 	if (data->real)
    983 		retval = io_channel_zeroout(data->real, block, count);
    984 
    985 	return retval;
    986 }
    987 
    988 static errcode_t undo_cache_readahead(io_channel channel,
    989 				      unsigned long long block,
    990 				      unsigned long long count)
    991 {
    992 	struct undo_private_data *data;
    993 	errcode_t	retval = 0;
    994 
    995 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    996 	data = (struct undo_private_data *) channel->private_data;
    997 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    998 
    999 	if (data->real)
   1000 		retval = io_channel_cache_readahead(data->real, block, count);
   1001 
   1002 	return retval;
   1003 }
   1004 
   1005 /*
   1006  * Flush data buffers to disk.
   1007  */
   1008 static errcode_t undo_flush(io_channel channel)
   1009 {
   1010 	errcode_t	retval = 0;
   1011 	struct undo_private_data *data;
   1012 
   1013 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
   1014 	data = (struct undo_private_data *) channel->private_data;
   1015 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
   1016 
   1017 	if (data->real)
   1018 		retval = io_channel_flush(data->real);
   1019 
   1020 	return retval;
   1021 }
   1022 
   1023 static errcode_t undo_set_option(io_channel channel, const char *option,
   1024 				 const char *arg)
   1025 {
   1026 	errcode_t	retval = 0;
   1027 	struct undo_private_data *data;
   1028 	unsigned long tmp;
   1029 	char *end;
   1030 
   1031 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
   1032 	data = (struct undo_private_data *) channel->private_data;
   1033 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
   1034 
   1035 	if (!strcmp(option, "tdb_data_size")) {
   1036 		if (!arg)
   1037 			return EXT2_ET_INVALID_ARGUMENT;
   1038 
   1039 		tmp = strtoul(arg, &end, 0);
   1040 		if (*end)
   1041 			return EXT2_ET_INVALID_ARGUMENT;
   1042 		if (tmp > E2UNDO_MAX_BLOCK_SIZE || tmp < E2UNDO_MIN_BLOCK_SIZE)
   1043 			return EXT2_ET_INVALID_ARGUMENT;
   1044 		if (!data->tdb_data_size || !data->tdb_written) {
   1045 			data->tdb_written = -1;
   1046 			data->tdb_data_size = tmp;
   1047 		}
   1048 		return 0;
   1049 	}
   1050 	/*
   1051 	 * Need to support offset option to work with
   1052 	 * Unix I/O manager
   1053 	 */
   1054 	if (data->real && data->real->manager->set_option) {
   1055 		retval = data->real->manager->set_option(data->real,
   1056 							option, arg);
   1057 	}
   1058 	if (!retval && !strcmp(option, "offset")) {
   1059 		if (!arg)
   1060 			return EXT2_ET_INVALID_ARGUMENT;
   1061 
   1062 		tmp = strtoul(arg, &end, 0);
   1063 		if (*end)
   1064 			return EXT2_ET_INVALID_ARGUMENT;
   1065 		data->offset = tmp;
   1066 	}
   1067 	return retval;
   1068 }
   1069 
   1070 static errcode_t undo_get_stats(io_channel channel, io_stats *stats)
   1071 {
   1072 	errcode_t	retval = 0;
   1073 	struct undo_private_data *data;
   1074 
   1075 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
   1076 	data = (struct undo_private_data *) channel->private_data;
   1077 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
   1078 
   1079 	if (data->real)
   1080 		retval = (data->real->manager->get_stats)(data->real, stats);
   1081 
   1082 	return retval;
   1083 }
   1084 
   1085 static struct struct_io_manager struct_undo_manager = {
   1086 	.magic		= EXT2_ET_MAGIC_IO_MANAGER,
   1087 	.name		= "Undo I/O Manager",
   1088 	.open		= undo_open,
   1089 	.close		= undo_close,
   1090 	.set_blksize	= undo_set_blksize,
   1091 	.read_blk	= undo_read_blk,
   1092 	.write_blk	= undo_write_blk,
   1093 	.flush		= undo_flush,
   1094 	.write_byte	= undo_write_byte,
   1095 	.set_option	= undo_set_option,
   1096 	.get_stats	= undo_get_stats,
   1097 	.read_blk64	= undo_read_blk64,
   1098 	.write_blk64	= undo_write_blk64,
   1099 	.discard	= undo_discard,
   1100 	.zeroout	= undo_zeroout,
   1101 	.cache_readahead	= undo_cache_readahead,
   1102 };
   1103 
   1104 io_manager undo_io_manager = &struct_undo_manager;
   1105