Home | History | Annotate | Download | only in debugfs
      1 /*
      2  * linux/fs/jbd2/recovery.c
      3  *
      4  * Written by Stephen C. Tweedie <sct (at) redhat.com>, 1999
      5  *
      6  * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
      7  *
      8  * This file is part of the Linux kernel and is made available under
      9  * the terms of the GNU General Public License, version 2, or at your
     10  * option, any later version, incorporated herein by reference.
     11  *
     12  * Journal recovery routines for the generic filesystem journaling code;
     13  * part of the ext2fs journaling system.
     14  */
     15 
     16 #ifndef __KERNEL__
     17 #include "jfs_user.h"
     18 #else
     19 #include <linux/time.h>
     20 #include <linux/fs.h>
     21 #include <linux/jbd2.h>
     22 #include <linux/errno.h>
     23 #include <linux/crc32.h>
     24 #include <linux/blkdev.h>
     25 #endif
     26 
     27 /*
     28  * Maintain information about the progress of the recovery job, so that
     29  * the different passes can carry information between them.
     30  */
     31 struct recovery_info
     32 {
     33 	tid_t		start_transaction;
     34 	tid_t		end_transaction;
     35 
     36 	int		nr_replays;
     37 	int		nr_revokes;
     38 	int		nr_revoke_hits;
     39 };
     40 
     41 enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
     42 static int do_one_pass(journal_t *journal,
     43 				struct recovery_info *info, enum passtype pass);
     44 static int scan_revoke_records(journal_t *, struct buffer_head *,
     45 				tid_t, struct recovery_info *);
     46 
     47 #ifdef __KERNEL__
     48 
     49 /* Release readahead buffers after use */
     50 static void journal_brelse_array(struct buffer_head *b[], int n)
     51 {
     52 	while (--n >= 0)
     53 		brelse (b[n]);
     54 }
     55 
     56 
     57 /*
     58  * When reading from the journal, we are going through the block device
     59  * layer directly and so there is no readahead being done for us.  We
     60  * need to implement any readahead ourselves if we want it to happen at
     61  * all.  Recovery is basically one long sequential read, so make sure we
     62  * do the IO in reasonably large chunks.
     63  *
     64  * This is not so critical that we need to be enormously clever about
     65  * the readahead size, though.  128K is a purely arbitrary, good-enough
     66  * fixed value.
     67  */
     68 
     69 #define MAXBUF 8
     70 static int do_readahead(journal_t *journal, unsigned int start)
     71 {
     72 	int err;
     73 	unsigned int max, nbufs, next;
     74 	unsigned long long blocknr;
     75 	struct buffer_head *bh;
     76 
     77 	struct buffer_head * bufs[MAXBUF];
     78 
     79 	/* Do up to 128K of readahead */
     80 	max = start + (128 * 1024 / journal->j_blocksize);
     81 	if (max > journal->j_maxlen)
     82 		max = journal->j_maxlen;
     83 
     84 	/* Do the readahead itself.  We'll submit MAXBUF buffer_heads at
     85 	 * a time to the block device IO layer. */
     86 
     87 	nbufs = 0;
     88 
     89 	for (next = start; next < max; next++) {
     90 		err = journal_bmap(journal, next, &blocknr);
     91 
     92 		if (err) {
     93 			printk(KERN_ERR "JBD2: bad block at offset %u\n",
     94 				next);
     95 			goto failed;
     96 		}
     97 
     98 		bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
     99 		if (!bh) {
    100 			err = -ENOMEM;
    101 			goto failed;
    102 		}
    103 
    104 		if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
    105 			bufs[nbufs++] = bh;
    106 			if (nbufs == MAXBUF) {
    107 				ll_rw_block(READ, nbufs, bufs);
    108 				journal_brelse_array(bufs, nbufs);
    109 				nbufs = 0;
    110 			}
    111 		} else
    112 			brelse(bh);
    113 	}
    114 
    115 	if (nbufs)
    116 		ll_rw_block(READ, nbufs, bufs);
    117 	err = 0;
    118 
    119 failed:
    120 	if (nbufs)
    121 		journal_brelse_array(bufs, nbufs);
    122 	return err;
    123 }
    124 
    125 #endif /* __KERNEL__ */
    126 
    127 
    128 /*
    129  * Read a block from the journal
    130  */
    131 
    132 static int jread(struct buffer_head **bhp, journal_t *journal,
    133 		 unsigned int offset)
    134 {
    135 	int err;
    136 	unsigned long long blocknr;
    137 	struct buffer_head *bh;
    138 
    139 	*bhp = NULL;
    140 
    141 	if (offset >= journal->j_maxlen) {
    142 		printk(KERN_ERR "JBD2: corrupted journal superblock\n");
    143 		return -EFSCORRUPTED;
    144 	}
    145 
    146 	err = journal_bmap(journal, offset, &blocknr);
    147 
    148 	if (err) {
    149 		printk(KERN_ERR "JBD2: bad block at offset %u\n",
    150 			offset);
    151 		return err;
    152 	}
    153 
    154 	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
    155 	if (!bh)
    156 		return -ENOMEM;
    157 
    158 	if (!buffer_uptodate(bh)) {
    159 		/* If this is a brand new buffer, start readahead.
    160                    Otherwise, we assume we are already reading it.  */
    161 		if (!buffer_req(bh))
    162 			do_readahead(journal, offset);
    163 		wait_on_buffer(bh);
    164 	}
    165 
    166 	if (!buffer_uptodate(bh)) {
    167 		printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
    168 			offset);
    169 		brelse(bh);
    170 		return -EIO;
    171 	}
    172 
    173 	*bhp = bh;
    174 	return 0;
    175 }
    176 
    177 static int jbd2_descr_block_csum_verify(journal_t *j,
    178 					void *buf)
    179 {
    180 	struct journal_block_tail *tail;
    181 	__u32 provided;
    182 	__u32 calculated;
    183 
    184 	if (!journal_has_csum_v2or3(j))
    185 		return 1;
    186 
    187 	tail = (struct journal_block_tail *)((char *)buf + j->j_blocksize -
    188 			sizeof(struct journal_block_tail));
    189 	provided = tail->t_checksum;
    190 	tail->t_checksum = 0;
    191 	calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
    192 	tail->t_checksum = provided;
    193 
    194 	return provided == ext2fs_cpu_to_be32(calculated);
    195 }
    196 
    197 /*
    198  * Count the number of in-use tags in a journal descriptor block.
    199  */
    200 
    201 static int count_tags(journal_t *journal, struct buffer_head *bh)
    202 {
    203 	char *			tagp;
    204 	journal_block_tag_t *	tag;
    205 	int			nr = 0, size = journal->j_blocksize;
    206 	int			tag_bytes = journal_tag_bytes(journal);
    207 
    208 	if (journal_has_csum_v2or3(journal))
    209 		size -= sizeof(struct journal_block_tail);
    210 
    211 	tagp = &bh->b_data[sizeof(journal_header_t)];
    212 
    213 	while ((tagp - bh->b_data + tag_bytes) <= size) {
    214 		tag = (journal_block_tag_t *) tagp;
    215 
    216 		nr++;
    217 		tagp += tag_bytes;
    218 		if (!(tag->t_flags & ext2fs_cpu_to_be16(JFS_FLAG_SAME_UUID)))
    219 			tagp += 16;
    220 
    221 		if (tag->t_flags & ext2fs_cpu_to_be16(JFS_FLAG_LAST_TAG))
    222 			break;
    223 	}
    224 
    225 	return nr;
    226 }
    227 
    228 
    229 /* Make sure we wrap around the log correctly! */
    230 #define wrap(journal, var)						\
    231 do {									\
    232 	if (var >= (journal)->j_last)					\
    233 		var -= ((journal)->j_last - (journal)->j_first);	\
    234 } while (0)
    235 
    236 /**
    237  * journal_recover - recovers a on-disk journal
    238  * @journal: the journal to recover
    239  *
    240  * The primary function for recovering the log contents when mounting a
    241  * journaled device.
    242  *
    243  * Recovery is done in three passes.  In the first pass, we look for the
    244  * end of the log.  In the second, we assemble the list of revoke
    245  * blocks.  In the third and final pass, we replay any un-revoked blocks
    246  * in the log.
    247  */
    248 int journal_recover(journal_t *journal)
    249 {
    250 	int			err, err2;
    251 	journal_superblock_t *	sb;
    252 
    253 	struct recovery_info	info;
    254 
    255 	memset(&info, 0, sizeof(info));
    256 	sb = journal->j_superblock;
    257 
    258 	/*
    259 	 * The journal superblock's s_start field (the current log head)
    260 	 * is always zero if, and only if, the journal was cleanly
    261 	 * unmounted.
    262 	 */
    263 
    264 	if (!sb->s_start) {
    265 		jbd_debug(1, "No recovery required, last transaction %d\n",
    266 			  ext2fs_be32_to_cpu(sb->s_sequence));
    267 		journal->j_transaction_sequence = ext2fs_be32_to_cpu(sb->s_sequence) + 1;
    268 		return 0;
    269 	}
    270 
    271 	err = do_one_pass(journal, &info, PASS_SCAN);
    272 	if (!err)
    273 		err = do_one_pass(journal, &info, PASS_REVOKE);
    274 	if (!err)
    275 		err = do_one_pass(journal, &info, PASS_REPLAY);
    276 
    277 	jbd_debug(1, "JBD2: recovery, exit status %d, "
    278 		  "recovered transactions %u to %u\n",
    279 		  err, info.start_transaction, info.end_transaction);
    280 	jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
    281 		  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
    282 
    283 	/* Restart the log at the next transaction ID, thus invalidating
    284 	 * any existing commit records in the log. */
    285 	journal->j_transaction_sequence = ++info.end_transaction;
    286 
    287 	journal_clear_revoke(journal);
    288 	err2 = sync_blockdev(journal->j_fs_dev);
    289 	if (!err)
    290 		err = err2;
    291 	/* Make sure all replayed data is on permanent storage */
    292 	if (journal->j_flags & JFS_BARRIER) {
    293 		err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
    294 		if (!err)
    295 			err = err2;
    296 	}
    297 	return err;
    298 }
    299 
    300 /**
    301  * journal_skip_recovery - Start journal and wipe exiting records
    302  * @journal: journal to startup
    303  *
    304  * Locate any valid recovery information from the journal and set up the
    305  * journal structures in memory to ignore it (presumably because the
    306  * caller has evidence that it is out of date).
    307  * This function does'nt appear to be exorted..
    308  *
    309  * We perform one pass over the journal to allow us to tell the user how
    310  * much recovery information is being erased, and to let us initialise
    311  * the journal transaction sequence numbers to the next unused ID.
    312  */
    313 int journal_skip_recovery(journal_t *journal)
    314 {
    315 	int			err;
    316 
    317 	struct recovery_info	info;
    318 
    319 	memset (&info, 0, sizeof(info));
    320 
    321 	err = do_one_pass(journal, &info, PASS_SCAN);
    322 
    323 	if (err) {
    324 		printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
    325 		++journal->j_transaction_sequence;
    326 	} else {
    327 #ifdef CONFIG_JFS_DEBUG
    328 		int dropped = info.end_transaction -
    329 			ext2fs_be32_to_cpu(journal->j_superblock->s_sequence);
    330 		jbd_debug(1,
    331 			  "JBD2: ignoring %d transaction%s from the journal.\n",
    332 			  dropped, (dropped == 1) ? "" : "s");
    333 #endif
    334 		journal->j_transaction_sequence = ++info.end_transaction;
    335 	}
    336 
    337 	journal->j_tail = 0;
    338 	return err;
    339 }
    340 
    341 static inline __u32 get_be32(__be32 *p)
    342 {
    343 	unsigned char *cp = (unsigned char *) p;
    344 	__u32 ret;
    345 
    346 	ret = *cp++;
    347 	ret = (ret << 8) + *cp++;
    348 	ret = (ret << 8) + *cp++;
    349 	ret = (ret << 8) + *cp++;
    350 	return ret;
    351 }
    352 
    353 static inline unsigned long long read_tag_block(journal_t *journal,
    354 						journal_block_tag_t *tag)
    355 {
    356 	unsigned long long block = get_be32(&tag->t_blocknr);
    357 	if (jfs_has_feature_64bit(journal))
    358 		block |= (u64)get_be32(&tag->t_blocknr_high) << 32;
    359 	return block;
    360 }
    361 
    362 /*
    363  * calc_chksums calculates the checksums for the blocks described in the
    364  * descriptor block.
    365  */
    366 static int calc_chksums(journal_t *journal, struct buffer_head *bh,
    367 			unsigned long *next_log_block, __u32 *crc32_sum)
    368 {
    369 	int i, num_blks, err;
    370 	unsigned long io_block;
    371 	struct buffer_head *obh;
    372 
    373 	num_blks = count_tags(journal, bh);
    374 	/* Calculate checksum of the descriptor block. */
    375 	*crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
    376 
    377 	for (i = 0; i < num_blks; i++) {
    378 		io_block = (*next_log_block)++;
    379 		wrap(journal, *next_log_block);
    380 		err = jread(&obh, journal, io_block);
    381 		if (err) {
    382 			printk(KERN_ERR "JBD2: IO error %d recovering block "
    383 				"%lu in log\n", err, io_block);
    384 			return 1;
    385 		} else {
    386 			*crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
    387 				     obh->b_size);
    388 		}
    389 		put_bh(obh);
    390 	}
    391 	return 0;
    392 }
    393 
    394 static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
    395 {
    396 	struct commit_header *h;
    397 	__u32 provided;
    398 	__u32 calculated;
    399 
    400 	if (!journal_has_csum_v2or3(j))
    401 		return 1;
    402 
    403 	h = buf;
    404 	provided = h->h_chksum[0];
    405 	h->h_chksum[0] = 0;
    406 	calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
    407 	h->h_chksum[0] = provided;
    408 
    409 	return provided == ext2fs_cpu_to_be32(calculated);
    410 }
    411 
    412 static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
    413 				      void *buf, __u32 sequence)
    414 {
    415 	journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
    416 	__u32 csum32;
    417 	__u32 seq;
    418 
    419 	if (!journal_has_csum_v2or3(j))
    420 		return 1;
    421 
    422 	seq = ext2fs_cpu_to_be32(sequence);
    423 	csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
    424 	csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
    425 
    426 	if (jfs_has_feature_csum3(j))
    427 		return tag3->t_checksum == ext2fs_cpu_to_be32(csum32);
    428 
    429 	return tag->t_checksum == ext2fs_cpu_to_be16(csum32);
    430 }
    431 
    432 static int do_one_pass(journal_t *journal,
    433 			struct recovery_info *info, enum passtype pass)
    434 {
    435 	unsigned int		first_commit_ID, next_commit_ID;
    436 	unsigned long		next_log_block;
    437 	int			err, success = 0;
    438 	journal_superblock_t *	sb;
    439 	journal_header_t *	tmp;
    440 	struct buffer_head *	bh;
    441 	unsigned int		sequence;
    442 	int			blocktype;
    443 	int			tag_bytes = journal_tag_bytes(journal);
    444 	__u32			crc32_sum = ~0; /* Transactional Checksums */
    445 	int			descr_csum_size = 0;
    446 	int			block_error = 0;
    447 
    448 	/*
    449 	 * First thing is to establish what we expect to find in the log
    450 	 * (in terms of transaction IDs), and where (in terms of log
    451 	 * block offsets): query the superblock.
    452 	 */
    453 
    454 	sb = journal->j_superblock;
    455 	next_commit_ID = ext2fs_be32_to_cpu(sb->s_sequence);
    456 	next_log_block = ext2fs_be32_to_cpu(sb->s_start);
    457 
    458 	first_commit_ID = next_commit_ID;
    459 	if (pass == PASS_SCAN)
    460 		info->start_transaction = first_commit_ID;
    461 
    462 	jbd_debug(1, "Starting recovery pass %d\n", pass);
    463 
    464 	/*
    465 	 * Now we walk through the log, transaction by transaction,
    466 	 * making sure that each transaction has a commit block in the
    467 	 * expected place.  Each complete transaction gets replayed back
    468 	 * into the main filesystem.
    469 	 */
    470 
    471 	while (1) {
    472 		int			flags;
    473 		char *			tagp;
    474 		journal_block_tag_t *	tag;
    475 		struct buffer_head *	obh;
    476 		struct buffer_head *	nbh;
    477 
    478 		cond_resched();
    479 
    480 		/* If we already know where to stop the log traversal,
    481 		 * check right now that we haven't gone past the end of
    482 		 * the log. */
    483 
    484 		if (pass != PASS_SCAN)
    485 			if (tid_geq(next_commit_ID, info->end_transaction))
    486 				break;
    487 
    488 		jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
    489 			  next_commit_ID, next_log_block, journal->j_last);
    490 
    491 		/* Skip over each chunk of the transaction looking
    492 		 * either the next descriptor block or the final commit
    493 		 * record. */
    494 
    495 		jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
    496 		err = jread(&bh, journal, next_log_block);
    497 		if (err)
    498 			goto failed;
    499 
    500 		next_log_block++;
    501 		wrap(journal, next_log_block);
    502 
    503 		/* What kind of buffer is it?
    504 		 *
    505 		 * If it is a descriptor block, check that it has the
    506 		 * expected sequence number.  Otherwise, we're all done
    507 		 * here. */
    508 
    509 		tmp = (journal_header_t *)bh->b_data;
    510 
    511 		if (tmp->h_magic != ext2fs_cpu_to_be32(JFS_MAGIC_NUMBER)) {
    512 			brelse(bh);
    513 			break;
    514 		}
    515 
    516 		blocktype = ext2fs_be32_to_cpu(tmp->h_blocktype);
    517 		sequence = ext2fs_be32_to_cpu(tmp->h_sequence);
    518 		jbd_debug(3, "Found magic %d, sequence %d\n",
    519 			  blocktype, sequence);
    520 
    521 		if (sequence != next_commit_ID) {
    522 			brelse(bh);
    523 			break;
    524 		}
    525 
    526 		/* OK, we have a valid descriptor block which matches
    527 		 * all of the sequence number checks.  What are we going
    528 		 * to do with it?  That depends on the pass... */
    529 
    530 		switch(blocktype) {
    531 		case JFS_DESCRIPTOR_BLOCK:
    532 			/* Verify checksum first */
    533 			if (journal_has_csum_v2or3(journal))
    534 				descr_csum_size =
    535 					sizeof(struct journal_block_tail);
    536 			if (descr_csum_size > 0 &&
    537 			    !jbd2_descr_block_csum_verify(journal,
    538 							  bh->b_data)) {
    539 				err = -EFSBADCRC;
    540 				brelse(bh);
    541 				goto failed;
    542 			}
    543 
    544 			/* If it is a valid descriptor block, replay it
    545 			 * in pass REPLAY; if journal_checksums enabled, then
    546 			 * calculate checksums in PASS_SCAN, otherwise,
    547 			 * just skip over the blocks it describes. */
    548 			if (pass != PASS_REPLAY) {
    549 				if (pass == PASS_SCAN &&
    550 				    jfs_has_feature_checksum(journal) &&
    551 				    !info->end_transaction) {
    552 					if (calc_chksums(journal, bh,
    553 							&next_log_block,
    554 							&crc32_sum)) {
    555 						put_bh(bh);
    556 						break;
    557 					}
    558 					put_bh(bh);
    559 					continue;
    560 				}
    561 				next_log_block += count_tags(journal, bh);
    562 				wrap(journal, next_log_block);
    563 				put_bh(bh);
    564 				continue;
    565 			}
    566 
    567 			/* A descriptor block: we can now write all of
    568 			 * the data blocks.  Yay, useful work is finally
    569 			 * getting done here! */
    570 
    571 			tagp = &bh->b_data[sizeof(journal_header_t)];
    572 			while ((tagp - bh->b_data + tag_bytes)
    573 			       <= journal->j_blocksize - descr_csum_size) {
    574 				unsigned long io_block;
    575 
    576 				tag = (journal_block_tag_t *) tagp;
    577 				flags = ext2fs_be16_to_cpu(tag->t_flags);
    578 
    579 				io_block = next_log_block++;
    580 				wrap(journal, next_log_block);
    581 				err = jread(&obh, journal, io_block);
    582 				if (err) {
    583 					/* Recover what we can, but
    584 					 * report failure at the end. */
    585 					success = err;
    586 					printk(KERN_ERR
    587 						"JBD2: IO error %d recovering "
    588 						"block %ld in log\n",
    589 						err, io_block);
    590 				} else {
    591 					unsigned long long blocknr;
    592 
    593 					J_ASSERT(obh != NULL);
    594 					blocknr = read_tag_block(journal,
    595 								 tag);
    596 
    597 					/* If the block has been
    598 					 * revoked, then we're all done
    599 					 * here. */
    600 					if (journal_test_revoke
    601 					    (journal, blocknr,
    602 					     next_commit_ID)) {
    603 						brelse(obh);
    604 						++info->nr_revoke_hits;
    605 						goto skip_write;
    606 					}
    607 
    608 					/* Look for block corruption */
    609 					if (!jbd2_block_tag_csum_verify(
    610 						journal, tag, obh->b_data,
    611 						ext2fs_be32_to_cpu(tmp->h_sequence))) {
    612 						brelse(obh);
    613 						success = -EFSBADCRC;
    614 						printk(KERN_ERR "JBD2: Invalid "
    615 						       "checksum recovering "
    616 						       "block %llu in log\n",
    617 						       blocknr);
    618 						block_error = 1;
    619 						goto skip_write;
    620 					}
    621 
    622 					/* Find a buffer for the new
    623 					 * data being restored */
    624 					nbh = __getblk(journal->j_fs_dev,
    625 							blocknr,
    626 							journal->j_blocksize);
    627 					if (nbh == NULL) {
    628 						printk(KERN_ERR
    629 						       "JBD2: Out of memory "
    630 						       "during recovery.\n");
    631 						err = -ENOMEM;
    632 						brelse(bh);
    633 						brelse(obh);
    634 						goto failed;
    635 					}
    636 
    637 					lock_buffer(nbh);
    638 					memcpy(nbh->b_data, obh->b_data,
    639 							journal->j_blocksize);
    640 					if (flags & JFS_FLAG_ESCAPE) {
    641 						__u32 magic = ext2fs_cpu_to_be32(JFS_MAGIC_NUMBER);
    642 						memcpy(nbh->b_data, &magic,
    643 						       sizeof(magic));
    644 					}
    645 
    646 					BUFFER_TRACE(nbh, "marking dirty");
    647 					set_buffer_uptodate(nbh);
    648 					mark_buffer_dirty(nbh);
    649 					BUFFER_TRACE(nbh, "marking uptodate");
    650 					++info->nr_replays;
    651 					/* ll_rw_block(WRITE, 1, &nbh); */
    652 					unlock_buffer(nbh);
    653 					brelse(obh);
    654 					brelse(nbh);
    655 				}
    656 
    657 			skip_write:
    658 				tagp += tag_bytes;
    659 				if (!(flags & JFS_FLAG_SAME_UUID))
    660 					tagp += 16;
    661 
    662 				if (flags & JFS_FLAG_LAST_TAG)
    663 					break;
    664 			}
    665 
    666 			brelse(bh);
    667 			continue;
    668 
    669 		case JFS_COMMIT_BLOCK:
    670 			/*     How to differentiate between interrupted commit
    671 			 *               and journal corruption ?
    672 			 *
    673 			 * {nth transaction}
    674 			 *        Checksum Verification Failed
    675 			 *			 |
    676 			 *		 ____________________
    677 			 *		|		     |
    678 			 * 	async_commit             sync_commit
    679 			 *     		|                    |
    680 			 *		| GO TO NEXT    "Journal Corruption"
    681 			 *		| TRANSACTION
    682 			 *		|
    683 			 * {(n+1)th transanction}
    684 			 *		|
    685 			 * 	 _______|______________
    686 			 * 	|	 	      |
    687 			 * Commit block found	Commit block not found
    688 			 *      |		      |
    689 			 * "Journal Corruption"       |
    690 			 *		 _____________|_________
    691 			 *     		|	           	|
    692 			 *	nth trans corrupt	OR   nth trans
    693 			 *	and (n+1)th interrupted     interrupted
    694 			 *	before commit block
    695 			 *      could reach the disk.
    696 			 *	(Cannot find the difference in above
    697 			 *	 mentioned conditions. Hence assume
    698 			 *	 "Interrupted Commit".)
    699 			 */
    700 
    701 			/* Found an expected commit block: if checksums
    702 			 * are present verify them in PASS_SCAN; else not
    703 			 * much to do other than move on to the next sequence
    704 			 * number. */
    705 			if (pass == PASS_SCAN &&
    706 			    jfs_has_feature_checksum(journal)) {
    707 				int chksum_err, chksum_seen;
    708 				struct commit_header *cbh =
    709 					(struct commit_header *)bh->b_data;
    710 				unsigned found_chksum =
    711 					ext2fs_be32_to_cpu(cbh->h_chksum[0]);
    712 
    713 				chksum_err = chksum_seen = 0;
    714 
    715 				if (info->end_transaction) {
    716 					journal->j_failed_commit =
    717 						info->end_transaction;
    718 					brelse(bh);
    719 					break;
    720 				}
    721 
    722 				if (crc32_sum == found_chksum &&
    723 				    cbh->h_chksum_type == JFS_CRC32_CHKSUM &&
    724 				    cbh->h_chksum_size ==
    725 						JFS_CRC32_CHKSUM_SIZE)
    726 				       chksum_seen = 1;
    727 				else if (!(cbh->h_chksum_type == 0 &&
    728 					     cbh->h_chksum_size == 0 &&
    729 					     found_chksum == 0 &&
    730 					     !chksum_seen))
    731 				/*
    732 				 * If fs is mounted using an old kernel and then
    733 				 * kernel with journal_chksum is used then we
    734 				 * get a situation where the journal flag has
    735 				 * checksum flag set but checksums are not
    736 				 * present i.e chksum = 0, in the individual
    737 				 * commit blocks.
    738 				 * Hence to avoid checksum failures, in this
    739 				 * situation, this extra check is added.
    740 				 */
    741 						chksum_err = 1;
    742 
    743 				if (chksum_err) {
    744 					info->end_transaction = next_commit_ID;
    745 
    746 					if (!jfs_has_feature_async_commit(journal)){
    747 						journal->j_failed_commit =
    748 							next_commit_ID;
    749 						brelse(bh);
    750 						break;
    751 					}
    752 				}
    753 				crc32_sum = ~0;
    754 			}
    755 			if (pass == PASS_SCAN &&
    756 			    !jbd2_commit_block_csum_verify(journal,
    757 							   bh->b_data)) {
    758 				info->end_transaction = next_commit_ID;
    759 
    760 				if (!jfs_has_feature_async_commit(journal)) {
    761 					journal->j_failed_commit =
    762 						next_commit_ID;
    763 					brelse(bh);
    764 					break;
    765 				}
    766 			}
    767 			brelse(bh);
    768 			next_commit_ID++;
    769 			continue;
    770 
    771 		case JFS_REVOKE_BLOCK:
    772 			/* If we aren't in the REVOKE pass, then we can
    773 			 * just skip over this block. */
    774 			if (pass != PASS_REVOKE) {
    775 				brelse(bh);
    776 				continue;
    777 			}
    778 
    779 			err = scan_revoke_records(journal, bh,
    780 						  next_commit_ID, info);
    781 			brelse(bh);
    782 			if (err)
    783 				goto failed;
    784 			continue;
    785 
    786 		default:
    787 			jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
    788 				  blocktype);
    789 			brelse(bh);
    790 			goto done;
    791 		}
    792 	}
    793 
    794  done:
    795 	/*
    796 	 * We broke out of the log scan loop: either we came to the
    797 	 * known end of the log or we found an unexpected block in the
    798 	 * log.  If the latter happened, then we know that the "current"
    799 	 * transaction marks the end of the valid log.
    800 	 */
    801 
    802 	if (pass == PASS_SCAN) {
    803 		if (!info->end_transaction)
    804 			info->end_transaction = next_commit_ID;
    805 	} else {
    806 		/* It's really bad news if different passes end up at
    807 		 * different places (but possible due to IO errors). */
    808 		if (info->end_transaction != next_commit_ID) {
    809 			printk(KERN_ERR "JBD2: recovery pass %d ended at "
    810 				"transaction %u, expected %u\n",
    811 				pass, next_commit_ID, info->end_transaction);
    812 			if (!success)
    813 				success = -EIO;
    814 		}
    815 	}
    816 	if (block_error && success == 0)
    817 		success = -EIO;
    818 	return success;
    819 
    820  failed:
    821 	return err;
    822 }
    823 
    824 static int jbd2_revoke_block_csum_verify(journal_t *j,
    825 					 void *buf)
    826 {
    827 	struct journal_revoke_tail *tail;
    828 	__u32 provided;
    829 	__u32 calculated;
    830 
    831 	if (!journal_has_csum_v2or3(j))
    832 		return 1;
    833 
    834 	tail = (struct journal_revoke_tail *)((char *)buf + j->j_blocksize -
    835 			sizeof(struct journal_revoke_tail));
    836 	provided = tail->r_checksum;
    837 	tail->r_checksum = 0;
    838 	calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
    839 	tail->r_checksum = provided;
    840 
    841 	return provided == ext2fs_cpu_to_be32(calculated);
    842 }
    843 
    844 /* Scan a revoke record, marking all blocks mentioned as revoked. */
    845 
    846 static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
    847 			       tid_t sequence, struct recovery_info *info)
    848 {
    849 	journal_revoke_header_t *header;
    850 	int offset, max;
    851 	unsigned csum_size = 0;
    852 	__u32 rcount;
    853 	int record_len = 4;
    854 
    855 	header = (journal_revoke_header_t *) bh->b_data;
    856 	offset = sizeof(journal_revoke_header_t);
    857 	rcount = ext2fs_be32_to_cpu(header->r_count);
    858 
    859 	if (!jbd2_revoke_block_csum_verify(journal, header))
    860 		return -EFSBADCRC;
    861 
    862 	if (journal_has_csum_v2or3(journal))
    863 		csum_size = sizeof(struct journal_revoke_tail);
    864 	if (rcount > journal->j_blocksize - csum_size)
    865 		return -EINVAL;
    866 	max = rcount;
    867 
    868 	if (jfs_has_feature_64bit(journal))
    869 		record_len = 8;
    870 
    871 	while (offset + record_len <= max) {
    872 		unsigned long long blocknr;
    873 		int err;
    874 
    875 		if (record_len == 4)
    876 			blocknr = ext2fs_be32_to_cpu(* ((__u32 *) (bh->b_data+offset)));
    877 		else
    878 			blocknr = ext2fs_be64_to_cpu(* ((__u64 *) (bh->b_data+offset)));
    879 		offset += record_len;
    880 		err = journal_set_revoke(journal, blocknr, sequence);
    881 		if (err)
    882 			return err;
    883 		++info->nr_revokes;
    884 	}
    885 	return 0;
    886 }
    887