LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Jan Kara <jack@suse.cz>
To: Duane Griffin <duaneg@dghda.com>
Cc: linux-ext4@vger.kernel.org, linux-kernel@vger.kernel.org,
	Theodore Tso <tytso@mit.edu>,
	sct@redhat.com, akpm@linux-foundation.org, adilger@clusterfs.com
Subject: Re: [RFC, PATCH 4/6] jbd: refactor nested journal log recovery loop into separate functions
Date: Tue, 11 Mar 2008 15:35:50 +0100	[thread overview]
Message-ID: <20080311143550.GB6544@atrey.karlin.mff.cuni.cz> (raw)
In-Reply-To: <8644b32ddec999bbc1da0ac55ad7b66d0b8176de.1204685366.git.duaneg@dghda.com>

> The do_one_pass function iterates through the jbd log operating on blocks
> depending on the pass. During the replay pass descriptor blocks are processed
> by an inner loop which replays them. The nesting makes the code difficult to
> follow or to modify. Splitting the inner loop and replay code into separate
> functions simplifies matters.
> 
> The refactored code no longer unnecessarily reads revoked data blocks, so
> potential read errors from such blocks will cause differing behaviour. Aside
> from that there should be no functional effect.
  Hmm, if I read your patch correctly, previously we aborted journal
replay when we found a block with unknown block magic but now we
continue replaying. Why have you done such change? And similarly when
some error happened when parsing revoke records block...

									Honza

> 
> Signed-off-by: Duane Griffin <duaneg@dghda.com>
> ---
> 
> Note the TODO before the block read in the outer loop below. We could possibly
> attempt to continue after a failed read as we do when replaying descriptor
> blocks. However if it was a descriptor block we'd likely bomb out on the next
> iteration anyway, so I'm not sure it would be worth it. Thoughts?
> 
>  fs/jbd/recovery.c |  243 ++++++++++++++++++++++++++++-------------------------
>  1 files changed, 127 insertions(+), 116 deletions(-)
> 
> diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
> index 2b8edf4..e2ac78f 100644
> --- a/fs/jbd/recovery.c
> +++ b/fs/jbd/recovery.c
> @@ -307,6 +307,101 @@ int journal_skip_recovery(journal_t *journal)
>  	return err;
>  }
>  
> +static int replay_data_block(
> +	journal_t *journal, struct buffer_head *obh, char *data,
> +	int flags, unsigned long blocknr)
> +{
> +	struct buffer_head *nbh;
> +
> +	/* Find a buffer for the new data being restored */
> +	nbh = __getblk(journal->j_fs_dev, blocknr, journal->j_blocksize);
> +	if (nbh == NULL)
> +		return -ENOMEM;
> +
> +	lock_buffer(nbh);
> +	memcpy(nbh->b_data, obh->b_data, journal->j_blocksize);
> +	if (flags & JFS_FLAG_ESCAPE)
> +		*((__be32 *)data) = cpu_to_be32(JFS_MAGIC_NUMBER);
> +
> +	BUFFER_TRACE(nbh, "marking dirty");
> +	set_buffer_uptodate(nbh);
> +	mark_buffer_dirty(nbh);
> +	BUFFER_TRACE(nbh, "marking uptodate");
> +	/* ll_rw_block(WRITE, 1, &nbh); */
> +	unlock_buffer(nbh);
> +	brelse(nbh);
> +
> +	return 0;
> +}
> +
> +/* Replay a descriptor block: write all of the data blocks. */
> +static int replay_descriptor_block(
> +	journal_t *journal, char *data,
> +	unsigned int next_commit_ID,
> +	unsigned long *next_log_block,
> +	struct recovery_info *info)
> +{
> +	int err;
> +	int success = 0;
> +	char *tagp;
> +	unsigned long io_block;
> +	struct buffer_head *obh;
> +	unsigned long next = *next_log_block;
> +
> +	tagp = &data[sizeof(journal_header_t)];
> +	while ((tagp - data + sizeof(journal_block_tag_t)) <=
> +	       journal->j_blocksize) {
> +		unsigned long blocknr;
> +		journal_block_tag_t *tag = (journal_block_tag_t *) tagp;
> +		int flags = be32_to_cpu(tag->t_flags);
> +
> +		io_block = next++;
> +		wrap(journal, next);
> +		blocknr = be32_to_cpu(tag->t_blocknr);
> +
> +		/* If the block has been revoked, then we're all done here. */
> +		if (journal_test_revoke(journal, blocknr, next_commit_ID)) {
> +			++info->nr_revoke_hits;
> +			goto skip_write;
> +		}
> +
> +		err = jread(&obh, journal, io_block);
> +		if (err) {
> +
> +			/* Recover what we can, but
> +			 * report failure at the end. */
> +			success = err;
> +			printk(KERN_ERR "JBD: IO error %d recovering "
> +			       "block %ld in log\n", err, io_block);
> +			continue;
> +		}
> +		J_ASSERT(obh != NULL);
> +
> +		err = replay_data_block(journal, obh, data, flags, blocknr);
> +		if (err) {
> +			brelse(obh);
> +			printk(KERN_ERR "JBD: Out of memory during recovery\n");
> +			success = err;
> +			goto failed;
> +		}
> +
> +		++info->nr_replays;
> +		brelse(obh);
> +
> +skip_write:
> +		tagp += sizeof(journal_block_tag_t);
> +		if (!(flags & JFS_FLAG_SAME_UUID))
> +			tagp += 16;
> +
> +		if (flags & JFS_FLAG_LAST_TAG)
> +			break;
> +	}
> +
> +	*next_log_block = next;
> +failed:
> +	return success;
> +}
> +
>  static int do_one_pass(journal_t *journal,
>  			struct recovery_info *info, enum passtype pass)
>  {
> @@ -316,8 +411,6 @@ static int do_one_pass(journal_t *journal,
>  	journal_superblock_t *	sb;
>  	journal_header_t *	tmp;
>  	struct buffer_head *	bh;
> -	unsigned int		sequence;
> -	int			blocktype;
>  
>  	/* Precompute the maximum metadata descriptors in a descriptor block */
>  	int			MAX_BLOCKS_PER_DESC;
> @@ -348,11 +441,8 @@ static int do_one_pass(journal_t *journal,
>  	 */
>  
>  	while (1) {
> -		int			flags;
> -		char *			tagp;
> -		journal_block_tag_t *	tag;
> -		struct buffer_head *	obh;
> -		struct buffer_head *	nbh;
> +		int blocktype;
> +		unsigned int sequence;
>  
>  		cond_resched();
>  
> @@ -371,10 +461,13 @@ static int do_one_pass(journal_t *journal,
>  		 * either the next descriptor block or the final commit
>  		 * record. */
>  
> +		/* TODO: Should we continue on error? */
>  		jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
>  		err = jread(&bh, journal, next_log_block);
> -		if (err)
> +		if (err) {
> +			success = err;
>  			goto failed;
> +		}
>  
>  		next_log_block++;
>  		wrap(journal, next_log_block);
> @@ -406,137 +499,57 @@ static int do_one_pass(journal_t *journal,
>  		 * all of the sequence number checks.  What are we going
>  		 * to do with it?  That depends on the pass... */
>  
> -		switch(blocktype) {
> +		switch (blocktype) {
>  		case JFS_DESCRIPTOR_BLOCK:
>  			/* If it is a valid descriptor block, replay it
>  			 * in pass REPLAY; otherwise, just skip over the
>  			 * blocks it describes. */
> -			if (pass != PASS_REPLAY) {
> +			if (pass == PASS_REPLAY) {
> +				err = replay_descriptor_block(
> +					journal,
> +					bh->b_data,
> +					next_commit_ID,
> +					&next_log_block,
> +					info);
> +			} else {
>  				next_log_block +=
>  					count_tags(bh, journal->j_blocksize);
>  				wrap(journal, next_log_block);
> -				brelse(bh);
> -				continue;
>  			}
> -
> -			/* A descriptor block: we can now write all of
> -			 * the data blocks.  Yay, useful work is finally
> -			 * getting done here! */
> -
> -			tagp = &bh->b_data[sizeof(journal_header_t)];
> -			while ((tagp - bh->b_data +sizeof(journal_block_tag_t))
> -			       <= journal->j_blocksize) {
> -				unsigned long io_block;
> -
> -				tag = (journal_block_tag_t *) tagp;
> -				flags = be32_to_cpu(tag->t_flags);
> -
> -				io_block = next_log_block++;
> -				wrap(journal, next_log_block);
> -				err = jread(&obh, journal, io_block);
> -				if (err) {
> -					/* Recover what we can, but
> -					 * report failure at the end. */
> -					success = err;
> -					printk (KERN_ERR
> -						"JBD: IO error %d recovering "
> -						"block %ld in log\n",
> -						err, io_block);
> -				} else {
> -					unsigned long blocknr;
> -
> -					J_ASSERT(obh != NULL);
> -					blocknr = be32_to_cpu(tag->t_blocknr);
> -
> -					/* If the block has been
> -					 * revoked, then we're all done
> -					 * here. */
> -					if (journal_test_revoke
> -					    (journal, blocknr,
> -					     next_commit_ID)) {
> -						brelse(obh);
> -						++info->nr_revoke_hits;
> -						goto skip_write;
> -					}
> -
> -					/* Find a buffer for the new
> -					 * data being restored */
> -					nbh = __getblk(journal->j_fs_dev,
> -							blocknr,
> -							journal->j_blocksize);
> -					if (nbh == NULL) {
> -						printk(KERN_ERR
> -						       "JBD: Out of memory "
> -						       "during recovery.\n");
> -						err = -ENOMEM;
> -						brelse(bh);
> -						brelse(obh);
> -						goto failed;
> -					}
> -
> -					lock_buffer(nbh);
> -					memcpy(nbh->b_data, obh->b_data,
> -							journal->j_blocksize);
> -					if (flags & JFS_FLAG_ESCAPE) {
> -						*((__be32 *)bh->b_data) =
> -						cpu_to_be32(JFS_MAGIC_NUMBER);
> -					}
> -
> -					BUFFER_TRACE(nbh, "marking dirty");
> -					set_buffer_uptodate(nbh);
> -					mark_buffer_dirty(nbh);
> -					BUFFER_TRACE(nbh, "marking uptodate");
> -					++info->nr_replays;
> -					/* ll_rw_block(WRITE, 1, &nbh); */
> -					unlock_buffer(nbh);
> -					brelse(obh);
> -					brelse(nbh);
> -				}
> -
> -			skip_write:
> -				tagp += sizeof(journal_block_tag_t);
> -				if (!(flags & JFS_FLAG_SAME_UUID))
> -					tagp += 16;
> -
> -				if (flags & JFS_FLAG_LAST_TAG)
> -					break;
> -			}
> -
> -			brelse(bh);
> -			continue;
> +			break;
>  
>  		case JFS_COMMIT_BLOCK:
>  			/* Found an expected commit block: not much to
>  			 * do other than move on to the next sequence
>  			 * number. */
> -			brelse(bh);
>  			next_commit_ID++;
> -			continue;
> +			break;
>  
>  		case JFS_REVOKE_BLOCK:
>  			/* If we aren't in the REVOKE pass, then we can
>  			 * just skip over this block. */
> -			if (pass != PASS_REVOKE) {
> -				brelse(bh);
> -				continue;
> +			if (pass == PASS_REVOKE) {
> +				err = scan_revoke_records(
> +					journal, bh, next_commit_ID, info);
>  			}
> -
> -			err = scan_revoke_records(journal, bh,
> -						  next_commit_ID, info);
> -			brelse(bh);
> -			if (err)
> -				goto failed;
> -			continue;
> +			break;
>  
>  		default:
>  			jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
>  				  blocktype);
> -			brelse(bh);
> -			goto done;
> +			break;
>  		}
> +
> +		brelse(bh);
> +
> +		/* Immediately fail on OOM; on other errors try to recover
> +		 * as much as we can, but report the failure at the end. */
> +		if (err)
> +			success = err;
> +		if (err == -ENOMEM)
> +			goto failed;
>  	}
>  
> - done:
>  	/*
>  	 * We broke out of the log scan loop: either we came to the
>  	 * known end of the log or we found an unexpected block in the
> @@ -558,10 +571,8 @@ static int do_one_pass(journal_t *journal,
>  		}
>  	}
>  
> -	return success;
> -
>   failed:
> -	return err;
> +	return success;
>  }
>  
>  
> -- 
> 1.5.3.7
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

  parent reply	other threads:[~2008-03-11 14:36 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-03-06  1:59 [RFC, PATCH 0/6] ext3: do not modify data on-disk when mounting read-only filesystem Duane Griffin
     [not found] ` <1204768754-29655-2-git-send-email-duaneg@dghda.com>
2008-03-06  1:59   ` [RFC, PATCH 1/6] jbd: eliminate duplicated code in revocation table init/destroy functions Duane Griffin
     [not found]     ` <1204768754-29655-3-git-send-email-duaneg@dghda.com>
2008-03-06  1:59       ` [RFC, PATCH 2/6] jbd: replace potentially false assertion with if block Duane Griffin
2008-03-08 14:52         ` Christoph Hellwig
     [not found]     ` <1204768754-29655-4-git-send-email-duaneg@dghda.com>
2008-03-06  1:59       ` [RFC, PATCH 3/6] jbd: only create debugfs entries if cache initialisation is successful Duane Griffin
     [not found]     ` <1204768754-29655-5-git-send-email-duaneg@dghda.com>
2008-03-06  1:59       ` [RFC, PATCH 4/6] jbd: refactor nested journal log recovery loop into separate functions Duane Griffin
2008-03-08 14:53         ` Christoph Hellwig
2008-03-08 18:40           ` Duane Griffin
2008-03-11 14:35         ` Jan Kara [this message]
2008-03-12  1:02           ` Duane Griffin
2008-03-12 10:50             ` Jan Kara
     [not found]     ` <1204768754-29655-6-git-send-email-duaneg@dghda.com>
2008-03-06  1:59       ` [RFC, PATCH 5/6] jbd: add support for read-only log recovery Duane Griffin
2008-03-11 15:05         ` Jan Kara
2008-03-12  1:40           ` Duane Griffin
2008-03-12 10:51             ` Jan Kara
     [not found]     ` <1204768754-29655-7-git-send-email-duaneg@dghda.com>
2008-03-06  1:59       ` [RFC, PATCH 6/6] ext3: do not write to the disk when mounting a dirty read-only filesystem Duane Griffin
2008-03-06  7:17         ` Andreas Dilger
2008-03-06 11:19           ` Duane Griffin
2008-03-11 15:11           ` Jan Kara
2008-03-12  2:42             ` Duane Griffin
2008-03-12 10:53               ` Jan Kara
2008-03-06  3:42 ` [RFC, PATCH 0/6] ext3: do not modify data on-disk when mounting " Andrew Morton
2008-03-06 11:20   ` Duane Griffin
2008-03-13  3:22 ` Daniel Phillips
2008-03-13 12:35   ` Duane Griffin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080311143550.GB6544@atrey.karlin.mff.cuni.cz \
    --to=jack@suse.cz \
    --cc=adilger@clusterfs.com \
    --cc=akpm@linux-foundation.org \
    --cc=duaneg@dghda.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sct@redhat.com \
    --cc=tytso@mit.edu \
    --subject='Re: [RFC, PATCH 4/6] jbd: refactor nested journal log recovery loop into separate functions' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).