LKML Archive on lore.kernel.org
* [PATCH] Fix commit of ordered data buffers
@ 2006-09-11 21:05 Jan Kara
  2006-09-11 22:12 ` Andrew Morton
  2006-09-28  8:31 ` Zhang, Yanmin
  0 siblings, 2 replies; 12+ messages in thread
From: Jan Kara @ 2006-09-11 21:05 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, Badari Pulavarty

[-- Attachment #1: Type: text/plain, Size: 532 bytes --]

  Hi Andrew,

  here is the patch that came out of the thread "set_page_buffer_dirty
should skip unmapped buffers". It fixes several flaws in the code
writing out ordered data buffers during commit. It definitely fixed the
problem Badari was seeing with the fsx-linux test.  Could you include it
in -mm? Since there are quite complex interactions with other JBD code
and the locking is kind of ugly, I'd leave it in -mm for a while to see
whether any bugs emerge ;). Thanks.

								Honza

-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

[-- Attachment #2: jbd-2.6.18-rc6-1-orderedwrite.diff --]
[-- Type: text/plain, Size: 6911 bytes --]

The original commit code assumes that when a buffer on the BJ_SyncData list is
locked, it is being written to disk. But this is not true, and hence it can
lead to potential data loss on a crash. The code also didn't account for the
fact that journal_dirty_data() can steal buffers from a committing transaction,
and hence we could write buffers that no longer belong to the committing
transaction. Finally, it could happen that we tried writing out one buffer
several times.

The patch below tries to solve these problems by a complete rewrite of the
data commit code. We go through the buffers on t_sync_datalist, lock the
buffers needing write-out, and store them in an array. Buffers are also
immediately refiled to the BJ_Locked list or unfiled (if the write-out has
already completed). When the array is full, or when we have to block on a
buffer lock, we submit all the accumulated buffers for IO.

Signed-off-by: Jan Kara <jack@suse.cz>

diff -rupX /home/jack/.kerndiffexclude linux-2.6.18-rc6/fs/jbd/commit.c linux-2.6.18-rc6-1-orderedwrite/fs/jbd/commit.c
--- linux-2.6.18-rc6/fs/jbd/commit.c	2006-09-06 18:20:48.000000000 +0200
+++ linux-2.6.18-rc6-1-orderedwrite/fs/jbd/commit.c	2006-09-08 01:05:35.000000000 +0200
@@ -160,6 +160,117 @@ static int journal_write_commit_record(j
 	return (ret == -EIO);
 }
 
+void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
+{
+	int i;
+
+	for (i = 0; i < bufs; i++) {
+		wbuf[i]->b_end_io = end_buffer_write_sync;
+		/* We use-up our safety reference in submit_bh() */
+		submit_bh(WRITE, wbuf[i]);
+	}
+}
+
+/*
+ *  Submit all the data buffers to disk
+ */
+static void journal_submit_data_buffers(journal_t *journal,
+				transaction_t *commit_transaction)
+{
+	struct journal_head *jh;
+	struct buffer_head *bh;
+	int locked;
+	int bufs = 0;
+	struct buffer_head **wbuf = journal->j_wbuf;
+
+	/*
+	 * Whenever we unlock the journal and sleep, things can get added
+	 * onto ->t_sync_datalist, so we have to keep looping back to
+	 * write_out_data until we *know* that the list is empty.
+	 *
+	 * Cleanup any flushed data buffers from the data list.  Even in
+	 * abort mode, we want to flush this out as soon as possible.
+	 */
+write_out_data:
+	cond_resched();
+	spin_lock(&journal->j_list_lock);
+
+	while (commit_transaction->t_sync_datalist) {
+		jh = commit_transaction->t_sync_datalist;
+		bh = jh2bh(jh);
+		locked = 0;
+
+		/* Get reference just to make sure buffer does not disappear
+		 * when we are forced to drop various locks */
+		get_bh(bh);
+		/* If the buffer is dirty, we need to submit IO and hence
+		 * we need the buffer lock. We try to lock the buffer without
+		 * blocking. If we fail, we need to drop j_list_lock and do
+		 * blocking lock_buffer().
+		 */
+		if (buffer_dirty(bh)) {
+			if (test_set_buffer_locked(bh)) {
+				BUFFER_TRACE(bh, "needs blocking lock");
+				spin_unlock(&journal->j_list_lock);
+				/* Write out all data to prevent deadlocks */
+				journal_do_submit_data(wbuf, bufs);
+				bufs = 0;
+				lock_buffer(bh);
+				spin_lock(&journal->j_list_lock);
+			}
+			locked = 1;
+		}
+		/* We have to get bh_state lock. Again out of order, sigh. */
+		if (!inverted_lock(journal, bh)) {
+			jbd_lock_bh_state(bh);
+			spin_lock(&journal->j_list_lock);
+		}
+		/* Someone already cleaned up the buffer? */
+		if (!buffer_jbd(bh)
+			|| jh->b_transaction != commit_transaction
+			|| jh->b_jlist != BJ_SyncData) {
+			jbd_unlock_bh_state(bh);
+			if (locked)
+				unlock_buffer(bh);
+			BUFFER_TRACE(bh, "already cleaned up");
+			put_bh(bh);
+			continue;
+		}
+		if (locked && test_clear_buffer_dirty(bh)) {
+			BUFFER_TRACE(bh, "needs writeout, adding to array");
+			wbuf[bufs++] = bh;
+			__journal_file_buffer(jh, commit_transaction,
+						BJ_Locked);
+			jbd_unlock_bh_state(bh);
+			if (bufs == journal->j_wbufsize) {
+				spin_unlock(&journal->j_list_lock);
+				journal_do_submit_data(wbuf, bufs);
+				bufs = 0;
+				goto write_out_data;
+			}
+		}
+		else {
+			BUFFER_TRACE(bh, "writeout complete: unfile");
+			__journal_unfile_buffer(jh);
+			jbd_unlock_bh_state(bh);
+			if (locked)
+				unlock_buffer(bh);
+			journal_remove_journal_head(bh);
+			/* Once for our safety reference, once for
+			 * journal_remove_journal_head() */
+			put_bh(bh);
+			put_bh(bh);
+		}
+
+		if (lock_need_resched(&journal->j_list_lock)) {
+			spin_unlock(&journal->j_list_lock);
+			goto write_out_data;
+		}
+	}
+	spin_unlock(&journal->j_list_lock);
+	journal_do_submit_data(wbuf, bufs);
+}
+
 /*
  * journal_commit_transaction
  *
@@ -313,80 +424,13 @@ void journal_commit_transaction(journal_
 	 * Now start flushing things to disk, in the order they appear
 	 * on the transaction lists.  Data blocks go first.
 	 */
-
 	err = 0;
-	/*
-	 * Whenever we unlock the journal and sleep, things can get added
-	 * onto ->t_sync_datalist, so we have to keep looping back to
-	 * write_out_data until we *know* that the list is empty.
-	 */
-	bufs = 0;
-	/*
-	 * Cleanup any flushed data buffers from the data list.  Even in
-	 * abort mode, we want to flush this out as soon as possible.
-	 */
-write_out_data:
-	cond_resched();
-	spin_lock(&journal->j_list_lock);
-
-	while (commit_transaction->t_sync_datalist) {
-		struct buffer_head *bh;
-
-		jh = commit_transaction->t_sync_datalist;
-		commit_transaction->t_sync_datalist = jh->b_tnext;
-		bh = jh2bh(jh);
-		if (buffer_locked(bh)) {
-			BUFFER_TRACE(bh, "locked");
-			if (!inverted_lock(journal, bh))
-				goto write_out_data;
-			__journal_temp_unlink_buffer(jh);
-			__journal_file_buffer(jh, commit_transaction,
-						BJ_Locked);
-			jbd_unlock_bh_state(bh);
-			if (lock_need_resched(&journal->j_list_lock)) {
-				spin_unlock(&journal->j_list_lock);
-				goto write_out_data;
-			}
-		} else {
-			if (buffer_dirty(bh)) {
-				BUFFER_TRACE(bh, "start journal writeout");
-				get_bh(bh);
-				wbuf[bufs++] = bh;
-				if (bufs == journal->j_wbufsize) {
-					jbd_debug(2, "submit %d writes\n",
-							bufs);
-					spin_unlock(&journal->j_list_lock);
-					ll_rw_block(SWRITE, bufs, wbuf);
-					journal_brelse_array(wbuf, bufs);
-					bufs = 0;
-					goto write_out_data;
-				}
-			} else {
-				BUFFER_TRACE(bh, "writeout complete: unfile");
-				if (!inverted_lock(journal, bh))
-					goto write_out_data;
-				__journal_unfile_buffer(jh);
-				jbd_unlock_bh_state(bh);
-				journal_remove_journal_head(bh);
-				put_bh(bh);
-				if (lock_need_resched(&journal->j_list_lock)) {
-					spin_unlock(&journal->j_list_lock);
-					goto write_out_data;
-				}
-			}
-		}
-	}
-
-	if (bufs) {
-		spin_unlock(&journal->j_list_lock);
-		ll_rw_block(SWRITE, bufs, wbuf);
-		journal_brelse_array(wbuf, bufs);
-		spin_lock(&journal->j_list_lock);
-	}
+	journal_submit_data_buffers(journal, commit_transaction);
 
 	/*
 	 * Wait for all previously submitted IO to complete.
 	 */
+	spin_lock(&journal->j_list_lock);
 	while (commit_transaction->t_locked_list) {
 		struct buffer_head *bh;
 


* Re: [PATCH] Fix commit of ordered data buffers
  2006-09-11 21:05 [PATCH] Fix commit of ordered data buffers Jan Kara
@ 2006-09-11 22:12 ` Andrew Morton
  2006-09-28  8:31 ` Zhang, Yanmin
  1 sibling, 0 replies; 12+ messages in thread
From: Andrew Morton @ 2006-09-11 22:12 UTC (permalink / raw)
  To: Jan Kara; +Cc: linux-kernel, Badari Pulavarty, stable

On Mon, 11 Sep 2006 23:05:30 +0200
Jan Kara <jack@suse.cz> wrote:

>   here is the patch that came out of the thread "set_page_buffer_dirty
> should skip unmapped buffers". It fixes several flaws in the code
> writing out ordered data buffers during commit. It definitely fixed the
> problem Badari was seeing with the fsx-linux test.  Could you include it
> in -mm? Since there are quite complex interactions with other JBD code
> and the locking is kind of ugly, I'd leave it in -mm for a while to see
> whether any bugs emerge ;). Thanks.

yup.  Thanks, guys.  I'll take a close look at this.  I'll aim to get it
into 2.6.19-rc1 a week or so after 2.6.18 is released.  Once it has cooked
in mainline for a couple of weeks it should be then suitable for a 2.6.18.x
backport.  That'll be around the 2.6.19-rc2 timeframe.


* Re: [PATCH] Fix commit of ordered data buffers
  2006-09-11 21:05 [PATCH] Fix commit of ordered data buffers Jan Kara
  2006-09-11 22:12 ` Andrew Morton
@ 2006-09-28  8:31 ` Zhang, Yanmin
  2006-09-28 21:35   ` Jan Kara
  1 sibling, 1 reply; 12+ messages in thread
From: Zhang, Yanmin @ 2006-09-28  8:31 UTC (permalink / raw)
  To: Jan Kara; +Cc: akpm, LKML, Badari Pulavarty, fedora-ia64-list

On Tue, 2006-09-12 at 05:05, Jan Kara wrote:
>   Hi Andrew,
> 
>   here is the patch that came out of the thread "set_page_buffer_dirty
> should skip unmapped buffers". It fixes several flaws in the code
> writing out ordered data buffers during commit. It definitely fixed the
> problem Badari was seeing with the fsx-linux test.  Could you include it
> in -mm? Since there are quite complex interactions with other JBD code
> and the locking is kind of ugly, I'd leave it in -mm for a while to see
> whether any bugs emerge ;). Thanks.
> 
> 								Honza
I also worked on this issue, because I didn't know you were working on it until I
located the root cause and checked bugzilla.

I reviewed your patch.

+		if (!inverted_lock(journal, bh)) {
+			jbd_lock_bh_state(bh);
+			spin_lock(&journal->j_list_lock);
+		}
Should journal->j_list_lock be unlocked before jbd_lock_bh_state(bh)?


The fsx-linux test issue is a race between journal_commit_transaction
and journal_dirty_data. After journal_commit_transaction adds buffer_head pointers
to wbuf, it might unlock journal->j_list_lock. Although all buffer heads in wbuf are locked,
does that prevent journal_dirty_data from unlinking the buffer head from the transaction
and fsx-linux from truncating it?

I'm not a journal expert. But I want to discuss it.

My investigation is below (Scenario):

fsx-linux starts journal_dirty_data and journal_dirty_data links a jh to
journal->j_running_transaction's t_sync_datalist; kjournald might not
write the buffer to disk quickly, but saves it to the array wbuf.
Then, fsx-linux starts a second journal_dirty_data in a new transaction, which
might submit the same buffer head and move the jh to the new transaction's
t_sync_datalist.
Then, fsx-linux truncates the last couple of buffers of a page.
Then, block_write_full_page calls invalidatepage to invalidate the last couple
of buffers of the page, so the journal_heads of the buffer_heads are unlinked
and the buffers are marked as unmapped.
Then, fsx-linux extends the file and does an msync after changing the page
content by mmapping the page, so the page (including the last buffer head) is
marked dirty again.
Then, kjournald's journal_commit_transaction goes through wbuf to submit_bh all
dirty buffers, but one buffer head is already marked as unmapped. A bug check is
triggered.

From the above scenario, as long as the later calls don't try to lock the
buffer head, the race condition still exists.

I think the right way is to let journal_dirty_data wait till wbuf is flushed.

Below is my patch. Any ideas?

Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>

---

diff -Nraup linux-2.6.18-rc7/fs/jbd/commit.c linux-2.6.18-rc7_jbd/fs/jbd/commit.c
--- linux-2.6.18-rc7/fs/jbd/commit.c	2006-09-20 08:57:12.000000000 +0800
+++ linux-2.6.18-rc7_jbd/fs/jbd/commit.c	2006-09-27 16:33:14.000000000 +0800
@@ -384,6 +384,8 @@ write_out_data:
 		spin_lock(&journal->j_list_lock);
 	}
 
+	wake_up(&journal->j_wait_commit_sync_datalist_free);
+
 	/*
 	 * Wait for all previously submitted IO to complete.
 	 */
diff -Nraup linux-2.6.18-rc7/fs/jbd/journal.c linux-2.6.18-rc7_jbd/fs/jbd/journal.c
--- linux-2.6.18-rc7/fs/jbd/journal.c	2006-09-20 08:57:12.000000000 +0800
+++ linux-2.6.18-rc7_jbd/fs/jbd/journal.c	2006-09-27 16:11:27.000000000 +0800
@@ -656,6 +656,7 @@ static journal_t * journal_init_common (
 
 	init_waitqueue_head(&journal->j_wait_transaction_locked);
 	init_waitqueue_head(&journal->j_wait_logspace);
+	init_waitqueue_head(&journal->j_wait_commit_sync_datalist_free);
 	init_waitqueue_head(&journal->j_wait_done_commit);
 	init_waitqueue_head(&journal->j_wait_checkpoint);
 	init_waitqueue_head(&journal->j_wait_commit);
diff -Nraup linux-2.6.18-rc7/fs/jbd/transaction.c linux-2.6.18-rc7_jbd/fs/jbd/transaction.c
--- linux-2.6.18-rc7/fs/jbd/transaction.c	2006-09-20 08:57:12.000000000 +0800
+++ linux-2.6.18-rc7_jbd/fs/jbd/transaction.c	2006-09-28 14:42:05.000000000 +0800
@@ -965,6 +965,8 @@ int journal_dirty_data(handle_t *handle,
 	 * never, ever allow this to happen: there's nothing we can do
 	 * about it in this layer.
 	 */
+
+repeat_ifcase2:
 	jbd_lock_bh_state(bh);
 	spin_lock(&journal->j_list_lock);
 	if (jh->b_transaction) {
@@ -1032,6 +1034,32 @@ int journal_dirty_data(handle_t *handle,
 				   time if it is redirtied */
 			}
 
+			if (jh->b_transaction != NULL &&
+				journal->j_committing_transaction ==
+				jh->b_transaction &&
+				jh->b_jlist == BJ_SyncData) {
+
+				wait_queue_head_t *queue_head;
+				spin_unlock(&journal->j_list_lock);
+				jbd_unlock_bh_state(bh);
+
+				if (need_brelse) {
+					BUFFER_TRACE(bh, "brelse");
+					__brelse(bh);
+					need_brelse = 0;
+				}
+
+				queue_head =
+				  &journal->j_wait_commit_sync_datalist_free;
+				wait_event(*queue_head,
+					!(jh->b_transaction != NULL &&
+					 journal->j_committing_transaction ==
+					 	jh->b_transaction &&
+					 jh->b_jlist == BJ_SyncData));
+
+				goto repeat_ifcase2;
+			}
+
 			/* journal_clean_data_list() may have got there first */
 			if (jh->b_transaction != NULL) {
 				JBUFFER_TRACE(jh, "unfile from commit");
diff -Nraup linux-2.6.18-rc7/include/linux/jbd.h linux-2.6.18-rc7_jbd/include/linux/jbd.h
--- linux-2.6.18-rc7/include/linux/jbd.h	2006-09-20 08:57:13.000000000 +0800
+++ linux-2.6.18-rc7_jbd/include/linux/jbd.h	2006-09-27 16:29:47.000000000 +0800
@@ -583,6 +583,8 @@ struct transaction_s 
  * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction
  *  to start committing, or for a barrier lock to be released
  * @j_wait_logspace: Wait queue for waiting for checkpointing to complete
+ * @j_wait_commit_sync_datalist_free: Wait queue for waiting for commit
+ *  transaction sync_datalist becomes null
  * @j_wait_done_commit: Wait queue for waiting for commit to complete 
  * @j_wait_checkpoint:  Wait queue to trigger checkpointing
  * @j_wait_commit: Wait queue to trigger commit
@@ -686,6 +688,12 @@ struct journal_s
 	/* Wait queue for waiting for checkpointing to complete */
 	wait_queue_head_t	j_wait_logspace;
 
+	/*
+	 * Wait queue for waiting for commit transaction
+	 * sync_datalist becomes null
+	 */
+	wait_queue_head_t	j_wait_commit_sync_datalist_free;
+
 	/* Wait queue for waiting for commit to complete */
 	wait_queue_head_t	j_wait_done_commit;
 


* Re: [PATCH] Fix commit of ordered data buffers
  2006-09-28  8:31 ` Zhang, Yanmin
@ 2006-09-28 21:35   ` Jan Kara
  2006-09-29  1:27     ` Zhang, Yanmin
  0 siblings, 1 reply; 12+ messages in thread
From: Jan Kara @ 2006-09-28 21:35 UTC (permalink / raw)
  To: Zhang, Yanmin; +Cc: Jan Kara, akpm, LKML, Badari Pulavarty, fedora-ia64-list

  Hello,

> On Tue, 2006-09-12 at 05:05, Jan Kara wrote:
> >   Hi Andrew,
> > 
> >   here is the patch that came out of the thread "set_page_buffer_dirty
> > should skip unmapped buffers". It fixes several flaws in the code
> > writing out ordered data buffers during commit. It definitely fixed the
> > problem Badari was seeing with the fsx-linux test.  Could you include it
> > in -mm? Since there are quite complex interactions with other JBD code
> > and the locking is kind of ugly, I'd leave it in -mm for a while to see
> > whether any bugs emerge ;). Thanks.
> > 
> > 								Honza
> I also worked on this issue, because I didn't know you were working on it until I
> located the root cause and checked bugzilla.
> 
> I reviewed your patch.
> 
> +		if (!inverted_lock(journal, bh)) {
> +			jbd_lock_bh_state(bh);
> +			spin_lock(&journal->j_list_lock);
> +		}
> Should journal->j_list_lock be unlocked before jbd_lock_bh_state(bh)?
  It does not matter... The ordering of locking matters; the ordering of
unlocking does not.
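
  (For reference, inverted_lock() in fs/jbd/commit.c is roughly the
following; note that on failure it has already dropped j_list_lock
itself, so the caller only has to retake both locks in the correct
order - bh_state first, then j_list_lock:)

static int inverted_lock(journal_t *journal, struct buffer_head *bh)
{
	/* Try to take the bh_state lock out of the usual order. */
	if (!jbd_trylock_bh_state(bh)) {
		/* Contended: drop j_list_lock and let the caller
		 * retry in the correct order. */
		spin_unlock(&journal->j_list_lock);
		schedule();
		return 0;
	}
	return 1;
}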

> The fsx-linux test issue is a race between journal_commit_transaction
> and journal_dirty_data. After journal_commit_transaction adds buffer_head pointers
> to wbuf, it might unlock journal->j_list_lock. Although all buffer heads in wbuf are locked,
> does that prevent journal_dirty_data from unlinking the buffer head from the transaction
> and fsx-linux from truncating it?
  Yes, it does. Because the buffers are locked *and dirty*. Nothing can
clear the dirty bit while we are holding the lock and
journal_dirty_data() also waits until it can safely write out the buffer
- which is after we release the buffer lock.
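
  (The waiting happens via sync_dirty_buffer(), which in the kernels
discussed here looks roughly like the following, abridged - it blocks
on the buffer lock before it even looks at the dirty bit:)

int sync_dirty_buffer(struct buffer_head *bh)
{
	int ret = 0;

	/* Blocks until the current holder of the buffer lock - here
	 * the commit code - releases it. */
	lock_buffer(bh);
	if (test_clear_buffer_dirty(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_write_sync;
		ret = submit_bh(WRITE, bh);
		wait_on_buffer(bh);
		if (!ret && !buffer_uptodate(bh))
			ret = -EIO;
	} else {
		/* Someone else wrote it out while we waited for the lock. */
		unlock_buffer(bh);
	}
	return ret;
}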

> I'm not a journal expert. But I want to discuss it.
> 
> My investigation is below (Scenario):
> 
> fsx-linux starts journal_dirty_data and journal_dirty_data links a jh to
> journal->j_running_transaction's t_sync_datalist; kjournald might not
> write the buffer to disk quickly, but saves it to the array wbuf.
> Then, fsx-linux starts a second journal_dirty_data in a new transaction, which
> might submit the same buffer head and move the jh to the new transaction's
> t_sync_datalist.
  Yes, but this happens only after the buffer is removed from wbuf[] as
I explain above.

> Then, fsx-linux truncates the last couple of buffers of a page.
> Then, block_write_full_page calls invalidatepage to invalidate the last couple
> of buffers of the page, so the journal_heads of the buffer_heads are unlinked
> and the buffers are marked as unmapped.
> Then, fsx-linux extends the file and does an msync after changing the page
> content by mmapping the page, so the page (including the last buffer head) is
> marked dirty again.
> Then, kjournald's journal_commit_transaction goes through wbuf to submit_bh all
> dirty buffers, but one buffer head is already marked as unmapped. A bug check is
> triggered.
> 
> From the above scenario, as long as the later calls don't try to lock the
> buffer head, the race condition still exists.
> 
> I think the right way is to let journal_dirty_data wait till wbuf is flushed.
  This actually happens in my fix too. And my fix also has the bonus of
fixing a few other flaws... Otherwise your patch seems to be right.

									Honza

-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs


* Re: [PATCH] Fix commit of ordered data buffers
  2006-09-28 21:35   ` Jan Kara
@ 2006-09-29  1:27     ` Zhang, Yanmin
  2006-09-29  9:21       ` Jan Kara
  0 siblings, 1 reply; 12+ messages in thread
From: Zhang, Yanmin @ 2006-09-29  1:27 UTC (permalink / raw)
  To: Jan Kara; +Cc: akpm, LKML, Badari Pulavarty, ia64 Fedora Core Development

On Fri, 2006-09-29 at 05:35, Jan Kara wrote:
>   Hello,
> 
> > On Tue, 2006-09-12 at 05:05, Jan Kara wrote:
> > >   Hi Andrew,
> > > 
> > >   here is the patch that came out of the thread "set_page_buffer_dirty
> > > should skip unmapped buffers". It fixes several flaws in the code
> > > writing out ordered data buffers during commit. It definitely fixed the
> > > problem Badari was seeing with the fsx-linux test.  Could you include it
> > > in -mm? Since there are quite complex interactions with other JBD code
> > > and the locking is kind of ugly, I'd leave it in -mm for a while to see
> > > whether any bugs emerge ;). Thanks.
> > > 
> > > 								Honza
> > The fsx-linux test issue is a race between journal_commit_transaction
> > and journal_dirty_data. After journal_commit_transaction adds buffer_head pointers
> > to wbuf, it might unlock journal->j_list_lock. Although all buffer heads in wbuf are locked,
> > does that prevent journal_dirty_data from unlinking the buffer head from the transaction
> > and fsx-linux from truncating it?
>   Yes, it does. Because the buffers are locked *and dirty*. Nothing can
> clear the dirty bit while we are holding the lock and
> journal_dirty_data() also waits until it can safely write out the buffer
> - which is after we release the buffer lock.
With your patch, it's not true, because journal_submit_data_buffers clears the dirty
flag, so a later journal_dirty_data won't try to lock/flush the buffer. journal_dirty_data
would just move the jh to the t_sync_datalist of a new transaction.

> 
> > I'm not a journal expert. But I want to discuss it.
> > 
> > My investigation is below (Scenario):
> > 
> > fsx-linux starts journal_dirty_data and journal_dirty_data links a jh to
> > journal->j_running_transaction's t_sync_datalist; kjournald might not
> > write the buffer to disk quickly, but saves it to the array wbuf.
> > Then, fsx-linux starts a second journal_dirty_data in a new transaction, which
> > might submit the same buffer head and move the jh to the new transaction's
> > t_sync_datalist.
>   Yes, but this happens only after the buffer is removed from wbuf[] as
> I explain above.
> 
> > Then, fsx-linux truncates the last couple of buffers of a page.
> > Then, block_write_full_page calls invalidatepage to invalidate the last couple
> > of buffers of the page, so the journal_heads of the buffer_heads are unlinked
> > and the buffers are marked as unmapped.
> > Then, fsx-linux extends the file and does an msync after changing the page
> > content by mmapping the page, so the page (including the last buffer head) is
> > marked dirty again.
> > Then, kjournald's journal_commit_transaction goes through wbuf to submit_bh all
> > dirty buffers, but one buffer head is already marked as unmapped. A bug check is
> > triggered.
I think the reason that your patch fixes it is that journal_invalidatepage
will lock the buffer before calling journal_unmap_buffer. So the last step to trigger
the bug will be synchronized with journal_commit_transaction.
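
(Roughly, the relevant loop in journal_invalidatepage() in these
kernels is the following - the buffer lock taken here is what
serializes invalidation against the commit code:)

	do {
		unsigned int next_off = curr_off + bh->b_size;
		next = bh->b_this_page;
		if (offset <= curr_off) {
			/* Block is wholly outside the truncation point:
			 * lock it before nuking its journal_head. */
			lock_buffer(bh);
			may_free &= journal_unmap_buffer(journal, bh);
			unlock_buffer(bh);
		}
		curr_off = next_off;
		bh = next;
	} while (bh != head);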

> I think the right way is to let journal_dirty_data wait till wbuf is flushed.
> >   This actually happens in my fix too. And my fix also has the bonus of
> fixing a few other flaws... Otherwise your patch seems to be right.
The other flaws could be fixed by separate small patches to keep things clearer.


* Re: [PATCH] Fix commit of ordered data buffers
  2006-09-29  1:27     ` Zhang, Yanmin
@ 2006-09-29  9:21       ` Jan Kara
  0 siblings, 0 replies; 12+ messages in thread
From: Jan Kara @ 2006-09-29  9:21 UTC (permalink / raw)
  To: Zhang, Yanmin; +Cc: akpm, LKML, Badari Pulavarty, ia64 Fedora Core Development

  Hello,

> On Fri, 2006-09-29 at 05:35, Jan Kara wrote:
> > > The fsx-linux test issue is a race between journal_commit_transaction
> > > and journal_dirty_data. After journal_commit_transaction adds buffer_head pointers
> > > to wbuf, it might unlock journal->j_list_lock. Although all buffer heads in wbuf are locked,
> > > does that prevent journal_dirty_data from unlinking the buffer head from the transaction
> > > and fsx-linux from truncating it?
> >   Yes, it does. Because the buffers are locked *and dirty*. Nothing can
> > clear the dirty bit while we are holding the lock and
> > journal_dirty_data() also waits until it can safely write out the buffer
> > - which is after we release the buffer lock.
> With your patch, it's not true, because journal_submit_data_buffers clears the dirty
> flag, so a later journal_dirty_data won't try to lock/flush the buffer. journal_dirty_data
> would just move the jh to the t_sync_datalist of a new transaction.
  Umm, yes. You're right, my previous explanation was bogus. But that
should do no harm, as we do not touch the journal_head of a buffer in
the wbuf array. We just eventually send it to disk. We are guarded
against truncate/memory pressure because we hold the buffer lock, so
that should be fine too.

> > > I'm not a journal expert. But I want to discuss it.
> > > 
> > > My investigation is below (Scenario):
> > > 
> > > fsx-linux starts journal_dirty_data and journal_dirty_data links a jh to
> > > journal->j_running_transaction's t_sync_datalist; kjournald might not
> > > write the buffer to disk quickly, but saves it to the array wbuf.
> > > Then, fsx-linux starts a second journal_dirty_data in a new transaction, which
> > > might submit the same buffer head and move the jh to the new transaction's
> > > t_sync_datalist.
> >   Yes, but this happens only after the buffer is removed from wbuf[] as
> > I explain above.
> > 
> > > Then, fsx-linux truncates the last couple of buffers of a page.
> > > Then, block_write_full_page calls invalidatepage to invalidate the last couple
> > > of buffers of the page, so the journal_heads of the buffer_heads are unlinked
> > > and the buffers are marked as unmapped.
> > > Then, fsx-linux extends the file and does an msync after changing the page
> > > content by mmapping the page, so the page (including the last buffer head) is
> > > marked dirty again.
> > > Then, kjournald's journal_commit_transaction goes through wbuf to submit_bh all
> > > dirty buffers, but one buffer head is already marked as unmapped. A bug check is
> > > triggered.
> I think the reason that your patch fixes it is that journal_invalidatepage
> will lock the buffer before calling journal_unmap_buffer. So the last step to trigger
> the bug will be synchronized with journal_commit_transaction.
> 
> > I think the right way is to let journal_dirty_data wait till wbuf is flushed.
> >   This actually happens in my fix too. And my fix also has the bonus of
> > fixing a few other flaws... Otherwise your patch seems to be right.
> The other flaws could be fixed by separate small patches to keep things clearer.
  Actually not quite - I've been thinking about the other problems for
quite a while and I did not find a way to fix the other flaws in a
non-intrusive way. I also like small and clean fixes, but sometimes one
simply has to rewrite the code...

							Bye
								Honza

-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs


* Re: [PATCH] Fix commit of ordered data buffers
  2005-09-14 18:30     ` Andrew Morton
@ 2005-09-15  7:23       ` Jan Kara
  0 siblings, 0 replies; 12+ messages in thread
From: Jan Kara @ 2005-09-15  7:23 UTC (permalink / raw)
  To: Andrew Morton; +Cc: sct, linux-kernel

> > > An alternative is to just lock the buffer in journal_commit_transaction(),
> > > if it was locked-and-dirty.  And remove the call to ll_rw_block() and
> > > submit the locked buffers by hand.
> >
> >   Yes, this has the advantage that we can move the buffer to t_locked_list
> > at the right time and so we don't change the semantics of t_locked_list.
> > OTOH the locking will be a bit more complicated (we'd need to acquire and
> > drop j_list_lock almost for every bh while currently we do it only once
> > per batch)
> 
> Only need to drop the spinlock if test_set_buffer_locked() fails.
  Ahh, good point.

> > spin_lock(&journal->j_list_lock);
> > while (commit_transaction->t_sync_datalist) {
> > 	jh = commit_transaction->t_sync_datalist;
> > 	bh = jh2bh(jh);
> > 	journal_grab_journal_head(bh);
> > 	if (buffer_dirty(bh)) {
> > 		get_bh(bh);
> > 		spin_unlock(&journal->j_list_lock);
> >                 lock_buffer(bh);
> > 		if (buffer_dirty(bh))
> > 			/* submit the buffer */
> > 		jbd_lock_bh_state(bh);
> > 		spin_lock(&journal->j_list_lock);
> > 		/* Check that somebody did not move the jh elsewhere */
> > 	}
> > 	else {
> > 		if (!inverted_lock(journal, bh))
> > 			goto write_out_data;
> > 	}
> > 	__journal_temp_unlink_buffer(jh);
> > 	__journal_file_buffer(jh, commit_transaction, BJ_Locked);
> > 	journal_put_journal_head(bh);
> > 	jbd_unlock_bh_state(bh);
> > }
> > 
> > If you prefer something like this I can code it up...
> 
> If the code is conceptually simpler then I think it's worth doing, even if
> the actual implementation is similarly or even more complex.
> 
> So yes please, let's see how it looks.
  OK, will do.

> > > That would mean that if someone had redirtied a buffer which was on
> > > t_sync_datalist *while* it was under writeout, we'd end up waiting on that
> > > writeout to complete before submitting more I/O.  But I suspect that's
> > > pretty rare.
> > > 
> > > One thing which concerns me with your approach is livelocks: if some process
> > > sits in a tight loop writing to the same part of the same file, will it
> > > cause kjournald to get stuck?
> >
> >   No, because as soon as we find the buffer in t_sync_datalist we move
> > it to t_locked_list and submit it for IO - this case is one reason why I
> > introduced that new meaning to t_locked_list.
> 
> Right.  But the buffer can be redirtied while it's on t_locked_list, even
> while the I/O is in flight.  What happens then?  Will kjournald try to
> rewrite it?
  No. With my patch the journaling code writes only data buffers on
t_sync_datalist and moves them to t_locked_list even before the actual
submit, so we really write each buffer exactly once in
journal_commit_transaction().
  Originally it worked as follows: a buffer was first submitted for IO;
then, if we eventually came over it a second time and found it locked,
we moved it to t_locked_list. If we found a clean buffer on
t_sync_datalist, we just removed it from the transaction.
Now the livelock you were talking about was prevented by the clever
code in journal_dirty_data() that has been (and still is) checking
whether the buffer is part of the committing transaction and, if so,
sends it to disk and refiles it to the new transaction.
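
  (Abridged, that code in journal_dirty_data() - the branch handling a
buffer that still belongs to the committing transaction - looks roughly
like this:)

	if (jh->b_transaction != handle->h_transaction) {
		/* Only buffers on the data lists may be stolen. */
		if (jh->b_jlist != BJ_None && jh->b_jlist != BJ_SyncData &&
		    jh->b_jlist != BJ_Locked)
			goto no_journal;
		if (buffer_dirty(bh)) {
			/* Write the buffer out on behalf of the committing
			 * transaction before stealing it;
			 * sync_dirty_buffer() blocks on the buffer lock. */
			get_bh(bh);
			spin_unlock(&journal->j_list_lock);
			jbd_unlock_bh_state(bh);
			need_brelse = 1;
			sync_dirty_buffer(bh);
			jbd_lock_bh_state(bh);
			spin_lock(&journal->j_list_lock);
		}
		/* journal_clean_data_list() may have got there first. */
		if (jh->b_transaction != NULL) {
			__journal_temp_unlink_buffer(jh);
			jh->b_transaction = NULL;
		}
	}
	/* ...after which the buffer is filed on the running transaction. */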

								Honza

-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs


* Re: [PATCH] Fix commit of ordered data buffers
  2005-09-14 12:03   ` Jan Kara
@ 2005-09-14 18:30     ` Andrew Morton
  2005-09-15  7:23       ` Jan Kara
  0 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2005-09-14 18:30 UTC (permalink / raw)
  To: Jan Kara; +Cc: sct, linux-kernel

Jan Kara <jack@suse.cz> wrote:
>
> > Also, I don't think it works.  See ll_rw_block()'s handling of
> > already-locked buffers..
>
>   We send it to disk with SWRITE - hence ll_rw_block() waits for the buffer
> lock for us. Or do you have something else in mind?
>

OK.
 
> > An alternative is to just lock the buffer in journal_commit_transaction(),
> > if it was locked-and-dirty.  And remove the call to ll_rw_block() and
> > submit the locked buffers by hand.
>
>   Yes, this has the advantage that we can move the buffer to t_locked_list
> at the right time and so we don't change the semantics of t_locked_list.
> OTOH the locking will be a bit more complicated (we'd need to acquire and
> drop j_list_lock almost for every bh while currently we do it only once
> per batch)

Only need to drop the spinlock if test_set_buffer_locked() fails.

> 
> spin_lock(&journal->j_list_lock);
> while (commit_transaction->t_sync_datalist) {
> 	jh = commit_transaction->t_sync_datalist;
> 	bh = jh2bh(jh);
> 	journal_grab_journal_head(bh);
> 	if (buffer_dirty(bh)) {
> 		get_bh(bh);
> 		spin_unlock(&journal->j_list_lock);
>                 lock_buffer(bh);
> 		if (buffer_dirty(bh))
> 			/* submit the buffer */
> 		jbd_lock_bh_state(bh);
> 		spin_lock(&journal->j_list_lock);
> 		/* Check that somebody did not move the jh elsewhere */
> 	}
> 	else {
> 		if (!inverted_lock(journal, bh))
> 			goto write_out_data;
> 	}
> 	__journal_temp_unlink_buffer(jh);
> 	__journal_file_buffer(jh, commit_transaction, BJ_Locked);
> 	journal_put_journal_head(bh);
> 	jbd_unlock_bh_state(bh);
> }
> 
> If you prefer something like this I can code it up...

If the code is conceptually simpler then I think it's worth doing, even if
the actual implementation is similarly or even more complex.

So yes please, let's see how it looks.

> > That would mean that if someone had redirtied a buffer which was on
> > t_sync_datalist *while* it was under writeout, we'd end up waiting on that
> > writeout to complete before submitting more I/O.  But I suspect that's
> > pretty rare.
> > 
> > One thing which concerns me with your approach is livelocks: if some process
> > sits in a tight loop writing to the same part of the same file, will it
> > cause kjournald to get stuck?
>
>   No, because as soon as we find the buffer in t_sync_datalist we move
> it to t_locked_list and submit it for IO - this case is one reason why I
> introduced that new meaning to t_locked_list.

Right.  But the buffer can be redirtied while it's on t_locked_list, even
while the I/O is in flight.  What happens then?  Will kjournald try to
rewrite it?


* Re: [PATCH] Fix commit of ordered data buffers
  2005-09-14  1:43 ` Andrew Morton
  2005-09-14 12:03   ` Jan Kara
@ 2005-09-14 13:02   ` Stephen C. Tweedie
  1 sibling, 0 replies; 12+ messages in thread
From: Stephen C. Tweedie @ 2005-09-14 13:02 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Jan Kara, linux-kernel, Stephen Tweedie

Hi,

On Wed, 2005-09-14 at 02:43, Andrew Morton wrote:

> An alternative is to just lock the buffer in journal_commit_transaction(),
> if it was locked-and-dirty.  

That was my first reaction too.  We're using SWRITE so we'll end up
locking them anyway; and if synchronising against other lockers is an
issue, then locking them ourselves early is the obvious protection.

--Stephen



* Re: [PATCH] Fix commit of ordered data buffers
  2005-09-14  1:43 ` Andrew Morton
@ 2005-09-14 12:03   ` Jan Kara
  2005-09-14 18:30     ` Andrew Morton
  2005-09-14 13:02   ` Stephen C. Tweedie
  1 sibling, 1 reply; 12+ messages in thread
From: Jan Kara @ 2005-09-14 12:03 UTC (permalink / raw)
  To: Andrew Morton; +Cc: sct, linux-kernel

> Jan Kara <jack@suse.cz> wrote:
> >
> > When a buffer is locked it does not mean that write-out is in progress. We
> >  have to check if the buffer is dirty, and if it is, we have to submit it
> >  for write-out. We unconditionally move the buffer to t_locked_list so
> >  that we don't confuse unprocessed buffers with buffers already given to
> >  ll_rw_block(). This subtly changes the meaning of buffer states in
> >  t_locked_list - an unlocked buffer (for list users different from
> >  journal_commit_transaction()) does not mean it has been written. But
> >  only journal_unmap_buffer() cares, and it now checks that the buffer
> >  is not dirty.
> 
> Seems complex.  It means that t_locked_list takes on an additional (and
> undocumented!) meaning.
  Sorry, if we agree on some final form I'll add the appropriate
comment to the header file.

> Also, I don't think it works.  See ll_rw_block()'s handling of
> already-locked buffers..
  We send it to disk with SWRITE - hence ll_rw_block() waits for the buffer
lock for us. Or do you have something else in mind?

> An alternative is to just lock the buffer in journal_commit_transaction(),
> if it was locked-and-dirty.  And remove the call to ll_rw_block() and
> submit the locked buffers by hand.
  Yes, this has the advantage that we can move the buffer to t_locked_list
at the right time and so we don't change the semantics of t_locked_list.
OTOH the locking will be a bit more complicated (we'd need to acquire and
drop j_list_lock almost for every bh while currently we do it only once
per batch) - the code would have to be like:

spin_lock(&journal->j_list_lock);
while (commit_transaction->t_sync_datalist) {
	jh = commit_transaction->t_sync_datalist;
	bh = jh2bh(jh);
	journal_grab_journal_head(bh);
	if (buffer_dirty(bh)) {
		get_bh(bh);
		spin_unlock(&journal->j_list_lock);
                lock_buffer(bh);
		if (buffer_dirty(bh))
			/* submit the buffer */
		jbd_lock_bh_state(bh);
		spin_lock(&journal->j_list_lock);
		/* Check that somebody did not move the jh elsewhere */
	}
	else {
		if (!inverted_lock(journal, bh))
			goto write_out_data;
	}
	__journal_temp_unlink_buffer(jh);
	__journal_file_buffer(jh, commit_transaction, BJ_Locked);
	journal_put_journal_head(bh);
	jbd_unlock_bh_state(bh);
}

If you prefer something like this I can code it up...

> That would mean that if someone had redirtied a buffer which was on
> t_sync_datalist *while* it was under writeout, we'd end up waiting on that
> writeout to complete before submitting more I/O.  But I suspect that's
> pretty rare.
> 
> One thing which concerns me with your approach is livelocks: if some process
> sits in a tight loop writing to the same part of the same file, will it
> cause kjournald to get stuck?
  No, because as soon as we find the buffer in t_sync_datalist we move
it to t_locked_list and submit it for IO - this case is one reason why I
introduced that new meaning to t_locked_list.

> The problem we have here is "was the buffer dirtied before this commit
> started, or after?".  In the former case we are obliged to write it.  In
> the later case we are not, and in trying to do this we risk livelocking.

								Honza
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs


* Re: [PATCH] Fix commit of ordered data buffers
  2005-09-13 15:30 Jan Kara
@ 2005-09-14  1:43 ` Andrew Morton
  2005-09-14 12:03   ` Jan Kara
  2005-09-14 13:02   ` Stephen C. Tweedie
  0 siblings, 2 replies; 12+ messages in thread
From: Andrew Morton @ 2005-09-14  1:43 UTC (permalink / raw)
  To: Jan Kara; +Cc: sct, linux-kernel

Jan Kara <jack@suse.cz> wrote:
>
> When a buffer is locked it does not mean that write-out is in progress. We
>  have to check if the buffer is dirty, and if it is, we have to submit it
>  for write-out. We unconditionally move the buffer to t_locked_list so
>  that we don't confuse unprocessed buffers with buffers already given to
>  ll_rw_block(). This subtly changes the meaning of buffer states in
>  t_locked_list - an unlocked buffer (for list users different from
>  journal_commit_transaction()) does not mean it has been written. But
>  only journal_unmap_buffer() cares, and it now checks that the buffer
>  is not dirty.

Seems complex.  It means that t_locked_list takes on an additional (and
undocumented!) meaning.

Also, I don't think it works.  See ll_rw_block()'s handling of
already-locked buffers..

An alternative is to just lock the buffer in journal_commit_transaction(),
if it was locked-and-dirty.  And remove the call to ll_rw_block() and
submit the locked buffers by hand.

That would mean that if someone had redirtied a buffer which was on
t_sync_datalist *while* it was under writeout, we'd end up waiting on that
writeout to complete before submitting more I/O.  But I suspect that's
pretty rare.

One thing which concerns me with your approach is livelocks: if some process
sits in a tight loop writing to the same part of the same file, will it
cause kjournald to get stuck?

The problem we have here is "was the buffer dirtied before this commit
started, or after?".  In the former case we are obliged to write it.  In
the later case we are not, and in trying to do this we risk livelocking.


* [PATCH] Fix commit of ordered data buffers
@ 2005-09-13 15:30 Jan Kara
  2005-09-14  1:43 ` Andrew Morton
  0 siblings, 1 reply; 12+ messages in thread
From: Jan Kara @ 2005-09-13 15:30 UTC (permalink / raw)
  To: akpm, sct; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1444 bytes --]

  Hello,

  the attached patch should fix the handling of ordered data buffers in
journal_commit_transaction(). Currently the code simply moves the buffer
from the t_sync_data list to the t_locked list when the buffer is
locked. But in theory (I'm not sure this currently really happens for
ext3 ordered data buffers) the buffer can be locked just for
"examination" purposes without being sent to disk. Hence it could happen
that we don't write some ordered buffer when we should.
  What the patch does is write the buffer when it is dirty, even if it
is locked - hence it can happen that we call ll_rw_block() on a buffer
currently under write-out, but in that case ll_rw_block() just waits
until the IO is complete and returns (as the buffer won't be dirty
anymore), and this race with pdflush should be rare enough not to
affect performance. The patch also moves the buffer to the t_locked
list immediately after queueing the buffer for write-out (as otherwise
we would have to detect which buffers are already queued, and that's
not nice). This changes the logic of the t_locked list a bit - an
unlocked buffer, for users different from journal_commit_transaction(),
does not mean the buffer has already been written. But only
journal_unmap_buffer() cares about this change, and I changed it to
check that the buffer is not dirty.
  Andrew, if you think the change is fine, please put it into -mm so
that it gets some testing.

							Honza

-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

[-- Attachment #2: jbd-2.6.13-mm1-2-orderedwrite.diff --]
[-- Type: text/plain, Size: 4249 bytes --]

When a buffer is locked it does not mean that write-out is in progress. We
have to check if the buffer is dirty, and if it is, we have to submit it
for write-out. We unconditionally move the buffer to t_locked_list so
that we don't confuse unprocessed buffers with buffers already given to
ll_rw_block(). This subtly changes the meaning of buffer states in
t_locked_list - an unlocked buffer (for list users different from
journal_commit_transaction()) does not mean it has been written. But
only journal_unmap_buffer() cares, and it now checks that the buffer
is not dirty.

Signed-off-by: Jan Kara <jack@suse.cz>

diff -rupX /home/jack/.kerndiffexclude linux-2.6.13-mm1-1-invalidatepage/fs/jbd/commit.c linux-2.6.13-mm1-2-orderedwrite/fs/jbd/commit.c
--- linux-2.6.13-mm1-1-invalidatepage/fs/jbd/commit.c	2005-09-11 22:49:07.000000000 +0200
+++ linux-2.6.13-mm1-2-orderedwrite/fs/jbd/commit.c	2005-09-17 01:53:46.000000000 +0200
@@ -337,19 +337,32 @@ write_out_data:
 		jh = commit_transaction->t_sync_datalist;
 		commit_transaction->t_sync_datalist = jh->b_tnext;
 		bh = jh2bh(jh);
-		if (buffer_locked(bh)) {
-			BUFFER_TRACE(bh, "locked");
+
+		/*
+		 * If the buffer is dirty, it needs a write-out (the write-out
+		 * may be already in progress but that should be rare so giving
+		 * the buffer once more to ll_rw_block() should not affect
+		 * the performance). We move the buffer out of t_sync_datalist
+		 * to t_locked_list to make a progress. This handling of dirty
+		 * buffer has the disadvantage that for functions different
+		 * from journal_commit_transaction() unlocked buffer in
+		 * t_locked_list does not mean it has been written. *BUT* the
+		 * only place that cares is journal_unmap_buffer() and that
+		 * checks that the buffer is not dirty.
+		 *
+		 * If the buffer is locked and not dirty, someone has submitted
+		 * the buffer for IO before us so we just move the buffer to
+		 * t_locked_list (to wait for IO completion).
+		 */
+		if (buffer_locked(bh) || buffer_dirty(bh)) {
+			BUFFER_TRACE(bh, "locked or dirty");
 			if (!inverted_lock(journal, bh))
 				goto write_out_data;
 			__journal_temp_unlink_buffer(jh);
 			__journal_file_buffer(jh, commit_transaction,
 						BJ_Locked);
 			jbd_unlock_bh_state(bh);
-			if (lock_need_resched(&journal->j_list_lock)) {
-				spin_unlock(&journal->j_list_lock);
-				goto write_out_data;
-			}
-		} else {
+
 			if (buffer_dirty(bh)) {
 				BUFFER_TRACE(bh, "start journal writeout");
 				get_bh(bh);
@@ -363,19 +376,19 @@ write_out_data:
 					bufs = 0;
 					goto write_out_data;
 				}
-			} else {
-				BUFFER_TRACE(bh, "writeout complete: unfile");
-				if (!inverted_lock(journal, bh))
-					goto write_out_data;
-				__journal_unfile_buffer(jh);
-				jbd_unlock_bh_state(bh);
-				journal_remove_journal_head(bh);
-				put_bh(bh);
-				if (lock_need_resched(&journal->j_list_lock)) {
-					spin_unlock(&journal->j_list_lock);
-					goto write_out_data;
-				}
 			}
+		} else {
+			BUFFER_TRACE(bh, "writeout complete: unfile");
+			if (!inverted_lock(journal, bh))
+				goto write_out_data;
+			__journal_unfile_buffer(jh);
+			jbd_unlock_bh_state(bh);
+			journal_remove_journal_head(bh);
+			put_bh(bh);
+		}
+		if (lock_need_resched(&journal->j_list_lock)) {
+			spin_unlock(&journal->j_list_lock);
+			goto write_out_data;
 		}
 	}
 
diff -rupX /home/jack/.kerndiffexclude linux-2.6.13-mm1-1-invalidatepage/fs/jbd/transaction.c linux-2.6.13-mm1-2-orderedwrite/fs/jbd/transaction.c
--- linux-2.6.13-mm1-1-invalidatepage/fs/jbd/transaction.c	2005-09-11 22:49:07.000000000 +0200
+++ linux-2.6.13-mm1-2-orderedwrite/fs/jbd/transaction.c	2005-09-17 01:53:46.000000000 +0200
@@ -1814,11 +1814,11 @@ static int journal_unmap_buffer(journal_
 			}
 		}
 	} else if (transaction == journal->j_committing_transaction) {
-		if (jh->b_jlist == BJ_Locked) {
+		if (jh->b_jlist == BJ_Locked && !buffer_dirty(bh)) {
 			/*
 			 * The buffer is on the committing transaction's locked
-			 * list.  We have the buffer locked, so I/O has
-			 * completed.  So we can nail the buffer now.
+			 * list.  We have the buffer locked and !dirty, so I/O
+			 * has completed.  So we can nail the buffer now.
 			 */
 			may_free = __dispose_buffer(jh, transaction);
 			goto zap_buffer;


Thread overview: 12+ messages
2006-09-11 21:05 [PATCH] Fix commit of ordered data buffers Jan Kara
2006-09-11 22:12 ` Andrew Morton
2006-09-28  8:31 ` Zhang, Yanmin
2006-09-28 21:35   ` Jan Kara
2006-09-29  1:27     ` Zhang, Yanmin
2006-09-29  9:21       ` Jan Kara
  -- strict thread matches above, loose matches on Subject: below --
2005-09-13 15:30 Jan Kara
2005-09-14  1:43 ` Andrew Morton
2005-09-14 12:03   ` Jan Kara
2005-09-14 18:30     ` Andrew Morton
2005-09-15  7:23       ` Jan Kara
2005-09-14 13:02   ` Stephen C. Tweedie
