LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: neilb@suse.de, christopher.leech@intel.com,
	linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: akpm@linux-foundation.org, torvalds@linux-foundation.org,
	yur@emcraft.com, wd@denx.de, arjan@linux.intel.com,
	rmk+kernel@arm.linux.org.uk
Subject: [PATCH 2.6.21-rc4 06/15] md: move write operations to raid5_run_ops
Date: Thu, 22 Mar 2007 23:52:07 -0700	[thread overview]
Message-ID: <20070323065207.15570.15540.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <20070323064856.15570.45052.stgit@dwillia2-linux.ch.intel.com>

handle_stripe sets STRIPE_OP_PREXOR, STRIPE_OP_BIODRAIN, STRIPE_OP_POSTXOR
to request a write to the stripe cache.  raid5_run_ops is triggerred to run
and executes the request outside the stripe lock.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---

 drivers/md/raid5.c |  152 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 131 insertions(+), 21 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0397e33..4d1adb5 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1788,7 +1788,75 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
 	}
 }
 
+static int handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
+{
+	int i, pd_idx = sh->pd_idx, disks = sh->disks;
+	int locked=0;
+
+	if (rcw == 0) {
+		/* skip the drain operation on an expand */
+		if (!expand) {
+			BUG_ON(test_and_set_bit(STRIPE_OP_BIODRAIN,
+				&sh->ops.pending));
+			sh->ops.count++;
+		}
+
+		BUG_ON(test_and_set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending));
+		sh->ops.count++;
+
+		for (i=disks ; i-- ;) {
+			struct r5dev *dev = &sh->dev[i];
+
+			if (dev->towrite) {
+				set_bit(R5_LOCKED, &dev->flags);
+				if (!expand)
+					clear_bit(R5_UPTODATE, &dev->flags);
+				locked++;
+			}
+		}
+	} else {
+		BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
+			test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
+
+		BUG_ON(test_and_set_bit(STRIPE_OP_PREXOR, &sh->ops.pending) ||
+			test_and_set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending) ||
+			test_and_set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending));
+
+		sh->ops.count += 3;
+
+		for (i=disks ; i-- ;) {
+			struct r5dev *dev = &sh->dev[i];
+			if (i==pd_idx)
+				continue;
 
+			/* For a read-modify write there may be blocks that are
+			 * locked for reading while others are ready to be written
+			 * so we distinguish these blocks by the R5_Wantprexor bit
+			 */
+			if (dev->towrite &&
+			    (test_bit(R5_UPTODATE, &dev->flags) ||
+			    test_bit(R5_Wantcompute, &dev->flags))) {
+				set_bit(R5_Wantprexor, &dev->flags);
+				set_bit(R5_LOCKED, &dev->flags);
+				clear_bit(R5_UPTODATE, &dev->flags);
+				locked++;
+			}
+		}
+	}
+
+	/* keep the parity disk locked while asynchronous operations
+	 * are in flight
+	 */
+	set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
+	clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+	locked++;
+
+	PRINTK("%s: stripe %llu locked: %d pending: %lx\n",
+		__FUNCTION__, (unsigned long long)sh->sector,
+		locked, sh->ops.pending);
+
+	return locked;
+}
 
 /*
  * Each stripe/dev can have one or more bion attached.
@@ -2151,8 +2219,67 @@ static void handle_stripe5(struct stripe_head *sh)
 		set_bit(STRIPE_HANDLE, &sh->state);
 	}
 
-	/* now to consider writing and what else, if anything should be read */
-	if (to_write) {
+	/* Now we check to see if any write operations have recently
+	 * completed
+	 */
+
+	/* leave prexor set until postxor is done, allows us to distinguish
+	 * a rmw from a rcw during biodrain
+	 */
+	if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
+		test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
+
+		clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
+		clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
+		clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
+
+		for (i=disks; i--;)
+			clear_bit(R5_Wantprexor, &sh->dev[i].flags);
+	}
+
+	/* if only POSTXOR is set then this is an 'expand' postxor */
+	if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) &&
+		test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
+
+		clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
+		clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack);
+		clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
+
+		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
+		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
+		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
+
+		/* All the 'written' buffers and the parity block are ready to be
+		 * written back to disk
+		 */
+		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+		for (i=disks; i--;) {
+			dev = &sh->dev[i];
+			if (test_bit(R5_LOCKED, &dev->flags) &&
+				(i == sh->pd_idx || dev->written)) {
+				PRINTK("Writing block %d\n", i);
+				set_bit(R5_Wantwrite, &dev->flags);
+				if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
+					sh->ops.count++;
+				if (!test_bit(R5_Insync, &dev->flags)
+				    || (i==sh->pd_idx && failed == 0))
+					set_bit(STRIPE_INSYNC, &sh->state);
+			}
+		}
+		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+			atomic_dec(&conf->preread_active_stripes);
+			if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
+				md_wakeup_thread(conf->mddev->thread);
+		}
+	}
+
+	/* 1/ Now to consider new write requests and what else, if anything should be read
+	 * 2/ Check operations clobber the parity block so do not start new writes while
+	 *    a check is in flight
+	 * 3/ Write operations do not stack
+	 */
+	if (to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
+		!test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
 		int rmw=0, rcw=0;
 		for (i=disks ; i--;) {
 			/* would I have to read this buffer for read_modify_write */
@@ -2219,25 +2346,8 @@ static void handle_stripe5(struct stripe_head *sh)
 			}
 		/* now if nothing is locked, and if we have enough data, we can start a write request */
 		if (locked == 0 && (rcw == 0 ||rmw == 0) &&
-		    !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
-			PRINTK("Computing parity...\n");
-			compute_parity5(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
-			/* now every locked buffer is ready to be written */
-			for (i=disks; i--;)
-				if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
-					PRINTK("Writing block %d\n", i);
-					locked++;
-					set_bit(R5_Wantwrite, &sh->dev[i].flags);
-					if (!test_bit(R5_Insync, &sh->dev[i].flags)
-					    || (i==sh->pd_idx && failed == 0))
-						set_bit(STRIPE_INSYNC, &sh->state);
-				}
-			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-				atomic_dec(&conf->preread_active_stripes);
-				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
-					md_wakeup_thread(conf->mddev->thread);
-			}
-		}
+		    !test_bit(STRIPE_BIT_DELAY, &sh->state))
+			locked += handle_write_operations5(sh, rcw, 0);
 	}
 
 	/* maybe we need to check and possibly fix the parity for this stripe

  parent reply	other threads:[~2007-03-23  6:52 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-03-23  6:51 [PATCH 2.6.21-rc4 00/15] md raid5 acceleration and async_tx Dan Williams
2007-03-23  6:51 ` [PATCH 2.6.21-rc4 01/15] dmaengine: add base support for the async_tx api Dan Williams
2007-03-23  6:51 ` [PATCH 2.6.21-rc4 02/15] ARM: Add drivers/dma to arch/arm/Kconfig Dan Williams
2007-03-23  6:51 ` [PATCH 2.6.21-rc4 03/15] dmaengine: add the async_tx api Dan Williams
2007-03-23  6:51 ` [PATCH 2.6.21-rc4 04/15] md: add raid5_run_ops and support routines Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 05/15] md: use raid5_run_ops for stripe cache operations Dan Williams
2007-03-23  6:52 ` Dan Williams [this message]
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 07/15] md: move raid5 compute block operations to raid5_run_ops Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 08/15] md: move raid5 parity checks " Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 09/15] md: satisfy raid5 read requests via raid5_run_ops Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 10/15] md: use async_tx and raid5_run_ops for raid5 expansion operations Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 11/15] md: move raid5 io requests to raid5_run_ops Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 12/15] md: remove raid5 compute_block and compute_parity5 Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 13/15] dmaengine: driver for the iop32x, iop33x, and iop13xx raid engines Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 14/15] iop13xx: Surface the iop13xx adma units to the iop-adma driver Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 15/15] iop3xx: Surface the iop3xx DMA and AAU " Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070323065207.15570.15540.stgit@dwillia2-linux.ch.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=arjan@linux.intel.com \
    --cc=christopher.leech@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    --cc=rmk+kernel@arm.linux.org.uk \
    --cc=torvalds@linux-foundation.org \
    --cc=wd@denx.de \
    --cc=yur@emcraft.com \
    --subject='Re: [PATCH 2.6.21-rc4 06/15] md: move write operations to raid5_run_ops' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).