LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: neilb@suse.de, linux-raid@vger.kernel.org
Cc: johnpol@2ka.mipt.ru, christopher.leech@intel.com,
	arjan@infradead.org, linux-kernel@vger.kernel.org
Subject: [PATCH 05/12] md: move write operations to raid5_run_ops
Date: Mon, 22 Jan 2007 20:29:20 -0700	[thread overview]
Message-ID: <20070123032920.29114.62441.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <1169522364.8362.113.camel@dwillia2-linux.ch.intel.com>

From: Dan Williams <dan.j.williams@intel.com>

handle_stripe sets STRIPE_OP_PREXOR, STRIPE_OP_BIODRAIN, STRIPE_OP_POSTXOR
to request a write to the stripe cache.  raid5_run_ops is triggered to run
and executes the request outside the stripe lock.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---

 drivers/md/raid5.c |  152 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 131 insertions(+), 21 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2c74f9b..2390657 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1788,7 +1788,75 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
 	}
 }
 
+static int handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
+{
+	int i, pd_idx = sh->pd_idx, disks = sh->disks;
+	int locked=0;
+
+	if (rcw == 0) {
+		/* rcw == 0 path: skip the drain operation on an expand (only POSTXOR is requested) */
+		if (!expand) {
+			BUG_ON(test_and_set_bit(STRIPE_OP_BIODRAIN,
+				&sh->ops.pending));
+			sh->ops.count++;
+		}
+
+		BUG_ON(test_and_set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending));
+		sh->ops.count++;
+
+		for (i=disks ; i-- ;) {
+			struct r5dev *dev = &sh->dev[i];
+
+			if (dev->towrite) {
+				set_bit(R5_LOCKED, &dev->flags);
+				if (!expand)
+					clear_bit(R5_UPTODATE, &dev->flags);
+				locked++;
+			}
+		}
+	} else {
+		BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
+			test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
+
+		BUG_ON(test_and_set_bit(STRIPE_OP_PREXOR, &sh->ops.pending) ||
+			test_and_set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending) ||
+			test_and_set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending));
+
+		sh->ops.count += 3;
+
+		for (i=disks ; i-- ;) {
+			struct r5dev *dev = &sh->dev[i];
+			if (i==pd_idx)
+				continue;
 
+			/* For a read-modify-write there may be blocks that are
+			 * locked for reading while others are ready to be written,
+			 * so we mark the ready ones with the R5_Wantprexor bit
+			 */
+			if (dev->towrite &&
+			    (test_bit(R5_UPTODATE, &dev->flags) ||
+			    test_bit(R5_Wantcompute, &dev->flags))) {
+				set_bit(R5_Wantprexor, &dev->flags);
+				set_bit(R5_LOCKED, &dev->flags);
+				clear_bit(R5_UPTODATE, &dev->flags);
+				locked++;
+			}
+		}
+	}
+
+	/* keep the parity disk locked (and not uptodate) while asynchronous
+	 * operations are in flight; it counts toward the returned total
+	 */
+	set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
+	clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+	locked++;
+
+	PRINTK("%s: stripe %llu locked: %d pending: %lx\n",
+		__FUNCTION__, (unsigned long long)sh->sector,
+		locked, sh->ops.pending);
+
+	return locked;
+}
 
 /*
  * Each stripe/dev can have one or more bion attached.
@@ -2151,8 +2219,67 @@ static void handle_stripe5(struct stripe_head *sh)
 		set_bit(STRIPE_HANDLE, &sh->state);
 	}
 
-	/* now to consider writing and what else, if anything should be read */
-	if (to_write) {
+	/* Now we check to see if any write operations have recently
+	 * completed
+	 */
+
+	/* leave prexor set until postxor is done, allows us to distinguish
+	 * a rmw from a rcw during biodrain
+	 */
+	if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
+		test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
+
+		clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
+		clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
+		clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
+
+		for (i=disks; i--;)
+			clear_bit(R5_Wantprexor, &sh->dev[i].flags);
+	}
+
+	/* if only POSTXOR is set then this is an 'expand' postxor */
+	if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) &&
+		test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
+
+		clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
+		clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack);
+		clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
+
+		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
+		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
+		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
+
+		/* All the 'written' buffers and the parity block are ready to be
+		 * written back to disk
+		 */
+		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+		for (i=disks; i--;) {
+			dev = &sh->dev[i];
+			if (test_bit(R5_LOCKED, &dev->flags) &&
+				(i == sh->pd_idx || dev->written)) {
+				PRINTK("Writing block %d\n", i);
+				set_bit(R5_Wantwrite, &dev->flags);
+				if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
+					sh->ops.count++;
+				if (!test_bit(R5_Insync, &dev->flags)
+				    || (i==sh->pd_idx && failed == 0))
+					set_bit(STRIPE_INSYNC, &sh->state);
+			}
+		}
+		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+			atomic_dec(&conf->preread_active_stripes);
+			if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
+				md_wakeup_thread(conf->mddev->thread);
+		}
+	}
+
+	/* 1/ Now to consider new write requests and what else, if anything should be read
+	 * 2/ Check operations clobber the parity block so do not start new writes while
+	 *    a check is in flight
+	 * 3/ Write operations do not stack
+	 */
+	if (to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
+		!test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
 		int rmw=0, rcw=0;
 		for (i=disks ; i--;) {
 			/* would I have to read this buffer for read_modify_write */
@@ -2219,25 +2346,8 @@ static void handle_stripe5(struct stripe_head *sh)
 			}
 		/* now if nothing is locked, and if we have enough data, we can start a write request */
 		if (locked == 0 && (rcw == 0 ||rmw == 0) &&
-		    !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
-			PRINTK("Computing parity...\n");
-			compute_parity5(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
-			/* now every locked buffer is ready to be written */
-			for (i=disks; i--;)
-				if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
-					PRINTK("Writing block %d\n", i);
-					locked++;
-					set_bit(R5_Wantwrite, &sh->dev[i].flags);
-					if (!test_bit(R5_Insync, &sh->dev[i].flags)
-					    || (i==sh->pd_idx && failed == 0))
-						set_bit(STRIPE_INSYNC, &sh->state);
-				}
-			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-				atomic_dec(&conf->preread_active_stripes);
-				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
-					md_wakeup_thread(conf->mddev->thread);
-			}
-		}
+		    !test_bit(STRIPE_BIT_DELAY, &sh->state))
+			locked += handle_write_operations5(sh, rcw, 0);
 	}
 
 	/* maybe we need to check and possibly fix the parity for this stripe

  parent reply	other threads:[~2007-01-23  3:32 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <1169522364.8362.113.camel@dwillia2-linux.ch.intel.com>
2007-01-23  3:28 ` [PATCH 01/12] dmaengine: add base support for the async_tx api Dan Williams
2007-01-23  3:29 ` [PATCH 02/12] dmaengine: add " Dan Williams
2007-01-23  3:29 ` [PATCH 03/12] md: add raid5_run_ops and support routines Dan Williams
2007-01-23  3:29 ` [PATCH 04/12] md: use raid5_run_ops for stripe cache operations Dan Williams
2007-01-23  3:29 ` Dan Williams [this message]
2007-01-23  3:29 ` [PATCH 06/12] md: move raid5 compute block operations to raid5_run_ops Dan Williams
2007-01-23  3:29 ` [PATCH 07/12] md: move raid5 parity checks " Dan Williams
2007-01-23  3:29 ` [PATCH 08/12] md: satisfy raid5 read requests via raid5_run_ops Dan Williams
2007-01-23  3:29 ` [PATCH 09/12] md: use async_tx and raid5_run_ops for raid5 expansion operations Dan Williams
2007-01-23  3:29 ` [PATCH 10/12] md: move raid5 io requests to raid5_run_ops Dan Williams
2007-01-23  3:29 ` [PATCH 11/12] md: remove raid5 compute_block and compute_parity5 Dan Williams
2007-01-23  3:29 ` [PATCH 12/12] dmaengine: driver for the iop32x, iop33x, and iop13xx raid engines Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070123032920.29114.62441.stgit@dwillia2-linux.ch.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=arjan@infradead.org \
    --cc=christopher.leech@intel.com \
    --cc=johnpol@2ka.mipt.ru \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    --subject='Re: [PATCH 05/12] md: move write operations to raid5_run_ops' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).