LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: neilb@suse.de, linux-raid@vger.kernel.org
Cc: johnpol@2ka.mipt.ru, christopher.leech@intel.com,
	arjan@infradead.org, linux-kernel@vger.kernel.org
Subject: [PATCH 06/12] md: move raid5 compute block operations to raid5_run_ops
Date: Mon, 22 Jan 2007 20:29:25 -0700	[thread overview]
Message-ID: <20070123032925.29114.10389.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <1169522364.8362.113.camel@dwillia2-linux.ch.intel.com>

From: Dan Williams <dan.j.williams@intel.com>

handle_stripe sets STRIPE_OP_COMPUTE_BLK to request servicing from
raid5_run_ops.  It also sets a flag for the block being computed to let
other parts of handle_stripe submit dependent operations.  raid5_run_ops
guarantees that the compute operation completes before any dependent
operation starts.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---

 drivers/md/raid5.c |  125 +++++++++++++++++++++++++++++++++++++++-------------
 1 files changed, 93 insertions(+), 32 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2390657..279a30c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1980,7 +1980,7 @@ static void handle_stripe5(struct stripe_head *sh)
 	int i;
 	int syncing, expanding, expanded;
 	int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
-	int non_overwrite = 0;
+	int compute=0, req_compute=0, non_overwrite=0;
 	int failed_num=0;
 	struct r5dev *dev;
 	unsigned long pending=0;
@@ -2032,8 +2032,8 @@ static void handle_stripe5(struct stripe_head *sh)
 		/* now count some things */
 		if (test_bit(R5_LOCKED, &dev->flags)) locked++;
 		if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++;
+		if (test_bit(R5_Wantcompute, &dev->flags)) BUG_ON(++compute > 1);
 
-		
 		if (dev->toread) to_read++;
 		if (dev->towrite) {
 			to_write++;
@@ -2188,31 +2188,82 @@ static void handle_stripe5(struct stripe_head *sh)
 	 * parity, or to satisfy requests
 	 * or to load a block that is being partially written.
 	 */
-	if (to_read || non_overwrite || (syncing && (uptodate < disks)) || expanding) {
-		for (i=disks; i--;) {
-			dev = &sh->dev[i];
-			if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
-			    (dev->toread ||
-			     (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
-			     syncing ||
-			     expanding ||
-			     (failed && (sh->dev[failed_num].toread ||
-					 (sh->dev[failed_num].towrite && !test_bit(R5_OVERWRITE, &sh->dev[failed_num].flags))))
-				    )
-				) {
-				/* we would like to get this block, possibly
-				 * by computing it, but we might not be able to
+	if (to_read || non_overwrite || (syncing && (uptodate + compute < disks)) || expanding ||
+		test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
+
+		/* Clear completed compute operations.  Parity recovery
+		 * (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled
+		 * later on in this routine
+		 */
+		if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
+			!test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
+			clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
+			clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
+			clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
+		}
+
+		/* look for blocks to read/compute, skip this if a compute
+		 * is already in flight, or if the stripe contents are in the
+		 * midst of changing due to a write
+		 */
+		if (!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
+			!test_bit(STRIPE_OP_PREXOR, &sh->ops.pending) &&
+			!test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
+			for (i=disks; i--;) {
+				dev = &sh->dev[i];
+
+				/* don't schedule compute operations or reads on
+				 * the parity block while a check is in flight
 				 */
-				if (uptodate == disks-1) {
-					PRINTK("Computing block %d\n", i);
-					compute_block(sh, i);
-					uptodate++;
-				} else if (test_bit(R5_Insync, &dev->flags)) {
-					set_bit(R5_LOCKED, &dev->flags);
-					set_bit(R5_Wantread, &dev->flags);
-					locked++;
-					PRINTK("Reading block %d (sync=%d)\n", 
-						i, syncing);
+				if ((i == sh->pd_idx) && test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
+					continue;
+
+				if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
+				     (dev->toread ||
+				     (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
+				     syncing ||
+				     expanding ||
+				     (failed && (sh->dev[failed_num].toread ||
+						 (sh->dev[failed_num].towrite &&
+						 	!test_bit(R5_OVERWRITE, &sh->dev[failed_num].flags))))
+					    )
+					) {
+					/* 1/ We would like to get this block, possibly
+					 * by computing it, but we might not be able to.
+					 *
+					 * 2/ Since parity check operations potentially
+					 * make the parity block !uptodate it will need
+					 * to be refreshed before any compute operations
+					 * on data disks are scheduled.
+					 *
+					 * 3/ We hold off parity block re-reads until check
+					 * operations have quiesced.
+					 */
+					if ((uptodate == disks-1) && !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
+						set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
+						set_bit(R5_Wantcompute, &dev->flags);
+						sh->ops.target = i;
+						BUG_ON(req_compute++);
+						sh->ops.count++;
+						/* Careful: from this point on 'uptodate' is in the eye of
+						 * raid5_run_ops which services 'compute' operations before
+						 * writes. R5_Wantcompute flags a block that will be R5_UPTODATE
+						 * by the time it is needed for a subsequent operation.
+						 */
+						uptodate++;
+					} else if ((uptodate < disks-1) && test_bit(R5_Insync, &dev->flags)) {
+						/* Note: we hold off compute operations while checks are in flight,
+						 * but we still prefer 'compute' over 'read' hence we only read if
+						 * (uptodate < disks-1)
+						 */
+						set_bit(R5_LOCKED, &dev->flags);
+						set_bit(R5_Wantread, &dev->flags);
+						if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
+							sh->ops.count++;
+						locked++;
+						PRINTK("Reading block %d (sync=%d)\n",
+							i, syncing);
+					}
 				}
 			}
 		}
@@ -2287,7 +2338,7 @@ static void handle_stripe5(struct stripe_head *sh)
 			if ((dev->towrite || i == sh->pd_idx) &&
 			    (!test_bit(R5_LOCKED, &dev->flags) 
 				    ) &&
-			    !test_bit(R5_UPTODATE, &dev->flags)) {
+			    !(test_bit(R5_UPTODATE, &dev->flags) || test_bit(R5_Wantcompute, &dev->flags))) {
 				if (test_bit(R5_Insync, &dev->flags)
 /*				    && !(!mddev->insync && i == sh->pd_idx) */
 					)
@@ -2298,7 +2349,7 @@ static void handle_stripe5(struct stripe_head *sh)
 			if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
 			    (!test_bit(R5_LOCKED, &dev->flags) 
 				    ) &&
-			    !test_bit(R5_UPTODATE, &dev->flags)) {
+			    !(test_bit(R5_UPTODATE, &dev->flags) || test_bit(R5_Wantcompute, &dev->flags))) {
 				if (test_bit(R5_Insync, &dev->flags)) rcw++;
 				else rcw += 2*disks;
 			}
@@ -2311,7 +2362,8 @@ static void handle_stripe5(struct stripe_head *sh)
 			for (i=disks; i--;) {
 				dev = &sh->dev[i];
 				if ((dev->towrite || i == sh->pd_idx) &&
-				    !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
+				    !test_bit(R5_LOCKED, &dev->flags) &&
+				    !(test_bit(R5_UPTODATE, &dev->flags) || test_bit(R5_Wantcompute, &dev->flags)) &&
 				    test_bit(R5_Insync, &dev->flags)) {
 					if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
 					{
@@ -2330,7 +2382,8 @@ static void handle_stripe5(struct stripe_head *sh)
 			for (i=disks; i--;) {
 				dev = &sh->dev[i];
 				if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
-				    !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
+				    !test_bit(R5_LOCKED, &dev->flags) &&
+				    !(test_bit(R5_UPTODATE, &dev->flags) || test_bit(R5_Wantcompute, &dev->flags)) &&
 				    test_bit(R5_Insync, &dev->flags)) {
 					if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
 					{
@@ -2345,8 +2398,16 @@ static void handle_stripe5(struct stripe_head *sh)
 				}
 			}
 		/* now if nothing is locked, and if we have enough data, we can start a write request */
-		if (locked == 0 && (rcw == 0 ||rmw == 0) &&
-		    !test_bit(STRIPE_BIT_DELAY, &sh->state))
+		/* since handle_stripe can be called at any time we need to handle the case
+		 * where a compute block operation has been submitted and then a subsequent
+		 * call wants to start a write request.  raid5_run_ops only handles the case where
+		 * compute block and postxor are requested simultaneously.  If this
+		 * is not the case then new writes need to be held off until the compute
+		 * completes.
+		 */
+		if ((req_compute || !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) &&
+			(locked == 0 && (rcw == 0 ||rmw == 0) &&
+			!test_bit(STRIPE_BIT_DELAY, &sh->state)))
 			locked += handle_write_operations5(sh, rcw, 0);
 	}
 

  parent reply	other threads:[~2007-01-23  3:32 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <1169522364.8362.113.camel@dwillia2-linux.ch.intel.com>
2007-01-23  3:28 ` [PATCH 01/12] dmaengine: add base support for the async_tx api Dan Williams
2007-01-23  3:29 ` [PATCH 02/12] dmaengine: add " Dan Williams
2007-01-23  3:29 ` [PATCH 03/12] md: add raid5_run_ops and support routines Dan Williams
2007-01-23  3:29 ` [PATCH 04/12] md: use raid5_run_ops for stripe cache operations Dan Williams
2007-01-23  3:29 ` [PATCH 05/12] md: move write operations to raid5_run_ops Dan Williams
2007-01-23  3:29 ` Dan Williams [this message]
2007-01-23  3:29 ` [PATCH 07/12] md: move raid5 parity checks " Dan Williams
2007-01-23  3:29 ` [PATCH 08/12] md: satisfy raid5 read requests via raid5_run_ops Dan Williams
2007-01-23  3:29 ` [PATCH 09/12] md: use async_tx and raid5_run_ops for raid5 expansion operations Dan Williams
2007-01-23  3:29 ` [PATCH 10/12] md: move raid5 io requests to raid5_run_ops Dan Williams
2007-01-23  3:29 ` [PATCH 11/12] md: remove raid5 compute_block and compute_parity5 Dan Williams
2007-01-23  3:29 ` [PATCH 12/12] dmaengine: driver for the iop32x, iop33x, and iop13xx raid engines Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070123032925.29114.10389.stgit@dwillia2-linux.ch.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=arjan@infradead.org \
    --cc=christopher.leech@intel.com \
    --cc=johnpol@2ka.mipt.ru \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    --subject='Re: [PATCH 06/12] md: move raid5 compute block operations to raid5_run_ops' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).