LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH -mm] sata_nv: cleanup ADMA error handling
@ 2007-01-15  2:40 Robert Hancock
  2007-01-15 14:28 ` Robert Hancock
  0 siblings, 1 reply; 2+ messages in thread
From: Robert Hancock @ 2007-01-15  2:40 UTC (permalink / raw)
  To: linux-kernel, linux-ide; +Cc: Jeff Garzik, Allen Martin

[-- Attachment #1: Type: text/plain, Size: 1618 bytes --]

Should apply to -mm tree or current libata-dev git tree. Sorry for 
attaching, but I seem to have misplaced the magic incantations to make 
Thunderbird not destroy inline patches..

---

This cleans up a few issues with the error handling in sata_nv in ADMA 
mode to make it more consistent with other NCQ-capable drivers like ahci 
and sata_sil24:

-When a command failed, we would effectively set AC_ERR_DEV on the 
queued command always. In the case of NCQ commands this prevents libata 
from doing a log page query to determine the details of the failed 
command, since it thinks we've already analyzed. Just set flags in the 
port ehi->err_mask, then freeze or abort and let libata figure out what 
went wrong.

-The code handled NV_ADMA_STAT_CPBERR as a "really bad error" which 
caused it to set error flags on every queued command. I don't know 
exactly what this flag means (no docs, grr!) but from what I can guess 
from the standard ADMA spec, it just means that one or more of the CPBs 
had an error, so we just need to go through and do our normal checks in 
this case.

-In the error_handler function the code would always go through and do 
an ADMA channel reset and also dump out the state of all the CPBs. This 
reset seems heinous in this situation since we haven't even decided to 
reset anything yet. The output seems redundant at this point since 
libata already dumps the state of all active commands on errors (and it 
also triggers at times when it shouldn't, like when suspending). Do the 
ADMA reset only on hardreset and remove the output.

Signed-off-by: Robert Hancock <hancockr@shaw.ca>


[-- Attachment #2: sata_nv-cleanup-error-handling.patch --]
[-- Type: text/plain, Size: 7999 bytes --]

--- linux-2.6.20-rc5/drivers/ata/sata_nv.c	2007-01-12 23:18:08.000000000 -0600
+++ linux-2.6.20-rc5edit/drivers/ata/sata_nv.c	2007-01-14 20:08:35.000000000 -0600
@@ -646,53 +646,64 @@ static unsigned int nv_adma_tf_to_cpb(st
 	return idx;
 }
 
-static void nv_adma_check_cpb(struct ata_port *ap, int cpb_num, int force_err)
+static int nv_adma_check_cpb(struct ata_port *ap, int cpb_num, int force_err)
 {
 	struct nv_adma_port_priv *pp = ap->private_data;
-	int complete = 0, have_err = 0;
 	u8 flags = pp->cpb[cpb_num].resp_flags;
 
 	VPRINTK("CPB %d, flags=0x%x\n", cpb_num, flags);
 
-	if (flags & NV_CPB_RESP_DONE) {
-		VPRINTK("CPB flags done, flags=0x%x\n", flags);
-		complete = 1;
-	}
-	if (flags & NV_CPB_RESP_ATA_ERR) {
-		ata_port_printk(ap, KERN_ERR, "CPB flags ATA err, flags=0x%x\n", flags);
-		have_err = 1;
-		complete = 1;
-	}
-	if (flags & NV_CPB_RESP_CMD_ERR) {
-		ata_port_printk(ap, KERN_ERR, "CPB flags CMD err, flags=0x%x\n", flags);
-		have_err = 1;
-		complete = 1;
-	}
-	if (flags & NV_CPB_RESP_CPB_ERR) {
-		ata_port_printk(ap, KERN_ERR, "CPB flags CPB err, flags=0x%x\n", flags);
-		have_err = 1;
-		complete = 1;
+	if(unlikely((force_err ||
+		     flags & (NV_CPB_RESP_ATA_ERR |
+			      NV_CPB_RESP_CMD_ERR |
+			      NV_CPB_RESP_CPB_ERR)))) {
+		struct ata_eh_info *ehi = &ap->eh_info;
+		int freeze = 0;
+		ata_ehi_clear_desc(ehi);
+		ata_ehi_push_desc(ehi, "CPB resp_flags 0x%x", flags );
+		if(flags & NV_CPB_RESP_ATA_ERR) {
+			ata_ehi_push_desc(ehi, ": ATA error");
+			ehi->err_mask |= AC_ERR_DEV;
+		}
+		else if(flags & NV_CPB_RESP_CMD_ERR) {
+			ata_ehi_push_desc(ehi, ": CMD error");
+			ehi->err_mask |= AC_ERR_DEV;
+		}
+		else if(flags & NV_CPB_RESP_CPB_ERR) {
+			ata_ehi_push_desc(ehi, ": CPB error");
+			ehi->err_mask |= AC_ERR_SYSTEM;
+			freeze = 1;
+		}
+		else {
+			/* notifier error, but no error in CPB flags? */
+			ehi->err_mask |= AC_ERR_OTHER;
+			freeze = 1;
+		}
+		/* Kill all commands. EH will determine what actually failed. */
+		if(freeze)
+			ata_port_freeze(ap);
+		else
+			ata_port_abort(ap);
+		return 1;
 	}
-	if(complete || force_err)
-	{
+		
+	if (flags & NV_CPB_RESP_DONE) {
 		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, cpb_num);
+		VPRINTK("CPB flags done, flags=0x%x\n", flags);
 		if(likely(qc)) {
-			u8 ata_status = 0;
-			/* Only use the ATA port status for non-NCQ commands.
+			/* Grab the ATA port status for non-NCQ commands.
 			   For NCQ commands the current status may have nothing to do with
 			   the command just completed. */
-			if(qc->tf.protocol != ATA_PROT_NCQ)
-				ata_status = readb(pp->ctl_block + (ATA_REG_STATUS * 4));
-
-			if(have_err || force_err)
-				ata_status |= ATA_ERR;
-
-			qc->err_mask |= ac_err_mask(ata_status);
+			if(qc->tf.protocol != ATA_PROT_NCQ) {
+				u8 ata_status = readb(pp->ctl_block + (ATA_REG_STATUS * 4));
+				qc->err_mask |= ac_err_mask(ata_status);
+			}
 			DPRINTK("Completing qc from tag %d with err_mask %u\n",cpb_num,
 				qc->err_mask);
 			ata_qc_complete(qc);
 		}
 	}
+	return 0;
 }
 
 static int nv_host_intr(struct ata_port *ap, u8 irq_stat)
@@ -743,7 +754,6 @@ static irqreturn_t nv_adma_interrupt(int
 			void __iomem *mmio = pp->ctl_block;
 			u16 status;
 			u32 gen_ctl;
-			int have_global_err = 0;
 			u32 notifier, notifier_error;
 
 			/* if in ATA register mode, use standard ata interrupt handler */
@@ -774,42 +784,50 @@ static irqreturn_t nv_adma_interrupt(int
 			readw(mmio + NV_ADMA_STAT); /* flush posted write */
 			rmb();
 
-			/* freeze if hotplugged */
-			if (unlikely(status & (NV_ADMA_STAT_HOTPLUG | NV_ADMA_STAT_HOTUNPLUG))) {
-				ata_port_printk(ap, KERN_NOTICE, "Hotplug event, freezing\n");
+			handled++; /* irq handled if we got here */
+
+			/* freeze if hotplugged or controller error */
+			if (unlikely(status & (NV_ADMA_STAT_HOTPLUG |
+					       NV_ADMA_STAT_HOTUNPLUG |
+					       NV_ADMA_STAT_TIMEOUT))) {
+				struct ata_eh_info *ehi = &ap->eh_info;
+				ata_ehi_clear_desc(ehi);
+				ata_ehi_push_desc(ehi, "ADMA status 0x%08x", status );
+				if(status & NV_ADMA_STAT_TIMEOUT) {
+					ehi->err_mask |= AC_ERR_SYSTEM;
+					ata_ehi_push_desc(ehi, ": timeout");
+				}
+				else if(status & NV_ADMA_STAT_HOTPLUG) {
+					ata_ehi_hotplugged(ehi);
+					ata_ehi_push_desc(ehi, ": hotplug");
+				}
+				else if(status & NV_ADMA_STAT_HOTUNPLUG) {
+					ata_ehi_hotplugged(ehi);
+					ata_ehi_push_desc(ehi, ": hot unplug");
+				}
 				ata_port_freeze(ap);
-				handled++;
 				continue;
 			}
 
-			if (status & NV_ADMA_STAT_TIMEOUT) {
-				ata_port_printk(ap, KERN_ERR, "timeout, stat=0x%x\n", status);
-				have_global_err = 1;
-			}
-			if (status & NV_ADMA_STAT_CPBERR) {
-				ata_port_printk(ap, KERN_ERR, "CPB error, stat=0x%x\n", status);
-				have_global_err = 1;
-			}
-			if ((status & NV_ADMA_STAT_DONE) || have_global_err) {
+			if (status & (NV_ADMA_STAT_DONE |
+				      NV_ADMA_STAT_CPBERR)) {
 				/** Check CPBs for completed commands */
 
 				if(ata_tag_valid(ap->active_tag))
 					/* Non-NCQ command */
-					nv_adma_check_cpb(ap, ap->active_tag, have_global_err ||
-						(notifier_error & (1 << ap->active_tag)));
+					nv_adma_check_cpb(ap, ap->active_tag, 
+						notifier_error & (1 << ap->active_tag));
 				else {
-					int pos;
+					int pos, error = 0;
 					u32 active = ap->sactive;
-					while( (pos = ffs(active)) ) {
+					while( (pos = ffs(active)) && !error) {
 						pos--;
-						nv_adma_check_cpb(ap, pos, have_global_err ||
-							(notifier_error & (1 << pos)) );
+						error = nv_adma_check_cpb(ap, pos,
+							notifier_error & (1 << pos) );
 						active &= ~(1 << pos );
 					}
 				}
 			}
-
-			handled++; /* irq handled if we got here */
 		}
 	}
 	
@@ -1373,13 +1391,7 @@ static int nv_hardreset(struct ata_port 
 	return sata_std_hardreset(ap, &dummy);
 }
 
-static void nv_error_handler(struct ata_port *ap)
-{
-	ata_bmdma_drive_eh(ap, ata_std_prereset, ata_std_softreset,
-			   nv_hardreset, ata_std_postreset);
-}
-
-static void nv_adma_error_handler(struct ata_port *ap)
+static int nv_adma_hardreset(struct ata_port *ap, unsigned int *class)
 {
 	struct nv_adma_port_priv *pp = ap->private_data;
 	if(!(pp->flags & NV_ADMA_PORT_REGISTER_MODE)) {
@@ -1387,28 +1399,9 @@ static void nv_adma_error_handler(struct
 		int i;
 		u16 tmp;
 
-		u32 notifier = readl(mmio + NV_ADMA_NOTIFIER);
-		u32 notifier_error = readl(mmio + NV_ADMA_NOTIFIER_ERROR);
-		u32 gen_ctl = readl(pp->gen_block + NV_ADMA_GEN_CTL);
-		u32 status = readw(mmio + NV_ADMA_STAT);
-
-		ata_port_printk(ap, KERN_ERR, "EH in ADMA mode, notifier 0x%X "
-			"notifier_error 0x%X gen_ctl 0x%X status 0x%X\n",
-			notifier, notifier_error, gen_ctl, status);
-
-		for( i=0;i<NV_ADMA_MAX_CPBS;i++) {
-			struct nv_adma_cpb *cpb = &pp->cpb[i];
-			if( cpb->ctl_flags || cpb->resp_flags )
-				ata_port_printk(ap, KERN_ERR,
-					"CPB %d: ctl_flags 0x%x, resp_flags 0x%x\n",
-					i, cpb->ctl_flags, cpb->resp_flags);
-		}
-
-		/* Push us back into port register mode for error handling. */
+		/* Push us back into port register mode for hardreset. */
 		nv_adma_register_mode(ap);
 
-		ata_port_printk(ap, KERN_ERR, "Resetting port\n");
-
 		/* Mark all of the CPBs as invalid to prevent them from being executed */
 		for( i=0;i<NV_ADMA_MAX_CPBS;i++)
 			pp->cpb[i].ctl_flags &= ~NV_CPB_CTL_CPB_VALID;
@@ -1424,11 +1417,21 @@ static void nv_adma_error_handler(struct
 		writew(tmp & ~NV_ADMA_CTL_CHANNEL_RESET, mmio + NV_ADMA_CTL);
 		readl( mmio + NV_ADMA_CTL );	/* flush posted write */
 	}
+	return nv_hardreset(ap, class);
+}
 
+static void nv_error_handler(struct ata_port *ap)
+{
 	ata_bmdma_drive_eh(ap, ata_std_prereset, ata_std_softreset,
 			   nv_hardreset, ata_std_postreset);
 }
 
+static void nv_adma_error_handler(struct ata_port *ap)
+{
+	ata_bmdma_drive_eh(ap, ata_std_prereset, ata_std_softreset,
+			   nv_adma_hardreset, ata_std_postreset);
+}
+
 static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	static int printed_version = 0;

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2007-01-15 14:31 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-01-15  2:40 [PATCH -mm] sata_nv: cleanup ADMA error handling Robert Hancock
2007-01-15 14:28 ` Robert Hancock

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).