LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Jacob Pan <jacob.jun.pan@linux.intel.com>
To: iommu@lists.linux-foundation.org,
	LKML <linux-kernel@vger.kernel.org>,
	Joerg Roedel <joro@8bytes.org>,
	David Woodhouse <dwmw2@infradead.org>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Alex Williamson <alex.williamson@redhat.com>,
	Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>,
	"Liu, Yi L" <yi.l.liu@intel.com>,
	"Tian, Kevin" <kevin.tian@intel.com>,
	Raj Ashok <ashok.raj@intel.com>,
	Jean Delvare <khali@linux-fr.org>,
	"Christoph Hellwig" <hch@infradead.org>,
	"Lu Baolu" <baolu.lu@linux.intel.com>,
	Jacob Pan <jacob.jun.pan@linux.intel.com>,
	Liu, Yi L <yi.l.liu@linux.intel.com>
Subject: [PATCH v5 17/23] iommu/vt-d: report non-recoverable faults to device
Date: Fri, 11 May 2018 13:54:09 -0700	[thread overview]
Message-ID: <1526072055-86990-18-git-send-email-jacob.jun.pan@linux.intel.com> (raw)
In-Reply-To: <1526072055-86990-1-git-send-email-jacob.jun.pan@linux.intel.com>

Currently, the DMAR fault IRQ handler does nothing more than a
rate-limited printk; no critical hardware handling needs to be done
in IRQ context.
For some use cases, such as vIOMMU, it can be useful to report
non-recoverable faults outside the host IOMMU subsystem. DMAR faults
can come from both DMA and interrupt remapping, and the fault
interrupt has to be set up early, before threaded IRQs are available.
This patch adds an option and a workqueue so that, when fault
reporting is requested, the DMAR fault IRQ handler can use the IOMMU
fault reporting API to report the faults.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Liu, Yi L <yi.l.liu@linux.intel.com>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
---
 drivers/iommu/dmar.c        | 159 ++++++++++++++++++++++++++++++++++++++++++--
 drivers/iommu/intel-iommu.c |   6 +-
 include/linux/dmar.h        |   2 +-
 include/linux/intel-iommu.h |   1 +
 4 files changed, 159 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 0b5b052..ef846e3 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1110,6 +1110,12 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
 	return err;
 }
 
+static inline void dmar_free_fault_wq(struct intel_iommu *iommu)
+{
+	if (iommu->fault_wq)	/* swap in NULL so a second call is a no-op */
+		destroy_workqueue(xchg(&iommu->fault_wq, NULL));
+}
+
 static void free_iommu(struct intel_iommu *iommu)
 {
 	if (intel_iommu_enabled) {
@@ -1126,6 +1132,7 @@ static void free_iommu(struct intel_iommu *iommu)
 		free_irq(iommu->irq, iommu);
 		dmar_free_hwirq(iommu->irq);
 		iommu->irq = 0;
+		dmar_free_fault_wq(iommu);
 	}
 
 	if (iommu->qi) {
@@ -1554,6 +1561,31 @@ static const char *irq_remap_fault_reasons[] =
 	"Blocked an interrupt request due to source-id verification failure",
 };
 
+/* fault reasons; enumerators are compared against the raw hardware code */
+enum intel_iommu_fault_reason {
+	INTEL_IOMMU_FAULT_REASON_SW,
+	INTEL_IOMMU_FAULT_REASON_ROOT_NOT_PRESENT,
+	INTEL_IOMMU_FAULT_REASON_CONTEXT_NOT_PRESENT,
+	INTEL_IOMMU_FAULT_REASON_CONTEXT_INVALID,
+	INTEL_IOMMU_FAULT_REASON_BEYOND_ADDR_WIDTH,
+	INTEL_IOMMU_FAULT_REASON_PTE_WRITE_ACCESS,
+	INTEL_IOMMU_FAULT_REASON_PTE_READ_ACCESS,
+	INTEL_IOMMU_FAULT_REASON_NEXT_PT_INVALID,
+	INTEL_IOMMU_FAULT_REASON_ROOT_ADDR_INVALID,
+	INTEL_IOMMU_FAULT_REASON_CONTEXT_PTR_INVALID,
+	INTEL_IOMMU_FAULT_REASON_NONE_ZERO_RTP,
+	INTEL_IOMMU_FAULT_REASON_NONE_ZERO_CTP,
+	INTEL_IOMMU_FAULT_REASON_NONE_ZERO_PTE,
+	NR_INTEL_IOMMU_FAULT_REASON,	/* count of the reasons above */
+};
+
+/* bitmask (bit n = reason n) of faults reportable outside IOMMU subsystem */
+#define INTEL_IOMMU_FAULT_REASON_ALLOWED			\
+	((1ULL << INTEL_IOMMU_FAULT_REASON_BEYOND_ADDR_WIDTH) |	\
+		(1ULL << INTEL_IOMMU_FAULT_REASON_PTE_WRITE_ACCESS) |	\
+		(1ULL << INTEL_IOMMU_FAULT_REASON_PTE_READ_ACCESS))
+
+
 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
 {
 	if (fault_reason >= 0x20 && (fault_reason - 0x20 <
@@ -1634,11 +1666,91 @@ void dmar_msi_read(int irq, struct msi_msg *msg)
 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
 }
 
+/* Map a VT-d fault reason code to the generic IOMMU fault reason. */
+static enum iommu_fault_reason to_iommu_fault_reason(u8 reason)
+{
+	switch (reason) {
+	case INTEL_IOMMU_FAULT_REASON_PTE_READ_ACCESS:
+	case INTEL_IOMMU_FAULT_REASON_PTE_WRITE_ACCESS:
+	case INTEL_IOMMU_FAULT_REASON_NEXT_PT_INVALID:
+		return IOMMU_FAULT_REASON_PERMISSION;
+	case INTEL_IOMMU_FAULT_REASON_SW:
+	case INTEL_IOMMU_FAULT_REASON_ROOT_NOT_PRESENT:
+	case INTEL_IOMMU_FAULT_REASON_ROOT_ADDR_INVALID:
+	case INTEL_IOMMU_FAULT_REASON_CONTEXT_NOT_PRESENT:
+	case INTEL_IOMMU_FAULT_REASON_CONTEXT_INVALID:
+	case INTEL_IOMMU_FAULT_REASON_CONTEXT_PTR_INVALID:
+	case INTEL_IOMMU_FAULT_REASON_BEYOND_ADDR_WIDTH:
+		return IOMMU_FAULT_REASON_INTERNAL;
+	default:
+		/* codes past the enum are warned about; the rest are silent */
+		if (reason >= NR_INTEL_IOMMU_FAULT_REASON)
+			pr_warn("unknown DMAR fault reason %d\n", reason);
+		return IOMMU_FAULT_REASON_UNKNOWN;
+	}
+}
+
+struct dmar_fault_work {
+	struct work_struct fault_work;	/* queued on iommu->fault_wq */
+	struct intel_iommu *iommu;	/* IOMMU given to dmar_fault_do_one() */
+	u64 addr;			/* faulting address */
+	int type;			/* non-zero: DMA read, zero: DMA write */
+	int fault_type;			/* as set by dmar_get_fault_reason() */
+	enum intel_iommu_fault_reason reason;	/* raw hardware reason code */
+	u16 sid;			/* source ID of the requester */
+};
+
+/*
+ * Work handler: report one queued unrecoverable DMA fault to the offending
+ * PCI device through iommu_report_device_fault(), then free the work item.
+ */
+static void report_fault_to_device(struct work_struct *work)
+{
+	struct dmar_fault_work *dfw = container_of(work, struct dmar_fault_work,
+						fault_work);
+	struct iommu_fault_event event;
+	struct pci_dev *pdev;
+
+	memset(&event, 0, sizeof(struct iommu_fault_event));
+
+	/*
+	 * Range check first: reason is the raw u8 hardware code and can exceed
+	 * the enum, so it must not be used as a shift count unchecked.
+	 */
+	if (dfw->reason >= NR_INTEL_IOMMU_FAULT_REASON ||
+	    !((1ULL << dfw->reason) & INTEL_IOMMU_FAULT_REASON_ALLOWED)) {
+		pr_debug("Fault reason %d not allowed to report to device\n",
+			dfw->reason);
+		goto free_work;
+	}
+
+	/* resolve the offending device from the fault's source ID */
+	pdev = pci_get_domain_bus_and_slot(dfw->iommu->segment,
+			PCI_BUS_NUM(dfw->sid),
+			PCI_DEVFN(PCI_SLOT(dfw->sid), PCI_FUNC(dfw->sid)));
+	if (!pdev) {
+		pr_warn("No PCI device found for source ID %x\n", dfw->sid);
+		goto free_work;
+	}
+	event.reason = to_iommu_fault_reason(dfw->reason);
+	event.addr = dfw->addr;
+	event.type = IOMMU_FAULT_DMA_UNRECOV;
+	event.prot = dfw->type ? IOMMU_READ : IOMMU_WRITE;
+	dev_warn(&pdev->dev, "report device unrecoverable fault: %d, %x, %d\n",
+		event.reason, dfw->sid, event.type);
+	iommu_report_device_fault(&pdev->dev, &event);
+	pci_dev_put(pdev);
+
+free_work:
+	kfree(dfw);
+}
+
 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
 		u8 fault_reason, u16 source_id, unsigned long long addr)
 {
 	const char *reason;
 	int fault_type;
+	struct dmar_fault_work *dfw;
 
 	reason = dmar_get_fault_reason(fault_reason, &fault_type);
 
@@ -1647,11 +1759,29 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
 			source_id >> 8, PCI_SLOT(source_id & 0xFF),
 			PCI_FUNC(source_id & 0xFF), addr >> 48,
 			fault_reason, reason);
-	else
+	else {
 		pr_err("[%s] Request device [%02x:%02x.%d] fault addr %llx [fault reason %02d] %s\n",
 		       type ? "DMA Read" : "DMA Write",
 		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
 		       PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
+	}
+
+	dfw = kmalloc(sizeof(*dfw), GFP_ATOMIC);
+	if (!dfw)
+		return -ENOMEM;
+
+	INIT_WORK(&dfw->fault_work, report_fault_to_device);
+	dfw->addr = addr;
+	dfw->type = type;
+	dfw->fault_type = fault_type;
+	dfw->reason = fault_reason;
+	dfw->sid = source_id;
+	dfw->iommu = iommu;
+	if (!queue_work(iommu->fault_wq, &dfw->fault_work)) {
+		kfree(dfw);
+		return -EBUSY;
+	}
+
 	return 0;
 }
 
@@ -1731,10 +1861,28 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-int dmar_set_interrupt(struct intel_iommu *iommu)
+/* Allocate iommu->fault_wq once; returns 0 on success or -ENOMEM. */
+static int dmar_set_fault_wq(struct intel_iommu *iommu)
+{
+	if (iommu->fault_wq)
+		return 0;
+
+	/* the first argument is a format string, so pass the name as data */
+	iommu->fault_wq = alloc_ordered_workqueue("%s", 0, iommu->name);
+
+	return iommu->fault_wq ? 0 : -ENOMEM;
+}
+
+int dmar_set_interrupt(struct intel_iommu *iommu, bool queue_fault)
 {
 	int irq, ret;
 
+	/* fault can be reported back to device drivers via a wq */
+	if (queue_fault) {
+		ret = dmar_set_fault_wq(iommu);
+		if (ret)
+			pr_err("Failed to create fault handling workqueue\n");
+	}
 	/*
 	 * Check if the fault interrupt is already initialized.
 	 */
@@ -1748,10 +1896,11 @@ int dmar_set_interrupt(struct intel_iommu *iommu)
 		pr_err("No free IRQ vectors\n");
 		return -EINVAL;
 	}
-
 	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
-	if (ret)
+	if (ret) {
 		pr_err("Can't request irq\n");
+		dmar_free_fault_wq(iommu);
+	}
 	return ret;
 }
 
@@ -1765,7 +1914,7 @@ int __init enable_drhd_fault_handling(void)
 	 */
 	for_each_iommu(iommu, drhd) {
 		u32 fault_status;
-		int ret = dmar_set_interrupt(iommu);
+		int ret = dmar_set_interrupt(iommu, false);
 
 		if (ret) {
 			pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 684bd98..3949b3cf 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3401,10 +3401,10 @@ static int __init init_dmars(void)
 				goto free_iommu;
 		}
 #endif
-		ret = dmar_set_interrupt(iommu);
+		ret = dmar_set_interrupt(iommu, true);
+
 		if (ret)
 			goto free_iommu;
-
 		if (!translation_pre_enabled(iommu))
 			iommu_enable_translation(iommu);
 
@@ -4291,7 +4291,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
 			goto disable_iommu;
 	}
 #endif
-	ret = dmar_set_interrupt(iommu);
+	ret = dmar_set_interrupt(iommu, true);
 	if (ret)
 		goto disable_iommu;
 
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index e2433bc..21f2162 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -278,7 +278,7 @@ extern void dmar_msi_unmask(struct irq_data *data);
 extern void dmar_msi_mask(struct irq_data *data);
 extern void dmar_msi_read(int irq, struct msi_msg *msg);
 extern void dmar_msi_write(int irq, struct msi_msg *msg);
-extern int dmar_set_interrupt(struct intel_iommu *iommu);
+extern int dmar_set_interrupt(struct intel_iommu *iommu, bool queue_fault);
 extern irqreturn_t dmar_fault(int irq, void *dev_id);
 extern int dmar_alloc_hwirq(int id, int node, void *arg);
 extern void dmar_free_hwirq(int irq);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 5ac0c28..b3a26c7 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -472,6 +472,7 @@ struct intel_iommu {
 	struct iommu_device iommu;  /* IOMMU core code handle */
 	int		node;
 	u32		flags;      /* Software defined flags */
+	struct workqueue_struct *fault_wq; /* Reporting IOMMU fault to device */
 };
 
 /* PCI domain-device relationship */
-- 
2.7.4

  parent reply	other threads:[~2018-05-11 20:54 UTC|newest]

Thread overview: 81+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-11 20:53 [PATCH v5 00/23] IOMMU and VT-d driver support for Shared Virtual Address (SVA) Jacob Pan
2018-05-11 20:53 ` [PATCH v5 01/23] iommu: introduce bind_pasid_table API function Jacob Pan
2018-08-23 16:34   ` Auger Eric
2018-08-24 12:47     ` Liu, Yi L
2018-08-24 13:20       ` Auger Eric
2018-08-28 17:04         ` Jacob Pan
2018-08-24 15:00   ` Auger Eric
2018-08-28  5:14     ` Jacob Pan
2018-08-28  8:34       ` Auger Eric
2018-08-28 16:36         ` Jacob Pan
2018-05-11 20:53 ` [PATCH v5 02/23] iommu/vt-d: move device_domain_info to header Jacob Pan
2018-05-11 20:53 ` [PATCH v5 03/23] iommu/vt-d: add a flag for pasid table bound status Jacob Pan
2018-05-13  7:33   ` Lu Baolu
2018-05-14 18:51     ` Jacob Pan
2018-05-13  8:01   ` Lu Baolu
2018-05-14 18:52     ` Jacob Pan
2018-05-11 20:53 ` [PATCH v5 04/23] iommu/vt-d: add bind_pasid_table function Jacob Pan
2018-05-13  9:29   ` Lu Baolu
2018-05-14 20:22     ` Jacob Pan
2018-05-11 20:53 ` [PATCH v5 05/23] iommu: introduce iommu invalidate API function Jacob Pan
2018-05-11 20:53 ` [PATCH v5 06/23] iommu/vt-d: add definitions for PFSID Jacob Pan
2018-05-14  1:36   ` Lu Baolu
2018-05-14 20:30     ` Jacob Pan
2018-05-11 20:53 ` [PATCH v5 07/23] iommu/vt-d: fix dev iotlb pfsid use Jacob Pan
2018-05-14  1:52   ` Lu Baolu
2018-05-14 20:38     ` Jacob Pan
2018-05-11 20:54 ` [PATCH v5 08/23] iommu/vt-d: support flushing more translation cache types Jacob Pan
2018-05-14  2:18   ` Lu Baolu
2018-05-14 20:46     ` Jacob Pan
2018-05-17  8:44   ` kbuild test robot
2018-05-11 20:54 ` [PATCH v5 09/23] iommu/vt-d: add svm/sva invalidate function Jacob Pan
2018-05-14  3:35   ` Lu Baolu
2018-05-14 20:49     ` Jacob Pan
2018-05-11 20:54 ` [PATCH v5 10/23] iommu: introduce device fault data Jacob Pan
2018-09-21 10:07   ` Auger Eric
2018-09-21 17:05     ` Jacob Pan
2018-09-26 10:20       ` Auger Eric
2018-05-11 20:54 ` [PATCH v5 11/23] driver core: add per device iommu param Jacob Pan
2018-05-14  5:27   ` Lu Baolu
2018-05-14 20:52     ` Jacob Pan
2018-05-11 20:54 ` [PATCH v5 12/23] iommu: add a timeout parameter for prq response Jacob Pan
2018-05-11 20:54 ` [PATCH v5 13/23] iommu: introduce device fault report API Jacob Pan
2018-05-14  6:01   ` Lu Baolu
2018-05-14 20:55     ` Jacob Pan
2018-05-15  6:52       ` Lu Baolu
2018-05-17 11:41   ` Liu, Yi L
2018-05-17 15:59     ` Jacob Pan
2018-05-17 23:22       ` Liu, Yi L
2018-05-21 23:03         ` Jacob Pan
2018-09-06  9:25   ` Auger Eric
2018-09-06 12:42     ` Jean-Philippe Brucker
2018-09-06 13:14       ` Auger Eric
2018-09-06 17:06         ` Jean-Philippe Brucker
2018-09-07  7:11           ` Auger Eric
2018-09-07 11:23             ` Jean-Philippe Brucker
2018-09-14 13:24   ` Auger Eric
2018-09-17 16:57     ` Jacob Pan
2018-09-25 14:58   ` Jean-Philippe Brucker
2018-09-25 22:17     ` Jacob Pan
2018-09-26 10:14       ` Jean-Philippe Brucker
2018-05-11 20:54 ` [PATCH v5 14/23] iommu: introduce page response function Jacob Pan
2018-05-14  6:39   ` Lu Baolu
2018-05-29 16:13     ` Jacob Pan
2018-09-10 14:52   ` Auger Eric
2018-09-10 17:50     ` Jacob Pan
2018-09-10 19:06       ` Auger Eric
2018-05-11 20:54 ` [PATCH v5 15/23] iommu: handle page response timeout Jacob Pan
2018-05-14  7:43   ` Lu Baolu
2018-05-29 16:20     ` Jacob Pan
2018-05-30  7:46       ` Lu Baolu
2018-05-11 20:54 ` [PATCH v5 16/23] iommu/config: add build dependency for dmar Jacob Pan
2018-05-11 20:54 ` Jacob Pan [this message]
2018-05-14  8:17   ` [PATCH v5 17/23] iommu/vt-d: report non-recoverable faults to device Lu Baolu
2018-05-29 17:33     ` Jacob Pan
2018-05-11 20:54 ` [PATCH v5 18/23] iommu/intel-svm: report device page request Jacob Pan
2018-05-11 20:54 ` [PATCH v5 19/23] iommu/intel-svm: replace dev ops with fault report API Jacob Pan
2018-05-11 20:54 ` [PATCH v5 20/23] iommu/intel-svm: do not flush iotlb for viommu Jacob Pan
2018-05-11 20:54 ` [PATCH v5 21/23] iommu/vt-d: add intel iommu page response function Jacob Pan
2018-05-11 20:54 ` [PATCH v5 22/23] trace/iommu: add sva trace events Jacob Pan
2018-05-11 20:54 ` [PATCH v5 23/23] iommu: use sva invalidate and device fault trace event Jacob Pan
2018-05-29 15:54 ` [PATCH v5 00/23] IOMMU and VT-d driver support for Shared Virtual Address (SVA) Jacob Pan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1526072055-86990-18-git-send-email-jacob.jun.pan@linux.intel.com \
    --to=jacob.jun.pan@linux.intel.com \
    --cc=alex.williamson@redhat.com \
    --cc=ashok.raj@intel.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=dwmw2@infradead.org \
    --cc=gregkh@linuxfoundation.org \
    --cc=hch@infradead.org \
    --cc=iommu@lists.linux-foundation.org \
    --cc=jean-philippe.brucker@arm.com \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=khali@linux-fr.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rafael.j.wysocki@intel.com \
    --cc=yi.l.liu@intel.com \
    --cc=yi.l.liu@linux.intel.com \
    --subject='Re: [PATCH v5 17/23] iommu/vt-d: report non-recoverable faults to device' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).