LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: ebiederm@xmission.com (Eric W. Biederman)
To: Andrew Morton <akpm@osdl.org>
Cc: linux-kernel@vger.kernel.org,
	Zwane Mwaikambo <zwane@infradead.org>,
	Ashok Raj <ashok.raj@intel.com>, Ingo Molnar <mingo@elte.hu>,
	"Lu, Yinghai" <yinghai.lu@amd.com>,
	Natalie Protasevich <protasnb@gmail.com>, Andi Kleen <ak@suse.de>,
	"Siddha, Suresh B" <suresh.b.siddha@intel.com>,
	Linus Torvalds <torvalds@linux-foundation.org>
Subject: [PATCH 13/14] x86_64 irq: Safely cleanup an irq after moving it.
Date: Fri, 23 Feb 2007 04:40:58 -0700	[thread overview]
Message-ID: <m1lkipt839.fsf_-_@ebiederm.dsl.xmission.com> (raw)
In-Reply-To: <m1ps81t87h.fsf_-_@ebiederm.dsl.xmission.com> (Eric W. Biederman's message of "Fri, 23 Feb 2007 04:38:26 -0700")


The problem:  After moving an interrupt, when is it safe to tear down
the data structures for receiving the interrupt at the old location?

With a normal pci device it is possible to issue a read to a device
to flush all posted writes.  This does not work for the oldest ioapics
because they are on a 3-wire apic bus which is a completely different
data path.  For some more modern ioapics when everything is using
front side bus delivery you can flush interrupts by simply issuing a
read to the ioapic.  For other modern ioapics empirical testing has
shown that this does not work.

So it appears the only reliable way to know that the last of the irqs
sent by an ioapic before it was reprogrammed has been received is to
receive the first irq sent by the ioapic after it was reprogrammed.

Once we know the last irq message has been received from an ioapic
into a local apic we then need to know that irq message has been
processed through the local apics.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 arch/x86_64/kernel/entry.S   |    3 ++
 arch/x86_64/kernel/i8259.c   |    6 +++-
 arch/x86_64/kernel/io_apic.c |   78 +++++++++++++++++++++++++++++++++++++++---
 include/asm-x86_64/hw_irq.h  |    9 ++++-
 4 files changed, 88 insertions(+), 8 deletions(-)

diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 9f5dac6..ed4350c 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -675,6 +675,9 @@ END(invalidate_interrupt\num)
 ENTRY(call_function_interrupt)
 	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
 END(call_function_interrupt)
+ENTRY(irq_move_cleanup_interrupt)
+	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
+END(irq_move_cleanup_interrupt)
 #endif
 
 ENTRY(apic_timer_interrupt)
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index 45d8563..21d95b7 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -450,6 +450,7 @@ void spurious_interrupt(void);
 void error_interrupt(void);
 void reschedule_interrupt(void);
 void call_function_interrupt(void);
+void irq_move_cleanup_interrupt(void);
 void invalidate_interrupt0(void);
 void invalidate_interrupt1(void);
 void invalidate_interrupt2(void);
@@ -537,7 +538,10 @@ void __init init_IRQ(void)
 
 	/* IPI for generic function call */
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-#endif	
+
+	/* Low priority IPI to cleanup after moving an irq */
+	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+#endif
 	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 	set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 8dede0b..48593f6 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -36,6 +36,7 @@
 #include <acpi/acpi_bus.h>
 #endif
 
+#include <asm/idle.h>
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/desc.h>
@@ -49,7 +50,10 @@
 
 struct irq_cfg {
 	cpumask_t domain;
+	cpumask_t old_domain;
+	unsigned move_cleanup_count;
 	u8 vector;
+	u8 move_in_progress : 1;
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -652,7 +656,6 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
-	cpumask_t old_mask = CPU_MASK_NONE;
 	unsigned int old_vector;
 	int cpu;
 	struct irq_cfg *cfg;
@@ -663,18 +666,20 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	/* Only try and allocate irqs on cpus that are present */
 	cpus_and(mask, mask, cpu_online_map);
 
+	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+		return -EBUSY;
+
 	old_vector = cfg->vector;
 	if (old_vector) {
 		cpumask_t tmp;
 		cpus_and(tmp, cfg->domain, mask);
 		if (!cpus_empty(tmp))
 			return 0;
-		cpus_and(old_mask, cfg->domain, cpu_online_map);
 	}
 
 	for_each_cpu_mask(cpu, mask) {
 		cpumask_t domain, new_mask;
-		int new_cpu, old_cpu;
+		int new_cpu;
 		int vector, offset;
 
 		domain = vector_allocation_domain(cpu);
@@ -699,8 +704,10 @@ next:
 		/* Found one! */
 		current_vector = vector;
 		current_offset = offset;
-		for_each_cpu_mask(old_cpu, old_mask)
-			per_cpu(vector_irq, old_cpu)[old_vector] = -1;
+		if (old_vector) {
+			cfg->move_in_progress = 1;
+			cfg->old_domain = cfg->domain;
+		}
 		for_each_cpu_mask(new_cpu, new_mask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq;
 		cfg->vector = vector;
@@ -1360,8 +1367,68 @@ static int ioapic_retrigger_irq(unsigned int irq)
  * races.
  */
 
+#ifdef CONFIG_SMP
+asmlinkage void smp_irq_move_cleanup_interrupt(void)
+{
+	unsigned vector, me;
+	ack_APIC_irq();
+	exit_idle();
+	irq_enter();
+
+	me = smp_processor_id();
+	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+		unsigned int irq;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+		irq = __get_cpu_var(vector_irq)[vector];
+		if (irq >= NR_IRQS)
+			continue;
+
+		desc = irq_desc + irq;
+		cfg = irq_cfg + irq;
+		spin_lock(&desc->lock);
+		if (!cfg->move_cleanup_count)
+			goto unlock;
+
+		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+			goto unlock;
+
+		__get_cpu_var(vector_irq)[vector] = -1;
+		cfg->move_cleanup_count--;
+unlock:
+		spin_unlock(&desc->lock);
+	}
+
+	irq_exit();
+}
+
+static void irq_complete_move(unsigned int irq)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	unsigned vector, me;
+
+	if (likely(!cfg->move_in_progress))
+		return;
+
+	vector = ~get_irq_regs()->orig_rax;
+	me = smp_processor_id();
+	if ((vector == cfg->vector) &&
+	    cpu_isset(smp_processor_id(), cfg->domain)) {
+		cpumask_t cleanup_mask;
+
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+}
+#else
+static inline void irq_complete_move(unsigned int irq) {}
+#endif
+
 static void ack_apic_edge(unsigned int irq)
 {
+	irq_complete_move(irq);
 	move_native_irq(irq);
 	ack_APIC_irq();
 }
@@ -1370,6 +1437,7 @@ static void ack_apic_level(unsigned int irq)
 {
 	int do_unmask_irq = 0;
 
+	irq_complete_move(irq);
 #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
 	/* If we are moving the irq we need to mask it */
 	if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h
index dc395ed..2e4b7a5 100644
--- a/include/asm-x86_64/hw_irq.h
+++ b/include/asm-x86_64/hw_irq.h
@@ -32,10 +32,15 @@
 #define IA32_SYSCALL_VECTOR	0x80
 
 
+/* Reserve the lowest usable priority level 0x20 - 0x2f for triggering
+ * cleanup after irq migration.
+ */
+#define IRQ_MOVE_CLEANUP_VECTOR	FIRST_EXTERNAL_VECTOR
+ 
 /*
  * Vectors 0x20-0x2f are used for ISA interrupts.
  */
-#define IRQ0_VECTOR		FIRST_EXTERNAL_VECTOR
+#define IRQ0_VECTOR		FIRST_EXTERNAL_VECTOR + 0x10
 #define IRQ1_VECTOR		IRQ0_VECTOR + 1
 #define IRQ2_VECTOR		IRQ0_VECTOR + 2
 #define IRQ3_VECTOR		IRQ0_VECTOR + 3
@@ -82,7 +87,7 @@
 
 /*
  * First APIC vector available to drivers: (vectors 0x30-0xee)
- * we start at 0x31 to spread out vectors evenly between priority
+ * we start at 0x41 to spread out vectors evenly between priority
  * levels. (0x80 is the syscall vector)
  */
 #define FIRST_DEVICE_VECTOR	(IRQ15_VECTOR + 2)
-- 
1.5.0.g53756


  reply	other threads:[~2007-02-23 11:42 UTC|newest]

Thread overview: 71+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <200701221116.13154.luigi.genoni@pirelli.com>
2007-01-22 17:14 ` System crash after "No irq handler for vector" linux 2.6.19 Eric W. Biederman
     [not found]   ` <200701231051.32945.luigi.genoni@pirelli.com>
2007-01-23 12:18     ` Eric W. Biederman
     [not found]       ` <Pine.LNX.4.64.0701232052330.32111@baldios.it.pirelli.com>
2007-01-31  8:39         ` Eric W. Biederman
     [not found]           ` <200701311549.22512.luigi.genoni@pirelli.com>
2007-02-01  5:56             ` [PATCH] x86_64: Survive having no irq mapping for a vector Eric W. Biederman
2007-02-01  5:59             ` System crash after "No irq handler for vector" linux 2.6.19 Eric W. Biederman
2007-02-01  7:20             ` Eric W. Biederman
     [not found]               ` <200702021848.55921.luigi.genoni@pirelli.com>
2007-02-02 18:02                 ` Eric W. Biederman
     [not found]                   ` <200702021905.39922.luigi.genoni@pirelli.com>
2007-02-02 18:32                     ` Eric W. Biederman
2007-02-03  0:31                     ` [PATCH 1/2] x86_64 irq: Simplfy __assign_irq_vector Eric W. Biederman
2007-02-03  0:35                       ` [PATCH 2/2] x86_64 irq: Handle irqs pending in IRR during irq migration Eric W. Biederman
2007-02-03  1:05                         ` Andrew Morton
2007-02-03  1:39                           ` Eric W. Biederman
2007-02-03  2:01                             ` Andrew Morton
2007-02-03  7:32                           ` Arjan van de Ven
2007-02-03  7:55                             ` Eric W. Biederman
2007-02-03 14:31                               ` l.genoni
2007-02-03 10:01                         ` Andi Kleen
2007-02-03 10:22                           ` Eric W. Biederman
2007-02-03 10:26                             ` Andi Kleen
2007-02-06  7:36                         ` Ingo Molnar
2007-02-06  8:57                           ` Eric W. Biederman
     [not found]                           ` <200702061012.25910.luigi.genoni@pirelli.com>
2007-02-06 22:05                             ` Eric W. Biederman
2007-02-06 22:16                           ` Eric W. Biederman
2007-02-06 22:25                             ` Ingo Molnar
2007-02-07  2:33                               ` Eric W. Biederman
2007-02-08 11:48                               ` Eric W. Biederman
2007-02-08 20:19                                 ` Eric W. Biederman
2007-02-09  6:40                                   ` Eric W. Biederman
2007-02-10 23:52                                     ` What are the real ioapic rte programming constraints? Eric W. Biederman
2007-02-11  5:57                                       ` Zwane Mwaikambo
2007-02-11 10:20                                         ` Eric W. Biederman
2007-02-11 16:16                                           ` Zwane Mwaikambo
2007-02-11 22:01                                             ` Eric W. Biederman
2007-02-12  1:05                                               ` Zwane Mwaikambo
2007-02-12  4:51                                                 ` Eric W. Biederman
2007-02-23 10:51                                                   ` Conclusions from my investigation about ioapic programming Eric W. Biederman
2007-02-23 11:10                                                     ` [PATCH 0/14] x86_64 irq related fixes and cleanups Eric W. Biederman
2007-02-23 11:11                                                       ` [PATCH 01/14] x86_64 irq: Simplfy __assign_irq_vector Eric W. Biederman
2007-02-23 11:13                                                         ` [PATCH 02/14] irq: Remove set_native_irq_info Eric W. Biederman
2007-02-23 11:15                                                           ` [PATCH 03/14] x86_64 irq: Kill declaration of removed array, interrupt Eric W. Biederman
2007-02-23 11:16                                                             ` [PATCH 04/14] x86_64 irq: Remove the unused vector parameter from ioapic_register_intr Eric W. Biederman
2007-02-23 11:19                                                               ` [PATCH 05/14] x86_64 irq: Refactor setup_IO_APIC_irq Eric W. Biederman
2007-02-23 11:20                                                                 ` [PATCH 06/14] x86_64 irq: Simplfiy the set_affinity logic Eric W. Biederman
2007-02-23 11:23                                                                   ` [PATCH 07/14] x86_64 irq: In __DO_ACTION perform the FINAL action for every entry Eric W. Biederman
2007-02-23 11:26                                                                     ` [PATCH 08/14] x86_64 irq: Use NR_IRQS not NR_IRQ_VECTORS Eric W. Biederman
2007-02-23 11:32                                                                       ` [PATCH 09/14] x86_64 irq: Begin consolidating per_irq data in structures Eric W. Biederman
2007-02-23 11:35                                                                         ` [PATCH 10/14] x86_64 irq: Simplify assign_irq_vector's arguments Eric W. Biederman
2007-02-23 11:36                                                                           ` [PATCH 11/14] x86_64 irq: Remove unnecessary irq 0 setup Eric W. Biederman
2007-02-23 11:38                                                                             ` [PATCH 12/14] x86_64 irq: Add constants for the reserved IRQ vectors Eric W. Biederman
2007-02-23 11:40                                                                               ` Eric W. Biederman [this message]
2007-02-25 11:53                                                                                 ` [PATCH 13/14] x86_64 irq: Safely cleanup an irq after moving it Mika Penttilä
2007-02-25 12:09                                                                                   ` Eric W. Biederman
2007-02-23 11:46                                                                               ` [PATCH 14/14] genirq: Mask irqs when migrating them Eric W. Biederman
2007-02-23 12:01                                                                                 ` [PATCH] x86_64 irq: Document what works and why on ioapics Eric W. Biederman
2007-02-24  2:06                                                                                 ` [PATCH 14/14] genirq: Mask irqs when migrating them Siddha, Suresh B
2007-02-27 20:26                                                                                   ` Andrew Morton
2007-02-27 20:41                                                                                     ` Eric W. Biederman
2007-02-25 10:43                                                                               ` [PATCH 12/14] x86_64 irq: Add constants for the reserved IRQ vectors Pavel Machek
2007-02-25 11:15                                                                                 ` Eric W. Biederman
2007-02-25 19:48                                                                                   ` Pavel Machek
2007-02-25 21:01                                                                                     ` Eric W. Biederman
2007-02-25 21:13                                                                                       ` Pavel Machek
2007-02-23 16:48                                                     ` Conclusions from my investigation about ioapic programming Jeff V. Merkey
2007-02-23 18:10                                                       ` Eric W. Biederman
2007-02-23 17:48                                                         ` Jeff V. Merkey
2007-02-24  4:05                                                           ` Eric W. Biederman
2007-02-24  5:44                                                             ` Jeffrey V. Merkey
2007-02-23 17:48                                                         ` Jeff V. Merkey
     [not found]                                           ` <32209efe0702111212j77f5011xe2430cb13c13686@mail.gmail.com>
2007-02-11 21:36                                             ` What are the real ioapic rte programming constraints? Eric W. Biederman
2007-02-03  9:50                       ` [PATCH 1/2] x86_64 irq: Simplfy __assign_irq_vector Andi Kleen
2007-02-03  0:40                     ` System crash after "No irq handler for vector" linux 2.6.19 Eric W. Biederman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=m1lkipt839.fsf_-_@ebiederm.dsl.xmission.com \
    --to=ebiederm@xmission.com \
    --cc=ak@suse.de \
    --cc=akpm@osdl.org \
    --cc=ashok.raj@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=protasnb@gmail.com \
    --cc=suresh.b.siddha@intel.com \
    --cc=torvalds@linux-foundation.org \
    --cc=yinghai.lu@amd.com \
    --cc=zwane@infradead.org \
    --subject='Re: [PATCH 13/14] x86_64 irq: Safely cleanup an irq after moving it.' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).