LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [RFC 8/8] x86_64: Support for new UV apic
@ 2008-03-24 18:21 Jack Steiner
  2008-03-25 10:25 ` Andi Kleen
                   ` (2 more replies)
  0 siblings, 3 replies; 24+ messages in thread
From: Jack Steiner @ 2008-03-24 18:21 UTC (permalink / raw)
  To: mingo, tglx; +Cc: linux-mm, linux-kernel


UV supports really big systems. So big, in fact, that the APICID register
does not contain enough bits to contain an APICID that is unique across all
cpus.

The UV BIOS supports 3 APICID modes:

	- legacy mode. This mode uses the old APIC mode where
	  APICID is in bits [31:24] of the APICID register.

	- x2apic mode. This mode is whitebox-compatible. APICIDs
	  are unique across all cpus. Standard x2apic APIC operations
	  (Intel-defined) can be used for IPIs. The node identifier
	  fits within the Intel-defined portion of the APICID register.

	- x2apic-uv mode. In this mode, the APICIDs on each node have
	  unique IDs, but IDs on different node are not unique. For example,
	  if each mode has 32 cpus, the APICIDs on each node might be
	  0 - 31. Every node has the same set of IDs.
	  The UV hub is used to route IPIs/interrupts to the correct node.
	  Traditional APIC operations WILL NOT WORK.

In x2apic-uv mode, the ACPI tables all contain a full unique ID (note:
exact bit layout still changing but the following is close):
	
	nnnnnnnnnnlc0cch
		n = unique node number
		l = socket number on board
		c = core
		h = hyperthread
		
Only the "lc0cch" bits are written to the APICID register. The remaining bits are
supplied by having the get_apic_id() function "OR" the extra bits into the value
read from the APICID register. (Hmmm.. why not keep the ENTIRE APICID register
in per-cpu data....)

The x2apic-uv mode is recognized by the MADT table containing:
	  oem_id = "SGI"
	  oem_table_id = "UV-X"
	

(NOTE: a work-in-progress. Pieces missing....)


	Signed-off-by: Jack Steiner <steiner@sgi.com>

---
 arch/x86/kernel/Makefile         |    2 
 arch/x86/kernel/genapic_64.c     |   15 +
 arch/x86/kernel/genx2apic_uv_x.c |  305 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/setup64.c        |    4 
 arch/x86/kernel/smpboot_64.c     |    7 
 include/asm-x86/genapic_64.h     |    5 
 6 files changed, 335 insertions(+), 3 deletions(-)

Index: linux/arch/x86/kernel/genapic_64.c
===================================================================
--- linux.orig/arch/x86/kernel/genapic_64.c	2008-03-21 15:37:05.000000000 -0500
+++ linux/arch/x86/kernel/genapic_64.c	2008-03-21 15:49:38.000000000 -0500
@@ -30,6 +30,7 @@ u16 x86_cpu_to_apicid_init[NR_CPUS] __in
 void *x86_cpu_to_apicid_early_ptr;
 DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
 EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+DEFINE_PER_CPU(int, x2apic_extra_bits);
 
 struct genapic __read_mostly *genapic = &apic_flat;
 
@@ -40,6 +41,9 @@ static enum uv_system_type uv_system_typ
  */
 void __init setup_apic_routing(void)
 {
+	if (uv_system_type == UV_NON_UNIQUE_APIC)
+		genapic = &apic_x2apic_uv_x;
+	else
 #ifdef CONFIG_ACPI
 	/*
 	 * Quirk: some x86_64 machines can only use physical APIC mode
@@ -69,7 +73,16 @@ void send_IPI_self(int vector)
 
 unsigned int get_apic_id(void)
 {
-	return (apic_read(APIC_ID) >> 24) & 0xFFu;
+	unsigned int id;
+
+	preempt_disable();
+	id = apic_read(APIC_ID);
+	if (uv_system_type >= UV_X2APIC)
+		id  |= __get_cpu_var(x2apic_extra_bits);
+	else
+		id = (id >> 24) & 0xFFu;;
+	preempt_enable();
+	return id;
 }
 
 int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
Index: linux/arch/x86/kernel/genx2apic_uv_x.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/arch/x86/kernel/genx2apic_uv_x.c	2008-03-24 09:21:56.000000000 -0500
@@ -0,0 +1,305 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV APIC functions (note: not an Intel compatible APIC)
+ *
+ * Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+#include <asm/uv_mmrs.h>
+#include <asm/uv_hub.h>
+
+DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
+EXPORT_PER_CPU_SYMBOL(__uv_hub_info);
+
+struct uv_blade_info *uv_blade_info;
+EXPORT_SYMBOL_GPL(uv_blade_info);
+
+short *uv_node_to_blade;
+EXPORT_SYMBOL_GPL(uv_node_to_blade);
+
+short *uv_cpu_to_blade;
+EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
+
+short uv_possible_blades;
+EXPORT_SYMBOL_GPL(uv_possible_blades);
+
+/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
+/* Probably incorrect for UV  ZZZ */
+
+static cpumask_t uv_target_cpus(void)
+{
+	return cpumask_of_cpu(0);
+}
+
+static cpumask_t uv_vector_allocation_domain(int cpu)
+{
+	cpumask_t domain = CPU_MASK_NONE;
+	cpu_set(cpu, domain);
+	return domain;
+}
+
+int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
+{
+	unsigned long val;
+	int nasid;
+
+	printk(KERN_DEBUG "ZZZZZZZZZZZ send SIPI to apicid 0x%x, start 0x%x\n",
+	       phys_apicid, start_rip);
+	nasid = uv_apicid_to_nasid(phys_apicid);
+	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+	    (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+	    (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+	    (6 << UVH_IPI_INT_DELIVERY_MODE_SHFT);
+	printk(KERN_DEBUG "ZZZZZZZZZZZ      nasid %d, val 0x%lx\n", nasid, val);
+	uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
+	return 0;
+}
+
+static void uv_send_IPI_one(int cpu, int vector)
+{
+	unsigned long val, apicid;
+	int nasid;
+
+	apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */
+	nasid = uv_apicid_to_nasid(apicid);
+	val =
+	    (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid <<
+					      UVH_IPI_INT_APIC_ID_SHFT) |
+	    (vector << UVH_IPI_INT_VECTOR_SHFT);
+	uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
+	printk(KERN_DEBUG
+	     "UV: IPI to cpu %d, apicid 0x%lx, vec %d, nasid%d, val 0x%lx\n",
+	     cpu, apicid, vector, nasid, val);
+}
+
+static void uv_send_IPI_mask(cpumask_t mask, int vector)
+{
+	unsigned long flags;
+	unsigned int cpu;
+
+	local_irq_save(flags);
+	for (cpu = 0; cpu < NR_CPUS; ++cpu)
+		if (cpu_isset(cpu, mask))
+			uv_send_IPI_one(cpu, vector);
+	local_irq_restore(flags);
+}
+
+static void uv_send_IPI_allbutself(int vector)
+{
+	cpumask_t mask = cpu_online_map;
+
+	cpu_clear(smp_processor_id(), mask);
+
+	if (!cpus_empty(mask))
+		uv_send_IPI_mask(mask, vector);
+}
+
+static void uv_send_IPI_all(int vector)
+{
+	uv_send_IPI_mask(cpu_online_map, vector);
+}
+
+static int uv_apic_id_registered(void)
+{
+	return 1;
+}
+
+static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	cpu = first_cpu(cpumask);
+	if ((unsigned)cpu < NR_CPUS)
+		return per_cpu(x86_cpu_to_apicid, cpu);
+	else
+		return BAD_APICID;
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+	return get_apic_id() >> index_msb;
+}
+
+#ifdef ZZZ
+static void uv_send_IPI_self(int vector)
+{
+	apic_write(APIC_SELF_IPI, vector);
+}
+#endif
+
+struct genapic apic_x2apic_uv_x = {
+	.name = "UV large system",
+	.int_delivery_mode = dest_Fixed,
+	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
+	.target_cpus = uv_target_cpus,
+	.vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
+	.apic_id_registered = uv_apic_id_registered,
+	.send_IPI_all = uv_send_IPI_all,
+	.send_IPI_allbutself = uv_send_IPI_allbutself,
+	.send_IPI_mask = uv_send_IPI_mask,
+	/* ZZZ.send_IPI_self = uv_send_IPI_self, */
+	.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+	.phys_pkg_id = phys_pkg_id,	/* Fixme ZZZ */
+};
+
+static __cpuinit void set_x2apic_extra_bits(int nasid)
+{
+	__get_cpu_var(x2apic_extra_bits) = ((nasid >> 1) << 6);
+}
+
+/*
+ * Called on boot cpu.
+ */
+static __init void uv_system_init(void)
+{
+	union uvh_si_addr_map_config_u m_n_config;
+	int bytes, nid, cpu, lcpu, nasid, last_nasid, blade;
+	unsigned long mmr_base;
+
+	m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
+	mmr_base =
+	    uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
+	    ~UV_MMR_ENABLE;
+	printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
+
+	last_nasid = -1;
+	for_each_possible_cpu(cpu) {
+		nid = cpu_to_node(cpu);
+		nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu));
+		if (nasid != last_nasid)
+			uv_possible_blades++;
+		last_nasid = nasid;
+	}
+	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
+
+	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
+	uv_blade_info = alloc_bootmem_pages(bytes);
+	memset(uv_blade_info, 255, bytes);
+
+	bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
+	uv_node_to_blade = alloc_bootmem_pages(bytes);
+	memset(uv_node_to_blade, 255, bytes);
+
+	bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
+	uv_cpu_to_blade = alloc_bootmem_pages(bytes);
+	memset(uv_cpu_to_blade, 255, bytes);
+
+	last_nasid = -1;
+	blade = -1;
+	lcpu = -1;
+	for_each_possible_cpu(cpu) {
+		nid = cpu_to_node(cpu);
+		nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu));
+		if (nasid != last_nasid) {
+			blade++;
+			lcpu = -1;
+			uv_blade_info[blade].nr_posible_cpus = 0;
+			uv_blade_info[blade].nr_online_cpus = 0;
+		}
+		last_nasid = nasid;
+		lcpu++;
+
+		uv_cpu_hub_info(cpu)->m_val = m_n_config.s.m_skt;
+		uv_cpu_hub_info(cpu)->n_val = m_n_config.s.n_skt;
+		uv_cpu_hub_info(cpu)->numa_blade_id = blade;
+		uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
+		uv_cpu_hub_info(cpu)->local_nasid = nasid;
+		uv_cpu_hub_info(cpu)->gnode_upper =
+		    nasid & ~((1 << uv_hub_info->n_val) - 1);
+		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
+		uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
+		uv_blade_info[blade].nasid = nasid;
+		uv_blade_info[blade].nr_posible_cpus++;
+		uv_node_to_blade[nid] = blade;
+		uv_cpu_to_blade[cpu] = blade;
+
+		printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, nasid %d, nid %d\n",
+		       cpu, per_cpu(x86_cpu_to_apicid, cpu), nasid, nid);
+		printk(KERN_DEBUG "UV   lcpu %d, blade %d\n", lcpu, blade);
+
+#ifdef ZZZ
+		printk(KERN_DEBUG "UV ZZZZ nasid %d\n", nasid);
+		printk(KERN_DEBUG "UV  ZZZ local paddr %p\n",
+		       __pa(uv_local_mmr_address
+			    (UVH_LB_BAU_SB_DESCRIPTOR_BASE)));
+		printk(KERN_DEBUG "UV  ZZZ global paddr %p\n",
+		       __pa(uv_global_mmr64_address
+			    (nasid, UVH_LB_BAU_SB_DESCRIPTOR_BASE)));
+		printk(KERN_DEBUG "UV  ZZZ global32 paddr %p\n",
+		       __pa(uv_global_mmr32_address
+			    (nasid, UVH_LB_BAU_SB_DESCRIPTOR_BASE_32)));
+		printk(KERN_DEBUG "UV  ZZZ local addr %p\n",
+		       uv_local_mmr_address(UVH_LB_BAU_SB_DESCRIPTOR_BASE));
+		printk(KERN_DEBUG "UV  ZZZ global addr %p\n",
+		       uv_global_mmr64_address(nasid,
+					       UVH_LB_BAU_SB_DESCRIPTOR_BASE));
+		printk(KERN_DEBUG "UV  ZZZ global32 addr %p\n",
+		       uv_global_mmr32_address(nasid,
+					UVH_LB_BAU_SB_DESCRIPTOR_BASE_32));
+
+		printk(KERN_DEBUG "UV  ZZZ local 0x%lx\n",
+		       uv_read_local_mmr(UVH_LB_BAU_SB_DESCRIPTOR_BASE));
+		printk(KERN_DEBUG "UV  ZZZ global 0x%lx\n",
+		       uv_read_global_mmr64(nasid,
+					    UVH_LB_BAU_SB_DESCRIPTOR_BASE));
+		printk(KERN_DEBUG "UV  ZZZ global32 0x%lx\n",
+		       uv_read_global_mmr32(nasid,
+					    UVH_LB_BAU_SB_DESCRIPTOR_BASE_32));
+#endif
+	}
+}
+
+/*
+ * Called on each cpu to initialize the per_cpu UV data area.
+ */
+void __cpuinit uv_cpu_init(void)
+{
+	if (!uv_node_to_blade)
+		uv_system_init();
+
+	uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+
+	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
+		set_x2apic_extra_bits(uv_hub_info->local_nasid);
+
+#ifndef ZZZ
+	printk(KERN_DEBUG
+	       "UV cpu %d, lcpu %d, blade %d, nasid %d/%d/%d, possible %d,"
+	       " online %d, cputoblade %d, uv_node_to_blade_id %d\n",
+	       smp_processor_id(), uv_blade_processor_id(), uv_numa_blade_id(),
+	       uv_blade_to_nasid(uv_numa_blade_id()),
+	       uv_cpu_to_nasid(smp_processor_id()),
+	       uv_node_to_nasid(numa_node_id()),
+	       uv_blade_nr_possible_cpus(uv_numa_blade_id()),
+	       uv_blade_nr_online_cpus(uv_numa_blade_id()),
+	       uv_cpu_to_blade_id(smp_processor_id()),
+	       uv_node_to_blade_id(numa_node_id()));
+
+	printk(KERN_DEBUG
+	       "UV cpu %d: hdw_apic_id 0x%x, extra_apic 0x%x, nasid 0x%x, "
+	       "M %d, N %d, nasid_h 0x%x, mmrs 0x%lx\n",
+	       smp_processor_id(), apic_read(APIC_ID),
+	       __get_cpu_var(x2apic_extra_bits), uv_hub_info->local_nasid,
+	       uv_hub_info->m_val, uv_hub_info->n_val, uv_hub_info->gnode_upper,
+	       uv_hub_info->global_mmr_base);
+#endif
+}
Index: linux/arch/x86/kernel/setup64.c
===================================================================
--- linux.orig/arch/x86/kernel/setup64.c	2008-03-21 15:36:35.000000000 -0500
+++ linux/arch/x86/kernel/setup64.c	2008-03-21 15:49:38.000000000 -0500
@@ -24,6 +24,7 @@
 #include <asm/proto.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
+#include <asm/genapic.h>
 
 #ifndef CONFIG_DEBUG_BOOT_PARAMS
 struct boot_params __initdata boot_params;
@@ -355,4 +356,7 @@ void __cpuinit cpu_init (void)
 	fpu_init(); 
 
 	raw_local_save_flags(kernel_eflags);
+
+	if (is_uv_system())
+		uv_cpu_init();
 }
Index: linux/arch/x86/kernel/Makefile
===================================================================
--- linux.orig/arch/x86/kernel/Makefile	2008-03-21 15:36:35.000000000 -0500
+++ linux/arch/x86/kernel/Makefile	2008-03-21 15:49:38.000000000 -0500
@@ -90,7 +90,7 @@ scx200-y			+= scx200_32.o
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
-        obj-y				+= genapic_64.o genapic_flat_64.o
+        obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
         obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
         obj-$(CONFIG_AUDIT)		+= audit_64.o
 
Index: linux/arch/x86/kernel/smpboot_64.c
===================================================================
--- linux.orig/arch/x86/kernel/smpboot_64.c	2008-03-21 15:36:45.000000000 -0500
+++ linux/arch/x86/kernel/smpboot_64.c	2008-03-21 15:49:38.000000000 -0500
@@ -60,6 +60,7 @@
 #include <asm/hw_irq.h>
 #include <asm/numa.h>
 #include <asm/trampoline.h>
+#include <asm/genapic.h>
 
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
@@ -418,6 +419,9 @@ static int __cpuinit wakeup_secondary_vi
 	unsigned long send_status, accept_status = 0;
 	int maxlvt, num_starts, j;
 
+	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
+		return uv_wakeup_secondary(phys_apicid, start_rip);
+
 	Dprintk("Asserting INIT.\n");
 
 	/*
@@ -679,7 +683,8 @@ do_rest:
 				/* trampoline code not run */
 				printk("Not responding.\n");
 #ifdef APIC_DEBUG
-			inquire_remote_apic(apicid);
+			if (get_uv_system_type() != UV_NON_UNIQUE_APIC)
+				inquire_remote_apic(apicid);
 #endif
 		}
 	}
Index: linux/include/asm-x86/genapic_64.h
===================================================================
--- linux.orig/include/asm-x86/genapic_64.h	2008-03-21 15:37:05.000000000 -0500
+++ linux/include/asm-x86/genapic_64.h	2008-03-21 15:49:38.000000000 -0500
@@ -39,4 +39,9 @@ enum uv_system_type {UV_NONE, UV_LEGACY_
 extern enum uv_system_type get_uv_system_type(void);
 extern int is_uv_system(void);
 
+extern struct genapic apic_x2apic_uv_x;
+DECLARE_PER_CPU(int, x2apic_extra_bits);
+extern void uv_cpu_init(void);
+extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
+
 #endif

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-24 18:21 [RFC 8/8] x86_64: Support for new UV apic Jack Steiner
@ 2008-03-25 10:25 ` Andi Kleen
  2008-03-25 17:56   ` Jack Steiner
  2008-03-25 14:30 ` Ingo Molnar
  2008-03-30 20:41 ` Yinghai Lu
  2 siblings, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2008-03-25 10:25 UTC (permalink / raw)
  To: Jack Steiner; +Cc: mingo, tglx, linux-mm, linux-kernel

Jack Steiner <steiner@sgi.com> writes:

>  unsigned int get_apic_id(void)
>  {
> -	return (apic_read(APIC_ID) >> 24) & 0xFFu;
> +	unsigned int id;
> +
> +	preempt_disable();
> +	id = apic_read(APIC_ID);
> +	if (uv_system_type >= UV_X2APIC)
> +		id  |= __get_cpu_var(x2apic_extra_bits);
> +	else
> +		id = (id >> 24) & 0xFFu;;
> +	preempt_enable();
> +	return id;

Really caller should have done preempt_disable(), otherwise
the value can be wrong as soon as you return.

Better probably to just WARN_ON if preemption is on

(just be careful it does not trigger in oopses and machine checks)

> +
> +DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
> +EXPORT_PER_CPU_SYMBOL(__uv_hub_info);

GPL export too?

> +
> +struct uv_blade_info *uv_blade_info;
> +EXPORT_SYMBOL_GPL(uv_blade_info);
> +
> +short *uv_node_to_blade;
> +EXPORT_SYMBOL_GPL(uv_node_to_blade);
> +
> +short *uv_cpu_to_blade;
> +EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
> +
> +short uv_possible_blades;
> +EXPORT_SYMBOL_GPL(uv_possible_blades);
> +
> +/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
> +/* Probably incorrect for UV  ZZZ */

Actually it should be correct. Except for UV you likely really need a
NUMA aware irqbalanced. I used to have some old very hackish patches
to implement that in irqbalanced, but never pushed it because the
systems I was working on didn't really need it.


> +
> +static void uv_send_IPI_one(int cpu, int vector)
> +{
> +	unsigned long val, apicid;
> +	int nasid;
> +
> +	apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */

Instead of doing that it might be better to implement __read_mostly per CPU variables
(should not be very hard) 

> +static void uv_send_IPI_mask(cpumask_t mask, int vector)
> +{
> +	unsigned long flags;
> +	unsigned int cpu;
> +
> +	local_irq_save(flags);
> +	for (cpu = 0; cpu < NR_CPUS; ++cpu)
> +		if (cpu_isset(cpu, mask))
> +			uv_send_IPI_one(cpu, vector);
> +	local_irq_restore(flags);

This could disable interrupts for a long time could't it?  Really needed?


> +	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
> +	uv_blade_info = alloc_bootmem_pages(bytes);
> +	memset(uv_blade_info, 255, bytes);

255?  Strange poison value.

> ===================================================================
> --- linux.orig/arch/x86/kernel/Makefile	2008-03-21 15:36:35.000000000 -0500
> +++ linux/arch/x86/kernel/Makefile	2008-03-21 15:49:38.000000000 -0500
> @@ -90,7 +90,7 @@ scx200-y			+= scx200_32.o
>  ###
>  # 64 bit specific files
>  ifeq ($(CONFIG_X86_64),y)
> -        obj-y				+= genapic_64.o genapic_flat_64.o
> +        obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o

Definitely should be a CONFIG

> @@ -418,6 +419,9 @@ static int __cpuinit wakeup_secondary_vi
>  	unsigned long send_status, accept_status = 0;
>  	int maxlvt, num_starts, j;
>  
> +	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
> +		return uv_wakeup_secondary(phys_apicid, start_rip);
> +

This should be probably factored properly (didn't Jeremy have smp_ops 
for this some time ago) so that even the default case is a call.

>  	Dprintk("Asserting INIT.\n");
>  
>  	/*
> @@ -679,7 +683,8 @@ do_rest:
>  				/* trampoline code not run */
>  				printk("Not responding.\n");
>  #ifdef APIC_DEBUG
> -			inquire_remote_apic(apicid);
> +			if (get_uv_system_type() != UV_NON_UNIQUE_APIC)
> +				inquire_remote_apic(apicid);

Dito.


-Andi

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-24 18:21 [RFC 8/8] x86_64: Support for new UV apic Jack Steiner
  2008-03-25 10:25 ` Andi Kleen
@ 2008-03-25 14:30 ` Ingo Molnar
  2008-03-25 16:31   ` Jack Steiner
  2008-03-30 20:41 ` Yinghai Lu
  2 siblings, 1 reply; 24+ messages in thread
From: Ingo Molnar @ 2008-03-25 14:30 UTC (permalink / raw)
  To: Jack Steiner; +Cc: tglx, linux-mm, linux-kernel


* Jack Steiner <steiner@sgi.com> wrote:

> Index: linux/arch/x86/kernel/genapic_64.c

> @@ -69,7 +73,16 @@ void send_IPI_self(int vector)
>  
>  unsigned int get_apic_id(void)
>  {
> -	return (apic_read(APIC_ID) >> 24) & 0xFFu;
> +	unsigned int id;
> +
> +	preempt_disable();
> +	id = apic_read(APIC_ID);
> +	if (uv_system_type >= UV_X2APIC)
> +		id  |= __get_cpu_var(x2apic_extra_bits);
> +	else
> +		id = (id >> 24) & 0xFFu;;
> +	preempt_enable();
> +	return id;

dont we want to put get_apic_id() into struct genapic instead? We 
already have ID management there.

also, we want to unify 32-bit and 64-bit genapic code and just have 
genapic all across x86.

	Ingo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-25 14:30 ` Ingo Molnar
@ 2008-03-25 16:31   ` Jack Steiner
  2008-03-26  3:24     ` Glauber Costa
  0 siblings, 1 reply; 24+ messages in thread
From: Jack Steiner @ 2008-03-25 16:31 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: tglx, linux-mm, linux-kernel

On Tue, Mar 25, 2008 at 03:30:59PM +0100, Ingo Molnar wrote:
> 
> * Jack Steiner <steiner@sgi.com> wrote:
> 
> > Index: linux/arch/x86/kernel/genapic_64.c
> 
> > @@ -69,7 +73,16 @@ void send_IPI_self(int vector)
> >  
> >  unsigned int get_apic_id(void)
> >  {
> > -	return (apic_read(APIC_ID) >> 24) & 0xFFu;
> > +	unsigned int id;
> > +
> > +	preempt_disable();
> > +	id = apic_read(APIC_ID);
> > +	if (uv_system_type >= UV_X2APIC)
> > +		id  |= __get_cpu_var(x2apic_extra_bits);
> > +	else
> > +		id = (id >> 24) & 0xFFu;;
> > +	preempt_enable();
> > +	return id;
> 
> dont we want to put get_apic_id() into struct genapic instead? We 
> already have ID management there.
> 
> also, we want to unify 32-bit and 64-bit genapic code and just have 
> genapic all across x86.

Long term, I think that makes sense. However, I think that should be a
separate series of patches since there are significant differences between
the 32-bit and 64-bit genapic structs.

--- jack

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-25 10:25 ` Andi Kleen
@ 2008-03-25 17:56   ` Jack Steiner
  2008-03-25 18:06     ` Andi Kleen
  2008-03-26  7:38     ` Ingo Molnar
  0 siblings, 2 replies; 24+ messages in thread
From: Jack Steiner @ 2008-03-25 17:56 UTC (permalink / raw)
  To: Andi Kleen; +Cc: mingo, tglx, linux-mm, linux-kernel

On Tue, Mar 25, 2008 at 11:25:34AM +0100, Andi Kleen wrote:
> Jack Steiner <steiner@sgi.com> writes:
> 
> >  unsigned int get_apic_id(void)
> >  {
> > -	return (apic_read(APIC_ID) >> 24) & 0xFFu;
> > +	unsigned int id;
> > +
> > +	preempt_disable();
> > +	id = apic_read(APIC_ID);
> > +	if (uv_system_type >= UV_X2APIC)
> > +		id  |= __get_cpu_var(x2apic_extra_bits);
> > +	else
> > +		id = (id >> 24) & 0xFFu;;
> > +	preempt_enable();
> > +	return id;
> 
> Really caller should have done preempt_disable(), otherwise
> the value can be wrong as soon as you return.
> 
> Better probably to just WARN_ON if preemption is on

Will do (assuming it doesn't ripple thru too much code eliminating
warnings - doesn't look bad at first glance).


> 
> (just be careful it does not trigger in oopses and machine checks)
> 
> > +
> > +DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
> > +EXPORT_PER_CPU_SYMBOL(__uv_hub_info);
> 
> GPL export too?

Yes.


> 
> > +
> > +struct uv_blade_info *uv_blade_info;
> > +EXPORT_SYMBOL_GPL(uv_blade_info);
> > +
> > +short *uv_node_to_blade;
> > +EXPORT_SYMBOL_GPL(uv_node_to_blade);
> > +
> > +short *uv_cpu_to_blade;
> > +EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
> > +
> > +short uv_possible_blades;
> > +EXPORT_SYMBOL_GPL(uv_possible_blades);
> > +
> > +/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
> > +/* Probably incorrect for UV  ZZZ */
> 
> Actually it should be correct. Except for UV you likely really need a
> NUMA aware irqbalanced. I used to have some old very hackish patches
> to implement that in irqbalanced, but never pushed it because the
> systems I was working on didn't really need it.

Deleted comment.

> 
> 
> > +
> > +static void uv_send_IPI_one(int cpu, int vector)
> > +{
> > +	unsigned long val, apicid;
> > +	int nasid;
> > +
> > +	apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */
> 
> Instead of doing that it might be better to implement __read_mostly per CPU variables
> (should not be very hard) 

Added to list of loose-ends that need addressing.


> 
> > +static void uv_send_IPI_mask(cpumask_t mask, int vector)
> > +{
> > +	unsigned long flags;
> > +	unsigned int cpu;
> > +
> > +	local_irq_save(flags);
> > +	for (cpu = 0; cpu < NR_CPUS; ++cpu)
> > +		if (cpu_isset(cpu, mask))
> > +			uv_send_IPI_one(cpu, vector);
> > +	local_irq_restore(flags);
> 
> This could disable interrupts for a long time could't it?  Really needed?

No, not sure why I did that. Deleted the irq disable...


> 
> 
> > +	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
> > +	uv_blade_info = alloc_bootmem_pages(bytes);
> > +	memset(uv_blade_info, 255, bytes);
> 
> 255?  Strange poison value.

Deleted the memset. Should not be depending on poison values. Was useful
in debugging but it has outlived it's usefulness.


> 
> > ===================================================================
> > --- linux.orig/arch/x86/kernel/Makefile	2008-03-21 15:36:35.000000000 -0500
> > +++ linux/arch/x86/kernel/Makefile	2008-03-21 15:49:38.000000000 -0500
> > @@ -90,7 +90,7 @@ scx200-y			+= scx200_32.o
> >  ###
> >  # 64 bit specific files
> >  ifeq ($(CONFIG_X86_64),y)
> > -        obj-y				+= genapic_64.o genapic_flat_64.o
> > +        obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
> 
> Definitely should be a CONFIG

Not sure that I understand why. The overhead of UV is minimal & we want UV
enabled in all distro kernels. OTOH, small embedded systems probably want to
eliminate every last bit of unneeded code.

Might make sense to have a config option. Thoughts????


> 
> > @@ -418,6 +419,9 @@ static int __cpuinit wakeup_secondary_vi
> >  	unsigned long send_status, accept_status = 0;
> >  	int maxlvt, num_starts, j;
> >  
> > +	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
> > +		return uv_wakeup_secondary(phys_apicid, start_rip);
> > +
> 
> This should be probably factored properly (didn't Jeremy have smp_ops 
> for this some time ago) so that even the default case is a call.

By factored, do you means something like:
	is_uv_legacy_system()
	is_us_non_unique_apicid_system()
	...

Or maybe:
	is_uv_system_type(x)   # where x is UV_NON_UNIQUE_APIC, etc


> -Andi

Thanks for the careful review.

--- jack

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-25 17:56   ` Jack Steiner
@ 2008-03-25 18:06     ` Andi Kleen
  2008-03-26  2:23       ` Jeremy Fitzhardinge
  2008-03-26  7:38     ` Ingo Molnar
  1 sibling, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2008-03-25 18:06 UTC (permalink / raw)
  To: Jack Steiner; +Cc: Andi Kleen, mingo, tglx, linux-mm, linux-kernel

> > This should be probably factored properly (didn't Jeremy have smp_ops 
> > for this some time ago) so that even the default case is a call.
> 
> By factored, do you means something like:
> 	is_uv_legacy_system()
> 	is_us_non_unique_apicid_system()
> 	...
> 
> Or maybe:
> 	is_uv_system_type(x)   # where x is UV_NON_UNIQUE_APIC, etc

No instead of having lots of if (xyz_system) do_xyz_special()
go through smp_ops for the whole thing so that UV would just have a 
special smp_ops that has special implementions or wrappers. 

Oops I see smp_ops are currently only implemented
for 32bit. Ok do it only once smp_ops exist on 64bit too. 

-Andi

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-25 18:06     ` Andi Kleen
@ 2008-03-26  2:23       ` Jeremy Fitzhardinge
  2008-03-26  3:22         ` Glauber Costa
  2008-03-26  7:29         ` Ingo Molnar
  0 siblings, 2 replies; 24+ messages in thread
From: Jeremy Fitzhardinge @ 2008-03-26  2:23 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Jack Steiner, mingo, tglx, linux-mm, linux-kernel,
	Glauber de Oliveira Costa

Andi Kleen wrote:
> No instead of having lots of if (xyz_system) do_xyz_special()
> go through smp_ops for the whole thing so that UV would just have a 
> special smp_ops that has special implementions or wrappers. 
>
> Oops I see smp_ops are currently only implemented
> for 32bit. Ok do it only once smp_ops exist on 64bit too. 
>   

I think glommer has patches to unify smp stuff, which should include 
smp_ops.

    J

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-26  2:23       ` Jeremy Fitzhardinge
@ 2008-03-26  3:22         ` Glauber Costa
  2008-03-26  7:29         ` Ingo Molnar
  1 sibling, 0 replies; 24+ messages in thread
From: Glauber Costa @ 2008-03-26  3:22 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Andi Kleen, Jack Steiner, mingo, tglx, linux-mm, linux-kernel

On Tue, Mar 25, 2008 at 11:23 PM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> Andi Kleen wrote:
>  > No instead of having lots of if (xyz_system) do_xyz_special()
>  > go through smp_ops for the whole thing so that UV would just have a
>  > special smp_ops that has special implementions or wrappers.
>  >
>  > Oops I see smp_ops are currently only implemented
>  > for 32bit. Ok do it only once smp_ops exist on 64bit too.
>  >
>
>  I think glommer has patches to unify smp stuff, which should include
>  smp_ops.
>

They are already merged in ingo's tree.

I'm still about to post some last-minute issues, but the full smp_ops
support is there.
-- 
Glauber Costa.
"Free as in Freedom"
http://glommer.net

"The less confident you are, the more serious you have to act."

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-25 16:31   ` Jack Steiner
@ 2008-03-26  3:24     ` Glauber Costa
  0 siblings, 0 replies; 24+ messages in thread
From: Glauber Costa @ 2008-03-26  3:24 UTC (permalink / raw)
  To: Jack Steiner; +Cc: Ingo Molnar, tglx, linux-mm, linux-kernel

On Tue, Mar 25, 2008 at 1:31 PM, Jack Steiner <steiner@sgi.com> wrote:
> On Tue, Mar 25, 2008 at 03:30:59PM +0100, Ingo Molnar wrote:
>  >
>  > * Jack Steiner <steiner@sgi.com> wrote:
>  >
>  > > Index: linux/arch/x86/kernel/genapic_64.c
>  >
>  > > @@ -69,7 +73,16 @@ void send_IPI_self(int vector)
>  > >
>  > >  unsigned int get_apic_id(void)
>  > >  {
>  > > -   return (apic_read(APIC_ID) >> 24) & 0xFFu;
>  > > +   unsigned int id;
>  > > +
>  > > +   preempt_disable();
>  > > +   id = apic_read(APIC_ID);
>  > > +   if (uv_system_type >= UV_X2APIC)
>  > > +           id  |= __get_cpu_var(x2apic_extra_bits);
>  > > +   else
>  > > +           id = (id >> 24) & 0xFFu;;
>  > > +   preempt_enable();
>  > > +   return id;
>  >
>  > dont we want to put get_apic_id() into struct genapic instead? We
>  > already have ID management there.
>  >
>  > also, we want to unify 32-bit and 64-bit genapic code and just have
>  > genapic all across x86.
>
>  Long term, I think that makes sense. However, I think that should be a
>  separate series of patches since there are significant differences between
>  the 32-bit and 64-bit genapic structs.
>
However, if you add more code, they'll keep diverging. The moment you
touch them, and get your
hands warmed up, is the perfect moment for an integration series.

-- 
Glauber Costa.
"Free as in Freedom"
http://glommer.net

"The less confident you are, the more serious you have to act."

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-26  2:23       ` Jeremy Fitzhardinge
  2008-03-26  3:22         ` Glauber Costa
@ 2008-03-26  7:29         ` Ingo Molnar
  1 sibling, 0 replies; 24+ messages in thread
From: Ingo Molnar @ 2008-03-26  7:29 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Andi Kleen, Jack Steiner, tglx, linux-mm, linux-kernel,
	Glauber de Oliveira Costa


* Jeremy Fitzhardinge <jeremy@goop.org> wrote:

> Andi Kleen wrote:
>> No instead of having lots of if (xyz_system) do_xyz_special()
>> go through smp_ops for the whole thing so that UV would just have a 
>> special smp_ops that has special implementions or wrappers. 
>> Oops I see smp_ops are currently only implemented
>> for 32bit. Ok do it only once smp_ops exist on 64bit too.   
>
> I think glommer has patches to unify smp stuff, which should include 
> smp_ops.

yep, x86.git/latest has all that work included.

	Ingo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-25 17:56   ` Jack Steiner
  2008-03-25 18:06     ` Andi Kleen
@ 2008-03-26  7:38     ` Ingo Molnar
  2008-03-30 20:23       ` Yinghai Lu
  1 sibling, 1 reply; 24+ messages in thread
From: Ingo Molnar @ 2008-03-26  7:38 UTC (permalink / raw)
  To: Jack Steiner; +Cc: Andi Kleen, tglx, linux-mm, linux-kernel


* Jack Steiner <steiner@sgi.com> wrote:

> > > -        obj-y				+= genapic_64.o genapic_flat_64.o
> > > +        obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
> > 
> > Definitely should be a CONFIG
> 
> Not sure that I understand why. The overhead of UV is minimal & we 
> want UV enabled in all distro kernels. OTOH, small embedded systems 
> probably want to eliminate every last bit of unneeded code.
> 
> Might make sense to have a config option. Thoughts????

i wouldnt mind having UV enabled by default (it can be a config option 
but default-enabled on generic kernels so all distros will pick this hw 
support up), but we definitely need the genapic unification before we 
can add more features.

	Ingo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-26  7:38     ` Ingo Molnar
@ 2008-03-30 20:23       ` Yinghai Lu
  2008-03-30 21:03         ` Jack Steiner
  0 siblings, 1 reply; 24+ messages in thread
From: Yinghai Lu @ 2008-03-30 20:23 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Jack Steiner, Andi Kleen, tglx, linux-mm, linux-kernel

On Wed, Mar 26, 2008 at 12:38 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
>  * Jack Steiner <steiner@sgi.com> wrote:
>
>  > > > -        obj-y                            += genapic_64.o genapic_flat_64.o
>  > > > +        obj-y                            += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
>  > >
>  > > Definitely should be a CONFIG
>  >
>  > Not sure that I understand why. The overhead of UV is minimal & we
>  > want UV enabled in all distro kernels. OTOH, small embedded systems
>  > probably want to eliminate every last bit of unneeded code.
>  >
>  > Might make sense to have a config option. Thoughts????
>
>  i wouldnt mind having UV enabled by default (it can be a config option
>  but default-enabled on generic kernels so all distros will pick this hw
>  support up), but we definitely need the genapic unification before we
>  can add more features.

config option would be reasonable.
for x86_64
subarch already have X86_PC, X86_VSMP.
we have X86_UVSMP

YH

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-24 18:21 [RFC 8/8] x86_64: Support for new UV apic Jack Steiner
  2008-03-25 10:25 ` Andi Kleen
  2008-03-25 14:30 ` Ingo Molnar
@ 2008-03-30 20:41 ` Yinghai Lu
  2008-03-30 21:08   ` Jack Steiner
  2 siblings, 1 reply; 24+ messages in thread
From: Yinghai Lu @ 2008-03-30 20:41 UTC (permalink / raw)
  To: Jack Steiner; +Cc: mingo, tglx, linux-mm, linux-kernel

On Mon, Mar 24, 2008 at 11:21 AM, Jack Steiner <steiner@sgi.com> wrote:
>
>  UV supports really big systems. So big, in fact, that the APICID register
>  does not contain enough bits to contain an APICID that is unique across all
>  cpus.
>
>  The UV BIOS supports 3 APICID modes:
>
>         - legacy mode. This mode uses the old APIC mode where
>           APICID is in bits [31:24] of the APICID register.
>
>         - x2apic mode. This mode is whitebox-compatible. APICIDs
>           are unique across all cpus. Standard x2apic APIC operations
>           (Intel-defined) can be used for IPIs. The node identifier
>           fits within the Intel-defined portion of the APICID register.
>
>         - x2apic-uv mode. In this mode, the APICIDs on each node have
>           unique IDs, but IDs on different node are not unique. For example,
>           if each mode has 32 cpus, the APICIDs on each node might be
>           0 - 31. Every node has the same set of IDs.
>           The UV hub is used to route IPIs/interrupts to the correct node.
>           Traditional APIC operations WILL NOT WORK.
>
>  In x2apic-uv mode, the ACPI tables all contain a full unique ID (note:
>  exact bit layout still changing but the following is close):
>
>         nnnnnnnnnnlc0cch
>                 n = unique node number
>                 l = socket number on board
>                 c = core
>                 h = hyperthread
>
>  Only the "lc0cch" bits are written to the APICID register. The remaining bits are
>  supplied by having the get_apic_id() function "OR" the extra bits into the value
>  read from the APICID register. (Hmmm.. why not keep the ENTIRE APICID register
>  in per-cpu data....)
>
>  The x2apic-uv mode is recognized by the MADT table containing:
>           oem_id = "SGI"
>           oem_table_id = "UV-X"
>
>
>  (NOTE: a work-in-progress. Pieces missing....)
>
>
>         Signed-off-by: Jack Steiner <steiner@sgi.com>
>
>  ---
>   arch/x86/kernel/Makefile         |    2
>   arch/x86/kernel/genapic_64.c     |   15 +
>   arch/x86/kernel/genx2apic_uv_x.c |  305 +++++++++++++++++++++++++++++++++++++++
>   arch/x86/kernel/setup64.c        |    4
>   arch/x86/kernel/smpboot_64.c     |    7
>   include/asm-x86/genapic_64.h     |    5
>   6 files changed, 335 insertions(+), 3 deletions(-)
>
>  Index: linux/arch/x86/kernel/genapic_64.c
>  ===================================================================
>  --- linux.orig/arch/x86/kernel/genapic_64.c     2008-03-21 15:37:05.000000000 -0500
>  +++ linux/arch/x86/kernel/genapic_64.c  2008-03-21 15:49:38.000000000 -0500
>  @@ -30,6 +30,7 @@ u16 x86_cpu_to_apicid_init[NR_CPUS] __in
>   void *x86_cpu_to_apicid_early_ptr;
>   DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
>   EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
>  +DEFINE_PER_CPU(int, x2apic_extra_bits);
>
>   struct genapic __read_mostly *genapic = &apic_flat;
>
>  @@ -40,6 +41,9 @@ static enum uv_system_type uv_system_typ
>   */
>   void __init setup_apic_routing(void)
>   {
>  +       if (uv_system_type == UV_NON_UNIQUE_APIC)
>  +               genapic = &apic_x2apic_uv_x;
>  +       else
>   #ifdef CONFIG_ACPI
>         /*
>          * Quirk: some x86_64 machines can only use physical APIC mode
>  @@ -69,7 +73,16 @@ void send_IPI_self(int vector)
>
>   unsigned int get_apic_id(void)
>   {
>  -       return (apic_read(APIC_ID) >> 24) & 0xFFu;
>  +       unsigned int id;
>  +
>  +       preempt_disable();
>  +       id = apic_read(APIC_ID);
>  +       if (uv_system_type >= UV_X2APIC)
>  +               id  |= __get_cpu_var(x2apic_extra_bits);
>  +       else
>  +               id = (id >> 24) & 0xFFu;;
>  +       preempt_enable();
>  +       return id;
>

you can not shift id here.

GET_APIC_ID will shift that again.

you apic id will be 0 for all cpu

YH

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-30 20:23       ` Yinghai Lu
@ 2008-03-30 21:03         ` Jack Steiner
  2008-03-30 21:18           ` Andi Kleen
  0 siblings, 1 reply; 24+ messages in thread
From: Jack Steiner @ 2008-03-30 21:03 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Ingo Molnar, Andi Kleen, tglx, linux-mm, linux-kernel

On Sun, Mar 30, 2008 at 01:23:12PM -0700, Yinghai Lu wrote:
> On Wed, Mar 26, 2008 at 12:38 AM, Ingo Molnar <mingo@elte.hu> wrote:
> >
> >  * Jack Steiner <steiner@sgi.com> wrote:
> >
> >  > > > -        obj-y                            += genapic_64.o genapic_flat_64.o
> >  > > > +        obj-y                            += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
> >  > >
> >  > > Definitely should be a CONFIG
> >  >
> >  > Not sure that I understand why. The overhead of UV is minimal & we
> >  > want UV enabled in all distro kernels. OTOH, small embedded systems
> >  > probably want to eliminate every last bit of unneeded code.
> >  >
> >  > Might make sense to have a config option. Thoughts????
> >
> >  i wouldnt mind having UV enabled by default (it can be a config option
> >  but default-enabled on generic kernels so all distros will pick this hw
> >  support up), but we definitely need the genapic unification before we
> >  can add more features.
> 
> config option would be reasonable.
> for x86_64
> subarch already have X86_PC, X86_VSMP.
> we have X86_UVSMP

If there was a significant differece between UV and generic kernels
(or hardware), then I would agree. However, the only significant
difference is the APIC model on large systems. Small systems are
exactly compatible.

The problem with subarch is that we want 1 binary kernel to support
both generic hardware AND uv hardware. This restriction is desirable
for the distros and software vendors. Otherwise, additional kernel
images would have to be built, released, & certified.

--- jack


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-30 20:41 ` Yinghai Lu
@ 2008-03-30 21:08   ` Jack Steiner
  2008-03-30 23:24     ` Yinghai Lu
  0 siblings, 1 reply; 24+ messages in thread
From: Jack Steiner @ 2008-03-30 21:08 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: mingo, tglx, linux-mm, linux-kernel

> >   unsigned int get_apic_id(void)
> >   {
> >  -       return (apic_read(APIC_ID) >> 24) & 0xFFu;
> >  +       unsigned int id;
> >  +
> >  +       preempt_disable();
> >  +       id = apic_read(APIC_ID);
> >  +       if (uv_system_type >= UV_X2APIC)
> >  +               id  |= __get_cpu_var(x2apic_extra_bits);
> >  +       else
> >  +               id = (id >> 24) & 0xFFu;;
> >  +       preempt_enable();
> >  +       return id;
> >
> 
> you can not shift id here.
> 
> GET_APIC_ID will shift that again.
> 
> you apic id will be 0 for all cpu
> 

I think this is fixed in the patch that I submitted on Friday. I
had to rework the GET_APIC_ID() changes because of the unification
of -32 & -64 apic code. I think the new code is much cleaner...


--- jack

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-30 21:03         ` Jack Steiner
@ 2008-03-30 21:18           ` Andi Kleen
  2008-03-30 23:29             ` Yinghai Lu
  0 siblings, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2008-03-30 21:18 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Yinghai Lu, Ingo Molnar, Andi Kleen, tglx, linux-mm, linux-kernel

> If there was a significant differece between UV and generic kernels
> (or hardware), then I would agree. However, the only significant
> difference is the APIC model on large systems. Small systems are
> exactly compatible.
> 
> The problem with subarch is that we want 1 binary kernel to support

x86-64 subarchs are more options than true subarchs. They generally
do not prevent the kernel from running on other systems, just
control addition of some additional code or special data layout. They are 
quite different from the i386 subarchs or those of other architectures.

The main reason vSMP is called a subarch is that it pads a lot
of data structures to 4K and you don't really want that on your
normal kernel, but there isn't anything in there that would
prevent booting on a normal system.

The UV option certainly doesn't have this issue.

> both generic hardware AND uv hardware. This restriction is desirable
> for the distros and software vendors. Otherwise, additional kernel
> images would have to be built, released, & certified.

I think an option would be fine, just don't call it a subarch. I don't
feel strongly about it, as you point out it is not really very much
code.

-Andi


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-30 21:08   ` Jack Steiner
@ 2008-03-30 23:24     ` Yinghai Lu
  0 siblings, 0 replies; 24+ messages in thread
From: Yinghai Lu @ 2008-03-30 23:24 UTC (permalink / raw)
  To: Jack Steiner; +Cc: mingo, tglx, linux-mm, linux-kernel

On Sun, Mar 30, 2008 at 2:08 PM, Jack Steiner <steiner@sgi.com> wrote:
> > >   unsigned int get_apic_id(void)
>  > >   {
>  > >  -       return (apic_read(APIC_ID) >> 24) & 0xFFu;
>  > >  +       unsigned int id;
>  > >  +
>  > >  +       preempt_disable();
>  > >  +       id = apic_read(APIC_ID);
>  > >  +       if (uv_system_type >= UV_X2APIC)
>  > >  +               id  |= __get_cpu_var(x2apic_extra_bits);
>  > >  +       else
>  > >  +               id = (id >> 24) & 0xFFu;;
>  > >  +       preempt_enable();
>  > >  +       return id;
>  > >
>  >
>  > you can not shift id here.
>  >
>  > GET_APIC_ID will shift that again.
>  >
>  > you apic id will be 0 for all cpu
>  >
>
>  I think this is fixed in the patch that I submitted on Friday. I
>  had to rework the GET_APIC_ID() changes because of the unification
>  of -32 & -64 apic code. I think the new code is much cleaner...

i think Ingo already put you Friday's version in x86.git#latest.

that is wrong too with extra shift.

YH

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-30 21:18           ` Andi Kleen
@ 2008-03-30 23:29             ` Yinghai Lu
  2008-03-31  2:18               ` Jack Steiner
  2008-03-31  6:48               ` Andi Kleen
  0 siblings, 2 replies; 24+ messages in thread
From: Yinghai Lu @ 2008-03-30 23:29 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Jack Steiner, Ingo Molnar, tglx, linux-mm, linux-kernel

On Sun, Mar 30, 2008 at 2:18 PM, Andi Kleen <andi@firstfloor.org> wrote:
> > If there was a significant differece between UV and generic kernels
>  > (or hardware), then I would agree. However, the only significant
>  > difference is the APIC model on large systems. Small systems are
>  > exactly compatible.
>  >
>  > The problem with subarch is that we want 1 binary kernel to support
>
>  x86-64 subarchs are more options than true subarchs. They generally
>  do not prevent the kernel from running on other systems, just
>  control addition of some additional code or special data layout. They are
>  quite different from the i386 subarchs or those of other architectures.
>
>  The main reason vSMP is called a subarch is that it pads a lot
>  of data structures to 4K and you don't really want that on your
>  normal kernel, but there isn't anything in there that would
>  prevent booting on a normal system.
>
>  The UV option certainly doesn't have this issue.
>
>
>  > both generic hardware AND uv hardware. This restriction is desirable
>  > for the distros and software vendors. Otherwise, additional kernel
>  > images would have to be built, released, & certified.
>
>  I think an option would be fine, just don't call it a subarch. I don't
>  feel strongly about it, as you point out it is not really very much
>  code.

if the calling path like GET_APIC_ID is keeping checking if it is UV
box after boot time, that may not good.

don't need make other hundreds of machine keep running the code only
for several big box all the time.

YH

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-30 23:29             ` Yinghai Lu
@ 2008-03-31  2:18               ` Jack Steiner
  2008-03-31  2:20                 ` Yinghai Lu
  2008-03-31  6:48               ` Andi Kleen
  1 sibling, 1 reply; 24+ messages in thread
From: Jack Steiner @ 2008-03-31  2:18 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Andi Kleen, Ingo Molnar, tglx, linux-mm, linux-kernel

> 
> if the calling path like GET_APIC_ID is keeping checking if it is UV
> box after boot time, that may not good.
> 
> don't need make other hundreds of machine keep running the code only
> for several big box all the time.
> 
> YH


I added trace code to see how often GET_APIC_ID() is called. For
my 8p AMD box, the  function is called 6 times per cpu during boot.
I have not seen any other calls to the function after early boot
although I'm they occur under some circumstances.

Can you think of a workload where the frequency of calls is
high enough to be significant??

--- jack

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-31  2:18               ` Jack Steiner
@ 2008-03-31  2:20                 ` Yinghai Lu
  2008-03-31 12:33                   ` Ingo Molnar
  0 siblings, 1 reply; 24+ messages in thread
From: Yinghai Lu @ 2008-03-31  2:20 UTC (permalink / raw)
  To: Jack Steiner; +Cc: Andi Kleen, Ingo Molnar, tglx, linux-mm, linux-kernel

On Sun, Mar 30, 2008 at 7:18 PM, Jack Steiner <steiner@sgi.com> wrote:
> >
>  > if the calling path like GET_APIC_ID is keeping checking if it is UV
>  > box after boot time, that may not good.
>  >
>  > don't need make other hundreds of machine keep running the code only
>  > for several big box all the time.
>  >
>  > YH
>
>
>  I added trace code to see how often GET_APIC_ID() is called. For
>  my 8p AMD box, the  function is called 6 times per cpu during boot.
>  I have not seen any other calls to the function after early boot
>  although I'm they occur under some circumstances.

then it is ok.

YH

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-30 23:29             ` Yinghai Lu
  2008-03-31  2:18               ` Jack Steiner
@ 2008-03-31  6:48               ` Andi Kleen
  1 sibling, 0 replies; 24+ messages in thread
From: Andi Kleen @ 2008-03-31  6:48 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andi Kleen, Jack Steiner, Ingo Molnar, tglx, linux-mm, linux-kernel

> if the calling path like GET_APIC_ID is keeping checking if it is UV
> box after boot time, that may not good.

I don't think GET_APIC_ID is anywhere on a critical path. As long
as it doesn't lead to code bloat that shouldn't be an issue.

-Andi

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-31  2:20                 ` Yinghai Lu
@ 2008-03-31 12:33                   ` Ingo Molnar
  2008-03-31 12:52                     ` Andi Kleen
  2008-03-31 18:42                     ` Yinghai Lu
  0 siblings, 2 replies; 24+ messages in thread
From: Ingo Molnar @ 2008-03-31 12:33 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Jack Steiner, Andi Kleen, tglx, linux-mm, linux-kernel


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> On Sun, Mar 30, 2008 at 7:18 PM, Jack Steiner <steiner@sgi.com> wrote:
> > >
> >  > if the calling path like GET_APIC_ID is keeping checking if it is 
> >  > UV box after boot time, that may not good.
> >  >
> >  > don't need make other hundreds of machine keep running the code 
> >  > only for several big box all the time.
> >  >
> >  > YH
> >
> >
> >  I added trace code to see how often GET_APIC_ID() is called. For my 
> >  8p AMD box, the function is called 6 times per cpu during boot. I 
> >  have not seen any other calls to the function after early boot 
> >  although I'm they occur under some circumstances.
> 
> then it is ok.

yes - and even if it were called more frequently, having generic code 
and having the possibility of an as generic as possible kernel image 
(and kernel rpms) is still a very important feature. In that sense 
subarch support is actively harmful and we are trying to move away from 
that model.

It is very nice that Jack has managed to make UV a generic platform 
instead of a subarch - and i'd encourage all future PC platform 
extensions to work via that model. The status of current PC 
subarchitectures is the following:

 - mach-visws: obsolete. We could drop it today - it's been years since 
                         i saw real VISWS bugreports.

 - mach-voyager: obsolete.

 - mach-es7000: on the way out - latest ES7000's are generic.

 - mach-rdc321x: it's being de-sub-architectured. It's about one patch 
                 away from becoming a non-subarch.

so we are just a few patches and a few well-directed voltage spikes away 
from being able to remove the subarch complication from x86 altogether 
;-)

	Ingo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-31 12:33                   ` Ingo Molnar
@ 2008-03-31 12:52                     ` Andi Kleen
  2008-03-31 18:42                     ` Yinghai Lu
  1 sibling, 0 replies; 24+ messages in thread
From: Andi Kleen @ 2008-03-31 12:52 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Yinghai Lu, Jack Steiner, Andi Kleen, tglx, linux-mm, linux-kernel

>  - mach-voyager: obsolete.

You just have to convince James @)

> 
>  - mach-es7000: on the way out - latest ES7000's are generic.

That would mean dropping support for prev-gen es7000 which are not that
old actually (only a few years). But I think with a little effort the 
old es7000 code could be fit into the generic architecture. It is not
that far away from a normal PC.

> 
>  - mach-rdc321x: it's being de-sub-architectured. It's about one patch 
>                  away from becoming a non-subarch.

mach-numaq (not in arch, but spread out all over the port) 
	I hear there are only a one or two machines running left.
	Unfortunately they are in test.kernel.org, but hopefully
	they will die soon.
	NUMAQ has quite a lot of ugly ifdefs and special cases that would be 
	great to eliminate

mach-bigsmp/mach-summit (also in asm only)
	obsolete, should be deprecated for mach-generic
	(just generic currently pulls in code from them)
	A good first step would be to just disable the separate CONFIG
	options and only allow using them through generic.

-Andi


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 8/8] x86_64: Support for new UV apic
  2008-03-31 12:33                   ` Ingo Molnar
  2008-03-31 12:52                     ` Andi Kleen
@ 2008-03-31 18:42                     ` Yinghai Lu
  1 sibling, 0 replies; 24+ messages in thread
From: Yinghai Lu @ 2008-03-31 18:42 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Jack Steiner, Andi Kleen, tglx, linux-mm, linux-kernel

On Mon, Mar 31, 2008 at 5:33 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
>
>  * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
>  > On Sun, Mar 30, 2008 at 7:18 PM, Jack Steiner <steiner@sgi.com> wrote:
>  > > >
>  > >  > if the calling path like GET_APIC_ID is keeping checking if it is
>  > >  > UV box after boot time, that may not good.
>  > >  >
>  > >  > don't need make other hundreds of machine keep running the code
>  > >  > only for several big box all the time.
>  > >  >
>  > >  > YH
>  > >
>  > >
>  > >  I added trace code to see how often GET_APIC_ID() is called. For my
>  > >  8p AMD box, the function is called 6 times per cpu during boot. I
>  > >  have not seen any other calls to the function after early boot
>  > >  although I'm they occur under some circumstances.
>  >
>  > then it is ok.
>
>  yes - and even if it were called more frequently, having generic code
>  and having the possibility of an as generic as possible kernel image
>  (and kernel rpms) is still a very important feature. In that sense
>  subarch support is actively harmful and we are trying to move away from
>  that model.

regarding LinuxBIOS = coreboot + TinyKernel. some box need to use
64bit kernel, because 32 bit kernel could mess up the 64 bit
resources, and final kernel kexeced is 64 bit.

and TinyKernel need to stay with coreboot in MB flash rom, and that
flash is about 2M...

So hope it is easier to use MACRO to mask platform detect code out.

YH

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2008-03-31 18:42 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-03-24 18:21 [RFC 8/8] x86_64: Support for new UV apic Jack Steiner
2008-03-25 10:25 ` Andi Kleen
2008-03-25 17:56   ` Jack Steiner
2008-03-25 18:06     ` Andi Kleen
2008-03-26  2:23       ` Jeremy Fitzhardinge
2008-03-26  3:22         ` Glauber Costa
2008-03-26  7:29         ` Ingo Molnar
2008-03-26  7:38     ` Ingo Molnar
2008-03-30 20:23       ` Yinghai Lu
2008-03-30 21:03         ` Jack Steiner
2008-03-30 21:18           ` Andi Kleen
2008-03-30 23:29             ` Yinghai Lu
2008-03-31  2:18               ` Jack Steiner
2008-03-31  2:20                 ` Yinghai Lu
2008-03-31 12:33                   ` Ingo Molnar
2008-03-31 12:52                     ` Andi Kleen
2008-03-31 18:42                     ` Yinghai Lu
2008-03-31  6:48               ` Andi Kleen
2008-03-25 14:30 ` Ingo Molnar
2008-03-25 16:31   ` Jack Steiner
2008-03-26  3:24     ` Glauber Costa
2008-03-30 20:41 ` Yinghai Lu
2008-03-30 21:08   ` Jack Steiner
2008-03-30 23:24     ` Yinghai Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).