LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: travis@sgi.com
To: Andrew Morton <akpm@linux-foundation.org>,
	Andi Kleen <ak@suse.de>,
	mingo@elte.hu
Cc: Eric Dumazet <dada1@cosmosbay.com>,
	Christoph Lameter <clameter@sgi.com>,
	Jack Steiner <steiner@sgi.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: [PATCH 08/10] x86: Change NR_CPUS arrays in numa_64 V3
Date: Wed, 16 Jan 2008 09:09:10 -0800	[thread overview]
Message-ID: <20080116170903.114188000@sgi.com> (raw)
In-Reply-To: 20080116170902.006151000@sgi.com

[-- Attachment #1: NR_CPUS-arrays-in-numa_64 --]
[-- Type: text/plain, Size: 9494 bytes --]

Change the following static arrays sized by NR_CPUS to
per_cpu data variables:

	char cpu_to_node_map[NR_CPUS];


Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
---
V1->V2:
    - Removed extraneous casts
    - Fix !NUMA builds with '#ifdef CONFIG_NUMA"

V2->V3:
    - add early_cpu_to_node function to keep cpu_to_node efficient
    - move and rename smp_set_apicids() to setup_percpu_maps()
    - call setup_percpu_maps() as early as possible
---
 arch/x86/kernel/setup64.c    |   37 +++++++++++++++++++++++++++++++++++--
 arch/x86/kernel/setup_64.c   |    6 +++++-
 arch/x86/kernel/smpboot_64.c |   27 ++-------------------------
 arch/x86/mm/numa_64.c        |   20 ++++++++++++++++----
 arch/x86/mm/srat_64.c        |    5 +++--
 include/asm-x86/numa_64.h    |    9 ---------
 include/asm-x86/topology.h   |   23 +++++++++++++++++++++--
 mm/page_alloc.c              |    2 +-
 net/sunrpc/svc.c             |    1 +
 9 files changed, 84 insertions(+), 46 deletions(-)

--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -80,6 +80,36 @@ static int __init nonx32_setup(char *str
 __setup("noexec32=", nonx32_setup);
 
 /*
+ * Copy data used in early init routines from the initial arrays to the
+ * per cpu data areas.  These arrays then become expendable and the *_ptrs
+ * are zeroed indicating that the static arrays are gone.
+ */
+void __init setup_percpu_maps(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		if (per_cpu_offset(cpu)) {
+			per_cpu(x86_cpu_to_apicid, cpu) =
+						x86_cpu_to_apicid_init[cpu];
+#ifdef CONFIG_NUMA
+			per_cpu(x86_cpu_to_node_map, cpu) =
+						x86_cpu_to_node_map_init[cpu];
+#endif
+		}
+		else
+			printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
+									cpu);
+	}
+
+	/* indicate the early static arrays are gone */
+	x86_cpu_to_apicid_early_ptr = NULL;
+#ifdef CONFIG_NUMA
+	x86_cpu_to_node_map_early_ptr = NULL;
+#endif
+}
+
+/*
  * Great future plan:
  * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
  * Always point %gs to its beginning
@@ -100,18 +130,21 @@ void __init setup_per_cpu_areas(void)
 	for_each_cpu_mask (i, cpu_possible_map) {
 		char *ptr;
 
-		if (!NODE_DATA(cpu_to_node(i))) {
+		if (!NODE_DATA(early_cpu_to_node(i))) {
 			printk("cpu with no node %d, num_online_nodes %d\n",
 			       i, num_online_nodes());
 			ptr = alloc_bootmem_pages(size);
 		} else { 
-			ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
+			ptr = alloc_bootmem_pages_node(NODE_DATA(early_cpu_to_node(i)), size);
 		}
 		if (!ptr)
 			panic("Cannot allocate cpu data for CPU %d\n", i);
 		cpu_pda(i)->data_offset = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 	}
+
+	/* setup percpu data maps early */
+	setup_percpu_maps();
 } 
 
 void pda_init(int cpu)
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -63,6 +63,7 @@
 #include <asm/cacheflush.h>
 #include <asm/mce.h>
 #include <asm/ds.h>
+#include <asm/topology.h>
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
@@ -372,8 +373,11 @@ void __init setup_arch(char **cmdline_p)
 	io_delay_init();
 
 #ifdef CONFIG_SMP
-	/* setup to use the static apicid table during kernel startup */
+	/* setup to use the early static init tables during kernel startup */
 	x86_cpu_to_apicid_early_ptr = (void *)&x86_cpu_to_apicid_init;
+#ifdef CONFIG_NUMA
+	x86_cpu_to_node_map_early_ptr = (void *)&x86_cpu_to_node_map_init;
+#endif
 #endif
 
 #ifdef CONFIG_ACPI
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -702,7 +702,7 @@ do_rest:
 	if (boot_error) {
 		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
 		clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */
-		clear_node_cpumask(cpu); /* was set by numa_add_cpu */
+		clear_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]);
 		cpu_clear(cpu, cpu_present_map);
 		cpu_clear(cpu, cpu_possible_map);
 		per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
@@ -851,28 +851,6 @@ static int __init smp_sanity_check(unsig
 	return 0;
 }
 
-/*
- * Copy data used in early init routines from the initial arrays to the
- * per cpu data areas.  These arrays then become expendable and the
- * *_ptrs are zeroed indicating that the static arrays are gone.
- */
-void __init smp_set_apicids(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		if (per_cpu_offset(cpu))
-			per_cpu(x86_cpu_to_apicid, cpu) =
-						x86_cpu_to_apicid_init[cpu];
-		else
-			printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
-									cpu);
-	}
-
-	/* indicate the early static arrays are gone */
-	x86_cpu_to_apicid_early_ptr = NULL;
-}
-
 static void __init smp_cpu_index_default(void)
 {
 	int i;
@@ -895,7 +873,6 @@ void __init smp_prepare_cpus(unsigned in
 	smp_cpu_index_default();
 	current_cpu_data = boot_cpu_data;
 	current_thread_info()->cpu = 0;  /* needed? */
-	smp_set_apicids();
 	set_cpu_sibling_map(0);
 
 	if (smp_sanity_check(max_cpus) < 0) {
@@ -1049,7 +1026,7 @@ void remove_cpu_from_maps(void)
 	cpu_clear(cpu, cpu_callout_map);
 	cpu_clear(cpu, cpu_callin_map);
 	clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */
-	clear_node_cpumask(cpu);
+	clear_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]);
 }
 
 int __cpu_disable(void)
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -31,10 +31,14 @@ bootmem_data_t plat_node_bdata[MAX_NUMNO
 
 struct memnode memnode;
 
-u16 cpu_to_node_map[NR_CPUS] __read_mostly = {
+u16 x86_cpu_to_node_map_init[NR_CPUS] __initdata = {
 	[0 ... NR_CPUS-1] = NUMA_NO_NODE
 };
-EXPORT_SYMBOL(cpu_to_node_map);
+void *x86_cpu_to_node_map_early_ptr;
+EXPORT_SYMBOL(x86_cpu_to_node_map_init);
+EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
+DEFINE_PER_CPU(u16, x86_cpu_to_node_map) = NUMA_NO_NODE;
+EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 
 u16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
@@ -545,7 +549,7 @@ void __init numa_initmem_init(unsigned l
 	node_set(0, node_possible_map);
 	for (i = 0; i < NR_CPUS; i++)
 		numa_set_node(i, 0);
-	/* we can't use cpumask_of_cpu() yet */
+	/* cpumask_of_cpu() may not be available during early startup */
 	memset(&node_to_cpumask_map[0], 0, sizeof(node_to_cpumask_map[0]));
 	cpu_set(0, node_to_cpumask_map[0]);
 	e820_register_active_regions(0, start_pfn, end_pfn);
@@ -559,8 +563,16 @@ __cpuinit void numa_add_cpu(int cpu)
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
+	u16 *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
+
 	cpu_pda(cpu)->nodenumber = node;
-	cpu_to_node_map[cpu] = node;
+
+	if(cpu_to_node_map)
+		cpu_to_node_map[cpu] = node;
+	else if(per_cpu_offset(cpu))
+		per_cpu(x86_cpu_to_node_map, cpu) = node;
+	else
+		Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
 }
 
 unsigned long __init numa_free_all_bootmem(void)
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -382,9 +382,10 @@ int __init acpi_scan_nodes(unsigned long
 			setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 
 	for (i = 0; i < NR_CPUS; i++) {
-		if (cpu_to_node(i) == NUMA_NO_NODE)
+		int node = cpu_to_node(i);
+		if (node == NUMA_NO_NODE)
 			continue;
-		if (!node_isset(cpu_to_node(i), node_possible_map))
+		if (!node_isset(node, node_possible_map))
 			numa_set_node(i, NUMA_NO_NODE);
 	}
 	numa_init_array();
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -29,17 +29,8 @@ extern void setup_node_bootmem(int nodei
 
 #ifdef CONFIG_NUMA
 extern void __init init_cpu_to_node(void);
-
-static inline void clear_node_cpumask(int cpu)
-{
-	clear_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]);
-}
-
 #else
 #define init_cpu_to_node() do {} while (0)
-#define clear_node_cpumask(cpu) do {} while (0)
 #endif
 
-#define NUMA_NO_NODE 0xffff
-
 #endif
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -30,13 +30,32 @@
 #include <asm/mpspec.h>
 
 /* Mappings between logical cpu number and node number */
-extern u16 cpu_to_node_map[];
+DECLARE_PER_CPU(u16, x86_cpu_to_node_map);
+extern u16 __initdata x86_cpu_to_node_map_init[];
+extern void *x86_cpu_to_node_map_early_ptr;
 extern cpumask_t node_to_cpumask_map[];
 
+#define NUMA_NO_NODE	((u16)(~0))
+
 /* Returns the number of the node containing CPU 'cpu' */
+static inline int early_cpu_to_node(int cpu)
+{
+	u16 *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
+
+	if (cpu_to_node_map)
+		return cpu_to_node_map[cpu];
+	else if(per_cpu_offset(cpu))
+		return per_cpu(x86_cpu_to_node_map, cpu);
+	else
+		return NUMA_NO_NODE;
+}
+
 static inline int cpu_to_node(int cpu)
 {
-	return cpu_to_node_map[cpu];
+	if(per_cpu_offset(cpu))
+		return per_cpu(x86_cpu_to_node_map, cpu);
+	else
+		return NUMA_NO_NODE;
 }
 
 /*
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1783,7 +1783,7 @@ EXPORT_SYMBOL(free_pages);
 static unsigned int nr_free_zone_pages(int offset)
 {
 	/* Just pick one node, since fallback list is circular */
-	pg_data_t *pgdat = NODE_DATA(numa_node_id());
+	pg_data_t *pgdat = NODE_DATA(cpu_to_node(raw_smp_processor_id()));
 	unsigned int sum = 0;
 
 	struct zonelist *zonelist = pgdat->node_zonelists + offset;
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -18,6 +18,7 @@
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/sched.h>
 
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/xdr.h>

-- 

  parent reply	other threads:[~2008-01-16 17:13 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-01-16 17:09 [PATCH 00/10] x86: Reduce memory and intra-node effects with large count NR_CPUs V3 travis
2008-01-16 17:09 ` [PATCH 01/10] x86: Change size of APICIDs from u8 to u16 V3 travis
2008-01-16 17:09 ` [PATCH 02/10] x86: Change size of node ids " travis
2008-01-16 17:53   ` Eric Dumazet
2008-01-16 18:10     ` Mike Travis
2008-01-16 19:16     ` Mike Travis
2008-01-16 20:37       ` Eric Dumazet
2008-01-16 17:09 ` [PATCH 03/10] x86: Change NR_CPUS arrays in powernow-k8 V3 travis
2008-01-16 17:09 ` [PATCH 04/10] x86: Change NR_CPUS arrays in intel_cacheinfo V3 travis
2008-01-16 17:09 ` [PATCH 05/10] x86: Change NR_CPUS arrays in smpboot_64 V3 travis
2008-01-16 17:09 ` [PATCH 06/10] x86: Change NR_CPUS arrays in topology V3 travis
2008-01-16 17:09 ` [PATCH 07/10] x86: Cleanup x86_cpu_to_apicid references V3 travis
2008-01-16 17:09 ` travis [this message]
2008-01-16 17:09 ` [PATCH 09/10] x86: Change NR_CPUS arrays in acpi-cpufreq V3 travis
2008-01-16 17:09 ` [PATCH 10/10] x86: Change bios_cpu_apicid to percpu data variable V3 travis
2008-01-16 18:01 ` [PATCH 00/10] x86: Reduce memory and intra-node effects with large count NR_CPUs V3 Frans Pop
2008-01-16 18:14   ` Mike Travis

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080116170903.114188000@sgi.com \
    --to=travis@sgi.com \
    --cc=ak@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=clameter@sgi.com \
    --cc=dada1@cosmosbay.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    --cc=steiner@sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).