LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Yinghai Lu <yinghai@kernel.org>
To: Tejun Heo <tj@kernel.org>
Cc: mingo@redhat.com, hpa@zytor.com, tglx@linutronix.de,
rientjes@google.com, linux-kernel@vger.kernel.org
Subject: Re: [PATCH 09/14] x86-32, NUMA: Move lowmem address space reservation to init_alloc_remap()
Date: Tue, 05 Apr 2011 11:41:09 -0700 [thread overview]
Message-ID: <4D9B6245.9000600@kernel.org> (raw)
In-Reply-To: <1301955840-7246-10-git-send-email-tj@kernel.org>
On 04/04/2011 03:23 PM, Tejun Heo wrote:
> Remap alloc init is done in the following stages.
>
> 1. init_alloc_remap() calculates how much memory is necessary for each
> node and reserves node local memory.
>
> 2. initmem_init() collects how much each node needs and reserves a
> single contiguous lowmem area which can contain all.
>
> 3. init_remap_allocator() initializes allocator parameters from the
> determined lowmem address and per-node offsets.
>
> 4. Actual remap happens.
>
> There is no reason for the lowmem remap area to be reserved as a
> single contiguous area in one go. They don't interact with each other
> and the memblock allocator will put them side-by-side anyway.
>
> This patch breaks up the single lowmem address reservation and puts
> per-node lowmem address reservation into init_alloc_remap() and
> initializes allocator parameters directly in the function as all the
> addresses are determined there. This merges steps 2 and 3 into 1.
>
> While at it, remove now largely irrelevant comments in
> init_alloc_remap().
>
> This change causes the following behavior changes.
>
> * Remap lowmem areas are allocated in smaller per-node chunks.
>
> * Remap lowmem area reservation failure fails future remap allocations
> instead of panicking.
>
> * Remap allocator initialization is less verbose.
>
> Signed-off-by: Tejun Heo<tj@kernel.org>
> Cc: Yinghai Lu<yinghai@kernel.org>
> Cc: David Rientjes<rientjes@google.com>
> Cc: Thomas Gleixner<tglx@linutronix.de>
> Cc: Ingo Molnar<mingo@redhat.com>
> Cc: "H. Peter Anvin"<hpa@zytor.com>
> ---
> arch/x86/mm/numa_32.c | 82 +++++++++++++++----------------------------------
> 1 files changed, 25 insertions(+), 57 deletions(-)
>
> diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
> index c127543..12bb34c 100644
> --- a/arch/x86/mm/numa_32.c
> +++ b/arch/x86/mm/numa_32.c
> @@ -108,9 +108,6 @@ static unsigned long node_remap_size[MAX_NUMNODES];
> static void *node_remap_start_vaddr[MAX_NUMNODES];
> void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
>
> -static unsigned long kva_start_pfn;
> -static unsigned long kva_pages;
> -
> int __cpuinit numa_cpu_node(int cpu)
> {
> return apic->x86_32_numa_cpu_node(cpu);
> @@ -266,7 +263,8 @@ void resume_map_numa_kva(pgd_t *pgd_base)
> static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
> {
> unsigned long size;
> - u64 node_pa;
> + u64 node_pa, remap_pa;
> + void *remap_va;
>
> /*
> * The acpi/srat node info can show hot-add memroy zones where
> @@ -287,6 +285,7 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
> size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
> size = ALIGN(size, LARGE_PAGE_BYTES);
>
> + /* allocate node memory and the lowmem remap area */
> node_pa = memblock_find_in_range(node_start_pfn[nid]<< PAGE_SHIFT,
> (u64)node_end_pfn[nid]<< PAGE_SHIFT,
> size, LARGE_PAGE_BYTES);
> @@ -295,45 +294,35 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
> size, nid);
> return 0;
> }
> + memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
> +
> + remap_pa = memblock_find_in_range(min_low_pfn<< PAGE_SHIFT,
> + max_low_pfn<< PAGE_SHIFT,
> + size, LARGE_PAGE_BYTES);
> + if (remap_pa == MEMBLOCK_ERROR) {
> + pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
> + size, nid);
> + memblock_x86_free_range(node_pa, node_pa + size);
> + return 0;
> + }
> + memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG");
> + remap_va = phys_to_virt(remap_pa);
>
> + /* initialize remap allocator parameters */
> + node_remap_start_pfn[nid] = node_pa>> PAGE_SHIFT;
> node_remap_size[nid] = size>> PAGE_SHIFT;
> node_remap_offset[nid] = offset;
> - printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of node %d at %llx\n",
> - size>> PAGE_SHIFT, nid, node_pa>> PAGE_SHIFT);
>
> - /*
> - * prevent kva address below max_low_pfn want it on system
> - * with less memory later.
> - * layout will be: KVA address , KVA RAM
> - *
> - * we are supposed to only record the one less then
> - * max_low_pfn but we could have some hole in high memory,
> - * and it will only check page_is_ram(pfn)&&
> - * !page_is_reserved_early(pfn) to decide to use it as free.
> - * So memblock_x86_reserve_range here, hope we don't run out
> - * of that array
> - */
> - memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
> + node_remap_start_vaddr[nid] = remap_va;
> + node_remap_end_vaddr[nid] = remap_va + size;
> + node_remap_alloc_vaddr[nid] = remap_va + ALIGN(sizeof(pg_data_t), PAGE_SIZE);
>
> - node_remap_start_pfn[nid] = node_pa>> PAGE_SHIFT;
> + printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n",
> + nid, node_pa, node_pa + size, remap_va, remap_va + size);
>
> return size>> PAGE_SHIFT;
> }
>
> -static void init_remap_allocator(int nid)
> -{
> - node_remap_start_vaddr[nid] = pfn_to_kaddr(
> - kva_start_pfn + node_remap_offset[nid]);
> - node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
> - (node_remap_size[nid] * PAGE_SIZE);
> - node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
> - ALIGN(sizeof(pg_data_t), PAGE_SIZE);
> -
> - printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid,
> - (ulong) node_remap_start_vaddr[nid],
> - (ulong) node_remap_end_vaddr[nid]);
> -}
> -
> void __init initmem_init(void)
> {
> unsigned long reserve_pages = 0;
> @@ -352,25 +341,7 @@ void __init initmem_init(void)
>
> for_each_online_node(nid)
> reserve_pages += init_alloc_remap(nid, reserve_pages);
> - kva_pages = roundup(reserve_pages, PTRS_PER_PTE);
> - printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n",
> - reserve_pages);
> -
> - kva_start_pfn = memblock_find_in_range(min_low_pfn<< PAGE_SHIFT,
> - max_low_pfn<< PAGE_SHIFT,
> - kva_pages<< PAGE_SHIFT,
> - PTRS_PER_PTE<< PAGE_SHIFT)>> PAGE_SHIFT;
> - if (kva_start_pfn == MEMBLOCK_ERROR)
> - panic("Can not get kva space\n");
> -
> - printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n",
> - kva_start_pfn, max_low_pfn);
> - printk(KERN_INFO "max_pfn = %lx\n", max_pfn);
> -
> - /* avoid clash with initrd */
> - memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT,
> - (kva_start_pfn + kva_pages)<<PAGE_SHIFT,
> - "KVA PG");
> +
> #ifdef CONFIG_HIGHMEM
> highstart_pfn = highend_pfn = max_pfn;
> if (max_pfn> max_low_pfn)
> @@ -390,11 +361,8 @@ void __init initmem_init(void)
>
> printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
> (ulong) pfn_to_kaddr(max_low_pfn));
> - for_each_online_node(nid) {
> - init_remap_allocator(nid);
> -
> + for_each_online_node(nid)
> allocate_pgdat(nid);
> - }
> remap_numa_kva();
>
> printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
Acked-by: Yinghai Lu <yinghai@kernel.org>
next prev parent reply other threads:[~2011-04-05 18:41 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-04-04 22:23 [PATCHSET tip:x86/mm] x86-32, NUMA: Clean up alloc_remap Tejun Heo
2011-04-04 22:23 ` [PATCH 01/14] x86-32, NUMA: Fix failure condition check in alloc_remap() Tejun Heo
2011-04-05 17:20 ` Yinghai Lu
2011-04-07 2:03 ` [tip:x86/numa] x86-32, numa: " tip-bot for Tejun Heo
2011-04-04 22:23 ` [PATCH 02/14] x86-32, NUMA: Align pgdat size while initializing alloc_remap Tejun Heo
2011-04-05 17:28 ` Yinghai Lu
2011-04-07 2:04 ` [tip:x86/numa] x86-32, numa: " tip-bot for Tejun Heo
2011-04-04 22:23 ` [PATCH 03/14] x86-32, NUMA: Remove redundant top-down alloc code from remap initialization Tejun Heo
2011-04-05 17:37 ` Yinghai Lu
2011-04-07 2:04 ` [tip:x86/numa] x86-32, numa: " tip-bot for Tejun Heo
2011-04-04 22:23 ` [PATCH 04/14] x86-32, NUMA: Reorganize calculate_numa_remap_page() Tejun Heo
2011-04-05 17:42 ` Yinghai Lu
2011-04-07 2:05 ` [tip:x86/numa] x86-32, numa: " tip-bot for Tejun Heo
2011-04-04 22:23 ` [PATCH 05/14] x86-32, NUMA: Rename @node_kva to @node_pa in init_alloc_remap() Tejun Heo
2011-04-05 17:44 ` Yinghai Lu
2011-04-07 2:05 ` [tip:x86/numa] x86-32, numa: " tip-bot for Tejun Heo
2011-04-04 22:23 ` [PATCH 06/14] x86-32, NUMA: Make @size in init_aloc_remap() represent bytes Tejun Heo
2011-04-05 17:54 ` Yinghai Lu
2011-04-07 2:05 ` [tip:x86/numa] x86-32, numa: " tip-bot for Tejun Heo
2011-04-04 22:23 ` [PATCH 07/14] x86-32, NUMA: Calculate remap size in common code Tejun Heo
2011-04-05 18:04 ` Yinghai Lu
2011-04-07 2:06 ` [tip:x86/numa] x86-32, numa: " tip-bot for Tejun Heo
2011-04-04 22:23 ` [PATCH 08/14] x86-32, NUMA: Make init_alloc_remap() less panicky Tejun Heo
2011-04-07 2:06 ` [tip:x86/numa] x86-32, numa: " tip-bot for Tejun Heo
2011-04-04 22:23 ` [PATCH 09/14] x86-32, NUMA: Move lowmem address space reservation to init_alloc_remap() Tejun Heo
2011-04-05 18:41 ` Yinghai Lu [this message]
2011-04-07 2:07 ` [tip:x86/numa] x86-32, numa: " tip-bot for Tejun Heo
2011-04-04 22:23 ` [PATCH 10/14] x86-32, NUMA: Move remapping for remap allocator into init_alloc_remap() Tejun Heo
2011-04-05 18:59 ` Yinghai Lu
2011-04-07 2:07 ` [tip:x86/numa] x86-32, numa: " tip-bot for Tejun Heo
2011-04-04 22:23 ` [PATCH 11/14] x86-32, NUMA: Make pgdat allocation use alloc_remap() Tejun Heo
2011-04-05 19:14 ` Yinghai Lu
2011-04-07 2:08 ` [tip:x86/numa] x86-32, numa: " tip-bot for Tejun Heo
2011-04-04 22:23 ` [PATCH 12/14] x86-32, NUMA: Remove now useless node_remap_offset[] Tejun Heo
2011-04-05 19:15 ` Yinghai Lu
2011-04-07 2:08 ` [tip:x86/numa] x86-32, numa: " tip-bot for Tejun Heo
2011-04-04 22:23 ` [PATCH 13/14] x86-32, NUMA: Remove redundant node_remap_size[] Tejun Heo
2011-04-07 2:08 ` [tip:x86/numa] x86-32, numa: " tip-bot for Tejun Heo
2011-04-04 22:24 ` [PATCH 14/14] x86-32, NUMA: Update remap allocator comments Tejun Heo
2011-04-07 2:09 ` [tip:x86/numa] x86-32, numa: " tip-bot for Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4D9B6245.9000600@kernel.org \
--to=yinghai@kernel.org \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=rientjes@google.com \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).