LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH] x86: trim mtrr don't close gap for resource allocation.
[not found] ` <200803181255.10402.yhlu.kernel@gmail.com>
@ 2008-03-18 23:44 ` Yinghai Lu
2008-03-21 10:44 ` Ingo Molnar
0 siblings, 1 reply; 17+ messages in thread
From: Yinghai Lu @ 2008-03-18 23:44 UTC (permalink / raw)
To: Andrew Morton, Ingo Molnar, H. Peter Anvin
Cc: steve, Jesse Barnes, kernel list
[PATCH] x86: trim mtrr don't close gap for resource allocation.
for
http://bugzilla.kernel.org/show_bug.cgi?id=10232
use update_memory_range instead of add_memory_range directly
to avoid closing that gap.
Signed-off-by: Yinghai Lu <yhlu.kenrel@gmail.com>
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -711,7 +711,8 @@ int __init mtrr_trim_uncached_memory(uns
trim_size = end_pfn;
trim_size <<= PAGE_SHIFT;
trim_size -= trim_start;
- add_memory_region(trim_start, trim_size, E820_RESERVED);
+ update_memory_range(trim_start, trim_size, E820_RAM,
+ E820_RESERVED);
update_e820();
return 1;
}
Index: linux-2.6/arch/x86/kernel/e820_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_32.c
+++ linux-2.6/arch/x86/kernel/e820_32.c
@@ -749,6 +749,32 @@ static int __init parse_memmap(char *arg
return 0;
}
early_param("memmap", parse_memmap);
+void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+ unsigned new_type)
+{
+ int i;
+
+ BUG_ON(old_type == new_type);
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ u64 final_start, final_end;
+ if (ei->type != old_type)
+ continue;
+ /* totally covered? */
+ if (ei->addr >= start && ei->size <= size) {
+ ei->type = new_type;
+ continue;
+ }
+ /* partially covered */
+ final_start = max(start, ei->addr);
+ final_end = min(start + size, ei->addr + ei->size);
+ if (final_start >= final_end)
+ continue;
+ add_memory_region(final_start, final_end - final_start,
+ new_type);
+ }
+}
void __init update_e820(void)
{
u8 nr_map;
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -735,6 +735,33 @@ void __init finish_e820_parsing(void)
}
}
+void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+ unsigned new_type)
+{
+ int i;
+
+ BUG_ON(old_type == new_type);
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ u64 final_start, final_end;
+ if (ei->type != old_type)
+ continue;
+ /* totally covered? */
+ if (ei->addr >= start && ei->size <= size) {
+ ei->type = new_type;
+ continue;
+ }
+ /* partially covered */
+ final_start = max(start, ei->addr);
+ final_end = min(start + size, ei->addr + ei->size);
+ if (final_start >= final_end)
+ continue;
+ add_memory_region(final_start, final_end - final_start,
+ new_type);
+ }
+}
+
void __init update_e820(void)
{
u8 nr_map;
Index: linux-2.6/include/asm-x86/e820_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_32.h
+++ linux-2.6/include/asm-x86/e820_32.h
@@ -28,6 +28,8 @@ extern void find_max_pfn(void);
extern void register_bootmem_low_pages(unsigned long max_low_pfn);
extern void add_memory_region(unsigned long long start,
unsigned long long size, int type);
+extern void update_memory_range(u64 start, u64 size, unsigned old_type,
+ unsigned new_type);
extern void e820_register_memory(void);
extern void limit_regions(unsigned long long size);
extern void print_memory_map(char *who);
Index: linux-2.6/include/asm-x86/e820_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_64.h
+++ linux-2.6/include/asm-x86/e820_64.h
@@ -18,6 +18,8 @@ extern unsigned long find_e820_area(unsi
unsigned long size, unsigned long align);
extern void add_memory_region(unsigned long start, unsigned long size,
int type);
+extern void update_memory_range(u64 start, u64 size, unsigned old_type,
+ unsigned new_type);
extern void setup_memory_region(void);
extern void contig_e820_setup(void);
extern unsigned long e820_end_of_ram(void);
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 02/12] mm: fix boundary checking in free_bootmem_core fix
[not found] <200803181237.33861.yhlu.kernel@gmail.com>
[not found] ` <200803181255.10402.yhlu.kernel@gmail.com>
@ 2008-03-19 21:03 ` Yinghai Lu
2008-03-19 21:03 ` [PATCH 03/12] x86_64: free_bootmem should take phys Yinghai Lu
` (9 subsequent siblings)
11 siblings, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-03-19 21:03 UTC (permalink / raw)
To: Andrew Morton, Ingo Molnar; +Cc: Christoph Lameter, kernel list
[PATCH] mm: fix boundary checking in free_bootmem_core fix
[PATCH] mm: fix boundary checking in free_bootmem_core
make free_bootmem_core able to handle out-of-range requests. we can use
bdata_list to make sure the whole range really gets freed.
so next time, we don't need to loop over online nodes and can use
free_bootmem directly.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/mm/bootmem.c
===================================================================
--- linux-2.6.orig/mm/bootmem.c
+++ linux-2.6/mm/bootmem.c
@@ -432,7 +432,9 @@ int __init reserve_bootmem(unsigned long
void __init free_bootmem(unsigned long addr, unsigned long size)
{
- free_bootmem_core(NODE_DATA(0)->bdata, addr, size);
+ bootmem_data_t *bdata;
+ list_for_each_entry(bdata, &bdata_list, list)
+ free_bootmem_core(bdata, addr, size);
}
unsigned long __init free_all_bootmem(void)
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 03/12] x86_64: free_bootmem should take phys
[not found] <200803181237.33861.yhlu.kernel@gmail.com>
[not found] ` <200803181255.10402.yhlu.kernel@gmail.com>
2008-03-19 21:03 ` [PATCH 02/12] mm: fix boundary checking in free_bootmem_core fix Yinghai Lu
@ 2008-03-19 21:03 ` Yinghai Lu
2008-03-19 21:03 ` [PATCH 04/12] x86_64: reserve dma32 early for gart Yinghai Lu
` (8 subsequent siblings)
11 siblings, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-03-19 21:03 UTC (permalink / raw)
To: Andrew Morton, Ingo Molnar; +Cc: Christoph Lameter, kernel list
[PATCH] x86_64: free_bootmem should take phys
so use nodedata_phys directly.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/mm/numa_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/numa_64.c
+++ linux-2.6/arch/x86/mm/numa_64.c
@@ -221,8 +221,7 @@ void __init setup_node_bootmem(int nodei
bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
if (bootmap == NULL) {
if (nodedata_phys < start || nodedata_phys >= end)
- free_bootmem((unsigned long)node_data[nodeid],
- pgdat_size);
+ free_bootmem(nodedata_phys, pgdat_size);
node_data[nodeid] = NULL;
return;
}
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 04/12] x86_64: reserve dma32 early for gart
[not found] <200803181237.33861.yhlu.kernel@gmail.com>
` (2 preceding siblings ...)
2008-03-19 21:03 ` [PATCH 03/12] x86_64: free_bootmem should take phys Yinghai Lu
@ 2008-03-19 21:03 ` Yinghai Lu
2008-03-19 21:04 ` [PATCH 05/12] mm: make mem_map allocation continuous Yinghai Lu
` (7 subsequent siblings)
11 siblings, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-03-19 21:03 UTC (permalink / raw)
To: Andrew Morton, Ingo Molnar; +Cc: Christoph Lameter, kernel list
[PATCH] x86_64: reserve dma32 early for gart
one system with 256g when numa is disabled said:
Your BIOS doesn't leave a aperture memory hole
Please enable the IOMMU option in the BIOS setup
This costs you 64 MB of RAM
Cannot allocate aperture memory hole (ffff8101c0000000,65536K)
Kernel panic - not syncing: Not enough memory for aperture
Pid: 0, comm: swapper Not tainted 2.6.25-rc4-x86-latest.git #33
Call Trace:
[<ffffffff84037c62>] panic+0xb2/0x190
[<ffffffff840381fc>] ? release_console_sem+0x7c/0x250
[<ffffffff847b1628>] ? __alloc_bootmem_nopanic+0x48/0x90
[<ffffffff847b0ac9>] ? free_bootmem+0x29/0x50
[<ffffffff847ac1f7>] gart_iommu_hole_init+0x5e7/0x680
[<ffffffff847b255b>] ? alloc_large_system_hash+0x16b/0x310
[<ffffffff84506a2f>] ? _etext+0x0/0x1
[<ffffffff847a2e8c>] pci_iommu_alloc+0x1c/0x40
[<ffffffff847ac795>] mem_init+0x45/0x1a0
[<ffffffff8479ff35>] start_kernel+0x295/0x380
[<ffffffff8479f1c2>] _sinittext+0x1c2/0x230
the root cause is: the memmap PMD is too big,
[ffffe200e0600000-ffffe200e07fffff] PMD ->ffff81383c000000 on node 0
almost near 4G..., and vmemmap_alloc_block will use up the ram under 4G.
solution will be:
1. make memmap allocation get memory above 4G...
2. reserve some dma32 range early before we try to set up memmap for all.
and release that before pci_iommu_alloc, so gart or swiotlb could get some
range under 4g limit for sure.
the patch is using method 2.
because method 1 may need more code to handle SPARSEMEM and SPARSEMEM_VMEMMAP
will get
Your BIOS doesn't leave a aperture memory hole
Please enable the IOMMU option in the BIOS setup
This costs you 64 MB of RAM
Mapping aperture over 65536 KB of RAM @ 4000000
Memory: 264245736k/268959744k available (8484k kernel code, 4187464k reserved, 4004k data, 724k init)
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/pci-dma_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/pci-dma_64.c
+++ linux-2.6/arch/x86/kernel/pci-dma_64.c
@@ -8,6 +8,8 @@
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/dmar.h>
+#include <linux/bootmem.h>
+#include <asm/proto.h>
#include <asm/io.h>
#include <asm/gart.h>
#include <asm/calgary.h>
@@ -286,8 +288,53 @@ static __init int iommu_setup(char *p)
}
early_param("iommu", iommu_setup);
+static __initdata void *dma32_bootmem_ptr;
+static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
+
+static int __init parse_dma32_size_opt(char *p)
+{
+ if (!p)
+ return -EINVAL;
+ dma32_bootmem_size = memparse(p, &p);
+ return 0;
+}
+early_param("dma32_size", parse_dma32_size_opt);
+
+void __init dma32_reserve_bootmem(void)
+{
+ unsigned long size, align;
+ if (end_pfn <= MAX_DMA32_PFN)
+ return;
+
+ align = 64ULL<<20;
+ size = round_up(dma32_bootmem_size, align);
+ dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
+ __pa(MAX_DMA_ADDRESS));
+ if (dma32_bootmem_ptr)
+ dma32_bootmem_size = size;
+ else
+ dma32_bootmem_size = 0;
+}
+static void __init dma32_free_bootmem(void)
+{
+ int node;
+
+ if (end_pfn <= MAX_DMA32_PFN)
+ return;
+
+ if (!dma32_bootmem_ptr)
+ return;
+
+ free_bootmem(__pa(dma32_bootmem_ptr), dma32_bootmem_size);
+
+ dma32_bootmem_ptr = NULL;
+ dma32_bootmem_size = 0;
+}
+
void __init pci_iommu_alloc(void)
{
+ /* free the range so iommu could get some range less than 4G */
+ dma32_free_bootmem();
/*
* The order of these functions is important for
* fall-back/fail-over reasons
Index: linux-2.6/arch/x86/kernel/setup_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_64.c
+++ linux-2.6/arch/x86/kernel/setup_64.c
@@ -397,6 +397,8 @@ void __init setup_arch(char **cmdline_p)
early_res_to_bootmem();
+ dma32_reserve_bootmem();
+
#ifdef CONFIG_ACPI_SLEEP
/*
* Reserve low memory region for sleep support.
Index: linux-2.6/include/asm-x86/pci_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/pci_64.h
+++ linux-2.6/include/asm-x86/pci_64.h
@@ -25,6 +25,7 @@ extern int (*pci_config_write)(int seg,
+extern void dma32_reserve_bootmem(void);
extern void pci_iommu_alloc(void);
/* The PCI address space does equal the physical memory
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 05/12] mm: make mem_map allocation continuous.
[not found] <200803181237.33861.yhlu.kernel@gmail.com>
` (3 preceding siblings ...)
2008-03-19 21:03 ` [PATCH 04/12] x86_64: reserve dma32 early for gart Yinghai Lu
@ 2008-03-19 21:04 ` Yinghai Lu
2008-03-19 21:04 ` [PATCH 06/12] mm: fix alloc_bootmem_core to use fast searching for all nodes Yinghai Lu
` (6 subsequent siblings)
11 siblings, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-03-19 21:04 UTC (permalink / raw)
To: Andrew Morton, Ingo Molnar; +Cc: Christoph Lameter, kernel list
[PATCH] mm: make mem_map allocation continuous.
vmemmap allocation current got
[ffffe20000000000-ffffe200001fffff] PMD ->ffff810001400000 on node 0
[ffffe20000200000-ffffe200003fffff] PMD ->ffff810001800000 on node 0
[ffffe20000400000-ffffe200005fffff] PMD ->ffff810001c00000 on node 0
[ffffe20000600000-ffffe200007fffff] PMD ->ffff810002000000 on node 0
[ffffe20000800000-ffffe200009fffff] PMD ->ffff810002400000 on node 0
...
there is 2M hole between them.
the root cause is that the usemap (24 bytes) is allocated after every 2M
mem_map, and it pushes the next vmemmap (2M) to the next 2M alignment boundary.
solution:
try to allocate the mem_map regions continuously.
after patch, will get
[ffffe20000000000-ffffe200001fffff] PMD ->ffff810001400000 on node 0
[ffffe20000200000-ffffe200003fffff] PMD ->ffff810001600000 on node 0
[ffffe20000400000-ffffe200005fffff] PMD ->ffff810001800000 on node 0
[ffffe20000600000-ffffe200007fffff] PMD ->ffff810001a00000 on node 0
[ffffe20000800000-ffffe200009fffff] PMD ->ffff810001c00000 on node 0
...
and the usemaps will share a page because they are allocated continuously too.
sparse_early_usemap_alloc: usemap = ffff810024e00000 size = 24
sparse_early_usemap_alloc: usemap = ffff810024e00080 size = 24
sparse_early_usemap_alloc: usemap = ffff810024e00100 size = 24
sparse_early_usemap_alloc: usemap = ffff810024e00180 size = 24
...
so we make the bootmem allocation more compact and use less memory for usemap.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/mm/sparse.c
===================================================================
--- linux-2.6.orig/mm/sparse.c
+++ linux-2.6/mm/sparse.c
@@ -285,6 +286,8 @@ struct page __init *sparse_early_mem_map
return NULL;
}
+/* section_map pointer array is 64k */
+static __initdata struct page *section_map[NR_MEM_SECTIONS];
/*
* Allocate the accumulated non-linear sections, allocate a mem_map
* for each and record the physical to section mapping.
@@ -295,14 +298,29 @@ void __init sparse_init(void)
struct page *map;
unsigned long *usemap;
+ /*
+ * map is using big page (aka 2M in x86 64 bit)
+ * usemap is less one page (aka 24 bytes)
+ * so alloc 2M (with 2M align) and 24 bytes in turn will
+ * make next 2M slip to one more 2M later.
+ * then in big system, the memmory will have a lot hole...
+ * here try to allocate 2M pages continously.
+ */
for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
if (!present_section_nr(pnum))
continue;
+ section_map[pnum] = sparse_early_mem_map_alloc(pnum);
+ }
- map = sparse_early_mem_map_alloc(pnum);
- if (!map)
+
+ for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
+ if (!present_section_nr(pnum))
continue;
+ map = section_map[pnum];
+ if (!map)
+ continue;
+
usemap = sparse_early_usemap_alloc(pnum);
if (!usemap)
continue;
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 06/12] mm: fix alloc_bootmem_core to use fast searching for all nodes
[not found] <200803181237.33861.yhlu.kernel@gmail.com>
` (4 preceding siblings ...)
2008-03-19 21:04 ` [PATCH 05/12] mm: make mem_map allocation continuous Yinghai Lu
@ 2008-03-19 21:04 ` Yinghai Lu
2008-03-19 21:04 ` [PATCH 07/12] mm: offset align in alloc_bootmem v3 Yinghai Lu
` (5 subsequent siblings)
11 siblings, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-03-19 21:04 UTC (permalink / raw)
To: Andrew Morton, Ingo Molnar; +Cc: Christoph Lameter, kernel list
[PATCH] mm: fix alloc_bootmem_core to use fast searching for all nodes
make nodes other than node 0 able to use bdata->last_success for fast searching too.
we need to use __alloc_bootmem_core for vmemmap allocation for other nodes when
numa and sparsemem/vmemmap are enabled.
also make the fail_block path increase i by incr only when needed after ALIGN,
to avoid an extra increase when size is larger than align.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/mm/bootmem.c
===================================================================
--- linux-2.6.orig/mm/bootmem.c
+++ linux-2.6/mm/bootmem.c
@@ -238,28 +238,32 @@ __alloc_bootmem_core(struct bootmem_data
* We try to allocate bootmem pages above 'goal'
* first, then we try to allocate lower pages.
*/
- if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) {
- preferred = goal - bdata->node_boot_start;
+ preferred = 0;
+ if (goal && PFN_DOWN(goal) < end_pfn) {
+ if (goal > bdata->node_boot_start)
+ preferred = goal - bdata->node_boot_start;
if (bdata->last_success >= preferred)
if (!limit || (limit && limit > bdata->last_success))
preferred = bdata->last_success;
- } else
- preferred = 0;
+ }
preferred = PFN_DOWN(ALIGN(preferred, align)) + offset;
areasize = (size + PAGE_SIZE-1) / PAGE_SIZE;
incr = align >> PAGE_SHIFT ? : 1;
restart_scan:
- for (i = preferred; i < eidx; i += incr) {
+ for (i = preferred; i < eidx;) {
unsigned long j;
+
i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
i = ALIGN(i, incr);
if (i >= eidx)
break;
- if (test_bit(i, bdata->node_bootmem_map))
+ if (test_bit(i, bdata->node_bootmem_map)) {
+ i += incr;
continue;
+ }
for (j = i + 1; j < i + areasize; ++j) {
if (j >= eidx)
goto fail_block;
@@ -270,6 +274,8 @@ restart_scan:
goto found;
fail_block:
i = ALIGN(j, incr);
+ if (i == j)
+ i += incr;
}
if (preferred > offset) {
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 07/12] mm: offset align in alloc_bootmem v3
[not found] <200803181237.33861.yhlu.kernel@gmail.com>
` (5 preceding siblings ...)
2008-03-19 21:04 ` [PATCH 06/12] mm: fix alloc_bootmem_core to use fast searching for all nodes Yinghai Lu
@ 2008-03-19 21:04 ` Yinghai Lu
2008-03-19 21:04 ` [PATCH 08/12] mm: allocate section_map for sparse_init Yinghai Lu
` (4 subsequent siblings)
11 siblings, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-03-19 21:04 UTC (permalink / raw)
To: Andrew Morton, Ingo Molnar; +Cc: Christoph Lameter, kernel list
[PATCH] mm: offset align in alloc_bootmem v3
need offset alignment when node_boot_start's alignment is less than the
required align.
use a local node_boot_start to match the align, so we don't add extra
operations in the search loop.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/mm/bootmem.c
===================================================================
--- linux-2.6.orig/mm/bootmem.c
+++ linux-2.6/mm/bootmem.c
@@ -206,9 +206,11 @@ void * __init
__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
unsigned long align, unsigned long goal, unsigned long limit)
{
- unsigned long offset, remaining_size, areasize, preferred;
+ unsigned long areasize, preferred;
unsigned long i, start = 0, incr, eidx, end_pfn;
void *ret;
+ unsigned long node_boot_start;
+ void *node_bootmem_map;
if (!size) {
printk("__alloc_bootmem_core(): zero-sized request\n");
@@ -216,23 +218,29 @@ __alloc_bootmem_core(struct bootmem_data
}
BUG_ON(align & (align-1));
- if (limit && bdata->node_boot_start >= limit)
- return NULL;
-
/* on nodes without memory - bootmem_map is NULL */
if (!bdata->node_bootmem_map)
return NULL;
+ /* bdata->node_boot_start is supposed to be (12+6)bits alignment on x86_64 ? */
+ node_boot_start = bdata->node_boot_start;
+ node_bootmem_map = bdata->node_bootmem_map;
+ if (align) {
+ node_boot_start = ALIGN(bdata->node_boot_start, align);
+ if (node_boot_start > bdata->node_boot_start)
+ node_bootmem_map = (unsigned long *)bdata->node_bootmem_map +
+ PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG;
+ }
+
+ if (limit && node_boot_start >= limit)
+ return NULL;
+
end_pfn = bdata->node_low_pfn;
limit = PFN_DOWN(limit);
if (limit && end_pfn > limit)
end_pfn = limit;
- eidx = end_pfn - PFN_DOWN(bdata->node_boot_start);
- offset = 0;
- if (align && (bdata->node_boot_start & (align - 1UL)) != 0)
- offset = align - (bdata->node_boot_start & (align - 1UL));
- offset = PFN_DOWN(offset);
+ eidx = end_pfn - PFN_DOWN(node_boot_start);
/*
* We try to allocate bootmem pages above 'goal'
@@ -240,15 +248,16 @@ __alloc_bootmem_core(struct bootmem_data
*/
preferred = 0;
if (goal && PFN_DOWN(goal) < end_pfn) {
- if (goal > bdata->node_boot_start)
- preferred = goal - bdata->node_boot_start;
+ if (goal > node_boot_start)
+ preferred = goal - node_boot_start;
- if (bdata->last_success >= preferred)
+ if (bdata->last_success > node_boot_start &&
+ bdata->last_success - node_boot_start >= preferred)
if (!limit || (limit && limit > bdata->last_success))
- preferred = bdata->last_success;
+ preferred = bdata->last_success - node_boot_start;
}
- preferred = PFN_DOWN(ALIGN(preferred, align)) + offset;
+ preferred = PFN_DOWN(ALIGN(preferred, align));
areasize = (size + PAGE_SIZE-1) / PAGE_SIZE;
incr = align >> PAGE_SHIFT ? : 1;
@@ -256,18 +265,18 @@ restart_scan:
for (i = preferred; i < eidx;) {
unsigned long j;
- i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
+ i = find_next_zero_bit(node_bootmem_map, eidx, i);
i = ALIGN(i, incr);
if (i >= eidx)
break;
- if (test_bit(i, bdata->node_bootmem_map)) {
+ if (test_bit(i, node_bootmem_map)) {
i += incr;
continue;
}
for (j = i + 1; j < i + areasize; ++j) {
if (j >= eidx)
goto fail_block;
- if (test_bit(j, bdata->node_bootmem_map))
+ if (test_bit(j, node_bootmem_map))
goto fail_block;
}
start = i;
@@ -278,14 +287,14 @@ restart_scan:
i += incr;
}
- if (preferred > offset) {
- preferred = offset;
+ if (preferred > 0) {
+ preferred = 0;
goto restart_scan;
}
return NULL;
found:
- bdata->last_success = PFN_PHYS(start);
+ bdata->last_success = PFN_PHYS(start) + node_boot_start;
BUG_ON(start >= eidx);
/*
@@ -295,6 +304,7 @@ found:
*/
if (align < PAGE_SIZE &&
bdata->last_offset && bdata->last_pos+1 == start) {
+ unsigned long offset, remaining_size;
offset = ALIGN(bdata->last_offset, align);
BUG_ON(offset > PAGE_SIZE);
remaining_size = PAGE_SIZE - offset;
@@ -303,14 +313,12 @@ found:
/* last_pos unchanged */
bdata->last_offset = offset + size;
ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
- offset +
- bdata->node_boot_start);
+ offset + node_boot_start);
} else {
remaining_size = size - remaining_size;
areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
- offset +
- bdata->node_boot_start);
+ offset + node_boot_start);
bdata->last_pos = start + areasize - 1;
bdata->last_offset = remaining_size;
}
@@ -318,14 +326,14 @@ found:
} else {
bdata->last_pos = start + areasize - 1;
bdata->last_offset = size & ~PAGE_MASK;
- ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
+ ret = phys_to_virt(start * PAGE_SIZE + node_boot_start);
}
/*
* Reserve the area now:
*/
for (i = start; i < start + areasize; i++)
- if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
+ if (unlikely(test_and_set_bit(i, node_bootmem_map)))
BUG();
memset(ret, 0, size);
return ret;
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 08/12] mm: allocate section_map for sparse_init
[not found] <200803181237.33861.yhlu.kernel@gmail.com>
` (6 preceding siblings ...)
2008-03-19 21:04 ` [PATCH 07/12] mm: offset align in alloc_bootmem v3 Yinghai Lu
@ 2008-03-19 21:04 ` Yinghai Lu
2008-03-19 21:04 ` [PATCH 09/12] mm: make reserve_bootmem can crossed the nodes v2 Yinghai Lu
` (3 subsequent siblings)
11 siblings, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-03-19 21:04 UTC (permalink / raw)
To: Andrew Morton, Ingo Molnar; +Cc: Christoph Lameter, kernel list
[PATCH] mm: allocate section_map for sparse_init
allocate section_map in bootmem instead of using __initdata.
need to apply after
[PATCH] mm: fix boundary checking in free_bootmem_core
[PATCH] mm: make mem_map allocation continuous.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/mm/sparse.c
===================================================================
--- linux-2.6.orig/mm/sparse.c
+++ linux-2.6/mm/sparse.c
@@ -285,8 +285,6 @@ struct page __init *sparse_early_mem_map
return NULL;
}
-/* section_map pointer array is 64k */
-static __initdata struct page *section_map[NR_MEM_SECTIONS];
/*
* Allocate the accumulated non-linear sections, allocate a mem_map
* for each and record the physical to section mapping.
@@ -296,6 +294,9 @@ void __init sparse_init(void)
unsigned long pnum;
struct page *map;
unsigned long *usemap;
+ struct page **section_map;
+ int size;
+ int node;
/*
* map is using big page (aka 2M in x86 64 bit)
@@ -305,13 +306,17 @@ void __init sparse_init(void)
* then in big system, the memmory will have a lot hole...
* here try to allocate 2M pages continously.
*/
+ size = sizeof(struct page *) * NR_MEM_SECTIONS;
+ section_map = alloc_bootmem(size);
+ if (!section_map)
+ panic("can not allocate section_map\n");
+
for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
if (!present_section_nr(pnum))
continue;
section_map[pnum] = sparse_early_mem_map_alloc(pnum);
}
-
for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
if (!present_section_nr(pnum))
continue;
@@ -327,6 +332,8 @@ void __init sparse_init(void)
sparse_init_one_section(__nr_to_section(pnum), pnum, map,
usemap);
}
+
+ free_bootmem(__pa(section_map), size);
}
#ifdef CONFIG_MEMORY_HOTPLUG
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 09/12] mm: make reserve_bootmem can crossed the nodes v2
[not found] <200803181237.33861.yhlu.kernel@gmail.com>
` (7 preceding siblings ...)
2008-03-19 21:04 ` [PATCH 08/12] mm: allocate section_map for sparse_init Yinghai Lu
@ 2008-03-19 21:04 ` Yinghai Lu
2008-03-19 21:04 ` [PATCH 10/12] x86_64: make reserve_bootmem_generic to use new reserve_bootmem Yinghai Lu
` (2 subsequent siblings)
11 siblings, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-03-19 21:04 UTC (permalink / raw)
To: Andrew Morton, Ingo Molnar; +Cc: Christoph Lameter, kernel list
[PATCH] mm: make reserve_bootmem can crossed the nodes v2
split reserve_bootmem_core into two functions: one checks conflicts, and one sets bits.
and make reserve_bootmem loop over bdata_list to cross the nodes.
users could be crashkernel and ramdisk..., in case the range crosses the nodes.
v2: fix the out-of-range check
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/mm/bootmem.c
===================================================================
--- linux-2.6.orig/mm/bootmem.c
+++ linux-2.6/mm/bootmem.c
@@ -111,44 +111,71 @@ static unsigned long __init init_bootmem
* might be used for boot-time allocations - or it might get added
* to the free page pool later on.
*/
-static int __init reserve_bootmem_core(bootmem_data_t *bdata,
+static int __init can_reserve_bootmem_core(bootmem_data_t *bdata,
unsigned long addr, unsigned long size, int flags)
{
unsigned long sidx, eidx;
unsigned long i;
- int ret;
+
+ BUG_ON(!size);
+
+ /* out of range, don't hold other */
+ if (addr + size < bdata->node_boot_start ||
+ PFN_DOWN(addr) > bdata->node_low_pfn)
+ return 0;
/*
- * round up, partially reserved pages are considered
- * fully reserved.
+ * Round up to index to the range.
*/
+ if (addr > bdata->node_boot_start)
+ sidx= PFN_DOWN(addr - bdata->node_boot_start);
+ else
+ sidx = 0;
+
+ eidx = PFN_UP(addr + size - bdata->node_boot_start);
+ if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+ eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+
+ for (i = sidx; i < eidx; i++)
+ if (test_bit(i, bdata->node_bootmem_map)) {
+ if (flags & BOOTMEM_EXCLUSIVE)
+ return -EBUSY;
+ }
+
+ return 0;
+
+}
+static void __init reserve_bootmem_core(bootmem_data_t *bdata,
+ unsigned long addr, unsigned long size, int flags)
+{
+ unsigned long sidx, eidx;
+ unsigned long i;
+
BUG_ON(!size);
- BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
- BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
- BUG_ON(addr < bdata->node_boot_start);
- sidx = PFN_DOWN(addr - bdata->node_boot_start);
+ /* out of range */
+ if (addr + size < bdata->node_boot_start ||
+ PFN_DOWN(addr) > bdata->node_low_pfn)
+ return;
+
+ /*
+ * Round up to index to the range.
+ */
+ if (addr > bdata->node_boot_start)
+ sidx= PFN_DOWN(addr - bdata->node_boot_start);
+ else
+ sidx = 0;
+
eidx = PFN_UP(addr + size - bdata->node_boot_start);
+ if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+ eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
for (i = sidx; i < eidx; i++)
if (test_and_set_bit(i, bdata->node_bootmem_map)) {
#ifdef CONFIG_DEBUG_BOOTMEM
printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
#endif
- if (flags & BOOTMEM_EXCLUSIVE) {
- ret = -EBUSY;
- goto err;
- }
}
-
- return 0;
-
-err:
- /* unreserve memory we accidentally reserved */
- for (i--; i >= sidx; i--)
- clear_bit(i, bdata->node_bootmem_map);
-
- return ret;
}
static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
@@ -415,6 +442,11 @@ unsigned long __init init_bootmem_node(p
void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long size, int flags)
{
+ int ret;
+
+ ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+ if (ret < 0)
+ return;
reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
}
@@ -440,7 +472,16 @@ unsigned long __init init_bootmem(unsign
int __init reserve_bootmem(unsigned long addr, unsigned long size,
int flags)
{
- return reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size, flags);
+ int ret;
+ bootmem_data_t *bdata;
+ list_for_each_entry(bdata, &bdata_list, list) {
+ ret = can_reserve_bootmem_core(bdata, addr, size, flags);
+ if (ret < 0)
+ return ret;
+ }
+ list_for_each_entry(bdata, &bdata_list, list)
+ reserve_bootmem_core(bdata, addr, size, flags);
+ return 0;
}
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 10/12] x86_64: make reserve_bootmem_generic to use new reserve_bootmem
[not found] <200803181237.33861.yhlu.kernel@gmail.com>
` (8 preceding siblings ...)
2008-03-19 21:04 ` [PATCH 09/12] mm: make reserve_bootmem can crossed the nodes v2 Yinghai Lu
@ 2008-03-19 21:04 ` Yinghai Lu
2008-03-21 10:50 ` Ingo Molnar
2008-03-19 21:04 ` [PATCH 11/12] x86_64: do not reserve ramdisk two times Yinghai Lu
2008-03-19 21:05 ` [PATCH 12/12] x86_64: fix setup_node_bootmem to support big mem excluding with memmap Yinghai Lu
11 siblings, 1 reply; 17+ messages in thread
From: Yinghai Lu @ 2008-03-19 21:04 UTC (permalink / raw)
To: Andrew Morton, Ingo Molnar; +Cc: Christoph Lameter, kernel list
[PATCH] x86_64: make reserve_bootmem_generic to use new reserve_bootmem
[PATCH] mm: make reserve_bootmem can crossed the nodes
provides the new reserve_bootmem; let reserve_bootmem_generic use that.
actually reserve_bootmem_generic is used to reserve the initrd ramdisk,
so we can make sure that even if the bootloader or kexec loads it across nodes, reserve_bootmem
still works.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -648,7 +648,7 @@ void free_initrd_mem(unsigned long start
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
- int nid = phys_to_nid(phys);
+ int nid, next_nid;
#endif
unsigned long pfn = phys >> PAGE_SHIFT;
@@ -667,10 +667,14 @@ void __init reserve_bootmem_generic(unsi
/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
+ nid = phys_to_nid(phys);
+ next_nid = phys_to_nid(phys + len - 1);
+ if (nid == next_nid)
reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
-#else
- reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
+ else
#endif
+ reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
+
if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
dma_reserve += len / PAGE_SIZE;
set_dma_reserve(dma_reserve);
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 11/12] x86_64: do not reserve ramdisk two times
[not found] <200803181237.33861.yhlu.kernel@gmail.com>
` (9 preceding siblings ...)
2008-03-19 21:04 ` [PATCH 10/12] x86_64: make reserve_bootmem_generic to use new reserve_bootmem Yinghai Lu
@ 2008-03-19 21:04 ` Yinghai Lu
2008-03-19 21:05 ` [PATCH 12/12] x86_64: fix setup_node_bootmem to support big mem excluding with memmap Yinghai Lu
11 siblings, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-03-19 21:04 UTC (permalink / raw)
To: Andrew Morton, Ingo Molnar; +Cc: Christoph Lameter, kernel list
[PATCH] x86_64: do not reserve ramdisk two times
the ramdisk is reserved via reserve_early in x86_64_start_kernel;
later early_res_to_bootmem() will convert that to a reservation in bootmem,
so we don't need to reserve it again.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/head64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head64.c
+++ linux-2.6/arch/x86/kernel/head64.c
@@ -110,6 +110,7 @@ void __init x86_64_start_kernel(char * r
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
@@ -117,6 +118,7 @@ void __init x86_64_start_kernel(char * r
unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
+#endif
reserve_ebda();
Index: linux-2.6/arch/x86/kernel/setup_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_64.c
+++ linux-2.6/arch/x86/kernel/setup_64.c
@@ -421,11 +421,14 @@ void __init setup_arch(char **cmdline_p)
unsigned long end_of_mem = end_pfn << PAGE_SHIFT;
if (ramdisk_end <= end_of_mem) {
- reserve_bootmem_generic(ramdisk_image, ramdisk_size);
+ /*
+ * don't need to reserve again, already reserved early
+ * in x86_64_start_kernel, and early_res_to_bootmem
+ * convert that to reserved in bootmem
+ */
initrd_start = ramdisk_image + PAGE_OFFSET;
initrd_end = initrd_start+ramdisk_size;
} else {
- /* Assumes everything on node 0 */
free_bootmem(ramdisk_image, ramdisk_size);
printk(KERN_ERR "initrd extends beyond end of memory "
"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 12/12] x86_64: fix setup_node_bootmem to support big mem excluding with memmap
[not found] <200803181237.33861.yhlu.kernel@gmail.com>
` (10 preceding siblings ...)
2008-03-19 21:04 ` [PATCH 11/12] x86_64: do not reserve ramdisk two times Yinghai Lu
@ 2008-03-19 21:05 ` Yinghai Lu
2008-03-21 10:52 ` Ingo Molnar
11 siblings, 1 reply; 17+ messages in thread
From: Yinghai Lu @ 2008-03-19 21:05 UTC (permalink / raw)
To: Andrew Morton, Ingo Molnar; +Cc: Christoph Lameter, kernel list
[PATCH] x86_64: fix setup_node_bootmem to support big mem excluding with memmap
typical case: four sockets system, every node has 4g ram, and we are using
memmap=10g$4g to mask out memory on node1 and node2
when numa is enabled, early_node_mem is used to get node_data and node_bootmap.
if it can not get them from the same node with find_e820_area, it will use alloc_bootmem
to get a buffer from a previous node.
so check for that case and print info about it.
we need to move early_res_to_bootmem into every setup_node_bootmem call,
and have it take the range that the node has; otherwise alloc_bootmem could return an addr
that was reserved early.
need to apply it after
[PATCH] mm: make reserve_bootmem can crossed the nodes
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/mm/numa_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/numa_64.c
+++ linux-2.6/arch/x86/mm/numa_64.c
@@ -188,6 +188,7 @@ void __init setup_node_bootmem(int nodei
unsigned long bootmap_start, nodedata_phys;
void *bootmap;
const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
+ int nid;
start = round_up(start, ZONE_ALIGN);
@@ -210,9 +211,20 @@ void __init setup_node_bootmem(int nodei
NODE_DATA(nodeid)->node_start_pfn = start_pfn;
NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
- /* Find a place for the bootmem map */
+ /*
+ * Find a place for the bootmem map
+ * nodedata_phys could be on other nodes by alloc_bootmem,
+ * so need to sure bootmap_start not to be small, otherwise
+ * early_node_mem will get that with find_e820_area instead
+ * of alloc_bootmem, that could clash with reserved range
+ */
bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
- bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+ nid = phys_to_nid(nodedata_phys);
+ if (nid == nodeid)
+ bootmap_start = round_up(nodedata_phys + pgdat_size,
+ PAGE_SIZE);
+ else
+ bootmap_start = round_up(start, PAGE_SIZE);
/*
* SMP_CAHCE_BYTES could be enough, but init_bootmem_node like
* to use that to align to PAGE_SIZE
@@ -237,10 +249,29 @@ void __init setup_node_bootmem(int nodei
free_bootmem_with_active_regions(nodeid, end);
- reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size,
- BOOTMEM_DEFAULT);
- reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
- bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+ /*
+ * convert early reserve to bootmem reserve earlier
+ * otherwise early_node_mem could use early reserved mem
+ * on previous node
+ */
+ early_res_to_bootmem(start, end);
+
+ /*
+ * in some case early_node_mem could use alloc_bootmem
+ * to get range on other node, don't reserve that again
+ */
+ if (nid != nodeid)
+ printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
+ else
+ reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
+ pgdat_size, BOOTMEM_DEFAULT);
+ nid = phys_to_nid(bootmap_start);
+ if (nid != nodeid)
+ printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
+ else
+ reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
+ bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+
#ifdef CONFIG_ACPI_NUMA
srat_reserve_add_area(nodeid);
#endif
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -83,14 +83,19 @@ void __init reserve_early(unsigned long
strncpy(r->name, name, sizeof(r->name) - 1);
}
-void __init early_res_to_bootmem(void)
+void __init early_res_to_bootmem(unsigned long start, unsigned long end)
{
int i;
+ unsigned long final_start, final_end;
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
struct early_res *r = &early_res[i];
- printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i,
- r->start, r->end - 1, r->name);
- reserve_bootmem_generic(r->start, r->end - r->start);
+ final_start = max(start, r->start);
+ final_end = min(end, r->end);
+ if (final_start >= final_end)
+ continue;
+ printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
+ final_start, final_end - 1, r->name);
+ reserve_bootmem_generic(final_start, final_end - final_start);
}
}
Index: linux-2.6/include/asm-x86/e820_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_64.h
+++ linux-2.6/include/asm-x86/e820_64.h
@@ -41,7 +41,7 @@ extern struct e820map e820;
extern void update_e820(void);
extern void reserve_early(unsigned long start, unsigned long end, char *name);
-extern void early_res_to_bootmem(void);
+extern void early_res_to_bootmem(unsigned long start, unsigned long end);
#endif/*!__ASSEMBLY__*/
Index: linux-2.6/arch/x86/kernel/setup_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_64.c
+++ linux-2.6/arch/x86/kernel/setup_64.c
@@ -190,6 +190,7 @@ contig_initmem_init(unsigned long start_
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
e820_register_active_regions(0, start_pfn, end_pfn);
free_bootmem_with_active_regions(0, end_pfn);
+ early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
}
#endif
@@ -395,8 +396,6 @@ void __init setup_arch(char **cmdline_p)
contig_initmem_init(0, end_pfn);
#endif
- early_res_to_bootmem();
-
dma32_reserve_bootmem();
#ifdef CONFIG_ACPI_SLEEP
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] x86: trim mtrr don't close gap for resource allocation.
2008-03-18 23:44 ` [PATCH] x86: trim mtrr don't close gap for resource allocation Yinghai Lu
@ 2008-03-21 10:44 ` Ingo Molnar
0 siblings, 0 replies; 17+ messages in thread
From: Ingo Molnar @ 2008-03-21 10:44 UTC (permalink / raw)
To: yhlu.kernel
Cc: Andrew Morton, H. Peter Anvin, steve, Jesse Barnes, kernel list
* Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:
> [PATCH] x86: trim mtrr don't close gap for resource allocation.
>
> for
> http://bugzilla.kernel.org/show_bug.cgi?id=10232
>
> use update_memory_range instead of add_memory_range directly to avoid
> closing that gap.
thanks Yinghai, applied, and queued up for 2.6.25.
Ingo
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 10/12] x86_64: make reserve_bootmem_generic to use new reserve_bootmem
2008-03-19 21:04 ` [PATCH 10/12] x86_64: make reserve_bootmem_generic to use new reserve_bootmem Yinghai Lu
@ 2008-03-21 10:50 ` Ingo Molnar
0 siblings, 0 replies; 17+ messages in thread
From: Ingo Molnar @ 2008-03-21 10:50 UTC (permalink / raw)
To: yhlu.kernel; +Cc: Andrew Morton, Christoph Lameter, kernel list
* Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:
> [PATCH] x86_64: make reserve_bootmem_generic to use new reserve_bootmem
>
> [PATCH] mm: make reserve_bootmem can crossed the nodes
>
> provides the new reserve_bootmem; let reserve_bootmem_generic use that.
>
> actually reserve_bootmem_generic is used to reserve the initrd ramdisk, so we
> can make sure that even if the bootloader or kexec loads it across nodes,
> reserve_bootmem still works.
.25 fix, correct? But due to dependency on the mm/bootmem.c change this
patch should go via Andrew.
Acked-by: Ingo Molnar <mingo@elte.hu>
Ingo
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 12/12] x86_64: fix setup_node_bootmem to support big mem excluding with memmap
2008-03-19 21:05 ` [PATCH 12/12] x86_64: fix setup_node_bootmem to support big mem excluding with memmap Yinghai Lu
@ 2008-03-21 10:52 ` Ingo Molnar
0 siblings, 0 replies; 17+ messages in thread
From: Ingo Molnar @ 2008-03-21 10:52 UTC (permalink / raw)
To: yhlu.kernel; +Cc: Andrew Morton, Christoph Lameter, kernel list
* Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:
> need to apply it after
> [PATCH] mm: make reserve_bootmem can crossed the nodes
this too should go via Andrew due to the mm/bootmem.c dependency. (can
pick it up into x86.git as well if the mm/bootmem.c goes into -git)
Ingo
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] x86: trim mtrr don't close gap for resource allocation
2008-03-17 0:38 [PATCH] x86: trim mtrr don't close gap for resource allocation Yinghai Lu
@ 2008-03-17 16:35 ` Jesse Barnes
0 siblings, 0 replies; 17+ messages in thread
From: Jesse Barnes @ 2008-03-17 16:35 UTC (permalink / raw)
To: Yinghai Lu
Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, steve, Andi Kleen,
Linux Kernel ML
The new function could probably use some higher level comments (though the
rest of the e820 routines seem similarly devoid of description so it's not a
big deal). And given that this patch fixes a regression, it should probably
get upstream quickly.
Thanks for fixing this so quickly, Yinghai.
Acked-by: Jesse Barnes <jesse.barnes@intel.com>
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH] x86: trim mtrr don't close gap for resource allocation
@ 2008-03-17 0:38 Yinghai Lu
2008-03-17 16:35 ` Jesse Barnes
0 siblings, 1 reply; 17+ messages in thread
From: Yinghai Lu @ 2008-03-17 0:38 UTC (permalink / raw)
To: Andrew Morton, Ingo Molnar, H. Peter Anvin, steve, Jesse Barnes,
Andi Kleen
Cc: Linux Kernel ML
[-- Attachment #1: Type: text/plain, Size: 1 bytes --]
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: update_memory_region.patch --]
[-- Type: text/x-patch; name=update_memory_region.patch, Size: 4067 bytes --]
[PATCH] x86: trim mtrr don't close gap for resource allocation.
for
http://bugzilla.kernel.org/show_bug.cgi?id=10232
use update_memory_range instead of add_memory_range directly
to avoid closing that gap.
Signed-off-by: Yinghai Lu <yhlu.kenrel@gmail.com>
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -711,7 +711,8 @@ int __init mtrr_trim_uncached_memory(uns
trim_size = end_pfn;
trim_size <<= PAGE_SHIFT;
trim_size -= trim_start;
- add_memory_region(trim_start, trim_size, E820_RESERVED);
+ update_memory_range(trim_start, trim_size, E820_RAM,
+ E820_RESERVED);
update_e820();
return 1;
}
Index: linux-2.6/arch/x86/kernel/e820_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_32.c
+++ linux-2.6/arch/x86/kernel/e820_32.c
@@ -736,6 +736,32 @@ static int __init parse_memmap(char *arg
return 0;
}
early_param("memmap", parse_memmap);
+void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+ unsigned new_type)
+{
+ int i;
+
+ BUG_ON(old_type == new_type);
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ u64 final_start, final_end;
+ if (ei->type != old_type)
+ continue;
+ /* totally covered? */
+ if (ei->addr >= start && ei->size <= size) {
+ ei->type = new_type;
+ continue;
+ }
+ /* partially covered */
+ final_start = max(start, ei->addr);
+ final_end = min(start + size, ei->addr + ei->size);
+ if (final_start >= final_end)
+ continue;
+ add_memory_region(final_start, final_end - final_start,
+ new_type);
+ }
+}
void __init update_e820(void)
{
u8 nr_map;
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -731,6 +731,33 @@ void __init finish_e820_parsing(void)
}
}
+void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+ unsigned new_type)
+{
+ int i;
+
+ BUG_ON(old_type == new_type);
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ u64 final_start, final_end;
+ if (ei->type != old_type)
+ continue;
+ /* totally covered? */
+ if (ei->addr >= start && ei->size <= size) {
+ ei->type = new_type;
+ continue;
+ }
+ /* partially covered */
+ final_start = max(start, ei->addr);
+ final_end = min(start + size, ei->addr + ei->size);
+ if (final_start >= final_end)
+ continue;
+ add_memory_region(final_start, final_end - final_start,
+ new_type);
+ }
+}
+
void __init update_e820(void)
{
u8 nr_map;
Index: linux-2.6/include/asm-x86/e820_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_32.h
+++ linux-2.6/include/asm-x86/e820_32.h
@@ -28,6 +28,8 @@ extern void find_max_pfn(void);
extern void register_bootmem_low_pages(unsigned long max_low_pfn);
extern void add_memory_region(unsigned long long start,
unsigned long long size, int type);
+extern void update_memory_range(u64 start, u64 size, unsigned old_type,
+ unsigned new_type);
extern void e820_register_memory(void);
extern void limit_regions(unsigned long long size);
extern void print_memory_map(char *who);
Index: linux-2.6/include/asm-x86/e820_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_64.h
+++ linux-2.6/include/asm-x86/e820_64.h
@@ -18,6 +18,8 @@ extern unsigned long find_e820_area(unsi
unsigned long size, unsigned long align);
extern void add_memory_region(unsigned long start, unsigned long size,
int type);
+extern void update_memory_range(u64 start, u64 size, unsigned old_type,
+ unsigned new_type);
extern void setup_memory_region(void);
extern void contig_e820_setup(void);
extern unsigned long e820_end_of_ram(void);
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2008-03-21 10:52 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <200803181237.33861.yhlu.kernel@gmail.com>
[not found] ` <200803181255.10402.yhlu.kernel@gmail.com>
2008-03-18 23:44 ` [PATCH] x86: trim mtrr don't close gap for resource allocation Yinghai Lu
2008-03-21 10:44 ` Ingo Molnar
2008-03-19 21:03 ` [PATCH 02/12] mm: fix boundary checking in free_bootmem_core fix Yinghai Lu
2008-03-19 21:03 ` [PATCH 03/12] x86_64: free_bootmem should take phys Yinghai Lu
2008-03-19 21:03 ` [PATCH 04/12] x86_64: reserve dma32 early for gart Yinghai Lu
2008-03-19 21:04 ` [PATCH 05/12] mm: make mem_map allocation continuous Yinghai Lu
2008-03-19 21:04 ` [PATCH 06/12] mm: fix alloc_bootmem_core to use fast searching for all nodes Yinghai Lu
2008-03-19 21:04 ` [PATCH 07/12] mm: offset align in alloc_bootmem v3 Yinghai Lu
2008-03-19 21:04 ` [PATCH 08/12] mm: allocate section_map for sparse_init Yinghai Lu
2008-03-19 21:04 ` [PATCH 09/12] mm: make reserve_bootmem can crossed the nodes v2 Yinghai Lu
2008-03-19 21:04 ` [PATCH 10/12] x86_64: make reserve_bootmem_generic to use new reserve_bootmem Yinghai Lu
2008-03-21 10:50 ` Ingo Molnar
2008-03-19 21:04 ` [PATCH 11/12] x86_64: do not reserve ramdisk two times Yinghai Lu
2008-03-19 21:05 ` [PATCH 12/12] x86_64: fix setup_node_bootmem to support big mem excluding with memmap Yinghai Lu
2008-03-21 10:52 ` Ingo Molnar
2008-03-17 0:38 [PATCH] x86: trim mtrr don't close gap for resource allocation Yinghai Lu
2008-03-17 16:35 ` Jesse Barnes
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).