LKML Archive on lore.kernel.org help / color / mirror / Atom feed
* [PATCH 2/2] x86_64: make early_node_mem return align address [not found] <200801290053.45776.yinghai.lu@sun.com> @ 2008-01-29 9:05 ` Yinghai Lu 2008-01-29 9:33 ` Andi Kleen 2008-01-29 18:08 ` Yinghai Lu 2008-01-29 9:05 ` [PATCH 1/2] print out node_data addr and bootmap_start addr Yinghai Lu 1 sibling, 2 replies; 10+ messages in thread From: Yinghai Lu @ 2008-01-29 9:05 UTC (permalink / raw) To: Ingo Molnar, Christoph Lameter; +Cc: Andrew Morton, Andi Kleen, linux-kernel [PATCH 2/2] x86_64: make early_node_mem return align address boot oops when system get 64g or 128g installed Calling initcall 0xffffffff80bc33b6: sctp_init+0x0/0x711() BUG: unable to handle kernel NULL pointer dereference at 000000000000005f IP: [<ffffffff802bfe55>] proc_register+0xe7/0x10f PGD 0 Oops: 0000 [1] SMP CPU 0 Modules linked in: Pid: 1, comm: swapper Not tainted 2.6.24-smp-g5a514e21-dirty #6 RIP: 0010:[<ffffffff802bfe55>] [<ffffffff802bfe55>] proc_register+0xe7/0x10f RSP: 0000:ffff810824c57e60 EFLAGS: 00010246 RAX: 000000000000d7d7 RBX: ffff811024c5fa80 RCX: ffff810824c57e08 RDX: 0000000000000000 RSI: 0000000000000195 RDI: ffffffff80cc2460 RBP: ffffffffffffffff R08: 0000000000000000 R09: ffff811024c5fa80 R10: 0000000000000000 R11: 0000000000000002 R12: ffff810824c57e6c R13: 0000000000000000 R14: ffff810824c57ee0 R15: 00000006abd25bee FS: 0000000000000000(0000) GS:ffffffff80b4d000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 000000000000005f CR3: 0000000000201000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 1, threadinfo ffff810824c56000, task ffff812024c52000) Stack: ffffffff80a57348 0000019500000000 ffff811024c5fa80 0000000000000000 00000000ffffff97 ffffffff802bfef0 0000000000000000 ffffffffffffffff 0000000000000000 ffffffff80bc3b4b ffff810824c57ee0 ffffffff80bc34a5 Call Trace: [<ffffffff802bfef0>] ? 
create_proc_entry+0x73/0x8a [<ffffffff80bc3b4b>] ? sctp_snmp_proc_init+0x1c/0x34 [<ffffffff80bc34a5>] ? sctp_init+0xef/0x711 [<ffffffff80b976e3>] ? kernel_init+0x175/0x2e1 [<ffffffff8020ccf8>] ? child_rip+0xa/0x12 [<ffffffff80b9756e>] ? kernel_init+0x0/0x2e1 [<ffffffff8020ccee>] ? child_rip+0x0/0x12 Code: 1e 48 83 7b 38 00 75 08 48 c7 43 38 f0 e8 82 80 48 83 7b 30 00 75 08 48 c7 43 30 d0 e9 82 80 48 c7 c7 60 24 cc 80 e8 bd 5a 54 00 <48> 8b 45 60 48 89 6b 58 48 89 5d 60 48 89 43 50 fe 05 f5 25 a0 RIP [<ffffffff802bfe55>] proc_register+0xe7/0x10f RSP <ffff810824c57e60> CR2: 000000000000005f ---[ end trace 02c2d78def82877a ]--- Kernel panic - not syncing: Attempted to kill init! it turns out some variables near end of bss is corrupted already. in System.map we have ffffffff80d40420 b rsi_table ffffffff80d40620 B krb5_seq_lock ffffffff80d40628 b i.20437 ffffffff80d40630 b xprt_rdma_inline_write_padding ffffffff80d40638 b sunrpc_table_header ffffffff80d40640 b zero ffffffff80d40644 b min_memreg ffffffff80d40648 b rpcrdma_tk_lock_g ffffffff80d40650 B sctp_assocs_id_lock ffffffff80d40658 B proc_net_sctp ffffffff80d40660 B sctp_assocs_id ffffffff80d40680 B sysctl_sctp_mem ffffffff80d40690 B sysctl_sctp_rmem ffffffff80d406a0 B sysctl_sctp_wmem ffffffff80d406b0 b sctp_ctl_socket ffffffff80d406b8 b sctp_pf_inet6_specific ffffffff80d406c0 b sctp_pf_inet_specific ffffffff80d406c8 b sctp_af_v4_specific ffffffff80d406d0 b sctp_af_v6_specific ffffffff80d406d8 b sctp_rand.33270 ffffffff80d406dc b sctp_memory_pressure ffffffff80d406e0 b sctp_sockets_allocated ffffffff80d406e4 b sctp_memory_allocated ffffffff80d406e8 b sctp_sysctl_header ffffffff80d406f0 b zero ffffffff80d406f4 A __bss_stop ffffffff80d406f4 A _end and setup_node_bootmem() will use that page 0xd40000 for bootmap Bootmem setup node 0 0000000000000000-0000000828000000 NODE_DATA [000000000008a485 - 0000000000091484] bootmap [0000000000d406f4 - 0000000000e456f3] pages 105 Bootmem setup node 1 
0000000828000000-0000001028000000 NODE_DATA [0000000828000000 - 0000000828006fff] bootmap [0000000828007000 - 0000000828106fff] pages 100 Bootmem setup node 2 0000001028000000-0000001828000000 NODE_DATA [0000001028000000 - 0000001028006fff] bootmap [0000001028007000 - 0000001028106fff] pages 100 Bootmem setup node 3 0000001828000000-0000002028000000 NODE_DATA [0000001828000000 - 0000001828006fff] bootmap [0000001828007000 - 0000001828106fff] pages 100 actually, setup_node_bootmem hope to make NODE_DATA to be ZONE_ALIGN (1<<(11+12)), and bootmap will after that in PAGE. the patch update early_node_mem, and find_e820_mem to make sure we can extra range for alignment. Signed-off-by: Yinghai Lu <yinghai.lu@sun.com> Index: linux-2.6/arch/x86/kernel/e820_64.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/e820_64.c +++ linux-2.6/arch/x86/kernel/e820_64.c @@ -169,9 +169,10 @@ int __init e820_all_mapped(unsigned long * Find a free area in a specific range. 
*/ unsigned long __init find_e820_area(unsigned long start, unsigned long end, - unsigned size) + unsigned size, unsigned long align) { int i; + unsigned long mask = ~(align - 1); for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; @@ -185,7 +186,7 @@ unsigned long __init find_e820_area(unsi continue; while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size) ; - last = PAGE_ALIGN(addr) + size; + last = ((addr + align - 1) & mask) + size; if (last > ei->addr + ei->size) continue; if (last > end) Index: linux-2.6/arch/x86/kernel/setup_64.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/setup_64.c +++ linux-2.6/arch/x86/kernel/setup_64.c @@ -182,7 +182,8 @@ contig_initmem_init(unsigned long start_ unsigned long bootmap_size, bootmap; bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; - bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); + bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size, + PAGE_SIZE); if (bootmap == -1L) panic("Cannot find bootmem map of size %ld\n", bootmap_size); bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); Index: linux-2.6/arch/x86/mm/init_64.c =================================================================== --- linux-2.6.orig/arch/x86/mm/init_64.c +++ linux-2.6/arch/x86/mm/init_64.c @@ -354,7 +354,7 @@ static void __init find_early_table_spac * need roughly 0.5KB per GB. 
*/ start = 0x8000; - table_start = find_e820_area(start, end, tables); + table_start = find_e820_area(start, end, tables, PAGE_SIZE); if (table_start == -1UL) panic("Cannot find space for the kernel page tables"); Index: linux-2.6/arch/x86/mm/numa_64.c =================================================================== --- linux-2.6.orig/arch/x86/mm/numa_64.c +++ linux-2.6/arch/x86/mm/numa_64.c @@ -94,7 +94,7 @@ static int __init allocate_cachealigned_ pad_addr = 0x8000; nodemap_size = pad + memnodemapsize; nodemap_addr = find_e820_area(pad_addr, end_pfn<<PAGE_SHIFT, - nodemap_size); + nodemap_size, PAGE_SIZE); if (nodemap_addr == -1UL) { printk(KERN_ERR "NUMA: Unable to allocate Memory to Node hash map\n"); @@ -164,13 +164,16 @@ int early_pfn_to_nid(unsigned long pfn) } static void * __init early_node_mem(int nodeid, unsigned long start, - unsigned long end, unsigned long size) + unsigned long end, unsigned long size, + unsigned long align) { - unsigned long mem = find_e820_area(start, end, size); + unsigned long mem = find_e820_area(start, end, size, align); void *ptr; - if (mem != -1L) + if (mem != -1L) { + mem = round_up(mem, align); return __va(mem); + } ptr = __alloc_bootmem_nopanic(size, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)); if (ptr == NULL) { @@ -198,7 +201,8 @@ void __init setup_node_bootmem(int nodei start_pfn = start >> PAGE_SHIFT; end_pfn = end >> PAGE_SHIFT; - node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size); + node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size, + ZONE_ALIGN); if (node_data[nodeid] == NULL) return; nodedata_phys = __pa(node_data[nodeid]); @@ -213,7 +217,7 @@ void __init setup_node_bootmem(int nodei bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); bootmap = early_node_mem(nodeid, bootmap_start, end, - bootmap_pages<<PAGE_SHIFT); + bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); if (bootmap == NULL) { if (nodedata_phys < start || 
nodedata_phys >= end) free_bootmem((unsigned long)node_data[nodeid], Index: linux-2.6/include/asm-x86/e820_64.h =================================================================== --- linux-2.6.orig/include/asm-x86/e820_64.h +++ linux-2.6/include/asm-x86/e820_64.h @@ -15,7 +15,7 @@ #ifndef __ASSEMBLY__ extern unsigned long find_e820_area(unsigned long start, unsigned long end, - unsigned size); + unsigned size, unsigned long align); extern void add_memory_region(unsigned long start, unsigned long size, int type); extern void setup_memory_region(void); ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 2/2] x86_64: make early_node_mem return align address 2008-01-29 9:05 ` [PATCH 2/2] x86_64: make early_node_mem return align address Yinghai Lu @ 2008-01-29 9:33 ` Andi Kleen 2008-01-29 17:41 ` Yinghai Lu 2008-01-29 18:08 ` Yinghai Lu 1 sibling, 1 reply; 10+ messages in thread From: Andi Kleen @ 2008-01-29 9:33 UTC (permalink / raw) To: Yinghai Lu; +Cc: Ingo Molnar, Christoph Lameter, Andrew Morton, linux-kernel On Tuesday 29 January 2008 10:05, Yinghai Lu wrote: > [PATCH 2/2] x86_64: make early_node_mem return align address > > boot oops when system get 64g or 128g installed Probably it should just use reserve_early(). Does this patch work? The alignment change is needed at some point too, but only to relax the alignment to not force all early allocations to be page padded. -Andi --- Use early reservation for early node data Signed-off-by: Andi Kleen <ak@suse.de> Index: linux/arch/x86/mm/numa_64.c =================================================================== --- linux.orig/arch/x86/mm/numa_64.c +++ linux/arch/x86/mm/numa_64.c @@ -169,8 +169,10 @@ static void * __init early_node_mem(int unsigned long mem = find_e820_area(start, end, size); void *ptr; - if (mem != -1L) + if (mem != -1L) { + reserve_early(mem, mem + size); return __va(mem); + } ptr = __alloc_bootmem_nopanic(size, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)); if (ptr == NULL) { ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 2/2] x86_64: make early_node_mem return align address 2008-01-29 9:33 ` Andi Kleen @ 2008-01-29 17:41 ` Yinghai Lu 2008-01-30 2:55 ` Andi Kleen 0 siblings, 1 reply; 10+ messages in thread From: Yinghai Lu @ 2008-01-29 17:41 UTC (permalink / raw) To: Andi Kleen; +Cc: Ingo Molnar, Christoph Lameter, Andrew Morton, linux-kernel On Tuesday 29 January 2008 01:33:29 am Andi Kleen wrote: > On Tuesday 29 January 2008 10:05, Yinghai Lu wrote: > > [PATCH 2/2] x86_64: make early_node_mem return align address > > > > boot oops when system get 64g or 128g installed > > Probably it should just use reserve_early(). Does this patch work? > > The alignment change is needed at some point too, but only to > relax the alignment to not force all early allocations to be page > padded. No, my patch doesn't force all early allocations to be page padded. for find_e820_mem, i just change PAGE_ALIGN to be aligned align parameter.... only make early_node_mem have aligned data. because it seems it like to...and assume that. I think your patch will get early panic about overlap between bss and bootmem... like the 256g machine, bss is overlapped with early page table... so could change - node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size); + node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size, + ZONE_ALIGN); ===> - node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size); + node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size, + PAGE_SIZE); or - if (mem != -1L) + if (mem != -1L) { + mem = round_up(mem, PAGE_SIZE); return __va(mem); + } YH ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 2/2] x86_64: make early_node_mem return align address 2008-01-29 17:41 ` Yinghai Lu @ 2008-01-30 2:55 ` Andi Kleen 2008-01-30 3:24 ` Yinghai Lu 0 siblings, 1 reply; 10+ messages in thread From: Andi Kleen @ 2008-01-30 2:55 UTC (permalink / raw) To: Yinghai Lu; +Cc: Ingo Molnar, Christoph Lameter, Andrew Morton, linux-kernel On Tuesday 29 January 2008 18:41, Yinghai Lu wrote: > On Tuesday 29 January 2008 01:33:29 am Andi Kleen wrote: > > On Tuesday 29 January 2008 10:05, Yinghai Lu wrote: > > > [PATCH 2/2] x86_64: make early_node_mem return align address > > > > > > boot oops when system get 64g or 128g installed > > > > Probably it should just use reserve_early(). Does this patch work? > > > > The alignment change is needed at some point too, but only to > > relax the alignment to not force all early allocations to be page > > padded. > > No, my patch doesn't force all early allocations to be page padded. > for find_e820_mem, i just change PAGE_ALIGN to be aligned align > parameter.... They are already all PAGE_ALIGN()ed (which is too strict, but needs some care to fix properly), but your patch uses it the wrong way. The PAGE_ALIGNment was added some time ago to avoid such over lapping, but it should not actually be needed for that anymore. > > only make early_node_mem have aligned data. because it seems it like > to...and assume that. Using alignment doesn't seem the correct way to avoid overlapping. If there is still overlap then some reservation needs to be extended. > I think your patch will get early panic about overlap between bss and > bootmem... like the 256g machine, bss is overlapped with early page > table... Well did you test it? bss should have been reserved by this line in head64.c reserve_early(__pa_symbol(&_text), __pa_symbol(&_end)); (in git-x86). In earlier kernels it was checked for explicitely by the e820 allocator. -Andi > ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 2/2] x86_64: make early_node_mem return align address 2008-01-30 2:55 ` Andi Kleen @ 2008-01-30 3:24 ` Yinghai Lu 0 siblings, 0 replies; 10+ messages in thread From: Yinghai Lu @ 2008-01-30 3:24 UTC (permalink / raw) To: Andi Kleen; +Cc: Ingo Molnar, Christoph Lameter, Andrew Morton, linux-kernel On Tuesday 29 January 2008 06:55:45 pm Andi Kleen wrote: > On Tuesday 29 January 2008 18:41, Yinghai Lu wrote: > > On Tuesday 29 January 2008 01:33:29 am Andi Kleen wrote: > > > On Tuesday 29 January 2008 10:05, Yinghai Lu wrote: > > > > [PATCH 2/2] x86_64: make early_node_mem return align address > > > > > > > > boot oops when system get 64g or 128g installed > > > > > > Probably it should just use reserve_early(). Does this patch work? > > > > > > The alignment change is needed at some point too, but only to > > > relax the alignment to not force all early allocations to be page > > > padded. > > > > No, my patch doesn't force all early allocations to be page padded. > > for find_e820_mem, i just change PAGE_ALIGN to be aligned align > > parameter.... > > They are already all PAGE_ALIGN()ed (which is too strict, but needs > some care to fix properly), but your patch uses it the wrong way. > The PAGE_ALIGNment was added some time ago to avoid such over > lapping, but it should not actually be needed for that anymore. > > > > > only make early_node_mem have aligned data. because it seems it like > > to...and assume that. > > Using alignment doesn't seem the correct way to avoid overlapping. > > If there is still overlap then some reservation needs to be extended. > > > I think your patch will get early panic about overlap between bss and > > bootmem... like the 256g machine, bss is overlapped with early page > > table... > > Well did you test it? > > bss should have been reserved by this line in head64.c > > reserve_early(__pa_symbol(&_text), __pa_symbol(&_end)); > > (in git-x86). In earlier kernels it was checked for explicitely by the e820 > allocator. 
No early panic, but the end of bss still gets corrupted, because bootmap_start is used as bootmap_start >> PAGE_SHIFT, so the bootmap overlaps the tail page of bss. YH ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 2/2] x86_64: make early_node_mem return align address 2008-01-29 9:05 ` [PATCH 2/2] x86_64: make early_node_mem return align address Yinghai Lu 2008-01-29 9:33 ` Andi Kleen @ 2008-01-29 18:08 ` Yinghai Lu 1 sibling, 0 replies; 10+ messages in thread From: Yinghai Lu @ 2008-01-29 18:08 UTC (permalink / raw) To: Ingo Molnar; +Cc: Christoph Lameter, Andrew Morton, Andi Kleen, linux-kernel On Tuesday 29 January 2008 01:05:03 am Yinghai Lu wrote: > [PATCH 2/2] x86_64: make early_node_mem return align address > > boot oops when system get 64g or 128g installed > > Calling initcall 0xffffffff80bc33b6: sctp_init+0x0/0x711() > BUG: unable to handle kernel NULL pointer dereference at 000000000000005f > IP: [<ffffffff802bfe55>] proc_register+0xe7/0x10f > PGD 0 > Oops: 0000 [1] SMP > CPU 0 > Modules linked in: > Pid: 1, comm: swapper Not tainted 2.6.24-smp-g5a514e21-dirty #6 > RIP: 0010:[<ffffffff802bfe55>] [<ffffffff802bfe55>] proc_register+0xe7/0x10f > RSP: 0000:ffff810824c57e60 EFLAGS: 00010246 > RAX: 000000000000d7d7 RBX: ffff811024c5fa80 RCX: ffff810824c57e08 > RDX: 0000000000000000 RSI: 0000000000000195 RDI: ffffffff80cc2460 > RBP: ffffffffffffffff R08: 0000000000000000 R09: ffff811024c5fa80 > R10: 0000000000000000 R11: 0000000000000002 R12: ffff810824c57e6c > R13: 0000000000000000 R14: ffff810824c57ee0 R15: 00000006abd25bee > FS: 0000000000000000(0000) GS:ffffffff80b4d000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b > CR2: 000000000000005f CR3: 0000000000201000 CR4: 00000000000006e0 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > Process swapper (pid: 1, threadinfo ffff810824c56000, task ffff812024c52000) > Stack: ffffffff80a57348 0000019500000000 ffff811024c5fa80 0000000000000000 > 00000000ffffff97 ffffffff802bfef0 0000000000000000 ffffffffffffffff > 0000000000000000 ffffffff80bc3b4b ffff810824c57ee0 ffffffff80bc34a5 > Call Trace: > 
[<ffffffff802bfef0>] ? create_proc_entry+0x73/0x8a > [<ffffffff80bc3b4b>] ? sctp_snmp_proc_init+0x1c/0x34 > [<ffffffff80bc34a5>] ? sctp_init+0xef/0x711 > [<ffffffff80b976e3>] ? kernel_init+0x175/0x2e1 > [<ffffffff8020ccf8>] ? child_rip+0xa/0x12 > [<ffffffff80b9756e>] ? kernel_init+0x0/0x2e1 > [<ffffffff8020ccee>] ? child_rip+0x0/0x12 > > > Code: 1e 48 83 7b 38 00 75 08 48 c7 43 38 f0 e8 82 80 48 83 7b 30 00 75 08 48 c7 43 30 d0 e9 82 80 48 c7 c7 60 24 cc 80 e8 bd 5a 54 00 <48> 8b 45 60 48 89 6b 58 48 89 5d 60 48 89 43 50 fe 05 f5 25 a0 > RIP [<ffffffff802bfe55>] proc_register+0xe7/0x10f > RSP <ffff810824c57e60> > CR2: 000000000000005f > ---[ end trace 02c2d78def82877a ]--- > Kernel panic - not syncing: Attempted to kill init! > > it turns out some variables near end of bss is corrupted already. > > in System.map we have > ffffffff80d40420 b rsi_table > ffffffff80d40620 B krb5_seq_lock > ffffffff80d40628 b i.20437 > ffffffff80d40630 b xprt_rdma_inline_write_padding > ffffffff80d40638 b sunrpc_table_header > ffffffff80d40640 b zero > ffffffff80d40644 b min_memreg > ffffffff80d40648 b rpcrdma_tk_lock_g > ffffffff80d40650 B sctp_assocs_id_lock > ffffffff80d40658 B proc_net_sctp > ffffffff80d40660 B sctp_assocs_id > ffffffff80d40680 B sysctl_sctp_mem > ffffffff80d40690 B sysctl_sctp_rmem > ffffffff80d406a0 B sysctl_sctp_wmem > ffffffff80d406b0 b sctp_ctl_socket > ffffffff80d406b8 b sctp_pf_inet6_specific > ffffffff80d406c0 b sctp_pf_inet_specific > ffffffff80d406c8 b sctp_af_v4_specific > ffffffff80d406d0 b sctp_af_v6_specific > ffffffff80d406d8 b sctp_rand.33270 > ffffffff80d406dc b sctp_memory_pressure > ffffffff80d406e0 b sctp_sockets_allocated > ffffffff80d406e4 b sctp_memory_allocated > ffffffff80d406e8 b sctp_sysctl_header > ffffffff80d406f0 b zero > ffffffff80d406f4 A __bss_stop > ffffffff80d406f4 A _end > > and setup_node_bootmem() will use that page 0xd40000 for bootmap > Bootmem setup node 0 0000000000000000-0000000828000000 > NODE_DATA [000000000008a485 - 
0000000000091484] > bootmap [0000000000d406f4 - 0000000000e456f3] pages 105 > Bootmem setup node 1 0000000828000000-0000001028000000 > NODE_DATA [0000000828000000 - 0000000828006fff] > bootmap [0000000828007000 - 0000000828106fff] pages 100 > Bootmem setup node 2 0000001028000000-0000001828000000 > NODE_DATA [0000001028000000 - 0000001028006fff] > bootmap [0000001028007000 - 0000001028106fff] pages 100 > Bootmem setup node 3 0000001828000000-0000002028000000 > NODE_DATA [0000001828000000 - 0000001828006fff] > bootmap [0000001828007000 - 0000001828106fff] pages 100 > > actually, setup_node_bootmem hope to make NODE_DATA to be ZONE_ALIGN (1<<(11+12)), > and bootmap will after that in PAGE. > > the patch update early_node_mem, and find_e820_mem to make sure we can extra range > for alignment. > > Signed-off-by: Yinghai Lu <yinghai.lu@sun.com> > please discard this one. I will have a new one. Thanks Yinghai Lu ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 1/2] print out node_data addr and bootmap_start addr [not found] <200801290053.45776.yinghai.lu@sun.com> 2008-01-29 9:05 ` [PATCH 2/2] x86_64: make early_node_mem return align address Yinghai Lu @ 2008-01-29 9:05 ` Yinghai Lu [not found] ` <20080201170908.GB2159@elte.hu> 1 sibling, 1 reply; 10+ messages in thread From: Yinghai Lu @ 2008-01-29 9:05 UTC (permalink / raw) To: Ingo Molnar, Christoph Lameter; +Cc: Andrew Morton, Andi Kleen, linux-kernel [PATCH 1/2] print out node_data addr and bootmap_start addr Signed-off-by: Yinghai Lu <yinghai.lu@sun.com> Index: linux-2.6/arch/x86/mm/numa_64.c =================================================================== --- linux-2.6.orig/arch/x86/mm/numa_64.c +++ linux-2.6/arch/x86/mm/numa_64.c @@ -202,6 +202,8 @@ void __init setup_node_bootmem(int nodei if (node_data[nodeid] == NULL) return; nodedata_phys = __pa(node_data[nodeid]); + printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, + nodedata_phys + pgdat_size - 1); memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; @@ -221,12 +223,15 @@ void __init setup_node_bootmem(int nodei return; } bootmap_start = __pa(bootmap); - Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages); bootmap_size = init_bootmem_node(NODE_DATA(nodeid), bootmap_start >> PAGE_SHIFT, start_pfn, end_pfn); + printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", + bootmap_start, bootmap_start + bootmap_size - 1, + bootmap_pages); + free_bootmem_with_active_regions(nodeid, end); reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); ^ permalink raw reply [flat|nested] 10+ messages in thread
[parent not found: <20080201170908.GB2159@elte.hu>]
* [PATCH] x86_64: mark x86_cpu_to_node_map_init to __initdata like other xx_init [not found] ` <20080201170908.GB2159@elte.hu> @ 2008-02-01 21:29 ` Yinghai Lu 0 siblings, 0 replies; 10+ messages in thread From: Yinghai Lu @ 2008-02-01 21:29 UTC (permalink / raw) To: Ingo Molnar; +Cc: linux-kernel [PATCH] x86_64: mark x86_cpu_to_node_map_init to __initdata like other xx_init x86_cpu_to_apicid_init and x86_bios_cpu_apicid_init are defined with __initdata. Signed-off-by: Yinghai Lu <yinghai.lu@sun.com> diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index f0e5cab..d7af3fd 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -31,7 +31,7 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES]; struct memnode memnode; -int x86_cpu_to_node_map_init[NR_CPUS] = { +int x86_cpu_to_node_map_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; void *x86_cpu_to_node_map_early_ptr; diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index 8af05a9..d3340de 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h @@ -35,7 +35,7 @@ extern int cpu_to_node_map[]; #else DECLARE_PER_CPU(int, x86_cpu_to_node_map); -extern int x86_cpu_to_node_map_init[]; +extern int __initdata x86_cpu_to_node_map_init[]; extern void *x86_cpu_to_node_map_early_ptr; /* Returns the number of the current Node. */ #define numa_node_id() (early_cpu_to_node(raw_smp_processor_id())) ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH] x86_64: mark x86_cpu_to_node_map_init to __initdata like other xx_init @ 2008-01-28 9:16 Yinghai Lu 2008-01-28 10:34 ` Ingo Molnar 0 siblings, 1 reply; 10+ messages in thread From: Yinghai Lu @ 2008-01-28 9:16 UTC (permalink / raw) To: Ingo Molnar; +Cc: Mike Travis, Christoph Lameter, Linux Kernel Mailing List [PATCH] x86_64: mark x86_cpu_to_node_map_init to __initdata like other xx_init Signed-off-by: Yinghai Lu <yinghai.lu@sun.com> diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index f0e5cab..d7af3fd 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -31,7 +31,7 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES]; struct memnode memnode; -int x86_cpu_to_node_map_init[NR_CPUS] = { +int x86_cpu_to_node_map_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; void *x86_cpu_to_node_map_early_ptr; diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index 8af05a9..d3340de 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h @@ -35,7 +35,7 @@ extern int cpu_to_node_map[]; #else DECLARE_PER_CPU(int, x86_cpu_to_node_map); -extern int x86_cpu_to_node_map_init[]; +extern int __initdata x86_cpu_to_node_map_init[]; extern void *x86_cpu_to_node_map_early_ptr; /* Returns the number of the current Node. */ #define numa_node_id() (early_cpu_to_node(raw_smp_processor_id())) ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86_64: mark x86_cpu_to_node_map_init to __initdata like other xx_init 2008-01-28 9:16 Yinghai Lu @ 2008-01-28 10:34 ` Ingo Molnar 0 siblings, 0 replies; 10+ messages in thread From: Ingo Molnar @ 2008-01-28 10:34 UTC (permalink / raw) To: Yinghai Lu Cc: Mike Travis, Christoph Lameter, Linux Kernel Mailing List, Sam Ravnborg * Yinghai Lu <Yinghai.Lu@Sun.COM> wrote: > -int x86_cpu_to_node_map_init[NR_CPUS] = { > +int x86_cpu_to_node_map_init[NR_CPUS] __initdata = { > [0 ... NR_CPUS-1] = NUMA_NO_NODE > }; i remember some linker warning here. While this array should indeed only be used in early init, that decision is dynamic and our linker warnings do not notice it. There's a special marker for such cases: __initdata_refok. But ... i'm slightly nervous about turning off a vital warning like that. Sam, it would be nice to have a DEBUG_INITDATA mode of operation: in this case free_initmem() would not truly free those pages but would unmap them via: kernel_map_pages(page, nrpages, 0); could be made dependent on DEBUG_PAGEALLOC. If this debugging is enabled then if any code references it, we get a hard page fault. If we had a debug mode like that then bugs in this area would not go unnoticed. Or perhaps just make this part of normal DEBUG_PAGEALLOC. Like the patch below on top of latest x86.git. Hm? Ingo -------------------> Subject: x86: init memory debugging From: Ingo Molnar <mingo@elte.hu> debug incorrect/late access to init memory, by permanently unmapping the init memory ranges. Depends on CONFIG_DEBUG_PAGEALLOC=y. 
Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/mm/init_32.c | 12 ++++++++++++ arch/x86/mm/init_64.c | 11 +++++++++++ 2 files changed, 23 insertions(+) Index: linux-x86.q/arch/x86/mm/init_32.c =================================================================== --- linux-x86.q.orig/arch/x86/mm/init_32.c +++ linux-x86.q/arch/x86/mm/init_32.c @@ -794,6 +794,18 @@ void free_init_pages(char *what, unsigne unsigned long addr; /* + * If debugging page accesses then do not free this memory but + * mark them not present - any buggy init-section access will + * create a kernel page fault: + */ +#ifdef CONFIG_DEBUG_PAGEALLOC + printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", + begin, PAGE_ALIGN(end)); + set_memory_np(begin, (end - begin) >> PAGE_SHIFT); + return; +#endif + set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); + /* * We just marked the kernel text read only above, now that * we are going to free part of that, we need to make that * writeable first. Index: linux-x86.q/arch/x86/mm/init_64.c =================================================================== --- linux-x86.q.orig/arch/x86/mm/init_64.c +++ linux-x86.q/arch/x86/mm/init_64.c @@ -580,6 +580,17 @@ void free_init_pages(char *what, unsigne if (begin >= end) return; + /* + * If debugging page accesses then do not free this memory but + * mark them not present - any buggy init-section access will + * create a kernel page fault: + */ +#ifdef CONFIG_DEBUG_PAGEALLOC + printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", + begin, PAGE_ALIGN(end)); + set_memory_np(begin, (end - begin) >> PAGE_SHIFT); + return; +#endif printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); for (addr = begin; addr < end; addr += PAGE_SIZE) { ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2008-02-01 21:22 UTC | newest] Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- [not found] <200801290053.45776.yinghai.lu@sun.com> 2008-01-29 9:05 ` [PATCH 2/2] x86_64: make early_node_mem return align address Yinghai Lu 2008-01-29 9:33 ` Andi Kleen 2008-01-29 17:41 ` Yinghai Lu 2008-01-30 2:55 ` Andi Kleen 2008-01-30 3:24 ` Yinghai Lu 2008-01-29 18:08 ` Yinghai Lu 2008-01-29 9:05 ` [PATCH 1/2] print out node_data addr and bootmap_start addr Yinghai Lu [not found] ` <20080201170908.GB2159@elte.hu> 2008-02-01 21:29 ` [PATCH] x86_64: mark x86_cpu_to_node_map_init to __initdata like other xx_init Yinghai Lu 2008-01-28 9:16 Yinghai Lu 2008-01-28 10:34 ` Ingo Molnar
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).