LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH] x86/voyager: Switch voyager memory detection to early_ioremap.
@ 2008-01-19 16:08 Ian Campbell
  2008-01-19 16:08 ` [PATCH] x86: Construct 32 bit boot time page tables in native format Ian Campbell
  0 siblings, 1 reply; 80+ messages in thread
From: Ian Campbell @ 2008-01-19 16:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ian Campbell, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	James Bottomley, Eric W. Biederman

Extracted from an earlier patch by Eric Biederman.

Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
CC: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
---
 arch/x86/mach-voyager/voyager_basic.c |   19 +++++++------------
 1 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c
index 6a949e4..ed41fd8 100644
--- a/arch/x86/mach-voyager/voyager_basic.c
+++ b/arch/x86/mach-voyager/voyager_basic.c
@@ -110,8 +110,9 @@ typedef struct ClickMap {
 	} Entry[CLICK_ENTRIES];
 } ClickMap_t;
 
-/* This routine is pretty much an awful hack to read the bios clickmap by
- * mapping it into page 0.  There are usually three regions in the map:
+/*
+ * This routine reads the bios clickmap.  There are usually three
+ * regions in the map:
  * 	Base Memory
  * 	Extended Memory
  *	zero length marker for end of map
@@ -125,7 +126,6 @@ int __init voyager_memory_detect(int region, __u32 * start, __u32 * length)
 	__u8 cmos[4];
 	ClickMap_t *map;
 	unsigned long map_addr;
-	unsigned long old;
 
 	if (region >= CLICK_ENTRIES) {
 		printk("Voyager: Illegal ClickMap region %d\n", region);
@@ -138,12 +138,8 @@ int __init voyager_memory_detect(int region, __u32 * start, __u32 * length)
 
 	map_addr = *(unsigned long *)cmos;
 
-	/* steal page 0 for this */
-	old = pg0[0];
-	pg0[0] = ((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
-	local_flush_tlb();
-	/* now clear everything out but page 0 */
-	map = (ClickMap_t *) (map_addr & (~PAGE_MASK));
+	/* Setup a temporary mapping for the clickmap */
+	map = early_ioremap(map_addr, sizeof(*map));
 
 	/* zero length is the end of the clickmap */
 	if (map->Entry[region].Length != 0) {
@@ -152,9 +148,8 @@ int __init voyager_memory_detect(int region, __u32 * start, __u32 * length)
 		retval = 1;
 	}
 
-	/* replace the mapping */
-	pg0[0] = old;
-	local_flush_tlb();
+	/* undo the mapping */
+	early_iounmap(map, sizeof(*map));
 	return retval;
 }
 
-- 
1.5.3.8


^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-19 16:08 [PATCH] x86/voyager: Switch voyager memory detection to early_ioremap Ian Campbell
@ 2008-01-19 16:08 ` Ian Campbell
  2008-01-19 23:07   ` Andi Kleen
  2008-01-20 18:30   ` Mika Penttilä
  0 siblings, 2 replies; 80+ messages in thread
From: Ian Campbell @ 2008-01-19 16:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ian Campbell, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	Eric W. Biederman

Specifically the boot time page tables in a CONFIG_X86_PAE=y enabled
kernel are in PAE format.

early_ioremap is updated to use the standard page table accessors.

Derived from an earlier patch by Eric Biederman.

Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
---
 arch/x86/kernel/head_32.S      |  116 +++++++++++++------------------------
 arch/x86/kernel/setup_32.c     |    4 +
 arch/x86/mm/Makefile_32        |    2 +-
 arch/x86/mm/early_pgtable_32.c |  125 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/mm/init_32.c          |   45 --------------
 arch/x86/mm/ioremap_32.c       |   53 ++++++++++-------
 include/asm-x86/page_32.h      |    1 -
 include/asm-x86/pgtable_32.h   |    4 -
 8 files changed, 201 insertions(+), 149 deletions(-)
 create mode 100644 arch/x86/mm/early_pgtable_32.c

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f409fe2..2090aa4 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -33,44 +33,6 @@
 #define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
 
 /*
- * This is how much memory *in addition to the memory covered up to
- * and including _end* we need mapped initially.
- * We need:
- *  - one bit for each possible page, but only in low memory, which means
- *     2^32/4096/8 = 128K worst case (4G/4G split.)
- *  - enough space to map all low memory, which means
- *     (2^32/4096) / 1024 pages (worst case, non PAE)
- *     (2^32/4096) / 512 + 4 pages (worst case for PAE)
- *  - a few pages for allocator use before the kernel pagetable has
- *     been set up
- *
- * Modulo rounding, each megabyte assigned here requires a kilobyte of
- * memory, which is currently unreclaimed.
- *
- * This should be a multiple of a page.
- */
-LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
-
-/*
- * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
- * pagetables from above the 16MB DMA limit, so we'll have to set
- * up pagetables 16MB more (worst-case):
- */
-#ifdef CONFIG_DEBUG_PAGEALLOC
-LOW_PAGES = LOW_PAGES + 0x1000000
-#endif
-
-#if PTRS_PER_PMD > 1
-PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
-#else
-PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
-#endif
-BOOTBITMAP_SIZE = LOW_PAGES / 8
-ALLOCATOR_SLOP = 4
-
-INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
-
-/*
  * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
  * %esi points to the real-mode code as a 32-bit pointer.
  * CS and DS must be 4 GB flat segments, but we don't depend on
@@ -160,47 +122,52 @@ num_subarch_entries = (. - subarch_entries) / 4
 .previous
 #endif /* CONFIG_PARAVIRT */
 
-/*
- * Initialize page tables.  This creates a PDE and a set of page
- * tables, which are located immediately beyond _end.  The variable
- * init_pg_tables_end is set up to point to the first "safe" location.
- * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
- *
- * Warning: don't use %esi or the stack in this code.  However, %esp
- * can be used as a GPR if you really need it...
- */
-page_pde_offset = (__PAGE_OFFSET >> 20);
+#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
 
 default_entry:
-	movl $(pg0 - __PAGE_OFFSET), %edi
-	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
-	movl $0x007, %eax			/* 0x007 = PRESENT+RW+USER */
-10:
-	leal 0x007(%edi),%ecx			/* Create PDE entry */
-	movl %ecx,(%edx)			/* Store identity PDE entry */
-	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
-	addl $4,%edx
-	movl $1024, %ecx
-11:
-	stosl
-	addl $0x1000,%eax
-	loop 11b
-	/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
-	/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
-	leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
-	cmpl %ebp,%eax
-	jb 10b
-	movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
-
-	/* Do an early initialization of the fixmap area */
-	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
-	movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
-	addl $0x67, %eax			/* 0x67 == _PAGE_TABLE */
-	movl %eax, 4092(%edx)
+	/* Setup the stack */
+	lss stack_start - __PAGE_OFFSET, %esp
+	subl $__PAGE_OFFSET, %esp
+
+	/* Initialize the boot page tables */
+	call early_pgtable_init
+
+	movl cr4_bits,%edx
+	andl %edx,%edx
+	jz 1f
+	movl %cr4,%eax		# Turn on paging options (PSE,PAE,..)
+	orl %edx,%eax
+	movl %eax,%cr4
+1:
+#ifdef CONFIG_X86_PAE
+	btl $5, %eax
+	jnc err_no_pae
+#endif
 
 	xorl %ebx,%ebx				/* This is the boot CPU (BSP) */
 	jmp 3f
+
+#ifdef CONFIG_X86_PAE
+err_no_pae:
+	/* It is probably too early but we might as well try... */
+#ifdef CONFIG_PRINTK
+	pusha
+	pushl %eax
+	pushl $err_no_pae_msg - __PAGE_OFFSET
+#ifdef CONFIG_EARLY_PRINTK
+	call early_printk - __PAGE_OFFSET
+#else
+	call printk - __PAGE_OFFSET
+#endif
+#endif
+	jmp hlt_loop
+
+err_no_pae_msg:
+	.ascii "cannot execute a PAE-enabled kernel on a PAE-less CPU!"
+	.ascii " (CR4 %lx)\n"
+	.byte  0
+#endif
+
 /*
  * Non-boot CPU entry point; entered from trampoline.S
  * We can't lgdt here, because lgdt itself uses a data segment, but
@@ -237,7 +204,6 @@ ENTRY(startup_32_smp)
  *	NOTE! We have to correct for the fact that we're
  *	not yet offset PAGE_OFFSET..
  */
-#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
 	movl cr4_bits,%edx
 	andl %edx,%edx
 	jz 6f
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index c6f25cb..196c23b 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -153,7 +153,11 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
 struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
 EXPORT_SYMBOL(boot_cpu_data);
 
+#ifndef CONFIG_X86_PAE
 unsigned long mmu_cr4_features;
+#else
+unsigned long mmu_cr4_features = X86_CR4_PAE;
+#endif
 
 /* for MCA, but anyone else can use it if they want */
 unsigned int machine_id;
diff --git a/arch/x86/mm/Makefile_32 b/arch/x86/mm/Makefile_32
index 2f69025..1b8c09f 100644
--- a/arch/x86/mm/Makefile_32
+++ b/arch/x86/mm/Makefile_32
@@ -2,7 +2,7 @@
 # Makefile for the linux i386-specific parts of the memory manager.
 #
 
-obj-y	:= init_32.o pgtable_32.o fault_32.o ioremap_32.o extable.o pageattr_32.o mmap.o pat.o ioremap.o
+obj-y	:= init_32.o pgtable_32.o fault_32.o ioremap_32.o extable.o pageattr_32.o mmap.o pat.o ioremap.o early_pgtable_32.o
 
 obj-$(CONFIG_CPA_DEBUG) += pageattr-test.o
 obj-$(CONFIG_NUMA) += discontig_32.o
diff --git a/arch/x86/mm/early_pgtable_32.c b/arch/x86/mm/early_pgtable_32.c
new file mode 100644
index 0000000..dc5d648
--- /dev/null
+++ b/arch/x86/mm/early_pgtable_32.c
@@ -0,0 +1,125 @@
+/*
+ * Construct boot time page tables.
+ */
+
+/*
+ * Since a paravirt guest will never come down this path we want
+ * native style page table accessors here.
+ */
+#undef CONFIG_PARAVIRT
+
+#include <linux/pagemap.h>
+
+#include <asm/setup.h>
+
+/*
+ * This is how much memory *in addition to the memory covered up to
+ * and including _end* we need mapped initially.  We need one bit for
+ * each possible page, but only in low memory, which means
+ * 2^32/4096/8 = 128K worst case (4G/4G split.)
+ *
+ * Modulo rounding, each megabyte assigned here requires a kilobyte of
+ * memory, which is currently unreclaimed.
+ *
+ * This should be a multiple of a page.
+ */
+#define INIT_MAP_BEYOND_END	(128*1024)
+
+/*
+ * Initialize page tables.  This creates a PDE and a set of page
+ * tables, which are located immediately beyond _end.  The variable
+ * init_pg_tables_end is set up to point to the first "safe" location.
+ * Mappings are created both at virtual address 0 (identity mapping)
+ * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
+ *
+ * WARNING: This code runs at its physical address not its virtual address,
+ * with all physical everything identity mapped, and nothing else mapped.
+ * This means global variables must be done very carefully.
+ */
+#define __pavar(X) (*(__typeof__(X) *)__pa_symbol(&(X)))
+
+static inline __init pud_t *early_pud_offset(pgd_t *pgd, unsigned long vaddr)
+{
+	return (pud_t *)(pgd + pgd_index(vaddr));
+}
+
+static inline __init pmd_t *early_pmd_offset(pud_t *pud, unsigned long vaddr)
+{
+#ifndef CONFIG_X86_PAE
+	return (pmd_t *)pud;
+#else
+	return ((pmd_t *)(u32)(pud_val(*pud) & PAGE_MASK))
+		+ pmd_index(vaddr);
+#endif
+}
+
+static inline __init pte_t *early_pte_offset(pmd_t *pmd, unsigned long vaddr)
+{
+	return ((pte_t *)(u32)(pmd_val(*pmd) & PAGE_MASK))
+		+ pte_index(vaddr);
+}
+
+static inline __init pmd_t *
+early_pmd_alloc(pgd_t *pgd_base, unsigned long vaddr, unsigned long *end)
+{
+	pud_t *pud = early_pud_offset(pgd_base, vaddr);
+
+#ifdef CONFIG_X86_PAE
+	if (!(pud_val(*pud) & _PAGE_PRESENT)) {
+		unsigned long phys = *end;
+		memset((void *)phys, 0, PAGE_SIZE);
+		set_pud(pud, __pud(phys | _PAGE_PRESENT));
+		*end += PAGE_SIZE;
+	}
+#endif
+	return early_pmd_offset(pud, vaddr);
+}
+
+static __init pte_t *
+early_pte_alloc(pgd_t *pgd_base, unsigned long vaddr, unsigned long *end)
+{
+	pmd_t *pmd;
+
+	pmd = early_pmd_alloc(pgd_base, vaddr, end);
+	if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
+		unsigned long phys = *end;
+		memset((void *)phys, 0, PAGE_SIZE);
+		set_pmd(pmd, __pmd(phys | _PAGE_TABLE));
+		*end += PAGE_SIZE;
+	}
+	return early_pte_offset(pmd, vaddr);
+}
+
+static __init void early_set_pte_phys(pgd_t *pgd_base, unsigned long vaddr,
+				      unsigned long phys, unsigned long *end)
+{
+	pte_t *pte;
+	pte = early_pte_alloc(pgd_base, vaddr, end);
+	set_pte(pte, __pte(phys | _PAGE_KERNEL_EXEC));
+}
+
+void __init early_pgtable_init(void)
+{
+	unsigned long addr, end;
+	pgd_t *pgd_base;
+
+	pgd_base = __pavar(swapper_pg_dir);
+	end = __pa_symbol(pg0);
+
+	/* Initialize the directory page */
+	memset(pgd_base, 0, PAGE_SIZE);
+
+	/* Set up the fixmap page table */
+	early_pte_alloc(pgd_base, __pavar(__FIXADDR_TOP), &end);
+
+	/* Set up the initial kernel mapping */
+	for (addr = 0; addr < (end + INIT_MAP_BEYOND_END); addr += PAGE_SIZE)
+		early_set_pte_phys(pgd_base, addr + PAGE_OFFSET, addr, &end);
+
+
+	/* Set up the low identity mappings */
+	clone_pgd_range(pgd_base, pgd_base + USER_PTRS_PER_PGD,
+			min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
+
+	__pavar(init_pg_tables_end) = end;
+}
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index cbba769..2f94a3a 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -353,44 +353,11 @@ extern void __init remap_numa_kva(void);
 
 void __init native_pagetable_setup_start(pgd_t *base)
 {
-#ifdef CONFIG_X86_PAE
-	int i;
-
-	/*
-	 * Init entries of the first-level page table to the
-	 * zero page, if they haven't already been set up.
-	 *
-	 * In a normal native boot, we'll be running on a
-	 * pagetable rooted in swapper_pg_dir, but not in PAE
-	 * mode, so this will end up clobbering the mappings
-	 * for the lower 24Mbytes of the address space,
-	 * without affecting the kernel address space.
-	 */
-	for (i = 0; i < USER_PTRS_PER_PGD; i++)
-		set_pgd(&base[i],
-			__pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
-
-	/* Make sure kernel address space is empty so that a pagetable
-	   will be allocated for it. */
-	memset(&base[USER_PTRS_PER_PGD], 0,
-	       KERNEL_PGD_PTRS * sizeof(pgd_t));
-#else
 	paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
-#endif
 }
 
 void __init native_pagetable_setup_done(pgd_t *base)
 {
-#ifdef CONFIG_X86_PAE
-	/*
-	 * Add low memory identity-mappings - SMP needs it when
-	 * starting up on an AP from real-mode. In the non-PAE
-	 * case we already have these mappings through head.S.
-	 * All user-space mappings are explicitly cleared after
-	 * SMP startup.
-	 */
-	set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
-#endif
 }
 
 /*
@@ -559,14 +526,6 @@ void __init paging_init(void)
 
 	load_cr3(swapper_pg_dir);
 
-#ifdef CONFIG_X86_PAE
-	/*
-	 * We will bail out later - printk doesn't work right now so
-	 * the user would just see a hanging kernel.
-	 */
-	if (cpu_has_pae)
-		set_in_cr4(X86_CR4_PAE);
-#endif
 	__flush_tlb_all();
 
 	kmap_init();
@@ -696,10 +655,6 @@ void __init mem_init(void)
 	BUG_ON((unsigned long)high_memory      > VMALLOC_START);
 #endif /* double-sanity-check paranoia */
 
-#ifdef CONFIG_X86_PAE
-	if (!cpu_has_pae)
-		panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
-#endif
 	if (boot_cpu_data.wp_works_ok < 0)
 		test_wp_bit();
 
diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index 05a24cd..73a36cd 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -226,40 +226,45 @@ static int __init early_ioremap_debug_setup(char *str)
 __setup("early_ioremap_debug", early_ioremap_debug_setup);
 
 static __initdata int after_paging_init;
-static __initdata unsigned long bm_pte[1024]
+static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
 				__attribute__((aligned(PAGE_SIZE)));
 
-static inline unsigned long * __init early_ioremap_pgd(unsigned long addr)
+static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
 {
-	return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023);
+	pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)];
+	pud_t *pud = pud_offset(pgd, addr);
+	pmd_t *pmd = pmd_offset(pud, addr);
+
+	return pmd;
 }
 
-static inline unsigned long * __init early_ioremap_pte(unsigned long addr)
+static inline pte_t * __init early_ioremap_pte(unsigned long addr)
 {
-	return bm_pte + ((addr >> PAGE_SHIFT) & 1023);
+	return &bm_pte[pte_index(addr)];
 }
 
 void __init early_ioremap_init(void)
 {
-	unsigned long *pgd;
+	pmd_t *pmd;
 
 	if (early_ioremap_debug)
 		printk("early_ioremap_init()\n");
 
-	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
-	*pgd = __pa(bm_pte) | _PAGE_TABLE;
+	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
 	memset(bm_pte, 0, sizeof(bm_pte));
+	set_pmd(pmd, __pmd(__pa(bm_pte) | _PAGE_TABLE));
+
 	/*
-	 * The boot-ioremap range spans multiple pgds, for which
+	 * The boot-ioremap range spans multiple pmds, for which
 	 * we are not prepared:
 	 */
-	if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
+	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
 		WARN_ON(1);
-		printk("pgd %p != %p\n",
-			pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
-		printk("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+		printk(KERN_WARNING "pmd %p != %p\n",
+		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
+		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
 			fix_to_virt(FIX_BTMAP_BEGIN));
-		printk("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
+		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
 			fix_to_virt(FIX_BTMAP_END));
 
 		printk("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
@@ -269,27 +274,28 @@ void __init early_ioremap_init(void)
 
 void __init early_ioremap_clear(void)
 {
-	unsigned long *pgd;
+	pmd_t *pmd;
 
 	if (early_ioremap_debug)
 		printk("early_ioremap_clear()\n");
 
-	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
-	*pgd = 0;
+	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
+	pmd_clear(pmd);
 	__flush_tlb_all();
 }
 
 void __init early_ioremap_reset(void)
 {
 	enum fixed_addresses idx;
-	unsigned long *pte, phys, addr;
+	unsigned long addr, phys;
+	pte_t *pte;
 
 	after_paging_init = 1;
 	for (idx = FIX_BTMAP_BEGIN; idx <= FIX_BTMAP_END; idx--) {
 		addr = fix_to_virt(idx);
 		pte = early_ioremap_pte(addr);
-		if (!*pte & _PAGE_PRESENT) {
-			phys = *pte & PAGE_MASK;
+		if (!(pte_val(*pte) & _PAGE_PRESENT)) {
+			phys = pte_val(*pte) & PAGE_MASK;
 			set_fixmap(idx, phys);
 		}
 	}
@@ -298,7 +304,8 @@ void __init early_ioremap_reset(void)
 static void __init __early_set_fixmap(enum fixed_addresses idx,
 				   unsigned long phys, pgprot_t flags)
 {
-	unsigned long *pte, addr = __fix_to_virt(idx);
+	unsigned long addr = __fix_to_virt(idx);
+	pte_t *pte;
 
 	if (idx >= __end_of_fixed_addresses) {
 		BUG();
@@ -306,9 +313,9 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
 	}
 	pte = early_ioremap_pte(addr);
 	if (pgprot_val(flags))
-		*pte = (phys & PAGE_MASK) | pgprot_val(flags);
+		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
 	else
-		*pte = 0;
+		pte_clear(NULL, addr, pte);
 	__flush_tlb_one(addr);
 }
 
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 11c4b39..8fc0473 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -48,7 +48,6 @@ typedef unsigned long	pgprotval_t;
 typedef unsigned long	phys_addr_t;
 
 typedef union { pteval_t pte, pte_low; } pte_t;
-typedef pte_t boot_pte_t;
 
 #endif	/* __ASSEMBLY__ */
 #endif	/* CONFIG_X86_PAE */
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 11c8b73..c07389b 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -55,10 +55,6 @@ int text_address(unsigned long);
 #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
 #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
 
-#define TWOLEVEL_PGDIR_SHIFT	22
-#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
-#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
-
 /* Just any arbitrary offset to the start of the vmalloc VM area: the
  * current 8MB value just means that there will be a 8MB "hole" after the
  * physical memory until the kernel virtual memory starts.  That means that
-- 
1.5.3.8


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-19 16:08 ` [PATCH] x86: Construct 32 bit boot time page tables in native format Ian Campbell
@ 2008-01-19 23:07   ` Andi Kleen
  2008-01-19 23:50     ` H. Peter Anvin
  2008-01-20 16:44     ` Ian Campbell
  2008-01-20 18:30   ` Mika Penttilä
  1 sibling, 2 replies; 80+ messages in thread
From: Andi Kleen @ 2008-01-19 23:07 UTC (permalink / raw)
  To: Ian Campbell
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	Eric W. Biederman

Ian Campbell <ijc@hellion.org.uk> writes:
> +1:
> +#ifdef CONFIG_X86_PAE
> +	btl $5, %eax
> +	jnc err_no_pae
> +#endif
>  
>  	xorl %ebx,%ebx				/* This is the boot CPU (BSP) */
>  	jmp 3f
> +
> +#ifdef CONFIG_X86_PAE
> +err_no_pae:
> +	/* It is probably too early but we might as well try... */

Without a low identity mapping early_printk will not work and printk
definitely not.

> +#ifdef CONFIG_PRINTK

You should do the test in the 16 bit boot code. In fact it should
already do it by testing the CPUID REQUIRED_MASK.

The only way this could be entered is if someone skips the 16bit boot code
by using kexec, but has the wrong flags. I'm not sure how to handle
it there.

> +/*
> + * Since a paravirt guest will never come down this path we want
> + * native style page table accessors here.
> + */
> +#undef CONFIG_PARAVIRT

Seems quite fragile. I'm sure that would hurt later.


> +
> +static inline __init pud_t *early_pud_offset(pgd_t *pgd, unsigned long vaddr)
> +{
> +	return (pud_t *)(pgd + pgd_index(vaddr));
> +}
> +
> +static inline __init pmd_t *early_pmd_offset(pud_t *pud, unsigned long vaddr)
> +{
> +#ifndef CONFIG_X86_PAE
> +	return (pmd_t *)pud;
> +#else
> +	return ((pmd_t *)(u32)(pud_val(*pud) & PAGE_MASK))
> +		+ pmd_index(vaddr);
> +#endif
> +}
> +
> +static inline __init pte_t *early_pte_offset(pmd_t *pmd, unsigned long vaddr)
> +{
> +	return ((pte_t *)(u32)(pmd_val(*pmd) & PAGE_MASK))

That will break if the kernel is > 4GB won't it? Also same for pmd.

Also not handling NX is dubious, although you can probably get away with it there.

> +		+ pte_index(vaddr);
> +}
> +
> +static inline __init pmd_t *
> +early_pmd_alloc(pgd_t *pgd_base, unsigned long vaddr, unsigned long *end)
> +{
> +	pud_t *pud = early_pud_offset(pgd_base, vaddr);
> +
> +#ifdef CONFIG_X86_PAE
> +	if (!(pud_val(*pud) & _PAGE_PRESENT)) {


Why not set it in the pgd which is identical? Also the proper test is !pgd_none()



> +{
> +	pmd_t *pmd;
> +
> +	pmd = early_pmd_alloc(pgd_base, vaddr, end);
> +	if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {

!pmd_none()

> +		unsigned long phys = *end;
> +		memset((void *)phys, 0, PAGE_SIZE);
> +		set_pmd(pmd, __pmd(phys | _PAGE_TABLE));
> +		*end += PAGE_SIZE;
> +	}
> +	return early_pte_offset(pmd, vaddr);
> +}
> +
> +static __init void early_set_pte_phys(pgd_t *pgd_base, unsigned long vaddr,
> +				      unsigned long phys, unsigned long *end)
> +{
> +	pte_t *pte;
> +	pte = early_pte_alloc(pgd_base, vaddr, end);
> +	set_pte(pte, __pte(phys | _PAGE_KERNEL_EXEC));
> +}
> +
> +void __init early_pgtable_init(void)
> +{
> +	unsigned long addr, end;
> +	pgd_t *pgd_base;
> +
> +	pgd_base = __pavar(swapper_pg_dir);
> +	end = __pa_symbol(pg0);

Are you sure there will be enough memory here? You might need to use
an e820 allocator similar to x86-64.

The typical problem is that you run into some other memory used by
someone else.

>  {
>  	enum fixed_addresses idx;
> -	unsigned long *pte, phys, addr;
> +	unsigned long addr, phys;
> +	pte_t *pte;
>  
>  	after_paging_init = 1;
>  	for (idx = FIX_BTMAP_BEGIN; idx <= FIX_BTMAP_END; idx--) {
>  		addr = fix_to_virt(idx);
>  		pte = early_ioremap_pte(addr);
> -		if (!*pte & _PAGE_PRESENT) {
> -			phys = *pte & PAGE_MASK;
> +		if (!(pte_val(*pte) & _PAGE_PRESENT)) {

pte_present(). Ok the old code was wrong too, but no need to do that again.

>  			set_fixmap(idx, phys);
>  		}
>  	}
> @@ -298,7 +304,8 @@ void __init early_ioremap_reset(void)
>  static void __init __early_set_fixmap(enum fixed_addresses idx,
>  				   unsigned long phys, pgprot_t flags)
>  {
> -	unsigned long *pte, addr = __fix_to_virt(idx);
> +	unsigned long addr = __fix_to_virt(idx);
> +	pte_t *pte;

Unrelated?

-Andi

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-19 23:07   ` Andi Kleen
@ 2008-01-19 23:50     ` H. Peter Anvin
  2008-01-20 16:44     ` Ian Campbell
  1 sibling, 0 replies; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-19 23:50 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ian Campbell, linux-kernel, Thomas Gleixner, Ingo Molnar,
	Eric W. Biederman

Andi Kleen wrote:
> 
> That will break if the kernel is > 4GB won't it? Also same for pmd.
> 

The kernel can't be > 4 GB; after all, we're running here with paging 
disabled, so inherently we're < 4 GB...

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-19 23:07   ` Andi Kleen
  2008-01-19 23:50     ` H. Peter Anvin
@ 2008-01-20 16:44     ` Ian Campbell
  2008-01-20 17:39       ` Andi Kleen
  1 sibling, 1 reply; 80+ messages in thread
From: Ian Campbell @ 2008-01-20 16:44 UTC (permalink / raw)
  To: Andi Kleen
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	Eric W. Biederman


On Sun, 2008-01-20 at 00:07 +0100, Andi Kleen wrote: 
> Ian Campbell <ijc@hellion.org.uk> writes:
> > +#ifdef CONFIG_X86_PAE
> > +err_no_pae:
> > +	/* It is probably too early but we might as well try... */
> 
> Without a low identity mapping early_printk will not work and printk
> definitely not.
> 
> > +#ifdef CONFIG_PRINTK
> 
> You should do the test in the 16 bit boot code. In fact it should
> already do it by testing the CPUID REQUIRED_MASK.

Indeed it does. I don't have any non-PAE hardware to test it on, but I
turned the failure case into a simple jmp to hlt_loop since we ought
never to get here in any case.

> > +/*
> > + * Since a paravirt guest will never come down this path we want
> > + * native style page table accessors here.
> > + */
> > +#undef CONFIG_PARAVIRT
> 
> Seems quite fragile. I'm sure that would hurt later.

The problem here is that we explicitly want native accessors because
it's too early to use the pv ops since we are still running P==V. A PV
kernel boot will never come down this path -- it is diverted earlier in
head_32.S so using the native versions is appropriate.

I'll try again to use the native_{make,set}_xxx functions but originally
I found the necessary variants weren't defined in all combinations of
PAE/not and PARAVIRT/not.

FWIW we use the same undef trick under arch/x86/boot too and this early
start of day stuff is fairly similar.

> > +static inline __init pte_t *early_pte_offset(pmd_t *pmd, unsigned long vaddr)
> > +{
> > +	return ((pte_t *)(u32)(pmd_val(*pmd) & PAGE_MASK))
> 
> That will break if the kernel is > 4GB won't it? Also same for pmd.

As hpa says we can't be above 4G at this point. Probably I can use some
variant of make_pte now though.

> > +static inline __init pmd_t *
> > +early_pmd_alloc(pgd_t *pgd_base, unsigned long vaddr, unsigned long *end)
> > +{
> > +	pud_t *pud = early_pud_offset(pgd_base, vaddr);
> > +
> > +#ifdef CONFIG_X86_PAE
> > +	if (!(pud_val(*pud) & _PAGE_PRESENT)) {
> 
> Why not set it in the pgd which is identical? Also the proper test is !pgd_none()

I was trying to fit in with the native_foo stuff that is available and
happened to be using pud on my last attempt before I switched to the
#undef CONFIG_PARAVIRT approach. I'll switch to pgd if I can get it to
work.

pgd_none (and pud_none) are hardcoded to 0 in the 32 bit case (in
asm-generic/pgtable-nopud.h and asm-generic/pgtable-nopmd.h or
asm-x86/pgtable-3level.h). Presumably this is because at regular runtime
these entries are guaranteed to exist which isn't true this early at
startup.

In fact since we are always going to need a PMD in the PAE case there is
probably not much wrong with simply unconditionally allocating the pmd
at the start of early_pgtable_init().

> > +{
> > +	pmd_t *pmd;
> > +
> > +	pmd = early_pmd_alloc(pgd_base, vaddr, end);
> > +	if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
> 
> !pmd_none()

done (without the !)

> > +void __init early_pgtable_init(void)
> > +{
> > +	unsigned long addr, end;
> > +	pgd_t *pgd_base;
> > +
> > +	pgd_base = __pavar(swapper_pg_dir);
> > +	end = __pa_symbol(pg0);
> 
> Are you sure there will be enough memory here? You might need to use
> an e820 allocator similar to x86-64.

True. However the assembly being replaced makes the same assumptions so
I don't think that should block this patch, it's a fixup that can be
made later.

> > -		if (!*pte & _PAGE_PRESENT) {
> > -			phys = *pte & PAGE_MASK;
> > +		if (!(pte_val(*pte) & _PAGE_PRESENT)) {
> 
> pte_present(). Ok the old code was wrong too, but no need to do that again.

Done.

> > @@ -298,7 +304,8 @@ void __init early_ioremap_reset(void)
> >  static void __init __early_set_fixmap(enum fixed_addresses idx,
> >  				   unsigned long phys, pgprot_t flags)
> >  {
> > -	unsigned long *pte, addr = __fix_to_virt(idx);
> > +	unsigned long addr = __fix_to_virt(idx);
> > +	pte_t *pte;
> 
> Unrelated?

Nope, the return type of early_ioremap_pte() changed unsigned long ->
pte_t and that is what is assigned to pte.

I'll spin another version.

Ian.
-- 
Ian Campbell

"I go on working for the same reason a hen goes on laying eggs."
		-- H. L. Mencken


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-20 16:44     ` Ian Campbell
@ 2008-01-20 17:39       ` Andi Kleen
  2008-01-20 18:48         ` H. Peter Anvin
  0 siblings, 1 reply; 80+ messages in thread
From: Andi Kleen @ 2008-01-20 17:39 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Andi Kleen, linux-kernel, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, Eric W. Biederman

On Sun, Jan 20, 2008 at 04:44:50PM +0000, Ian Campbell wrote:
> Indeed it does. I don't have any non-PAE to test it but I turned the
> failure case into a simple jmp to hlt_loop since we ought never to get
> here in any case.

There are various loaders (kexec, elilo, ...) that skip the 16bit code
and jump directly to 32bit head.S. So in theory those could hit it.
But still having the loop only is probably fine.

> > > + * Since a paravirt guest will never come down this path we want
> > > + * native style page table accessors here.
> > > + */
> > > +#undef CONFIG_PARAVIRT
> > 
> > Seems quite fragile. I'm sure that would hurt later.
> 
> The problem here is that we explicitly want native accessors because
> it's too early to use the pv ops since we are still running P==V. A PV
> kernel boot will never come down this path -- it is diverted earlier in
> head_32.S so using the native versions are appropriate.

Then I think it would be cleaner to just open code everything without
any accessors.

> As hpa says we can't be above 4G at this point. Probably I can use some
> variant of make_pte now though.

The 32bit cast still feels unclean. After all the PTE is not 32bit.

-Andi

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-19 16:08 ` [PATCH] x86: Construct 32 bit boot time page tables in native format Ian Campbell
  2008-01-19 23:07   ` Andi Kleen
@ 2008-01-20 18:30   ` Mika Penttilä
  2008-01-21 21:23     ` Ian Campbell
  1 sibling, 1 reply; 80+ messages in thread
From: Mika Penttilä @ 2008-01-20 18:30 UTC (permalink / raw)
  To: Ian Campbell
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	Eric W. Biederman


> + * This is how much memory *in addition to the memory covered up to
> + * and including _end* we need mapped initially.  We need one bit for
> + * each possible page, but only in low memory, which means
> + * 2^32/4096/8 = 128K worst case (4G/4G split.)
> + *
> + * Modulo rounding, each megabyte assigned here requires a kilobyte of
> + * memory, which is currently unreclaimed.
> + *
> + * This should be a multiple of a page.
> + */
> +#define INIT_MAP_BEYOND_END	(128*1024)
> +
> +/*
>   

You have dropped the requirement to map all of low memory (the boot 
allocator is used for instance to construct physical mem mapping). 
Either you should fix your INIT_MAP_BEYOND_END or make a big comment 
telling us why it isn't necessary anymore to map low mem.

--Mika

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-20 17:39       ` Andi Kleen
@ 2008-01-20 18:48         ` H. Peter Anvin
  2008-01-20 18:55           ` Andi Kleen
  2008-01-22 10:05           ` Ingo Molnar
  0 siblings, 2 replies; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-20 18:48 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ian Campbell, linux-kernel, Thomas Gleixner, Ingo Molnar,
	Eric W. Biederman

Andi Kleen wrote:
> On Sun, Jan 20, 2008 at 04:44:50PM +0000, Ian Campbell wrote:
>> Indeed it does. I don't have any non-PAE to test it but I turned the
>> failure case into a simple jmp to hlt_loop since we ought never to get
>> here in any case.
> 
> There are various loaders (kexec, elilo, ...) that skip the 16bit code
> and jump directly to 32bit head.S. So in theory those could hit it.
> But still having the loop only is probably fine.
> 

It's probably just as well, since we don't really know how to get a 
message out in such an environment anyway...

>>>> + * Since a paravirt guest will never come down this path we want
>>>> + * native style page table accessors here.
>>>> + */
>>>> +#undef CONFIG_PARAVIRT
>>> Seems quite fragile. I'm sure that would hurt later.
>> The problem here is that we explicitly want native accessors because
>> it's too early to use the pv ops since we are still running P==V. A PV
>> kernel boot will never come down this path -- it is diverted earlier in
>> head_32.S so using the native versions are appropriate.
> 
> Then i think it would be cleaner to just open code everything without
> any accessors.

I was thinking about this yesterday, and it seems to me that there are 
two cleaner options here...

- either we should put in the full machinery to be able to run C code 
compiled with -fPIC/-fPIE before paging is enabled.  Unfortunately gcc 
generates R_386_GOT32 relocations for external references even with 
-fPIE, so we'll have to put in some code to adjust the GOT (easy enough 
to do.)

As far as the native accessors are concerned, the right thing to do 
would be to use the native_ forms thereof, not #undef CONFIG_PARAVIRT.

- alternatively, we recognize that this isn't all that big of a piece of 
code and doing it in C really isn't necessary.  We can have a small 
assembly loop for PAE that matches the small assembly loop we already 
have for !PAE.

>> As hpa says we can't be above 4G at this point. Probably I can use some
>> variant of make_pte now though.
> 
> The 32bit cast still feels unclean. After all the PTE is not 32bit.

No, but (pte_t *) is 32 bits.  To be more "Linuxy" it probably should be 
(long) or (unsigned long) though.

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-20 18:55           ` Andi Kleen
@ 2008-01-20 18:54             ` H. Peter Anvin
  0 siblings, 0 replies; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-20 18:54 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ian Campbell, linux-kernel, Thomas Gleixner, Ingo Molnar,
	Eric W. Biederman

Andi Kleen wrote:
>>> There are various loaders (kexec, elilo, ...) that skip the 16bit code
>>> and jump directly to 32bit head.S. So in theory those could hit it.
>>> But still having the loop only is probably fine.
>>>
>> It's probably just as well, since we don't really know how to get a 
>> message out in such an environment anyway...
> 
> It would be robably possible to extend the 32bit protocol to some
> way to error out in such a case. On the other hand I'm not sure it's really
> worth the considerable work to implement and debug such an addition.
> 
>>>> variant of make_pte now though.
>>> The 32bit cast still feels unclean. After all the PTE is not 32bit.
>> No, but (pte_t *) is 32 bits.  To be more "Linuxy" it probably should be 
>> (long) or (unsigned long) though.
> 
> That's not 32bit either.

Looked at the subject line?

	-hpa


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-20 18:48         ` H. Peter Anvin
@ 2008-01-20 18:55           ` Andi Kleen
  2008-01-20 18:54             ` H. Peter Anvin
  2008-01-22 10:05           ` Ingo Molnar
  1 sibling, 1 reply; 80+ messages in thread
From: Andi Kleen @ 2008-01-20 18:55 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andi Kleen, Ian Campbell, linux-kernel, Thomas Gleixner,
	Ingo Molnar, Eric W. Biederman

> >There are various loaders (kexec, elilo, ...) that skip the 16bit code
> >and jump directly to 32bit head.S. So in theory those could hit it.
> >But still having the loop only is probably fine.
> >
> 
> It's probably just as well, since we don't really know how to get a 
> message out in such an environment anyway...

It would probably be possible to extend the 32bit protocol with some
way to error out in such a case. On the other hand I'm not sure it's really
worth the considerable work to implement and debug such an addition.

> >>variant of make_pte now though.
> >
> >The 32bit cast still feels unclean. After all the PTE is not 32bit.
> 
> No, but (pte_t *) is 32 bits.  To be more "Linuxy" it probably should be 
> (long) or (unsigned long) though.

That's not 32bit either.

-Andi

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-20 18:30   ` Mika Penttilä
@ 2008-01-21 21:23     ` Ian Campbell
  2008-01-21 21:38       ` H. Peter Anvin
  0 siblings, 1 reply; 80+ messages in thread
From: Ian Campbell @ 2008-01-21 21:23 UTC (permalink / raw)
  To: Mika Penttilä
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	Eric W. Biederman


On Sun, 2008-01-20 at 20:30 +0200, Mika Penttilä wrote:
> > + * This is how much memory *in addition to the memory covered up to
> > + * and including _end* we need mapped initially.  We need one bit for
> > + * each possible page, but only in low memory, which means
> > + * 2^32/4096/8 = 128K worst case (4G/4G split.)
> > + *
> > + * Modulo rounding, each megabyte assigned here requires a kilobyte of
> > + * memory, which is currently unreclaimed.
> > + *
> > + * This should be a multiple of a page.
> > + */
> > +#define INIT_MAP_BEYOND_END	(128*1024)
> > +
> > +/*
> >   
> 
> You have dropped the requirement to map all of low memory (the boot 
> allocator is used for instance to construct physical mem mapping). 
> Either you should fix your INIT_MAP_BEYOND_END or make a big comment 
> telling us why it isn't necessary anymore to map low mem.

I think you are right. The patch ensures that all the initial page
tables themselves have mappings but won't map the additional pages
needed for mapping the rest of lowmem.

However, I think it is no longer necessary to map a whole new 4G worth
of page table pages because the code in kernel_physical_mapping_init now
extends the initial mappings rather than replacing them (see changes to
native_pagetable_setup_start). So now we only need to map 4G worth of
page tables including the initial page tables. That means we only need
to map a fixed set of extra pages rather than the sliding limit
currently used in the patch.

I'm not convinced by the additional 16MB for CONFIG_DEBUG_PAGEALLOC --
we map enough pages for page tables for 4G of lowmem -- adding space for
an extra 16M seems pointless.

Ian.
-- 
Ian Campbell

Good-bye.  I am leaving because I am bored.
		-- George Saunders' dying words


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-21 21:23     ` Ian Campbell
@ 2008-01-21 21:38       ` H. Peter Anvin
  2008-01-21 21:46         ` Ian Campbell
  0 siblings, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-21 21:38 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Ian Campbell wrote:
> On Sun, 2008-01-20 at 20:30 +0200, Mika Penttilä wrote:
>>>   
>> You have dropped the requirement to map all of low memory (the boot 
>> allocator is used for instance to construct physical mem mapping). 
>> Either you should fix your INIT_MAP_BEYOND_END or make a big comment 
>> telling us why it isn't necessary anymore to map low mem.
> 
> I think you are right. The patch ensures that all the initial page
> tables themselves have mappings but won't map the additional pages
> needed for mapping the rest of lowmem.
> 
> However, I think it is no longer necessary to map a whole new 4G worth
> of page table pages because the code in kernel_physical_mapping_init now
> extends the initial mappings rather than replacing them (see changes to
> native_pagetable_setup_start). So now we only need to map 4G worth of
> page tables including the initial page tables. That means we only need
> to map a fixed set of extra pages rather than the sliding limit
> currently used in the patch.
> 

We still need to be able to construct those page tables, which is what 
that stuff is about...


> I'm not convinced by the additional 16MB for CONFIG_DEBUG_PAGEALLOC --
> we map enough pages for page tables for 4G of lowmem -- adding space for
> an extra 16M seems pointless.

If so, adjusting the limit should be a separate patch.

Either way, I'm increasingly thinking that setting up the initial page 
tables via an assembly loop instead of worrying about the C accessors is 
actually cleaner (I prototyped it yesterday, although I still need the 
rest of the machinery.)



^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-21 21:38       ` H. Peter Anvin
@ 2008-01-21 21:46         ` Ian Campbell
  2008-01-22  2:16           ` H. Peter Anvin
  0 siblings, 1 reply; 80+ messages in thread
From: Ian Campbell @ 2008-01-21 21:46 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman


On Mon, 2008-01-21 at 13:38 -0800, H. Peter Anvin wrote:
> Ian Campbell wrote:
> > On Sun, 2008-01-20 at 20:30 +0200, Mika Penttilä wrote:
> >>>   
> >> You have dropped the requirement to map all of low memory (the boot 
> >> allocator is used for instance to construct physical mem mapping). 
> >> Either you should fix your INIT_MAP_BEYOND_END or make a big comment 
> >> telling us why it isn't necessary anymore to map low mem.
> > 
> > I think you are right. The patch ensures that all the initial page
> > tables themselves have mappings but won't map the additional pages
> > needed for mapping the rest of lowmem.
> > 
> > However, I think it is no longer necessary to map a whole new 4G worth
> > of page table pages because the code in kernel_physical_mapping_init now
> > extends the initial mappings rather than replacing them (see changes to
> > native_pagetable_setup_start). So now we only need to map 4G worth of
> > page tables including the initial page tables. That means we only need
> > to map a fixed set of extra pages rather than the sliding limit
> > currently used in the patch.
> > 
> 
> We still need to be able to construct those page tables, which is what 
> that stuff is about...

Yes, my initial patch was wrong. However with the patch we no longer
throw away the non-PAE initial page tables and replace them with PAE
ones, instead we augment the initial PAE page tables. This means we only
need initial mappings of 4G worth of page tables rather than 4G plus
what is needed for the non-PAE initial page tables.

I don't think I explained that at all well on either attempt...
Hopefully what I mean will be clearer in patch form -- coming in a
second...

> > I'm not convinced by the additional 16MB for CONFIG_DEBUG_PAGEALLOC --
> > we map enough pages for page tables for 4G of lowmem -- adding space for
> > an extra 16M seems pointless.
> 
> If so, adjusting the limit should be a separate patch.
> 
> Either way, I'm increasingly thinking that setting up the initial page 
> tables via an assembly loop instead of worrying about the C accessors is 
> actually cleaner (I prototyped it yesterday, although I still need the 
> rest of the machinery.)

I'm just preparing to send out a version which uses the native_* way of
doing things, it's not actually as clean as I would like so I'd be
interested to see the ASM variant.

Ian.
-- 
Ian Campbell

You shall be rewarded for a dastardly deed.


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-21 21:46         ` Ian Campbell
@ 2008-01-22  2:16           ` H. Peter Anvin
  2008-01-22 17:36             ` Ian Campbell
  0 siblings, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-22  2:16 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

[-- Attachment #1: Type: text/plain, Size: 361 bytes --]

Ian Campbell wrote:
> 
> I'm just preparing to send out a version which uses the native_* way of
> doing things, its not actually as clean as I would like so I'd be
> interested to see the ASM variant.
> 

This is the asm version I came up with.  This is only the actual 
assembly part; it doesn't require the (obviously necessary) bootmem 
adjustments.

	-hpa

[-- Attachment #2: diff --]
[-- Type: text/plain, Size: 6794 bytes --]

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f409fe2..d1d30db 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -18,6 +18,10 @@
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
 #include <asm/setup.h>
+#include <asm/processor-flags.h>
+
+/* Physical address */
+#define pa(X) ((X) - __PAGE_OFFSET)
 
 /*
  * References to members of the new_cpu_data structure.
@@ -79,10 +83,6 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_
  */
 .section .text.head,"ax",@progbits
 ENTRY(startup_32)
-	/* check to see if KEEP_SEGMENTS flag is meaningful */
-	cmpw $0x207, BP_version(%esi)
-	jb 1f
-
 	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
 		us to not reload segments */
 	testb $(1<<6), BP_loadflags(%esi)
@@ -91,7 +91,7 @@ ENTRY(startup_32)
 /*
  * Set segments to known values.
  */
-1:	lgdt boot_gdt_descr - __PAGE_OFFSET
+1:	lgdt pa(boot_gdt_descr)
 	movl $(__BOOT_DS),%eax
 	movl %eax,%ds
 	movl %eax,%es
@@ -104,8 +104,8 @@ ENTRY(startup_32)
  */
 	cld
 	xorl %eax,%eax
-	movl $__bss_start - __PAGE_OFFSET,%edi
-	movl $__bss_stop - __PAGE_OFFSET,%ecx
+	movl $pa(__bss_start),%edi
+	movl $pa(__bss_stop),%ecx
 	subl %edi,%ecx
 	shrl $2,%ecx
 	rep ; stosl
@@ -117,31 +117,32 @@ ENTRY(startup_32)
  * (kexec on panic case). Hence copy out the parameters before initializing
  * page tables.
  */
-	movl $(boot_params - __PAGE_OFFSET),%edi
+	movl $pa(boot_params),%edi
 	movl $(PARAM_SIZE/4),%ecx
 	cld
 	rep
 	movsl
-	movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
+	movl pa(boot_params) + NEW_CL_POINTER,%esi
 	andl %esi,%esi
 	jz 1f			# No comand line
-	movl $(boot_command_line - __PAGE_OFFSET),%edi
+	movl $pa(boot_command_line),%edi
 	movl $(COMMAND_LINE_SIZE/4),%ecx
 	rep
 	movsl
 1:
 
 #ifdef CONFIG_PARAVIRT
-	cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET)
+	/* This is can only trip for a broken bootloader... */
+	cmpw $0x207, pa(boot_params + BP_version)
 	jb default_entry
 
 	/* Paravirt-compatible boot parameters.  Look to see what architecture
 		we're booting under. */
-	movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax
+	movl pa(boot_params + BP_hardware_subarch), %eax
 	cmpl $num_subarch_entries, %eax
 	jae bad_subarch
 
-	movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax
+	movl pa(subarch_entries)(,%eax,4), %eax
 	subl $__PAGE_OFFSET, %eax
 	jmp *%eax
 
@@ -167,17 +168,74 @@ num_subarch_entries = (. - subarch_entries) / 4
  * Mappings are created both at virtual address 0 (identity mapping)
  * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
  *
- * Warning: don't use %esi or the stack in this code.  However, %esp
- * can be used as a GPR if you really need it...
+ * Note that the stack is not yet set up!
  */
-page_pde_offset = (__PAGE_OFFSET >> 20);
+#define PTE_ATTR	0x007		/* PRESENT+RW+USER */
+#define PDE_ATTR	0x067		/* PRESENT+RW+USER+DIRTY+ACCESSED */
+#define PGD_ATTR	0x001		/* PRESENT (no other attributes) */
 
 default_entry:
-	movl $(pg0 - __PAGE_OFFSET), %edi
-	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
-	movl $0x007, %eax			/* 0x007 = PRESENT+RW+USER */
+#ifdef CONFIG_X86_PAE
+	/*
+	 * In PAE mode, the kernel PMD is shared, and __PAGE_OFFSET
+	 * is guaranteed to be a multiple of 1 GB (the PGD granulatity.)
+	 * Thus, we only need to set up a single PMD here; the identity
+	 * mapping is handled by pointing two PGD entries to the PMD.
+	 *
+	 * Note the upper half of each PMD or PTE are always zero at
+	 * this stage.
+	 */
+page_pde_offset = (__PAGE_OFFSET >> 27);
+
+	movl %cr4, %eax
+	orl  $X86_CR4_PAE, %eax
+	movl %eax, %cr4
+
+	xorl %ebx,%ebx				/* %ebx is kept at zero */
+	
+	movl $pa(pg0), %edi
+	movl $pa(swapper_pg_pmd), %edx
+	movl $PTE_ATTR, %eax
+10:
+	leal PDE_ATTR(%edi),%ecx		/* Create PMD entry */
+	movl %ecx,(%edx)			/* Store PMD entry */
+						/* Upper half already zero */
+	addl $8,%edx
+	movl $512,%ecx
+11:
+	stosl
+	xchgl %eax,%ebx
+	stosl
+	xchgl %eax,%ebx
+	addl $0x1000,%eax
+	loop 11b
+
+	/*
+	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
+	 * bytes beyond the end of our own page tables.
+	 */
+	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
+	cmpl %ebp,%eax
+	jb 10b
+	movl %edi,pa(init_pg_tables_end)
+
+	/* Set up the PGD */
+	movl $pa(swapper_pg_pmd)+PGD_ATTR, %eax
+	movl %eax, pa(swapper_pg_dir)			/* Identity map */
+	movl %eax, pa(swapper_pg_dir+page_pde_offset)	/* Kernel map */
+
+	/* Do early initialization of the fixmap area */
+	movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
+	movl %eax,pa(swapper_pg_pmd+0xff8)
+#else	/* Not PAE */
+
+page_pde_offset = (__PAGE_OFFSET >> 20);
+	
+	movl $pa(pg0), %edi
+	movl $pa(swapper_pg_dir), %edx
+	movl $PTE_ATTR, %eax
 10:
-	leal 0x007(%edi),%ecx			/* Create PDE entry */
+	leal PDE_ATTR(%edi),%ecx		/* Create PDE entry */
 	movl %ecx,(%edx)			/* Store identity PDE entry */
 	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
 	addl $4,%edx
@@ -186,19 +244,20 @@ default_entry:
 	stosl
 	addl $0x1000,%eax
 	loop 11b
-	/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
-	/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
-	leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
+	/*
+	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
+	 * bytes beyond the end of our own page tables; the +0x007 is
+	 * the attribute bits
+	 */
+	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
 	cmpl %ebp,%eax
 	jb 10b
-	movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
-
-	/* Do an early initialization of the fixmap area */
-	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
-	movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
-	addl $0x67, %eax			/* 0x67 == _PAGE_TABLE */
-	movl %eax, 4092(%edx)
+	movl %edi,pa(init_pg_tables_end)
 
+	/* Do early initialization of the fixmap area */
+	movl $pa(swapper_pg_fixmap)+PDE_ADDR,%eax
+	movl %eax,pa(swapper_pg_dir+0xffc)
+#endif
 	xorl %ebx,%ebx				/* This is the boot CPU (BSP) */
 	jmp 3f
 /*
@@ -237,7 +296,7 @@ ENTRY(startup_32_smp)
  *	NOTE! We have to correct for the fact that we're
  *	not yet offset PAGE_OFFSET..
  */
-#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
+#define cr4_bits pa(mmu_cr4_features)
 	movl cr4_bits,%edx
 	andl %edx,%edx
 	jz 6f
@@ -278,7 +337,7 @@ ENTRY(startup_32_smp)
 /*
  * Enable paging
  */
-	movl $swapper_pg_dir-__PAGE_OFFSET,%eax
+	movl $pa(swapper_pg_dir),%eax
 	movl %eax,%cr3		/* set the page table pointer.. */
 	movl %cr0,%eax
 	orl $0x80000000,%eax
@@ -556,8 +615,12 @@ ENTRY(_stext)
 	.align PAGE_SIZE_asm
 ENTRY(swapper_pg_dir)
 	.fill 1024,4,0
+#ifdef CONFIG_X86_PAE
 ENTRY(swapper_pg_pmd)
 	.fill 1024,4,0
+#endif
+ENTRY(swapper_pg_fixmap)
+	.fill 1024,4,0
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
 

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-20 18:48         ` H. Peter Anvin
  2008-01-20 18:55           ` Andi Kleen
@ 2008-01-22 10:05           ` Ingo Molnar
  2008-01-22 16:23             ` H. Peter Anvin
  1 sibling, 1 reply; 80+ messages in thread
From: Ingo Molnar @ 2008-01-22 10:05 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andi Kleen, Ian Campbell, linux-kernel, Thomas Gleixner,
	Ingo Molnar, Eric W. Biederman


* H. Peter Anvin <hpa@zytor.com> wrote:

> I was thinking about this yesterday, and it seems to me that there are 
> two cleaner options here...
>
> - either we should put in the full machinery to be able to run C code 
> compiled with -fPIC/-fPIE before paging is enabled.  Unfortunately gcc 
> generates R_386_GOT32 relocations for external references even with 
> -fPIE, so we'll have to put in some code to adjust the GOT (easy 
> enough to do.)

i'd _love_ to have this approach instead of the assembly routines. While 
'constructing pagetables' might not look like a big deal in isolation - 
C is still 10 times more programmable than assembly. Pushing more of the 
early boot code into a sane, non-assembly environment will have positive 
long-term effects all across.

	Ingo

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-22 10:05           ` Ingo Molnar
@ 2008-01-22 16:23             ` H. Peter Anvin
  0 siblings, 0 replies; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-22 16:23 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andi Kleen, Ian Campbell, linux-kernel, Thomas Gleixner,
	Ingo Molnar, Eric W. Biederman

Ingo Molnar wrote:
> * H. Peter Anvin <hpa@zytor.com> wrote:
> 
>> I was thinking about this yesterday, and it seems to me that there are 
>> two cleaner options here...
>>
>> - either we should put in the full machinery to be able to run C code 
>> compiled with -fPIC/-fPIE before paging is enabled.  Unfortunately gcc 
>> generates R_386_GOT32 relocations for external references even with 
>> -fPIE, so we'll have to put in some code to adjust the GOT (easy 
>> enough to do.)
> 
> i'd _love_ to have this approach instead of the assembly routines. While 
> 'constructing pagetables' might not look like a big deal in isolation - 
> C is still 10 times more programmable than assembly. Pushing more of the 
> early boot code into a sane, non-assembly environment will have positive 
> long-term effects all across.
> 

Yes, but that doesn't mean that this particular task is the right thing 
for that job.  In particular, the GOT adjustment will be almost the same 
size as the whole task.

On the other hand, there is a whole bunch of post-paging code in 
head_32.S which doesn't need to be there.

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-22  2:16           ` H. Peter Anvin
@ 2008-01-22 17:36             ` Ian Campbell
  2008-01-22 18:23               ` H. Peter Anvin
  0 siblings, 1 reply; 80+ messages in thread
From: Ian Campbell @ 2008-01-22 17:36 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman


On Mon, 2008-01-21 at 18:16 -0800, H. Peter Anvin wrote:
> Ian Campbell wrote:
> > 
> > I'm just preparing to send out a version which uses the native_* way of
> > doing things, its not actually as clean as I would like so I'd be
> > interested to see the ASM variant.
> > 
> 
> This is the asm version I came up with.

I moderately prefer the C version, even if it is in a restricted
environment where care is needed to access global variables. I like that
it avoids multiple copies of the code and also find the structure of
what's going on is more obviously apparent (even to someone who has done
plenty of ASM mode page table frobbing in the past).

Anyhow, I don't feel all that strongly about it so if the opinion of the
early start of day maintainer(s) is strongly in favour of ASM I'll defer
to that.

> This is only the actual 
> assembly part; it doesn't require the (obviously necessary) bootmem 
> adjustments.

Do you mean the native_pagetable_setup_start/done changes? I'm a bit
confused by not requiring obviously necessary changes -- I presume you
just mean that those changes are desirable but should be deferred into a
separate patch?

The C way doesn't inherently require those two changes to happen in the
same patch either -- probably worth splitting out if we go that route.

Ian.

-- 
Ian Campbell
Current Noise: Pelican - Bliss In Concrete

Your life would be very empty if you had nothing to regret.


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-22 17:36             ` Ian Campbell
@ 2008-01-22 18:23               ` H. Peter Anvin
  2008-01-22 19:48                 ` Ian Campbell
  0 siblings, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-22 18:23 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Ian Campbell wrote:
> On Mon, 2008-01-21 at 18:16 -0800, H. Peter Anvin wrote:
>> Ian Campbell wrote:
>>> I'm just preparing to send out a version which uses the native_* way of
>>> doing things, its not actually as clean as I would like so I'd be
>>> interested to see the ASM variant.
>>>
>> This is the asm version I came up with.
> 
> I moderately prefer the C version, even if it is in a restricted
> environment where care is needed to access global variables. I like that
> it avoids multiple copies of the code and also find the structure of
> what's going on is more obviously apparent (even to someone who has done
> plenty of ASM mode page table frobbing in the past).
> 
> Anyhow, I don't feel all that strongly about it so if the opinion of the
> early start of day maintainer(s) is strongly in favour of ASM I'll defer
> to that.
> 

My opinion is that I want it done properly (PIC and all that jazz) or 
not at all, and certainly would not want to mix linear and 
paging-enabled code in the same file.  When it comes to assembly code, 
at least people can *see* that there there be dragons.

The plus *and* minus of a C version is that it's easier for people to 
modify.  The plus side of that is that if we really need it, it's a lot 
cleaner; the minus side is that it may encourage more code to creep into 
the pre-paging code, which would not be a good thing IMO.

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-22 18:23               ` H. Peter Anvin
@ 2008-01-22 19:48                 ` Ian Campbell
  2008-01-22 20:00                   ` H. Peter Anvin
  0 siblings, 1 reply; 80+ messages in thread
From: Ian Campbell @ 2008-01-22 19:48 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman


On Tue, 2008-01-22 at 10:23 -0800, H. Peter Anvin wrote:
> Ian Campbell wrote:
> > Anyhow, I don't feel all that strongly about it so if the opinion of the
> > early start of day maintainer(s) is strongly in favour of ASM I'll defer
> > to that.
> > 
> 
> My opinion is that I want it done properly (PIC and all that jazz) or 
> not at all, and certainly would not want to mix linear and 
> paging-enabled code in the same file.  When it comes to assembly code, 
> at least people can *see* that there there be dragons.
> 
> The plus *and* minus of a C version is that it's easier for people to 
> modify.  The plus side of that is that if we really need it, it's a lot 
> cleaner; the minus side is that it may encourage more code to creep into 
> the pre-paging code, which would not be a good thing IMO.

Seems reasonable to me. I'll integrate your asm diff with the other
changes and give it a whirl.

Ian. 
-- 
Ian Campbell

Never go to bed mad.  Stay up and fight.
		-- Phyllis Diller, "Phyllis Diller's Housekeeping Hints"


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-22 19:48                 ` Ian Campbell
@ 2008-01-22 20:00                   ` H. Peter Anvin
  2008-01-22 20:36                     ` Ingo Molnar
  0 siblings, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-22 20:00 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

[-- Attachment #1: Type: text/plain, Size: 1035 bytes --]

Ian Campbell wrote:
> On Tue, 2008-01-22 at 10:23 -0800, H. Peter Anvin wrote:
>> Ian Campbell wrote:
>>> Anyhow, I don't feel all that strongly about it so if the opinion of the
>>> early start of day maintainer(s) is strongly in favour of ASM I'll defer
>>> to that.
>>>
>> My opinion is that I want it done properly (PIC and all that jazz) or 
>> not at all, and certainly would not want to mix linear and 
>> paging-enabled code in the same file.  When it comes to assembly code, 
>> at least people can *see* that there there be dragons.
>>
>> The plus *and* minus of a C version is that it's easier for people to 
>> modify.  The plus side of that is that if we really need it, it's a lot 
>> cleaner; the minus side is that it may encourage more code to creep into 
>> the pre-paging code, which would not be a good thing IMO.
> 
> Seems reasonable to me. I'll integrate your asm diff with the other
> changes and give it a whirl.

This version boots into userspace on both PAE and !PAE.  You want to 
take it from here?

	-hpa

[-- Attachment #2: diff --]
[-- Type: text/plain, Size: 14703 bytes --]

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f409fe2..d6a1e04 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -18,6 +18,10 @@
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
 #include <asm/setup.h>
+#include <asm/processor-flags.h>
+
+/* Physical address */
+#define pa(X) ((X) - __PAGE_OFFSET)
 
 /*
  * References to members of the new_cpu_data structure.
@@ -79,10 +83,6 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_
  */
 .section .text.head,"ax",@progbits
 ENTRY(startup_32)
-	/* check to see if KEEP_SEGMENTS flag is meaningful */
-	cmpw $0x207, BP_version(%esi)
-	jb 1f
-
 	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
 		us to not reload segments */
 	testb $(1<<6), BP_loadflags(%esi)
@@ -91,7 +91,7 @@ ENTRY(startup_32)
 /*
  * Set segments to known values.
  */
-1:	lgdt boot_gdt_descr - __PAGE_OFFSET
+1:	lgdt pa(boot_gdt_descr)
 	movl $(__BOOT_DS),%eax
 	movl %eax,%ds
 	movl %eax,%es
@@ -104,8 +104,8 @@ ENTRY(startup_32)
  */
 	cld
 	xorl %eax,%eax
-	movl $__bss_start - __PAGE_OFFSET,%edi
-	movl $__bss_stop - __PAGE_OFFSET,%ecx
+	movl $pa(__bss_start),%edi
+	movl $pa(__bss_stop),%ecx
 	subl %edi,%ecx
 	shrl $2,%ecx
 	rep ; stosl
@@ -117,31 +117,32 @@ ENTRY(startup_32)
  * (kexec on panic case). Hence copy out the parameters before initializing
  * page tables.
  */
-	movl $(boot_params - __PAGE_OFFSET),%edi
+	movl $pa(boot_params),%edi
 	movl $(PARAM_SIZE/4),%ecx
 	cld
 	rep
 	movsl
-	movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
+	movl pa(boot_params) + NEW_CL_POINTER,%esi
 	andl %esi,%esi
 	jz 1f			# No comand line
-	movl $(boot_command_line - __PAGE_OFFSET),%edi
+	movl $pa(boot_command_line),%edi
 	movl $(COMMAND_LINE_SIZE/4),%ecx
 	rep
 	movsl
 1:
 
 #ifdef CONFIG_PARAVIRT
-	cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET)
+	/* This can only trip for a broken bootloader... */
+	cmpw $0x207, pa(boot_params + BP_version)
 	jb default_entry
 
 	/* Paravirt-compatible boot parameters.  Look to see what architecture
 		we're booting under. */
-	movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax
+	movl pa(boot_params + BP_hardware_subarch), %eax
 	cmpl $num_subarch_entries, %eax
 	jae bad_subarch
 
-	movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax
+	movl pa(subarch_entries)(,%eax,4), %eax
 	subl $__PAGE_OFFSET, %eax
 	jmp *%eax
 
@@ -167,17 +168,74 @@ num_subarch_entries = (. - subarch_entries) / 4
  * Mappings are created both at virtual address 0 (identity mapping)
  * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
  *
- * Warning: don't use %esi or the stack in this code.  However, %esp
- * can be used as a GPR if you really need it...
+ * Note that the stack is not yet set up!
  */
-page_pde_offset = (__PAGE_OFFSET >> 20);
+#define PTE_ATTR	0x007		/* PRESENT+RW+USER */
+#define PDE_ATTR	0x067		/* PRESENT+RW+USER+DIRTY+ACCESSED */
+#define PGD_ATTR	0x001		/* PRESENT (no other attributes) */
 
 default_entry:
-	movl $(pg0 - __PAGE_OFFSET), %edi
-	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
-	movl $0x007, %eax			/* 0x007 = PRESENT+RW+USER */
+#ifdef CONFIG_X86_PAE
+	/*
+	 * In PAE mode, the kernel PMD is shared, and __PAGE_OFFSET
+	 * is guaranteed to be a multiple of 1 GB (the PGD granularity.)
+	 * Thus, we only need to set up a single PMD here; the identity
+	 * mapping is handled by pointing two PGD entries to the PMD.
+	 *
+	 * Note the upper half of each PMD or PTE are always zero at
+	 * this stage.
+	 */
+page_pde_offset = (__PAGE_OFFSET >> 27);
+
+	movl %cr4, %eax
+	orl  $X86_CR4_PAE, %eax
+	movl %eax, %cr4
+
+	xorl %ebx,%ebx				/* %ebx is kept at zero */
+	
+	movl $pa(pg0), %edi
+	movl $pa(swapper_pg_pmd), %edx
+	movl $PTE_ATTR, %eax
+10:
+	leal PDE_ATTR(%edi),%ecx		/* Create PMD entry */
+	movl %ecx,(%edx)			/* Store PMD entry */
+						/* Upper half already zero */
+	addl $8,%edx
+	movl $512,%ecx
+11:
+	stosl
+	xchgl %eax,%ebx
+	stosl
+	xchgl %eax,%ebx
+	addl $0x1000,%eax
+	loop 11b
+
+	/*
+	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
+	 * bytes beyond the end of our own page tables.
+	 */
+	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
+	cmpl %ebp,%eax
+	jb 10b
+	movl %edi,pa(init_pg_tables_end)
+
+	/* Set up the PGD */
+	movl $pa(swapper_pg_pmd)+PGD_ATTR, %eax
+	movl %eax, pa(swapper_pg_dir)			/* Identity map */
+	movl %eax, pa(swapper_pg_dir+page_pde_offset)	/* Kernel map */
+
+	/* Do early initialization of the fixmap area */
+	movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
+	movl %eax,pa(swapper_pg_pmd+0xff8)
+#else	/* Not PAE */
+
+page_pde_offset = (__PAGE_OFFSET >> 20);
+	
+	movl $pa(pg0), %edi
+	movl $pa(swapper_pg_dir), %edx
+	movl $PTE_ATTR, %eax
 10:
-	leal 0x007(%edi),%ecx			/* Create PDE entry */
+	leal PDE_ATTR(%edi),%ecx		/* Create PDE entry */
 	movl %ecx,(%edx)			/* Store identity PDE entry */
 	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
 	addl $4,%edx
@@ -186,19 +244,20 @@ default_entry:
 	stosl
 	addl $0x1000,%eax
 	loop 11b
-	/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
-	/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
-	leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
+	/*
+	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
+	 * bytes beyond the end of our own page tables; the +PTE_ATTR
+	 * accounts for the attribute bits
+	 */
+	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
 	cmpl %ebp,%eax
 	jb 10b
-	movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
-
-	/* Do an early initialization of the fixmap area */
-	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
-	movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
-	addl $0x67, %eax			/* 0x67 == _PAGE_TABLE */
-	movl %eax, 4092(%edx)
+	movl %edi,pa(init_pg_tables_end)
 
+	/* Do early initialization of the fixmap area */
+	movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
+	movl %eax,pa(swapper_pg_dir+0xffc)
+#endif
 	xorl %ebx,%ebx				/* This is the boot CPU (BSP) */
 	jmp 3f
 /*
@@ -237,7 +296,7 @@ ENTRY(startup_32_smp)
  *	NOTE! We have to correct for the fact that we're
  *	not yet offset PAGE_OFFSET..
  */
-#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
+#define cr4_bits pa(mmu_cr4_features)
 	movl cr4_bits,%edx
 	andl %edx,%edx
 	jz 6f
@@ -278,10 +337,10 @@ ENTRY(startup_32_smp)
 /*
  * Enable paging
  */
-	movl $swapper_pg_dir-__PAGE_OFFSET,%eax
+	movl $pa(swapper_pg_dir),%eax
 	movl %eax,%cr3		/* set the page table pointer.. */
 	movl %cr0,%eax
-	orl $0x80000000,%eax
+	orl  $X86_CR0_PG,%eax
 	movl %eax,%cr0		/* ..and set paging (PG) bit */
 	ljmp $__BOOT_CS,$1f	/* Clear prefetch and normalize %eip */
 1:
@@ -556,8 +615,12 @@ ENTRY(_stext)
 	.align PAGE_SIZE_asm
 ENTRY(swapper_pg_dir)
 	.fill 1024,4,0
+#ifdef CONFIG_X86_PAE
 ENTRY(swapper_pg_pmd)
 	.fill 1024,4,0
+#endif
+ENTRY(swapper_pg_fixmap)
+	.fill 1024,4,0
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index cbba769..14c6c41 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -43,6 +43,7 @@
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/paravirt.h>
+#include <asm/setup.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
@@ -353,44 +354,11 @@ extern void __init remap_numa_kva(void);
 
 void __init native_pagetable_setup_start(pgd_t *base)
 {
-#ifdef CONFIG_X86_PAE
-	int i;
-
-	/*
-	 * Init entries of the first-level page table to the
-	 * zero page, if they haven't already been set up.
-	 *
-	 * In a normal native boot, we'll be running on a
-	 * pagetable rooted in swapper_pg_dir, but not in PAE
-	 * mode, so this will end up clobbering the mappings
-	 * for the lower 24Mbytes of the address space,
-	 * without affecting the kernel address space.
-	 */
-	for (i = 0; i < USER_PTRS_PER_PGD; i++)
-		set_pgd(&base[i],
-			__pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
-
-	/* Make sure kernel address space is empty so that a pagetable
-	   will be allocated for it. */
-	memset(&base[USER_PTRS_PER_PGD], 0,
-	       KERNEL_PGD_PTRS * sizeof(pgd_t));
-#else
 	paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
-#endif
 }
 
 void __init native_pagetable_setup_done(pgd_t *base)
 {
-#ifdef CONFIG_X86_PAE
-	/*
-	 * Add low memory identity-mappings - SMP needs it when
-	 * starting up on an AP from real-mode. In the non-PAE
-	 * case we already have these mappings through head.S.
-	 * All user-space mappings are explicitly cleared after
-	 * SMP startup.
-	 */
-	set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
-#endif
 }
 
 /*
@@ -399,9 +367,8 @@ void __init native_pagetable_setup_done(pgd_t *base)
  * the boot process.
  *
  * If we're booting on native hardware, this will be a pagetable
- * constructed in arch/i386/kernel/head.S, and not running in PAE mode
- * (even if we'll end up running in PAE).  The root of the pagetable
- * will be swapper_pg_dir.
+ * constructed in arch/x86/kernel/head_32.S.  The root of the
+ * pagetable will be swapper_pg_dir.
  *
  * If we're booting paravirtualized under a hypervisor, then there are
  * more options: we may already be running PAE, and the pagetable may
@@ -559,14 +526,6 @@ void __init paging_init(void)
 
 	load_cr3(swapper_pg_dir);
 
-#ifdef CONFIG_X86_PAE
-	/*
-	 * We will bail out later - printk doesn't work right now so
-	 * the user would just see a hanging kernel.
-	 */
-	if (cpu_has_pae)
-		set_in_cr4(X86_CR4_PAE);
-#endif
 	__flush_tlb_all();
 
 	kmap_init();
@@ -696,10 +655,6 @@ void __init mem_init(void)
 	BUG_ON((unsigned long)high_memory      > VMALLOC_START);
 #endif /* double-sanity-check paranoia */
 
-#ifdef CONFIG_X86_PAE
-	if (!cpu_has_pae)
-		panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
-#endif
 	if (boot_cpu_data.wp_works_ok < 0)
 		test_wp_bit();
 
diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index 05a24cd..fa8a3ff 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -226,40 +226,45 @@ static int __init early_ioremap_debug_setup(char *str)
 __setup("early_ioremap_debug", early_ioremap_debug_setup);
 
 static __initdata int after_paging_init;
-static __initdata unsigned long bm_pte[1024]
+static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
 				__attribute__((aligned(PAGE_SIZE)));
 
-static inline unsigned long * __init early_ioremap_pgd(unsigned long addr)
+static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
 {
-	return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023);
+	pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)];
+	pud_t *pud = pud_offset(pgd, addr);
+	pmd_t *pmd = pmd_offset(pud, addr);
+
+	return pmd;
 }
 
-static inline unsigned long * __init early_ioremap_pte(unsigned long addr)
+static inline pte_t * __init early_ioremap_pte(unsigned long addr)
 {
-	return bm_pte + ((addr >> PAGE_SHIFT) & 1023);
+	return &bm_pte[pte_index(addr)];
 }
 
 void __init early_ioremap_init(void)
 {
-	unsigned long *pgd;
+	pmd_t *pmd;
 
 	if (early_ioremap_debug)
 		printk("early_ioremap_init()\n");
 
-	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
-	*pgd = __pa(bm_pte) | _PAGE_TABLE;
+	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
 	memset(bm_pte, 0, sizeof(bm_pte));
+	set_pmd(pmd, __pmd(__pa(bm_pte) | _PAGE_TABLE));
+
 	/*
-	 * The boot-ioremap range spans multiple pgds, for which
+	 * The boot-ioremap range spans multiple pmds, for which
 	 * we are not prepared:
 	 */
-	if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
+	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
 		WARN_ON(1);
-		printk("pgd %p != %p\n",
-			pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
-		printk("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+		printk(KERN_WARNING "pmd %p != %p\n",
+		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
+		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
 			fix_to_virt(FIX_BTMAP_BEGIN));
-		printk("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
+		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
 			fix_to_virt(FIX_BTMAP_END));
 
 		printk("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
@@ -269,27 +274,28 @@ void __init early_ioremap_init(void)
 
 void __init early_ioremap_clear(void)
 {
-	unsigned long *pgd;
+	pmd_t *pmd;
 
 	if (early_ioremap_debug)
 		printk("early_ioremap_clear()\n");
 
-	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
-	*pgd = 0;
+	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
+	pmd_clear(pmd);
 	__flush_tlb_all();
 }
 
 void __init early_ioremap_reset(void)
 {
 	enum fixed_addresses idx;
-	unsigned long *pte, phys, addr;
+	unsigned long addr, phys;
+	pte_t *pte;
 
 	after_paging_init = 1;
 	for (idx = FIX_BTMAP_BEGIN; idx <= FIX_BTMAP_END; idx--) {
 		addr = fix_to_virt(idx);
 		pte = early_ioremap_pte(addr);
-		if (!*pte & _PAGE_PRESENT) {
-			phys = *pte & PAGE_MASK;
+		if (pte_present(*pte)) {
+			phys = pte_val(*pte) & PAGE_MASK;
 			set_fixmap(idx, phys);
 		}
 	}
@@ -298,7 +304,8 @@ void __init early_ioremap_reset(void)
 static void __init __early_set_fixmap(enum fixed_addresses idx,
 				   unsigned long phys, pgprot_t flags)
 {
-	unsigned long *pte, addr = __fix_to_virt(idx);
+	unsigned long addr = __fix_to_virt(idx);
+	pte_t *pte;
 
 	if (idx >= __end_of_fixed_addresses) {
 		BUG();
@@ -306,9 +313,9 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
 	}
 	pte = early_ioremap_pte(addr);
 	if (pgprot_val(flags))
-		*pte = (phys & PAGE_MASK) | pgprot_val(flags);
+		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
 	else
-		*pte = 0;
+		pte_clear(NULL, addr, pte);
 	__flush_tlb_one(addr);
 }
 
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 11c4b39..8fc0473 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -48,7 +48,6 @@ typedef unsigned long	pgprotval_t;
 typedef unsigned long	phys_addr_t;
 
 typedef union { pteval_t pte, pte_low; } pte_t;
-typedef pte_t boot_pte_t;
 
 #endif	/* __ASSEMBLY__ */
 #endif	/* CONFIG_X86_PAE */
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 11c8b73..c07389b 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -55,10 +55,6 @@ int text_address(unsigned long);
 #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
 #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
 
-#define TWOLEVEL_PGDIR_SHIFT	22
-#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
-#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
-
 /* Just any arbitrary offset to the start of the vmalloc VM area: the
  * current 8MB value just means that there will be a 8MB "hole" after the
  * physical memory until the kernel virtual memory starts.  That means that

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-22 20:00                   ` H. Peter Anvin
@ 2008-01-22 20:36                     ` Ingo Molnar
  2008-01-22 20:43                       ` H. Peter Anvin
                                         ` (3 more replies)
  0 siblings, 4 replies; 80+ messages in thread
From: Ingo Molnar @ 2008-01-22 20:36 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ian Campbell, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman


* H. Peter Anvin <hpa@zytor.com> wrote:

>> Seems reasonable to me. I'll integrate your asm diff with the other 
>> changes and give it a whirl.
>
> This version boots into userspace on both PAE and !PAE.  You want to 
> take it from here?

ok, i'll wait for Ian to submit the final (tested) version then. A few 
possible complications are: PSE-less boxes, 32-bit PAGEALLOC bootups 
with tons of RAM, NX-less boxes and NX-able boxes :)

	Ingo

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-22 20:36                     ` Ingo Molnar
@ 2008-01-22 20:43                       ` H. Peter Anvin
  2008-01-22 20:45                         ` Ingo Molnar
  2008-01-22 20:52                       ` Ian Campbell
                                         ` (2 subsequent siblings)
  3 siblings, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-22 20:43 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Ian Campbell, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Ingo Molnar wrote:
> * H. Peter Anvin <hpa@zytor.com> wrote:
> 
>>> Seems reasonable to me. I'll integrate your asm diff with the other 
>>> changes and give it a whirl.
>> This version boots into userspace on both PAE and !PAE.  You want to 
>> take it from here?
> 
> ok, i'll wait for Ian to submit the final (tested) version then. A few 
> possible complications are: PSE-less boxes, 32-bit PAGEALLOC bootups 
> with tons of RAM, NX-less boxes and NX-able boxes :)

PSE-less should be less of an issue than making sure we switch to using 
large pages where appropriate, and enable the PGE and NX bits where 
appropriate.

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-22 20:43                       ` H. Peter Anvin
@ 2008-01-22 20:45                         ` Ingo Molnar
  0 siblings, 0 replies; 80+ messages in thread
From: Ingo Molnar @ 2008-01-22 20:45 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ian Campbell, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman


* H. Peter Anvin <hpa@zytor.com> wrote:

>> ok, i'll wait for Ian to submit the final (tested) version then. A 
>> few possible complications are: PSE-less boxes, 32-bit PAGEALLOC 
>> bootups with tons of RAM, NX-less boxes and NX-able boxes :)
>
> PSE-less should be less of an issue than making sure we switch to 
> using large pages where appropriate, and enable the PGE and NX bits 
> where appropriate.

yeah - and that would be the right point to enable gigapages as well - 
once we have all this stuff consolidated and unified from the ground up.

	Ingo

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-22 20:36                     ` Ingo Molnar
  2008-01-22 20:43                       ` H. Peter Anvin
@ 2008-01-22 20:52                       ` Ian Campbell
  2008-01-22 21:00                         ` H. Peter Anvin
  2008-01-22 21:00                       ` [PATCH] x86: make nx_enabled conditional on CONFIG_X86_PAE Harvey Harrison
  2008-01-23 20:52                       ` [PATCH] x86: Construct 32 bit boot time page tables in native format Ian Campbell
  3 siblings, 1 reply; 80+ messages in thread
From: Ian Campbell @ 2008-01-22 20:52 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: H. Peter Anvin, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman


On Tue, 2008-01-22 at 21:36 +0100, Ingo Molnar wrote:
> * H. Peter Anvin <hpa@zytor.com> wrote:
> 
> >> Seems reasonable to me. I'll integrate your asm diff with the other 
> >> changes and give it a whirl.
> >
> > This version boots into userspace on both PAE and !PAE.  You want to 
> > take it from here?
> 
> ok, i'll wait for Ian to submit the final (tested) version then. A few 
> possible complications are: PSE-less boxes, 32-bit PAGEALLOC bootups 
> with tons of RAM, NX-less boxes and NX-able boxes :)

I'm not sure I can promise that sort of coverage ;-) Will test on what
hardware I've got available...

Ian.
-- 
Ian Campbell

Modesty is a vastly overrated virtue.
		-- J. K. Galbraith


^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH] x86: make nx_enabled conditional on CONFIG_X86_PAE
  2008-01-22 20:36                     ` Ingo Molnar
  2008-01-22 20:43                       ` H. Peter Anvin
  2008-01-22 20:52                       ` Ian Campbell
@ 2008-01-22 21:00                       ` Harvey Harrison
  2008-01-22 21:04                         ` Ingo Molnar
                                           ` (2 more replies)
  2008-01-23 20:52                       ` [PATCH] x86: Construct 32 bit boot time page tables in native format Ian Campbell
  3 siblings, 3 replies; 80+ messages in thread
From: Harvey Harrison @ 2008-01-22 21:00 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: H. Peter Anvin, Ian Campbell, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

nx_enabled can only be set to non-zero when CONFIG_X86_PAE is
set.  The only use not currently inside a CONFIG_X86_PAE block
is the definition, the declaration and a conditional unlikely
test in fault_32.c (is_prefetch).

When !CONFIG_X86_PAE, is_prefetch always returns 0 immediately
as nx_enabled is always 0.

When CONFIG_X86_PAE, the nx_enabled test is preserved, but the test
against the CPU vendor and family is deleted; this may not be correct.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
---
Ingo, further to your nx vs !nx comment, had this lying around,
needs testing, only affects the CONFIG_X86_PAE case.

 arch/x86/mm/fault_32.c    |   17 ++++++++---------
 arch/x86/mm/fault_64.c    |   17 ++++++++---------
 arch/x86/mm/init_32.c     |    4 +---
 include/asm-x86/page_32.h |    2 ++
 4 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 0bd2417..049c3bb 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -81,16 +81,15 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
 	unsigned char *max_instr;
 
 #ifdef CONFIG_X86_32
-	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-		     boot_cpu_data.x86 >= 6)) {
-		/* Catch an obscure case of prefetch inside an NX page. */
-		if (nx_enabled && (error_code & PF_INSTR))
-			return 0;
-	} else {
+# ifdef CONFIG_X86_PAE
+	/* If it was an exec fault on NX page, ignore */
+	if (nx_enabled && (error_code & PF_INSTR))
 		return 0;
-	}
-#else
-	/* If it was a exec fault ignore */
+# else
+	return 0;
+# endif
+#else /* CONFIG_X86_64 */
+	/* If it was an exec fault on NX page, ignore */
 	if (error_code & PF_INSTR)
 		return 0;
 #endif
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index ccbb8e3..33e8ced 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -84,16 +84,15 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
 	unsigned char *max_instr;
 
 #ifdef CONFIG_X86_32
-	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-		     boot_cpu_data.x86 >= 6)) {
-		/* Catch an obscure case of prefetch inside an NX page. */
-		if (nx_enabled && (error_code & PF_INSTR))
-			return 0;
-	} else {
+# ifdef CONFIG_X86_PAE
+	/* If it was an exec fault on NX page, ignore */
+	if (nx_enabled && (error_code & PF_INSTR))
 		return 0;
-	}
-#else
-	/* If it was a exec fault ignore */
+# else
+	return 0;
+# endif
+#else /* CONFIG_X86_64 */
+	/* If it was an exec fault on NX page, ignore */
 	if (error_code & PF_INSTR)
 		return 0;
 #endif
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 358d3b9..317cf5d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -478,13 +478,11 @@ void zap_low_mappings (void)
 	flush_tlb_all();
 }
 
-int nx_enabled = 0;
-
 pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
 #ifdef CONFIG_X86_PAE
-
+int nx_enabled = 0;
 static int disable_nx __initdata = 0;
 
 /*
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 11c4b39..251f972 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -65,7 +65,9 @@ typedef pte_t boot_pte_t;
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
 #endif /* CONFIG_FLATMEM */
 
+#ifdef CONFIG_X86_PAE
 extern int nx_enabled;
+#endif
 
 /*
  * This much address space is reserved for vmalloc() and iomap()
-- 
1.5.4.rc3.1118.gf6754c




^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-22 20:52                       ` Ian Campbell
@ 2008-01-22 21:00                         ` H. Peter Anvin
  2008-01-22 22:21                           ` Ian Campbell
  0 siblings, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-22 21:00 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Ian Campbell wrote:
> On Tue, 2008-01-22 at 21:36 +0100, Ingo Molnar wrote:
>> * H. Peter Anvin <hpa@zytor.com> wrote:
>>
>>>> Seems reasonable to me. I'll integrate your asm diff with the other 
>>>> changes and give it a whirl.
>>> This version boots into userspace on both PAE and !PAE.  You want to 
>>> take it from here?
>> ok, i'll wait for Ian to submit the final (tested) version then. A few 
>> possible complications are: PSE-less boxes, 32-bit PAGEALLOC bootups 
>> with tons of RAM, NX-less boxes and NX-able boxes :)
> 
> I'm not sure I can promise that sort of coverage ;-) Will test on what
> hardware I've got available...
> 

I tend to use simulators (e.g. Qemu) quite a bit.  They let you tune 
this kind of stuff.

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: make nx_enabled conditional on CONFIG_X86_PAE
  2008-01-22 21:00                       ` [PATCH] x86: make nx_enabled conditional on CONFIG_X86_PAE Harvey Harrison
@ 2008-01-22 21:04                         ` Ingo Molnar
  2008-01-22 21:35                           ` Harvey Harrison
  2008-01-22 21:07                         ` Harvey Harrison
       [not found]                         ` <p73odbdlyiu.fsf@crumb.suse.de>
  2 siblings, 1 reply; 80+ messages in thread
From: Ingo Molnar @ 2008-01-22 21:04 UTC (permalink / raw)
  To: Harvey Harrison
  Cc: H. Peter Anvin, Ian Campbell, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman


* Harvey Harrison <harvey.harrison@gmail.com> wrote:

> nx_enabled can only be set to non-zero when CONFIG_X86_PAE is set.  
> The only use not currently inside a CONFIG_X86_PAE block is the 
> definition, the declaration and a conditional unlikely test in 
> fault_32.c (is_prefetch).
> 
> When !CONFIG_X86_PAE, is_prefetch always returns 0 immediately as 
> nx_enabled is always 0.
> 
> When CONFIG_X86_PAE, the test is preserved, but the test against the 
> cpu model and stepping is deleted, this may not be correct.

thanks, applied.

> Ingo, further to your nx vs !nx comment, had this lying around, needs 
> testing, only affects the CONFIG_X86_PAE case.

will keep an eye on it.

How far away are you from unifying fault_32.c and fault_64.c? You 
already managed to line up their sizes:

$ wc -l arch/x86/mm/fault_*.c
  742 arch/x86/mm/fault_32.c
  734 arch/x86/mm/fault_64.c

;-)

and the raw diff between them doesn't look that bad either:

 1 file changed, 127 insertions(+), 135 deletions(-)

so we might as well take a shot at that?

	Ingo

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: make nx_enabled conditional on CONFIG_X86_PAE
  2008-01-22 21:00                       ` [PATCH] x86: make nx_enabled conditional on CONFIG_X86_PAE Harvey Harrison
  2008-01-22 21:04                         ` Ingo Molnar
@ 2008-01-22 21:07                         ` Harvey Harrison
       [not found]                         ` <p73odbdlyiu.fsf@crumb.suse.de>
  2 siblings, 0 replies; 80+ messages in thread
From: Harvey Harrison @ 2008-01-22 21:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: H. Peter Anvin, Ian Campbell, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

On Tue, 2008-01-22 at 13:00 -0800, Harvey Harrison wrote:
> nx_enabled can only be set to non-zero when CONFIG_X86_PAE is
> set.  The only use not currently inside a CONFIG_X86_PAE block
> is the definition, the declaration and a conditional unlikely
> test in fault_32.c (is_prefetch).
> 
> When !CONFIG_X86_PAE, is_prefetch always returns 0 immediately
> as nx_enabled is always 0.
> 
> When CONFIG_X86_PAE, the test is preserved, but the test against
> the cpu model and stepping is deleted, this may not be correct.
> 
> Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
> ---

Sorry, I missed the usage in kernel/acpi/wakeup_32.S, that's the only
other user.  I don't know that code well enough to comment on the usage
there, but if anybody knows if that could be conditionalized, please
advise.


Harvey



^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: make nx_enabled conditional on CONFIG_X86_PAE
  2008-01-22 21:04                         ` Ingo Molnar
@ 2008-01-22 21:35                           ` Harvey Harrison
  0 siblings, 0 replies; 80+ messages in thread
From: Harvey Harrison @ 2008-01-22 21:35 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: H. Peter Anvin, Ian Campbell, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

On Tue, 2008-01-22 at 22:04 +0100, Ingo Molnar wrote:
> * Harvey Harrison <harvey.harrison@gmail.com> wrote:
> 
> > nx_enabled can only be set to non-zero when CONFIG_X86_PAE is set.  
> > The only use not currently inside a CONFIG_X86_PAE block is the 
> > definition, the declaration and a conditional unlikely test in 
> > fault_32.c (is_prefetch).
> > 
> > When !CONFIG_X86_PAE, is_prefetch always returns 0 immediately as 
> > nx_enabled is always 0.
> > 
> > When CONFIG_X86_PAE, the test is preserved, but the test against the 
> > cpu model and stepping is deleted, this may not be correct.
> 
> thanks, applied.

Hmmm, the extern nx_enabled in page_32.h is already within an
ifndef __ASSEMBLY__ block, so I'm not sure how wakeup_32.S could
be using it.  Thoughts?

Harvey


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-22 21:00                         ` H. Peter Anvin
@ 2008-01-22 22:21                           ` Ian Campbell
  0 siblings, 0 replies; 80+ messages in thread
From: Ian Campbell @ 2008-01-22 22:21 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman


On Tue, 2008-01-22 at 13:00 -0800, H. Peter Anvin wrote:
> Ian Campbell wrote:
> > On Tue, 2008-01-22 at 21:36 +0100, Ingo Molnar wrote:
> >> * H. Peter Anvin <hpa@zytor.com> wrote:
> >>
> >>>> Seems reasonable to me. I'll integrate your asm diff with the other 
> >>>> changes and give it a whirl.
> >>> This version boots into userspace on both PAE and !PAE.  You want to 
> >>> take it from here?
> >> ok, i'll wait for Ian to submit the final (tested) version then. A few 
> >> possible complications are: PSE-less boxes, 32-bit PAGEALLOC bootups 
> >> with tons of RAM, NX-less boxes and NX-able boxes :)
> > 
> > I'm not sure I can promise that sort of coverage ;-) Will test on what
> > hardware I've got available...
> > 
> 
> I tend to use simulators (e.g. Qemu) quite a bit.  They let you tune 
> this kind of stuff.

So do I but I'd never really investigated the option to fiddle with the
CPU type -- very useful though, thanks for the tip!

Ian.

-- 
Ian Campbell

No one can feel as helpless as the owner of a sick goldfish.


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: make nx_enabled conditional on CONFIG_X86_PAE
       [not found]                         ` <p73odbdlyiu.fsf@crumb.suse.de>
@ 2008-01-23 11:21                           ` Harvey Harrison
  0 siblings, 0 replies; 80+ messages in thread
From: Harvey Harrison @ 2008-01-23 11:21 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, H. Peter Anvin, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

On Wed, 2008-01-23 at 09:46 +0100, Andi Kleen wrote:
> Harvey Harrison <harvey.harrison@gmail.com> writes:
> 
> > nx_enabled can only be set to non-zero when CONFIG_X86_PAE is
> > set.  The only use not currently inside a CONFIG_X86_PAE block
> > is the definition, the declaration and a conditional unlikely
> > test in fault_32.c (is_prefetch).
> 
> The variable is pretty useless anyways; it can be probably
> replaced with (__supported_pte_mask & _PAGE_NX). Just make
> sure that the disable option still works, but that should
> be possible with some care.
> 
> So if you feel the need to clean up things here convert it 
> to using that. That will automatically be optimized away
> on !PAE too because _PAGE_NX is 0 there.
> 

Noted, will do.

Harvey


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-22 20:36                     ` Ingo Molnar
                                         ` (2 preceding siblings ...)
  2008-01-22 21:00                       ` [PATCH] x86: make nx_enabled conditional on CONFIG_X86_PAE Harvey Harrison
@ 2008-01-23 20:52                       ` Ian Campbell
  2008-01-24  1:06                         ` Jeremy Fitzhardinge
  3 siblings, 1 reply; 80+ messages in thread
From: Ian Campbell @ 2008-01-23 20:52 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: H. Peter Anvin, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman


On Tue, 2008-01-22 at 21:36 +0100, Ingo Molnar wrote:
> * H. Peter Anvin <hpa@zytor.com> wrote:
> 
> >> Seems reasonable to me. I'll integrate your asm diff with the other 
> >> changes and give it a whirl.
> >
> > This version boots into userspace on both PAE and !PAE.  You want to 
> > take it from here?
> 
> ok, i'll wait for Ian to submit the final (tested) version then. A few 
> possible complications are: PSE-less boxes, 32-bit PAGEALLOC bootups 
> with tons of RAM, NX-less boxes and NX-able boxes :)

FYI, CONFIG_DEBUG_PAGEALLOC+PAE is broken. I'll dig in but it might be
the weekend before I get a chance (there's a beer festival in town ;-)).

Ian.
-- 
Ian Campbell

Flattery will get you everywhere.


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-23 20:52                       ` [PATCH] x86: Construct 32 bit boot time page tables in native format Ian Campbell
@ 2008-01-24  1:06                         ` Jeremy Fitzhardinge
  2008-01-24  9:39                           ` Ian Campbell
  0 siblings, 1 reply; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-01-24  1:06 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Ingo Molnar, H. Peter Anvin, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Ian Campbell wrote:
> FYI, CONFIG_DEBUG_PAGEALLOC+PAE is broken. I'll dig in but it might be
> the weekend before I get a chance (there's a beer festival in town ;-)).
>   

I'm poking around trying to get Xen working again as well; I may end up 
fixing it in passing.

At the moment I've got a problem with early_ioremap's bt_pte[] array 
ending up hanging around in init's pagetable, which Xen is most unhappy 
about.

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-24  1:06                         ` Jeremy Fitzhardinge
@ 2008-01-24  9:39                           ` Ian Campbell
  2008-01-24 22:06                             ` H. Peter Anvin
  0 siblings, 1 reply; 80+ messages in thread
From: Ian Campbell @ 2008-01-24  9:39 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ingo Molnar, H. Peter Anvin, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman


On Wed, 2008-01-23 at 17:06 -0800, Jeremy Fitzhardinge wrote:
> Ian Campbell wrote:
> > FYI, CONFIG_DEBUG_PAGEALLOC+PAE is broken. I'll dig in but it might be
> > the weekend before I get a chance (there's a beer festival in town ;-)).
> >   
> 
> I'm poking around trying to get Xen working again as well; I may end up 
> fixing it in passing.

Turns out that the initial mapping is extending well past the end of the
128M of physical memory when DEBUG_PAGEALLOC is enabled due to the extra
16M mapping. The overshoot is enough that it interferes with the vmalloc
area. I see the same problem without the patch on non PAE if I reduce
the RAM to 64M. There will be ~twice as many PAE boot page tables which
explains why it triggers with 128M with the patch (INIT_MAP_BEYOND_END
is 70M for !PAE and 138M with PAE when DEBUG_PAGEALLOC is on).

I don't see the crash with PAE before this patch. I think because the
kernel page tables are wiped in native_pagetable_setup_start which would
have blown away the mappings past the end of physical memory, avoiding
the problem. I've added code to wipe those mappings past max_low_pfn
from the boot page tables in pagetable_setup_start which has solved the
problem and I think is the right way to do it since we don't know the
correct limit of physical RAM in head_32.

Tested patch follows. All four of {pae,!pae}x{paravirt,!
paravirt}+DEBUG_PAGEALLOC boot to userspace on qemu (smp:2 mem:128)
using cpu type 486 (no PSE or PAE or anything), pentium (PSE only),
pentium3 (everything but NX) and qemu64 (everything, running 32 bit mode
despite name). Exceptions are obviously 486 and pentium which didn't
boot the PAE versions to userspace -- they correctly bailed during
setup.

I'm not sure how PSE comes to be used ever though -- an EFI only thing?
Using the qemu monitor I could see a bunch of NX bits used when NX was
available.

I also booted both {!PAE,PAE}xPARAVIRT+DEBUG_PAGEALLOC on a physical
pentium4 with 4G RAM with and without mem=16M.

> At the moment I've got a problem with early_ioremap's bt_pte[] array 
> ending up hanging around in init's pagetable, which Xen is most unhappy 
> about.

That sounds a lot like the problem I was having with the patch I sent
you as well, although I never identified where the problematic mapping
was.

Ian.

---
x86_32: Construct 32 bit boot time page tables in native format.

Specifically the boot time page tables in a CONFIG_X86_PAE=y enabled
kernel are in PAE format.

early_ioremap is updated to use the standard page table accessors.

Clear any mappings beyond max_low_pfn from the boot page tables in
native_pagetable_setup_start because the initial mappings can extend
beyond the range of physical memory and into the vmalloc area.

Derived from patches by Eric Biederman and H. Peter Anvin.

Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Mika Penttilä <mika.penttila@kolumbus.fi>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
---
 arch/x86/kernel/head_32.S    |  132 +++++++++++++++++++++++++++++++-----------
 arch/x86/kernel/setup_32.c   |    4 +
 arch/x86/mm/init_32.c        |   70 ++++++++--------------
 arch/x86/mm/ioremap_32.c     |   53 ++++++++++-------
 include/asm-x86/page_32.h    |    1 -
 include/asm-x86/pgtable_32.h |    4 -
 6 files changed, 158 insertions(+), 106 deletions(-)

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 5d8c573..c6af2c0 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,6 +19,10 @@
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
 #include <asm/setup.h>
+#include <asm/processor-flags.h>
+
+/* Physical address */
+#define pa(X) ((X) - __PAGE_OFFSET)
 
 /*
  * References to members of the new_cpu_data structure.
@@ -80,10 +84,6 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_
  */
 .section .text.head,"ax",@progbits
 ENTRY(startup_32)
-	/* check to see if KEEP_SEGMENTS flag is meaningful */
-	cmpw $0x207, BP_version(%esi)
-	jb 1f
-
 	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
 		us to not reload segments */
 	testb $(1<<6), BP_loadflags(%esi)
@@ -92,7 +92,7 @@ ENTRY(startup_32)
 /*
  * Set segments to known values.
  */
-1:	lgdt boot_gdt_descr - __PAGE_OFFSET
+	lgdt pa(boot_gdt_descr)
 	movl $(__BOOT_DS),%eax
 	movl %eax,%ds
 	movl %eax,%es
@@ -105,8 +105,8 @@ ENTRY(startup_32)
  */
 	cld
 	xorl %eax,%eax
-	movl $__bss_start - __PAGE_OFFSET,%edi
-	movl $__bss_stop - __PAGE_OFFSET,%ecx
+	movl $pa(__bss_start),%edi
+	movl $pa(__bss_stop),%ecx
 	subl %edi,%ecx
 	shrl $2,%ecx
 	rep ; stosl
@@ -118,31 +118,32 @@ ENTRY(startup_32)
  * (kexec on panic case). Hence copy out the parameters before initializing
  * page tables.
  */
-	movl $(boot_params - __PAGE_OFFSET),%edi
+	movl $pa(boot_params),%edi
 	movl $(PARAM_SIZE/4),%ecx
 	cld
 	rep
 	movsl
-	movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
+	movl pa(boot_params) + NEW_CL_POINTER,%esi
 	andl %esi,%esi
 	jz 1f			# No comand line
-	movl $(boot_command_line - __PAGE_OFFSET),%edi
+	movl $pa(boot_command_line),%edi
 	movl $(COMMAND_LINE_SIZE/4),%ecx
 	rep
 	movsl
 1:
 
 #ifdef CONFIG_PARAVIRT
-	cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET)
+	/* This can only trip for a broken bootloader... */
+	cmpw $0x207, pa(boot_params + BP_version)
 	jb default_entry
 
 	/* Paravirt-compatible boot parameters.  Look to see what architecture
 		we're booting under. */
-	movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax
+	movl pa(boot_params + BP_hardware_subarch), %eax
 	cmpl $num_subarch_entries, %eax
 	jae bad_subarch
 
-	movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax
+	movl pa(subarch_entries)(,%eax,4), %eax
 	subl $__PAGE_OFFSET, %eax
 	jmp *%eax
 
@@ -170,17 +171,77 @@ num_subarch_entries = (. - subarch_entries) / 4
  * Mappings are created both at virtual address 0 (identity mapping)
  * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
  *
- * Warning: don't use %esi or the stack in this code.  However, %esp
- * can be used as a GPR if you really need it...
+ * Note that the stack is not yet set up!
  */
-page_pde_offset = (__PAGE_OFFSET >> 20);
+#define PTE_ATTR	0x007		/* PRESENT+RW+USER */
+#define PDE_ATTR	0x067		/* PRESENT+RW+USER+DIRTY+ACCESSED */
+#define PGD_ATTR	0x001		/* PRESENT (no other attributes) */
 
 default_entry:
-	movl $(pg0 - __PAGE_OFFSET), %edi
-	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
-	movl $0x007, %eax			/* 0x007 = PRESENT+RW+USER */
+#ifdef CONFIG_X86_PAE
+	/*
+	 * In PAE mode, the kernel PMD is shared, and __PAGE_OFFSET
+	 * is guaranteed to be a multiple of 1 GB (the PGD granularity.)
+	 * Thus, we only need to set up a single PMD here; the identity
+	 * mapping is handled by pointing two PGD entries to the PMD.
+	 *
+	 * Note the upper half of each PMD or PTE are always zero at
+	 * this stage.
+	 */
+page_pde_offset = (__PAGE_OFFSET >> 27);
+	xorl %ebx,%ebx				/* %ebx is kept at zero */
+
+	movl $pa(pg0), %edi
+	movl $pa(swapper_pg_pmd), %edx
+	movl $PTE_ATTR, %eax
 10:
-	leal 0x007(%edi),%ecx			/* Create PDE entry */
+	leal PDE_ATTR(%edi),%ecx		/* Create PMD entry */
+	movl %ecx,(%edx)			/* Store PMD entry */
+						/* Upper half already zero */
+	addl $8,%edx
+	movl $512,%ecx
+11:
+	stosl
+	xchgl %eax,%ebx
+	stosl
+	xchgl %eax,%ebx
+	addl $0x1000,%eax
+	/*
+	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
+	 * bytes beyond the end of our own page tables.
+	 */
+	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
+	cmpl %ebp,%eax
+	ja 1f
+	loop 11b
+
+	/*
+	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
+	 * bytes beyond the end of our own page tables.
+	 */
+	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
+	cmpl %ebp,%eax
+	jb 10b
+1:
+	movl %edi,pa(init_pg_tables_end)
+
+	/* Set up the PGD */
+	movl $pa(swapper_pg_pmd)+PGD_ATTR, %eax
+	movl %eax, pa(swapper_pg_dir)			/* Identity map */
+	movl %eax, pa(swapper_pg_dir+page_pde_offset)	/* Kernel map */
+
+	/* Do early initialization of the fixmap area */
+	movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
+	movl %eax,pa(swapper_pg_pmd+0xff8)
+#else	/* Not PAE */
+
+page_pde_offset = (__PAGE_OFFSET >> 20);
+
+	movl $pa(pg0), %edi
+	movl $pa(swapper_pg_dir), %edx
+	movl $PTE_ATTR, %eax
+10:
+	leal PDE_ATTR(%edi),%ecx		/* Create PDE entry */
 	movl %ecx,(%edx)			/* Store identity PDE entry */
 	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
 	addl $4,%edx
@@ -189,19 +250,20 @@ default_entry:
 	stosl
 	addl $0x1000,%eax
 	loop 11b
-	/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
-	/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
-	leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
+	/*
+	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
+	 * bytes beyond the end of our own page tables; the +PTE_ATTR is
+	 * the attribute bits
+	 */
+	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
 	cmpl %ebp,%eax
 	jb 10b
-	movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
-
-	/* Do an early initialization of the fixmap area */
-	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
-	movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
-	addl $0x67, %eax			/* 0x67 == _PAGE_TABLE */
-	movl %eax, 4092(%edx)
+	movl %edi,pa(init_pg_tables_end)
 
+	/* Do early initialization of the fixmap area */
+	movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
+	movl %eax,pa(swapper_pg_dir+0xffc)
+#endif
 	jmp 3f
 /*
  * Non-boot CPU entry point; entered from trampoline.S
@@ -241,7 +303,7 @@ ENTRY(startup_32_smp)
  *	NOTE! We have to correct for the fact that we're
  *	not yet offset PAGE_OFFSET..
  */
-#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
+#define cr4_bits pa(mmu_cr4_features)
 	movl cr4_bits,%edx
 	andl %edx,%edx
 	jz 6f
@@ -276,10 +338,10 @@ ENTRY(startup_32_smp)
 /*
  * Enable paging
  */
-	movl $swapper_pg_dir-__PAGE_OFFSET,%eax
+	movl $pa(swapper_pg_dir),%eax
 	movl %eax,%cr3		/* set the page table pointer.. */
 	movl %cr0,%eax
-	orl $0x80000000,%eax
+	orl  $X86_CR0_PG,%eax
 	movl %eax,%cr0		/* ..and set paging (PG) bit */
 	ljmp $__BOOT_CS,$1f	/* Clear prefetch and normalize %eip */
 1:
@@ -554,8 +616,12 @@ ENTRY(_stext)
 	.align PAGE_SIZE_asm
 ENTRY(swapper_pg_dir)
 	.fill 1024,4,0
+#ifdef CONFIG_X86_PAE
 ENTRY(swapper_pg_pmd)
 	.fill 1024,4,0
+#endif
+ENTRY(swapper_pg_fixmap)
+	.fill 1024,4,0
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
 
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 2256338..b4b6652 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -153,7 +153,11 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
 struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
 EXPORT_SYMBOL(boot_cpu_data);
 
+#ifndef CONFIG_X86_PAE
 unsigned long mmu_cr4_features;
+#else
+unsigned long mmu_cr4_features = X86_CR4_PAE;
+#endif
 
 /* for MCA, but anyone else can use it if they want */
 unsigned int machine_id;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 358d3b9..b382889 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -43,6 +43,7 @@
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/paravirt.h>
+#include <asm/setup.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
@@ -343,44 +344,35 @@ extern void __init remap_numa_kva(void);
 
 void __init native_pagetable_setup_start(pgd_t *base)
 {
-#ifdef CONFIG_X86_PAE
-	int i;
+	unsigned long pfn, va;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
 
 	/*
-	 * Init entries of the first-level page table to the
-	 * zero page, if they haven't already been set up.
-	 *
-	 * In a normal native boot, we'll be running on a
-	 * pagetable rooted in swapper_pg_dir, but not in PAE
-	 * mode, so this will end up clobbering the mappings
-	 * for the lower 24Mbytes of the address space,
-	 * without affecting the kernel address space.
+	 * Remove any mappings which extend past the end of physical
+	 * memory from the boot time page table.
 	 */
-	for (i = 0; i < USER_PTRS_PER_PGD; i++)
-		set_pgd(&base[i],
-			__pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
-
-	/* Make sure kernel address space is empty so that a pagetable
-	   will be allocated for it. */
-	memset(&base[USER_PTRS_PER_PGD], 0,
-	       KERNEL_PGD_PTRS * sizeof(pgd_t));
-#else
+	for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
+		va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
+		pgd = base + pgd_index(va);
+		if (!pgd_present(*pgd))
+			break;
+		pud = pud_offset(pgd, va);
+		pmd = pmd_offset(pud, va);
+		if (!pmd_present(*pmd))
+			break;
+		pte = pte_offset_kernel(pmd, va);
+		if (!pte_present(*pte))
+			break;
+		pte_clear(NULL, va, pte);
+	}
 	paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
-#endif
 }
 
 void __init native_pagetable_setup_done(pgd_t *base)
 {
-#ifdef CONFIG_X86_PAE
-	/*
-	 * Add low memory identity-mappings - SMP needs it when
-	 * starting up on an AP from real-mode. In the non-PAE
-	 * case we already have these mappings through head.S.
-	 * All user-space mappings are explicitly cleared after
-	 * SMP startup.
-	 */
-	set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
-#endif
 }
 
 /*
@@ -389,9 +381,8 @@ void __init native_pagetable_setup_done(pgd_t *base)
  * the boot process.
  *
  * If we're booting on native hardware, this will be a pagetable
- * constructed in arch/i386/kernel/head.S, and not running in PAE mode
- * (even if we'll end up running in PAE).  The root of the pagetable
- * will be swapper_pg_dir.
+ * constructed in arch/x86/kernel/head_32.S.  The root of the
+ * pagetable will be swapper_pg_dir.
  *
  * If we're booting paravirtualized under a hypervisor, then there are
  * more options: we may already be running PAE, and the pagetable may
@@ -408,6 +399,7 @@ static void __init pagetable_init (void)
 	unsigned long vaddr, end;
 	pgd_t *pgd_base = swapper_pg_dir;
 
+	printk(KERN_CRIT "%s\n", __FUNCTION__);
 	paravirt_pagetable_setup_start(pgd_base);
 
 	/* Enable PSE if available */
@@ -549,14 +541,6 @@ void __init paging_init(void)
 
 	load_cr3(swapper_pg_dir);
 
-#ifdef CONFIG_X86_PAE
-	/*
-	 * We will bail out later - printk doesn't work right now so
-	 * the user would just see a hanging kernel.
-	 */
-	if (cpu_has_pae)
-		set_in_cr4(X86_CR4_PAE);
-#endif
 	__flush_tlb_all();
 
 	kmap_init();
@@ -686,10 +670,6 @@ void __init mem_init(void)
 	BUG_ON((unsigned long)high_memory      > VMALLOC_START);
 #endif /* double-sanity-check paranoia */
 
-#ifdef CONFIG_X86_PAE
-	if (!cpu_has_pae)
-		panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
-#endif
 	if (boot_cpu_data.wp_works_ok < 0)
 		test_wp_bit();
 
diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index 96c3ed2..7827f01 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -220,40 +220,45 @@ static int __init early_ioremap_debug_setup(char *str)
 __setup("early_ioremap_debug", early_ioremap_debug_setup);
 
 static __initdata int after_paging_init;
-static __initdata unsigned long bm_pte[1024]
+static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
 				__attribute__((aligned(PAGE_SIZE)));
 
-static inline unsigned long * __init early_ioremap_pgd(unsigned long addr)
+static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
 {
-	return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023);
+	pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)];
+	pud_t *pud = pud_offset(pgd, addr);
+	pmd_t *pmd = pmd_offset(pud, addr);
+
+	return pmd;
 }
 
-static inline unsigned long * __init early_ioremap_pte(unsigned long addr)
+static inline pte_t * __init early_ioremap_pte(unsigned long addr)
 {
-	return bm_pte + ((addr >> PAGE_SHIFT) & 1023);
+	return &bm_pte[pte_index(addr)];
 }
 
 void __init early_ioremap_init(void)
 {
-	unsigned long *pgd;
+	pmd_t *pmd;
 
 	if (early_ioremap_debug)
 		printk("early_ioremap_init()\n");
 
-	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
-	*pgd = __pa(bm_pte) | _PAGE_TABLE;
+	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
 	memset(bm_pte, 0, sizeof(bm_pte));
+	set_pmd(pmd, __pmd(__pa(bm_pte) | _PAGE_TABLE));
+
 	/*
-	 * The boot-ioremap range spans multiple pgds, for which
+	 * The boot-ioremap range spans multiple pmds, for which
 	 * we are not prepared:
 	 */
-	if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
+	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
 		WARN_ON(1);
-		printk("pgd %p != %p\n",
-			pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
-		printk("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+		printk(KERN_WARNING "pmd %p != %p\n",
+		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
+		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
 			fix_to_virt(FIX_BTMAP_BEGIN));
-		printk("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
+		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
 			fix_to_virt(FIX_BTMAP_END));
 
 		printk("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
@@ -263,27 +268,28 @@ void __init early_ioremap_init(void)
 
 void __init early_ioremap_clear(void)
 {
-	unsigned long *pgd;
+	pmd_t *pmd;
 
 	if (early_ioremap_debug)
 		printk("early_ioremap_clear()\n");
 
-	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
-	*pgd = 0;
+	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
+	pmd_clear(pmd);
 	__flush_tlb_all();
 }
 
 void __init early_ioremap_reset(void)
 {
 	enum fixed_addresses idx;
-	unsigned long *pte, phys, addr;
+	unsigned long addr, phys;
+	pte_t *pte;
 
 	after_paging_init = 1;
 	for (idx = FIX_BTMAP_BEGIN; idx <= FIX_BTMAP_END; idx--) {
 		addr = fix_to_virt(idx);
 		pte = early_ioremap_pte(addr);
-		if (!*pte & _PAGE_PRESENT) {
-			phys = *pte & PAGE_MASK;
+		if (pte_present(*pte)) {
+			phys = pte_val(*pte) & PAGE_MASK;
 			set_fixmap(idx, phys);
 		}
 	}
@@ -292,7 +298,8 @@ void __init early_ioremap_reset(void)
 static void __init __early_set_fixmap(enum fixed_addresses idx,
 				   unsigned long phys, pgprot_t flags)
 {
-	unsigned long *pte, addr = __fix_to_virt(idx);
+	unsigned long addr = __fix_to_virt(idx);
+	pte_t *pte;
 
 	if (idx >= __end_of_fixed_addresses) {
 		BUG();
@@ -300,9 +307,9 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
 	}
 	pte = early_ioremap_pte(addr);
 	if (pgprot_val(flags))
-		*pte = (phys & PAGE_MASK) | pgprot_val(flags);
+		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
 	else
-		*pte = 0;
+		pte_clear(NULL, addr, pte);
 	__flush_tlb_one(addr);
 }
 
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 11c4b39..8fc0473 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -48,7 +48,6 @@ typedef unsigned long	pgprotval_t;
 typedef unsigned long	phys_addr_t;
 
 typedef union { pteval_t pte, pte_low; } pte_t;
-typedef pte_t boot_pte_t;
 
 #endif	/* __ASSEMBLY__ */
 #endif	/* CONFIG_X86_PAE */
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 72eb06c..e8a6195 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -55,10 +55,6 @@ void paging_init(void);
 #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
 #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
 
-#define TWOLEVEL_PGDIR_SHIFT	22
-#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
-#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
-
 /* Just any arbitrary offset to the start of the vmalloc VM area: the
  * current 8MB value just means that there will be a 8MB "hole" after the
  * physical memory until the kernel virtual memory starts.  That means that
-- 
1.5.3.8




^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-24  9:39                           ` Ian Campbell
@ 2008-01-24 22:06                             ` H. Peter Anvin
  2008-01-24 22:35                               ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-24 22:06 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Jeremy Fitzhardinge, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Ian Campbell wrote:
> 
> I'm not sure how PSE comes to be used ever though -- an EFI only thing?
> Using the qemu monitor I could see a bunch of NX bits used when NX was
> available.
> 

This is part of the trickiness with re-using the early pagetables 
instead of rebuilding them from scratch - if PSE is available, we have 
two options:

- either we build PSE page tables early (which means detecting PSE, 
which means if there are any chip-specific CPUID workarounds they have 
to be present in the early code), or

- when building the "complete" page tables, coalesce !PSE pagetables 
into PSE entries where appropriate.

For PAT to work right, the first chunk probably should *not* be a PSE 
page table, which complicates things further.  (There is no TLB impact, 
since a PSE page table at offset zero or that otherwise has an MTRR 
conflict will be broken apart in hardware.)  In the former case, it 
means splitting it apart later; in the latter case it just means 
excluding it from coalescing.

In other words, reusing the early page tables isn't all that 
straightforward.  It may easily be that it's better to build a new set 
of page tables from scratch, however, it would *still* be beneficial to 
have the early page tables be in the same format as the later one, since 
it lets us use the fixmap area, and therefore {bt,early}_ioremap() much 
sooner.

	-hpa


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-24 22:06                             ` H. Peter Anvin
@ 2008-01-24 22:35                               ` Jeremy Fitzhardinge
  2008-01-24 22:39                                 ` H. Peter Anvin
  0 siblings, 1 reply; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-01-24 22:35 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

H. Peter Anvin wrote:
> In other words, reusing the early page tables isn't all that 
> straightforward.  It may easily be that it's better to build a new set 
> of page tables from scratch, however, it would *still* be beneficial 
> to have the early page tables be in the same format as the later one, 
> since it lets us use the fixmap area, and therefore 
> {bt,early}_ioremap() much sooner.

Yes, and it simplifies Xen as it always starts guest domains in the 
appropriate pagetable mode and doesn't let the guest change it on the 
fly.  If early_ioremap depends on non-PAE early pagetables in an 
otherwise PAE kernel, we'd need to go to some effort to make sure all 
the early_ioremap stuff is skipped (which would be possible but 
unpleasant for domU, but very bad in dom0).

    J


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-24 22:35                               ` Jeremy Fitzhardinge
@ 2008-01-24 22:39                                 ` H. Peter Anvin
  2008-01-24 22:58                                   ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-24 22:39 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Jeremy Fitzhardinge wrote:
> H. Peter Anvin wrote:
>> In other words, reusing the early page tables isn't all that 
>> straightforward.  It may easily be that it's better to build a new set 
>> of page tables from scratch, however, it would *still* be beneficial 
>> to have the early page tables be in the same format as the later one, 
>> since it lets us use the fixmap area, and therefore 
>> {bt,early}_ioremap() much sooner.
> 
> Yes, and it simplifies Xen as it always starts guest domains in the 
> appropriate pagetable mode and doesn't let the guest change it on the 
> fly.  If early_ioremap depends on non-PAE early pagetables in an 
> otherwise PAE kernel, we'd need to go to some effort to make sure all 
> the early_ioremap stuff is skipped (which would be possible but 
> unpleasant for domU, but very bad in dom0).
> 

Yeah, and it's ugly for the kernel proper, so that bit is a no-brainer. 
  It's just a matter of hammering out the details.

It doesn't sound from the above that you have any opinion either way 
about reusing the initial page tables or creating a new set, as long as 
they're in the same format.

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-24 22:39                                 ` H. Peter Anvin
@ 2008-01-24 22:58                                   ` Jeremy Fitzhardinge
  2008-01-24 23:08                                     ` H. Peter Anvin
  0 siblings, 1 reply; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-01-24 22:58 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

H. Peter Anvin wrote:
> Yeah, and it's ugly for the kernel proper, so that bit is a 
> no-brainer.  It's just a matter of hammering out the details.
>
> It doesn't sound from the above that you have any opinion either way 
> about reusing the initial page tables or creating a new set, as long 
> as they're in the same format. 

Right.

Xen provides an initial set of pagetables in the appropriate format, so 
what head.S generates is moot.  For simplicity I graft the Xen-provided 
pagetables into swapper_pg_dir in xen_start_kernel, so it is the 
functional equivalent to the head.S pagetable construction.

We also don't (yet) support PSE, so that's a non-issue for us too.

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-24 22:58                                   ` Jeremy Fitzhardinge
@ 2008-01-24 23:08                                     ` H. Peter Anvin
  2008-01-24 23:40                                       ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-24 23:08 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Jeremy Fitzhardinge wrote:
> H. Peter Anvin wrote:
>> Yeah, and it's ugly for the kernel proper, so that bit is a 
>> no-brainer.  It's just a matter of hammering out the details.
>>
>> It doesn't sound from the above that you have any opinion either way 
>> about reusing the initial page tables or creating a new set, as long 
>> as they're in the same format. 
> 
> Right.
> 
> Xen provides an initial set of pagetables in the appropriate format, so 
> what head.S generates is moot.  For simplicity I graft the Xen-provided 
> pagetables into swapper_pg_dir in xen_start_kernel, so it is the 
> functional equivalent to the head.S pagetable construction.
> 
> We also don't (yet) support PSE, so that's a non-issue for us too.
> 

While we're mucking around in this area, there is another thing which we 
should eventually get around to fixing:

we need a set of page tables with an identity mapping as well as the 
kernel mapping, for trampolining (during startup, but also during things 
like ACPI suspend/resume.)  Right now, we let those be the swapper page 
tables, but that's probably not really a good idea, since it can hide bugs.

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-24 23:08                                     ` H. Peter Anvin
@ 2008-01-24 23:40                                       ` Jeremy Fitzhardinge
  2008-01-24 23:44                                         ` H. Peter Anvin
  2008-01-24 23:51                                         ` H. Peter Anvin
  0 siblings, 2 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-01-24 23:40 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

H. Peter Anvin wrote:
> While we're mucking around in this area, there is another thing which 
> we should eventually get around to fixing:
>
> we need a set of page tables with an identity mapping as well as the 
> kernel mapping, for trampolining (during startup, but also during 
> things like ACPI suspend/resume.)  Right now, we let those be the 
> swapper page tables, but that's probably not really a good idea, since 
> it can hide bugs.

So you're suggesting a second system pagetable which has a P=V alias as 
well as the normal kernel mapping, used only when we actually need that 
alias?  Sounds simple enough to arrange.

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-24 23:40                                       ` Jeremy Fitzhardinge
@ 2008-01-24 23:44                                         ` H. Peter Anvin
  2008-01-24 23:51                                           ` Jeremy Fitzhardinge
  2008-01-24 23:51                                         ` H. Peter Anvin
  1 sibling, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-24 23:44 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Jeremy Fitzhardinge wrote:
> H. Peter Anvin wrote:
>> While we're mucking around in this area, there is another thing which 
>> we should eventually get around to fixing:
>>
>> we need a set of page tables with an identity mapping as well as the 
>> kernel mapping, for trampolining (during startup, but also during 
>> things like ACPI suspend/resume.)  Right now, we let those be the 
>> swapper page tables, but that's probably not really a good idea, since 
>> it can hide bugs.
> 
> So you're suggesting a second system pagetable which has a P=V alias as 
> well as the normal kernel mapping, used only when we actually need that 
> alias?  Sounds simple enough to arrange.
> 

Yes.  We'd use it during initialization and at other times when we need 
trampolining, but give the swapper something which only has the kernel map.

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-24 23:40                                       ` Jeremy Fitzhardinge
  2008-01-24 23:44                                         ` H. Peter Anvin
@ 2008-01-24 23:51                                         ` H. Peter Anvin
  2008-01-25  0:20                                           ` Pavel Machek
  1 sibling, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-24 23:51 UTC (permalink / raw)
  To: Jeremy Fitzhardinge, Pavel Machek
  Cc: Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Jeremy Fitzhardinge wrote:
> H. Peter Anvin wrote:
>> While we're mucking around in this area, there is another thing which 
>> we should eventually get around to fixing:
>>
>> we need a set of page tables with an identity mapping as well as the 
>> kernel mapping, for trampolining (during startup, but also during 
>> things like ACPI suspend/resume.)  Right now, we let those be the 
>> swapper page tables, but that's probably not really a good idea, since 
>> it can hide bugs.
> 
> So you're suggesting a second system pagetable which has a P=V alias as 
> well as the normal kernel mapping, used only when we actually need that 
> alias?  Sounds simple enough to arrange.
> 

I just looked at the ACPI suspend code, and it looks like it hacks its 
own identity map at runtime.  Pavel, am I reading that code right?

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-24 23:44                                         ` H. Peter Anvin
@ 2008-01-24 23:51                                           ` Jeremy Fitzhardinge
  2008-01-25  0:02                                             ` H. Peter Anvin
  0 siblings, 1 reply; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-01-24 23:51 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

H. Peter Anvin wrote:
> Yes.  We'd use it during initialization and at other times when we 
> need trampolining, but give the swapper something which only has the 
> kernel map.

Hm, though Xen makes it all a bit more complex, as usual.  In the PAE 
case it wouldn't allow the pmd to be shared, so you'd have to allocate a 
new pmd and copy into it.  There's probably a way to deal with it within 
the existing paravirt hooks...

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-24 23:51                                           ` Jeremy Fitzhardinge
@ 2008-01-25  0:02                                             ` H. Peter Anvin
  2008-01-25  0:11                                               ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-25  0:02 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Jeremy Fitzhardinge wrote:
> H. Peter Anvin wrote:
>> Yes.  We'd use it during initialization and at other times when we 
>> need trampolining, but give the swapper something which only has the 
>> kernel map.
> 
> Hm, though Xen makes it all a bit more complex, as usual.  In the PAE 
> case it wouldn't allow the pmd to be shared, so you'd have to allocate a 
> new pmd and copy into it.  There's probably a way to deal with it within 
> the existing paravirt hooks...
> 

Yeah, I'm aware of this particular piece of Xen braindamage, and 
although I had some very unkind words to say about it, it mirrors what 
we have to do for the !PAE case anyway, so it can be sort of glossed over.

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25  0:02                                             ` H. Peter Anvin
@ 2008-01-25  0:11                                               ` Jeremy Fitzhardinge
  2008-01-25  0:15                                                 ` H. Peter Anvin
  0 siblings, 1 reply; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-01-25  0:11 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

H. Peter Anvin wrote:
> Yeah, I'm aware of this particular piece of Xen braindamage, and 
> although I had some very unkind words to say about it, it mirrors what 
> we have to do for the !PAE case anyway, so it can be sort of glossed 
> over.

Sort of.  If Xen weren't an issue, then both cases would be a matter of 
copying a set of entries from one place in the pgd to another.

It would be easy enough to add some code on xen side to look for pmd 
aliases when using/pinning a pagetable, and allocate'n'copy a new pmd 
page as needed.  That way the core code can ignore the issue.

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25  0:11                                               ` Jeremy Fitzhardinge
@ 2008-01-25  0:15                                                 ` H. Peter Anvin
  2008-01-25  0:31                                                   ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-25  0:15 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Jeremy Fitzhardinge wrote:
> H. Peter Anvin wrote:
>> Yeah, I'm aware of this particular piece of Xen braindamage, and 
>> although I had some very unkind words to say about it, it mirrors what 
>> we have to do for the !PAE case anyway, so it can be sort of glossed 
>> over.
> 
> Sort of.  If Xen weren't an issue, then both cases are a matter of 
> copying a set of entries from one place in the pgd to another.
> 

No, if Xen wasn't an issue there wouldn't be anything to do for the PAE 
case at all (since the PGD is trivial.)

Copying PMDs is more or less an analogous case of the !PAE case, once 
the allocation is already done.  The allocation should be trivial 
though, since this would be a one-time thing.

> It would be easy enough to add some code on xen side to look for pmd 
> aliases when using/pinning a pagetable, and allocate'n'copy a new pmd 
> page as needed.  That way the core code can ignore the issue.

As much as I'd rather see Xen fixing this than having it continue to 
impact the kernel, I presume it will take some time to flush the broken 
hypervisors out?

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-24 23:51                                         ` H. Peter Anvin
@ 2008-01-25  0:20                                           ` Pavel Machek
  2008-01-25  0:27                                             ` H. Peter Anvin
  0 siblings, 1 reply; 80+ messages in thread
From: Pavel Machek @ 2008-01-25  0:20 UTC (permalink / raw)
  To: H. Peter Anvin, Rafael J. Wysocki
  Cc: Jeremy Fitzhardinge, Ian Campbell, Ingo Molnar,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

On Thu 2008-01-24 15:51:24, H. Peter Anvin wrote:
> Jeremy Fitzhardinge wrote:
>> H. Peter Anvin wrote:
>>> While we're mucking around in this area, there is another thing which we 
>>> should eventually get around to fixing:
>>>
>>> we need a set of page tables with an identity mapping as well as the 
>>> kernel mapping, for trampolining (during startup, but also during things 
>>> like ACPI suspend/resume.)  Right now, we let those be the swapper page 
>>> tables, but that's probably not really a good idea, since it can hide 
>>> bugs.
>>
>> So you're suggesting a second system pagetable which has a P=V alias as 
>> well as the normal kernel mapping, used only when we actually need that 
>> alias?  Sounds simple enough to arrange.
>>
>
> I just looked at the ACPI suspend code, and it looks like it hacks its own 
> identity map at runtime.  Pavel, am I reading that code right?

Yes, I think so, I believe we do it on both 32 and 64 bit now.

(It is early here. And I almost got the .c wakeup code to work... it
already sets the mode).
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25  0:20                                           ` Pavel Machek
@ 2008-01-25  0:27                                             ` H. Peter Anvin
  2008-01-25  0:46                                               ` Rafael J. Wysocki
                                                                 ` (2 more replies)
  0 siblings, 3 replies; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-25  0:27 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Rafael J. Wysocki, Jeremy Fitzhardinge, Ian Campbell,
	Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Pavel Machek wrote:
>>>
>> I just looked at the ACPI suspend code, and it looks like it hacks its own 
>> identity map at runtime.  Pavel, am I reading that code right?
> 
> Yes, I think so, I believe we do it on both 32 and 64 bit now.
> 

So the background to this... we need an identity map to trampoline at 
early boot, obviously, but we'd like it to not stick around more than 
necessary.  We have zap_low_mappings() now but it's not really sufficient.

Secondary SMP processors need these mappings during trampolining -- 
presumably including CPU hotplug -- and I'm suspecting it might simply 
make sense to use a separate set of page tables (with both the identity 
and the kernel map) for trampolining and just keep them around.  That 
way they would be usable for ACPI as well.

> (It is early here. And I almost got the .c wakeup code to work... it
> already sets the mode).

Sweet!

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25  0:15                                                 ` H. Peter Anvin
@ 2008-01-25  0:31                                                   ` Jeremy Fitzhardinge
  2008-01-25  0:37                                                     ` H. Peter Anvin
  0 siblings, 1 reply; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-01-25  0:31 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

H. Peter Anvin wrote:
> No, if Xen wasn't an issue there wouldn't be anything to do for the 
> PAE case at all (since the PGD is trivial.)
>
> Copying PMDs is more or less an analogous case of the !PAE case, once 
> the allocation is already done.  The allocation should be trivial 
> though, since this would be a one-time thing.

I think we're in vehement agreement here.  In either case, it's just a 
matter of something like:

    memcpy(pgd, &pgd[USER_PTRS_PER_PGD], sizeof(pgd_t) * KERNEL_PTRS_PER_PGD);
      

which would work for both PAE and non-PAE.

>> It would be easy enough to add some code on xen side to look for pmd 
>> aliases when using/pinning a pagetable, and allocate'n'copy a new pmd 
>> page as needed.  That way the core code can ignore the issue.
>
> As much as I'd rather see Xen fixing this than having it continue to 
> impact the kernel, I presume it will take some time to flush the 
> broken hypervisors out? 

Sorry, I was unclear.  I meant in the purely Xen-specific parts of the 
kernel (arch/x86/xen).  It wouldn't require a hypervisor change.

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25  0:31                                                   ` Jeremy Fitzhardinge
@ 2008-01-25  0:37                                                     ` H. Peter Anvin
  2008-01-25  2:56                                                       ` Eric W. Biederman
  0 siblings, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-25  0:37 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Jeremy Fitzhardinge wrote:
> H. Peter Anvin wrote:
>> No, if Xen wasn't an issue there wouldn't be anything to do for the 
>> PAE case at all (since the PGD is trivial.)
>>
>> Copying PMDs is more or less an analogous case of the !PAE case, once 
>> the allocation is already done.  The allocation should be trivial 
>> though, since this would be a one-time thing.
> 
> I think we're in vehement agreement here.  In either case, its just a 
> matter of something like:
> 
>    memcpy(pgd, &pgd[USER_PTRS_PER_PGD], sizeof(pgd_t) * 
> KERNEL_PTRS_PER_PGD);
>     
> which would work for both PAE and non-PAE.
> 
>>> It would be easy enough to add some code on xen side to look for pmd 
>>> aliases when using/pinning a pagetable, and allocate'n'copy a new pmd 
>>> page as needed.  That way the core code can ignore the issue.
>>
>> As much as I'd rather see Xen fixing this than having it continue to 
>> impact the kernel, I presume it will take some time to flush the 
>> broken hypervisors out? 
> 
> Sorry, I was unclear.  I meant in the purely Xen-specific parts of the 
> kernel (arch/x86/xen).  It wouldn't require a hypervisor change.
> 

Oh, that makes that option much more viable and probably preferable.

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25  0:27                                             ` H. Peter Anvin
@ 2008-01-25  0:46                                               ` Rafael J. Wysocki
  2008-01-25  1:08                                                 ` H. Peter Anvin
  2008-01-25  2:16                                               ` Eric W. Biederman
  2008-01-25  7:49                                               ` Pavel Machek
  2 siblings, 1 reply; 80+ messages in thread
From: Rafael J. Wysocki @ 2008-01-25  0:46 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Pavel Machek, Jeremy Fitzhardinge, Ian Campbell, Ingo Molnar,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

On Friday, 25 of January 2008, H. Peter Anvin wrote:
> Pavel Machek wrote:
> >>>
> >> I just looked at the ACPI suspend code, and it looks like it hacks its own 
> >> identity map at runtime.  Pavel, am I reading that code right?
> > 
> > Yes, I think so, I believe we do it on both 32 and 64 bit now.

For clarity, are you referring to the code in arch/x86/kernel/acpi ?

Rafael

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25  0:46                                               ` Rafael J. Wysocki
@ 2008-01-25  1:08                                                 ` H. Peter Anvin
  0 siblings, 0 replies; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-25  1:08 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, Jeremy Fitzhardinge, Ian Campbell, Ingo Molnar,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Rafael J. Wysocki wrote:
> On Friday, 25 of January 2008, H. Peter Anvin wrote:
>> Pavel Machek wrote:
>>>> I just looked at the ACPI suspend code, and it looks like it hacks its own 
>>>> identity map at runtime.  Pavel, am I reading that code right?
>>> Yes, I think so, I believe we do it on both 32 and 64 bit now.
> 
> For clarity, are you referring to the code in arch/x86/kernel/acpi ?
> 

Yes.

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25  0:27                                             ` H. Peter Anvin
  2008-01-25  0:46                                               ` Rafael J. Wysocki
@ 2008-01-25  2:16                                               ` Eric W. Biederman
  2008-01-25  2:25                                                 ` H. Peter Anvin
  2008-01-25  7:49                                               ` Pavel Machek
  2 siblings, 1 reply; 80+ messages in thread
From: Eric W. Biederman @ 2008-01-25  2:16 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Pavel Machek, Rafael J. Wysocki, Jeremy Fitzhardinge,
	Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar

"H. Peter Anvin" <hpa@zytor.com> writes:

> Pavel Machek wrote:
>>>>
>>> I just looked at the ACPI suspend code, and it looks like it hacks its own
>>> identity map at runtime.  Pavel, am I reading that code right?
>>
>> Yes, I think so, I believe we do it on both 32 and 64 bit now.
>>
>
> So the background to this... we need an identity map to trampoline at early
> boot, obviously, but we'd like it to not stick around more than necessary.  We
> have zap_low_mappings() now but it's not really sufficient.
>
> Secondary SMP processors need these mappings during trampolining -- 
> presumably including CPU hotplug -- and I'm suspecting it might simply make
> sense to use a separate set of page tables (with both the identity and the
> kernel map) for trampolining and just keep them around.  That way they would be
> usable for ACPI as well.

We already do this on the 64bit side.  We reuse the kernel and the
identity parts from the core kernel page tables but it is actually
a distinct page table.  

x86_64 has not had the identity mappings mapped in any of the
normal page tables since the relocatable kernel support was merged
a while ago.

Only on the 32bit side does this still remain an issue.  I don't know
what we can do optimization-wise there.  Emulating the 64bit code
and having a dedicated top level pgd (as part of the trampoline)
and then mapping into it the kernel identity mapping and the kernel
mapping (which are the same on 32bit) should work fairly easily.  

It is just a handful of pgd entries, and then in the actual kernel
entry code we reload %cr3 with the appropriate kernel page table
and we should be fine.  No need for an explicit zap there at all.

Eric

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25  2:16                                               ` Eric W. Biederman
@ 2008-01-25  2:25                                                 ` H. Peter Anvin
  0 siblings, 0 replies; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-25  2:25 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Pavel Machek, Rafael J. Wysocki, Jeremy Fitzhardinge,
	Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar

Eric W. Biederman wrote:
> 
> We already do this on the 64bit side.  We reuse the kernel and the
> identity parts from the core kernel page tables but it is actually
> a distinct page table.  
> 
> x86_64 has not had the identity mappings mapped in any of the
> normal page tables since the relocatable kernel support was merged
> a while ago.
> 
> Only on the 32bit side does this still remain an issue.  I don't know
> if what we can do optimization wise there.  Emulating the 64bit code
> and having a dedicated top level pgd (as part of the trampoline)
> and then a mapping into it the kernel identity mapping and the kernel
> mapping (which are the same on 32bit) should work fairly easily.  
> 
> It is just a handful of pgd entries, and then in the actual kernel
> entry code we reload %cr3 with the appropriate kernel page table
> and we should be fine.  No need for an explicit zap there at all.
> 

That's pretty much what I figure.  The one issue is that on non-PAE 
32-bit (or if we actually have to deal with unsharable PMDs on PAE 
kernels) then the PGD (PMD) kernel mappings at least formally should 
really be put in sync.  This could be done either by the same code which 
keeps the PGDs of various processes in sync already or on demand; I 
believe my personal preference would be to have all that in the same 
place, since we have to do it anyway, and this is nothing different 
except for the offset.

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25  0:37                                                     ` H. Peter Anvin
@ 2008-01-25  2:56                                                       ` Eric W. Biederman
  2008-01-25  4:41                                                         ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 80+ messages in thread
From: Eric W. Biederman @ 2008-01-25  2:56 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Jeremy Fitzhardinge, Ian Campbell, Ingo Molnar,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar

"H. Peter Anvin" <hpa@zytor.com> writes:

> Jeremy Fitzhardinge wrote:
>> H. Peter Anvin wrote:
>>> No, if Xen wasn't an issue there wouldn't be anything to do for the PAE case
>>> at all (since the PGD is trivial.)
>>>
>>> Copying PMDs is more or less an analogous case of the !PAE case, once the
>>> allocation is already done.  The allocation should be trivial though, since
>>> this would be a one-time thing.
>>
>> I think we're in vehement agreement here.  In either case, its just a matter
>> of something like:
>>
>>    memcpy(pgd, &pgd[USER_PTRS_PER_PGD], sizeof(pgd_t) * KERNEL_PTRS_PER_PGD);
>>     which would work for both PAE and non-PAE.
>>
>>>> It would be easy enough to add some code on xen side to look for pmd aliases
>>>> when using/pinning a pagetable, and allocate'n'copy a new pmd page as
>>>> needed.  That way the core code can ignore the issue.
>>>
>>> As much as I'd rather see Xen fixing this than having it continue to impact
>>> the kernel, I presume it will take some time to flush the broken hypervisors
>>> out?
>>
>> Sorry, I was unclear.  I meant in the purely Xen-specific parts of the kernel
>> (arch/x86/xen).  It wouldn't require a hypervisor change.
>>
>
> Oh, that makes that option much more viable and probably preferrable.

Note.  I don't believe we use either trampoline (cpu startup or acpi wakeup)
in the hypervisor case (esp Xen).  So we should be able to completely ignore
Xen and do the memcpy of pgd entries.

I expect Xen gives us other cpus already in protected mode (which is overall
the sane thing to do).

Eric

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25  2:56                                                       ` Eric W. Biederman
@ 2008-01-25  4:41                                                         ` Jeremy Fitzhardinge
  2008-01-25 11:07                                                           ` Eric W. Biederman
  0 siblings, 1 reply; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-01-25  4:41 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: H. Peter Anvin, Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar

Eric W. Biederman wrote:
> Note.  I don't believe we use either trampoline (cpu startup or acpi wakeup)
> in the hypervisor case (esp Xen).  So we should be able to completely ignore
> Xen and do the memcpy of pgd entries.
>   

Indeed.  The alias mapping can be set up in 
native_pagetable_setup_done() and needn't involve Xen at all.

> I expect Xen gives us other cpus already in protected mode (which is overall
> the sane thing to do).

Quite so.

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25  0:27                                             ` H. Peter Anvin
  2008-01-25  0:46                                               ` Rafael J. Wysocki
  2008-01-25  2:16                                               ` Eric W. Biederman
@ 2008-01-25  7:49                                               ` Pavel Machek
  2008-01-25 22:02                                                 ` Rafael J. Wysocki
  2 siblings, 1 reply; 80+ messages in thread
From: Pavel Machek @ 2008-01-25  7:49 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Rafael J. Wysocki, Jeremy Fitzhardinge, Ian Campbell,
	Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

On Thu 2008-01-24 16:27:58, H. Peter Anvin wrote:
> Pavel Machek wrote:
> >>>
> >>I just looked at the ACPI suspend code, and it looks 
> >>like it hacks its own identity map at runtime.  Pavel, 
> >>am I reading that code right?
> >
> >Yes, I think so, I believe we do it on both 32 and 64 
> >bit now.
> >
> 
> So the background to this... we need an identity map to 
> trampoline at early boot, obviously, but we'd like it to 
> not stick around more than necessary.  We have 
> zap_low_mappings() now but it's not really sufficient.
> 
> Secondary SMP processors need these mappings during 
> trampolining -- presumably including CPU hotplug -- and 
> I'm suspecting it might simply make sense to use a 
> separate set of page tables (with both the identity and 
> the kernel map) for trampolining and just keep them 
> around.  That way they would be usable for ACPI as well.

That would enable some cleanups, yes.
						Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25  4:41                                                         ` Jeremy Fitzhardinge
@ 2008-01-25 11:07                                                           ` Eric W. Biederman
  0 siblings, 0 replies; 80+ messages in thread
From: Eric W. Biederman @ 2008-01-25 11:07 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: H. Peter Anvin, Ian Campbell, Ingo Molnar, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar

Jeremy Fitzhardinge <jeremy@goop.org> writes:

> Eric W. Biederman wrote:
>> Note.  I don't believe we use either trampoline (cpu startup or acpi wakeup)
>> in the hypervisor case (esp Xen).  So we should be able to completely ignore
>> Xen and do the memcpy of pgd entries.
>>
>
> Indeed.  The alias mapping can be set up in native_pagetable_setup_done() and
> needn't involve Xen at all.

Good.  Then this case gets easy.

We just need a pgd that has pgd entries that duplicate the kernel pgd entries
at both address 0 and at the normal kernel address.

In 64bit mode we make this part of the trampoline because we need a pgt below
4G so that we can point a 32bit %cr3 value at it.  We can either use that
technique for the 32bit kernel (and be consistent) or we can have a single
trampoline/wakeup pgd that we use, since all pgd entries must be below 4G in
32bit mode.

Although if we really wanted to be restrictive we could have a much more limited
set of identity page table entries that only map the low 1M, or possibly just
640K.

Eric

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25  7:49                                               ` Pavel Machek
@ 2008-01-25 22:02                                                 ` Rafael J. Wysocki
  2008-01-25 22:11                                                   ` Pavel Machek
  2008-01-28 15:00                                                   ` Ingo Molnar
  0 siblings, 2 replies; 80+ messages in thread
From: Rafael J. Wysocki @ 2008-01-25 22:02 UTC (permalink / raw)
  To: Pavel Machek
  Cc: H. Peter Anvin, Jeremy Fitzhardinge, Ian Campbell, Ingo Molnar,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

On Friday, 25 of January 2008, Pavel Machek wrote:
> On Thu 2008-01-24 16:27:58, H. Peter Anvin wrote:
> > Pavel Machek wrote:
> > >>>
> > >>I just looked at the ACPI suspend code, and it looks 
> > >>like it hacks its own identity map at runtime.  Pavel, 
> > >>am I reading that code right?
> > >
> > >Yes, I think so, I believe we do it on both 32 and 64 
> > >bit now.
> > >
> > 
> > So the background to this... we need an identity map to 
> > trampoline at early boot, obviously, but we'd like it to 
> > not stick around more than necessary.  We have 
> > zap_low_mappings() now but it's not really sufficient.
> > 
> > Secondary SMP processors need these mappings during 
> > trampolining -- presumably including CPU hotplug -- and 
> > I'm suspecting it might simply make sense to use a 
> > separate set of page tables (with both the identity and 
> > the kernel map) for trampolining and just keep them 
> > around.  That way they would be usable for ACPI as well.
> 
> That would enable some cleanups, yes.

Speaking of cleanups, the following one is applicable IMO.

Greetings,
Rafael

---
 arch/x86/mm/init_32.c |    8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

Index: linux-2.6/arch/x86/mm/init_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_32.c
+++ linux-2.6/arch/x86/mm/init_32.c
@@ -444,23 +444,23 @@ static void __init pagetable_init (void)
 	paravirt_pagetable_setup_done(pgd_base);
 }
 
-#if defined(CONFIG_HIBERNATION) || defined(CONFIG_ACPI)
+#ifdef CONFIG_SUSPEND
 /*
  * Swap suspend & friends need this for resume because things like the intel-agp
  * driver might have split up a kernel 4MB mapping.
  */
-char __nosavedata swsusp_pg_dir[PAGE_SIZE]
+char swsusp_pg_dir[PAGE_SIZE]
 	__attribute__ ((aligned (PAGE_SIZE)));
 
 static inline void save_pg_dir(void)
 {
 	memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
 }
-#else
+#else /* !CONFIG_SUSPEND */
 static inline void save_pg_dir(void)
 {
 }
-#endif
+#endif /* !CONFIG_SUSPEND */
 
 void zap_low_mappings (void)
 {

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25 22:02                                                 ` Rafael J. Wysocki
@ 2008-01-25 22:11                                                   ` Pavel Machek
  2008-01-28 15:00                                                   ` Ingo Molnar
  1 sibling, 0 replies; 80+ messages in thread
From: Pavel Machek @ 2008-01-25 22:11 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: H. Peter Anvin, Jeremy Fitzhardinge, Ian Campbell, Ingo Molnar,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Hi!

> > > >>I just looked at the ACPI suspend code, and it looks 
> > > >>like it hacks its own identity map at runtime.  Pavel, 
> > > >>am I reading that code right?
> > > >
> > > >Yes, I think so, I believe we do it on both 32 and 64 
> > > >bit now.
> > > >
> > > 
> > > So the background to this... we need an identity map to 
> > > trampoline at early boot, obviously, but we'd like it to 
> > > not stick around more than necessary.  We have 
> > > zap_low_mappings() now but it's not really sufficient.
> > > 
> > > Secondary SMP processors need these mappings during 
> > > trampolining -- presumably including CPU hotplug -- and 
> > > I'm suspecting it might simply make sense to use a 
> > > separate set of page tables (with both the identity and 
> > > the kernel map) for trampolining and just keep them 
> > > around.  That way they would be usable for ACPI as well.
> > 
> > That would enable some cleanups, yes.
> 
> Speaking of cleanups, the following one is applicable IMO.

ACK... and BTW ack for that deferred device removal series.

							Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-25 22:02                                                 ` Rafael J. Wysocki
  2008-01-25 22:11                                                   ` Pavel Machek
@ 2008-01-28 15:00                                                   ` Ingo Molnar
  2008-01-28 15:25                                                     ` Rafael J. Wysocki
  2008-01-28 16:12                                                     ` Ingo Molnar
  1 sibling, 2 replies; 80+ messages in thread
From: Ingo Molnar @ 2008-01-28 15:00 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, H. Peter Anvin, Jeremy Fitzhardinge, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman


* Rafael J. Wysocki <rjw@sisk.pl> wrote:

> Speaking of cleanups, the following one is applicable IMO.

> --- linux-2.6.orig/arch/x86/mm/init_32.c
> +++ linux-2.6/arch/x86/mm/init_32.c
> @@ -444,23 +444,23 @@ static void __init pagetable_init (void)
>  	paravirt_pagetable_setup_done(pgd_base);
>  }
>  
> -#if defined(CONFIG_HIBERNATION) || defined(CONFIG_ACPI)
> +#ifdef CONFIG_SUSPEND
>  /*
>   * Swap suspend & friends need this for resume because things like the intel-agp
>   * driver might have split up a kernel 4MB mapping.
>   */
> -char __nosavedata swsusp_pg_dir[PAGE_SIZE]
> +char swsusp_pg_dir[PAGE_SIZE]

thanks, applied.

	Ingo

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 15:00                                                   ` Ingo Molnar
@ 2008-01-28 15:25                                                     ` Rafael J. Wysocki
  2008-01-28 19:40                                                       ` Pavel Machek
  2008-01-28 16:12                                                     ` Ingo Molnar
  1 sibling, 1 reply; 80+ messages in thread
From: Rafael J. Wysocki @ 2008-01-28 15:25 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Pavel Machek, H. Peter Anvin, Jeremy Fitzhardinge, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

On Monday, 28 of January 2008, Ingo Molnar wrote:
> 
> * Rafael J. Wysocki <rjw@sisk.pl> wrote:
> 
> > Speaking of cleanups, the following one is applicable IMO.
> 
> > --- linux-2.6.orig/arch/x86/mm/init_32.c
> > +++ linux-2.6/arch/x86/mm/init_32.c
> > @@ -444,23 +444,23 @@ static void __init pagetable_init (void)
> >  	paravirt_pagetable_setup_done(pgd_base);
> >  }
> >  
> > -#if defined(CONFIG_HIBERNATION) || defined(CONFIG_ACPI)
> > +#ifdef CONFIG_SUSPEND
> >  /*
> >   * Swap suspend & friends need this for resume because things like the intel-agp
> >   * driver might have split up a kernel 4MB mapping.
> >   */
> > -char __nosavedata swsusp_pg_dir[PAGE_SIZE]
> > +char swsusp_pg_dir[PAGE_SIZE]
> 
> thanks, applied.

Thanks.  Well, the comment should also be updated as I can see now.  I'll send
a separate patch for that.

Rafael

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 15:00                                                   ` Ingo Molnar
  2008-01-28 15:25                                                     ` Rafael J. Wysocki
@ 2008-01-28 16:12                                                     ` Ingo Molnar
  2008-01-28 17:02                                                       ` Rafael J. Wysocki
  1 sibling, 1 reply; 80+ messages in thread
From: Ingo Molnar @ 2008-01-28 16:12 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, H. Peter Anvin, Jeremy Fitzhardinge, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

[-- Attachment #1: Type: text/plain, Size: 328 bytes --]


> >   * driver might have split up a kernel 4MB mapping.
> >   */
> > -char __nosavedata swsusp_pg_dir[PAGE_SIZE]
> > +char swsusp_pg_dir[PAGE_SIZE]

hm, random-qa found build breakage with this patch:

 arch/x86/kernel/built-in.o: In function `wakeup_start':
 : undefined reference to `swsusp_pg_dir'

config attached.

	Ingo

[-- Attachment #2: config --]
[-- Type: text/plain, Size: 48055 bytes --]

#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.24
# Mon Jan 28 16:57:55 2008
#
# CONFIG_64BIT is not set
CONFIG_X86_32=y
# CONFIG_X86_64 is not set
CONFIG_X86=y
# CONFIG_GENERIC_LOCKBREAK is not set
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CMOS_UPDATE=y
CONFIG_CLOCKSOURCE_WATCHDOG=y
CONFIG_GENERIC_CLOCKEVENTS=y
CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_MMU=y
CONFIG_ZONE_DMA=y
CONFIG_QUICKLIST=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_HWEIGHT=y
# CONFIG_GENERIC_GPIO is not set
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_DMI=y
# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
# CONFIG_ARCH_HAS_ILOG2_U32 is not set
# CONFIG_ARCH_HAS_ILOG2_U64 is not set
CONFIG_GENERIC_CALIBRATE_DELAY=y
# CONFIG_GENERIC_TIME_VSYSCALL is not set
# CONFIG_HAVE_SETUP_PER_CPU_AREA is not set
CONFIG_ARCH_SUPPORTS_OPROFILE=y
# CONFIG_ZONE_DMA32 is not set
CONFIG_ARCH_POPULATES_NODE_MAP=y
# CONFIG_AUDIT_ARCH is not set
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_KTIME_SCALAR=y
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"

#
# General setup
#
CONFIG_EXPERIMENTAL=y
CONFIG_BROKEN_ON_SMP=y
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_LOCALVERSION=""
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
# CONFIG_POSIX_MQUEUE is not set
# CONFIG_BSD_PROCESS_ACCT is not set
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
# CONFIG_TASK_XACCT is not set
CONFIG_USER_NS=y
CONFIG_PID_NS=y
# CONFIG_AUDIT is not set
# CONFIG_IKCONFIG is not set
CONFIG_LOG_BUF_SHIFT=20
CONFIG_CGROUPS=y
CONFIG_CGROUP_DEBUG=y
# CONFIG_CGROUP_NS is not set
# CONFIG_FAIR_GROUP_SCHED is not set
# CONFIG_CGROUP_CPUACCT is not set
CONFIG_SYSFS_DEPRECATED=y
# CONFIG_RELAY is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EMBEDDED=y
CONFIG_UID16=y
# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
# CONFIG_BUG is not set
# CONFIG_ELF_CORE is not set
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_ANON_INODES=y
# CONFIG_EPOLL is not set
# CONFIG_SIGNALFD is not set
CONFIG_EVENTFD=y
# CONFIG_SHMEM is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_SLUB_DEBUG is not set
# CONFIG_SLAB is not set
CONFIG_SLUB=y
# CONFIG_SLOB is not set
CONFIG_SLABINFO=y
CONFIG_RT_MUTEXES=y
CONFIG_TINY_SHMEM=y
CONFIG_BASE_SMALL=0
# CONFIG_MODULES is not set
CONFIG_BLOCK=y
CONFIG_LBD=y
# CONFIG_BLK_DEV_IO_TRACE is not set
# CONFIG_LSF is not set
# CONFIG_BLK_DEV_BSG is not set

#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
# CONFIG_IOSCHED_AS is not set
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
# CONFIG_DEFAULT_AS is not set
# CONFIG_DEFAULT_DEADLINE is not set
# CONFIG_DEFAULT_CFQ is not set
CONFIG_DEFAULT_NOOP=y
CONFIG_DEFAULT_IOSCHED="noop"
CONFIG_PREEMPT_NOTIFIERS=y
CONFIG_CLASSIC_RCU=y
# CONFIG_PREEMPT_RCU is not set

#
# Processor type and features
#
CONFIG_TICK_ONESHOT=y
CONFIG_NO_HZ=y
# CONFIG_HIGH_RES_TIMERS is not set
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
# CONFIG_SMP is not set
# CONFIG_X86_PC is not set
# CONFIG_X86_ELAN is not set
# CONFIG_X86_VOYAGER is not set
# CONFIG_X86_NUMAQ is not set
# CONFIG_X86_SUMMIT is not set
# CONFIG_X86_BIGSMP is not set
# CONFIG_X86_VISWS is not set
CONFIG_X86_GENERICARCH=y
# CONFIG_X86_ES7000 is not set
# CONFIG_X86_RDC321X is not set
# CONFIG_X86_VSMP is not set
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
# CONFIG_PARAVIRT_GUEST is not set
CONFIG_X86_CYCLONE_TIMER=y
# CONFIG_M386 is not set
# CONFIG_M486 is not set
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
# CONFIG_M686 is not set
# CONFIG_MPENTIUMII is not set
# CONFIG_MPENTIUMIII is not set
# CONFIG_MPENTIUMM is not set
# CONFIG_MPENTIUM4 is not set
# CONFIG_MK6 is not set
# CONFIG_MK7 is not set
# CONFIG_MK8 is not set
# CONFIG_MCRUSOE is not set
# CONFIG_MEFFICEON is not set
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP2 is not set
# CONFIG_MWINCHIP3D is not set
# CONFIG_MGEODEGX1 is not set
CONFIG_MGEODE_LX=y
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_MVIAC7 is not set
# CONFIG_MPSC is not set
# CONFIG_MCORE2 is not set
# CONFIG_GENERIC_CPU is not set
CONFIG_X86_GENERIC=y
CONFIG_X86_CMPXCHG=y
CONFIG_X86_L1_CACHE_SHIFT=7
CONFIG_X86_XADD=y
CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_X86_USE_3DNOW=y
CONFIG_X86_TSC=y
CONFIG_X86_MINIMUM_CPU_FAMILY=4
CONFIG_X86_DEBUGCTLMSR=y
# CONFIG_HPET_TIMER is not set
CONFIG_PREEMPT_NONE=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT is not set
CONFIG_RCU_TRACE=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
# CONFIG_X86_MCE is not set
# CONFIG_VM86 is not set
CONFIG_TOSHIBA=y
# CONFIG_I8K is not set
CONFIG_X86_REBOOTFIXUPS=y
CONFIG_MICROCODE=y
CONFIG_MICROCODE_OLD_INTERFACE=y
# CONFIG_X86_MSR is not set
# CONFIG_X86_CPUID is not set
# CONFIG_NOHIGHMEM is not set
# CONFIG_HIGHMEM4G is not set
CONFIG_HIGHMEM64G=y
CONFIG_VMSPLIT_3G=y
# CONFIG_VMSPLIT_3G_OPT is not set
# CONFIG_VMSPLIT_2G is not set
# CONFIG_VMSPLIT_2G_OPT is not set
# CONFIG_VMSPLIT_1G is not set
CONFIG_PAGE_OFFSET=0xC0000000
CONFIG_HIGHMEM=y
CONFIG_X86_PAE=y
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
# CONFIG_DISCONTIGMEM_MANUAL is not set
# CONFIG_SPARSEMEM_MANUAL is not set
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
# CONFIG_SPARSEMEM_STATIC is not set
# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
CONFIG_SPLIT_PTLOCK_CPUS=4
CONFIG_RESOURCES_64BIT=y
CONFIG_ZONE_DMA_FLAG=1
CONFIG_BOUNCE=y
CONFIG_NR_QUICK=1
CONFIG_VIRT_TO_BUS=y
CONFIG_HIGHPTE=y
# CONFIG_MATH_EMULATION is not set
# CONFIG_MTRR is not set
CONFIG_EFI=y
CONFIG_SECCOMP=y
# CONFIG_HZ_100 is not set
# CONFIG_HZ_250 is not set
# CONFIG_HZ_300 is not set
CONFIG_HZ_1000=y
CONFIG_HZ=1000
# CONFIG_SCHED_HRTICK is not set
CONFIG_KEXEC=y
# CONFIG_CRASH_DUMP is not set
CONFIG_PHYSICAL_START=0x100000
# CONFIG_RELOCATABLE is not set
CONFIG_PHYSICAL_ALIGN=0x100000
# CONFIG_COMPAT_VDSO is not set
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y

#
# Power management options
#
CONFIG_PM=y
CONFIG_PM_LEGACY=y
CONFIG_PM_DEBUG=y
# CONFIG_PM_VERBOSE is not set
# CONFIG_PM_TRACE is not set
CONFIG_PM_SLEEP=y
CONFIG_SUSPEND_UP_POSSIBLE=y
# CONFIG_SUSPEND is not set
CONFIG_HIBERNATION_UP_POSSIBLE=y
CONFIG_HIBERNATION=y
CONFIG_PM_STD_PARTITION=""
CONFIG_ACPI=y
CONFIG_ACPI_SLEEP=y
CONFIG_ACPI_PROCFS=y
CONFIG_ACPI_PROCFS_POWER=y
# CONFIG_ACPI_SYSFS_POWER is not set
# CONFIG_ACPI_PROC_EVENT is not set
CONFIG_ACPI_AC=y
# CONFIG_ACPI_BATTERY is not set
CONFIG_ACPI_BUTTON=y
# CONFIG_ACPI_FAN is not set
CONFIG_ACPI_DOCK=y
CONFIG_ACPI_BAY=y
# CONFIG_ACPI_PROCESSOR is not set
# CONFIG_ACPI_ASUS is not set
CONFIG_ACPI_TOSHIBA=y
CONFIG_ACPI_BLACKLIST_YEAR=0
CONFIG_ACPI_DEBUG=y
# CONFIG_ACPI_DEBUG_FUNC_TRACE is not set
CONFIG_ACPI_EC=y
CONFIG_ACPI_POWER=y
CONFIG_ACPI_SYSTEM=y
# CONFIG_X86_PM_TIMER is not set
CONFIG_ACPI_CONTAINER=y
# CONFIG_ACPI_SBS is not set
CONFIG_X86_APM_BOOT=y
CONFIG_APM=y
CONFIG_APM_IGNORE_USER_SUSPEND=y
# CONFIG_APM_DO_ENABLE is not set
# CONFIG_APM_CPU_IDLE is not set
# CONFIG_APM_DISPLAY_BLANK is not set
# CONFIG_APM_ALLOW_INTS is not set
# CONFIG_APM_REAL_MODE_POWER_OFF is not set

#
# CPU Frequency scaling
#
# CONFIG_CPU_FREQ is not set
CONFIG_CPU_IDLE=y
CONFIG_CPU_IDLE_GOV_LADDER=y
CONFIG_CPU_IDLE_GOV_MENU=y

#
# Bus options (PCI etc.)
#
CONFIG_PCI=y
CONFIG_PCI_GOBIOS=y
# CONFIG_PCI_GOMMCONFIG is not set
# CONFIG_PCI_GODIRECT is not set
# CONFIG_PCI_GOANY is not set
CONFIG_PCI_BIOS=y
CONFIG_PCI_DOMAINS=y
CONFIG_PCIEPORTBUS=y
CONFIG_HOTPLUG_PCI_PCIE=y
# CONFIG_PCIEAER is not set
CONFIG_ARCH_SUPPORTS_MSI=y
CONFIG_PCI_MSI=y
# CONFIG_PCI_LEGACY is not set
CONFIG_PCI_DEBUG=y
CONFIG_HT_IRQ=y
CONFIG_ISA_DMA_API=y
CONFIG_ISA=y
CONFIG_EISA=y
CONFIG_EISA_PCI_EISA=y
CONFIG_EISA_NAMES=y
# CONFIG_MCA is not set
CONFIG_SCx200=y
CONFIG_SCx200HR_TIMER=y
CONFIG_GEODE_MFGPT_TIMER=y
CONFIG_K8_NB=y
# CONFIG_PCCARD is not set
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_FAKE=y
CONFIG_HOTPLUG_PCI_ACPI=y
CONFIG_HOTPLUG_PCI_ACPI_IBM=y
CONFIG_HOTPLUG_PCI_CPCI=y
# CONFIG_HOTPLUG_PCI_CPCI_ZT5550 is not set
CONFIG_HOTPLUG_PCI_SHPC=y

#
# Executable file formats / Emulations
#
CONFIG_BINFMT_ELF=y
# CONFIG_BINFMT_AOUT is not set
CONFIG_BINFMT_MISC=y

#
# Networking
#
CONFIG_NET=y

#
# Networking options
#
CONFIG_PACKET=y
# CONFIG_PACKET_MMAP is not set
CONFIG_UNIX=y
CONFIG_XFRM=y
# CONFIG_XFRM_USER is not set
# CONFIG_XFRM_SUB_POLICY is not set
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_NET_KEY_MIGRATE=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
# CONFIG_IP_ADVANCED_ROUTER is not set
CONFIG_IP_FIB_HASH=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
# CONFIG_IP_PNP_BOOTP is not set
# CONFIG_IP_PNP_RARP is not set
# CONFIG_NET_IPIP is not set
CONFIG_NET_IPGRE=y
# CONFIG_NET_IPGRE_BROADCAST is not set
CONFIG_IP_MROUTE=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
# CONFIG_ARPD is not set
# CONFIG_SYN_COOKIES is not set
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
CONFIG_INET_IPCOMP=y
CONFIG_INET_XFRM_TUNNEL=y
CONFIG_INET_TUNNEL=y
CONFIG_INET_XFRM_MODE_TRANSPORT=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
CONFIG_INET_XFRM_MODE_BEET=y
# CONFIG_INET_LRO is not set
CONFIG_INET_DIAG=y
CONFIG_INET_TCP_DIAG=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BIC=y
CONFIG_TCP_CONG_CUBIC=y
CONFIG_TCP_CONG_WESTWOOD=y
CONFIG_TCP_CONG_HTCP=y
# CONFIG_TCP_CONG_HSTCP is not set
CONFIG_TCP_CONG_HYBLA=y
CONFIG_TCP_CONG_VEGAS=y
CONFIG_TCP_CONG_SCALABLE=y
CONFIG_TCP_CONG_LP=y
# CONFIG_TCP_CONG_VENO is not set
CONFIG_TCP_CONG_YEAH=y
# CONFIG_TCP_CONG_ILLINOIS is not set
# CONFIG_DEFAULT_BIC is not set
# CONFIG_DEFAULT_CUBIC is not set
# CONFIG_DEFAULT_HTCP is not set
# CONFIG_DEFAULT_VEGAS is not set
# CONFIG_DEFAULT_WESTWOOD is not set
CONFIG_DEFAULT_RENO=y
CONFIG_DEFAULT_TCP_CONG="reno"
# CONFIG_TCP_MD5SIG is not set
CONFIG_IPV6=y
CONFIG_IPV6_PRIVACY=y
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_IPV6_ROUTE_INFO=y
# CONFIG_IPV6_OPTIMISTIC_DAD is not set
CONFIG_INET6_AH=y
# CONFIG_INET6_ESP is not set
# CONFIG_INET6_IPCOMP is not set
CONFIG_IPV6_MIP6=y
# CONFIG_INET6_XFRM_TUNNEL is not set
CONFIG_INET6_TUNNEL=y
CONFIG_INET6_XFRM_MODE_TRANSPORT=y
# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
CONFIG_INET6_XFRM_MODE_BEET=y
CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=y
# CONFIG_IPV6_SIT is not set
CONFIG_IPV6_TUNNEL=y
CONFIG_IPV6_MULTIPLE_TABLES=y
# CONFIG_IPV6_SUBTREES is not set
# CONFIG_NETLABEL is not set
CONFIG_NETWORK_SECMARK=y
# CONFIG_NETFILTER is not set
CONFIG_IP_DCCP=y
CONFIG_INET_DCCP_DIAG=y
CONFIG_IP_DCCP_ACKVEC=y

#
# DCCP CCIDs Configuration (EXPERIMENTAL)
#
CONFIG_IP_DCCP_CCID2=y
CONFIG_IP_DCCP_CCID2_DEBUG=y
# CONFIG_IP_DCCP_CCID3 is not set

#
# DCCP Kernel Hacking
#
# CONFIG_IP_DCCP_DEBUG is not set
CONFIG_IP_SCTP=y
CONFIG_SCTP_DBG_MSG=y
CONFIG_SCTP_DBG_OBJCNT=y
CONFIG_SCTP_HMAC_NONE=y
# CONFIG_SCTP_HMAC_SHA1 is not set
# CONFIG_SCTP_HMAC_MD5 is not set
# CONFIG_TIPC is not set
CONFIG_ATM=y
# CONFIG_ATM_CLIP is not set
CONFIG_ATM_LANE=y
# CONFIG_ATM_MPOA is not set
# CONFIG_ATM_BR2684 is not set
# CONFIG_BRIDGE is not set
CONFIG_VLAN_8021Q=y
# CONFIG_DECNET is not set
CONFIG_LLC=y
# CONFIG_LLC2 is not set
# CONFIG_IPX is not set
CONFIG_ATALK=y
# CONFIG_DEV_APPLETALK is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
CONFIG_ECONET=y
CONFIG_ECONET_AUNUDP=y
CONFIG_ECONET_NATIVE=y
# CONFIG_WAN_ROUTER is not set
CONFIG_NET_SCHED=y

#
# Queueing/Scheduling
#
CONFIG_NET_SCH_CBQ=y
CONFIG_NET_SCH_HTB=y
CONFIG_NET_SCH_HFSC=y
CONFIG_NET_SCH_ATM=y
CONFIG_NET_SCH_PRIO=y
# CONFIG_NET_SCH_RR is not set
# CONFIG_NET_SCH_RED is not set
# CONFIG_NET_SCH_SFQ is not set
# CONFIG_NET_SCH_TEQL is not set
CONFIG_NET_SCH_TBF=y
# CONFIG_NET_SCH_GRED is not set
# CONFIG_NET_SCH_DSMARK is not set
# CONFIG_NET_SCH_NETEM is not set
CONFIG_NET_SCH_INGRESS=y

#
# Classification
#
CONFIG_NET_CLS=y
CONFIG_NET_CLS_BASIC=y
# CONFIG_NET_CLS_TCINDEX is not set
# CONFIG_NET_CLS_ROUTE4 is not set
CONFIG_NET_CLS_FW=y
CONFIG_NET_CLS_U32=y
# CONFIG_CLS_U32_PERF is not set
CONFIG_CLS_U32_MARK=y
# CONFIG_NET_CLS_RSVP is not set
CONFIG_NET_CLS_RSVP6=y
CONFIG_NET_EMATCH=y
CONFIG_NET_EMATCH_STACK=32
# CONFIG_NET_EMATCH_CMP is not set
# CONFIG_NET_EMATCH_NBYTE is not set
# CONFIG_NET_EMATCH_U32 is not set
CONFIG_NET_EMATCH_META=y
CONFIG_NET_EMATCH_TEXT=y
# CONFIG_NET_CLS_ACT is not set
# CONFIG_NET_CLS_POLICE is not set
# CONFIG_NET_CLS_IND is not set
CONFIG_NET_SCH_FIFO=y

#
# Network testing
#
CONFIG_NET_PKTGEN=y
# CONFIG_HAMRADIO is not set
CONFIG_IRDA=y

#
# IrDA protocols
#
# CONFIG_IRLAN is not set
# CONFIG_IRNET is not set
CONFIG_IRCOMM=y
CONFIG_IRDA_ULTRA=y

#
# IrDA options
#
# CONFIG_IRDA_CACHE_LAST_LSAP is not set
# CONFIG_IRDA_FAST_RR is not set
CONFIG_IRDA_DEBUG=y

#
# Infrared-port device drivers
#

#
# SIR device drivers
#
CONFIG_IRTTY_SIR=y

#
# Dongle support
#
CONFIG_DONGLE=y
# CONFIG_ESI_DONGLE is not set
# CONFIG_ACTISYS_DONGLE is not set
# CONFIG_TEKRAM_DONGLE is not set
# CONFIG_TOIM3232_DONGLE is not set
# CONFIG_LITELINK_DONGLE is not set
# CONFIG_MA600_DONGLE is not set
CONFIG_GIRBIL_DONGLE=y
# CONFIG_MCP2120_DONGLE is not set
# CONFIG_OLD_BELKIN_DONGLE is not set
CONFIG_ACT200L_DONGLE=y
CONFIG_KINGSUN_DONGLE=y
# CONFIG_KSDAZZLE_DONGLE is not set
CONFIG_KS959_DONGLE=y

#
# Old SIR device drivers
#
CONFIG_IRPORT_SIR=y

#
# Old Serial dongle support
#
# CONFIG_DONGLE_OLD is not set

#
# FIR device drivers
#
CONFIG_USB_IRDA=y
# CONFIG_SIGMATEL_FIR is not set
# CONFIG_NSC_FIR is not set
CONFIG_WINBOND_FIR=y
# CONFIG_TOSHIBA_FIR is not set
CONFIG_SMC_IRCC_FIR=y
CONFIG_ALI_FIR=y
CONFIG_VLSI_FIR=y
# CONFIG_VIA_FIR is not set
# CONFIG_MCS_FIR is not set
# CONFIG_BT is not set
# CONFIG_AF_RXRPC is not set
CONFIG_FIB_RULES=y

#
# Wireless
#
CONFIG_CFG80211=y
# CONFIG_NL80211 is not set
CONFIG_WIRELESS_EXT=y
CONFIG_MAC80211=y
CONFIG_MAC80211_RCSIMPLE=y
CONFIG_MAC80211_DEBUGFS=y
CONFIG_MAC80211_DEBUG=y
CONFIG_MAC80211_VERBOSE_DEBUG=y
CONFIG_MAC80211_LOWTX_FRAME_DUMP=y
# CONFIG_TKIP_DEBUG is not set
CONFIG_MAC80211_DEBUG_COUNTERS=y
# CONFIG_MAC80211_IBSS_DEBUG is not set
# CONFIG_MAC80211_VERBOSE_PS_DEBUG is not set
CONFIG_IEEE80211=y
# CONFIG_IEEE80211_DEBUG is not set
CONFIG_IEEE80211_CRYPT_WEP=y
CONFIG_IEEE80211_CRYPT_CCMP=y
# CONFIG_IEEE80211_CRYPT_TKIP is not set
CONFIG_IEEE80211_SOFTMAC=y
# CONFIG_IEEE80211_SOFTMAC_DEBUG is not set
# CONFIG_RFKILL is not set
CONFIG_NET_9P=y
# CONFIG_NET_9P_FD is not set
CONFIG_NET_9P_DEBUG=y

#
# Device Drivers
#

#
# Generic Driver Options
#
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y
CONFIG_DEBUG_DRIVER=y
CONFIG_DEBUG_DEVRES=y
# CONFIG_SYS_HYPERVISOR is not set
CONFIG_CONNECTOR=y
# CONFIG_PROC_EVENTS is not set
# CONFIG_MTD is not set
CONFIG_PARPORT=y
CONFIG_PARPORT_PC=y
CONFIG_PARPORT_PC_FIFO=y
CONFIG_PARPORT_PC_SUPERIO=y
# CONFIG_PARPORT_GSC is not set
CONFIG_PARPORT_AX88796=y
CONFIG_PARPORT_1284=y
CONFIG_PARPORT_NOT_PC=y
CONFIG_PNP=y
CONFIG_PNP_DEBUG=y

#
# Protocols
#
CONFIG_ISAPNP=y
CONFIG_PNPBIOS=y
# CONFIG_PNPBIOS_PROC_FS is not set
CONFIG_PNPACPI=y
# CONFIG_BLK_DEV is not set
# CONFIG_MISC_DEVICES is not set
CONFIG_EEPROM_93CX6=y
# CONFIG_IDE is not set

#
# SCSI device support
#
CONFIG_RAID_ATTRS=y
CONFIG_SCSI=y
CONFIG_SCSI_DMA=y
CONFIG_SCSI_TGT=y
# CONFIG_SCSI_NETLINK is not set
# CONFIG_SCSI_PROC_FS is not set

#
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=y
# CONFIG_CHR_DEV_ST is not set
CONFIG_CHR_DEV_OSST=y
# CONFIG_BLK_DEV_SR is not set
CONFIG_CHR_DEV_SG=y
CONFIG_CHR_DEV_SCH=y

#
# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
#
# CONFIG_SCSI_MULTI_LUN is not set
CONFIG_SCSI_CONSTANTS=y
# CONFIG_SCSI_LOGGING is not set
# CONFIG_SCSI_SCAN_ASYNC is not set

#
# SCSI Transports
#
CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_FC_ATTRS is not set
CONFIG_SCSI_ISCSI_ATTRS=y
CONFIG_SCSI_SAS_ATTRS=y
CONFIG_SCSI_SAS_LIBSAS=y
CONFIG_SCSI_SAS_ATA=y
CONFIG_SCSI_SAS_LIBSAS_DEBUG=y
CONFIG_SCSI_SRP_ATTRS=y
CONFIG_SCSI_SRP_TGT_ATTRS=y
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_ATA=y
# CONFIG_ATA_NONSTANDARD is not set
CONFIG_ATA_ACPI=y
CONFIG_SATA_AHCI=y
# CONFIG_SATA_SVW is not set
CONFIG_ATA_PIIX=y
# CONFIG_SATA_MV is not set
# CONFIG_SATA_NV is not set
CONFIG_PDC_ADMA=y
CONFIG_SATA_QSTOR=y
# CONFIG_SATA_PROMISE is not set
# CONFIG_SATA_SX4 is not set
CONFIG_SATA_SIL=y
CONFIG_SATA_SIL24=y
# CONFIG_SATA_SIS is not set
# CONFIG_SATA_ULI is not set
CONFIG_SATA_VIA=y
CONFIG_SATA_VITESSE=y
CONFIG_SATA_INIC162X=y
# CONFIG_PATA_ACPI is not set
# CONFIG_PATA_ALI is not set
CONFIG_PATA_AMD=y
CONFIG_PATA_ARTOP=y
CONFIG_PATA_ATIIXP=y
CONFIG_PATA_CMD640_PCI=y
# CONFIG_PATA_CMD64X is not set
CONFIG_PATA_CS5520=y
CONFIG_PATA_CS5530=y
# CONFIG_PATA_CS5535 is not set
CONFIG_PATA_CS5536=y
# CONFIG_PATA_CYPRESS is not set
CONFIG_PATA_EFAR=y
# CONFIG_ATA_GENERIC is not set
CONFIG_PATA_HPT366=y
CONFIG_PATA_HPT37X=y
# CONFIG_PATA_HPT3X2N is not set
# CONFIG_PATA_HPT3X3 is not set
# CONFIG_PATA_ISAPNP is not set
# CONFIG_PATA_IT821X is not set
# CONFIG_PATA_IT8213 is not set
CONFIG_PATA_JMICRON=y
CONFIG_PATA_LEGACY=y
# CONFIG_PATA_TRIFLEX is not set
# CONFIG_PATA_MARVELL is not set
CONFIG_PATA_MPIIX=y
CONFIG_PATA_OLDPIIX=y
# CONFIG_PATA_NETCELL is not set
CONFIG_PATA_NS87410=y
# CONFIG_PATA_NS87415 is not set
CONFIG_PATA_OPTI=y
# CONFIG_PATA_OPTIDMA is not set
# CONFIG_PATA_PDC_OLD is not set
CONFIG_PATA_QDI=y
CONFIG_PATA_RADISYS=y
# CONFIG_PATA_RZ1000 is not set
# CONFIG_PATA_SC1200 is not set
# CONFIG_PATA_SERVERWORKS is not set
# CONFIG_PATA_PDC2027X is not set
# CONFIG_PATA_SIL680 is not set
# CONFIG_PATA_SIS is not set
# CONFIG_PATA_VIA is not set
CONFIG_PATA_WINBOND=y
CONFIG_PATA_WINBOND_VLB=y
# CONFIG_PATA_PLATFORM is not set
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
CONFIG_MD_LINEAR=y
# CONFIG_MD_RAID0 is not set
CONFIG_MD_RAID1=y
CONFIG_MD_RAID10=y
# CONFIG_MD_RAID456 is not set
CONFIG_MD_MULTIPATH=y
# CONFIG_MD_FAULTY is not set
CONFIG_BLK_DEV_DM=y
CONFIG_DM_DEBUG=y
CONFIG_DM_CRYPT=y
CONFIG_DM_SNAPSHOT=y
# CONFIG_DM_MIRROR is not set
CONFIG_DM_ZERO=y
CONFIG_DM_MULTIPATH=y
# CONFIG_DM_MULTIPATH_EMC is not set
# CONFIG_DM_MULTIPATH_RDAC is not set
CONFIG_DM_MULTIPATH_HP=y
# CONFIG_DM_DELAY is not set
CONFIG_DM_UEVENT=y
CONFIG_FUSION=y
CONFIG_FUSION_SPI=y
# CONFIG_FUSION_FC is not set
# CONFIG_FUSION_SAS is not set
CONFIG_FUSION_MAX_SGE=128
CONFIG_FUSION_CTL=y
# CONFIG_FUSION_LOGGING is not set

#
# IEEE 1394 (FireWire) support
#
CONFIG_FIREWIRE=y
CONFIG_FIREWIRE_OHCI=y
# CONFIG_FIREWIRE_SBP2 is not set
CONFIG_IEEE1394=y

#
# Subsystem Options
#
# CONFIG_IEEE1394_VERBOSEDEBUG is not set

#
# Controllers
#
CONFIG_IEEE1394_PCILYNX=y
# CONFIG_IEEE1394_OHCI1394 is not set

#
# Protocols
#
# CONFIG_IEEE1394_SBP2 is not set
CONFIG_IEEE1394_ETH1394_ROM_ENTRY=y
CONFIG_IEEE1394_ETH1394=y
CONFIG_IEEE1394_RAWIO=y
# CONFIG_I2O is not set
# CONFIG_MACINTOSH_DRIVERS is not set
CONFIG_NETDEVICES=y
# CONFIG_NETDEVICES_MULTIQUEUE is not set
# CONFIG_DUMMY is not set
# CONFIG_BONDING is not set
CONFIG_MACVLAN=y
# CONFIG_EQUALIZER is not set
CONFIG_TUN=y
CONFIG_VETH=y
# CONFIG_NET_SB1000 is not set
CONFIG_ARCNET=y
# CONFIG_ARCNET_1201 is not set
CONFIG_ARCNET_1051=y
# CONFIG_ARCNET_RAW is not set
# CONFIG_ARCNET_CAP is not set
CONFIG_ARCNET_COM90xx=y
CONFIG_ARCNET_COM90xxIO=y
CONFIG_ARCNET_RIM_I=y
CONFIG_ARCNET_COM20020=y
# CONFIG_ARCNET_COM20020_ISA is not set
CONFIG_ARCNET_COM20020_PCI=y
CONFIG_PHYLIB=y

#
# MII PHY device drivers
#
# CONFIG_MARVELL_PHY is not set
CONFIG_DAVICOM_PHY=y
CONFIG_QSEMI_PHY=y
CONFIG_LXT_PHY=y
# CONFIG_CICADA_PHY is not set
# CONFIG_VITESSE_PHY is not set
# CONFIG_SMSC_PHY is not set
CONFIG_BROADCOM_PHY=y
# CONFIG_ICPLUS_PHY is not set
CONFIG_FIXED_PHY=y
CONFIG_FIXED_MII_10_FDX=y
CONFIG_FIXED_MII_100_FDX=y
# CONFIG_FIXED_MII_1000_FDX is not set
CONFIG_FIXED_MII_AMNT=1
# CONFIG_MDIO_BITBANG is not set
CONFIG_NET_ETHERNET=y
CONFIG_MII=y
# CONFIG_HAPPYMEAL is not set
# CONFIG_SUNGEM is not set
# CONFIG_CASSINI is not set
# CONFIG_NET_VENDOR_3COM is not set
# CONFIG_LANCE is not set
# CONFIG_NET_VENDOR_SMC is not set
# CONFIG_NET_VENDOR_RACAL is not set
# CONFIG_NET_TULIP is not set
# CONFIG_AT1700 is not set
# CONFIG_DEPCA is not set
CONFIG_HP100=y
CONFIG_NET_ISA=y
CONFIG_E2100=y
CONFIG_EWRK3=y
# CONFIG_EEXPRESS is not set
# CONFIG_EEXPRESS_PRO is not set
# CONFIG_HPLAN_PLUS is not set
# CONFIG_HPLAN is not set
# CONFIG_LP486E is not set
CONFIG_ETH16I=y
# CONFIG_NE2000 is not set
CONFIG_ZNET=y
CONFIG_SEEQ8005=y
# CONFIG_IBM_NEW_EMAC_ZMII is not set
# CONFIG_IBM_NEW_EMAC_RGMII is not set
# CONFIG_IBM_NEW_EMAC_TAH is not set
# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
CONFIG_NET_PCI=y
# CONFIG_PCNET32 is not set
CONFIG_AMD8111_ETH=y
CONFIG_AMD8111E_NAPI=y
# CONFIG_ADAPTEC_STARFIRE is not set
CONFIG_AC3200=y
CONFIG_APRICOT=y
CONFIG_B44=y
CONFIG_B44_PCI_AUTOSELECT=y
CONFIG_B44_PCICORE_AUTOSELECT=y
CONFIG_B44_PCI=y
CONFIG_FORCEDETH=y
# CONFIG_FORCEDETH_NAPI is not set
CONFIG_CS89x0=y
CONFIG_EEPRO100=y
CONFIG_E100=y
CONFIG_LNE390=y
CONFIG_FEALNX=y
CONFIG_NATSEMI=y
CONFIG_NE2K_PCI=y
CONFIG_NE3210=y
CONFIG_ES3210=y
CONFIG_8139CP=y
CONFIG_8139TOO=y
CONFIG_8139TOO_PIO=y
# CONFIG_8139TOO_TUNE_TWISTER is not set
# CONFIG_8139TOO_8129 is not set
CONFIG_8139_OLD_RX_RESET=y
# CONFIG_SIS900 is not set
CONFIG_EPIC100=y
# CONFIG_SUNDANCE is not set
CONFIG_TLAN=y
CONFIG_VIA_RHINE=y
CONFIG_VIA_RHINE_MMIO=y
# CONFIG_VIA_RHINE_NAPI is not set
# CONFIG_SC92031 is not set
# CONFIG_NET_POCKET is not set
CONFIG_NETDEV_1000=y
CONFIG_ACENIC=y
CONFIG_ACENIC_OMIT_TIGON_I=y
CONFIG_DL2K=y
CONFIG_E1000=y
# CONFIG_E1000_NAPI is not set
# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
CONFIG_E1000E=y
# CONFIG_IP1000 is not set
CONFIG_NS83820=y
CONFIG_HAMACHI=y
# CONFIG_YELLOWFIN is not set
CONFIG_R8169=y
# CONFIG_R8169_NAPI is not set
CONFIG_R8169_VLAN=y
CONFIG_SIS190=y
# CONFIG_SKGE is not set
# CONFIG_SKY2 is not set
CONFIG_SK98LIN=y
CONFIG_VIA_VELOCITY=y
CONFIG_TIGON3=y
CONFIG_BNX2=y
CONFIG_QLA3XXX=y
# CONFIG_ATL1 is not set
CONFIG_NETDEV_10000=y
# CONFIG_CHELSIO_T1 is not set
CONFIG_CHELSIO_T3=y
# CONFIG_IXGBE is not set
CONFIG_IXGB=y
CONFIG_IXGB_NAPI=y
CONFIG_S2IO=y
# CONFIG_S2IO_NAPI is not set
# CONFIG_MYRI10GE is not set
# CONFIG_NETXEN_NIC is not set
# CONFIG_NIU is not set
CONFIG_MLX4_CORE=y
CONFIG_MLX4_DEBUG=y
# CONFIG_TEHUTI is not set
# CONFIG_TR is not set

#
# Wireless LAN
#
CONFIG_WLAN_PRE80211=y
# CONFIG_STRIP is not set
CONFIG_ARLAN=y
# CONFIG_WAVELAN is not set
CONFIG_WLAN_80211=y
# CONFIG_IPW2100 is not set
# CONFIG_IPW2200 is not set
CONFIG_LIBERTAS=y
CONFIG_LIBERTAS_USB=y
CONFIG_LIBERTAS_DEBUG=y
# CONFIG_AIRO is not set
CONFIG_HERMES=y
# CONFIG_PLX_HERMES is not set
CONFIG_TMD_HERMES=y
# CONFIG_NORTEL_HERMES is not set
CONFIG_PCI_HERMES=y
# CONFIG_ATMEL is not set
CONFIG_PRISM54=y
CONFIG_USB_ZD1201=y
CONFIG_RTL8187=y
# CONFIG_ADM8211 is not set
CONFIG_P54_COMMON=y
# CONFIG_P54_USB is not set
# CONFIG_P54_PCI is not set
# CONFIG_IWLWIFI is not set
CONFIG_HOSTAP=y
CONFIG_HOSTAP_FIRMWARE=y
CONFIG_HOSTAP_FIRMWARE_NVRAM=y
# CONFIG_HOSTAP_PLX is not set
CONFIG_HOSTAP_PCI=y
# CONFIG_BCM43XX is not set
CONFIG_B43=y
CONFIG_B43_PCI_AUTOSELECT=y
CONFIG_B43_PCICORE_AUTOSELECT=y
CONFIG_B43_DEBUG=y
CONFIG_B43_DMA=y
# CONFIG_B43_DMA_AND_PIO_MODE is not set
CONFIG_B43_DMA_MODE=y
# CONFIG_B43_PIO_MODE is not set
# CONFIG_B43LEGACY is not set
CONFIG_ZD1211RW=y
CONFIG_ZD1211RW_DEBUG=y
# CONFIG_RT2X00 is not set

#
# USB Network Adapters
#
# CONFIG_USB_CATC is not set
# CONFIG_USB_KAWETH is not set
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
# CONFIG_USB_USBNET is not set
# CONFIG_WAN is not set
CONFIG_ATM_DRIVERS=y
# CONFIG_ATM_DUMMY is not set
# CONFIG_ATM_TCP is not set
# CONFIG_ATM_LANAI is not set
# CONFIG_ATM_ENI is not set
# CONFIG_ATM_FIRESTREAM is not set
CONFIG_ATM_ZATM=y
# CONFIG_ATM_ZATM_DEBUG is not set
CONFIG_ATM_NICSTAR=y
# CONFIG_ATM_NICSTAR_USE_SUNI is not set
# CONFIG_ATM_NICSTAR_USE_IDT77105 is not set
# CONFIG_ATM_IDT77252 is not set
# CONFIG_ATM_AMBASSADOR is not set
CONFIG_ATM_HORIZON=y
# CONFIG_ATM_HORIZON_DEBUG is not set
# CONFIG_ATM_IA is not set
CONFIG_ATM_FORE200E_MAYBE=y
CONFIG_ATM_FORE200E_PCA=y
CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y
CONFIG_ATM_FORE200E_USE_TASKLET=y
CONFIG_ATM_FORE200E_TX_RETRY=16
CONFIG_ATM_FORE200E_DEBUG=0
CONFIG_ATM_FORE200E=y
# CONFIG_ATM_HE is not set
# CONFIG_FDDI is not set
# CONFIG_HIPPI is not set
# CONFIG_PLIP is not set
CONFIG_PPP=y
CONFIG_PPP_MULTILINK=y
CONFIG_PPP_FILTER=y
CONFIG_PPP_ASYNC=y
# CONFIG_PPP_SYNC_TTY is not set
CONFIG_PPP_DEFLATE=y
CONFIG_PPP_BSDCOMP=y
# CONFIG_PPP_MPPE is not set
CONFIG_PPPOE=y
CONFIG_PPPOATM=y
CONFIG_PPPOL2TP=y
# CONFIG_SLIP is not set
CONFIG_SLHC=y
# CONFIG_NET_FC is not set
CONFIG_SHAPER=y
CONFIG_NETCONSOLE=y
CONFIG_NETCONSOLE_DYNAMIC=y
CONFIG_NETPOLL=y
# CONFIG_NETPOLL_TRAP is not set
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_ISDN=y
CONFIG_ISDN_I4L=y
# CONFIG_ISDN_PPP is not set
CONFIG_ISDN_AUDIO=y
CONFIG_ISDN_TTY_FAX=y

#
# ISDN feature submodules
#
CONFIG_ISDN_DRV_LOOP=y
# CONFIG_ISDN_DIVERSION is not set

#
# ISDN4Linux hardware drivers
#

#
# Passive cards
#
# CONFIG_ISDN_DRV_HISAX is not set

#
# Active cards
#
CONFIG_ISDN_DRV_ICN=y
# CONFIG_ISDN_DRV_PCBIT is not set
# CONFIG_ISDN_DRV_SC is not set
CONFIG_ISDN_DRV_ACT2000=y
# CONFIG_ISDN_DRV_GIGASET is not set
# CONFIG_ISDN_CAPI is not set
CONFIG_PHONE=y
CONFIG_PHONE_IXJ=y

#
# Input device support
#
CONFIG_INPUT=y
# CONFIG_INPUT_FF_MEMLESS is not set
CONFIG_INPUT_POLLDEV=y

#
# Userland interfaces
#
CONFIG_INPUT_MOUSEDEV=y
CONFIG_INPUT_MOUSEDEV_PSAUX=y
CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
# CONFIG_INPUT_JOYDEV is not set
CONFIG_INPUT_EVDEV=y
CONFIG_INPUT_EVBUG=y

#
# Input Device Drivers
#
CONFIG_INPUT_KEYBOARD=y
CONFIG_KEYBOARD_ATKBD=y
CONFIG_KEYBOARD_SUNKBD=y
CONFIG_KEYBOARD_LKKBD=y
CONFIG_KEYBOARD_XTKBD=y
CONFIG_KEYBOARD_NEWTON=y
CONFIG_KEYBOARD_STOWAWAY=y
# CONFIG_INPUT_MOUSE is not set
# CONFIG_INPUT_JOYSTICK is not set
CONFIG_INPUT_TABLET=y
# CONFIG_TABLET_USB_ACECAD is not set
# CONFIG_TABLET_USB_AIPTEK is not set
# CONFIG_TABLET_USB_GTCO is not set
CONFIG_TABLET_USB_KBTAB=y
CONFIG_TABLET_USB_WACOM=y
CONFIG_INPUT_TOUCHSCREEN=y
# CONFIG_TOUCHSCREEN_FUJITSU is not set
CONFIG_TOUCHSCREEN_GUNZE=y
CONFIG_TOUCHSCREEN_ELO=y
# CONFIG_TOUCHSCREEN_MTOUCH is not set
# CONFIG_TOUCHSCREEN_MK712 is not set
CONFIG_TOUCHSCREEN_PENMOUNT=y
# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
CONFIG_TOUCHSCREEN_TOUCHWIN=y
CONFIG_TOUCHSCREEN_UCB1400=y
# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set
CONFIG_INPUT_MISC=y
CONFIG_INPUT_PCSPKR=y
CONFIG_INPUT_WISTRON_BTNS=y
# CONFIG_INPUT_ATLAS_BTNS is not set
CONFIG_INPUT_ATI_REMOTE=y
CONFIG_INPUT_ATI_REMOTE2=y
CONFIG_INPUT_KEYSPAN_REMOTE=y
# CONFIG_INPUT_POWERMATE is not set
CONFIG_INPUT_YEALINK=y
CONFIG_INPUT_UINPUT=y

#
# Hardware I/O ports
#
CONFIG_SERIO=y
CONFIG_SERIO_I8042=y
# CONFIG_SERIO_SERPORT is not set
CONFIG_SERIO_CT82C710=y
# CONFIG_SERIO_PARKBD is not set
CONFIG_SERIO_PCIPS2=y
CONFIG_SERIO_LIBPS2=y
# CONFIG_SERIO_RAW is not set
CONFIG_GAMEPORT=y
CONFIG_GAMEPORT_NS558=y
CONFIG_GAMEPORT_L4=y
# CONFIG_GAMEPORT_EMU10K1 is not set
# CONFIG_GAMEPORT_FM801 is not set

#
# Character devices
#
CONFIG_VT=y
CONFIG_VT_CONSOLE=y
CONFIG_HW_CONSOLE=y
# CONFIG_VT_HW_CONSOLE_BINDING is not set
CONFIG_SERIAL_NONSTANDARD=y
CONFIG_COMPUTONE=y
CONFIG_ROCKETPORT=y
# CONFIG_CYCLADES is not set
CONFIG_DIGIEPCA=y
# CONFIG_ESPSERIAL is not set
# CONFIG_MOXA_INTELLIO is not set
# CONFIG_MOXA_SMARTIO is not set
CONFIG_MOXA_SMARTIO_NEW=y
# CONFIG_ISI is not set
# CONFIG_SYNCLINK is not set
# CONFIG_SYNCLINKMP is not set
CONFIG_SYNCLINK_GT=y
# CONFIG_N_HDLC is not set
# CONFIG_RISCOM8 is not set
CONFIG_SPECIALIX=y
# CONFIG_SPECIALIX_RTSCTS is not set
CONFIG_SX=y
# CONFIG_RIO is not set
# CONFIG_STALDRV is not set
# CONFIG_NOZOMI is not set

#
# Serial drivers
#
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_FIX_EARLYCON_MEM=y
# CONFIG_SERIAL_8250_PCI is not set
CONFIG_SERIAL_8250_PNP=y
CONFIG_SERIAL_8250_NR_UARTS=4
CONFIG_SERIAL_8250_RUNTIME_UARTS=4
CONFIG_SERIAL_8250_EXTENDED=y
# CONFIG_SERIAL_8250_MANY_PORTS is not set
# CONFIG_SERIAL_8250_SHARE_IRQ is not set
CONFIG_SERIAL_8250_DETECT_IRQ=y
# CONFIG_SERIAL_8250_RSA is not set

#
# Non-8250 serial port support
#
CONFIG_SERIAL_CORE=y
CONFIG_SERIAL_CORE_CONSOLE=y
# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
CONFIG_LEGACY_PTYS=y
CONFIG_LEGACY_PTY_COUNT=256
# CONFIG_PRINTER is not set
# CONFIG_PPDEV is not set
CONFIG_IPMI_HANDLER=y
# CONFIG_IPMI_PANIC_EVENT is not set
# CONFIG_IPMI_DEVICE_INTERFACE is not set
# CONFIG_IPMI_SI is not set
CONFIG_IPMI_WATCHDOG=y
# CONFIG_IPMI_POWEROFF is not set
CONFIG_HW_RANDOM=y
# CONFIG_HW_RANDOM_INTEL is not set
# CONFIG_HW_RANDOM_AMD is not set
CONFIG_HW_RANDOM_GEODE=y
# CONFIG_HW_RANDOM_VIA is not set
# CONFIG_NVRAM is not set
# CONFIG_RTC is not set
CONFIG_GEN_RTC=y
# CONFIG_GEN_RTC_X is not set
# CONFIG_DTLK is not set
# CONFIG_R3964 is not set
# CONFIG_APPLICOM is not set
CONFIG_SONYPI=y
CONFIG_MWAVE=y
# CONFIG_SCx200_GPIO is not set
# CONFIG_PC8736x_GPIO is not set
# CONFIG_NSC_GPIO is not set
# CONFIG_CS5535_GPIO is not set
# CONFIG_RAW_DRIVER is not set
CONFIG_HPET=y
# CONFIG_HPET_RTC_IRQ is not set
# CONFIG_HPET_MMAP is not set
# CONFIG_HANGCHECK_TIMER is not set
CONFIG_TCG_TPM=y
# CONFIG_TCG_TIS is not set
CONFIG_TCG_NSC=y
# CONFIG_TCG_ATMEL is not set
CONFIG_TCG_INFINEON=y
# CONFIG_TELCLOCK is not set
CONFIG_DEVPORT=y
CONFIG_I2C=y
CONFIG_I2C_BOARDINFO=y
# CONFIG_I2C_CHARDEV is not set

#
# I2C Algorithms
#
CONFIG_I2C_ALGOBIT=y
CONFIG_I2C_ALGOPCF=y
CONFIG_I2C_ALGOPCA=y

#
# I2C Hardware Bus support
#
# CONFIG_I2C_ALI1535 is not set
# CONFIG_I2C_ALI1563 is not set
# CONFIG_I2C_ALI15X3 is not set
CONFIG_I2C_AMD756=y
# CONFIG_I2C_AMD756_S4882 is not set
CONFIG_I2C_AMD8111=y
CONFIG_I2C_ELEKTOR=y
# CONFIG_I2C_I801 is not set
CONFIG_I2C_I810=y
# CONFIG_I2C_PIIX4 is not set
# CONFIG_I2C_NFORCE2 is not set
CONFIG_I2C_OCORES=y
# CONFIG_I2C_PARPORT is not set
# CONFIG_I2C_PARPORT_LIGHT is not set
# CONFIG_I2C_PROSAVAGE is not set
# CONFIG_I2C_SAVAGE4 is not set
# CONFIG_I2C_SIMTEC is not set
CONFIG_SCx200_ACB=y
CONFIG_I2C_SIS5595=y
CONFIG_I2C_SIS630=y
# CONFIG_I2C_SIS96X is not set
# CONFIG_I2C_TAOS_EVM is not set
CONFIG_I2C_TINY_USB=y
# CONFIG_I2C_VIA is not set
# CONFIG_I2C_VIAPRO is not set
CONFIG_I2C_VOODOO3=y
CONFIG_I2C_PCA_ISA=y

#
# Miscellaneous I2C Chip support
#
# CONFIG_SENSORS_DS1337 is not set
# CONFIG_SENSORS_DS1374 is not set
CONFIG_DS1682=y
CONFIG_SENSORS_EEPROM=y
# CONFIG_SENSORS_PCF8574 is not set
CONFIG_SENSORS_PCA9539=y
# CONFIG_SENSORS_PCF8591 is not set
CONFIG_SENSORS_MAX6875=y
# CONFIG_SENSORS_TSL2550 is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
CONFIG_I2C_DEBUG_BUS=y
# CONFIG_I2C_DEBUG_CHIP is not set

#
# SPI support
#
# CONFIG_SPI is not set
# CONFIG_SPI_MASTER is not set
CONFIG_W1=y
# CONFIG_W1_CON is not set

#
# 1-wire Bus Masters
#
CONFIG_W1_MASTER_MATROX=y
CONFIG_W1_MASTER_DS2490=y
CONFIG_W1_MASTER_DS2482=y

#
# 1-wire Slaves
#
CONFIG_W1_SLAVE_THERM=y
CONFIG_W1_SLAVE_SMEM=y
# CONFIG_W1_SLAVE_DS2433 is not set
# CONFIG_W1_SLAVE_DS2760 is not set
# CONFIG_POWER_SUPPLY is not set
CONFIG_HWMON=y
CONFIG_HWMON_VID=y
# CONFIG_SENSORS_ABITUGURU is not set
# CONFIG_SENSORS_ABITUGURU3 is not set
CONFIG_SENSORS_AD7418=y
CONFIG_SENSORS_ADM1021=y
# CONFIG_SENSORS_ADM1025 is not set
CONFIG_SENSORS_ADM1026=y
# CONFIG_SENSORS_ADM1029 is not set
CONFIG_SENSORS_ADM1031=y
# CONFIG_SENSORS_ADM9240 is not set
# CONFIG_SENSORS_ADT7470 is not set
# CONFIG_SENSORS_K8TEMP is not set
CONFIG_SENSORS_ASB100=y
CONFIG_SENSORS_ATXP1=y
CONFIG_SENSORS_DS1621=y
CONFIG_SENSORS_I5K_AMB=y
# CONFIG_SENSORS_F71805F is not set
# CONFIG_SENSORS_F71882FG is not set
CONFIG_SENSORS_F75375S=y
# CONFIG_SENSORS_FSCHER is not set
CONFIG_SENSORS_FSCPOS=y
CONFIG_SENSORS_FSCHMD=y
CONFIG_SENSORS_GL518SM=y
# CONFIG_SENSORS_GL520SM is not set
# CONFIG_SENSORS_CORETEMP is not set
# CONFIG_SENSORS_IBMPEX is not set
# CONFIG_SENSORS_IT87 is not set
# CONFIG_SENSORS_LM63 is not set
# CONFIG_SENSORS_LM75 is not set
# CONFIG_SENSORS_LM77 is not set
# CONFIG_SENSORS_LM78 is not set
CONFIG_SENSORS_LM80=y
CONFIG_SENSORS_LM83=y
# CONFIG_SENSORS_LM85 is not set
# CONFIG_SENSORS_LM87 is not set
# CONFIG_SENSORS_LM90 is not set
CONFIG_SENSORS_LM92=y
CONFIG_SENSORS_LM93=y
# CONFIG_SENSORS_MAX1619 is not set
CONFIG_SENSORS_MAX6650=y
# CONFIG_SENSORS_PC87360 is not set
CONFIG_SENSORS_PC87427=y
# CONFIG_SENSORS_SIS5595 is not set
CONFIG_SENSORS_DME1737=y
CONFIG_SENSORS_SMSC47M1=y
CONFIG_SENSORS_SMSC47M192=y
# CONFIG_SENSORS_SMSC47B397 is not set
CONFIG_SENSORS_THMC50=y
CONFIG_SENSORS_VIA686A=y
# CONFIG_SENSORS_VT1211 is not set
CONFIG_SENSORS_VT8231=y
# CONFIG_SENSORS_W83781D is not set
CONFIG_SENSORS_W83791D=y
# CONFIG_SENSORS_W83792D is not set
CONFIG_SENSORS_W83793=y
CONFIG_SENSORS_W83L785TS=y
CONFIG_SENSORS_W83627HF=y
CONFIG_SENSORS_W83627EHF=y
# CONFIG_SENSORS_HDAPS is not set
CONFIG_SENSORS_APPLESMC=y
CONFIG_HWMON_DEBUG_CHIP=y
# CONFIG_WATCHDOG is not set

#
# Sonics Silicon Backplane
#
CONFIG_SSB_POSSIBLE=y
CONFIG_SSB=y
CONFIG_SSB_PCIHOST_POSSIBLE=y
CONFIG_SSB_PCIHOST=y
# CONFIG_SSB_SILENT is not set
# CONFIG_SSB_DEBUG is not set
CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y
CONFIG_SSB_DRIVER_PCICORE=y

#
# Multifunction device drivers
#
# CONFIG_MFD_SM501 is not set

#
# Multimedia devices
#
# CONFIG_VIDEO_DEV is not set
CONFIG_DVB_CORE=y
CONFIG_DVB_CAPTURE_DRIVERS=y

#
# Supported SAA7146 based PCI Adapters
#
# CONFIG_TTPCI_EEPROM is not set
# CONFIG_DVB_BUDGET_CORE is not set

#
# Supported USB Adapters
#
CONFIG_DVB_USB=y
# CONFIG_DVB_USB_DEBUG is not set
CONFIG_DVB_USB_A800=y
# CONFIG_DVB_USB_DIBUSB_MB is not set
CONFIG_DVB_USB_DIBUSB_MC=y
CONFIG_DVB_USB_DIB0700=y
# CONFIG_DVB_USB_UMT_010 is not set
# CONFIG_DVB_USB_CXUSB is not set
# CONFIG_DVB_USB_M920X is not set
# CONFIG_DVB_USB_GL861 is not set
# CONFIG_DVB_USB_AU6610 is not set
CONFIG_DVB_USB_DIGITV=y
CONFIG_DVB_USB_VP7045=y
CONFIG_DVB_USB_VP702X=y
CONFIG_DVB_USB_GP8PSK=y
CONFIG_DVB_USB_NOVA_T_USB2=y
# CONFIG_DVB_USB_TTUSB2 is not set
# CONFIG_DVB_USB_DTT200U is not set
CONFIG_DVB_USB_OPERA1=y
# CONFIG_DVB_USB_AF9005 is not set
# CONFIG_DVB_TTUSB_BUDGET is not set
CONFIG_DVB_TTUSB_DEC=y
# CONFIG_DVB_CINERGYT2 is not set

#
# Supported FlexCopII (B2C2) Adapters
#
# CONFIG_DVB_B2C2_FLEXCOP is not set

#
# Supported BT878 Adapters
#

#
# Supported Pluto2 Adapters
#
CONFIG_DVB_PLUTO2=y

#
# Supported DVB Frontends
#

#
# Customise DVB Frontends
#
# CONFIG_DVB_FE_CUSTOMISE is not set

#
# DVB-S (satellite) frontends
#
CONFIG_DVB_STV0299=y
# CONFIG_DVB_CX24110 is not set
CONFIG_DVB_CX24123=y
# CONFIG_DVB_TDA8083 is not set
CONFIG_DVB_MT312=y
# CONFIG_DVB_VES1X93 is not set
# CONFIG_DVB_S5H1420 is not set
CONFIG_DVB_TDA10086=y

#
# DVB-T (terrestrial) frontends
#
CONFIG_DVB_SP8870=y
CONFIG_DVB_SP887X=y
CONFIG_DVB_CX22700=y
CONFIG_DVB_CX22702=y
CONFIG_DVB_L64781=y
CONFIG_DVB_TDA1004X=y
CONFIG_DVB_NXT6000=y
CONFIG_DVB_MT352=y
CONFIG_DVB_ZL10353=y
CONFIG_DVB_DIB3000MB=y
CONFIG_DVB_DIB3000MC=y
CONFIG_DVB_DIB7000M=y
CONFIG_DVB_DIB7000P=y

#
# DVB-C (cable) frontends
#
CONFIG_DVB_VES1820=y
CONFIG_DVB_TDA10021=y
# CONFIG_DVB_TDA10023 is not set
CONFIG_DVB_STV0297=y

#
# ATSC (North American/Korean Terrestrial/Cable DTV) frontends
#
CONFIG_DVB_NXT200X=y
# CONFIG_DVB_OR51211 is not set
# CONFIG_DVB_OR51132 is not set
CONFIG_DVB_BCM3510=y
# CONFIG_DVB_LGDT330X is not set
CONFIG_DVB_S5H1409=y

#
# Tuners/PLL support
#
CONFIG_DVB_PLL=y
# CONFIG_DVB_TDA826X is not set
# CONFIG_DVB_TDA827X is not set
# CONFIG_DVB_TDA18271 is not set
CONFIG_DVB_TUNER_QT1010=y
CONFIG_DVB_TUNER_MT2060=y
CONFIG_DVB_TUNER_MT2266=y
# CONFIG_DVB_TUNER_MT2131 is not set
CONFIG_DVB_TUNER_DIB0070=y
CONFIG_DVB_TUNER_XC5000=y

#
# Miscellaneous devices
#
CONFIG_DVB_LNBP21=y
# CONFIG_DVB_ISL6421 is not set
# CONFIG_DVB_TUA6100 is not set
CONFIG_DAB=y
# CONFIG_USB_DABUSB is not set

#
# Graphics support
#
CONFIG_AGP=y
CONFIG_AGP_ALI=y
CONFIG_AGP_ATI=y
# CONFIG_AGP_AMD is not set
CONFIG_AGP_AMD64=y
# CONFIG_AGP_INTEL is not set
# CONFIG_AGP_NVIDIA is not set
CONFIG_AGP_SIS=y
CONFIG_AGP_SWORKS=y
CONFIG_AGP_VIA=y
CONFIG_AGP_EFFICEON=y
# CONFIG_DRM is not set
# CONFIG_VGASTATE is not set
# CONFIG_VIDEO_OUTPUT_CONTROL is not set
# CONFIG_FB is not set
CONFIG_BACKLIGHT_LCD_SUPPORT=y
CONFIG_LCD_CLASS_DEVICE=y
CONFIG_BACKLIGHT_CLASS_DEVICE=y
CONFIG_BACKLIGHT_CORGI=y
CONFIG_BACKLIGHT_PROGEAR=y

#
# Display device support
#
# CONFIG_DISPLAY_SUPPORT is not set

#
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64
CONFIG_VIDEO_SELECT=y
# CONFIG_MDA_CONSOLE is not set
CONFIG_DUMMY_CONSOLE=y

#
# Sound
#
# CONFIG_SOUND is not set
CONFIG_AC97_BUS=y
CONFIG_HID_SUPPORT=y
CONFIG_HID=y
# CONFIG_HID_DEBUG is not set
# CONFIG_HIDRAW is not set

#
# USB Input Devices
#
# CONFIG_USB_HID is not set

#
# USB HID Boot Protocol drivers
#
# CONFIG_USB_KBD is not set
CONFIG_USB_MOUSE=y
CONFIG_USB_SUPPORT=y
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
CONFIG_USB_ARCH_HAS_EHCI=y
CONFIG_USB=y
CONFIG_USB_DEBUG=y

#
# Miscellaneous USB options
#
# CONFIG_USB_DEVICEFS is not set
# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_DYNAMIC_MINORS=y
CONFIG_USB_SUSPEND=y
CONFIG_USB_PERSIST=y
# CONFIG_USB_OTG is not set

#
# USB Host Controller Drivers
#
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_SPLIT_ISO=y
# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
# CONFIG_USB_ISP116X_HCD is not set
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_SSB=y
# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set
# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set
CONFIG_USB_OHCI_LITTLE_ENDIAN=y
CONFIG_USB_UHCI_HCD=y
# CONFIG_USB_SL811_HCD is not set
CONFIG_USB_R8A66597_HCD=y

#
# USB Device Class drivers
#
CONFIG_USB_ACM=y
CONFIG_USB_PRINTER=y

#
# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
#

#
# may also be needed; see USB_STORAGE Help for more information
#
# CONFIG_USB_STORAGE is not set
CONFIG_USB_LIBUSUAL=y

#
# USB Imaging devices
#
# CONFIG_USB_MDC800 is not set
CONFIG_USB_MICROTEK=y
CONFIG_USB_MON=y

#
# USB port drivers
#
# CONFIG_USB_USS720 is not set

#
# USB Serial Converter support
#
# CONFIG_USB_SERIAL is not set

#
# USB Miscellaneous drivers
#
CONFIG_USB_EMI62=y
CONFIG_USB_EMI26=y
CONFIG_USB_ADUTUX=y
# CONFIG_USB_AUERSWALD is not set
# CONFIG_USB_RIO500 is not set
# CONFIG_USB_LEGOTOWER is not set
CONFIG_USB_LCD=y
# CONFIG_USB_BERRY_CHARGE is not set
CONFIG_USB_LED=y
CONFIG_USB_CYPRESS_CY7C63=y
# CONFIG_USB_CYTHERM is not set
CONFIG_USB_PHIDGET=y
# CONFIG_USB_PHIDGETKIT is not set
# CONFIG_USB_PHIDGETMOTORCONTROL is not set
CONFIG_USB_PHIDGETSERVO=y
# CONFIG_USB_IDMOUSE is not set
# CONFIG_USB_FTDI_ELAN is not set
# CONFIG_USB_APPLEDISPLAY is not set
CONFIG_USB_SISUSBVGA=y
# CONFIG_USB_SISUSBVGA_CON is not set
# CONFIG_USB_LD is not set
CONFIG_USB_TRANCEVIBRATOR=y
# CONFIG_USB_IOWARRIOR is not set

#
# USB DSL modem support
#
# CONFIG_USB_ATM is not set

#
# USB Gadget Support
#
# CONFIG_USB_GADGET is not set
# CONFIG_MMC is not set
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y

#
# LED drivers
#

#
# LED Triggers
#
# CONFIG_LEDS_TRIGGERS is not set
CONFIG_INFINIBAND=y
# CONFIG_INFINIBAND_USER_MAD is not set
# CONFIG_INFINIBAND_USER_ACCESS is not set
CONFIG_INFINIBAND_ADDR_TRANS=y
# CONFIG_INFINIBAND_MTHCA is not set
# CONFIG_INFINIBAND_AMSO1100 is not set
CONFIG_INFINIBAND_CXGB3=y
CONFIG_INFINIBAND_CXGB3_DEBUG=y
CONFIG_MLX4_INFINIBAND=y
CONFIG_INFINIBAND_IPOIB=y
# CONFIG_INFINIBAND_IPOIB_CM is not set
# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
CONFIG_INFINIBAND_SRP=y
CONFIG_INFINIBAND_ISER=y
# CONFIG_EDAC is not set
# CONFIG_RTC_CLASS is not set
CONFIG_DMADEVICES=y

#
# DMA Devices
#
CONFIG_INTEL_IOATDMA=y
CONFIG_DMA_ENGINE=y

#
# DMA Clients
#
CONFIG_NET_DMA=y
CONFIG_DCA=y
# CONFIG_AUXDISPLAY is not set
CONFIG_VIRTUALIZATION=y
CONFIG_KVM=y
# CONFIG_KVM_INTEL is not set
CONFIG_KVM_AMD=y

#
# Userspace I/O
#
# CONFIG_UIO is not set

#
# Firmware Drivers
#
CONFIG_EDD=y
# CONFIG_EFI_VARS is not set
# CONFIG_DELL_RBU is not set
# CONFIG_DCDBAS is not set
CONFIG_DMIID=y

#
# File systems
#
# CONFIG_EXT2_FS is not set
CONFIG_EXT3_FS=y
CONFIG_EXT3_FS_XATTR=y
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT3_FS_SECURITY=y
CONFIG_EXT4DEV_FS=y
# CONFIG_EXT4DEV_FS_XATTR is not set
CONFIG_JBD=y
CONFIG_JBD_DEBUG=y
CONFIG_JBD2=y
# CONFIG_JBD2_DEBUG is not set
CONFIG_FS_MBCACHE=y
# CONFIG_REISERFS_FS is not set
# CONFIG_JFS_FS is not set
CONFIG_FS_POSIX_ACL=y
# CONFIG_XFS_FS is not set
CONFIG_GFS2_FS=y
# CONFIG_GFS2_FS_LOCKING_NOLOCK is not set
# CONFIG_GFS2_FS_LOCKING_DLM is not set
CONFIG_OCFS2_FS=y
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
# CONFIG_OCFS2_DEBUG_FS is not set
# CONFIG_MINIX_FS is not set
CONFIG_ROMFS_FS=y
# CONFIG_INOTIFY is not set
# CONFIG_QUOTA is not set
# CONFIG_DNOTIFY is not set
# CONFIG_AUTOFS_FS is not set
# CONFIG_AUTOFS4_FS is not set
# CONFIG_FUSE_FS is not set
CONFIG_GENERIC_ACL=y

#
# CD-ROM/DVD Filesystems
#
# CONFIG_ISO9660_FS is not set
CONFIG_UDF_FS=y
CONFIG_UDF_NLS=y

#
# DOS/FAT/NT Filesystems
#
CONFIG_FAT_FS=y
# CONFIG_MSDOS_FS is not set
CONFIG_VFAT_FS=y
CONFIG_FAT_DEFAULT_CODEPAGE=437
CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
# CONFIG_NTFS_FS is not set

#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
# CONFIG_PROC_KCORE is not set
# CONFIG_PROC_SYSCTL is not set
CONFIG_SYSFS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y
CONFIG_CONFIGFS_FS=y

#
# Miscellaneous filesystems
#
CONFIG_ADFS_FS=y
CONFIG_ADFS_FS_RW=y
CONFIG_AFFS_FS=y
CONFIG_ECRYPT_FS=y
CONFIG_HFS_FS=y
# CONFIG_HFSPLUS_FS is not set
# CONFIG_BEFS_FS is not set
CONFIG_BFS_FS=y
# CONFIG_EFS_FS is not set
CONFIG_CRAMFS=y
# CONFIG_VXFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_SYSV_FS is not set
CONFIG_UFS_FS=y
# CONFIG_UFS_FS_WRITE is not set
# CONFIG_UFS_DEBUG is not set
# CONFIG_NETWORK_FILESYSTEMS is not set

#
# Partition Types
#
CONFIG_PARTITION_ADVANCED=y
# CONFIG_ACORN_PARTITION is not set
# CONFIG_OSF_PARTITION is not set
# CONFIG_AMIGA_PARTITION is not set
CONFIG_ATARI_PARTITION=y
# CONFIG_MAC_PARTITION is not set
CONFIG_MSDOS_PARTITION=y
CONFIG_BSD_DISKLABEL=y
# CONFIG_MINIX_SUBPARTITION is not set
# CONFIG_SOLARIS_X86_PARTITION is not set
# CONFIG_UNIXWARE_DISKLABEL is not set
CONFIG_LDM_PARTITION=y
CONFIG_LDM_DEBUG=y
# CONFIG_SGI_PARTITION is not set
CONFIG_ULTRIX_PARTITION=y
CONFIG_SUN_PARTITION=y
CONFIG_KARMA_PARTITION=y
# CONFIG_EFI_PARTITION is not set
CONFIG_SYSV68_PARTITION=y
CONFIG_NLS=y
CONFIG_NLS_DEFAULT="iso8859-1"
# CONFIG_NLS_CODEPAGE_437 is not set
CONFIG_NLS_CODEPAGE_737=y
CONFIG_NLS_CODEPAGE_775=y
# CONFIG_NLS_CODEPAGE_850 is not set
# CONFIG_NLS_CODEPAGE_852 is not set
# CONFIG_NLS_CODEPAGE_855 is not set
# CONFIG_NLS_CODEPAGE_857 is not set
# CONFIG_NLS_CODEPAGE_860 is not set
# CONFIG_NLS_CODEPAGE_861 is not set
CONFIG_NLS_CODEPAGE_862=y
CONFIG_NLS_CODEPAGE_863=y
CONFIG_NLS_CODEPAGE_864=y
# CONFIG_NLS_CODEPAGE_865 is not set
CONFIG_NLS_CODEPAGE_866=y
CONFIG_NLS_CODEPAGE_869=y
CONFIG_NLS_CODEPAGE_936=y
CONFIG_NLS_CODEPAGE_950=y
# CONFIG_NLS_CODEPAGE_932 is not set
CONFIG_NLS_CODEPAGE_949=y
# CONFIG_NLS_CODEPAGE_874 is not set
CONFIG_NLS_ISO8859_8=y
CONFIG_NLS_CODEPAGE_1250=y
# CONFIG_NLS_CODEPAGE_1251 is not set
# CONFIG_NLS_ASCII is not set
# CONFIG_NLS_ISO8859_1 is not set
# CONFIG_NLS_ISO8859_2 is not set
CONFIG_NLS_ISO8859_3=y
CONFIG_NLS_ISO8859_4=y
# CONFIG_NLS_ISO8859_5 is not set
CONFIG_NLS_ISO8859_6=y
CONFIG_NLS_ISO8859_7=y
# CONFIG_NLS_ISO8859_9 is not set
CONFIG_NLS_ISO8859_13=y
# CONFIG_NLS_ISO8859_14 is not set
# CONFIG_NLS_ISO8859_15 is not set
CONFIG_NLS_KOI8_R=y
# CONFIG_NLS_KOI8_U is not set
CONFIG_NLS_UTF8=y
CONFIG_DLM=y
# CONFIG_DLM_DEBUG is not set
CONFIG_INSTRUMENTATION=y
# CONFIG_PROFILING is not set
CONFIG_MARKERS=y

#
# Kernel hacking
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
CONFIG_PRINTK_TIME=y
CONFIG_ENABLE_WARN_DEPRECATED=y
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
CONFIG_DEBUG_FS=y
CONFIG_HEADERS_CHECK=y
CONFIG_DEBUG_KERNEL=y
# CONFIG_DEBUG_SHIRQ is not set
# CONFIG_DETECT_SOFTLOCKUP is not set
CONFIG_SCHED_DEBUG=y
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
# CONFIG_DEBUG_RT_MUTEXES is not set
CONFIG_RT_MUTEX_TESTER=y
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCKDEP=y
CONFIG_LOCK_STAT=y
# CONFIG_DEBUG_LOCKDEP is not set
CONFIG_TRACE_IRQFLAGS=y
CONFIG_DEBUG_SPINLOCK_SLEEP=y
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
CONFIG_STACKTRACE=y
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_HIGHMEM is not set
# CONFIG_DEBUG_INFO is not set
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_LIST is not set
# CONFIG_DEBUG_SG is not set
CONFIG_FRAME_POINTER=y
CONFIG_FORCED_INLINING=y
CONFIG_BOOT_PRINTK_DELAY=y
CONFIG_BACKTRACE_SELF_TEST=y
CONFIG_FAULT_INJECTION=y
CONFIG_FAILSLAB=y
CONFIG_FAIL_PAGE_ALLOC=y
CONFIG_FAIL_MAKE_REQUEST=y
CONFIG_FAULT_INJECTION_DEBUG_FS=y
CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
CONFIG_LATENCYTOP=y
# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set
CONFIG_SAMPLES=y
# CONFIG_SAMPLE_KOBJECT is not set
# CONFIG_KGDB is not set
CONFIG_EARLY_PRINTK=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DEBUG_STACK_USAGE=y

#
# Page alloc debug is incompatible with Software Suspend on i386
#
# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_DEBUG_RODATA is not set
# CONFIG_4KSTACKS is not set
CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y
CONFIG_DOUBLEFAULT=y
CONFIG_IO_DELAY_TYPE_0X80=0
CONFIG_IO_DELAY_TYPE_0XED=1
CONFIG_IO_DELAY_TYPE_UDELAY=2
CONFIG_IO_DELAY_TYPE_NONE=3
# CONFIG_IO_DELAY_0X80 is not set
# CONFIG_IO_DELAY_0XED is not set
# CONFIG_IO_DELAY_UDELAY is not set
CONFIG_IO_DELAY_NONE=y
CONFIG_DEFAULT_IO_DELAY_TYPE=3
CONFIG_DEBUG_BOOT_PARAMS=y
# CONFIG_CPA_DEBUG is not set

#
# Security options
#
CONFIG_KEYS=y
# CONFIG_KEYS_DEBUG_PROC_KEYS is not set
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_NETWORK_XFRM=y
CONFIG_SECURITY_CAPABILITIES=y
CONFIG_SECURITY_FILE_CAPABILITIES=y
# CONFIG_SECURITY_ROOTPLUG is not set
CONFIG_CRYPTO=y
CONFIG_CRYPTO_ALGAPI=y
CONFIG_CRYPTO_AEAD=y
CONFIG_CRYPTO_BLKCIPHER=y
CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_HASH=y
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_HMAC=y
# CONFIG_CRYPTO_XCBC is not set
CONFIG_CRYPTO_NULL=y
CONFIG_CRYPTO_MD4=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_SHA1=y
CONFIG_CRYPTO_SHA256=y
# CONFIG_CRYPTO_SHA512 is not set
CONFIG_CRYPTO_WP512=y
CONFIG_CRYPTO_TGR192=y
CONFIG_CRYPTO_GF128MUL=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_PCBC=y
# CONFIG_CRYPTO_LRW is not set
# CONFIG_CRYPTO_XTS is not set
CONFIG_CRYPTO_CTR=y
CONFIG_CRYPTO_GCM=y
# CONFIG_CRYPTO_CCM is not set
# CONFIG_CRYPTO_CRYPTD is not set
CONFIG_CRYPTO_DES=y
# CONFIG_CRYPTO_FCRYPT is not set
CONFIG_CRYPTO_BLOWFISH=y
CONFIG_CRYPTO_TWOFISH=y
CONFIG_CRYPTO_TWOFISH_COMMON=y
# CONFIG_CRYPTO_TWOFISH_586 is not set
CONFIG_CRYPTO_SERPENT=y
CONFIG_CRYPTO_AES=y
# CONFIG_CRYPTO_AES_586 is not set
# CONFIG_CRYPTO_CAST5 is not set
# CONFIG_CRYPTO_CAST6 is not set
CONFIG_CRYPTO_TEA=y
CONFIG_CRYPTO_ARC4=y
# CONFIG_CRYPTO_KHAZAD is not set
CONFIG_CRYPTO_ANUBIS=y
CONFIG_CRYPTO_SEED=y
# CONFIG_CRYPTO_SALSA20 is not set
# CONFIG_CRYPTO_SALSA20_586 is not set
CONFIG_CRYPTO_DEFLATE=y
# CONFIG_CRYPTO_MICHAEL_MIC is not set
# CONFIG_CRYPTO_CRC32C is not set
# CONFIG_CRYPTO_CAMELLIA is not set
# CONFIG_CRYPTO_AUTHENC is not set
CONFIG_CRYPTO_LZO=y
# CONFIG_CRYPTO_HW is not set

#
# Library routines
#
CONFIG_BITREVERSE=y
CONFIG_CRC_CCITT=y
CONFIG_CRC16=y
CONFIG_CRC_ITU_T=y
CONFIG_CRC32=y
CONFIG_CRC7=y
# CONFIG_LIBCRC32C is not set
CONFIG_ZLIB_INFLATE=y
CONFIG_ZLIB_DEFLATE=y
CONFIG_LZO_COMPRESS=y
CONFIG_LZO_DECOMPRESS=y
CONFIG_GENERIC_ALLOCATOR=y
CONFIG_TEXTSEARCH=y
CONFIG_TEXTSEARCH_KMP=y
CONFIG_TEXTSEARCH_BM=y
CONFIG_TEXTSEARCH_FSM=y
CONFIG_PLIST=y
CONFIG_HAS_IOMEM=y
CONFIG_HAS_IOPORT=y
CONFIG_HAS_DMA=y
CONFIG_CHECK_SIGNATURE=y

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 16:12                                                     ` Ingo Molnar
@ 2008-01-28 17:02                                                       ` Rafael J. Wysocki
  2008-02-01 13:51                                                         ` Ingo Molnar
  0 siblings, 1 reply; 80+ messages in thread
From: Rafael J. Wysocki @ 2008-01-28 17:02 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Pavel Machek, H. Peter Anvin, Jeremy Fitzhardinge, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman,
	Len Brown

On Monday, 28 of January 2008, Ingo Molnar wrote:
> 
> > >   * driver might have split up a kernel 4MB mapping.
> > >   */
> > > -char __nosavedata swsusp_pg_dir[PAGE_SIZE]
> > > +char swsusp_pg_dir[PAGE_SIZE]
> 
> hm, random-qa found build breakage with this patch:
> 
>  arch/x86/kernel/built-in.o: In function `wakeup_start':
>  : undefined reference to `swsusp_pg_dir'
> 
> config attached.

I see.  CONFIG_HIBERNATION && CONFIG_ACPI -> CONFIG_ACPI_SLEEP
and the Makefile in arch/x86/kernel/acpi/ wants to build wakeup.S, which is
not necessary.  Hmm.

We can do a couple of things:
(1) make wakeup_$(BITS).o depend on CONFIG_SUSPEND (alone)
    This will build it if CONFIG_SUSPEND is set, but CONFIG_ACPI is not
    (still, that's consistent with the change in question).
(2) make wakeup_$(BITS).o depend on CONFIG_SUSPEND and CONFIG_ACPI
(3) define CONFIG_ACPI_SUSPEND depending on ACPI and SUSPEND and
    make wakeup_$(BITS).o as well as swsusp_pg_dir depend on that (most
    elegant)

Which one do you prefer?

In case you choose (3), please drop the patch and I'll send a new one to Len.

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 15:25                                                     ` Rafael J. Wysocki
@ 2008-01-28 19:40                                                       ` Pavel Machek
  2008-01-28 19:51                                                         ` H. Peter Anvin
  2008-01-28 20:26                                                         ` Rafael J. Wysocki
  0 siblings, 2 replies; 80+ messages in thread
From: Pavel Machek @ 2008-01-28 19:40 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Ingo Molnar, H. Peter Anvin, Jeremy Fitzhardinge, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Hi!

> > >  /*
> > >   * Swap suspend & friends need this for resume because things like the intel-agp
> > >   * driver might have split up a kernel 4MB mapping.
> > >   */
> > > -char __nosavedata swsusp_pg_dir[PAGE_SIZE]
> > > +char swsusp_pg_dir[PAGE_SIZE]
> > 
> > thanks, applied.

Sorry, this is subtle and I've overlooked it before.

(I thought you were only changing ifdef).

Now you memcpy() over pg_dir when that pgdir is in use during swsusp
resume. Granted, you memcpy() with same data that already are there,
but it may still do some funny effects.

Hmm, but same argument applies to lower levels of paging in 64-bit and
PAE cases, and we still do that memcpy-over-active-pagetables there...
:-(.

							Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 19:40                                                       ` Pavel Machek
@ 2008-01-28 19:51                                                         ` H. Peter Anvin
  2008-01-28 20:03                                                           ` Jeremy Fitzhardinge
  2008-01-28 20:26                                                         ` Rafael J. Wysocki
  1 sibling, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-28 19:51 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Rafael J. Wysocki, Ingo Molnar, Jeremy Fitzhardinge,
	Ian Campbell, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman,
	Ian Campbell

Pavel Machek wrote:
> Hi!
> 
>>>>  /*
>>>>   * Swap suspend & friends need this for resume because things like the intel-agp
>>>>   * driver might have split up a kernel 4MB mapping.
>>>>   */
>>>> -char __nosavedata swsusp_pg_dir[PAGE_SIZE]
>>>> +char swsusp_pg_dir[PAGE_SIZE]
>>> thanks, applied.
> 
> Sorry, this is subtle and I've overlooked it before.
> 
> (I thought you were only changing ifdef).
> 
> Now you memcpy() over pg_dir when that pgdir is in use during swsusp
> resume. Granted, you memcpy() with same data that already are there,
> but it may still do some funny effects.
> 
> Hmm, but same argument applies to lower levels of paging in 64-bit and
> PAE cases, and we still do that memcpy-over-active-pagetables there...
> :-(.
> 

This really comes down to the concept that we should keep an 
identity-mapped page table set around and keep it maintained. 
Maintenance should be relatively cheap -- we don't care about the 
vmalloc area (but if it's easier to have it, it won't cause any harm), 
and we already have to have code to synchronize the PGDs on !PAE and the 
PMDs on Xen (although that was supposedly getting fixed).  This is 
nothing very different than synchronizing yet another PGD[*] offset.

This obviously relates to (and needs to be on top of) the 
always-native-pagetables work.

[*] = Almost.  There is one exception: for 3 GB kernel:1 GB userspace, 
we must ensure that only 1 GB of the kernel area is synced.

	-hpa



^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 19:51                                                         ` H. Peter Anvin
@ 2008-01-28 20:03                                                           ` Jeremy Fitzhardinge
  2008-01-28 20:06                                                             ` H. Peter Anvin
  0 siblings, 1 reply; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-01-28 20:03 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Pavel Machek, Rafael J. Wysocki, Ingo Molnar, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

H. Peter Anvin wrote:
>  and we already have to have code to synchronize the PGDs on !PAE and 
> the PMDs on Xen (although that was supposedly getting fixed).

No, I don't have any plans there.  Xen will continue to require 
non-shared kernel pmd, at least for a 32-bit host.  I think the point is 
that nothing that requires an identity mapping will work under Xen 
anyway, so Xen just doesn't care about this case.

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 20:03                                                           ` Jeremy Fitzhardinge
@ 2008-01-28 20:06                                                             ` H. Peter Anvin
  2008-01-28 20:28                                                               ` Rafael J. Wysocki
  0 siblings, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-28 20:06 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Pavel Machek, Rafael J. Wysocki, Ingo Molnar, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Jeremy Fitzhardinge wrote:
> H. Peter Anvin wrote:
>>  and we already have to have code to synchronize the PGDs on !PAE and 
>> the PMDs on Xen (although that was supposedly getting fixed).
> 
> No, I don't have any plans there.  Xen will continue to require 
> non-shared kernel pmd, at least for a 32-bit host.  I think the point is 
> that nothing that requires an identity mapping will work under Xen 
> anyway, so Xen just doesn't care about this case.
> 

Still makes it a special case, not just for this.

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 19:40                                                       ` Pavel Machek
  2008-01-28 19:51                                                         ` H. Peter Anvin
@ 2008-01-28 20:26                                                         ` Rafael J. Wysocki
  2008-01-28 20:31                                                           ` H. Peter Anvin
  2008-01-28 20:44                                                           ` Jeremy Fitzhardinge
  1 sibling, 2 replies; 80+ messages in thread
From: Rafael J. Wysocki @ 2008-01-28 20:26 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Ingo Molnar, H. Peter Anvin, Jeremy Fitzhardinge, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

On Monday, 28 of January 2008, Pavel Machek wrote:
> Hi!
> 
> > > >  /*
> > > >   * Swap suspend & friends need this for resume because things like the intel-agp
> > > >   * driver might have split up a kernel 4MB mapping.
> > > >   */
> > > > -char __nosavedata swsusp_pg_dir[PAGE_SIZE]
> > > > +char swsusp_pg_dir[PAGE_SIZE]
> > > 
> > > thanks, applied.
> 
> Sorry, this is subtle and I've overlooked it before.
> 
> (I thought you were only changing ifdef).
> 
> Now you memcpy() over pg_dir when that pgdir is in use during swsusp
> resume.

It is not.  swsusp hasn't been using swsusp_pg_dir for several months.
Hence, the patch. :-)

> Granted, you memcpy() with same data that already are there, 
> but it may still do some funny effects.
> 
> Hmm, but same argument applies to lower levels of paging in 64-bit and
> PAE cases, and we still do that memcpy-over-active-pagetables there...
> :-(.

Actually, no.  We only do that with the kernel code mapping which should be
safe as long as TLBs are not flushed (and they aren't).

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 20:06                                                             ` H. Peter Anvin
@ 2008-01-28 20:28                                                               ` Rafael J. Wysocki
  0 siblings, 0 replies; 80+ messages in thread
From: Rafael J. Wysocki @ 2008-01-28 20:28 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Jeremy Fitzhardinge, Pavel Machek, Ingo Molnar, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

On Monday, 28 of January 2008, H. Peter Anvin wrote:
> Jeremy Fitzhardinge wrote:
> > H. Peter Anvin wrote:
> >>  and we already have to have code to synchronize the PGDs on !PAE and 
> >> the PMDs on Xen (although that was supposedly getting fixed).
> > 
> > No, I don't have any plans there.  Xen will continue to require 
> > non-shared kernel pmd, at least for a 32-bit host.  I think the point is 
> > that nothing that requires an identity mapping will work under Xen 
> > anyway, so Xen just doesn't care about this case.
> > 
> 
> Still makes it a special case, not just for this.

In fact swsusp creates its own temporary page tables for restoring the last
part of the image.  Please have a look at
arch/x86/kernel/suspend_*_64.c and the files in arch/x86/power (most
importantly suspend.c).

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 20:26                                                         ` Rafael J. Wysocki
@ 2008-01-28 20:31                                                           ` H. Peter Anvin
  2008-01-28 20:59                                                             ` Rafael J. Wysocki
  2008-01-28 20:44                                                           ` Jeremy Fitzhardinge
  1 sibling, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-28 20:31 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, Ingo Molnar, Jeremy Fitzhardinge, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Rafael J. Wysocki wrote:
> On Monday, 28 of January 2008, Pavel Machek wrote:
>> Hi!
>>
>>>>>  /*
>>>>>   * Swap suspend & friends need this for resume because things like the intel-agp
>>>>>   * driver might have split up a kernel 4MB mapping.
>>>>>   */
>>>>> -char __nosavedata swsusp_pg_dir[PAGE_SIZE]
>>>>> +char swsusp_pg_dir[PAGE_SIZE]
>>>> thanks, applied.
>> Sorry, this is subtle and I've overlooked it before.
>>
>> (I thought you were only changing ifdef).
>>
>> Now you memcpy() over pg_dir when that pgdir is in use during swsusp
>> resume.
> 
> It is not.  swsusp hasn't been using swsusp_pg_dir for several months.
> Hence, the patch. :-)
> 
>> Granted, you memcpy() with same data that already are there, 
>> but it may still do some funny effects.
>>
>> Hmm, but same argument applies to lower levels of paging in 64-bit and
>> PAE cases, and we still do that memcpy-over-active-pagetables there...
>> :-(.
> 
> Actually, no.  We only do that with the kernel code mapping which should be
> safe as long as TLBs are not flushed (and they aren't).
> 

Okay... does that in any way affect using the kernel code mapping 
synchronization code to maintain a set of trampoline pagetables?

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 20:26                                                         ` Rafael J. Wysocki
  2008-01-28 20:31                                                           ` H. Peter Anvin
@ 2008-01-28 20:44                                                           ` Jeremy Fitzhardinge
  2008-01-28 20:50                                                             ` Rafael J. Wysocki
  1 sibling, 1 reply; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-01-28 20:44 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, Ingo Molnar, H. Peter Anvin, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Rafael J. Wysocki wrote:
> Actually, no.  We only do that with the kernel code mapping which should be
> safe as long as TLBs are not flushed (and they aren't).
>   

Er, what?  Assuming the TLB will retain some mappings while you 
overwrite the pagetable is a highly dubious prospect.  Are you copying 
the same values over, or something else?

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 20:44                                                           ` Jeremy Fitzhardinge
@ 2008-01-28 20:50                                                             ` Rafael J. Wysocki
  2008-01-28 21:28                                                               ` H. Peter Anvin
  0 siblings, 1 reply; 80+ messages in thread
From: Rafael J. Wysocki @ 2008-01-28 20:50 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Pavel Machek, Ingo Molnar, H. Peter Anvin, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

On Monday, 28 of January 2008, Jeremy Fitzhardinge wrote:
> Rafael J. Wysocki wrote:
> > Actually, no.  We only do that with the kernel code mapping which should be
> > safe as long as TLBs are not flushed (and they aren't).
> >   
> 
> Er, what?  Assuming the TLB will retain some mappings while you 
> overwrite the pagetable is a highly dubious prospect.  Are you copying 
> the same values over, or something else?

As long as a relocatable kernel is not used to restore a non-relocatable one
(or vice versa), we're copying the same values over.

Rafael

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 20:31                                                           ` H. Peter Anvin
@ 2008-01-28 20:59                                                             ` Rafael J. Wysocki
  0 siblings, 0 replies; 80+ messages in thread
From: Rafael J. Wysocki @ 2008-01-28 20:59 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Pavel Machek, Ingo Molnar, Jeremy Fitzhardinge, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

On Monday, 28 of January 2008, H. Peter Anvin wrote:
> Rafael J. Wysocki wrote:
> > On Monday, 28 of January 2008, Pavel Machek wrote:
> >> Hi!
> >>
> >>>>>  /*
> >>>>>   * Swap suspend & friends need this for resume because things like the intel-agp
> >>>>>   * driver might have split up a kernel 4MB mapping.
> >>>>>   */
> >>>>> -char __nosavedata swsusp_pg_dir[PAGE_SIZE]
> >>>>> +char swsusp_pg_dir[PAGE_SIZE]
> >>>> thanks, applied.
> >> Sorry, this is subtle and I've overlooked it before.
> >>
> >> (I thought you were only changing ifdef).
> >>
> >> Now you memcpy() over pg_dir when that pgdir is in use during swsusp
> >> resume.
> > 
> > It is not.  swsusp hasn't been using swsusp_pg_dir for several months.
> > Hence, the patch. :-)
> > 
> >> Granted, you memcpy() with same data that already are there, 
> >> but it may still do some funny effects.
> >>
> >> Hmm, but same argument applies to lower levels of paging in 64-bit and
> >> PAE cases, and we still do that memcpy-over-active-pagetables there...
> >> :-(.
> > 
> > Actually, no.  We only do that with the kernel code mapping which should be
> > safe as long as TLBs are not flushed (and they aren't).
> > 
> 
> Okay... does that in any way affect using the kernel code mapping 
> synchronization code to maintain a set of trampoline pagetables?

I really don't think so.

Rafael

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 20:50                                                             ` Rafael J. Wysocki
@ 2008-01-28 21:28                                                               ` H. Peter Anvin
  2008-01-28 22:02                                                                 ` Rafael J. Wysocki
  0 siblings, 1 reply; 80+ messages in thread
From: H. Peter Anvin @ 2008-01-28 21:28 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Jeremy Fitzhardinge, Pavel Machek, Ingo Molnar, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

Rafael J. Wysocki wrote:
> On Monday, 28 of January 2008, Jeremy Fitzhardinge wrote:
>> Rafael J. Wysocki wrote:
>>> Actually, no.  We only do that with the kernel code mapping which should be
>>> safe as long as TLBs are not flushed (and they aren't).
>>>   
>> Er, what?  Assuming the TLB will retain some mappings while you 
>> overwrite the pagetable is a highly dubious prospect.  Are you copying 
>> the same values over, or something else?
> 
> As long as a relocatable kernel is not used to restore a non-relocatable one
> (or vice versa), we're copying the same values over.
> 

So that case is deliberately considered broken?

	-hpa

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 21:28                                                               ` H. Peter Anvin
@ 2008-01-28 22:02                                                                 ` Rafael J. Wysocki
  0 siblings, 0 replies; 80+ messages in thread
From: Rafael J. Wysocki @ 2008-01-28 22:02 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Jeremy Fitzhardinge, Pavel Machek, Ingo Molnar, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

On Monday, 28 of January 2008, H. Peter Anvin wrote:
> Rafael J. Wysocki wrote:
> > On Monday, 28 of January 2008, Jeremy Fitzhardinge wrote:
> >> Rafael J. Wysocki wrote:
> >>> Actually, no.  We only do that with the kernel code mapping which should be
> >>> safe as long as TLBs are not flushed (and they aren't).
> >>>   
> >> Er, what?  Assuming the TLB will retain some mappings while you 
> >> overwrite the pagetable is a highly dubious prospect.  Are you copying 
> >> the same values over, or something else?
> > 
> > As long as a relocatable kernel is not used to restore a non-relocatable one
> > (or vice versa), we're copying the same values over.
> > 
> 
> So that case is deliberately considered broken?

Not deliberately, but the fix I had caused a regression.  It's just a pending
issue.

Rafael

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-01-28 17:02                                                       ` Rafael J. Wysocki
@ 2008-02-01 13:51                                                         ` Ingo Molnar
  2008-02-01 14:28                                                           ` Rafael J. Wysocki
  0 siblings, 1 reply; 80+ messages in thread
From: Ingo Molnar @ 2008-02-01 13:51 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, H. Peter Anvin, Jeremy Fitzhardinge, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman,
	Len Brown


* Rafael J. Wysocki <rjw@sisk.pl> wrote:

> >  arch/x86/kernel/built-in.o: In function `wakeup_start':
> >  : undefined reference to `swsusp_pg_dir'
> > 
> > config attached.
> 
> I see.  CONFIG_HIBERNATION && CONFIG_ACPI -> CONFIG_ACPI_SLEEP and the 
> Makefile in arch/x86/kernel/acpi/ wants to build wakeup.S, which is 
> not necessary.  Hmm.
> 
> We can do a couple of things:
> (1) make wakeup_$(BITS).o depend on CONFIG_SUSPEND (alone)
>     This will build it if CONFIG_SUSPEND is set, but CONFIG_ACPI is not
>     (still, that's consistent with the change in question).
> (2) make wakeup_$(BITS).o depend on CONFIG_SUSPEND and CONFIG_ACPI
> (3) define CONFIG_ACPI_SUSPEND depending on ACPI and SUSPEND and
>     make wakeup_$(BITS).o as well as swsusp_pg_dir depend on that (most
>     elegant)
> 
> Which one do you prefer?

no strong preference here - pick the one you like best and send a patch 
please :-)

	Ingo

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-02-01 13:51                                                         ` Ingo Molnar
@ 2008-02-01 14:28                                                           ` Rafael J. Wysocki
  2008-02-01 14:54                                                             ` Ingo Molnar
  0 siblings, 1 reply; 80+ messages in thread
From: Rafael J. Wysocki @ 2008-02-01 14:28 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Pavel Machek, H. Peter Anvin, Jeremy Fitzhardinge, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman,
	Len Brown

On Friday, 1 of February 2008, Ingo Molnar wrote:
> 
> * Rafael J. Wysocki <rjw@sisk.pl> wrote:
> 
> > >  arch/x86/kernel/built-in.o: In function `wakeup_start':
> > >  : undefined reference to `swsusp_pg_dir'
> > > 
> > > config attached.
> > 
> > I see.  CONFIG_HIBERNATION && CONFIG_ACPI -> CONFIG_ACPI_SLEEP and the 
> > Makefile in arch/x86/kernel/acpi/ wants to build wakeup.S, which is 
> > not necessary.  Hmm.
> > 
> > We can do a couple of things:
> > (1) make wakeup_$(BITS).o depend on CONFIG_SUSPEND (alone)
> >     This will build it if CONFIG_SUSPEND is set, but CONFIG_ACPI is not
> >     (still, that's consistent with the change in question).
> > (2) make wakeup_$(BITS).o depend on CONFIG_SUSPEND and CONFIG_ACPI
> > (3) define CONFIG_ACPI_SUSPEND depending on ACPI and SUSPEND and
> >     make wakeup_$(BITS).o as well as swsusp_pg_dir depend on that (most
> >     elegant)
> > 
> > Which one do you prefer?
> 
> no strong preference here - pick the one you like best and send a patch 
> please :-)

Here you go, but I think it falls into the ACPI category.

---
From: Rafael J. Wysocki <rjw@sisk.pl>

Since hibernation uses its own temporary page tables for restoring
the image kernel, swsusp_pg_dir is only needed for ACPI resume from
RAM.  Also, some files under arch/x86/kernel/acpi need only be compiled
if ACPI suspend to RAM is going to be used.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/x86/kernel/acpi/Makefile |    2 +-
 arch/x86/mm/init_32.c         |   10 +++++-----
 drivers/acpi/Kconfig          |    5 +++++
 3 files changed, 11 insertions(+), 6 deletions(-)

Index: linux-2.6/arch/x86/mm/init_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_32.c
+++ linux-2.6/arch/x86/mm/init_32.c
@@ -423,23 +423,23 @@ static void __init pagetable_init(void)
 	paravirt_pagetable_setup_done(pgd_base);
 }
 
-#if defined(CONFIG_HIBERNATION) || defined(CONFIG_ACPI)
+#ifdef CONFIG_ACPI_SUSPEND
 /*
- * Swap suspend & friends need this for resume because things like the intel-agp
+ * ACPI suspend needs this for resume, because things like the intel-agp
  * driver might have split up a kernel 4MB mapping.
  */
-char __nosavedata swsusp_pg_dir[PAGE_SIZE]
+char swsusp_pg_dir[PAGE_SIZE]
 	__attribute__ ((aligned(PAGE_SIZE)));
 
 static inline void save_pg_dir(void)
 {
 	memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
 }
-#else
+#else /* !CONFIG_ACPI_SUSPEND */
 static inline void save_pg_dir(void)
 {
 }
-#endif
+#endif /* !CONFIG_ACPI_SUSPEND */
 
 void zap_low_mappings(void)
 {
Index: linux-2.6/drivers/acpi/Kconfig
===================================================================
--- linux-2.6.orig/drivers/acpi/Kconfig
+++ linux-2.6/drivers/acpi/Kconfig
@@ -42,6 +42,11 @@ menuconfig ACPI
 
 if ACPI
 
+config ACPI_SUSPEND
+	bool
+	depends on SUSPEND
+	default y
+
 config ACPI_SLEEP
 	bool
 	depends on PM_SLEEP
Index: linux-2.6/arch/x86/kernel/acpi/Makefile
===================================================================
--- linux-2.6.orig/arch/x86/kernel/acpi/Makefile
+++ linux-2.6/arch/x86/kernel/acpi/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_ACPI)		+= boot.o
-obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup_$(BITS).o
+obj-$(CONFIG_ACPI_SUSPEND)	+= sleep.o wakeup_$(BITS).o
 
 ifneq ($(CONFIG_ACPI_PROCESSOR),)
 obj-y				+= cstate.o processor.o

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-02-01 14:28                                                           ` Rafael J. Wysocki
@ 2008-02-01 14:54                                                             ` Ingo Molnar
  2008-02-01 22:55                                                               ` Len Brown
  0 siblings, 1 reply; 80+ messages in thread
From: Ingo Molnar @ 2008-02-01 14:54 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, H. Peter Anvin, Jeremy Fitzhardinge, Ian Campbell,
	Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman,
	Len Brown


* Rafael J. Wysocki <rjw@sisk.pl> wrote:

> > no strong preference here - pick the one you like best and send a 
> > patch please :-)
> 
> Here you go, but I think it falls into the ACPI category.

agreed - Len, would you mind to pick this patch up?

Acked-by: Ingo Molnar <mingo@elte.hu>
	
	Ingo

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
  2008-02-01 14:54                                                             ` Ingo Molnar
@ 2008-02-01 22:55                                                               ` Len Brown
  0 siblings, 0 replies; 80+ messages in thread
From: Len Brown @ 2008-02-01 22:55 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rafael J. Wysocki, Pavel Machek, H. Peter Anvin,
	Jeremy Fitzhardinge, Ian Campbell, Mika Penttilä,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Eric W. Biederman

On Friday 01 February 2008 09:54, Ingo Molnar wrote:
> 
> * Rafael J. Wysocki <rjw@sisk.pl> wrote:
> 
> > > no strong preference here - pick the one you like best and send a 
> > > patch please :-)
> > 
> > Here you go, but I think it falls into the ACPI category.
> 
> agreed - Len, would you mind to pick this patch up?

This won't work as written -- for the ACPI code doesn't currently optimize
for the HIBERNATE && ! SUSPEND case, and so both code paths are under ACPI_SLEEP.

While some day there may be a justification to make that optimization,
this isn't the day, and this isn't the patch.

So Rafael and I talked about it and decided to go with the simpler
patch below -- which I'll push.

thanks,
-Len

---
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index da524fb..f2f36f8 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -423,23 +423,23 @@ static void __init pagetable_init(void)
 	paravirt_pagetable_setup_done(pgd_base);
 }
 
-#if defined(CONFIG_HIBERNATION) || defined(CONFIG_ACPI)
+#ifdef CONFIG_ACPI_SLEEP
 /*
- * Swap suspend & friends need this for resume because things like the intel-agp
+ * ACPI suspend needs this for resume, because things like the intel-agp
  * driver might have split up a kernel 4MB mapping.
  */
-char __nosavedata swsusp_pg_dir[PAGE_SIZE]
+char swsusp_pg_dir[PAGE_SIZE]
 	__attribute__ ((aligned(PAGE_SIZE)));
 
 static inline void save_pg_dir(void)
 {
 	memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
 }
-#else
+#else /* !CONFIG_ACPI_SLEEP */
 static inline void save_pg_dir(void)
 {
 }
-#endif
+#endif /* !CONFIG_ACPI_SLEEP */
 
 void zap_low_mappings(void)
 {

^ permalink raw reply	[flat|nested] 80+ messages in thread

end of thread, other threads:[~2008-02-01 22:57 UTC | newest]

Thread overview: 80+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-01-19 16:08 [PATCH] x86/voyager: Switch voyager memory detection to early_ioremap Ian Campbell
2008-01-19 16:08 ` [PATCH] x86: Construct 32 bit boot time page tables in native format Ian Campbell
2008-01-19 23:07   ` Andi Kleen
2008-01-19 23:50     ` H. Peter Anvin
2008-01-20 16:44     ` Ian Campbell
2008-01-20 17:39       ` Andi Kleen
2008-01-20 18:48         ` H. Peter Anvin
2008-01-20 18:55           ` Andi Kleen
2008-01-20 18:54             ` H. Peter Anvin
2008-01-22 10:05           ` Ingo Molnar
2008-01-22 16:23             ` H. Peter Anvin
2008-01-20 18:30   ` Mika Penttilä
2008-01-21 21:23     ` Ian Campbell
2008-01-21 21:38       ` H. Peter Anvin
2008-01-21 21:46         ` Ian Campbell
2008-01-22  2:16           ` H. Peter Anvin
2008-01-22 17:36             ` Ian Campbell
2008-01-22 18:23               ` H. Peter Anvin
2008-01-22 19:48                 ` Ian Campbell
2008-01-22 20:00                   ` H. Peter Anvin
2008-01-22 20:36                     ` Ingo Molnar
2008-01-22 20:43                       ` H. Peter Anvin
2008-01-22 20:45                         ` Ingo Molnar
2008-01-22 20:52                       ` Ian Campbell
2008-01-22 21:00                         ` H. Peter Anvin
2008-01-22 22:21                           ` Ian Campbell
2008-01-22 21:00                       ` [PATCH] x86: make nx_enabled conditional on CONFIG_X86_PAE Harvey Harrison
2008-01-22 21:04                         ` Ingo Molnar
2008-01-22 21:35                           ` Harvey Harrison
2008-01-22 21:07                         ` Harvey Harrison
     [not found]                         ` <p73odbdlyiu.fsf@crumb.suse.de>
2008-01-23 11:21                           ` Harvey Harrison
2008-01-23 20:52                       ` [PATCH] x86: Construct 32 bit boot time page tables in native format Ian Campbell
2008-01-24  1:06                         ` Jeremy Fitzhardinge
2008-01-24  9:39                           ` Ian Campbell
2008-01-24 22:06                             ` H. Peter Anvin
2008-01-24 22:35                               ` Jeremy Fitzhardinge
2008-01-24 22:39                                 ` H. Peter Anvin
2008-01-24 22:58                                   ` Jeremy Fitzhardinge
2008-01-24 23:08                                     ` H. Peter Anvin
2008-01-24 23:40                                       ` Jeremy Fitzhardinge
2008-01-24 23:44                                         ` H. Peter Anvin
2008-01-24 23:51                                           ` Jeremy Fitzhardinge
2008-01-25  0:02                                             ` H. Peter Anvin
2008-01-25  0:11                                               ` Jeremy Fitzhardinge
2008-01-25  0:15                                                 ` H. Peter Anvin
2008-01-25  0:31                                                   ` Jeremy Fitzhardinge
2008-01-25  0:37                                                     ` H. Peter Anvin
2008-01-25  2:56                                                       ` Eric W. Biederman
2008-01-25  4:41                                                         ` Jeremy Fitzhardinge
2008-01-25 11:07                                                           ` Eric W. Biederman
2008-01-24 23:51                                         ` H. Peter Anvin
2008-01-25  0:20                                           ` Pavel Machek
2008-01-25  0:27                                             ` H. Peter Anvin
2008-01-25  0:46                                               ` Rafael J. Wysocki
2008-01-25  1:08                                                 ` H. Peter Anvin
2008-01-25  2:16                                               ` Eric W. Biederman
2008-01-25  2:25                                                 ` H. Peter Anvin
2008-01-25  7:49                                               ` Pavel Machek
2008-01-25 22:02                                                 ` Rafael J. Wysocki
2008-01-25 22:11                                                   ` Pavel Machek
2008-01-28 15:00                                                   ` Ingo Molnar
2008-01-28 15:25                                                     ` Rafael J. Wysocki
2008-01-28 19:40                                                       ` Pavel Machek
2008-01-28 19:51                                                         ` H. Peter Anvin
2008-01-28 20:03                                                           ` Jeremy Fitzhardinge
2008-01-28 20:06                                                             ` H. Peter Anvin
2008-01-28 20:28                                                               ` Rafael J. Wysocki
2008-01-28 20:26                                                         ` Rafael J. Wysocki
2008-01-28 20:31                                                           ` H. Peter Anvin
2008-01-28 20:59                                                             ` Rafael J. Wysocki
2008-01-28 20:44                                                           ` Jeremy Fitzhardinge
2008-01-28 20:50                                                             ` Rafael J. Wysocki
2008-01-28 21:28                                                               ` H. Peter Anvin
2008-01-28 22:02                                                                 ` Rafael J. Wysocki
2008-01-28 16:12                                                     ` Ingo Molnar
2008-01-28 17:02                                                       ` Rafael J. Wysocki
2008-02-01 13:51                                                         ` Ingo Molnar
2008-02-01 14:28                                                           ` Rafael J. Wysocki
2008-02-01 14:54                                                             ` Ingo Molnar
2008-02-01 22:55                                                               ` Len Brown

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).