LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Jeremy Fitzhardinge <jeremy@goop.org>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-kernel@vger.kernel.org, virtualization@lists.osdl.org,
xen-devel@lists.xensource.com, Chris Wright <chris@sous-sol.org>,
Zachary Amsden <zach@vmware.com>, Andi Kleen <ak@muc.de>,
Rusty Russell <rusty@rustcorp.com.au>
Subject: [patch 04/20] XEN-paravirt: paravirt pagetable init
Date: Fri, 12 Jan 2007 17:45:43 -0800 [thread overview]
Message-ID: <20070113014647.487710773@goop.org> (raw)
In-Reply-To: <20070113014539.408244126@goop.org>
[-- Attachment #1: paravirt-memory-init.patch --]
[-- Type: text/plain, Size: 10884 bytes --]
Add paravirt hooks into the initial pagetable setup. In the native
case, the kernel builds itself a new initial pagetable from scratch.
In the Xen case, the kernel starts with a pagetable provided by the
hypervisor, which is used as the prototype for the kernel-generated
pagetable. The hooks added in this patch allow either mode of
operation without having special cases (the main change to the
pagetable construction logic is a test to make sure a pagetable
slot is actually empty before populating it).
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Chris Wright <chris@sous-sol.org>
Cc: Zachary Amsden <zach@vmware.com>
Cc: Andi Kleen <ak@muc.de>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
===================================================================
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -379,6 +379,43 @@ static fastcall void native_io_delay(voi
{
asm volatile("outb %al,$0x80");
}
+
+void native_pagetable_setup_start(pgd_t *base)
+{
+#ifdef CONFIG_X86_PAE
+ int i;
+
+ /*
+ * Init entries of the first-level page table to the
+ * zero page, if they haven't already been set up.
+ *
+ * In a normal native boot, we'll be running on a
+ * pagetable rooted in swapper_pg_dir, but not in PAE
+ * mode, so this will end up clobbering the mappings
+ * for the lower 24Mbytes of the address space,
+ * without affecting the kernel address space.
+ */
+ for (i = 0; i < USER_PTRS_PER_PGD; i++)
+ set_pgd(&base[i],
+ __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
+ memset(&base[USER_PTRS_PER_PGD], 0, sizeof(pgd_t));
+#endif
+}
+
+void native_pagetable_setup_done(pgd_t *base)
+{
+#ifdef CONFIG_X86_PAE
+ /*
+ * Add low memory identity-mappings - SMP needs it when
+ * starting up on an AP from real-mode. In the non-PAE
+ * case we already have these mappings through head.S.
+ * All user-space mappings are explicitly cleared after
+ * SMP startup.
+ */
+ set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
+#endif
+}
+
static fastcall void native_flush_tlb(void)
{
@@ -627,6 +664,9 @@ struct paravirt_ops paravirt_ops = {
#endif
.set_lazy_mode = (void *)native_nop,
+ .pagetable_setup_start = native_pagetable_setup_start,
+ .pagetable_setup_done = native_pagetable_setup_done,
+
.flush_tlb_user = native_flush_tlb,
.flush_tlb_kernel = native_flush_tlb_global,
.flush_tlb_single = native_flush_tlb_single,
===================================================================
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -42,6 +42,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
+#include <asm/paravirt.h>
unsigned int __VMALLOC_RESERVE = 128 << 20;
@@ -62,6 +63,8 @@ static pmd_t * __init one_md_table_init(
#ifdef CONFIG_X86_PAE
pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+ memset(pmd_table, 0, PAGE_SIZE);
+
paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
@@ -83,12 +86,11 @@ static pte_t * __init one_page_table_ini
{
if (pmd_none(*pmd)) {
pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+ memset(page_table, 0, PAGE_SIZE);
+
paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
- if (page_table != pte_offset_kernel(pmd, 0))
- BUG();
-
- return page_table;
+ BUG_ON(page_table != pte_offset_kernel(pmd, 0));
}
return pte_offset_kernel(pmd, 0);
@@ -119,7 +121,7 @@ static void __init page_table_range_init
pgd = pgd_base + pgd_idx;
for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
- if (pgd_none(*pgd))
+ if (!(pgd_val(*pgd) & _PAGE_PRESENT))
one_md_table_init(pgd);
pud = pud_offset(pgd, vaddr);
pmd = pmd_offset(pud, vaddr);
@@ -158,7 +160,11 @@ static void __init kernel_physical_mappi
pfn = 0;
for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
- pmd = one_md_table_init(pgd);
+ if (!(pgd_val(*pgd) & _PAGE_PRESENT))
+ pmd = one_md_table_init(pgd);
+ else
+ pmd = pmd_offset(pud_offset(pgd, PAGE_OFFSET), PAGE_OFFSET);
+
if (pfn >= max_low_pfn)
continue;
for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
@@ -167,20 +173,26 @@ static void __init kernel_physical_mappi
/* Map with big pages if possible, otherwise create normal page tables. */
if (cpu_has_pse) {
unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
-
- if (is_kernel_text(address) || is_kernel_text(address2))
- set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
- else
- set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
+ if (!pmd_present(*pmd)) {
+ if (is_kernel_text(address) || is_kernel_text(address2))
+ set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
+ else
+ set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
+ }
pfn += PTRS_PER_PTE;
} else {
pte = one_page_table_init(pmd);
- for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
- if (is_kernel_text(address))
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
- else
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+ for (pte_ofs = 0;
+ pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
+ pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
+ if (pte_present(*pte))
+ continue;
+
+ if (is_kernel_text(address))
+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+ else
+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
}
}
}
@@ -337,19 +349,32 @@ extern void __init remap_numa_kva(void);
#define remap_numa_kva() do {} while (0)
#endif
+/*
+ * Build a proper pagetable for the kernel mappings. Up until this
+ * point, we've been running on some set of pagetables constructed by
+ * the boot process.
+ *
+ * If we're booting on native hardware, this will be a pagetable
+ * constructed in arch/i386/kernel/head.S, and not running in PAE mode
+ * (even if we'll end up running in PAE). The root of the pagetable
+ * will be swapper_pg_dir.
+ *
+ * If we're booting paravirtualized under a hypervisor, then there are
+ * more options: we may already be running PAE, and the pagetable may
+ * or may not be based in swapper_pg_dir. In any case,
+ * paravirt_pagetable_setup_start() will set up swapper_pg_dir
+ * appropriately for the rest of the initialization to work.
+ *
+ * In general, pagetable_init() assumes that the pagetable may already
+ * be partially populated, and so it avoids stomping on any existing
+ * mappings.
+ */
static void __init pagetable_init (void)
{
- unsigned long vaddr;
+ unsigned long vaddr, end;
pgd_t *pgd_base = swapper_pg_dir;
-#ifdef CONFIG_X86_PAE
- int i;
- /* Init entries of the first-level page table to the zero page */
- for (i = 0; i < PTRS_PER_PGD; i++)
- set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
-#else
- paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
-#endif
+ paravirt_pagetable_setup_start(pgd_base);
/* Enable PSE if available */
if (cpu_has_pse) {
@@ -371,20 +396,12 @@ static void __init pagetable_init (void)
* created - mappings will be set by set_fixmap():
*/
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
- page_table_range_init(vaddr, 0, pgd_base);
+ end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
+ page_table_range_init(vaddr, end, pgd_base);
permanent_kmaps_init(pgd_base);
-#ifdef CONFIG_X86_PAE
- /*
- * Add low memory identity-mappings - SMP needs it when
- * starting up on an AP from real-mode. In the non-PAE
- * case we already have these mappings through head.S.
- * All user-space mappings are explicitly cleared after
- * SMP startup.
- */
- set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
-#endif
+ paravirt_pagetable_setup_done(pgd_base);
}
#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
===================================================================
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -49,6 +49,9 @@ struct paravirt_ops
void (*arch_setup)(void);
char *(*memory_setup)(void);
void (*init_IRQ)(void);
+
+ void (*pagetable_setup_start)(pgd_t *pgd_base);
+ void (*pagetable_setup_done)(pgd_t *pgd_base);
void (*banner)(void);
@@ -185,6 +188,8 @@ struct paravirt_ops
extern struct paravirt_ops paravirt_ops;
+void native_pagetable_setup_start(pgd_t *pgd);
+
#ifdef CONFIG_X86_PAE
fastcall unsigned long long native_pte_val(pte_t);
fastcall unsigned long long native_pmd_val(pmd_t);
@@ -389,6 +394,17 @@ static inline void setup_secondary_clock
}
#endif
+static inline void paravirt_pagetable_setup_start(pgd_t *base)
+{
+ if (paravirt_ops.pagetable_setup_start)
+ (*paravirt_ops.pagetable_setup_start)(base);
+}
+
+static inline void paravirt_pagetable_setup_done(pgd_t *base)
+{
+ if (paravirt_ops.pagetable_setup_done)
+ (*paravirt_ops.pagetable_setup_done)(base);
+}
fastcall void native_set_pte(pte_t *ptep, pte_t pteval);
fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval);
@@ -615,5 +631,43 @@ 772:; \
call *paravirt_ops+PARAVIRT_read_cr0
#endif /* __ASSEMBLY__ */
+#else /* !CONFIG_PARAVIRT */
+#include <asm/pgtable.h>
+
+static inline void paravirt_pagetable_setup_start(pgd_t *base)
+{
+#ifdef CONFIG_X86_PAE
+ int i;
+
+ /*
+ * Init entries of the first-level page table to the
+ * zero page, if they haven't already been set up.
+ *
+ * In a normal native boot, we'll be running on a
+ * pagetable rooted in swapper_pg_dir, but not in PAE
+ * mode, so this will end up clobbering the mappings
+ * for the lower 24Mbytes of the address space,
+ * without affecting the kernel address space.
+ */
+ for (i = 0; i < USER_PTRS_PER_PGD; i++)
+ set_pgd(&base[i],
+ __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
+ memset(&base[USER_PTRS_PER_PGD], 0, sizeof(pgd_t));
+#endif
+}
+
+static inline void paravirt_pagetable_setup_done(pgd_t *base)
+{
+#ifdef CONFIG_X86_PAE
+ /*
+ * Add low memory identity-mappings - SMP needs it when
+ * starting up on an AP from real-mode. In the non-PAE
+ * case we already have these mappings through head.S.
+ * All user-space mappings are explicitly cleared after
+ * SMP startup.
+ */
+ set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
+#endif
+}
#endif /* CONFIG_PARAVIRT */
#endif /* __ASM_PARAVIRT_H */
===================================================================
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -15,7 +15,10 @@
#include <asm/processor.h>
#include <asm/fixmap.h>
#include <linux/threads.h>
+
+#ifdef CONFIG_PARAVIRT /* guarded to prevent cyclic dependency */
#include <asm/paravirt.h>
+#endif
#ifndef _I386_BITOPS_H
#include <asm/bitops.h>
--
next prev parent reply other threads:[~2007-01-13 23:10 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-01-13 1:45 [patch 00/20] XEN-paravirt: Xen guest implementation for paravirt_ops interface Jeremy Fitzhardinge
2007-01-13 1:45 ` [patch 01/20] XEN-paravirt: Fix typo in sync_constant_test_bit()s name Jeremy Fitzhardinge
2007-01-13 1:45 ` [patch 02/20] XEN-paravirt: Add a flag to allow the VGA console to be disabled Jeremy Fitzhardinge
2007-01-14 0:27 ` [Xen-devel] " Alan
2007-01-13 1:45 ` [patch 03/20] XEN-paravirt: paravirt: page-table accessors Jeremy Fitzhardinge
2007-01-13 1:45 ` Jeremy Fitzhardinge [this message]
2007-01-13 1:45 ` [patch 05/20] XEN-paravirt: paravirt: reserve fixmap slot Jeremy Fitzhardinge
2007-01-13 1:45 ` [patch 06/20] XEN-paravirt: remove pgd ctor Jeremy Fitzhardinge
2007-01-13 1:45 ` [patch 07/20] XEN-paravirt: paravirt shared kernel pmd flag Jeremy Fitzhardinge
2007-01-15 8:59 ` [Xen-devel] " Jan Beulich
2007-01-13 1:45 ` [patch 08/20] XEN-paravirt: paravirt pgd allocation alignment Jeremy Fitzhardinge
2007-01-13 1:45 ` [patch 09/20] XEN-paravirt: dont export paravirt_ops structure, do individual functions Jeremy Fitzhardinge
2007-01-14 0:57 ` Rusty Russell
2007-01-13 1:45 ` [patch 10/20] XEN-paravirt: mm lifetime hooks Jeremy Fitzhardinge
2007-01-13 1:45 ` [patch 11/20] XEN-paravirt: Add apply_to_page_range() which applies a function to a pte range Jeremy Fitzhardinge
2007-01-13 1:45 ` [patch 12/20] XEN-paravirt: Xen: Add nosegneg capability to the vsyscall page notes Jeremy Fitzhardinge
2007-01-13 1:45 ` [patch 13/20] XEN-paravirt: Xen: Add config options and disable unsupported config options Jeremy Fitzhardinge
2007-01-13 1:45 ` [patch 15/20] XEN-paravirt: Xen: core paravirt guest changes Jeremy Fitzhardinge
2007-01-13 1:45 ` [patch 16/20] XEN-paravirt: Add the Xen virtual console driver Jeremy Fitzhardinge
2007-01-14 0:37 ` Alan
2007-01-14 0:35 ` Jeremy Fitzhardinge
2007-01-15 13:03 ` Pavel Machek
2007-01-19 4:11 ` Jeremy Fitzhardinge
2007-01-13 1:45 ` [patch 17/20] XEN-paravirt: Add Xen grant table support Jeremy Fitzhardinge
2007-01-15 13:05 ` Pavel Machek
2007-01-19 4:07 ` Jeremy Fitzhardinge
2007-01-13 1:45 ` [patch 18/20] XEN-paravirt: Add Xen driver utility functions Jeremy Fitzhardinge
2007-01-14 7:41 ` Greg KH
2007-01-13 1:45 ` [patch 19/20] XEN-paravirt: Add the Xenbus sysfs and virtual device hotplug driver Jeremy Fitzhardinge
2007-01-15 13:18 ` Pavel Machek
2007-01-13 1:45 ` [patch 20/20] XEN-paravirt: Add Xen virtual block device driver Jeremy Fitzhardinge
2007-01-14 1:07 ` Arjan van de Ven
2007-01-14 7:43 ` Greg KH
2007-01-16 2:53 ` [Xen-devel] " Mark Williamson
2007-01-14 11:05 ` Jan Engelhardt
2007-01-14 11:24 ` Muli Ben-Yehuda
2007-01-14 11:35 ` Jan Engelhardt
2007-01-14 12:37 ` Keir Fraser
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070113014647.487710773@goop.org \
--to=jeremy@goop.org \
--cc=ak@muc.de \
--cc=akpm@osdl.org \
--cc=chris@sous-sol.org \
--cc=linux-kernel@vger.kernel.org \
--cc=rusty@rustcorp.com.au \
--cc=virtualization@lists.osdl.org \
--cc=xen-devel@lists.xensource.com \
--cc=zach@vmware.com \
--subject='Re: [patch 04/20] XEN-paravirt: paravirt pagetable init' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).