LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Jeremy Fitzhardinge <jeremy@goop.org>
To: Andi Kleen <ak@muc.de>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org, virtualization@lists.osdl.org,
	xen-devel@lists.xensource.com, Chris Wright <chrisw@sous-sol.org>,
	Zachary Amsden <zach@vmware.com>
Subject: [patch 08/21] Xen-paravirt: Allow paravirt backend to choose kernel PMD sharing
Date: Thu, 15 Feb 2007 18:24:57 -0800	[thread overview]
Message-ID: <20070216022531.119986720@goop.org> (raw)
In-Reply-To: <20070216022449.739760547@goop.org>

[-- Attachment #1: shared-kernel-pmd.patch --]
[-- Type: text/plain, Size: 9672 bytes --]

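Xen does not allow guests to have the kernel pmd shared between page
tables, so parameterize pgtable.c to allow both modes of operation.

The mode is selected by a new paravirt_ops.shared_kernel_pmd flag,
accessed through the SHARED_KERNEL_PMD macro.  SHARED_KERNEL_PMD is
always 0 for non-PAE, always 1 for PAE without CONFIG_PARAVIRT, and
defaults to 1 for the native ("bare hardware") paravirt_ops on PAE.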

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
--
 arch/i386/kernel/paravirt.c            |    1 
 arch/i386/mm/fault.c                   |    6 +--
 arch/i386/mm/pageattr.c                |    2 -
 arch/i386/mm/pgtable.c                 |   61 +++++++++++++++++++------------
 include/asm-i386/page.h                |    7 ++-
 include/asm-i386/paravirt.h            |    1 
 include/asm-i386/pgtable-2level-defs.h |    2 +
 include/asm-i386/pgtable-2level.h      |    2 -
 include/asm-i386/pgtable-3level-defs.h |    6 +++
 include/asm-i386/pgtable-3level.h      |   16 ++------
 include/asm-i386/pgtable.h             |    7 +++
 11 files changed, 68 insertions(+), 43 deletions(-)

===================================================================
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -572,6 +572,7 @@ struct paravirt_ops paravirt_ops = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
 	.kernel_rpl = 0,
+	.shared_kernel_pmd = 1,	/* Only used when CONFIG_X86_PAE is set */
 
  	.patch = native_patch,
 	.banner = default_banner,
===================================================================
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -616,8 +616,7 @@ do_sigbus:
 	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
 }
 
-#ifndef CONFIG_X86_PAE
-void vmalloc_sync_all(void)
+void _vmalloc_sync_all(void)
 {
 	/*
 	 * Note that races in the updates of insync and start aren't
@@ -628,6 +627,8 @@ void vmalloc_sync_all(void)
 	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
 	static unsigned long start = TASK_SIZE;
 	unsigned long address;
+
+	BUG_ON(SHARED_KERNEL_PMD);
 
 	BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
 	for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
@@ -651,4 +652,3 @@ void vmalloc_sync_all(void)
 			start = address + PGDIR_SIZE;
 	}
 }
-#endif
===================================================================
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -91,7 +91,7 @@ static void set_pmd_pte(pte_t *kpte, uns
 	unsigned long flags;
 
 	set_pte_atomic(kpte, pte); 	/* change init_mm */
-	if (PTRS_PER_PMD > 1)
+	if (SHARED_KERNEL_PMD)
 		return;
 
 	spin_lock_irqsave(&pgd_lock, flags);
===================================================================
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -241,31 +241,42 @@ static void pgd_ctor(pgd_t *pgd)
 	unsigned long flags;
 
 	if (PTRS_PER_PMD == 1) {
+		/* !PAE, no pagetable sharing */
 		memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+
+		clone_pgd_range(pgd + USER_PTRS_PER_PGD,
+				swapper_pg_dir + USER_PTRS_PER_PGD,
+				KERNEL_PGD_PTRS);
+
 		spin_lock_irqsave(&pgd_lock, flags);
-	}
-
-	clone_pgd_range(pgd + USER_PTRS_PER_PGD,
-			swapper_pg_dir + USER_PTRS_PER_PGD,
-			KERNEL_PGD_PTRS);
-
-	if (PTRS_PER_PMD > 1)
-		return;
-
-	/* must happen under lock */
-	paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
-			__pa(swapper_pg_dir) >> PAGE_SHIFT,
-			USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
-
-	pgd_list_add(pgd);
-	spin_unlock_irqrestore(&pgd_lock, flags);
+
+		/* must happen under lock */
+		paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
+					__pa(swapper_pg_dir) >> PAGE_SHIFT,
+					USER_PTRS_PER_PGD,
+					PTRS_PER_PGD - USER_PTRS_PER_PGD);
+
+		pgd_list_add(pgd);
+		spin_unlock_irqrestore(&pgd_lock, flags);
+	} else {
+		/* PAE, PMD may be shared */
+		if (SHARED_KERNEL_PMD) {
+			clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+					swapper_pg_dir + USER_PTRS_PER_PGD,
+					KERNEL_PGD_PTRS);
+		} else {
+			spin_lock_irqsave(&pgd_lock, flags);
+			pgd_list_add(pgd);
+			spin_unlock_irqrestore(&pgd_lock, flags);
+		}
+	}
 }
 
 static void pgd_dtor(pgd_t *pgd)
 {
 	unsigned long flags; /* can be called from interrupt context */
 
-	if (PTRS_PER_PMD == 1)
+	if (SHARED_KERNEL_PMD)
 		return;
 
 	paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
@@ -279,19 +290,25 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	int i;
 	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
 
-	if (pgd)
+	if (likely(pgd))
 		pgd_ctor(pgd);
 
-	if (PTRS_PER_PMD == 1 || !pgd)
+	if (PTRS_PER_PMD == 1 || unlikely(!pgd))
 		return pgd;
 
-	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+	for (i = 0; i < (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD); ++i) {
 		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
-		if (!pmd)
+		if (unlikely(!pmd))
 			goto out_oom;
+
+		if (i >= USER_PTRS_PER_PGD)
+			memcpy(pmd, (void *)pgd_page_vaddr(swapper_pg_dir[i]),
+			       sizeof(pmd_t) * PTRS_PER_PMD);
+
 		paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
 		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
 	}
+
 	return pgd;
 
 out_oom:
@@ -312,7 +329,7 @@ void pgd_free(pgd_t *pgd)
 
 	/* in the PAE case user pgd entries are overwritten before usage */
 	if (PTRS_PER_PMD > 1)
-		for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+		for (i = 0; i < (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD); ++i) {
 			pgd_t pgdent = pgd[i];
 			void* pmd = (void *)__va(pgd_val(pgdent)-1);
 			paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
===================================================================
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -50,21 +50,23 @@ typedef struct { unsigned long long pgpr
 #ifndef CONFIG_PARAVIRT
 #define pmd_val(x)	((x).pmd)
 #define pte_val(x)	((x).pte_low | ((unsigned long long)(x).pte_high << 32))
+#define __pte(x) ((pte_t) { .pte_low = (x), .pte_high = ((x) >> 32) } )
 #define __pmd(x) ((pmd_t) { (x) } )
 #endif	/* CONFIG_PARAVIRT */
 #define HPAGE_SHIFT	21
 #include <asm-generic/pgtable-nopud.h>
-#else
+#else  /* !CONFIG_X86_PAE */
 typedef struct { unsigned long pte_low; } pte_t;
 typedef struct { unsigned long pgd; } pgd_t;
 typedef struct { unsigned long pgprot; } pgprot_t;
 #define boot_pte_t pte_t /* or would you rather have a typedef */
 #ifndef CONFIG_PARAVIRT
+#define __pte(x) ((pte_t) { (x) })
 #define pte_val(x)	((x).pte_low)
 #endif
 #define HPAGE_SHIFT	22
 #include <asm-generic/pgtable-nopmd.h>
-#endif
+#endif	/* CONFIG_X86_PAE */
 #define PTE_MASK	PAGE_MASK
 
 #ifdef CONFIG_HUGETLB_PAGE
@@ -79,7 +81,6 @@ typedef struct { unsigned long pgprot; }
 
 #ifndef CONFIG_PARAVIRT
 #define pgd_val(x)	((x).pgd)
-#define __pte(x) ((pte_t) { (x) } )
 #define __pgd(x) ((pgd_t) { (x) } )
 #endif
 
===================================================================
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -34,6 +34,7 @@ struct paravirt_ops
 struct paravirt_ops
 {
 	unsigned int kernel_rpl;
+	int shared_kernel_pmd;
  	int paravirt_enabled;
 	const char *name;
 
===================================================================
--- a/include/asm-i386/pgtable-2level-defs.h
+++ b/include/asm-i386/pgtable-2level-defs.h
@@ -1,5 +1,7 @@
 #ifndef _I386_PGTABLE_2LEVEL_DEFS_H
 #define _I386_PGTABLE_2LEVEL_DEFS_H
+
+#define SHARED_KERNEL_PMD	0
 
 /*
  * traditional i386 two-level paging structure:
===================================================================
--- a/include/asm-i386/pgtable-2level.h
+++ b/include/asm-i386/pgtable-2level.h
@@ -66,6 +66,4 @@ static inline int pte_exec_kernel(pte_t 
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_low })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val })
 
-void vmalloc_sync_all(void);
-
 #endif /* _I386_PGTABLE_2LEVEL_H */
===================================================================
--- a/include/asm-i386/pgtable-3level-defs.h
+++ b/include/asm-i386/pgtable-3level-defs.h
@@ -1,5 +1,11 @@
 #ifndef _I386_PGTABLE_3LEVEL_DEFS_H
 #define _I386_PGTABLE_3LEVEL_DEFS_H
+
+#ifdef CONFIG_PARAVIRT
+#define SHARED_KERNEL_PMD	(paravirt_ops.shared_kernel_pmd)
+#else
+#define SHARED_KERNEL_PMD	1
+#endif
 
 /*
  * PGDIR_SHIFT determines what a top-level page table entry can map
===================================================================
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -153,20 +153,14 @@ extern unsigned long long __supported_pt
 
 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 {
-	pte_t pte;
-
-	pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) | \
-					(pgprot_val(pgprot) >> 32);
-	pte.pte_high &= (__supported_pte_mask >> 32);
-	pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & \
-							__supported_pte_mask;
-	return pte;
+	return __pte((((unsigned long long)page_nr << PAGE_SHIFT) | 
+		      pgprot_val(pgprot)) & __supported_pte_mask);
 }
 
 static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 {
-	return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | \
-			pgprot_val(pgprot)) & __supported_pte_mask);
+	return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) |
+		      pgprot_val(pgprot)) & __supported_pte_mask);
 }
 
 /*
@@ -186,6 +180,4 @@ static inline pmd_t pfn_pmd(unsigned lon
 
 #define __pmd_free_tlb(tlb, x)		do { } while (0)
 
-#define vmalloc_sync_all() ((void)0)
-
 #endif /* _I386_PGTABLE_3LEVEL_H */
===================================================================
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -239,6 +239,13 @@ static inline pte_t pte_mkwrite(pte_t pt
 static inline pte_t pte_mkwrite(pte_t pte)	{ (pte).pte_low |= _PAGE_RW; return pte; }
 static inline pte_t pte_mkhuge(pte_t pte)	{ (pte).pte_low |= _PAGE_PSE; return pte; }
 
+extern void _vmalloc_sync_all(void);
+static inline void vmalloc_sync_all(void)
+{
+	if (!SHARED_KERNEL_PMD)
+		_vmalloc_sync_all();
+}
+
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
 #else

-- 


  parent reply	other threads:[~2007-02-16  4:45 UTC|newest]

Thread overview: 97+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-02-16  2:24 [patch 00/21] Xen-paravirt: Xen guest implementation for paravirt_ops interface Jeremy Fitzhardinge
2007-02-16  2:24 ` [patch 01/21] Xen-paravirt: Fix typo in sync_constant_test_bit()s name Jeremy Fitzhardinge
2007-02-16  2:24 ` [patch 02/21] Xen-paravirt: ignore vgacon if hardware not present Jeremy Fitzhardinge
2007-02-16  2:24 ` [patch 03/21] Xen-paravirt: Add pagetable accessors to pack and unpack pagetable entries Jeremy Fitzhardinge
2007-02-16  2:24 ` [patch 04/21] Xen-paravirt: =================================================================== Jeremy Fitzhardinge
2007-02-16  9:12   ` Andi Kleen
2007-02-16 17:32     ` Jeremy Fitzhardinge
2007-02-16  2:24 ` [patch 05/21] Xen-paravirt: paravirt_ops: hooks to set up initial pagetable Jeremy Fitzhardinge
2007-02-16  2:24 ` [patch 06/21] Xen-paravirt: paravirt_ops: allocate a fixmap slot Jeremy Fitzhardinge
2007-02-16  2:24 ` [patch 07/21] Xen-paravirt: remove ctor for pgd cache Jeremy Fitzhardinge
2007-02-16  6:04   ` Zachary Amsden
2007-02-16  8:39   ` Pekka Enberg
2007-02-16  9:19     ` Nick Piggin
2007-02-16 17:19       ` Jeremy Fitzhardinge
2007-02-16 20:33   ` Christoph Lameter
2007-02-16 23:33     ` Jeremy Fitzhardinge
2007-02-16  2:24 ` Jeremy Fitzhardinge [this message]
2007-02-16  2:24 ` [patch 09/21] Xen-paravirt: Allow paravirt backend to select PGD allocation alignment Jeremy Fitzhardinge
2007-02-16  2:24 ` [patch 10/21] Xen-paravirt: add hooks to intercept mm creation and destruction Jeremy Fitzhardinge
2007-02-16  6:34   ` Andrew Morton
2007-02-16  6:55     ` Jeremy Fitzhardinge
2007-02-22 20:16   ` Pavel Machek
2007-02-22 20:21     ` Jeremy Fitzhardinge
2007-02-22 20:23     ` Jeremy Fitzhardinge
2007-02-16  2:25 ` [patch 11/21] Xen-paravirt: Add apply_to_page_range() which applies a function to a pte range Jeremy Fitzhardinge
2007-02-16  6:37   ` Andrew Morton
2007-02-16  7:06     ` Jeremy Fitzhardinge
2007-02-16  7:19       ` Andrew Morton
2007-02-16  7:23         ` Jeremy Fitzhardinge
2007-02-16  7:31     ` Nick Piggin
2007-02-16  2:25 ` [patch 12/21] Xen-paravirt: Allocate and free vmalloc areas Jeremy Fitzhardinge
2007-02-16  6:43   ` Andrew Morton
2007-02-16  7:08     ` Jeremy Fitzhardinge
2007-02-16  7:24       ` Andrew Morton
2007-02-16  7:30         ` Jeremy Fitzhardinge
2007-02-16  7:49           ` Andrew Morton
2007-02-16  9:18           ` Andi Kleen
2007-02-16 11:10             ` [Xen-devel] " Keir Fraser
2007-02-16 11:34               ` Andi Kleen
2007-02-16 16:46             ` Jeremy Fitzhardinge
2007-02-16 17:10               ` [Xen-devel] " Keir Fraser
2007-02-16 17:12                 ` Keir Fraser
2007-02-16 17:27                   ` Jeremy Fitzhardinge
2007-02-16 19:06                     ` Keir Fraser
2007-02-16 19:19                       ` Keir Fraser
2007-02-16 19:26                         ` Jeremy Fitzhardinge
2007-02-16 23:29                           ` Keir Fraser
2007-02-16 23:41                             ` Jeremy Fitzhardinge
2007-02-16 17:26                 ` Hollis Blanchard
2007-02-16  7:48         ` Nick Piggin
2007-02-16  2:25 ` [patch 13/21] Xen-paravirt: Add nosegneg capability to the vsyscall page notes Jeremy Fitzhardinge
2007-02-16  6:06   ` Zachary Amsden
2007-02-16  2:25 ` [patch 14/21] Xen-paravirt: Add XEN config options and disable unsupported config options Jeremy Fitzhardinge
2007-02-16  6:14   ` Dan Hecht
2007-02-16  7:04     ` Jeremy Fitzhardinge
2007-02-16  7:52       ` Dan Hecht
2007-02-16  8:05         ` Jeremy Fitzhardinge
2007-02-16  8:37           ` Dan Hecht
2007-02-16 10:49             ` Keir Fraser
2007-02-16  7:06     ` Andrew Morton
2007-02-16  7:25       ` Jeremy Fitzhardinge
2007-02-16 10:09         ` Keir Fraser
2007-02-16 10:19           ` [Xen-devel] " Zachary Amsden
2007-02-16 10:44             ` Keir Fraser
2007-02-16 10:55               ` Zachary Amsden
2007-02-17  4:04             ` Rusty Russell
2007-02-16  6:15   ` Zachary Amsden
2007-02-16  7:33     ` Eric W. Biederman
2007-02-16  8:14       ` Ian Campbell
2007-02-16  8:24         ` Eric W. Biederman
2007-02-16  8:31           ` Zachary Amsden
2007-02-16 13:53       ` [Xen-devel] " Gerd Hoffmann
2007-02-18 11:32     ` Avi Kivity
2007-02-16  2:25 ` [patch 16/21] Xen-paravirt: Core Xen implementation Jeremy Fitzhardinge
2007-02-16  2:25 ` [patch 17/21] Xen-paravirt: Add the Xen virtual console driver Jeremy Fitzhardinge
2007-02-16  6:48   ` Andrew Morton
2007-02-16  2:25 ` [patch 18/21] Xen-paravirt: Add Xen grant table support Jeremy Fitzhardinge
2007-02-16  6:52   ` Andrew Morton
2007-02-16  6:58     ` Jeremy Fitzhardinge
2007-02-16  2:25 ` [patch 19/21] Xen-paravirt: Add the Xenbus sysfs and virtual device hotplug driver Jeremy Fitzhardinge
2007-02-16  2:25 ` [patch 20/21] Xen-paravirt: Add Xen virtual block device driver Jeremy Fitzhardinge
2007-02-16  2:25 ` [patch 21/21] Xen-paravirt: Add the Xen virtual network " Jeremy Fitzhardinge
2007-02-16  6:59 ` [patch 00/21] Xen-paravirt: Xen guest implementation for paravirt_ops interface Andrew Morton
2007-02-16  7:20   ` Jeremy Fitzhardinge
2007-02-16 20:49 ` Christoph Lameter
2007-02-16 21:04   ` Zachary Amsden
2007-02-16 21:13     ` Christoph Lameter
2007-02-16 21:48       ` Zachary Amsden
2007-02-16 21:59         ` Christoph Lameter
2007-02-16 22:10           ` Zachary Amsden
2007-02-17 13:51           ` Andi Kleen
2007-02-21 18:37             ` Christoph Lameter
2007-02-21 18:55               ` Zachary Amsden
2007-02-21 20:02               ` Jeremy Fitzhardinge
2007-02-17  5:05         ` Rusty Russell
2007-02-16 23:49   ` Jeremy Fitzhardinge
2007-02-17  4:58   ` Rusty Russell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070216022531.119986720@goop.org \
    --to=jeremy@goop.org \
    --cc=ak@muc.de \
    --cc=akpm@linux-foundation.org \
    --cc=chrisw@sous-sol.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=virtualization@lists.osdl.org \
    --cc=xen-devel@lists.xensource.com \
    --cc=zach@vmware.com \
    --subject='Re: [patch 08/21] Xen-paravirt: Allow paravirt backend to choose kernel PMD sharing' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).