LKML Archive on lore.kernel.org
From: venkatesh.pallipadi@intel.com
To: ak@muc.de, ebiederm@xmission.com, rdreier@cisco.com,
	torvalds@linux-foundation.org, gregkh@suse.de, airlied@skynet.ie,
	davej@redhat.com, mingo@elte.hu, tglx@linutronix.de,
	hpa@zytor.com, akpm@linux-foundation.org, arjan@infradead.org,
	jesse.barnes@intel.com, davem@davemloft.net
Cc: linux-kernel@vger.kernel.org,
	Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>,
	Suresh Siddha <suresh.b.siddha@intel.com>
Subject: [patch 02/11] PAT x86: Map only usable memory in x86_64 identity map and kernel text
Date: Thu, 10 Jan 2008 10:48:42 -0800
Message-ID: <20080110184854.787474000@intel.com>
In-Reply-To: <20080110184840.927409000@intel.com>

[-- Attachment #1: usable_only_map.patch --]
[-- Type: text/plain, Size: 12057 bytes --]

x86_64: Map only usable memory in the identity map; all reserved memory is
mapped to a single zeroed page instead. This is done late in the boot process
by pruning the page tables set up earlier, removing the mappings for reserved
regions. The pruning runs after mem_init(), so that pages can be allocated as
needed, and before the APs are started.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
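
As an illustration of the new e820 helpers in this patch: they are a plain
interval walk over the (sorted) e820 map. The toy user-space program below
models the e820_all_non_reserved() loop against a hypothetical three-entry
map, so the coverage logic can be traced in isolation. It is a sketch of the
idea, not the kernel code itself.

#include <stdio.h>

#define E820_RAM	1
#define E820_RESERVED	2

struct toy_e820entry { unsigned long addr, size; int type; };

/* Hypothetical, sorted map: 0-640K RAM, 640K-1M reserved, 1M-256M RAM */
static struct toy_e820entry toy_map[] = {
	{ 0x000000, 0x0a0000, E820_RAM },
	{ 0x0a0000, 0x060000, E820_RESERVED },
	{ 0x100000, 0xff00000, E820_RAM },
};

/*
 * Mirrors the e820_all_non_reserved() loop below: advance start past every
 * non-reserved entry that covers it; the range is fully usable iff start
 * reaches end.
 */
static int toy_all_non_reserved(unsigned long start, unsigned long end)
{
	int i;

	for (i = 0; i < (int)(sizeof(toy_map) / sizeof(toy_map[0])); i++) {
		struct toy_e820entry *ei = &toy_map[i];

		if (ei->type == E820_RESERVED)
			continue;
		if (ei->addr >= end || ei->addr + ei->size <= start)
			continue;	/* no overlap with <start,end> */
		if (ei->addr <= start)
			start = ei->addr + ei->size;
		if (start >= end)
			return 1;	/* fully covered by usable entries */
	}
	return 0;
}

int main(void)
{
	printf("%d\n", toy_all_non_reserved(0x000000, 0x0a0000));	/* 1 */
	printf("%d\n", toy_all_non_reserved(0x090000, 0x110000));	/* 0 */
	return 0;
}

The "any" variant is the same walk, returning 1 on the first overlapping
non-reserved entry; the is_memory_*_valid() wrappers only special-case the
low PCI/ISA range before consulting e820.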

Index: linux-2.6.git/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.git.orig/arch/x86/kernel/e820_64.c	2008-01-08 03:41:30.000000000 -0800
+++ linux-2.6.git/arch/x86/kernel/e820_64.c	2008-01-08 04:00:59.000000000 -0800
@@ -121,6 +121,35 @@
 }
 EXPORT_SYMBOL_GPL(e820_any_mapped);
 
+int e820_any_non_reserved(unsigned long start, unsigned long end)
+{
+	int i;
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		if (ei->type == E820_RESERVED)
+			continue;
+		if (ei->addr >= end || ei->addr + ei->size <= start)
+			continue;
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(e820_any_non_reserved);
+
+int is_memory_any_valid(unsigned long start, unsigned long end)
+{
+	/*
+	 * Keep low PCI/ISA area always mapped.
+	 * Note: end address is exclusive and start is inclusive here
+	 */
+	if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS)
+		return 1;
+
+	/* In future, choose between EFI and e820 here */
+	return e820_any_non_reserved(start, end);
+}
+EXPORT_SYMBOL_GPL(is_memory_any_valid);
+
 /*
  * This function checks if the entire range <start,end> is mapped with type.
  *
@@ -156,6 +185,47 @@
 	return 0;
 }
 
+int e820_all_non_reserved(unsigned long start, unsigned long end)
+{
+	int i;
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		if (ei->type == E820_RESERVED)
+			continue;
+
+		/* does this region (at least partly) overlap <start,end>? */
+		if (ei->addr >= end || ei->addr + ei->size <= start)
+			continue;
+
+		/*
+		 * if the region is at the beginning of <start,end> we move
+		 * start to the end of the region since it's ok until there
+		 */
+		if (ei->addr <= start)
+			start = ei->addr + ei->size;
+
+		/* if start is at or beyond end, we're done, full coverage */
+		if (start >= end)
+			return 1; /* we're done */
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(e820_all_non_reserved);
+
+int is_memory_all_valid(unsigned long start, unsigned long end)
+{
+	/*
+	 * Keep low PCI/ISA area always mapped.
+	 * Note: end address is exclusive and start is inclusive here
+	 */
+	if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS)
+		return 1;
+
+	/* In future, choose between EFI and e820 here */
+	return e820_all_non_reserved(start, end);
+}
+EXPORT_SYMBOL_GPL(is_memory_all_valid);
+
 /*
  * Find a free area in a specific range.
  */
Index: linux-2.6.git/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/init_64.c	2008-01-08 03:43:46.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/init_64.c	2008-01-08 03:59:28.000000000 -0800
@@ -215,8 +215,9 @@
 	int i, pmds;
 
 	pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
-	vaddr = __START_KERNEL_map;
-	pmd = level2_kernel_pgt;
+	/* Skip PMDs meant for kernel text */
+	vaddr = __START_KERNEL_map + KERNEL_TEXT_SIZE;
+	pmd = level2_kernel_pgt + (KERNEL_TEXT_SIZE / PMD_SIZE);
 	last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
 	for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
 		for (i = 0; i < pmds; i++) {
@@ -299,11 +300,6 @@
 		if (addr >= end)
 			break;
 
-		if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) {
-			set_pud(pud, __pud(0)); 
-			continue;
-		} 
-
 		if (pud_val(*pud)) {
 			phys_pmd_update(pud, addr, end);
 			continue;
@@ -344,6 +340,8 @@
 		(table_start << PAGE_SHIFT) + tables);
 }
 
+static unsigned long max_addr;
+
 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
    This runs before bootmem is initialized and gets pages directly from the 
    physical memory. To access them they are temporarily mapped. */
@@ -370,10 +368,13 @@
 		pgd_t *pgd = pgd_offset_k(start);
 		pud_t *pud;
 
-		if (after_bootmem)
+		if (after_bootmem) {
 			pud = pud_offset(pgd, start & PGDIR_MASK);
-		else
+		} else {
 			pud = alloc_low_page(&pud_phys);
+			if (end > max_addr)
+				max_addr = end;
+		}
 
 		next = start + PGDIR_SIZE;
 		if (next > end) 
@@ -489,6 +490,187 @@
 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
 			 kcore_vsyscall;
 
+
+static unsigned long __init get_res_page(void)
+{
+	static unsigned long res_phys_page;
+	if (!res_phys_page) {
+		pte_t *pte;
+		pte = alloc_low_page(&res_phys_page);
+		unmap_low_page(pte);
+	}
+	return res_phys_page;
+}
+
+static unsigned long __init get_res_ptepage(void)
+{
+	static unsigned long res_phys_ptepage;
+	if (!res_phys_ptepage) {
+		pte_t *pte_page;
+		unsigned long page_phys;
+		unsigned long entry;
+		int i;
+
+		pte_page = alloc_low_page(&res_phys_ptepage);
+
+		page_phys = get_res_page();
+		entry = _PAGE_NX | _KERNPG_TABLE | _PAGE_GLOBAL | page_phys;
+		entry &= __supported_pte_mask;
+		for (i = 0; i < PTRS_PER_PTE; i++) {
+			pte_t *pte = pte_page + i;
+			set_pte(pte, __pte(entry));
+		}
+
+		unmap_low_page(pte_page);
+	}
+	return res_phys_ptepage;
+}
+
+static void __init phys_pte_prune(pte_t *pte_page, unsigned long address,
+		unsigned long end, unsigned long vaddr, unsigned int exec)
+{
+	int i = pte_index(vaddr);
+
+	for (; i < PTRS_PER_PTE; i++, address = (address & PAGE_MASK) + PAGE_SIZE, vaddr = (vaddr & PAGE_MASK) + PAGE_SIZE) {
+		unsigned long entry;
+		pte_t *pte = pte_page + i;
+
+		if (address >= end)
+			break;
+
+		if (pte_val(*pte))
+			continue;
+
+		/* No usable memory here: map the shared zero page */
+		if (!(address & (~PAGE_MASK)) &&
+		    (address + PAGE_SIZE <= end) &&
+		    !is_memory_any_valid(address, address + PAGE_SIZE)) {
+			unsigned long phys_page;
+
+			phys_page = get_res_page();
+			entry = _PAGE_NX | _KERNPG_TABLE | _PAGE_GLOBAL |
+				phys_page;
+
+			entry &= __supported_pte_mask;
+			set_pte(pte, __pte(entry));
+
+			continue;
+		}
+
+		if (exec)
+			entry = _PAGE_NX|_KERNPG_TABLE|_PAGE_GLOBAL|address;
+		else
+			entry = _KERNPG_TABLE|_PAGE_GLOBAL|address;
+		entry &= __supported_pte_mask;
+		set_pte(pte, __pte(entry));
+	}
+}
+
+static void __init phys_pmd_prune(pmd_t *pmd_page, unsigned long address,
+		unsigned long end, unsigned long vaddr, unsigned int exec)
+{
+	int i = pmd_index(vaddr);
+
+	for (; i < PTRS_PER_PMD; i++, address = (address & PMD_MASK) + PMD_SIZE,
+			vaddr = (vaddr & PMD_MASK) + PMD_SIZE) {
+		pmd_t *pmd = pmd_page + i;
+		pte_t *pte;
+		unsigned long pte_phys;
+
+		if (address >= end)
+			break;
+
+		if (!pmd_val(*pmd))
+			continue;
+
+		/* No usable memory here: use the shared zero-page PTE table */
+		if (!(address & (~PMD_MASK)) &&
+		    (address + PMD_SIZE <= end) &&
+		    !is_memory_any_valid(address, address + PMD_SIZE)) {
+
+			pte_phys = get_res_ptepage();
+			set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
+
+			continue;
+		}
+
+		/* Region fully usable: keep the 2M mapping */
+		if (is_memory_all_valid(address, address + PMD_SIZE)) {
+			/* Init already set up this mapping */
+			continue;
+		}
+
+		/* Partly usable: split into 4k pages and prune each */
+		pte = alloc_low_page(&pte_phys);
+		phys_pte_prune(pte, address, address + PMD_SIZE, vaddr, exec);
+		set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
+		unmap_low_page(pte);
+
+	}
+}
+
+static void __init phys_pud_prune(pud_t *pud_page, unsigned long addr,
+	       unsigned long end, unsigned long vaddr, unsigned int exec)
+{
+	int i = pud_index(vaddr);
+
+	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE,
+			vaddr = (vaddr & PUD_MASK) + PUD_SIZE) {
+		pud_t *pud = pud_page + i;
+
+		if (addr >= end)
+			break;
+
+		if (pud_val(*pud)) {
+			pmd_t *pmd = pmd_offset(pud, 0);
+			phys_pmd_prune(pmd, addr, end, vaddr, exec);
+		}
+	}
+}
+
+void __init prune_reserved_region_maps(void)
+{
+	unsigned long start, end, next;
+
+	/* Prune physical memory identity map */
+	start = (unsigned long)__va(0);
+	end = max_addr;
+	for (; start < end; start = next) {
+		pgd_t *pgd = pgd_offset_k(start);
+		pud_t *pud;
+
+		pud = pud_offset(pgd, start & PGDIR_MASK);
+
+		next = start + PGDIR_SIZE;
+		if (next > end)
+			next = end;
+
+		phys_pud_prune(pud, __pa(start), __pa(next), start, 0);
+	}
+
+	/* Prune kernel text region */
+	start = (unsigned long)KERNEL_TEXT_START;
+	end = start + (unsigned long)KERNEL_TEXT_SIZE;
+	for (; start < end; start = next) {
+		pgd_t *pgd = pgd_offset_k(start);
+		pud_t *pud;
+
+		pud = pud_offset(pgd, start & PGDIR_MASK);
+
+		next = (start & PGDIR_MASK) + (unsigned long)PGDIR_SIZE;
+		if (!next || next > end)
+			next = end;
+
+		phys_pud_prune(pud,
+		               start - (unsigned long)KERNEL_TEXT_START,
+		               next - (unsigned long)KERNEL_TEXT_START,
+			       start,
+			       1);
+	}
+
+	__flush_tlb();
+}
+
 void __init mem_init(void)
 {
 	long codesize, reservedpages, datasize, initsize;
@@ -538,6 +720,8 @@
 		reservedpages << (PAGE_SHIFT-10),
 		datasize >> 10,
 		initsize >> 10);
+
+	prune_reserved_region_maps();
 }
 
 void free_init_pages(char *what, unsigned long begin, unsigned long end)
Index: linux-2.6.git/arch/x86/mm/ioremap_64.c
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/ioremap_64.c	2008-01-08 03:41:30.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/ioremap_64.c	2008-01-08 03:59:28.000000000 -0800
@@ -19,6 +19,7 @@
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 #include <asm/proto.h>
+#include <asm/e820.h>
 
 unsigned long __phys_addr(unsigned long x)
 {
@@ -28,9 +29,6 @@
 }
 EXPORT_SYMBOL(__phys_addr);
 
-#define ISA_START_ADDRESS      0xa0000
-#define ISA_END_ADDRESS                0x100000
-
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
Index: linux-2.6.git/arch/x86/mm/pageattr_64.c
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/pageattr_64.c	2008-01-08 03:41:30.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/pageattr_64.c	2008-01-08 04:03:33.000000000 -0800
@@ -53,9 +53,11 @@
 	/*
 	 * page_private is used to track the number of entries in
 	 * the page table page have non standard attributes.
+	 * A count of 1 means the page was split by split_large_page();
+	 * counts above 1 track entries with non-standard attributes.
 	 */
 	SetPagePrivate(base);
-	page_private(base) = 0;
+	page_private(base) = 1;
 
 	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK;
@@ -176,11 +178,8 @@
 			BUG();
 	}
 
-	/* on x86-64 the direct mapping set at boot is not using 4k pages */
-	BUG_ON(PageReserved(kpte_page));
-
 	save_page(kpte_page);
-	if (page_private(kpte_page) == 0)
+	if (page_private(kpte_page) == 1)
 		revert_page(address, ref_prot);
 	return 0;
 }
Index: linux-2.6.git/include/asm-x86/e820_64.h
===================================================================
--- linux-2.6.git.orig/include/asm-x86/e820_64.h	2008-01-08 03:41:30.000000000 -0800
+++ linux-2.6.git/include/asm-x86/e820_64.h	2008-01-08 03:59:28.000000000 -0800
@@ -26,6 +26,10 @@
 extern void e820_mark_nosave_regions(void);
 extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
 extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
+extern int e820_any_non_reserved(unsigned long start, unsigned long end);
+extern int is_memory_any_valid(unsigned long start, unsigned long end);
+extern int e820_all_non_reserved(unsigned long start, unsigned long end);
+extern int is_memory_all_valid(unsigned long start, unsigned long end);
 extern unsigned long e820_hole_size(unsigned long start, unsigned long end);
 
 extern void e820_setup_gap(void);
@@ -38,6 +42,10 @@
 
 extern unsigned ebda_addr, ebda_size;
 extern unsigned long nodemap_addr, nodemap_size;
+
+#define ISA_START_ADDRESS	0xa0000
+#define ISA_END_ADDRESS		0x100000
+
 #endif/*!__ASSEMBLY__*/
 
 #endif/*__E820_HEADER*/
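
To recap the init_64.c pruning above: each 2M PMD region gets exactly one of
three treatments. The self-contained sketch below models just that decision;
toy_usable() is a hypothetical stand-in for is_memory_any_valid() /
is_memory_all_valid(), with a single usable window at 2M-5M.

#include <stdio.h>

#define TOY_PMD_SIZE 0x200000UL	/* 2M, like PMD_SIZE */

/* Hypothetical predicate: only 2M-5M is usable in this toy setup */
static int toy_usable(unsigned long start, unsigned long end, int all)
{
	unsigned long lo = 0x200000UL, hi = 0x500000UL;

	if (all)
		return start >= lo && end <= hi;
	return start < hi && end > lo;
}

/* Models the per-2M decision made in phys_pmd_prune() */
static const char *toy_classify(unsigned long addr)
{
	unsigned long end = addr + TOY_PMD_SIZE;

	if (!toy_usable(addr, end, 0))
		return "all reserved: point at shared zero-page PTE table";
	if (toy_usable(addr, end, 1))
		return "all usable: keep the existing 2M mapping";
	return "mixed: split into 4k PTEs and prune each one";
}

int main(void)
{
	unsigned long a;

	for (a = 0; a < 4 * TOY_PMD_SIZE; a += TOY_PMD_SIZE)
		printf("%#lx-%#lx: %s\n", a, a + TOY_PMD_SIZE, toy_classify(a));
	return 0;
}

Fully reserved regions all share one zeroed data page (get_res_page()) and
one pre-filled PTE page (get_res_ptepage()), so the pruning costs at most two
extra pages plus any 4k PTE pages needed for mixed regions.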

-- 


Thread overview: 48+ messages
2008-01-10 18:48 [patch 00/11] PAT x86: PAT support for x86 venkatesh.pallipadi
2008-01-10 18:48 ` [patch 01/11] PAT x86: Make acpi/other drivers map memory instead of assuming identity map venkatesh.pallipadi
2008-01-10 18:48 ` venkatesh.pallipadi [this message]
2008-01-10 19:06   ` [patch 02/11] PAT x86: Map only usable memory in x86_64 identity map and kernel text Andi Kleen
2008-01-10 19:17     ` Pallipadi, Venkatesh
2008-01-10 19:28       ` Andi Kleen
2008-01-10 20:50         ` Pallipadi, Venkatesh
2008-01-10 21:16           ` Andi Kleen
2008-01-10 22:25             ` Pallipadi, Venkatesh
2008-01-10 22:35               ` Andi Kleen
2008-01-14 16:43           ` Ingo Molnar
2008-01-14 21:21             ` Siddha, Suresh B
2008-01-14 21:28               ` Andi Kleen
2008-01-15 22:17               ` Ingo Molnar
2008-01-15 23:11                 ` Andi Kleen
2008-01-15 23:21                 ` Siddha, Suresh B
2008-01-18 12:01                   ` Ingo Molnar
2008-01-18 13:12                     ` Andi Kleen
2008-01-18 16:46                       ` Jesse Barnes
2008-01-18 18:12                         ` Andi Kleen
2008-01-18 19:02                           ` Jesse Barnes
2008-01-19  2:42                             ` Andi Kleen
2008-01-10 21:05   ` Linus Torvalds
2008-01-10 21:57     ` Pallipadi, Venkatesh
2008-01-10 22:15       ` Linus Torvalds
2008-01-10 22:27         ` Pallipadi, Venkatesh
2008-01-10 22:50         ` Valdis.Kletnieks
2008-01-18 18:27           ` Dave Jones
2008-01-18 20:54             ` Ingo Molnar
2008-01-10 18:48 ` [patch 03/11] PAT x86: Map only usable memory in i386 identity map venkatesh.pallipadi
2008-01-10 19:10   ` Andi Kleen
2008-01-10 18:48 ` [patch 04/11] PAT x86: Basic PAT implementation venkatesh.pallipadi
2008-01-10 18:48 ` [patch 05/11] PAT x86: drm driver changes for PAT venkatesh.pallipadi
2008-01-10 18:48 ` [patch 06/11] PAT x86: Refactoring i386 cpa venkatesh.pallipadi
2008-01-10 19:00   ` Andi Kleen
2008-01-14 16:47     ` Ingo Molnar
2008-01-10 18:48 ` [patch 07/11] PAT x86: pat-conflict resolution using linear list venkatesh.pallipadi
2008-01-10 19:13   ` Andi Kleen
2008-01-10 20:08     ` Pallipadi, Venkatesh
2008-01-10 18:48 ` [patch 08/11] PAT x86: pci mmap conlfict patch venkatesh.pallipadi
2008-01-10 18:48 ` [patch 09/11] PAT x86: Add ioremap_wc support venkatesh.pallipadi
2008-01-10 19:08   ` Andi Kleen
2008-01-10 19:25     ` Pallipadi, Venkatesh
2008-01-12  0:18       ` Roland Dreier
2008-01-10 18:48 ` [patch 10/11] PAT x86: Handle /dev/mem mappings venkatesh.pallipadi
2008-01-10 18:48 ` [patch 11/11] PAT x86: Expose uc and wc interfaces in /sysfs vor pci_mmap_resource venkatesh.pallipadi
2008-01-10 19:43   ` Greg KH
2008-01-10 20:54     ` [patch 11/11] PAT x86: Expose uc and wc interfaces in /sysfsvor pci_mmap_resource Pallipadi, Venkatesh
