LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH 1/11] Fix CONFIG_COMPAT_VDSO
@ 2007-01-14  5:31 Roland McGrath
  2007-01-14  5:33 ` [PATCH 2/11] Fix gate_vma.vm_flags Roland McGrath
                   ` (10 more replies)
  0 siblings, 11 replies; 18+ messages in thread
From: Roland McGrath @ 2007-01-14  5:31 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton; +Cc: linux-kernel


I wouldn't mind if CONFIG_COMPAT_VDSO went away entirely.
But if it's there, it should work properly.  Currently
it's quite haphazard: both real vma and fixmap are
mapped, both are put in the two different AT_* slots,
sysenter returns to the vma address rather than the
fixmap address, and core dumps are yet another story.

This patch makes CONFIG_COMPAT_VDSO disable the real vma
and use the fixmap area consistently.  This makes it
actually compatible with what the old vdso implementation did.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/i386/kernel/entry.S    |    4 ++++
 arch/i386/kernel/sysenter.c |    2 ++
 include/asm-i386/elf.h      |    7 +++----
 include/asm-i386/fixmap.h   |    2 ++
 include/asm-i386/page.h     |    2 ++
 5 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 06461b8..5e47683 100644  
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -302,12 +302,16 @@ sysenter_past_esp:
 	pushl $(__USER_CS)
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET cs, 0*/
+#ifndef CONFIG_COMPAT_VDSO
 	/*
 	 * Push current_thread_info()->sysenter_return to the stack.
 	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
 	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
 	 */
 	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
+#else
+	pushl $SYSENTER_RETURN
+#endif
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET eip, 0
 
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 7de9117..454d12d 100644  
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -100,6 +100,7 @@ int __init sysenter_setup(void)
 	return 0;
 }
 
+#ifndef CONFIG_COMPAT_VDSO
 static struct page *syscall_nopage(struct vm_area_struct *vma,
 				unsigned long adr, int *type)
 {
@@ -187,3 +188,4 @@ int in_gate_area_no_task(unsigned long a
 {
 	return 0;
 }
+#endif
diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h
index 45d21a0..0515d61 100644  
--- a/include/asm-i386/elf.h
+++ b/include/asm-i386/elf.h
@@ -143,11 +143,8 @@ extern int dump_task_extended_fpu (struc
 # define VDSO_PRELINK		0
 #endif
 
-#define VDSO_COMPAT_SYM(x) \
-		(VDSO_COMPAT_BASE + (unsigned long)(x) - VDSO_PRELINK)
-
 #define VDSO_SYM(x) \
-		(VDSO_BASE + (unsigned long)(x) - VDSO_PRELINK)
+		(VDSO_COMPAT_BASE + (unsigned long)(x) - VDSO_PRELINK)
 
 #define VDSO_HIGH_EHDR		((const struct elfhdr *) VDSO_HIGH_BASE)
 #define VDSO_EHDR		((const struct elfhdr *) VDSO_COMPAT_BASE)
@@ -156,10 +153,12 @@ extern void __kernel_vsyscall;
 
 #define VDSO_ENTRY		VDSO_SYM(&__kernel_vsyscall)
 
+#ifndef CONFIG_COMPAT_VDSO
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES
 struct linux_binprm;
 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
                                        int executable_stack);
+#endif
 
 extern unsigned int vdso_enabled;
 
diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h
index 02428cb..3e9f610 100644  
--- a/include/asm-i386/fixmap.h
+++ b/include/asm-i386/fixmap.h
@@ -23,6 +23,8 @@
 extern unsigned long __FIXADDR_TOP;
 #else
 #define __FIXADDR_TOP  0xfffff000
+#define FIXADDR_USER_START	__fix_to_virt(FIX_VDSO)
+#define FIXADDR_USER_END	__fix_to_virt(FIX_VDSO - 1)
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h
index fd3f64a..7b19f45 100644  
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -143,7 +143,9 @@ extern int page_is_ram(unsigned long pag
 #include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
+#ifndef CONFIG_COMPAT_VDSO
 #define __HAVE_ARCH_GATE_AREA 1
+#endif
 #endif /* __KERNEL__ */
 
 #endif /* _I386_PAGE_H */

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 2/11] Fix gate_vma.vm_flags
  2007-01-14  5:31 [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Roland McGrath
@ 2007-01-14  5:33 ` Roland McGrath
  2007-01-14  5:33 ` [PATCH 3/11] Add VM_ALWAYSDUMP Roland McGrath
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 18+ messages in thread
From: Roland McGrath @ 2007-01-14  5:33 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton; +Cc: linux-kernel


This patch fixes the initialization of gate_vma.vm_flags and
gate_vma.vm_page_prot to reflect reality.  This makes the "[vdso]" line in
/proc/PID/maps correctly show r-xp instead of ---p, when gate_vma is used
(CONFIG_COMPAT_VDSO on i386).

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 mm/memory.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index af227d2..5beb4b8 100644  
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2606,8 +2606,8 @@ static int __init gate_vma_init(void)
 	gate_vma.vm_mm = NULL;
 	gate_vma.vm_start = FIXADDR_USER_START;
 	gate_vma.vm_end = FIXADDR_USER_END;
-	gate_vma.vm_page_prot = PAGE_READONLY;
-	gate_vma.vm_flags = 0;
+	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+	gate_vma.vm_page_prot = __P101;
 	return 0;
 }
 __initcall(gate_vma_init);

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 3/11] Add VM_ALWAYSDUMP
  2007-01-14  5:31 [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Roland McGrath
  2007-01-14  5:33 ` [PATCH 2/11] Fix gate_vma.vm_flags Roland McGrath
@ 2007-01-14  5:33 ` Roland McGrath
  2007-01-14  5:34 ` [PATCH 4/11] i386 vDSO: use VM_ALWAYSDUMP Roland McGrath
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 18+ messages in thread
From: Roland McGrath @ 2007-01-14  5:33 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton; +Cc: linux-kernel


This patch adds the VM_ALWAYSDUMP flag for vm_flags in vm_area_struct.
This provides a clean explicit way to have a vma always included in core
dumps, as is needed for vDSOs.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 fs/binfmt_elf.c    |    4 ++++
 include/linux/mm.h |    1 +
 2 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7cb2872..6fec8bf 100644  
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1178,6 +1178,10 @@ static int dump_seek(struct file *file, 
  */
 static int maydump(struct vm_area_struct *vma)
 {
+	/* The vma can be set up to tell us the answer directly.  */
+	if (vma->vm_flags & VM_ALWAYSDUMP)
+		return 1;
+
 	/* Do not dump I/O mapped devices or special mappings */
 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
 		return 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7691223..2d2c08d 100644  
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -168,6 +168,7 @@ extern unsigned int kobjsize(const void 
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
 #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
+#define VM_ALWAYSDUMP	0x04000000	/* Always include in core dumps */
 
 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 4/11] i386 vDSO: use VM_ALWAYSDUMP
  2007-01-14  5:31 [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Roland McGrath
  2007-01-14  5:33 ` [PATCH 2/11] Fix gate_vma.vm_flags Roland McGrath
  2007-01-14  5:33 ` [PATCH 3/11] Add VM_ALWAYSDUMP Roland McGrath
@ 2007-01-14  5:34 ` Roland McGrath
  2007-01-23 19:48   ` Andrew Morton
  2007-01-14  5:34 ` [PATCH 5/11] x86_64 ia32 " Roland McGrath
                   ` (7 subsequent siblings)
  10 siblings, 1 reply; 18+ messages in thread
From: Roland McGrath @ 2007-01-14  5:34 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton; +Cc: linux-kernel


This patch fixes core dumps to include the vDSO vma, which is left out now.
It removes the special-case core writing macros, which were not doing the
right thing for the vDSO vma anyway.  Instead, it uses VM_ALWAYSDUMP in the
vma; there is no need for the fixmap page to be installed.  It handles the
CONFIG_COMPAT_VDSO case by making elf_core_dump use the fake vma from
get_gate_vma after real vmas in the same way the /proc/PID/maps code does.

This changes core dumps so they no longer include the non-PT_LOAD phdrs
from the vDSO.  I made the change to add them in the first place, but it
turned out that nothing ever wanted them there since the advent of NT_AUXV.
It's cleaner to leave them out, and just let the phdrs inside the vDSO
image speak for themselves.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/i386/kernel/sysenter.c |   12 ++++++----
 fs/binfmt_elf.c             |   12 ++++++++--
 include/asm-i386/elf.h      |   44 -------------------------------------------
 mm/memory.c                 |    7 ++++++
 4 files changed, 23 insertions(+), 52 deletions(-)

diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 454d12d..5da7442 100644  
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -79,11 +79,6 @@ int __init sysenter_setup(void)
 #ifdef CONFIG_COMPAT_VDSO
 	__set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY);
 	printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
-#else
-	/*
-	 * In the non-compat case the ELF coredumping code needs the fixmap:
-	 */
-	__set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO);
 #endif
 
 	if (!boot_cpu_has(X86_FEATURE_SEP)) {
@@ -147,6 +142,13 @@ int arch_setup_additional_pages(struct l
 	vma->vm_end = addr + PAGE_SIZE;
 	/* MAYWRITE to allow gdb to COW and set breakpoints */
 	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+	/*
+	 * Make sure the vDSO gets into every core dump.
+	 * Dumping its contents makes post-mortem fully interpretable later
+	 * without matching up the same kernel and hardware config to see
+	 * what PC values meant.
+	 */
+	vma->vm_flags |= VM_ALWAYSDUMP;
 	vma->vm_flags |= mm->def_flags;
 	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
 	vma->vm_ops = &syscall_vm_ops;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6fec8bf..4ee7cf5 100644  
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1443,7 +1443,7 @@ static int elf_core_dump(long signr, str
 	int segs;
 	size_t size = 0;
 	int i;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *gate_vma;
 	struct elfhdr *elf = NULL;
 	loff_t offset = 0, dataoff, foffset;
 	unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
@@ -1529,6 +1529,10 @@ static int elf_core_dump(long signr, str
 	segs += ELF_CORE_EXTRA_PHDRS;
 #endif
 
+	gate_vma = get_gate_vma(current);
+	if (gate_vma != NULL)
+		segs++;
+
 	/* Set up header */
 	fill_elf_header(elf, segs + 1);	/* including notes section */
 
@@ -1596,7 +1600,8 @@ static int elf_core_dump(long signr, str
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
 
 	/* Write program headers for segments dump */
-	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
+	for (vma = current->mm->mmap; vma != NULL;
+	     vma = vma->vm_next ?: vma == gate_vma ? NULL : gate_vma) {
 		struct elf_phdr phdr;
 		size_t sz;
 
@@ -1645,7 +1650,8 @@ static int elf_core_dump(long signr, str
 	/* Align to page */
 	DUMP_SEEK(dataoff - foffset);
 
-	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
+	for (vma = current->mm->mmap; vma != NULL;
+	     vma = vma->vm_next ?: vma == gate_vma ? NULL : gate_vma) {
 		unsigned long addr;
 
 		if (!maydump(vma))
diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h
index 0515d61..369035d 100644  
--- a/include/asm-i386/elf.h
+++ b/include/asm-i386/elf.h
@@ -168,50 +168,6 @@ do if (vdso_enabled) {						\
 		NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_COMPAT_BASE);	\
 } while (0)
 
-/*
- * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
- * extra segments containing the vsyscall DSO contents.  Dumping its
- * contents makes post-mortem fully interpretable later without matching up
- * the same kernel and hardware config to see what PC values meant.
- * Dumping its extra ELF program headers includes all the other information
- * a debugger needs to easily find how the vsyscall DSO was being used.
- */
-#define ELF_CORE_EXTRA_PHDRS		(VDSO_HIGH_EHDR->e_phnum)
-#define ELF_CORE_WRITE_EXTRA_PHDRS					      \
-do {									      \
-	const struct elf_phdr *const vsyscall_phdrs =			      \
-		(const struct elf_phdr *) (VDSO_HIGH_BASE		      \
-					   + VDSO_HIGH_EHDR->e_phoff);    \
-	int i;								      \
-	Elf32_Off ofs = 0;						      \
-	for (i = 0; i < VDSO_HIGH_EHDR->e_phnum; ++i) {		      \
-		struct elf_phdr phdr = vsyscall_phdrs[i];		      \
-		if (phdr.p_type == PT_LOAD) {				      \
-			BUG_ON(ofs != 0);				      \
-			ofs = phdr.p_offset = offset;			      \
-			phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz);	      \
-			phdr.p_filesz = phdr.p_memsz;			      \
-			offset += phdr.p_filesz;			      \
-		}							      \
-		else							      \
-			phdr.p_offset += ofs;				      \
-		phdr.p_paddr = 0; /* match other core phdrs */		      \
-		DUMP_WRITE(&phdr, sizeof(phdr));			      \
-	}								      \
-} while (0)
-#define ELF_CORE_WRITE_EXTRA_DATA					      \
-do {									      \
-	const struct elf_phdr *const vsyscall_phdrs =			      \
-		(const struct elf_phdr *) (VDSO_HIGH_BASE		      \
-					   + VDSO_HIGH_EHDR->e_phoff);    \
-	int i;								      \
-	for (i = 0; i < VDSO_HIGH_EHDR->e_phnum; ++i) {		      \
-		if (vsyscall_phdrs[i].p_type == PT_LOAD)		      \
-			DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr,	      \
-				   PAGE_ALIGN(vsyscall_phdrs[i].p_memsz));    \
-	}								      \
-} while (0)
-
 #endif
 
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 5beb4b8..ef09f0a 100644  
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2608,6 +2608,13 @@ static int __init gate_vma_init(void)
 	gate_vma.vm_end = FIXADDR_USER_END;
 	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
 	gate_vma.vm_page_prot = __P101;
+	/*
+	 * Make sure the vDSO gets into every core dump.
+	 * Dumping its contents makes post-mortem fully interpretable later
+	 * without matching up the same kernel and hardware config to see
+	 * what PC values meant.
+	 */
+	gate_vma.vm_flags |= VM_ALWAYSDUMP;
 	return 0;
 }
 __initcall(gate_vma_init);

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 5/11] x86_64 ia32 vDSO: use VM_ALWAYSDUMP
  2007-01-14  5:31 [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Roland McGrath
                   ` (2 preceding siblings ...)
  2007-01-14  5:34 ` [PATCH 4/11] i386 vDSO: use VM_ALWAYSDUMP Roland McGrath
@ 2007-01-14  5:34 ` Roland McGrath
  2007-01-14  5:35 ` [PATCH 6/11] powerpc " Roland McGrath
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 18+ messages in thread
From: Roland McGrath @ 2007-01-14  5:34 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton; +Cc: linux-kernel


This patch fixes ia32 core dumps on x86_64 to include just one phdr for the
vDSO vma.  Currently it writes a confused format with two phdrs for the
address, one without contents and one with.  This patch removes the
special-case core writing macros for the ia32 vDSO.  Instead, it uses
VM_ALWAYSDUMP in the vma.  This changes core dumps so they no longer
include the non-PT_LOAD phdrs from the vDSO, consistent with fixed native
i386 core dumps.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/x86_64/ia32/ia32_binfmt.c |   49 ----------------------------------------
 arch/x86_64/ia32/syscall32.c   |    7 +++++
 2 files changed, 7 insertions(+), 49 deletions(-)

diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 543ef4f..5ce0bd4 100644  
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -64,55 +64,6 @@ typedef unsigned int elf_greg_t;
 #define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t))
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
-/*
- * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
- * extra segments containing the vsyscall DSO contents.  Dumping its
- * contents makes post-mortem fully interpretable later without matching up
- * the same kernel and hardware config to see what PC values meant.
- * Dumping its extra ELF program headers includes all the other information
- * a debugger needs to easily find how the vsyscall DSO was being used.
- */
-#define ELF_CORE_EXTRA_PHDRS	(find_vma(current->mm, VSYSCALL32_BASE) ?     \
-    (VSYSCALL32_EHDR->e_phnum) : 0)
-#define ELF_CORE_WRITE_EXTRA_PHDRS					      \
-do {									      \
-	if (find_vma(current->mm, VSYSCALL32_BASE)) { 			      \
-		const struct elf32_phdr *const vsyscall_phdrs =		      \
-			(const struct elf32_phdr *) (VSYSCALL32_BASE	      \
-						   + VSYSCALL32_EHDR->e_phoff);\
-		int i;							      \
-		Elf32_Off ofs = 0;					      \
-		for (i = 0; i < VSYSCALL32_EHDR->e_phnum; ++i) {	      \
-			struct elf32_phdr phdr = vsyscall_phdrs[i];	      \
-			if (phdr.p_type == PT_LOAD) {			      \
-				BUG_ON(ofs != 0);			      \
-				ofs = phdr.p_offset = offset;		      \
-				phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz);      \
-				phdr.p_filesz = phdr.p_memsz;		      \
-				offset += phdr.p_filesz;		      \
-			}						      \
-			else						      \
-				phdr.p_offset += ofs;			      \
-			phdr.p_paddr = 0; /* match other core phdrs */	      \
-			DUMP_WRITE(&phdr, sizeof(phdr));		      \
-		}							      \
-	}								      \
-} while (0)
-#define ELF_CORE_WRITE_EXTRA_DATA					      \
-do {									      \
-	if (find_vma(current->mm, VSYSCALL32_BASE)) { 			      \
-		const struct elf32_phdr *const vsyscall_phdrs =		      \
-			(const struct elf32_phdr *) (VSYSCALL32_BASE	      \
-						   + VSYSCALL32_EHDR->e_phoff);      \
-		int i;							      \
-		for (i = 0; i < VSYSCALL32_EHDR->e_phnum; ++i) {	      \
-			if (vsyscall_phdrs[i].p_type == PT_LOAD)	      \
-				DUMP_WRITE((void *) (u64) vsyscall_phdrs[i].p_vaddr,\
-				    PAGE_ALIGN(vsyscall_phdrs[i].p_memsz));   \
-		}							      \
-	}								      \
-} while (0)
-
 struct elf_siginfo
 {
 	int	si_signo;			/* signal number */
diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 3e5ed20..3ac9355 100644  
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -59,6 +59,13 @@ int syscall32_setup_pages(struct linux_b
 	vma->vm_end = VSYSCALL32_END;
 	/* MAYWRITE to allow gdb to COW and set breakpoints */
 	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+	/*
+	 * Make sure the vDSO gets into every core dump.
+	 * Dumping its contents makes post-mortem fully interpretable later
+	 * without matching up the same kernel and hardware config to see
+	 * what PC values meant.
+	 */
+	vma->vm_flags |= VM_ALWAYSDUMP;
 	vma->vm_flags |= mm->def_flags;
 	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
 	vma->vm_ops = &syscall32_vm_ops;

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 6/11] powerpc vDSO: use VM_ALWAYSDUMP
  2007-01-14  5:31 [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Roland McGrath
                   ` (3 preceding siblings ...)
  2007-01-14  5:34 ` [PATCH 5/11] x86_64 ia32 " Roland McGrath
@ 2007-01-14  5:35 ` Roland McGrath
  2007-01-14  5:36 ` [PATCH 7/11] x86_64 ia32 vDSO: define arch_vma_name Roland McGrath
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 18+ messages in thread
From: Roland McGrath @ 2007-01-14  5:35 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton; +Cc: linux-kernel


This patch fixes core dumps to include the vDSO vma, which is left out now.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/powerpc/kernel/vdso.c |    7 +++++++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index a4b28c7..ae0ede1 100644  
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -284,6 +284,13 @@ int arch_setup_additional_pages(struct l
 	 * pages though
 	 */
 	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC;
+	/*
+	 * Make sure the vDSO gets into every core dump.
+	 * Dumping its contents makes post-mortem fully interpretable later
+	 * without matching up the same kernel and hardware config to see
+	 * what PC values meant.
+	 */
+	vma->vm_flags |= VM_ALWAYSDUMP;
 	vma->vm_flags |= mm->def_flags;
 	vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
 	vma->vm_ops = &vdso_vmops;

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 7/11] x86_64 ia32 vDSO: define arch_vma_name
  2007-01-14  5:31 [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Roland McGrath
                   ` (4 preceding siblings ...)
  2007-01-14  5:35 ` [PATCH 6/11] powerpc " Roland McGrath
@ 2007-01-14  5:36 ` Roland McGrath
  2007-01-14  5:36 ` [PATCH 8/11] Add install_special_mapping Roland McGrath
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 18+ messages in thread
From: Roland McGrath @ 2007-01-14  5:36 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton; +Cc: linux-kernel


This patch makes x86_64 define arch_vma_name for CONFIG_IA32_EMULATION.
This makes the ia32 vDSO mapping appear in /proc/PID/maps with "[vdso]"
for ia32 processes, as it does on native i386.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/x86_64/ia32/syscall32.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 3ac9355..59f1fa1 100644  
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -82,6 +82,14 @@ int syscall32_setup_pages(struct linux_b
 	return 0;
 }
 
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	if (vma->vm_start == VSYSCALL32_BASE &&
+	    vma->vm_mm && vma->vm_mm->task_size == IA32_PAGE_OFFSET)
+		return "[vdso]";
+	return NULL;
+}
+
 static int __init init_syscall32(void)
 { 
 	syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 8/11] Add install_special_mapping
  2007-01-14  5:31 [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Roland McGrath
                   ` (5 preceding siblings ...)
  2007-01-14  5:36 ` [PATCH 7/11] x86_64 ia32 vDSO: define arch_vma_name Roland McGrath
@ 2007-01-14  5:36 ` Roland McGrath
  2007-01-14  5:36 ` [PATCH 9/11] i386 vDSO: use install_special_mapping Roland McGrath
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 18+ messages in thread
From: Roland McGrath @ 2007-01-14  5:36 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton; +Cc: linux-kernel


This patch adds a utility function install_special_mapping, for creating a
special vma using a fixed set of preallocated pages as backing, such as for
a vDSO.  This consolidates some nearly identical code used for vDSO mapping
reimplemented for different architectures.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 include/linux/mm.h |    3 ++
 mm/mmap.c          |   72 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+), 0 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2d2c08d..bb793a4 100644  
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1030,6 +1030,9 @@ extern struct vm_area_struct *copy_vma(s
 	unsigned long addr, unsigned long len, pgoff_t pgoff);
 extern void exit_mmap(struct mm_struct *);
 extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
+extern int install_special_mapping(struct mm_struct *mm,
+				   unsigned long addr, unsigned long len,
+				   unsigned long flags, struct page **pages);
 
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 9717337..b540fb2 100644  
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2094,3 +2094,75 @@ int may_expand_vm(struct mm_struct *mm, 
 		return 0;
 	return 1;
 }
+
+
+static struct page *special_mapping_nopage(struct vm_area_struct *vma,
+					   unsigned long address, int *type)
+{
+	struct page **pages;
+
+	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+
+	address -= vma->vm_start;
+	for (pages = vma->vm_private_data; address > 0 && *pages; ++pages)
+		address -= PAGE_SIZE;
+
+	if (*pages) {
+		struct page *page = *pages;
+		get_page(page);
+		return page;
+	}
+
+	return NOPAGE_SIGBUS;
+}
+
+/*
+ * Having a close hook prevents vma merging regardless of flags.
+ */
+static void special_mapping_close(struct vm_area_struct *vma)
+{
+}
+
+static struct vm_operations_struct special_mapping_vmops = {
+	.close = special_mapping_close,
+	.nopage	= special_mapping_nopage,
+};
+
+/*
+ * Called with mm->mmap_sem held for writing.
+ * Insert a new vma covering the given region, with the given flags.
+ * Its pages are supplied by the given array of struct page *.
+ * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
+ * The region past the last page supplied will always produce SIGBUS.
+ * The array pointer and the pages it points to are assumed to stay alive
+ * for as long as this mapping might exist.
+ */
+int install_special_mapping(struct mm_struct *mm,
+			    unsigned long addr, unsigned long len,
+			    unsigned long vm_flags, struct page **pages)
+{
+	struct vm_area_struct *vma;
+
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (unlikely(vma == NULL))
+		return -ENOMEM;
+
+	vma->vm_mm = mm;
+	vma->vm_start = addr;
+	vma->vm_end = addr + len;
+
+	vma->vm_flags = vm_flags | mm->def_flags;
+	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
+
+	vma->vm_ops = &special_mapping_vmops;
+	vma->vm_private_data = pages;
+
+	if (unlikely(insert_vm_struct(mm, vma))) {
+		kmem_cache_free(vm_area_cachep, vma);
+		return -ENOMEM;
+	}
+
+	mm->total_vm += len >> PAGE_SHIFT;
+
+	return 0;
+}

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 9/11] i386 vDSO: use install_special_mapping
  2007-01-14  5:31 [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Roland McGrath
                   ` (6 preceding siblings ...)
  2007-01-14  5:36 ` [PATCH 8/11] Add install_special_mapping Roland McGrath
@ 2007-01-14  5:36 ` Roland McGrath
  2007-01-14  5:37 ` [PATCH 10/11] x86_64 ia32 " Roland McGrath
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 18+ messages in thread
From: Roland McGrath @ 2007-01-14  5:36 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton; +Cc: linux-kernel


This patch uses install_special_mapping for the i386 vDSO setup,
consolidating duplicated code.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/i386/kernel/sysenter.c |   53 +++++++++----------------------------------
 1 files changed, 11 insertions(+), 42 deletions(-)

diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 5da7442..bc882a2 100644  
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -70,11 +70,12 @@ void enable_sep_cpu(void)
  */
 extern const char vsyscall_int80_start, vsyscall_int80_end;
 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
-static void *syscall_page;
+static struct page *syscall_pages[1];
 
 int __init sysenter_setup(void)
 {
-	syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+	void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+	syscall_pages[0] = virt_to_page(syscall_page);
 
 #ifdef CONFIG_COMPAT_VDSO
 	__set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY);
@@ -96,31 +97,12 @@ int __init sysenter_setup(void)
 }
 
 #ifndef CONFIG_COMPAT_VDSO
-static struct page *syscall_nopage(struct vm_area_struct *vma,
-				unsigned long adr, int *type)
-{
-	struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
-	get_page(p);
-	return p;
-}
-
-/* Prevent VMA merging */
-static void syscall_vma_close(struct vm_area_struct *vma)
-{
-}
-
-static struct vm_operations_struct syscall_vm_ops = {
-	.close = syscall_vma_close,
-	.nopage = syscall_nopage,
-};
-
 /* Defined in vsyscall-sysenter.S */
 extern void SYSENTER_RETURN;
 
 /* Setup a VMA at program startup for the vsyscall page */
 int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
 {
-	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr;
 	int ret;
@@ -132,38 +114,25 @@ int arch_setup_additional_pages(struct l
 		goto up_fail;
 	}
 
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
-	if (!vma) {
-		ret = -ENOMEM;
-		goto up_fail;
-	}
-
-	vma->vm_start = addr;
-	vma->vm_end = addr + PAGE_SIZE;
-	/* MAYWRITE to allow gdb to COW and set breakpoints */
-	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
 	/*
+	 * MAYWRITE to allow gdb to COW and set breakpoints
+	 *
 	 * Make sure the vDSO gets into every core dump.
 	 * Dumping its contents makes post-mortem fully interpretable later
 	 * without matching up the same kernel and hardware config to see
 	 * what PC values meant.
 	 */
-	vma->vm_flags |= VM_ALWAYSDUMP;
-	vma->vm_flags |= mm->def_flags;
-	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
-	vma->vm_ops = &syscall_vm_ops;
-	vma->vm_mm = mm;
-
-	ret = insert_vm_struct(mm, vma);
-	if (unlikely(ret)) {
-		kmem_cache_free(vm_area_cachep, vma);
+	ret = install_special_mapping(mm, addr, PAGE_SIZE,
+				      VM_READ|VM_EXEC|
+				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+				      VM_ALWAYSDUMP,
+				      syscall_pages);
+	if (ret)
 		goto up_fail;
-	}
 
 	current->mm->context.vdso = (void *)addr;
 	current_thread_info()->sysenter_return =
 				    (void *)VDSO_SYM(&SYSENTER_RETURN);
-	mm->total_vm++;
 up_fail:
 	up_write(&mm->mmap_sem);
 	return ret;

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 10/11] x86_64 ia32 vDSO: use install_special_mapping
  2007-01-14  5:31 [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Roland McGrath
                   ` (7 preceding siblings ...)
  2007-01-14  5:36 ` [PATCH 9/11] i386 vDSO: use install_special_mapping Roland McGrath
@ 2007-01-14  5:37 ` Roland McGrath
  2007-01-14  5:37 ` [PATCH 11/11] powerpc " Roland McGrath
  2007-01-17  8:49 ` [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Ingo Molnar
  10 siblings, 0 replies; 18+ messages in thread
From: Roland McGrath @ 2007-01-14  5:37 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton; +Cc: linux-kernel


This patch uses install_special_mapping for the ia32 vDSO setup,
consolidating duplicated code.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/x86_64/ia32/syscall32.c |   75 ++++++++++++------------------------------
 include/asm-x86_64/proto.h   |    1 -
 2 files changed, 21 insertions(+), 55 deletions(-)

diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 59f1fa1..3939f10 100644  
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -18,68 +18,34 @@ extern unsigned char syscall32_syscall[]
 extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
 extern int sysctl_vsyscall32;
 
-char *syscall32_page; 
+static struct page *syscall32_pages[1];
 static int use_sysenter = -1;
 
-static struct page *
-syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type)
-{
-	struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page);
-	get_page(p);
-	return p;
-}
-
-/* Prevent VMA merging */
-static void syscall32_vma_close(struct vm_area_struct *vma)
-{
-}
-
-static struct vm_operations_struct syscall32_vm_ops = {
-	.close = syscall32_vma_close,
-	.nopage = syscall32_nopage,
-};
-
 struct linux_binprm;
 
 /* Setup a VMA at program startup for the vsyscall page */
 int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
 {
-	int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
-	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
 	int ret;
 
-	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
-	if (!vma)
-		return -ENOMEM;
-
-	memset(vma, 0, sizeof(struct vm_area_struct));
-	/* Could randomize here */
-	vma->vm_start = VSYSCALL32_BASE;
-	vma->vm_end = VSYSCALL32_END;
-	/* MAYWRITE to allow gdb to COW and set breakpoints */
-	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+	down_write(&mm->mmap_sem);
 	/*
+	 * MAYWRITE to allow gdb to COW and set breakpoints
+	 *
 	 * Make sure the vDSO gets into every core dump.
 	 * Dumping its contents makes post-mortem fully interpretable later
 	 * without matching up the same kernel and hardware config to see
 	 * what PC values meant.
 	 */
-	vma->vm_flags |= VM_ALWAYSDUMP;
-	vma->vm_flags |= mm->def_flags;
-	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
-	vma->vm_ops = &syscall32_vm_ops;
-	vma->vm_mm = mm;
-
-	down_write(&mm->mmap_sem);
-	if ((ret = insert_vm_struct(mm, vma))) {
-		up_write(&mm->mmap_sem);
-		kmem_cache_free(vm_area_cachep, vma);
-		return ret;
-	}
-	mm->total_vm += npages;
+	/* Could randomize here */
+	ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE,
+				      VM_READ|VM_EXEC|
+				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+				      VM_ALWAYSDUMP,
+				      syscall32_pages);
 	up_write(&mm->mmap_sem);
-	return 0;
+	return ret;
 }
 
 const char *arch_vma_name(struct vm_area_struct *vma)
@@ -92,9 +58,10 @@ const char *arch_vma_name(struct vm_area
 
 static int __init init_syscall32(void)
 { 
-	syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); 
+	char *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
 	if (!syscall32_page) 
 		panic("Cannot allocate syscall32 page"); 
+	syscall32_pages[0] = virt_to_page(syscall32_page);
  	if (use_sysenter > 0) {
  		memcpy(syscall32_page, syscall32_sysenter,
  		       syscall32_sysenter_end - syscall32_sysenter);
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index 6d324b8..a6d2ff5 100644  
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -81,7 +81,6 @@ extern void swap_low_mappings(void);
 extern void __show_regs(struct pt_regs * regs);
 extern void show_regs(struct pt_regs * regs);
 
-extern char *syscall32_page;
 extern void syscall32_cpu_init(void);
 
 extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 11/11] powerpc vDSO: use install_special_mapping
  2007-01-14  5:31 [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Roland McGrath
                   ` (8 preceding siblings ...)
  2007-01-14  5:37 ` [PATCH 10/11] x86_64 ia32 " Roland McGrath
@ 2007-01-14  5:37 ` Roland McGrath
  2007-01-17  8:49 ` [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Ingo Molnar
  10 siblings, 0 replies; 18+ messages in thread
From: Roland McGrath @ 2007-01-14  5:37 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton; +Cc: linux-kernel


This patch uses install_special_mapping for the powerpc vDSO setup,
consolidating duplicated code.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/powerpc/kernel/vdso.c |  104 +++++++++++--------------------------------
 1 files changed, 27 insertions(+), 77 deletions(-)

diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index ae0ede1..50149ec 100644  
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -49,9 +49,13 @@
 /* Max supported size for symbol names */
 #define MAX_SYMNAME	64
 
+#define VDSO32_MAXPAGES	(((0x3000 + PAGE_MASK) >> PAGE_SHIFT) + 2)
+#define VDSO64_MAXPAGES	(((0x3000 + PAGE_MASK) >> PAGE_SHIFT) + 2)
+
 extern char vdso32_start, vdso32_end;
 static void *vdso32_kbase = &vdso32_start;
 unsigned int vdso32_pages;
+static struct page *vdso32_pagelist[VDSO32_MAXPAGES];
 unsigned long vdso32_sigtramp;
 unsigned long vdso32_rt_sigtramp;
 
@@ -59,6 +63,7 @@ unsigned long vdso32_rt_sigtramp;
 extern char vdso64_start, vdso64_end;
 static void *vdso64_kbase = &vdso64_start;
 unsigned int vdso64_pages;
+static struct page *vdso64_pagelist[VDSO64_MAXPAGES];
 unsigned long vdso64_rt_sigtramp;
 #endif /* CONFIG_PPC64 */
 
@@ -165,55 +170,6 @@ static void dump_vdso_pages(struct vm_ar
 #endif /* DEBUG */
 
 /*
- * Keep a dummy vma_close for now, it will prevent VMA merging.
- */
-static void vdso_vma_close(struct vm_area_struct * vma)
-{
-}
-
-/*
- * Our nopage() function, maps in the actual vDSO kernel pages, they will
- * be mapped read-only by do_no_page(), and eventually COW'ed, either
- * right away for an initial write access, or by do_wp_page().
- */
-static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
-				     unsigned long address, int *type)
-{
-	unsigned long offset = address - vma->vm_start;
-	struct page *pg;
-#ifdef CONFIG_PPC64
-	void *vbase = (vma->vm_mm->task_size > TASK_SIZE_USER32) ?
-		vdso64_kbase : vdso32_kbase;
-#else
-	void *vbase = vdso32_kbase;
-#endif
-
-	DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n",
-	    current->comm, address, offset);
-
-	if (address < vma->vm_start || address > vma->vm_end)
-		return NOPAGE_SIGBUS;
-
-	/*
-	 * Last page is systemcfg.
-	 */
-	if ((vma->vm_end - address) <= PAGE_SIZE)
-		pg = virt_to_page(vdso_data);
-	else
-		pg = virt_to_page(vbase + offset);
-
-	get_page(pg);
-	DBG(" ->page count: %d\n", page_count(pg));
-
-	return pg;
-}
-
-static struct vm_operations_struct vdso_vmops = {
-	.close	= vdso_vma_close,
-	.nopage	= vdso_vma_nopage,
-};
-
-/*
  * This is called from binfmt_elf, we create the special vma for the
  * vDSO and insert it into the mm struct tree
  */
@@ -221,20 +177,23 @@ int arch_setup_additional_pages(struct l
 				int executable_stack)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct page **vdso_pagelist;
 	unsigned long vdso_pages;
 	unsigned long vdso_base;
 	int rc;
 
 #ifdef CONFIG_PPC64
 	if (test_thread_flag(TIF_32BIT)) {
+		vdso_pagelist = vdso32_pagelist;
 		vdso_pages = vdso32_pages;
 		vdso_base = VDSO32_MBASE;
 	} else {
+		vdso_pagelist = vdso64_pagelist;
 		vdso_pages = vdso64_pages;
 		vdso_base = VDSO64_MBASE;
 	}
 #else
+	vdso_pagelist = vdso32_pagelist;
 	vdso_pages = vdso32_pages;
 	vdso_base = VDSO32_MBASE;
 #endif
@@ -262,17 +221,6 @@ int arch_setup_additional_pages(struct l
 		goto fail_mmapsem;
 	}
 
-
-	/* Allocate a VMA structure and fill it up */
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
-	if (vma == NULL) {
-		rc = -ENOMEM;
-		goto fail_mmapsem;
-	}
-	vma->vm_mm = mm;
-	vma->vm_start = vdso_base;
-	vma->vm_end = vma->vm_start + (vdso_pages << PAGE_SHIFT);
-
 	/*
 	 * our vma flags don't have VM_WRITE so by default, the process isn't
 	 * allowed to write those pages.
@@ -282,32 +230,26 @@ int arch_setup_additional_pages(struct l
 	 * and your nice userland gettimeofday will be totally dead.
 	 * It's fine to use that for setting breakpoints in the vDSO code
 	 * pages though
-	 */
-	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC;
-	/*
+	 *
 	 * Make sure the vDSO gets into every core dump.
 	 * Dumping its contents makes post-mortem fully interpretable later
 	 * without matching up the same kernel and hardware config to see
 	 * what PC values meant.
 	 */
-	vma->vm_flags |= VM_ALWAYSDUMP;
-	vma->vm_flags |= mm->def_flags;
-	vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
-	vma->vm_ops = &vdso_vmops;
-
-	/* Insert new VMA */
-	rc = insert_vm_struct(mm, vma);
+	rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
+				     VM_READ|VM_EXEC|
+				     VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+				     VM_ALWAYSDUMP,
+				     vdso_pagelist);
 	if (rc)
-		goto fail_vma;
+		goto fail_mmapsem;
 
-	/* Put vDSO base into mm struct and account for memory usage */
+	/* Put vDSO base into mm struct */
 	current->mm->context.vdso_base = vdso_base;
-	mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
 	up_write(&mm->mmap_sem);
 	return 0;
 
- fail_vma:
-	kmem_cache_free(vm_area_cachep, vma);
  fail_mmapsem:
 	up_write(&mm->mmap_sem);
 	return rc;
@@ -778,18 +720,26 @@ void __init vdso_init(void)
 	}
 
 	/* Make sure pages are in the correct state */
+	BUG_ON(vdso32_pages + 2 > VDSO32_MAXPAGES);
 	for (i = 0; i < vdso32_pages; i++) {
 		struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
 		ClearPageReserved(pg);
 		get_page(pg);
-
+		vdso32_pagelist[i] = pg;
 	}
+	vdso32_pagelist[i++] = virt_to_page(vdso_data);
+	vdso32_pagelist[i] = NULL;
+
 #ifdef CONFIG_PPC64
+	BUG_ON(vdso64_pages + 2 > VDSO64_MAXPAGES);
 	for (i = 0; i < vdso64_pages; i++) {
 		struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
 		ClearPageReserved(pg);
 		get_page(pg);
+		vdso64_pagelist[i] = pg;
 	}
+	vdso64_pagelist[i++] = virt_to_page(vdso_data);
+	vdso64_pagelist[i] = NULL;
 #endif /* CONFIG_PPC64 */
 
 	get_page(virt_to_page(vdso_data));

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/11] Fix CONFIG_COMPAT_VDSO
  2007-01-14  5:31 [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Roland McGrath
                   ` (9 preceding siblings ...)
  2007-01-14  5:37 ` [PATCH 11/11] powerpc " Roland McGrath
@ 2007-01-17  8:49 ` Ingo Molnar
  2007-01-17  9:03   ` Roland McGrath
  10 siblings, 1 reply; 18+ messages in thread
From: Ingo Molnar @ 2007-01-17  8:49 UTC (permalink / raw)
  To: Roland McGrath; +Cc: Linus Torvalds, Andrew Morton, linux-kernel


* Roland McGrath <roland@redhat.com> wrote:

> I wouldn't mind if CONFIG_COMPAT_VDSO went away entirely. But if it's 
> there, it should work properly.  Currently it's quite haphazard: both 
> real vma and fixmap are mapped, both are put in the two different AT_* 
> slots, sysenter returns to the vma address rather than the fixmap 
> address, and core dumps are yet another story.

i think your patches #1...#7 are must-haves for v2.6.20, while #8-#11 
could be delayed to v2.6.21?

	Ingo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/11] Fix CONFIG_COMPAT_VDSO
  2007-01-17  8:49 ` [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Ingo Molnar
@ 2007-01-17  9:03   ` Roland McGrath
  2007-01-24 10:25     ` Paul Mundt
  0 siblings, 1 reply; 18+ messages in thread
From: Roland McGrath @ 2007-01-17  9:03 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, Andrew Morton, linux-kernel

> i think your patches #1...#7 are must-haves for v2.6.20, while #8-#11 
> could be delayed to v2.6.21?

Indeed 1-7 are fixes while 8-11 are only cleanups not changing behavior.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/11] i386 vDSO: use VM_ALWAYSDUMP
  2007-01-14  5:34 ` [PATCH 4/11] i386 vDSO: use VM_ALWAYSDUMP Roland McGrath
@ 2007-01-23 19:48   ` Andrew Morton
  2007-01-23 19:57     ` Linus Torvalds
  2007-01-23 19:57     ` Roland McGrath
  0 siblings, 2 replies; 18+ messages in thread
From: Andrew Morton @ 2007-01-23 19:48 UTC (permalink / raw)
  To: Roland McGrath; +Cc: Linus Torvalds, linux-kernel

On Sat, 13 Jan 2007 21:34:28 -0800 (PST)
Roland McGrath <roland@redhat.com> wrote:

> +	     vma = vma->vm_next ?: vma == gate_vma ? NULL : gate_vma) {

Painful.   Can we do this?


diff -puN fs/binfmt_elf.c~i386-vdso-use-vm_alwaysdump-tidy fs/binfmt_elf.c
--- a/fs/binfmt_elf.c~i386-vdso-use-vm_alwaysdump-tidy
+++ a/fs/binfmt_elf.c
@@ -1429,6 +1429,23 @@ static int elf_dump_thread_status(long s
 }
 
 /*
+ * Helper function for iterating across a vma list.  It ensures that the caller
+ * will visit `gate_vma' prior to terminating the search.
+ */
+static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
+					struct vm_area_struct *gate_vma)
+{
+	struct vm_area_struct *ret;
+
+	ret = this_vma->vm_next;
+	if (ret)
+		return ret;
+	if (this_vma == gate_vma)
+		return NULL;
+	return gate_vma;
+}
+
+/*
  * Actual dumper
  *
  * This is a two-pass process; first we find the offsets of the bits,
@@ -1600,8 +1617,7 @@ static int elf_core_dump(long signr, str
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
 
 	/* Write program headers for segments dump */
-	for (vma = current->mm->mmap; vma != NULL;
-	     vma = vma->vm_next ?: vma == gate_vma ? NULL : gate_vma) {
+	for (vma = current->mm->mmap; vma; vma = next_vma(vma, gate_vma)) {
 		struct elf_phdr phdr;
 		size_t sz;
 
@@ -1650,8 +1666,7 @@ static int elf_core_dump(long signr, str
 	/* Align to page */
 	DUMP_SEEK(dataoff - foffset);
 
-	for (vma = current->mm->mmap; vma != NULL;
-	     vma = vma->vm_next ?: vma == gate_vma ? NULL : gate_vma) {
+	for (vma = current->mm->mmap; vma; vma = next_vma(vma, gate_vma)) {
 		unsigned long addr;
 
 		if (!maydump(vma))
_


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/11] i386 vDSO: use VM_ALWAYSDUMP
  2007-01-23 19:48   ` Andrew Morton
@ 2007-01-23 19:57     ` Linus Torvalds
  2007-01-23 20:11       ` Roland McGrath
  2007-01-23 19:57     ` Roland McGrath
  1 sibling, 1 reply; 18+ messages in thread
From: Linus Torvalds @ 2007-01-23 19:57 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Roland McGrath, Linux Kernel Mailing List



On Tue, 23 Jan 2007, Andrew Morton wrote:
>  
>  /*
> + * Helper function for iterating across a vma list.  It ensures that the caller
> + * will visit `gate_vma' prior to terminating the search.

Well, the comment is wrong. The code doesn't actually visit 'gate_vma' if 
the list of VMA's is empty.

Not that the old code did either, so it's not like it's a new bug, but I 
thought I'd point it out anyway. As if we care (but you can probably 
trigger this by having an app that does

	munmap(NULL, TASK_SIZE);

which will cause a SIGSEGV on return (because the stack doesn't exist) and 
then the core-dump should be empty.

Not that I tested anything that evil anyway, nor do I think we really care 
if it means that the gate_vma doesn't get shown in the core-dump either.

		Linus

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/11] i386 vDSO: use VM_ALWAYSDUMP
  2007-01-23 19:48   ` Andrew Morton
  2007-01-23 19:57     ` Linus Torvalds
@ 2007-01-23 19:57     ` Roland McGrath
  1 sibling, 0 replies; 18+ messages in thread
From: Roland McGrath @ 2007-01-23 19:57 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, linux-kernel

> On Sat, 13 Jan 2007 21:34:28 -0800 (PST)
> Roland McGrath <roland@redhat.com> wrote:
> 
> > +	     vma = vma->vm_next ?: vma == gate_vma ? NULL : gate_vma) {
> 
> Painful.   Can we do this?

Can't stand concise, eh?  ;-)  Your version is fine with me.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/11] i386 vDSO: use VM_ALWAYSDUMP
  2007-01-23 19:57     ` Linus Torvalds
@ 2007-01-23 20:11       ` Roland McGrath
  0 siblings, 0 replies; 18+ messages in thread
From: Roland McGrath @ 2007-01-23 20:11 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Andrew Morton, Linux Kernel Mailing List

> Not that the old code did either, so it's not like it's a new bug, but I 
> thought I'd point it out anyway. As if we care (but you can probably 
> trigger this by having an app that does
> 
> 	munmap(NULL, TASK_SIZE);
>
> which will cause a SIGSEGV on return (because the stack doesn't exist) and 
> then the core-dump should be empty.

Ok, damn you.  I admit I thought of exactly this case and then glossed it
over because I didn't want to complicate the patch and discuss the arcane
justification.  And I was doing a good job of repressing the memory of it
completely before you dredged it up.

> Not that I tested anything that evil anyway, nor do I think we really care 
> if it means that the gate_vma doesn't get shown in the core-dump either.

I didn't test it either.  And I do sort of think the vDSO should be (the
only thing) in that case's core dump on anal principle.  Now that you've
brought it up, I feel all dirty for ever having sent in code that doesn't
cover the case.  (Not that we really care, since CONFIG_COMPAT_VDSO won't
actually be turned on in practice.)

Starting the loops with "vma = current->mm->mmap ?: gate_vma" should do it.
But I guess Andrew would prefer it:

/*
 * Helper function for starting an iteration across a task's vma list.
 * Returns the head of tsk's vma list (tsk->mm->mmap); when that list is
 * empty it returns `gate_vma' instead, so the caller still visits it.
 */
static struct vm_area_struct *first_vma(struct task_struct *tsk,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *head = tsk->mm->mmap;

	/* Empty vma list: fall back to the gate vma. */
	if (!head)
		return gate_vma;
	return head;
}

and:

	for (vma = first_vma(current, gate_vma); vma; vma = next_vma(vma, gate_vma)) {


Thanks,
Roland

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/11] Fix CONFIG_COMPAT_VDSO
  2007-01-17  9:03   ` Roland McGrath
@ 2007-01-24 10:25     ` Paul Mundt
  0 siblings, 0 replies; 18+ messages in thread
From: Paul Mundt @ 2007-01-24 10:25 UTC (permalink / raw)
  To: Roland McGrath; +Cc: Ingo Molnar, Linus Torvalds, Andrew Morton, linux-kernel

On Wed, Jan 17, 2007 at 01:03:34AM -0800, Roland McGrath wrote:
> > i think your patches #1...#7 are must-haves for v2.6.20, while #8-#11 
> > could be delayed to v2.6.21?
> 
> Indeed 1-7 are fixes while 8-11 are only cleanups not changing behavior.
> 
Here's an update for the SH bits when the 8-11 parts are ready..

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c
index deb4694..7b0f66f 100644
--- a/arch/sh/kernel/vsyscall/vsyscall.c
+++ b/arch/sh/kernel/vsyscall/vsyscall.c
@@ -37,11 +37,12 @@ __setup("vdso=", vdso_setup);
  * of the ELF DSO images included therein.
  */
 extern const char vsyscall_trapa_start, vsyscall_trapa_end;
-static void *syscall_page;
+static struct page *syscall_pages[1];
 
 int __init vsyscall_init(void)
 {
-	syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+	void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+	syscall_pages[0] = virt_to_page(syscall_page);
 
 	/*
 	 * XXX: Map this page to a fixmap entry if we get around
@@ -55,37 +56,10 @@ int __init vsyscall_init(void)
 	return 0;
 }
 
-static struct page *syscall_vma_nopage(struct vm_area_struct *vma,
-				       unsigned long address, int *type)
-{
-	unsigned long offset = address - vma->vm_start;
-	struct page *page;
-
-	if (address < vma->vm_start || address > vma->vm_end)
-		return NOPAGE_SIGBUS;
-
-	page = virt_to_page(syscall_page + offset);
-
-	get_page(page);
-
-	return page;
-}
-
-/* Prevent VMA merging */
-static void syscall_vma_close(struct vm_area_struct *vma)
-{
-}
-
-static struct vm_operations_struct syscall_vm_ops = {
-	.nopage	= syscall_vma_nopage,
-	.close	= syscall_vma_close,
-};
-
 /* Setup a VMA at program startup for the vsyscall page */
 int arch_setup_additional_pages(struct linux_binprm *bprm,
 				int executable_stack)
 {
-	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr;
 	int ret;
@@ -97,30 +71,16 @@ int arch_setup_additional_pages(struct l
 		goto up_fail;
 	}
 
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
-	if (!vma) {
-		ret = -ENOMEM;
+	ret = install_special_mapping(mm, addr, PAGE_SIZE,
+				      VM_READ | VM_EXEC |
+				      VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC |
+				      VM_ALWAYSDUMP,
+				      syscall_pages);
+	if (unlikely(ret))
 		goto up_fail;
-	}
-
-	vma->vm_start = addr;
-	vma->vm_end = addr + PAGE_SIZE;
-	/* MAYWRITE to allow gdb to COW and set breakpoints */
-	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
-	vma->vm_flags |= mm->def_flags;
-	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
-	vma->vm_ops = &syscall_vm_ops;
-	vma->vm_mm = mm;
-
-	ret = insert_vm_struct(mm, vma);
-	if (unlikely(ret)) {
-		kmem_cache_free(vm_area_cachep, vma);
-		goto up_fail;
-	}
 
 	current->mm->context.vdso = (void *)addr;
 
-	mm->total_vm++;
 up_fail:
 	up_write(&mm->mmap_sem);
 	return ret;

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2007-01-24 10:27 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-01-14  5:31 [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Roland McGrath
2007-01-14  5:33 ` [PATCH 2/11] Fix gate_vma.vm_flags Roland McGrath
2007-01-14  5:33 ` [PATCH 3/11] Add VM_ALWAYSDUMP Roland McGrath
2007-01-14  5:34 ` [PATCH 4/11] i386 vDSO: use VM_ALWAYSDUMP Roland McGrath
2007-01-23 19:48   ` Andrew Morton
2007-01-23 19:57     ` Linus Torvalds
2007-01-23 20:11       ` Roland McGrath
2007-01-23 19:57     ` Roland McGrath
2007-01-14  5:34 ` [PATCH 5/11] x86_64 ia32 " Roland McGrath
2007-01-14  5:35 ` [PATCH 6/11] powerpc " Roland McGrath
2007-01-14  5:36 ` [PATCH 7/11] x86_64 ia32 vDSO: define arch_vma_name Roland McGrath
2007-01-14  5:36 ` [PATCH 8/11] Add install_special_mapping Roland McGrath
2007-01-14  5:36 ` [PATCH 9/11] i386 vDSO: use install_special_mapping Roland McGrath
2007-01-14  5:37 ` [PATCH 10/11] x86_64 ia32 " Roland McGrath
2007-01-14  5:37 ` [PATCH 11/11] powerpc " Roland McGrath
2007-01-17  8:49 ` [PATCH 1/11] Fix CONFIG_COMPAT_VDSO Ingo Molnar
2007-01-17  9:03   ` Roland McGrath
2007-01-24 10:25     ` Paul Mundt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).