LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* Re: [PATCH] report user-readable fixmap area in /proc/PID/maps
@ 2003-11-26  6:49 David Mosberger
  0 siblings, 0 replies; 7+ messages in thread
From: David Mosberger @ 2003-11-26  6:49 UTC (permalink / raw)
  To: linux-kernel; +Cc: roland

This is a modified and slightly expanded version of Roland's earlier
patch (see http://marc.theaimsgroup.com/?l=linux-kernel&m=106551701731039).
What's modified is that I added a gate_map() macro which evaluates to
NULL on platforms that don't use the gate DSO.  With that, the new
code in task_mmu.c will get optimized away without any ugly #ifdefs.
The other difference is that I left vm_flags at 0, rather
than turning on VM_READ+VM_EXEC.  The reason is that I wanted to
discourage apps from trying to read the fix-map area directly, because
that may or may not work (e.g., on ia64, the executable portion of the
gate-DSO is not readable).  Roland, if this is a problem for gdb's
gcore command, we need to find a better solution.

What's new is that I added a sysctl that allows setting the path of
the gate DSO so that user apps relying on /proc/PID/maps can read the
DSO contents just like any other file.  By default, the path is the
empty string.  The idea here is that there would be an init script
which dumps the kernel's gate DSO to a file and then registers the
path to that file via /proc/sys/kernel/gate_dso.  I considered several
other options, but this seems to me the most lightweight and least
intrusive solution to the problem.  Perhaps for 2.7.x the whole issue
of special kernel mappings could be revisited.

Here is a concrete example of how this works with a simple "bt" test-program
which generates a stack trace that goes across a signal-handler:

 $ ./bt # (no gate DSO registered)
 4000000000001210 <do_backtrace+0x30>
 40000000000017c0 <sighandler+0xa0>
 a0000000000207e0
 a000000000020641
 2000000800183ca0 <kill+0x20>
 4000000000001c40 <main+0x460>
 200000080015c9d0 <__libc_start_main+0x3e0>
 4000000000001000 <_start+0x80>
 $ echo /boot/gate.so > /proc/sys/kernel/gate_dso
 $ ./bt
 4000000000001210 <do_backtrace+0x30>
 40000000000017c0 <sighandler+0xa0>
 a0000000000207e0 <__kernel_sigtramp+0xc0>
 a000000000020641 <__kernel_syscall_via_break+0x1>
 2000000800183ca0 <kill+0x20>
 4000000000001990 <main+0x1b0>
 200000080015c9d0 <__libc_start_main+0x3e0>
 4000000000001000 <_start+0x80>

Note that with the gate-DSO path in /proc/PID/maps, one process can
unwind another process and still get the correct output.

	--david

===== fs/proc/task_mmu.c 1.5 vs edited =====
--- 1.5/fs/proc/task_mmu.c	Sat Aug 23 05:08:00 2003
+++ edited/fs/proc/task_mmu.c	Tue Nov 25 22:16:02 2003
@@ -1,6 +1,7 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/seq_file.h>
+#include <asm/elf.h>
 #include <asm/uaccess.h>
 
 char *task_mem(struct mm_struct *mm, char *buffer)
@@ -75,6 +76,23 @@
 	return size;
 }
 
+#ifdef AT_SYSINFO_EHDR
+
+char gate_dso_path[256] = "";
+static struct vm_area_struct gate_vmarea = {
+	/* Do _not_ mark this area as readable, cuz not the entire range may be readable
+	   (e.g., due to execute-only pages or holes) and the tools that read
+	   /proc/PID/maps should read the interesting bits from the gate-DSO file
+	   instead.  */
+	.vm_start = FIXADDR_USER_START,
+	.vm_end = FIXADDR_USER_END
+};
+
+# define gate_map()	&gate_vmarea
+#else
+# define gate_map()	NULL
+#endif
+
 static int show_map(struct seq_file *m, void *v)
 {
 	struct vm_area_struct *map = v;
@@ -100,12 +118,15 @@
 			map->vm_pgoff << PAGE_SHIFT,
 			MAJOR(dev), MINOR(dev), ino, &len);
 
-	if (map->vm_file) {
+	if (map->vm_file || map == gate_map()) {
 		len = 25 + sizeof(void*) * 6 - len;
 		if (len < 1)
 			len = 1;
 		seq_printf(m, "%*c", len, ' ');
-		seq_path(m, file->f_vfsmnt, file->f_dentry, " \t\n\\");
+		if (map == gate_map())
+			seq_printf (m, "%s", gate_dso_path);
+		else
+			seq_path(m, file->f_vfsmnt, file->f_dentry, " \t\n\\");
 	}
 	seq_putc(m, '\n');
 	return 0;
@@ -128,6 +149,8 @@
 	if (!map) {
 		up_read(&mm->mmap_sem);
 		mmput(mm);
+		if (l == -1)
+			map = gate_map();
 	}
 	return map;
 }
@@ -135,7 +158,7 @@
 static void m_stop(struct seq_file *m, void *v)
 {
 	struct vm_area_struct *map = v;
-	if (map) {
+	if (map && map != gate_map()) {
 		struct mm_struct *mm = map->vm_mm;
 		up_read(&mm->mmap_sem);
 		mmput(mm);
@@ -149,6 +172,8 @@
 	if (map->vm_next)
 		return map->vm_next;
 	m_stop(m, v);
+	if (map != gate_map())
+		return gate_map();
 	return NULL;
 }
 
===== include/linux/sysctl.h 1.53 vs edited =====
--- 1.53/include/linux/sysctl.h	Mon Nov 24 10:15:06 2003
+++ edited/include/linux/sysctl.h	Tue Nov 25 22:29:16 2003
@@ -127,6 +127,7 @@
 	KERN_PANIC_ON_OOPS=57,  /* int: whether we will panic on an oops */
 	KERN_HPPA_PWRSW=58,	/* int: hppa soft-power enable */
 	KERN_HPPA_UNALIGNED=59,	/* int: hppa unaligned-trap enable */
+	KERN_GATE_DSO=60,	/* string: path to gate DSO file */
 };
 
 
===== kernel/sysctl.c 1.52 vs edited =====
--- 1.52/kernel/sysctl.c	Thu Oct  9 16:25:29 2003
+++ edited/kernel/sysctl.c	Tue Nov 25 22:28:57 2003
@@ -37,6 +37,7 @@
 #include <linux/hugetlb.h>
 #include <linux/security.h>
 #include <linux/initrd.h>
+#include <asm/elf.h>
 #include <asm/uaccess.h>
 
 #ifdef CONFIG_ROOT_NFS
@@ -65,6 +66,9 @@
 static int maxolduid = 65535;
 static int minolduid;
 
+#ifdef AT_SYSINFO_EHDR
+extern char gate_dso_path[];
+#endif
 #ifdef CONFIG_KMOD
 extern char modprobe_path[];
 #endif
@@ -395,6 +399,17 @@
 		.strategy	= &sysctl_string,
 	},
 #endif
+#ifdef AT_SYSINFO_EHDR
+	{
+		.ctl_name	= KERN_GATE_DSO,
+		.procname	= "gate_dso",
+		.data		= &gate_dso_path,
+		.maxlen		= 256,
+		.mode		= 0644,
+		.proc_handler	= &proc_dostring,
+		.strategy	= &sysctl_string,
+	},
+#endif
 #ifdef CONFIG_CHR_DEV_SG
 	{
 		.ctl_name	= KERN_SG_BIG_BUFF,

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] report user-readable fixmap area in /proc/PID/maps
  2003-10-13  1:49     ` Linus Torvalds
  2003-10-13  2:08       ` William Lee Irwin III
@ 2003-10-13  2:17       ` Andrew Morton
  1 sibling, 0 replies; 7+ messages in thread
From: Andrew Morton @ 2003-10-13  2:17 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: roland, mingo, linux-kernel

Linus Torvalds <torvalds@osdl.org> wrote:
>
> If you really want /proc/PID/maps to look right, add a new vm_area_struct,
>  see if you can allocate it as part of the "struct mm_struct" so that we
>  don't get yet another (unnecessary) allocation on fork time.

It could be done "on demand".  So get_user_pages() and the /proc code will
call the new add_fixmap_vma() on entry.  Hence the additional overhead is
only incurred when /proc/pid/maps is accessed, or get_user_pages() is
called.

It'll need a new flag in mm_struct.  mm_struct.swap_address can be
salvaged: it is no longer used.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] report user-readable fixmap area in /proc/PID/maps
  2003-10-13  1:49     ` Linus Torvalds
@ 2003-10-13  2:08       ` William Lee Irwin III
  2003-10-13  2:17       ` Andrew Morton
  1 sibling, 0 replies; 7+ messages in thread
From: William Lee Irwin III @ 2003-10-13  2:08 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Roland McGrath, Andrew Morton, mingo, linux-kernel

On Sun, Oct 12, 2003 at 06:49:56PM -0700, Linus Torvalds wrote:
> don't get yet another (unnecessary) allocation on fork time. I hate how
> fork()  has slowed down due to other issues (mainly rmap).

Lighter-weight data structure arrangements for ptov resolution have
been available for some time, e.g. since 2.5.65 or so.


-- wli

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] report user-readable fixmap area in /proc/PID/maps
  2003-10-13  1:35   ` Roland McGrath
@ 2003-10-13  1:49     ` Linus Torvalds
  2003-10-13  2:08       ` William Lee Irwin III
  2003-10-13  2:17       ` Andrew Morton
  0 siblings, 2 replies; 7+ messages in thread
From: Linus Torvalds @ 2003-10-13  1:49 UTC (permalink / raw)
  To: Roland McGrath; +Cc: Andrew Morton, mingo, linux-kernel


On Sun, 12 Oct 2003, Roland McGrath wrote:
> 
> I always assumed that people (i.e. Linus) wouldn't like it because of
> the overhead in memory and setup time for an extra vma that is identical
> in every process.  Given the constraint that the fixmap area is the last
> thing in the address space, I imagine that can be mitigated by some
> magic using a single shared fixmap_vma at the end of everybody's chain.

That would be a nice trick and works fine for the regular sorted list, but
it would be nasty for the rb-tree handling.

If you really want /proc/PID/maps to look right, add a new vm_area_struct,
see if you can allocate it as part of the "struct mm_struct" so that we
don't get yet another (unnecessary) allocation on fork time. I hate how
fork()  has slowed down due to other issues (mainly rmap).

Being _guaranteed_ to always have an "end marker" on the vma list would 
potentially actually simplify some of the code, but since this would be 
architecture-dependent, it wouldn't help right now. How ugly does the code 
end up being?

		Linus


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] report user-readable fixmap area in /proc/PID/maps
  2003-10-13  1:17 ` Andrew Morton
@ 2003-10-13  1:35   ` Roland McGrath
  2003-10-13  1:49     ` Linus Torvalds
  0 siblings, 1 reply; 7+ messages in thread
From: Roland McGrath @ 2003-10-13  1:35 UTC (permalink / raw)
  To: Andrew Morton; +Cc: torvalds, mingo, linux-kernel

> This special-casing, and the special-casing in get_user_pages() would go
> away if each process had a real VMA for the fixmap area inserted into its
> VMA tree.

Agreed.

> Remind me again why we cannot do that?

I don't know any reason to think we cannot.  That's not the way it was done
when I first looked at fixmap issues, and I try not to rock the boat more
than necessary (really!).  I know that Ingo had some kernel versions that
used a normal vma for it (and randomized the location on each exec), so he
certainly managed it.  I always assumed that people (i.e. Linus) wouldn't
like it because of the overhead in memory and setup time for an extra vma
that is identical in every process.  Given the constraint that the fixmap
area is the last thing in the address space, I imagine that can be
mitigated by some magic using a single shared fixmap_vma at the end of
everybody's chain.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] report user-readable fixmap area in /proc/PID/maps
  2003-10-07  8:54 Roland McGrath
@ 2003-10-13  1:17 ` Andrew Morton
  2003-10-13  1:35   ` Roland McGrath
  0 siblings, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2003-10-13  1:17 UTC (permalink / raw)
  To: Roland McGrath; +Cc: torvalds, mingo, linux-kernel

Roland McGrath <roland@redhat.com> wrote:
>
> This patch makes /proc/PID/maps report the range from FIXADDR_USER_START to
>  FIXADDR_USER_END as a final pseudo-vma.

This special-casing, and the special-casing in get_user_pages() would go
away if each process had a real VMA for the fixmap area inserted into its
VMA tree.

Remind me again why we cannot do that?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH] report user-readable fixmap area in /proc/PID/maps
@ 2003-10-07  8:54 Roland McGrath
  2003-10-13  1:17 ` Andrew Morton
  0 siblings, 1 reply; 7+ messages in thread
From: Roland McGrath @ 2003-10-07  8:54 UTC (permalink / raw)
  To: Andrew Morton, Linus Torvalds; +Cc: Ingo Molnar, Linux Kernel Mailing List

This patch makes /proc/PID/maps report the range from FIXADDR_USER_START to
FIXADDR_USER_END as a final pseudo-vma.  This is consistent with the notion
that reading /proc/PID/maps tells you about every page containing data that
the process can in fact access, and with things such as ptrace allowing
access to this memory.  Without this, userland tools that want to look at
all of a process's accessible pages need special-case knowledge about
things such as the vsyscall DSO page.  With this change, existing code that
iterates over the /proc/PID/maps lines will cover those pages like any other.
For example, this lets gdb's "gcore" command synthesize a core file from a
live process that contains the vsyscall DSO page as a real core dump would,
using its existing generic iterator code and no new special cases.

If this change goes in, the fixmap_vma variable could be shared with the
get_user_pages code that has an identical static variable.


Thanks,
Roland


Index: linux-2.6/fs/proc/task_mmu.c
===================================================================
RCS file: /home/cvs/linux-2.5/fs/proc/task_mmu.c,v
retrieving revision 1.6
diff -p -b -u -r1.6 task_mmu.c
--- linux-2.6/fs/proc/task_mmu.c 31 Aug 2003 19:31:55 -0000 1.6
+++ linux-2.6/fs/proc/task_mmu.c 7 Oct 2003 06:54:56 -0000
@@ -2,6 +2,7 @@
 #include <linux/hugetlb.h>
 #include <linux/seq_file.h>
 #include <asm/uaccess.h>
+#include <asm/pgtable.h>
 
 char *task_mem(struct mm_struct *mm, char *buffer)
 {
@@ -111,6 +112,16 @@ static int show_map(struct seq_file *m, 
 	return 0;
 }
 
+#ifdef FIXADDR_USER_START
+static struct vm_area_struct fixmap_vma = {
+	.vm_mm = NULL,
+	.vm_start = FIXADDR_USER_START,
+	.vm_end = FIXADDR_USER_END,
+	.vm_page_prot = PAGE_READONLY,
+	.vm_flags = VM_READ | VM_EXEC,
+};
+#endif
+
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
 	struct task_struct *task = m->private;
@@ -128,6 +139,10 @@ static void *m_start(struct seq_file *m,
 	if (!map) {
 		up_read(&mm->mmap_sem);
 		mmput(mm);
+#ifdef FIXADDR_USER_START
+		if (l == (loff_t) -1)
+			map = &fixmap_vma;
+#endif
 	}
 	return map;
 }
@@ -135,6 +150,10 @@ static void *m_start(struct seq_file *m,
 static void m_stop(struct seq_file *m, void *v)
 {
 	struct vm_area_struct *map = v;
+#ifdef FIXADDR_USER_START
+	if (map == &fixmap_vma)
+		return;
+#endif
 	if (map) {
 		struct mm_struct *mm = map->vm_mm;
 		up_read(&mm->mmap_sem);
@@ -149,6 +168,10 @@ static void *m_next(struct seq_file *m, 
 	if (map->vm_next)
 		return map->vm_next;
 	m_stop(m, v);
+#ifdef FIXADDR_USER_START
+	if (map != &fixmap_vma)
+		return &fixmap_vma;
+#endif
 	return NULL;
 }

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2003-11-26  6:49 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-11-26  6:49 [PATCH] report user-readable fixmap area in /proc/PID/maps David Mosberger
  -- strict thread matches above, loose matches on Subject: below --
2003-10-07  8:54 Roland McGrath
2003-10-13  1:17 ` Andrew Morton
2003-10-13  1:35   ` Roland McGrath
2003-10-13  1:49     ` Linus Torvalds
2003-10-13  2:08       ` William Lee Irwin III
2003-10-13  2:17       ` Andrew Morton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).