LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* Add support for userspace stacktraces in tracing/iter_ctrl [v2]
@ 2008-11-02 21:18 Török Edwin
  2008-11-02 21:18 ` [PATCH] Add support for userspace stacktraces in tracing/iter_ctrl Török Edwin
  0 siblings, 1 reply; 25+ messages in thread
From: Török Edwin @ 2008-11-02 21:18 UTC (permalink / raw)
  To: mingo; +Cc: srostedt, a.p.zijlstra, sandmann, linux-kernel, edwintorok


This patch series adds support for userstack tracing to ftrace.
I've tested it on x86_64.

Example usage:
mount -t debugfs nodev /sys/kernel/debug
cd /sys/kernel/debug/tracing
echo userstacktrace >iter_ctrl
echo sym-userobj >iter_ctrl
echo sched_switch >current_tracer
echo 1 >tracing_enabled
cat trace_pipe >/tmp/trace&
.... run application ...
echo 0 >tracing_enabled
cat /tmp/trace

		a.out-1623  [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0
+x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]

If you just want the addresses don't use sym-userobj, but you should resolve the
address to an object before the process exits, otherwise you can't know which
object it belonged to, due to Address Space Layout Randomization (for libraries
at least).

To get meaningful results you'll need your app and
libs compiled with frame-pointers. This usually means rebuilding libc with
frame-pointers (your own app should have frame pointers by default, unless you
used -fomit-frame-pointer).

Another approach would be to use dwarf unwind info that works without frame
pointers (as userspace does it). There was a kernel/unwind.c around 2.6.19, but
it got removed, so I didn't look further at this possibility.

 arch/x86/kernel/stacktrace.c |   57 +++++++++++++++++
 Documentation/ftrace.txt   |   16 ++++
 kernel/trace/trace.c       |  142 +++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.h       |   10 +++
 include/linux/stacktrace.h   |    8 ++

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH] Add support for userspace stacktraces in tracing/iter_ctrl
  2008-11-02 21:18 Add support for userspace stacktraces in tracing/iter_ctrl [v2] Török Edwin
@ 2008-11-02 21:18 ` Török Edwin
  2008-11-02 21:18   ` [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely Török Edwin
  0 siblings, 1 reply; 25+ messages in thread
From: Török Edwin @ 2008-11-02 21:18 UTC (permalink / raw)
  To: mingo; +Cc: srostedt, a.p.zijlstra, sandmann, linux-kernel, edwintorok

Usage example:
mount -t debugfs nodev /sys/kernel/debug
cd /sys/kernel/debug/tracing
echo userstacktrace >iter_ctrl
echo sched_switch >current_tracer
echo 1 >tracing_enabled
.... run application ...
echo 0 >tracing_enabled

Then read one of 'trace','latency_trace','trace_pipe'
It is recomended to compile your userspace programs with frame pointers
(at least glibc + the app you are tracing).

Signed-off-by: Török Edwin <edwintorok@gmail.com>
---
 Documentation/ftrace.txt     |    5 ++-
 arch/x86/kernel/stacktrace.c |   57 +++++++++++++++++++++++++
 include/linux/stacktrace.h   |    8 ++++
 kernel/trace/trace.c         |   93 ++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.h         |    9 ++++
 5 files changed, 171 insertions(+), 1 deletions(-)

diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
index ea5a827..b81618e 100644
--- a/Documentation/ftrace.txt
+++ b/Documentation/ftrace.txt
@@ -330,7 +330,7 @@ output. To see what is available, simply cat the file:
 
   cat /debug/tracing/iter_ctrl
   print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
- noblock nostacktrace nosched-tree
+ noblock nostacktrace nosched-tree nouserstacktrace
 
 To disable one of the options, echo in the option prepended with "no".
 
@@ -384,6 +384,9 @@ Here are the available options:
 		When a trace is recorded, so is the stack of functions.
 		This allows for back traces of trace sites.
 
+  userstacktrace - This option changes the trace.
+		   It records a stacktrace of the current userspace thread.
+
   sched-tree - TBD (any users??)
 
 
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index d1d850a..000a965 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -6,6 +6,7 @@
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 #include <asm/stacktrace.h>
 
 static void save_stack_warning(void *data, char *msg)
@@ -90,3 +91,59 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
+
+struct stack_frame {
+	const void __user	*next_fp;
+	unsigned long		return_address;
+};
+
+static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+{
+	int ret;
+
+	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
+		return 0;
+
+	ret = 1;
+	pagefault_disable();
+	if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
+		ret = 0;
+	pagefault_enable();
+
+	return ret;
+}
+
+void save_stack_trace_user(struct stack_trace *trace)
+{
+	/*
+	 * Trace user stack if we are not a kernel thread
+	 */
+	if (current->mm) {
+		const struct pt_regs *regs = task_pt_regs(current);
+		const void __user *fp = (const void __user *)regs->bp;
+
+		if (trace->nr_entries < trace->max_entries)
+			trace->entries[trace->nr_entries++] = regs->ip;
+
+		while (trace->nr_entries < trace->max_entries) {
+			struct stack_frame frame;
+			frame.next_fp = NULL;
+			frame.return_address = 0;
+			if (!copy_stack_frame(fp, &frame))
+				break;
+			if ((unsigned long)fp < regs->sp)
+				break;
+			if (frame.return_address)
+				trace->entries[trace->nr_entries++] =
+					frame.return_address;
+			if (fp == frame.next_fp)
+				break;
+			fp = frame.next_fp;
+		}
+	}
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 6b8e54a..46704a5 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -18,9 +18,17 @@ extern void save_stack_trace_tsk(struct task_struct *tsk,
 				struct stack_trace *trace);
 
 extern void print_stack_trace(struct stack_trace *trace, int spaces);
+
+#ifdef CONFIG_X86
+extern void save_stack_trace_user(struct stack_trace *trace);
+#else
+# define save_stack_trace_user(trace)              do { } while (0)
+#endif
+
 #else
 # define save_stack_trace(trace)			do { } while (0)
 # define save_stack_trace_tsk(tsk, trace)		do { } while (0)
+# define save_stack_trace_user(trace)              do { } while (0)
 # define print_stack_trace(trace, spaces)		do { } while (0)
 #endif
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5b8c90d..bb965af 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -235,6 +235,7 @@ static const char *trace_options[] = {
 	"stacktrace",
 	"sched-tree",
 	"ftrace_printk",
+	"userstacktrace",
 	NULL
 };
 
@@ -762,6 +763,44 @@ void __trace_stack(struct trace_array *tr,
 	ftrace_trace_stack(tr, data, flags, skip, preempt_count());
 }
 
+static void ftrace_trace_userstack(struct trace_array *tr,
+		   struct trace_array_cpu *data,
+		   unsigned long flags, int pc)
+{
+	struct userstack_entry *entry;
+	struct stack_trace trace;
+	struct ring_buffer_event *event;
+	unsigned long irq_flags;
+
+	if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
+		return;
+
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
+	entry->ent.type		= TRACE_USER_STACK;
+
+	memset(&entry->caller, 0, sizeof(entry->caller));
+
+	trace.nr_entries	= 0;
+	trace.max_entries	= FTRACE_STACK_ENTRIES;
+	trace.skip		= 0;
+	trace.entries		= entry->caller;
+
+	save_stack_trace_user(&trace);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+}
+
+void __trace_userstack(struct trace_array *tr,
+		   struct trace_array_cpu *data,
+		   unsigned long flags)
+{
+	ftrace_trace_userstack(tr, data, flags, preempt_count());
+}
+
 static void
 ftrace_trace_special(void *__tr, void *__data,
 		     unsigned long arg1, unsigned long arg2, unsigned long arg3,
@@ -785,6 +824,7 @@ ftrace_trace_special(void *__tr, void *__data,
 	entry->arg3			= arg3;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	ftrace_trace_stack(tr, data, irq_flags, 4, pc);
+	ftrace_trace_userstack(tr, data, irq_flags, pc);
 
 	trace_wake_up();
 }
@@ -823,6 +863,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	entry->next_cpu	= task_cpu(next);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	ftrace_trace_stack(tr, data, flags, 5, pc);
+	ftrace_trace_userstack(tr, data, flags, pc);
 }
 
 void
@@ -852,6 +893,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry->next_cpu			= task_cpu(wakee);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	ftrace_trace_stack(tr, data, flags, 6, pc);
+	ftrace_trace_userstack(tr, data, flags, pc);
 
 	trace_wake_up();
 }
@@ -1175,6 +1217,31 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
 	return ret;
 }
 
+static int
+seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
+		unsigned long sym_flags)
+{
+	int ret = 1;
+	unsigned i;
+
+	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+		unsigned long ip = entry->caller[i];
+
+		if (ip == ULONG_MAX || !ret)
+			break;
+		if (i)
+			ret = trace_seq_puts(s, " <- ");
+		if (!ip) {
+			ret = trace_seq_puts(s, "??");
+			continue;
+		}
+		if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/)
+			ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
+	}
+
+	return ret;
+}
+
 static void print_lat_help_header(struct seq_file *m)
 {
 	seq_puts(m, "#                  _------=> CPU#            \n");
@@ -1462,6 +1529,16 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 			trace_seq_print_cont(s, iter);
 		break;
 	}
+	case TRACE_USER_STACK: {
+		struct userstack_entry *field;
+
+		trace_assign_type(field, entry);
+
+		seq_print_userip_objs(field, s, sym_flags);
+		if (entry->flags & TRACE_FLAG_CONT)
+			trace_seq_print_cont(s, iter);
+		break;
+	}
 	default:
 		trace_seq_printf(s, "Unknown type %d\n", entry->type);
 	}
@@ -1598,6 +1675,19 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 			trace_seq_print_cont(s, iter);
 		break;
 	}
+	case TRACE_USER_STACK: {
+		struct userstack_entry *field;
+
+		trace_assign_type(field, entry);
+
+		ret = seq_print_userip_objs(field, s, sym_flags);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+		ret = trace_seq_putc(s, '\n');
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+		break;
+	}
 	}
 	return TRACE_TYPE_HANDLED;
 }
@@ -1657,6 +1747,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 		break;
 	}
 	case TRACE_SPECIAL:
+	case TRACE_USER_STACK:
 	case TRACE_STACK: {
 		struct special_entry *field;
 
@@ -1745,6 +1836,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
 		break;
 	}
 	case TRACE_SPECIAL:
+	case TRACE_USER_STACK:
 	case TRACE_STACK: {
 		struct special_entry *field;
 
@@ -1799,6 +1891,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 		break;
 	}
 	case TRACE_SPECIAL:
+	case TRACE_USER_STACK:
 	case TRACE_STACK: {
 		struct special_entry *field;
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8465ad0..4c81642 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -22,6 +22,7 @@ enum trace_type {
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
 	TRACE_BOOT,
+	TRACE_USER_STACK,
 
 	__TRACE_LAST_TYPE
 };
@@ -38,6 +39,7 @@ struct trace_entry {
 	unsigned char		flags;
 	unsigned char		preempt_count;
 	int			pid;
+	int			tgid;
 };
 
 /*
@@ -85,6 +87,11 @@ struct stack_entry {
 	unsigned long		caller[FTRACE_STACK_ENTRIES];
 };
 
+struct userstack_entry {
+	struct trace_entry	ent;
+	unsigned long		caller[FTRACE_STACK_ENTRIES];
+};
+
 /*
  * ftrace_printk entry:
  */
@@ -212,6 +219,7 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct ctx_switch_entry, 0);	\
 		IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
 		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
+		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
 		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
 		IF_ASSIGN(var, ent, struct special_entry, 0);		\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
@@ -415,6 +423,7 @@ enum trace_iterator_flags {
 	TRACE_ITER_STACKTRACE		= 0x100,
 	TRACE_ITER_SCHED_TREE		= 0x200,
 	TRACE_ITER_PRINTK		= 0x400,
+	TRACE_ITER_USERSTACKTRACE       = 0x800
 };
 
 extern struct tracer nop_trace;
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-02 21:18 ` [PATCH] Add support for userspace stacktraces in tracing/iter_ctrl Török Edwin
@ 2008-11-02 21:18   ` Török Edwin
  2008-11-02 21:25     ` Al Viro
                       ` (2 more replies)
  0 siblings, 3 replies; 25+ messages in thread
From: Török Edwin @ 2008-11-02 21:18 UTC (permalink / raw)
  To: mingo; +Cc: srostedt, a.p.zijlstra, sandmann, linux-kernel, edwintorok

Example usage:
mount -t debugfs nodev /sys/kernel/debug
cd /sys/kernel/debug/tracing
echo userstacktrace >iter_ctrl
echo sym-userobj >iter_ctrl
echo sched_switch >current_tracer
echo 1 >tracing_enabled
cat trace_pipe >/tmp/trace&
.... run application ...
echo 0 >tracing_enabled
cat /tmp/trace

You'll see stack entries like:
 /lib/libpthread-2.7.so[+0xd370]
You can convert them to function/line using:
 addr2line -fie /lib/libpthread-2.7.so 0xd370
Or
 addr2line -fie /usr/lib/debug/libpthread-2.7.so 0xd370

For non-PIC/PIE executables this won't work:
 a.out[+0x73b]
You need to run the following: addr2line -fie a.out 0x40073b
(where 0x400000 is the default load address of a.out)

Signed-off-by: Török Edwin <edwintorok@gmail.com>
---
 Documentation/ftrace.txt |   13 ++++++++++-
 kernel/trace/trace.c     |   51 +++++++++++++++++++++++++++++++++++++++++++++-
 kernel/trace/trace.h     |    3 +-
 3 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
index b81618e..c5670a9 100644
--- a/Documentation/ftrace.txt
+++ b/Documentation/ftrace.txt
@@ -330,7 +330,7 @@ output. To see what is available, simply cat the file:
 
   cat /debug/tracing/iter_ctrl
   print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
- noblock nostacktrace nosched-tree nouserstacktrace
+ noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj
 
 To disable one of the options, echo in the option prepended with "no".
 
@@ -387,6 +387,17 @@ Here are the available options:
   userstacktrace - This option changes the trace.
 		   It records a stacktrace of the current userspace thread.
 
+  sym-userobj - when user stacktrace are enabled, look up which object the
+		address belongs to, and print a relative address
+		This is especially useful when ASLR is on, otherwise you don't
+		get a chance to resolve the address to object/file/line after the app is no
+		longer running
+
+		The lookup is performed when you read trace,trace_pipe,latency_trace. Example:
+
+		a.out-1623  [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0
+x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
+
   sched-tree - TBD (any users??)
 
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bb965af..b5f4068 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -236,6 +236,7 @@ static const char *trace_options[] = {
 	"sched-tree",
 	"ftrace_printk",
 	"userstacktrace",
+	"sym-userobj",
 	NULL
 };
 
@@ -382,6 +383,20 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
 	return trace_seq_putmem(s, hex, j);
 }
 
+static int
+trace_seq_path(struct trace_seq *s, struct path *path)
+{
+	int ret;
+	struct seq_file m;
+	m.count = s->len;
+	m.size = PAGE_SIZE;
+	m.buf = s->buffer;
+	ret = seq_path(&m, path, "\n");
+	if (ret)
+		s->len = m.count;
+	return ret;
+}
+
 static void
 trace_seq_reset(struct trace_seq *s)
 {
@@ -678,6 +693,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 
 	entry->preempt_count		= pc & 0xff;
 	entry->pid			= (tsk) ? tsk->pid : 0;
+	entry->tgid               	= (tsk) ? tsk->tgid : 0;
 	entry->flags =
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -1221,11 +1237,26 @@ static int
 seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
 		unsigned long sym_flags)
 {
+	struct mm_struct *mm = NULL;
 	int ret = 1;
 	unsigned i;
 
+	if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
+		struct task_struct *task;
+		/* we do the lookup on the thread group leader,
+		 * since individual threads might have already quit! */
+		rcu_read_lock();
+		task = find_task_by_vpid(entry->ent.tgid);
+		rcu_read_unlock();
+
+		if (task)
+			mm = get_task_mm(task);
+	}
+
 	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 		unsigned long ip = entry->caller[i];
+		unsigned long vmstart = 0;
+		struct file *file = NULL;
 
 		if (ip == ULONG_MAX || !ret)
 			break;
@@ -1235,10 +1266,25 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
 			ret = trace_seq_puts(s, "??");
 			continue;
 		}
-		if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/)
+		if (mm) {
+			const struct vm_area_struct *vma = find_vma(mm, ip);
+			if (vma) {
+				file = vma->vm_file;
+				vmstart = vma->vm_start;
+			}
+		}
+		if (file) {
+			ret = trace_seq_path(s, &file->f_path);
+			if (ret)
+				ret = trace_seq_printf(s, "[+0x%lx]",
+						ip - vmstart);
+		}
+		if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
 			ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
 	}
 
+	if (mm)
+		mmput(mm);
 	return ret;
 }
 
@@ -3239,6 +3285,9 @@ void ftrace_dump(void)
 		atomic_inc(&global_trace.data[cpu]->disabled);
 	}
 
+	/* don't look at user memory in panic mode */
+	trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
+
 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
 
 	iter.tr = &global_trace;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4c81642..5f94aed 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -423,7 +423,8 @@ enum trace_iterator_flags {
 	TRACE_ITER_STACKTRACE		= 0x100,
 	TRACE_ITER_SCHED_TREE		= 0x200,
 	TRACE_ITER_PRINTK		= 0x400,
-	TRACE_ITER_USERSTACKTRACE       = 0x800
+	TRACE_ITER_USERSTACKTRACE       = 0x800,
+	TRACE_ITER_SYM_USEROBJ          = 0x1000
 };
 
 extern struct tracer nop_trace;
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-02 21:18   ` [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely Török Edwin
@ 2008-11-02 21:25     ` Al Viro
  2008-11-02 21:28       ` Török Edwin
  2008-11-03  7:47     ` Ingo Molnar
  2008-11-03 19:56     ` Peter Zijlstra
  2 siblings, 1 reply; 25+ messages in thread
From: Al Viro @ 2008-11-02 21:25 UTC (permalink / raw)
  To: T??r??k Edwin; +Cc: mingo, srostedt, a.p.zijlstra, sandmann, linux-kernel

On Sun, Nov 02, 2008 at 11:18:14PM +0200, T??r??k Edwin wrote:
> +static int
> +trace_seq_path(struct trace_seq *s, struct path *path)
> +{
> +	int ret;
> +	struct seq_file m;
> +	m.count = s->len;
> +	m.size = PAGE_SIZE;
> +	m.buf = s->buffer;
> +	ret = seq_path(&m, path, "\n");
> +	if (ret)
> +		s->len = m.count;
> +	return ret;
> +}

NAK.  seq_path() is a blatantly wrong thing to use here.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-02 21:25     ` Al Viro
@ 2008-11-02 21:28       ` Török Edwin
  2008-11-02 21:40         ` Al Viro
  0 siblings, 1 reply; 25+ messages in thread
From: Török Edwin @ 2008-11-02 21:28 UTC (permalink / raw)
  To: Al Viro; +Cc: mingo, srostedt, a.p.zijlstra, sandmann, linux-kernel

On 2008-11-02 23:25, Al Viro wrote:
> On Sun, Nov 02, 2008 at 11:18:14PM +0200, T??r??k Edwin wrote:
>   
>> +static int
>> +trace_seq_path(struct trace_seq *s, struct path *path)
>> +{
>> +	int ret;
>> +	struct seq_file m;
>> +	m.count = s->len;
>> +	m.size = PAGE_SIZE;
>> +	m.buf = s->buffer;
>> +	ret = seq_path(&m, path, "\n");
>> +	if (ret)
>> +		s->len = m.count;
>> +	return ret;
>> +}
>>     
>
> NAK.  seq_path() is a blatantly wrong thing to use here.
>   

Are there any alternatives I could use?

This function is called when I do 'cat /sys/kernel/debug/trace', not
during tracing itself.

Best regards,
--Edwin

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-02 21:28       ` Török Edwin
@ 2008-11-02 21:40         ` Al Viro
  2008-11-03  7:32           ` Ingo Molnar
  0 siblings, 1 reply; 25+ messages in thread
From: Al Viro @ 2008-11-02 21:40 UTC (permalink / raw)
  To: T?r?k Edwin; +Cc: mingo, srostedt, a.p.zijlstra, sandmann, linux-kernel

On Sun, Nov 02, 2008 at 11:28:20PM +0200, T?r?k Edwin wrote:
> On 2008-11-02 23:25, Al Viro wrote:
> > On Sun, Nov 02, 2008 at 11:18:14PM +0200, T??r??k Edwin wrote:
> >   
> >> +static int
> >> +trace_seq_path(struct trace_seq *s, struct path *path)
> >> +{
> >> +	int ret;
> >> +	struct seq_file m;
> >> +	m.count = s->len;
> >> +	m.size = PAGE_SIZE;
> >> +	m.buf = s->buffer;
> >> +	ret = seq_path(&m, path, "\n");
> >> +	if (ret)
> >> +		s->len = m.count;
> >> +	return ret;
> >> +}
> >>     
> >
> > NAK.  seq_path() is a blatantly wrong thing to use here.
> >   
> 
> Are there any alternatives I could use?
> 
> This function is called when I do 'cat /sys/kernel/debug/trace', not
> during tracing itself.

_IF_ you want to get the mangled path, you want to take the guts of
seq_path() into a separate helper and use it - without any references
to struct seq_file since it's completely irrelevant there and you are
using it only as a vehicle for passing the stuff you care about through
the seq_path() interface.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-02 21:40         ` Al Viro
@ 2008-11-03  7:32           ` Ingo Molnar
  2008-11-04 18:26             ` Christoph Hellwig
  0 siblings, 1 reply; 25+ messages in thread
From: Ingo Molnar @ 2008-11-03  7:32 UTC (permalink / raw)
  To: Al Viro; +Cc: T?r?k Edwin, srostedt, a.p.zijlstra, sandmann, linux-kernel


* Al Viro <viro@ZenIV.linux.org.uk> wrote:

> On Sun, Nov 02, 2008 at 11:28:20PM +0200, T?r?k Edwin wrote:
> > On 2008-11-02 23:25, Al Viro wrote:
> > > On Sun, Nov 02, 2008 at 11:18:14PM +0200, T??r??k Edwin wrote:
> > >   
> > >> +static int
> > >> +trace_seq_path(struct trace_seq *s, struct path *path)
> > >> +{
> > >> +	int ret;
> > >> +	struct seq_file m;
> > >> +	m.count = s->len;
> > >> +	m.size = PAGE_SIZE;
> > >> +	m.buf = s->buffer;
> > >> +	ret = seq_path(&m, path, "\n");
> > >> +	if (ret)
> > >> +		s->len = m.count;
> > >> +	return ret;
> > >> +}
> > >>     
> > >
> > > NAK.  seq_path() is a blatantly wrong thing to use here.
> > >   
> > 
> > Are there any alternatives I could use?
> > 
> > This function is called when I do 'cat /sys/kernel/debug/trace', not
> > during tracing itself.
> 
> _IF_ you want to get the mangled path, you want to take the guts of 
> seq_path() into a separate helper and use it - without any 
> references to struct seq_file since it's completely irrelevant there 
> and you are using it only as a vehicle for passing the stuff you 
> care about through the seq_path() interface.

could you please help out with such a helper? This is really about 
visualization, not to rely on it.

	Ingo

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-02 21:18   ` [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely Török Edwin
  2008-11-02 21:25     ` Al Viro
@ 2008-11-03  7:47     ` Ingo Molnar
  2008-11-03  8:16       ` Török Edwin
  2008-11-03 19:56     ` Peter Zijlstra
  2 siblings, 1 reply; 25+ messages in thread
From: Ingo Molnar @ 2008-11-03  7:47 UTC (permalink / raw)
  To: Török Edwin; +Cc: srostedt, a.p.zijlstra, sandmann, linux-kernel


* Török Edwin <edwintorok@gmail.com> wrote:

> +		struct task_struct *task;
> +		/* we do the lookup on the thread group leader,
> +		 * since individual threads might have already quit! */

please use the customary comment style:

  /*
   * Comment .....
   * ...... goes here:
   */

> -		if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/)
> +		if (mm) {
> +			const struct vm_area_struct *vma = find_vma(mm, ip);
> +			if (vma) {
> +				file = vma->vm_file;
> +				vmstart = vma->vm_start;
> +			}
> +		}
> +		if (file) {
> +			ret = trace_seq_path(s, &file->f_path);
> +			if (ret)
> +				ret = trace_seq_printf(s, "[+0x%lx]",
> +						ip - vmstart);
> +		}
> +		if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
>  			ret = trace_seq_printf(s, " <" IP_FMT ">", ip);

the function is getting a bit large - would it make sense to split out 
this section into a helper inline function?

another small nit: i cleaned up the subject line and the commit log 
message for you - see the two patches below - could you please keep it 
for future (v3) submissions of these patches? It's just small 
formatting changes.

Your patches are a nice feature we want to have nevertheless - to be 
able to see where a user-space app is running has been one of the 
historically weak points of kernel instrumentation.

	Ingo

-------------->
Subject: tracing: add support for userspace stacktraces in tracing/iter_ctrl
From: Török Edwin <edwintorok@gmail.com>
Date: Sun, 2 Nov 2008 23:18:13 +0200

Impact: add new (default-off) tracing visualization feature

Usage example:

 mount -t debugfs nodev /sys/kernel/debug
 cd /sys/kernel/debug/tracing
 echo userstacktrace >iter_ctrl
 echo sched_switch >current_tracer
 echo 1 >tracing_enabled
 .... run application ...
 echo 0 >tracing_enabled

Then read one of 'trace','latency_trace','trace_pipe'.

To get the best output you can compile your userspace programs with
frame pointers (at least glibc + the app you are tracing).

Signed-off-by: Török Edwin <edwintorok@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/ftrace.txt     |    5 +-
 arch/x86/kernel/stacktrace.c |   57 ++++++++++++++++++++++++++
 include/linux/stacktrace.h   |    8 +++
 kernel/trace/trace.c         |   93 +++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.h         |    9 ++++
 5 files changed, 171 insertions(+), 1 deletion(-)

Index: tip/Documentation/ftrace.txt
===================================================================
--- tip.orig/Documentation/ftrace.txt
+++ tip/Documentation/ftrace.txt
@@ -330,7 +330,7 @@ output. To see what is available, simply
 
   cat /debug/tracing/iter_ctrl
   print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
- noblock nostacktrace nosched-tree
+ noblock nostacktrace nosched-tree nouserstacktrace
 
 To disable one of the options, echo in the option prepended with "no".
 
@@ -384,6 +384,9 @@ Here are the available options:
 		When a trace is recorded, so is the stack of functions.
 		This allows for back traces of trace sites.
 
+  userstacktrace - This option changes the trace.
+		   It records a stacktrace of the current userspace thread.
+
   sched-tree - TBD (any users??)
 
 
Index: tip/arch/x86/kernel/stacktrace.c
===================================================================
--- tip.orig/arch/x86/kernel/stacktrace.c
+++ tip/arch/x86/kernel/stacktrace.c
@@ -6,6 +6,7 @@
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 #include <asm/stacktrace.h>
 
 static void save_stack_warning(void *data, char *msg)
@@ -90,3 +91,59 @@ void save_stack_trace_tsk(struct task_st
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
+
+struct stack_frame {
+	const void __user	*next_fp;
+	unsigned long		return_address;
+};
+
+static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+{
+	int ret;
+
+	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
+		return 0;
+
+	ret = 1;
+	pagefault_disable();
+	if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
+		ret = 0;
+	pagefault_enable();
+
+	return ret;
+}
+
+void save_stack_trace_user(struct stack_trace *trace)
+{
+	/*
+	 * Trace user stack if we are not a kernel thread
+	 */
+	if (current->mm) {
+		const struct pt_regs *regs = task_pt_regs(current);
+		const void __user *fp = (const void __user *)regs->bp;
+
+		if (trace->nr_entries < trace->max_entries)
+			trace->entries[trace->nr_entries++] = regs->ip;
+
+		while (trace->nr_entries < trace->max_entries) {
+			struct stack_frame frame;
+			frame.next_fp = NULL;
+			frame.return_address = 0;
+			if (!copy_stack_frame(fp, &frame))
+				break;
+			if ((unsigned long)fp < regs->sp)
+				break;
+			if (frame.return_address)
+				trace->entries[trace->nr_entries++] =
+					frame.return_address;
+			if (fp == frame.next_fp)
+				break;
+			fp = frame.next_fp;
+		}
+	}
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
Index: tip/include/linux/stacktrace.h
===================================================================
--- tip.orig/include/linux/stacktrace.h
+++ tip/include/linux/stacktrace.h
@@ -18,9 +18,17 @@ extern void save_stack_trace_tsk(struct 
 				struct stack_trace *trace);
 
 extern void print_stack_trace(struct stack_trace *trace, int spaces);
+
+#ifdef CONFIG_X86
+extern void save_stack_trace_user(struct stack_trace *trace);
+#else
+# define save_stack_trace_user(trace)              do { } while (0)
+#endif
+
 #else
 # define save_stack_trace(trace)			do { } while (0)
 # define save_stack_trace_tsk(tsk, trace)		do { } while (0)
+# define save_stack_trace_user(trace)              do { } while (0)
 # define print_stack_trace(trace, spaces)		do { } while (0)
 #endif
 
Index: tip/kernel/trace/trace.c
===================================================================
--- tip.orig/kernel/trace/trace.c
+++ tip/kernel/trace/trace.c
@@ -235,6 +235,7 @@ static const char *trace_options[] = {
 	"stacktrace",
 	"sched-tree",
 	"ftrace_printk",
+	"userstacktrace",
 	NULL
 };
 
@@ -762,6 +763,44 @@ void __trace_stack(struct trace_array *t
 	ftrace_trace_stack(tr, data, flags, skip, preempt_count());
 }
 
+static void ftrace_trace_userstack(struct trace_array *tr,
+		   struct trace_array_cpu *data,
+		   unsigned long flags, int pc)
+{
+	struct userstack_entry *entry;
+	struct stack_trace trace;
+	struct ring_buffer_event *event;
+	unsigned long irq_flags;
+
+	if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
+		return;
+
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
+	entry->ent.type		= TRACE_USER_STACK;
+
+	memset(&entry->caller, 0, sizeof(entry->caller));
+
+	trace.nr_entries	= 0;
+	trace.max_entries	= FTRACE_STACK_ENTRIES;
+	trace.skip		= 0;
+	trace.entries		= entry->caller;
+
+	save_stack_trace_user(&trace);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+}
+
+void __trace_userstack(struct trace_array *tr,
+		   struct trace_array_cpu *data,
+		   unsigned long flags)
+{
+	ftrace_trace_userstack(tr, data, flags, preempt_count());
+}
+
 static void
 ftrace_trace_special(void *__tr, void *__data,
 		     unsigned long arg1, unsigned long arg2, unsigned long arg3,
@@ -785,6 +824,7 @@ ftrace_trace_special(void *__tr, void *_
 	entry->arg3			= arg3;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	ftrace_trace_stack(tr, data, irq_flags, 4, pc);
+	ftrace_trace_userstack(tr, data, irq_flags, pc);
 
 	trace_wake_up();
 }
@@ -823,6 +863,7 @@ tracing_sched_switch_trace(struct trace_
 	entry->next_cpu	= task_cpu(next);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	ftrace_trace_stack(tr, data, flags, 5, pc);
+	ftrace_trace_userstack(tr, data, flags, pc);
 }
 
 void
@@ -852,6 +893,7 @@ tracing_sched_wakeup_trace(struct trace_
 	entry->next_cpu			= task_cpu(wakee);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	ftrace_trace_stack(tr, data, flags, 6, pc);
+	ftrace_trace_userstack(tr, data, flags, pc);
 
 	trace_wake_up();
 }
@@ -1175,6 +1217,31 @@ seq_print_ip_sym(struct trace_seq *s, un
 	return ret;
 }
 
+static int
+seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
+		unsigned long sym_flags)
+{
+	int ret = 1;
+	unsigned i;
+
+	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+		unsigned long ip = entry->caller[i];
+
+		if (ip == ULONG_MAX || !ret)
+			break;
+		if (i)
+			ret = trace_seq_puts(s, " <- ");
+		if (!ip) {
+			ret = trace_seq_puts(s, "??");
+			continue;
+		}
+		if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/)
+			ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
+	}
+
+	return ret;
+}
+
 static void print_lat_help_header(struct seq_file *m)
 {
 	seq_puts(m, "#                  _------=> CPU#            \n");
@@ -1462,6 +1529,16 @@ print_lat_fmt(struct trace_iterator *ite
 			trace_seq_print_cont(s, iter);
 		break;
 	}
+	case TRACE_USER_STACK: {
+		struct userstack_entry *field;
+
+		trace_assign_type(field, entry);
+
+		seq_print_userip_objs(field, s, sym_flags);
+		if (entry->flags & TRACE_FLAG_CONT)
+			trace_seq_print_cont(s, iter);
+		break;
+	}
 	default:
 		trace_seq_printf(s, "Unknown type %d\n", entry->type);
 	}
@@ -1598,6 +1675,19 @@ static enum print_line_t print_trace_fmt
 			trace_seq_print_cont(s, iter);
 		break;
 	}
+	case TRACE_USER_STACK: {
+		struct userstack_entry *field;
+
+		trace_assign_type(field, entry);
+
+		ret = seq_print_userip_objs(field, s, sym_flags);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+		ret = trace_seq_putc(s, '\n');
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+		break;
+	}
 	}
 	return TRACE_TYPE_HANDLED;
 }
@@ -1657,6 +1747,7 @@ static enum print_line_t print_raw_fmt(s
 		break;
 	}
 	case TRACE_SPECIAL:
+	case TRACE_USER_STACK:
 	case TRACE_STACK: {
 		struct special_entry *field;
 
@@ -1745,6 +1836,7 @@ static enum print_line_t print_hex_fmt(s
 		break;
 	}
 	case TRACE_SPECIAL:
+	case TRACE_USER_STACK:
 	case TRACE_STACK: {
 		struct special_entry *field;
 
@@ -1799,6 +1891,7 @@ static enum print_line_t print_bin_fmt(s
 		break;
 	}
 	case TRACE_SPECIAL:
+	case TRACE_USER_STACK:
 	case TRACE_STACK: {
 		struct special_entry *field;
 
Index: tip/kernel/trace/trace.h
===================================================================
--- tip.orig/kernel/trace/trace.h
+++ tip/kernel/trace/trace.h
@@ -22,6 +22,7 @@ enum trace_type {
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
 	TRACE_BOOT,
+	TRACE_USER_STACK,
 
 	__TRACE_LAST_TYPE
 };
@@ -38,6 +39,7 @@ struct trace_entry {
 	unsigned char		flags;
 	unsigned char		preempt_count;
 	int			pid;
+	int			tgid;
 };
 
 /*
@@ -85,6 +87,11 @@ struct stack_entry {
 	unsigned long		caller[FTRACE_STACK_ENTRIES];
 };
 
+struct userstack_entry {
+	struct trace_entry	ent;
+	unsigned long		caller[FTRACE_STACK_ENTRIES];
+};
+
 /*
  * ftrace_printk entry:
  */
@@ -212,6 +219,7 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct ctx_switch_entry, 0);	\
 		IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
 		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
+		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
 		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
 		IF_ASSIGN(var, ent, struct special_entry, 0);		\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
@@ -415,6 +423,7 @@ enum trace_iterator_flags {
 	TRACE_ITER_STACKTRACE		= 0x100,
 	TRACE_ITER_SCHED_TREE		= 0x200,
 	TRACE_ITER_PRINTK		= 0x400,
+	TRACE_ITER_USERSTACKTRACE       = 0x800
 };
 
 extern struct tracer nop_trace;

------------------->
Subject: tracing: identify which executable object the userspace address belongs to
From: Török Edwin <edwintorok@gmail.com>
Date: Sun, 2 Nov 2008 23:18:14 +0200

Impact: modify+improve the userstacktrace tracing visualization feature

Store thread group leader id, and use it to lookup the address in the
process's map. We could have looked up the address on thread's map,
but the thread might not exist by the time we are called. The process
might not exist either, but if you are reading trace_pipe, that is
unlikely.

Example usage:

 mount -t debugfs nodev /sys/kernel/debug
 cd /sys/kernel/debug/tracing
 echo userstacktrace >iter_ctrl
 echo sym-userobj >iter_ctrl
 echo sched_switch >current_tracer
 echo 1 >tracing_enabled
 cat trace_pipe >/tmp/trace&
 .... run application ...
 echo 0 >tracing_enabled
 cat /tmp/trace

You'll see stack entries like:

   /lib/libpthread-2.7.so[+0xd370]

You can convert them to function/line using:

   addr2line -fie /lib/libpthread-2.7.so 0xd370

Or:

   addr2line -fie /usr/lib/debug/libpthread-2.7.so 0xd370

For non-PIC/PIE executables this won't work:

   a.out[+0x73b]

You need to run the following: addr2line -fie a.out 0x40073b
(where 0x400000 is the default load address of a.out)

Signed-off-by: Török Edwin <edwintorok@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/ftrace.txt |   13 +++++++++++
 kernel/trace/trace.c     |   51 ++++++++++++++++++++++++++++++++++++++++++++++-
 kernel/trace/trace.h     |    3 +-
 3 files changed, 64 insertions(+), 3 deletions(-)

Index: tip/Documentation/ftrace.txt
===================================================================
--- tip.orig/Documentation/ftrace.txt
+++ tip/Documentation/ftrace.txt
@@ -330,7 +330,7 @@ output. To see what is available, simply
 
   cat /debug/tracing/iter_ctrl
   print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
- noblock nostacktrace nosched-tree nouserstacktrace
+ noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj
 
 To disable one of the options, echo in the option prepended with "no".
 
@@ -387,6 +387,17 @@ Here are the available options:
   userstacktrace - This option changes the trace.
 		   It records a stacktrace of the current userspace thread.
 
+  sym-userobj - when user stacktrace are enabled, look up which object the
+		address belongs to, and print a relative address
+		This is especially useful when ASLR is on, otherwise you don't
+		get a chance to resolve the address to object/file/line after the app is no
+		longer running
+
+		The lookup is performed when you read trace,trace_pipe,latency_trace. Example:
+
+		a.out-1623  [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0
+x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
+
   sched-tree - TBD (any users??)
 
 
Index: tip/kernel/trace/trace.c
===================================================================
--- tip.orig/kernel/trace/trace.c
+++ tip/kernel/trace/trace.c
@@ -236,6 +236,7 @@ static const char *trace_options[] = {
 	"sched-tree",
 	"ftrace_printk",
 	"userstacktrace",
+	"sym-userobj",
 	NULL
 };
 
@@ -382,6 +383,20 @@ trace_seq_putmem_hex(struct trace_seq *s
 	return trace_seq_putmem(s, hex, j);
 }
 
+static int
+trace_seq_path(struct trace_seq *s, struct path *path)
+{
+	int ret;
+	struct seq_file m;
+	m.count = s->len;
+	m.size = PAGE_SIZE;
+	m.buf = s->buffer;
+	ret = seq_path(&m, path, "\n");
+	if (ret)
+		s->len = m.count;
+	return ret;
+}
+
 static void
 trace_seq_reset(struct trace_seq *s)
 {
@@ -678,6 +693,7 @@ tracing_generic_entry_update(struct trac
 
 	entry->preempt_count		= pc & 0xff;
 	entry->pid			= (tsk) ? tsk->pid : 0;
+	entry->tgid               	= (tsk) ? tsk->tgid : 0;
 	entry->flags =
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -1221,11 +1237,26 @@ static int
 seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
 		unsigned long sym_flags)
 {
+	struct mm_struct *mm = NULL;
 	int ret = 1;
 	unsigned i;
 
+	if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
+		struct task_struct *task;
+		/* we do the lookup on the thread group leader,
+		 * since individual threads might have already quit! */
+		rcu_read_lock();
+		task = find_task_by_vpid(entry->ent.tgid);
+		rcu_read_unlock();
+
+		if (task)
+			mm = get_task_mm(task);
+	}
+
 	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 		unsigned long ip = entry->caller[i];
+		unsigned long vmstart = 0;
+		struct file *file = NULL;
 
 		if (ip == ULONG_MAX || !ret)
 			break;
@@ -1235,10 +1266,25 @@ seq_print_userip_objs(const struct users
 			ret = trace_seq_puts(s, "??");
 			continue;
 		}
-		if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/)
+		if (mm) {
+			const struct vm_area_struct *vma = find_vma(mm, ip);
+			if (vma) {
+				file = vma->vm_file;
+				vmstart = vma->vm_start;
+			}
+		}
+		if (file) {
+			ret = trace_seq_path(s, &file->f_path);
+			if (ret)
+				ret = trace_seq_printf(s, "[+0x%lx]",
+						ip - vmstart);
+		}
+		if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
 			ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
 	}
 
+	if (mm)
+		mmput(mm);
 	return ret;
 }
 
@@ -3239,6 +3285,9 @@ void ftrace_dump(void)
 		atomic_inc(&global_trace.data[cpu]->disabled);
 	}
 
+	/* don't look at user memory in panic mode */
+	trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
+
 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
 
 	iter.tr = &global_trace;
Index: tip/kernel/trace/trace.h
===================================================================
--- tip.orig/kernel/trace/trace.h
+++ tip/kernel/trace/trace.h
@@ -423,7 +423,8 @@ enum trace_iterator_flags {
 	TRACE_ITER_STACKTRACE		= 0x100,
 	TRACE_ITER_SCHED_TREE		= 0x200,
 	TRACE_ITER_PRINTK		= 0x400,
-	TRACE_ITER_USERSTACKTRACE       = 0x800
+	TRACE_ITER_USERSTACKTRACE       = 0x800,
+	TRACE_ITER_SYM_USEROBJ          = 0x1000
 };
 
 extern struct tracer nop_trace;

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-03  7:47     ` Ingo Molnar
@ 2008-11-03  8:16       ` Török Edwin
  2008-11-03  8:21         ` Ingo Molnar
  2008-11-03  8:29         ` Ingo Molnar
  0 siblings, 2 replies; 25+ messages in thread
From: Török Edwin @ 2008-11-03  8:16 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: srostedt, a.p.zijlstra, sandmann, linux-kernel

On 2008-11-03 09:47, Ingo Molnar wrote:
> * Török Edwin <edwintorok@gmail.com> wrote:
>
>   
>> +		struct task_struct *task;
>> +		/* we do the lookup on the thread group leader,
>> +		 * since individual threads might have already quit! */
>>     
>
> please use the customary comment style:
>
>   /*
>    * Comment .....
>    * ...... goes here:
>    */
>
>   

. Can checkpatch.pl warn me of situations like this?

>> -		if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/)
>> +		if (mm) {
>> +			const struct vm_area_struct *vma = find_vma(mm, ip);
>> +			if (vma) {
>> +				file = vma->vm_file;
>> +				vmstart = vma->vm_start;
>> +			}
>> +		}
>> +		if (file) {
>> +			ret = trace_seq_path(s, &file->f_path);
>> +			if (ret)
>> +				ret = trace_seq_printf(s, "[+0x%lx]",
>> +						ip - vmstart);
>> +		}
>> +		if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
>>  			ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
>>     
>
> the function is getting a bit large - would it make sense to split out 
> this section into a helper inline function?
>   

Yes, I'll do that.

> another small nit: i cleaned up the subject line and the commit log 
> message for you - see the two patches below - could you please keep it 
> for future (v3) submissions of these patches? It's just small 
> formatting changes.
>   

Ok, luckily git-rebase -i allows me to rewrite the log message too ;)

> Your patches are a nice feature we want to have nevertheless - to be 
> able to see where a user-space app is running has been one of the 
> historically weak points of kernel instrumentation.

Thanks.
It currently works for x86 only, but architecture porters can add
support for theirs quite easily, it just needs to modeled after how
oprofile does it for example.
BTW would it make sense to change oprofile and the sysprof tracer to use
save_stack_trace_user? It would eliminate some code duplication.

Would it make sense to add a script that post-processes the output to
scripts/tracing?
It would parse a trace log (from trace or latency_trace) and use
addr2line to resolve the address to source:line, and if successful
replace the relative address with that; and also
group identical stack traces together.

Best regards,
--Edwin

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-03  8:16       ` Török Edwin
@ 2008-11-03  8:21         ` Ingo Molnar
  2008-11-03  8:29         ` Ingo Molnar
  1 sibling, 0 replies; 25+ messages in thread
From: Ingo Molnar @ 2008-11-03  8:21 UTC (permalink / raw)
  To: Török Edwin, Andy Whitcroft
  Cc: srostedt, a.p.zijlstra, sandmann, linux-kernel


* Török Edwin <edwintorok@gmail.com> wrote:

> On 2008-11-03 09:47, Ingo Molnar wrote:
> > * Török Edwin <edwintorok@gmail.com> wrote:
> >
> >   
> >> +		struct task_struct *task;
> >> +		/* we do the lookup on the thread group leader,
> >> +		 * since individual threads might have already quit! */
> >>     
> >
> > please use the customary comment style:
> >
> >   /*
> >    * Comment .....
> >    * ...... goes here:
> >    */
> >
> >   
> 
> . Can checkpatch.pl warn me of situations like this?

Cc:-ed Andy - it would be a useful feature indeed. (since there's no 
hard CodingStyle rule for it, it could be a default-off helper)

	Ingo

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-03  8:16       ` Török Edwin
  2008-11-03  8:21         ` Ingo Molnar
@ 2008-11-03  8:29         ` Ingo Molnar
  2008-11-03 13:57           ` Török Edwin
  2008-11-03 19:26           ` Mathieu Desnoyers
  1 sibling, 2 replies; 25+ messages in thread
From: Ingo Molnar @ 2008-11-03  8:29 UTC (permalink / raw)
  To: Török Edwin, Robert Richter, Mathieu Desnoyers
  Cc: srostedt, a.p.zijlstra, sandmann, linux-kernel


* Török Edwin <edwintorok@gmail.com> wrote:

> > Your patches are a nice feature we want to have nevertheless - to 
> > be able to see where a user-space app is running has been one of 
> > the historically weak points of kernel instrumentation.
> 
> Thanks.
> It currently works for x86 only, but architecture porters can add
> support for theirs quite easily, it just needs to modeled after how
> oprofile does it for example.
> BTW would it make sense to change oprofile and the sysprof tracer to use
> save_stack_trace_user? It would eliminate some code duplication.

that definitely sounds like the right direction. I've Cc:-ed Robert 
Richter, the Oprofile maintainer - please Cc: him to code that touches 
oprofile.

note that NMI interaction of user-space stackframe walkers can be a 
bit tricky: the basic problem is that if you fetch a user-space 
stackframe that can create a fault, and the IRET at the end of the 
fault handler will re-enable NMIs (violating the NMI code's 
assumptions).

there are patches on lkml written by Mathieu Desnoyers that solve this 
by changing all the fault path to use RET instead of IRET. It might 
make sense to dust them off - we carried them for a long time in -tip 
and they were robust. (they just never had any really strong 
justification and were rather complex - that changes now)

Mathieu, what do you think?

> Would it make sense to add a script that post-processes the output 
> to scripts/tracing?
>
> It would parse a trace log (from trace or latency_trace) and use 
> addr2line to resolve the address to source:line, and if successful 
> replace the relative address with that; and also group identical 
> stack traces together.

sure, please add it to scripts/tracing/.

The best approach would be if the kernel could output the best info by 
default - but that seems rather hard for addr2line functionality which 
involves debuginfo processing, etc.

	Ingo

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-03  8:29         ` Ingo Molnar
@ 2008-11-03 13:57           ` Török Edwin
  2008-11-03 14:04             ` Ingo Molnar
  2008-11-03 19:26           ` Mathieu Desnoyers
  1 sibling, 1 reply; 25+ messages in thread
From: Török Edwin @ 2008-11-03 13:57 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Robert Richter, Mathieu Desnoyers, srostedt, a.p.zijlstra,
	sandmann, linux-kernel

On 2008-11-03 10:29, Ingo Molnar wrote:
> * Török Edwin <edwintorok@gmail.com> wrote:
>
>   
>>> Your patches are a nice feature we want to have nevertheless - to 
>>> be able to see where a user-space app is running has been one of 
>>> the historically weak points of kernel instrumentation.
>>>       
>> Thanks.
>> It currently works for x86 only, but architecture porters can add
>> support for theirs quite easily, it just needs to modeled after how
>> oprofile does it for example.
>> BTW would it make sense to change oprofile and the sysprof tracer to use
>> save_stack_trace_user? It would eliminate some code duplication.
>>     
>
> that definitely sounds like the right direction. I've Cc:-ed Robert 
> Richter, the Oprofile maintainer - please Cc: him to code that touches 
> oprofile.
>
> note that NMI interaction of user-space stackframe walkers can be a 
> bit tricky: the basic problem is that if you fetch a user-space 
> stackframe that can create a fault

The code in trace_sysprof.c (which I used as a base for the
save_stack_trace_user) disables pagefaults
before reading the stackframe from userspace. Does it avoid this problem
then?

Note that due to its use from ftrace, the userstack walker can be called
from the pagefault handler itself, and if it is
allowed to fault it could lead to some form of deadlock. Are the ftrace
functions protected from recursively reentering themselves?

> , and the IRET at the end of the 
> fault handler will re-enable NMIs (violating the NMI code's 
> assumptions).
>   

Is this already a problem with oprofile's user-stack walker?

> there are patches on lkml written by Mathieu Desnoyers that solve this 
> by changing all the fault path to use RET instead of IRET. It might 
> make sense to dust them off - we carried them for a long time in -tip 
> and they were robust. (they just never had any really strong 
> justification and were rather complex - that changes now)
>
> Mathieu, what do you think?
>
>   
>> Would it make sense to add a script that post-processes the output 
>> to scripts/tracing?
>>
>> It would parse a trace log (from trace or latency_trace) and use 
>> addr2line to resolve the address to source:line, and if successful 
>> replace the relative address with that; and also group identical 
>> stack traces together.
>>     
>
> sure, please add it to scripts/tracing/.
>   

Ok, will do so in v3.

> The best approach would be if the kernel could output the best info by 
> default

The kernel could do some grouping and counting (as latencytop does), but
I don't see where it would fit in frace's infrastructure.

I think ftrace's one entry per event is useful in many situations
(debugging, latency measurements), but if the events occur too frequently
it could produce too much data, and it would be more efficient to do
some counting/grouping of similar info in-kernel before outputting to
userspace.
Perhaps as a layer on top of ftrace? What do you think?

>  - but that seems rather hard for addr2line functionality which 
> involves debuginfo processing, etc.
>   

yes it would be an overkill to try to do that from the kernel, when it
is so easy to do from userspace ;)

Best regards,
--Edwin

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-03 13:57           ` Török Edwin
@ 2008-11-03 14:04             ` Ingo Molnar
  0 siblings, 0 replies; 25+ messages in thread
From: Ingo Molnar @ 2008-11-03 14:04 UTC (permalink / raw)
  To: Török Edwin
  Cc: Robert Richter, Mathieu Desnoyers, srostedt, a.p.zijlstra,
	sandmann, linux-kernel


* Török Edwin <edwintorok@gmail.com> wrote:

> > note that NMI interaction of user-space stackframe walkers can be 
> > a bit tricky: the basic problem is that if you fetch a user-space 
> > stackframe that can create a fault
> 
> The code in trace_sysprof.c (which I used as a base for the 
> save_stack_trace_user) disables pagefaults before reading the 
> stackframe from userspace. Does it avoid this problem then?

no, it does not solve it - because pagefault_disable is a "soft" 
mechanism which does not disable the faults themselves. (it only 
disables some of their effects)

> > , and the IRET at the end of the fault handler will re-enable NMIs 
> > (violating the NMI code's assumptions).
> 
> Is this already a problem with oprofile's user-stack walker?

yes, oprofile's code is buggy here too. And not enabled by default, 
and probably not used by many people. The bug would show up as 
mistakenly enabled NMIs from within NMIs - the kernel will _probably_ 
live but could lock up or overflow its stack, etc.

> > The best approach would be if the kernel could output the best 
> > info by default
> 
> The kernel could do some grouping and counting (as latencytop does), 
> but I don't see where it would fit in frace's infrastructure.
>
> I think ftrace's one entry per event is useful in many situations 
> (debugging, latency measurements), but if the events occur too 
> frequently it could produce too much data, and it would be more 
> efficient to do some counting/grouping of similar info in-kernel 
> before outputting to userspace. Perhaps as a layer on top of ftrace? 
> What do you think?

yes, histogram generation would be a natural 'view' of a tracer: have 
a look at Steve's likely()/unlikely() tracer that introduces that 
notion into ftrace.

	Ingo

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-03  8:29         ` Ingo Molnar
  2008-11-03 13:57           ` Török Edwin
@ 2008-11-03 19:26           ` Mathieu Desnoyers
  1 sibling, 0 replies; 25+ messages in thread
From: Mathieu Desnoyers @ 2008-11-03 19:26 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Török Edwin, Robert Richter, srostedt, a.p.zijlstra,
	sandmann, linux-kernel

* Ingo Molnar (mingo@elte.hu) wrote:
> 
> * Török Edwin <edwintorok@gmail.com> wrote:
> 
> > > Your patches are a nice feature we want to have nevertheless - to 
> > > be able to see where a user-space app is running has been one of 
> > > the historically weak points of kernel instrumentation.
> > 
> > Thanks.
> > It currently works for x86 only, but architecture porters can add
> > support for theirs quite easily, it just needs to modeled after how
> > oprofile does it for example.
> > BTW would it make sense to change oprofile and the sysprof tracer to use
> > save_stack_trace_user? It would eliminate some code duplication.
> 
> that definitely sounds like the right direction. I've Cc:-ed Robert 
> Richter, the Oprofile maintainer - please Cc: him to code that touches 
> oprofile.
> 
> note that NMI interaction of user-space stackframe walkers can be a 
> bit tricky: the basic problem is that if you fetch a user-space 
> stackframe that can create a fault, and the IRET at the end of the 
> fault handler will re-enable NMIs (violating the NMI code's 
> assumptions).
> 
> there are patches on lkml written by Mathieu Desnoyers that solve this 
> by changing all the fault path to use RET instead of IRET. It might 
> make sense to dust them off - we carried them for a long time in -tip 
> and they were robust. (they just never had any really strong 
> justification and were rather complex - that changes now)
> 
> Mathieu, what do you think?
> 

Yep, using the NMI-safe traps seems like a good idea for this. I look
forward to add those userspace stack dumps in my LTTng traces. I've had
this feature in the past in LTTng and it was _really_ useful, e.g. to
know the whole userspace stack that caused a system call.

The patchset version I have in my -lttng tree is pretty much the same
you currently have in -tip. I have not ported my tree to 2.6.28-rcX yet
though.

For trap instrumentation, I think the sane way to deal with the
recursive trap problem is to keep a nesting count associated with
instrumentation within the trap handler, which would dynamically disable
this specific instrumentation (per-cpu given preemption is disabled
within the instrumentation) once it reaches a given nesting level. That
would permit to overcome the recursive trap problem without losing
nested events happening when, for example, a NMI nests over a standard
interrupt, which is, in this case, nested but not caused by recursion.

Also, we have to think carefully about how we want to access userspace.
a copy_from_user_inatomic(), which may fail if the data we try to access
is not in cache, seems like a sane approach to deal with such
instrumentation called in atomic context. But if we detect that the
instrumentation is called from preemptable context, then a
copy_from_user() could be ok.

Mathieu

> > Would it make sense to add a script that post-processes the output 
> > to scripts/tracing?
> >
> > It would parse a trace log (from trace or latency_trace) and use 
> > addr2line to resolve the address to source:line, and if successful 
> > replace the relative address with that; and also group identical 
> > stack traces together.
> 
> sure, please add it to scripts/tracing/.
> 
> The best approach would be if the kernel could output the best info by 
> default - but that seems rather hard for addr2line functionality which 
> involves debuginfo processing, etc.
> 
> 	Ingo

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-02 21:18   ` [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely Török Edwin
  2008-11-02 21:25     ` Al Viro
  2008-11-03  7:47     ` Ingo Molnar
@ 2008-11-03 19:56     ` Peter Zijlstra
  2008-11-03 19:58       ` Arnaldo Carvalho de Melo
  2 siblings, 1 reply; 25+ messages in thread
From: Peter Zijlstra @ 2008-11-03 19:56 UTC (permalink / raw)
  To: Török Edwin; +Cc: mingo, srostedt, sandmann, linux-kernel

fwiw, this is one amazing subject line.. ;-)


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-03 19:56     ` Peter Zijlstra
@ 2008-11-03 19:58       ` Arnaldo Carvalho de Melo
  2008-11-03 20:01         ` Török Edwin
  0 siblings, 1 reply; 25+ messages in thread
From: Arnaldo Carvalho de Melo @ 2008-11-03 19:58 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Török Edwin, mingo, srostedt, sandmann, linux-kernel

Em Mon, Nov 03, 2008 at 08:56:29PM +0100, Peter Zijlstra escreveu:
> fwiw, this is one amazing subject line.. ;-)

Too much for "subject says it all." 8)

- Arnaldo

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-03 19:58       ` Arnaldo Carvalho de Melo
@ 2008-11-03 20:01         ` Török Edwin
  0 siblings, 0 replies; 25+ messages in thread
From: Török Edwin @ 2008-11-03 20:01 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Peter Zijlstra, Török Edwin,
	mingo, srostedt, sandmann, linux-kernel

On 2008-11-03 21:58, Arnaldo Carvalho de Melo wrote:
> Em Mon, Nov 03, 2008 at 08:56:29PM +0100, Peter Zijlstra escreveu:
>   
>> fwiw, this is one amazing subject line.. ;-)
>>     
>
> Too much for "subject says it all." 8)

Yep, I've just learned that the first line in my git commit message will
be the subject line for the emails it sends out.

I'll use the shorter subject line suggested by Ingo next time ;)

Best regards,
--Edwin

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-03  7:32           ` Ingo Molnar
@ 2008-11-04 18:26             ` Christoph Hellwig
  2008-11-04 18:37               ` Török Edwin
  2008-11-06  9:55               ` Ingo Molnar
  0 siblings, 2 replies; 25+ messages in thread
From: Christoph Hellwig @ 2008-11-04 18:26 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Al Viro, T?r?k Edwin, srostedt, a.p.zijlstra, sandmann, linux-kernel

On Mon, Nov 03, 2008 at 08:32:57AM +0100, Ingo Molnar wrote:
> could you please help out with such a helper? This is really about 
> visualization, not to rely on it.

Is this kindergarten?  Take a look at seq_path - what you want
is the combination of d_path + mangle_path and the combination of two
would be rather trivial.  In fact I'd say you don't even need the helper
but just make mangle_path global (and document it while you're at it)
and then use it.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-04 18:26             ` Christoph Hellwig
@ 2008-11-04 18:37               ` Török Edwin
  2008-11-04 19:09                 ` Christoph Hellwig
  2008-11-06  9:55               ` Ingo Molnar
  1 sibling, 1 reply; 25+ messages in thread
From: Török Edwin @ 2008-11-04 18:37 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Ingo Molnar, Al Viro, srostedt, a.p.zijlstra, sandmann, linux-kernel

On 2008-11-04 20:26, Christoph Hellwig wrote:
> On Mon, Nov 03, 2008 at 08:32:57AM +0100, Ingo Molnar wrote:
>   
>> could you please help out with such a helper? This is really about 
>> visualization, not to rely on it.
>>     
>
> Is this kindergarten?  Take a look at seq_path - what you want
> is the combination of d_path + mangle_path and the combination of two
> would be rather trivial.  In fact I'd say you don't even need the helper
> but just make mangle_path global (and document it while you're at it)
> and then use it.

Hmm, this is just for visualizing things in /sys/kernel/debug/, can't I
use something simpler than mangle_path, such as a simple while loop like
the one below?

+ static int
+trace_seq_path(struct trace_seq *s, struct path *path)
+{
+       char *p;
+
+       if (s->len >= (PAGE_SIZE - 1))
+               return 0;
+       p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
+       if (!IS_ERR(p)) {
+               size_t i = s->len;
+               char c;
+               while ((c = *p++)) {
+                       if (c != '\n')
+                               s->buffer[i++] = c;
+               }
+               s->len = i;
+               return 1;
+       } else {
+               s->buffer[s->len++] = '?';
+               return 1;
+       }
+
+       return 0;
+}

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-04 18:37               ` Török Edwin
@ 2008-11-04 19:09                 ` Christoph Hellwig
  2008-11-04 19:10                   ` Török Edwin
  0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2008-11-04 19:09 UTC (permalink / raw)
  To: T?r?k Edwin
  Cc: Christoph Hellwig, Ingo Molnar, Al Viro, srostedt, a.p.zijlstra,
	sandmann, linux-kernel

On Tue, Nov 04, 2008 at 08:37:11PM +0200, T?r?k Edwin wrote:
> Hmm, this is just for visualizing things in /sys/kernel/debug/, can't I
> use something simpler than mangle_path, such as a simple while loop like
> the one below?

Sounds fine to me, although sharing one mangling-scheme for all pathname
outputs certainly does have benefits of less confusion for the user.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-04 19:09                 ` Christoph Hellwig
@ 2008-11-04 19:10                   ` Török Edwin
  0 siblings, 0 replies; 25+ messages in thread
From: Török Edwin @ 2008-11-04 19:10 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Ingo Molnar, Al Viro, srostedt, a.p.zijlstra, sandmann, linux-kernel

On 2008-11-04 21:09, Christoph Hellwig wrote:
> On Tue, Nov 04, 2008 at 08:37:11PM +0200, T?r?k Edwin wrote:
>   
>> Hmm, this is just for visualizing things in /sys/kernel/debug/, can't I
>> use something simpler than mangle_path, such as a simple while loop like
>> the one below?
>>     
>
> Sounds fine to me, although sharing one mangling-scheme for all pathname
> outputs certainly does have benefits of less confusion for the user.

Indeed. I'll do as you suggested: make mangle_path global and document
what it does.

Best regards,
--Edwin

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-04 18:26             ` Christoph Hellwig
  2008-11-04 18:37               ` Török Edwin
@ 2008-11-06  9:55               ` Ingo Molnar
  2008-11-06 12:26                 ` Christoph Hellwig
  1 sibling, 1 reply; 25+ messages in thread
From: Ingo Molnar @ 2008-11-06  9:55 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Al Viro, T?r?k Edwin, srostedt, a.p.zijlstra, sandmann, linux-kernel


* Christoph Hellwig <hch@infradead.org> wrote:

> On Mon, Nov 03, 2008 at 08:32:57AM +0100, Ingo Molnar wrote:
> > could you please help out with such a helper? This is really about 
> > visualization, not to rely on it.
> 
> Is this kindergarten? [...]

No, this is lkml where, if you decide to stand in the way of a patch, 
you are supposed to back up your NAK's with a path out of that NAK. 
Instead of forcing people into multiple email ping-pongs trying to 
figure out what exactly the objections mean.

Look at the time flow of the mails:

 |  Nov  2   22:18:21 2008        # submission
 |  Nov  2   22:25:16 2008        # +7 minutes, NAK with no comment 
 |
 |  [ ~5 unnecessary mails ]
 |
 |  Nov  4   20:09:13 20008       # +2 days, ACK

What was done was inefficent and unnecessary, it caused a hickup and 
delay of 2 days in the workflow - at the point of the NAK it was 
already known what the right solution is, it just was obscured behind 
a moody reply. Such obstruction adds up and it simply does not scale 
as a form of communication.

Nobody learned anything new compared to the case had the +7 minutes 
reply included this information already.

	Ingo

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-06  9:55               ` Ingo Molnar
@ 2008-11-06 12:26                 ` Christoph Hellwig
  2008-11-06 15:32                   ` Ingo Molnar
  0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2008-11-06 12:26 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Christoph Hellwig, Al Viro, T?r?k Edwin, srostedt, a.p.zijlstra,
	sandmann, linux-kernel

On Thu, Nov 06, 2008 at 10:55:47AM +0100, Ingo Molnar wrote:
> > On Mon, Nov 03, 2008 at 08:32:57AM +0100, Ingo Molnar wrote:
> > > could you please help out with such a helper? This is really about 
> > > visualization, not to rely on it.
> > 
> > Is this kindergarten? [...]
> 
> No, this is lkml where, if you decide to stand in the way of a patch, 
> you are supposed to back up your NAK's with a path out of that NAK. 
> Instead of forcing people into multiple email ping-pongs trying to 
> figure out what exactly the objections mean.

Even if that was true, which would be avery sad world I should be
easy enough for you or anyone to look at what seq_path does and how
to have a version that doesn't use the seq_file insterface.  Takes
about two minutes with most of the time spent on finding where seq_file
is implemented.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-06 12:26                 ` Christoph Hellwig
@ 2008-11-06 15:32                   ` Ingo Molnar
  2008-11-06 15:36                     ` Christoph Hellwig
  0 siblings, 1 reply; 25+ messages in thread
From: Ingo Molnar @ 2008-11-06 15:32 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Al Viro, T?r?k Edwin, srostedt, a.p.zijlstra, sandmann, linux-kernel


* Christoph Hellwig <hch@infradead.org> wrote:

> On Thu, Nov 06, 2008 at 10:55:47AM +0100, Ingo Molnar wrote:
> > > On Mon, Nov 03, 2008 at 08:32:57AM +0100, Ingo Molnar wrote:
> > > > could you please help out with such a helper? This is really about 
> > > > visualization, not to rely on it.
> > > 
> > > Is this kindergarten? [...]
> > 
> > No, this is lkml where, if you decide to stand in the way of a patch, 
> > you are supposed to back up your NAK's with a path out of that NAK. 
> > Instead of forcing people into multiple email ping-pongs trying to 
> > figure out what exactly the objections mean.
> 
> Even if that was true, which would be avery sad world I should be 
> easy enough for you or anyone to look at what seq_path does and how 
> to have a version that doesn't use the seq_file insterface.  Takes 
> about two minutes with most of the time spent on finding where 
> seq_file is implemented.

the timeline i quoted shows that it was a 2 days process, not 2 
minutes.

It's the basic principle of communication: be forgiving in what you 
receive and conservative in what you transmit. Plus if one extra 
sentence of seemingly redundant information increases the chances to 
save an email-roundtrip down the line then please do it to increase 
communication efficiency and reduce latency.

Just to show you an example, let me give you an analogous situation in 
scheduler talk, as a reply to someone from another field who tries to 
use an existing scheduler API the wrong way:

 "
   Hey, you loser, NAK. Sync wakeups are the wrong thing to do, why 
   dont you just disable buddy wakeups for a minute in sched.c??

   [ Only two minutes to write a patch. The solution is really
     obvious to me, and we are not in the kindergarten to explain
     everything to you. And besides, the longer i make it for you to
     figure it all out, the cooler i can feel about being an
     ueber-capable scheduler hacker ;-) ]
 "

do you want such a world? i dont ;-)

	Ingo

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely.
  2008-11-06 15:32                   ` Ingo Molnar
@ 2008-11-06 15:36                     ` Christoph Hellwig
  0 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2008-11-06 15:36 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Christoph Hellwig, Al Viro, T?r?k Edwin, srostedt, a.p.zijlstra,
	sandmann, linux-kernel

On Thu, Nov 06, 2008 at 04:32:38PM +0100, Ingo Molnar wrote:
> the timeline i quoted shows that it was a 2 days process, not 2 
> minutes.

Honestly I'm not sure what this timeline stuff should mean. People are
busy, and for many of us kernel hacking is only or at least outside
of very narrow areas a spare time activicty, so you really can't expect
answers in the next minute.  There might be a lot of real life stuff
that's a lot more important as it pais the bills and deals with family,
etc.

But that wasn't my point anyway.  Al told no to seq_path, and just
the contents of it without the interface that is tied to seq_files.
Doing so is trivial by looking at seq_path so I really don't understand
your odd reaction to it.


^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2008-11-06 15:37 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-11-02 21:18 Add support for userspace stacktraces in tracing/iter_ctrl [v2] Török Edwin
2008-11-02 21:18 ` [PATCH] Add support for userspace stacktraces in tracing/iter_ctrl Török Edwin
2008-11-02 21:18   ` [PATCH] Identify which executable object the userspace address belongs to. Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely Török Edwin
2008-11-02 21:25     ` Al Viro
2008-11-02 21:28       ` Török Edwin
2008-11-02 21:40         ` Al Viro
2008-11-03  7:32           ` Ingo Molnar
2008-11-04 18:26             ` Christoph Hellwig
2008-11-04 18:37               ` Török Edwin
2008-11-04 19:09                 ` Christoph Hellwig
2008-11-04 19:10                   ` Török Edwin
2008-11-06  9:55               ` Ingo Molnar
2008-11-06 12:26                 ` Christoph Hellwig
2008-11-06 15:32                   ` Ingo Molnar
2008-11-06 15:36                     ` Christoph Hellwig
2008-11-03  7:47     ` Ingo Molnar
2008-11-03  8:16       ` Török Edwin
2008-11-03  8:21         ` Ingo Molnar
2008-11-03  8:29         ` Ingo Molnar
2008-11-03 13:57           ` Török Edwin
2008-11-03 14:04             ` Ingo Molnar
2008-11-03 19:26           ` Mathieu Desnoyers
2008-11-03 19:56     ` Peter Zijlstra
2008-11-03 19:58       ` Arnaldo Carvalho de Melo
2008-11-03 20:01         ` Török Edwin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).