LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Christoph Hellwig <hch@infradead.org>,
	Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>,
	Gregory Haskins <ghaskins@novell.com>,
	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>,
	Thomas Gleixner <tglx@linutronix.de>,
	Tim Bird <tim.bird@am.sony.com>, Sam Ravnborg <sam@ravnborg.org>,
	"Frank Ch. Eigler" <fche@redhat.com>,
	Jan Kiszka <jan.kiszka@siemens.com>,
	John Stultz <johnstul@us.ibm.com>,
	Arjan van de Ven <arjan@infradead.org>,
	Steven Rostedt <srostedt@redhat.com>
Subject: [PATCH 13/22 -v7] Add tracing of context switches
Date: Tue, 29 Jan 2008 22:15:34 -0500	[thread overview]
Message-ID: <20080130031842.128505372@goodmis.org> (raw)
In-Reply-To: <20080130031521.258552785@goodmis.org>

[-- Attachment #1: trace-add-cmdline-switch.patch --]
[-- Type: text/plain, Size: 10854 bytes --]

This patch adds context switch tracing, of the format of:

                 _------=> CPU#            
                / _-----=> irqs-off        
               | / _----=> need-resched    
               || / _---=> hardirq/softirq 
               ||| / _--=> preempt-depth   
               |||| /                      
               |||||     delay             
   cmd     pid ||||| time  |   caller      
      \   /    |||||   \   |   /           
 swapper-0     1d..3  137us+:  0:140:R --> 2912:120
    sshd-2912  1d..3  216us+:  2912:120:S --> 0:140
 swapper-0     1d..3  261us+:  0:140:R --> 2912:120
    bash-2920  0d..3  267us+:  2920:120:S --> 0:140
    sshd-2912  1d..3  330us!:  2912:120:S --> 0:140
 swapper-0     1d..3 2389us+:  0:140:R --> 2847:120
yum-upda-2847  1d..3 2411us!:  2847:120:S --> 0:140
 swapper-0     0d..3 11089us+:  0:140:R --> 3139:120
gdm-bina-3139  0d..3 11113us!:  3139:120:S --> 0:140
 swapper-0     1d..3 102328us+:  0:140:R --> 2847:120
yum-upda-2847  1d..3 102348us!:  2847:120:S --> 0:140


 "sched_switch" is added to /debugfs/tracing/available_tracers

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
 lib/tracing/Kconfig              |    9 ++
 lib/tracing/Makefile             |    1 
 lib/tracing/trace_sched_switch.c |  165 +++++++++++++++++++++++++++++++++++++++
 lib/tracing/tracer.c             |   43 ++++++++++
 lib/tracing/tracer.h             |   23 +++++
 5 files changed, 240 insertions(+), 1 deletion(-)

Index: linux-mcount.git/lib/tracing/Kconfig
===================================================================
--- linux-mcount.git.orig/lib/tracing/Kconfig	2008-01-29 18:06:25.000000000 -0500
+++ linux-mcount.git/lib/tracing/Kconfig	2008-01-29 18:08:06.000000000 -0500
@@ -23,3 +23,12 @@ config FUNCTION_TRACER
 	  insert a call to an architecture specific __mcount routine,
 	  that the debugging mechanism using this facility will hook by
 	  providing a set of inline routines.
+
+config CONTEXT_SWITCH_TRACER
+	bool "Trace process context switches"
+	depends on DEBUG_KERNEL
+	select TRACING
+	help
+	  This tracer hooks into the context switch and records
+	  all switching of tasks.
+
Index: linux-mcount.git/lib/tracing/Makefile
===================================================================
--- linux-mcount.git.orig/lib/tracing/Makefile	2008-01-29 18:06:25.000000000 -0500
+++ linux-mcount.git/lib/tracing/Makefile	2008-01-29 18:08:06.000000000 -0500
@@ -1,6 +1,7 @@
 obj-$(CONFIG_MCOUNT) += libmcount.o
 
 obj-$(CONFIG_TRACING) += tracer.o
+obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace_function.o
 
 libmcount-y := mcount.o
Index: linux-mcount.git/lib/tracing/trace_sched_switch.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-mcount.git/lib/tracing/trace_sched_switch.c	2008-01-29 18:08:06.000000000 -0500
@@ -0,0 +1,165 @@
+/*
+ * trace context switch
+ *
+ * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/marker.h>
+#include <linux/mcount.h>
+
+#include "tracer.h"
+
+static struct tracing_trace *tracer_trace;
+static int trace_enabled __read_mostly;
+static atomic_t sched_ref;
+int tracing_sched_switch_enabled __read_mostly;
+
+static notrace void sched_switch_callback(const struct marker *mdata,
+					  void *private_data,
+					  const char *format, ...)
+{
+	struct tracing_trace **p = mdata->private;
+	struct tracing_trace *tr = *p;
+	struct tracing_trace_cpu *data;
+	struct task_struct *prev;
+	struct task_struct *next;
+	unsigned long flags;
+	va_list ap;
+	int cpu;
+
+	if (!trace_enabled)
+		return;
+
+	va_start(ap, format);
+	prev = va_arg(ap, typeof(prev));
+	next = va_arg(ap, typeof(next));
+	va_end(ap);
+
+	raw_local_irq_save(flags);
+	cpu = raw_smp_processor_id();
+	data = tr->data[cpu];
+	atomic_inc(&data->disabled);
+
+	if (likely(atomic_read(&data->disabled) == 1))
+		tracing_sched_switch_trace(tr, data, prev, next, flags);
+
+	atomic_dec(&data->disabled);
+	raw_local_irq_restore(flags);
+}
+
+static notrace void sched_switch_reset(struct tracing_trace *tr)
+{
+	int cpu;
+
+	tr->time_start = now();
+
+	for_each_online_cpu(cpu)
+		tracing_reset(tr->data[cpu]);
+}
+
+static notrace void start_sched_trace(struct tracing_trace *tr)
+{
+	sched_switch_reset(tr);
+	trace_enabled = 1;
+	tracing_start_sched_switch();
+}
+
+static notrace void stop_sched_trace(struct tracing_trace *tr)
+{
+	tracing_stop_sched_switch();
+	trace_enabled = 0;
+}
+
+static notrace void sched_switch_trace_init(struct tracing_trace *tr)
+{
+	tracer_trace = tr;
+
+	if (tr->ctrl)
+		start_sched_trace(tr);
+}
+
+static notrace void sched_switch_trace_reset(struct tracing_trace *tr)
+{
+	if (tr->ctrl)
+		stop_sched_trace(tr);
+}
+
+static void sched_switch_trace_ctrl_update(struct tracing_trace *tr)
+{
+	/* When starting a new trace, reset the buffers */
+	if (tr->ctrl)
+		start_sched_trace(tr);
+	else
+		stop_sched_trace(tr);
+}
+
+static struct trace_types_struct sched_switch_trace __read_mostly =
+{
+	.name = "sched_switch",
+	.init = sched_switch_trace_init,
+	.reset = sched_switch_trace_reset,
+	.ctrl_update = sched_switch_trace_ctrl_update,
+};
+
+static int tracing_sched_arm(void)
+{
+	int ret;
+
+	ret = marker_arm("kernel_sched_schedule");
+	if (ret)
+		pr_info("sched trace: Couldn't arm probe switch_to\n");
+
+	return ret;
+}
+
+void tracing_start_sched_switch(void)
+{
+	long ref;
+
+	ref = atomic_inc_return(&sched_ref);
+	if (tracing_sched_switch_enabled && ref == 1)
+		tracing_sched_arm();
+}
+
+void tracing_stop_sched_switch(void)
+{
+	long ref;
+
+	ref = atomic_dec_return(&sched_ref);
+	if (tracing_sched_switch_enabled && ref)
+		marker_disarm("kernel_sched_schedule");
+}
+
+__init static int init_sched_switch_trace(void)
+{
+	int ret;
+
+	ret = register_trace(&sched_switch_trace);
+	if (ret)
+		return ret;
+
+	ret = marker_probe_register("kernel_sched_schedule",
+				    "prev %p next %p",
+				    sched_switch_callback,
+				    &tracer_trace);
+	if (ret) {
+		pr_info("sched trace: Couldn't add marker"
+			" probe to switch_to\n");
+		goto out;
+	}
+
+	if (atomic_read(&sched_ref))
+		ret = tracing_sched_arm();
+
+	tracing_sched_switch_enabled = 1;
+
+ out:
+	return ret;
+}
+
+device_initcall(init_sched_switch_trace);
Index: linux-mcount.git/lib/tracing/tracer.c
===================================================================
--- linux-mcount.git.orig/lib/tracing/tracer.c	2008-01-29 18:07:38.000000000 -0500
+++ linux-mcount.git/lib/tracing/tracer.c	2008-01-29 18:08:06.000000000 -0500
@@ -52,6 +52,7 @@ enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
 
 	TRACE_FN,
+	TRACE_CTX,
 
 	__TRACE_LAST_TYPE
 };
@@ -229,6 +230,24 @@ notrace void tracing_function_trace(stru
 	entry->fn.parent_ip = parent_ip;
 }
 
+notrace void tracing_sched_switch_trace(struct tracing_trace *tr,
+					struct tracing_trace_cpu *data,
+					struct task_struct *prev,
+					struct task_struct *next,
+					unsigned long flags)
+{
+	struct tracing_entry *entry;
+
+	entry = tracing_get_trace_entry(tr, data);
+	tracing_generic_entry_update(entry, flags);
+	entry->type		= TRACE_CTX;
+	entry->ctx.prev_pid	= prev->pid;
+	entry->ctx.prev_prio	= prev->prio;
+	entry->ctx.prev_state	= prev->state;
+	entry->ctx.next_pid	= next->pid;
+	entry->ctx.next_prio	= next->prio;
+}
+
 enum trace_iterator {
 	TRACE_ITER_SYM_ONLY	= 1,
 	TRACE_ITER_VERBOSE	= 2,
@@ -547,6 +566,8 @@ lat_print_timestamp(struct seq_file *m, 
 		seq_puts(m, " : ");
 }
 
+static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
+
 static void notrace
 print_lat_fmt(struct seq_file *m, struct tracing_iterator *iter,
 	      unsigned int trace_idx, int cpu)
@@ -557,6 +578,7 @@ print_lat_fmt(struct seq_file *m, struct
 	unsigned long rel_usecs;
 	int sym_only = !!(trace_flags & TRACE_ITER_SYM_ONLY);
 	int verbose = !!(trace_flags & TRACE_ITER_VERBOSE);
+	int S;
 
 	if (!next_entry)
 		next_entry = entry;
@@ -583,6 +605,16 @@ print_lat_fmt(struct seq_file *m, struct
 		seq_print_ip_sym(m, entry->fn.parent_ip, sym_only);
 		seq_puts(m, ")\n");
 		break;
+	case TRACE_CTX:
+		S = entry->ctx.prev_state < sizeof(state_to_char) ?
+			state_to_char[entry->ctx.prev_state] : 'X';
+		seq_printf(m, " %d:%d:%c --> %d:%d\n",
+			   entry->ctx.prev_pid,
+			   entry->ctx.prev_prio,
+			   S,
+			   entry->ctx.next_pid,
+			   entry->ctx.next_prio);
+		break;
 	}
 }
 
@@ -594,6 +626,7 @@ static void notrace print_trace_fmt(stru
 	unsigned long secs;
 	int sym_only = !!(trace_flags & TRACE_ITER_SYM_ONLY);
 	unsigned long long t;
+	int S;
 
 	t = cycles_to_usecs(entry->t);
 	usec_rem = do_div(t, 1000000ULL);
@@ -612,6 +645,16 @@ static void notrace print_trace_fmt(stru
 					 sym_only);
 		}
 		break;
+	case TRACE_CTX:
+		S = entry->ctx.prev_state < sizeof(state_to_char) ?
+			state_to_char[entry->ctx.prev_state] : 'X';
+		seq_printf(m, " %d:%d:%c ==> %d:%d\n",
+			   entry->ctx.prev_pid,
+			   entry->ctx.prev_prio,
+			   S,
+			   entry->ctx.next_pid,
+			   entry->ctx.next_prio);
+		break;
 	}
 	seq_printf(m, "\n");
 }
Index: linux-mcount.git/lib/tracing/tracer.h
===================================================================
--- linux-mcount.git.orig/lib/tracing/tracer.h	2008-01-29 18:06:25.000000000 -0500
+++ linux-mcount.git/lib/tracing/tracer.h	2008-01-29 18:08:06.000000000 -0500
@@ -10,6 +10,14 @@ struct tracing_function {
 	unsigned long parent_ip;
 };
 
+struct tracing_sched_switch {
+	unsigned int prev_pid;
+	unsigned char prev_prio;
+	unsigned char prev_state;
+	unsigned int next_pid;
+	unsigned char next_prio;
+};
+
 struct tracing_entry {
 	char type;
 	char cpu;  /* who will want to trace more than 256 CPUS? */
@@ -18,7 +26,10 @@ struct tracing_entry {
 	int pid;
 	cycle_t t;
 	char comm[TASK_COMM_LEN];
-	struct tracing_function fn;
+	union {
+		struct tracing_function fn;
+		struct tracing_sched_switch ctx;
+	};
 };
 
 struct tracing_trace_cpu {
@@ -82,11 +93,21 @@ void tracing_function_trace(struct traci
 			    unsigned long ip,
 			    unsigned long parent_ip,
 			    unsigned long flags);
+void tracing_sched_switch_trace(struct tracing_trace *tr,
+				struct tracing_trace_cpu *data,
+				struct task_struct *prev,
+				struct task_struct *next,
+				unsigned long flags);
+
 
 void tracing_start_function_trace(void);
 void tracing_stop_function_trace(void);
 int register_trace(struct trace_types_struct *type);
 void unregister_trace(struct trace_types_struct *type);
+void tracing_start_sched_switch(void);
+void tracing_stop_sched_switch(void);
+
+extern int tracing_sched_switch_enabled;
 
 static inline notrace cycle_t now(void)
 {

-- 

  parent reply	other threads:[~2008-01-30  3:27 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-01-30  3:15 [PATCH 00/22 -v7] mcount and latency tracing utility -v7 Steven Rostedt
2008-01-30  3:15 ` [PATCH 01/22 -v7] printk - dont wakeup klogd with interrupts disabled Steven Rostedt
2008-01-30  3:15 ` [PATCH 02/22 -v7] Add basic support for gcc profiler instrumentation Steven Rostedt
2008-01-30  8:46   ` Peter Zijlstra
2008-01-30 13:08     ` Steven Rostedt
2008-01-30 14:09       ` Steven Rostedt
2008-01-30 14:25         ` Peter Zijlstra
2008-02-01 22:34           ` Paul E. McKenney
2008-02-02  1:56             ` Steven Rostedt
2008-02-02 21:41               ` Paul E. McKenney
2008-02-04 17:09                 ` Steven Rostedt
2008-02-04 21:40                   ` Paul E. McKenney
2008-02-04 22:03                     ` Steven Rostedt
2008-02-04 22:41                       ` Mathieu Desnoyers
2008-02-05  6:11                         ` Paul E. McKenney
2008-02-05  5:13                       ` Paul E. McKenney
2008-01-30 13:21   ` Jan Kiszka
2008-01-30 13:53     ` Steven Rostedt
2008-01-30 14:28       ` Steven Rostedt
2008-01-30  3:15 ` [PATCH 03/22 -v7] Annotate core code that should not be traced Steven Rostedt
2008-01-30  3:15 ` [PATCH 04/22 -v7] x86_64: notrace annotations Steven Rostedt
2008-01-30  3:15 ` [PATCH 05/22 -v7] add notrace annotations to vsyscall Steven Rostedt
2008-01-30  8:49   ` Peter Zijlstra
2008-01-30 13:15     ` Steven Rostedt
2008-01-30  3:15 ` [PATCH 06/22 -v7] handle accurate time keeping over long delays Steven Rostedt
2008-01-30  3:15 ` [PATCH 07/22 -v7] initialize the clock source to jiffies clock Steven Rostedt
2008-01-30  3:15 ` [PATCH 08/22 -v7] add get_monotonic_cycles Steven Rostedt
2008-01-30  3:15 ` [PATCH 09/22 -v7] add notrace annotations to timing events Steven Rostedt
2008-01-30  3:15 ` [PATCH 10/22 -v7] mcount based trace in the form of a header file library Steven Rostedt
2008-01-30  3:15 ` [PATCH 11/22 -v7] Add context switch marker to sched.c Steven Rostedt
2008-01-30  3:15 ` [PATCH 12/22 -v7] Make the task State char-string visible to all Steven Rostedt
2008-01-30  3:15 ` Steven Rostedt [this message]
2008-01-30  3:15 ` [PATCH 14/22 -v7] Generic command line storage Steven Rostedt
2008-01-30  3:15 ` [PATCH 15/22 -v7] trace generic call to schedule switch Steven Rostedt
2008-01-30  3:15 ` [PATCH 16/22 -v7] Add marker in try_to_wake_up Steven Rostedt
2008-01-30  3:15 ` [PATCH 17/22 -v7] mcount tracer for wakeup latency timings Steven Rostedt
2008-01-30  9:31   ` Peter Zijlstra
2008-01-30 13:18     ` Steven Rostedt
2008-01-30  3:15 ` [PATCH 18/22 -v7] Trace irq disabled critical timings Steven Rostedt
2008-01-30  3:15 ` [PATCH 19/22 -v7] trace preempt off " Steven Rostedt
2008-01-30  9:40   ` Peter Zijlstra
2008-01-30 13:40     ` Steven Rostedt
2008-01-30  3:15 ` [PATCH 20/22 -v7] Add markers to various events Steven Rostedt
2008-01-30  3:15 ` [PATCH 21/22 -v7] Add event tracer Steven Rostedt
2008-01-30  3:15 ` [PATCH 22/22 -v7] Critical latency timings histogram Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080130031842.128505372@goodmis.org \
    --to=rostedt@goodmis.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@ghostprotocols.net \
    --cc=akpm@linux-foundation.org \
    --cc=arjan@infradead.org \
    --cc=fche@redhat.com \
    --cc=ghaskins@novell.com \
    --cc=hch@infradead.org \
    --cc=jan.kiszka@siemens.com \
    --cc=johnstul@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mathieu.desnoyers@polymtl.ca \
    --cc=mingo@elte.hu \
    --cc=sam@ravnborg.org \
    --cc=srostedt@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=tim.bird@am.sony.com \
    --cc=torvalds@linux-foundation.org \
    --subject='Re: [PATCH 13/22 -v7] Add tracing of context switches' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).