LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [patch 0/4] Linux Kernel Markers for 2.6.23-rc6-mm1
@ 2007-09-18 21:13 Mathieu Desnoyers
  2007-09-18 21:13 ` [patch 1/4] Linux Kernel Markers - Architecture Independent Code Mathieu Desnoyers
                   ` (3 more replies)
  0 siblings, 4 replies; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-09-18 21:13 UTC (permalink / raw)
  To: akpm, linux-kernel

Hi Andrew,

Here are the updated Linux Kernel Markers.

It depends on :
Text Edit Lock
Immediate Values
Sorted module list
Merge Kconfig instrumentation menu

It applies to 2.6.23-rc4-mm1, in this order:

linux-kernel-markers-architecture-independent-code.patch
linux-kernel-markers-instrumentation-menu.patch
linux-kernel-markers-documentation.patch
linux-kernel-markers-port-blktrace-to-markers.patch

You can find a tarball of this patch and all its dependencies at:
http://ltt.polymtl.ca/markers/markers-patches-for-2.6.23-rc6-mm1-18-09-2007.tar.bz2

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-09-18 21:13 [patch 0/4] Linux Kernel Markers for 2.6.23-rc6-mm1 Mathieu Desnoyers
@ 2007-09-18 21:13 ` Mathieu Desnoyers
  2007-09-19 11:37   ` Mathieu Desnoyers
  2007-09-21  0:58   ` Steven Rostedt
  2007-09-18 21:13 ` [patch 2/4] Linux Kernel Markers - Use instrumentation kconfig menu Mathieu Desnoyers
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-09-18 21:13 UTC (permalink / raw)
  To: akpm, linux-kernel
  Cc: Mathieu Desnoyers, Frank Ch. Eigler, Christoph Hellwig, Rusty Russell

[-- Attachment #1: linux-kernel-markers-architecture-independent-code.patch --]
[-- Type: text/plain, Size: 29168 bytes --]

The marker activation functions sits in kernel/marker.c. A hash table is used
to keep track of the registered probes and armed markers, so the markers within
a newly loaded module that should be active can be activated at module load
time.

marker_query has been removed. marker_get_first, marker_get_next and
marker_release should be used as iterators on the markers.

Changelog:
- markers_mutex now nests inside module_mutex rather than the opposite.
- Iteration on modules is now done in module.c.
- module_mutex is not exported anymore.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: "Frank Ch. Eigler" <fche@redhat.com>
CC: Christoph Hellwig <hch@infradead.org>
CC: Rusty Russell <rusty@rustcorp.com.au>
---

 include/asm-generic/vmlinux.lds.h |   11 
 include/linux/marker.h            |  175 ++++++++++
 include/linux/module.h            |   18 +
 kernel/marker.c                   |  608 ++++++++++++++++++++++++++++++++++++++
 kernel/module.c                   |   66 ++++
 5 files changed, 875 insertions(+), 3 deletions(-)

Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h	2007-09-14 10:11:18.000000000 -0400
+++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h	2007-09-14 10:11:31.000000000 -0400
@@ -12,7 +12,11 @@
 /* .data section */
 #define DATA_DATA							\
 	*(.data)							\
-	*(.data.init.refok)
+	*(.data.init.refok)						\
+	. = ALIGN(8);							\
+	VMLINUX_SYMBOL(__start___markers) = .;				\
+	*(__markers)							\
+	VMLINUX_SYMBOL(__stop___markers) = .;
 
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
@@ -129,6 +133,11 @@
 		VMLINUX_SYMBOL(__stop___immediate) = .;			\
 	}								\
 									\
+	/* Markers: strings */						\
+        __markers_strings : AT(ADDR(__markers_strings) - LOAD_OFFSET) {	\
+		*(__markers_strings)					\
+ 	}								\
+									\
 	/* Kernel symbol table: strings */				\
         __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {	\
 		*(__ksymtab_strings)					\
Index: linux-2.6-lttng/include/linux/marker.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/linux/marker.h	2007-09-17 12:43:54.000000000 -0400
@@ -0,0 +1,175 @@
+#ifndef _LINUX_MARKER_H
+#define _LINUX_MARKER_H
+
+/*
+ * Code markup for dynamic and static tracing.
+ *
+ * See Documentation/marker.txt.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/immediate.h>
+#include <linux/types.h>
+
+struct module;
+struct __mark_marker;
+
+/**
+ * marker_probe_func - Type of a marker probe function
+ * @mdata: pointer of type struct __mark_marker
+ * @private_data: caller site private data
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Type of marker probe functions. They receive the mdata and need to parse the
+ * format string to recover the variable argument list.
+ */
+typedef void marker_probe_func(const struct __mark_marker *mdata,
+	void *private_data, const char *fmt, ...);
+
+struct __mark_marker {
+	const char *name;	/* Marker name */
+	const char *format;	/* Marker format string, describing the
+				 * variable argument list.
+				 */
+	const char *args;	/* List of arguments litteraly transformed
+				 * into a string: "arg1, arg2, arg3".
+				 */
+	DEFINE_IMMEDIATE(char, state);	/* Immediate value state. */
+	marker_probe_func *call;/* Probe handler function pointer */
+	void *pdata;		/* Private probe data */
+} __attribute__((aligned(8)));
+
+#ifdef CONFIG_MARKERS
+
+/*
+ * Generic marker flavor always available.
+ * Note : the empty asm volatile with read constraint is used here instead of a
+ * "used" attribute to fix a gcc 4.1.x bug.
+ * Make sure the alignment of the structure in the __markers section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ */
+#define __trace_mark(generic, name, call_data, format, args...)		\
+	do {								\
+		static const char __mstrtab_name_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= #name;						\
+		static const char __mstrtab_format_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= format;						\
+		static const char __mstrtab_args_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= #args;						\
+		static struct __mark_marker __mark_##name		\
+		__attribute__((section("__markers"))) =			\
+		{ __mstrtab_name_##name, __mstrtab_format_##name,	\
+		__mstrtab_args_##name, 0,				\
+		__mark_empty_function, NULL };				\
+		asm volatile ( "" : : "i" (&__mark_##name));		\
+		__mark_check_format(format, ## args);			\
+		if (!generic) {						\
+			if (unlikely(immediate_read(__mark_##name.state))) { \
+				preempt_disable();			\
+				(*__mark_##name.call)			\
+					(&__mark_##name, call_data,	\
+					format, ## args);		\
+				preempt_enable();			\
+			}						\
+		} else {						\
+			if (unlikely(_immediate_read(__mark_##name.state))) { \
+				preempt_disable();			\
+				(*__mark_##name.call)			\
+					(&__mark_##name, call_data,	\
+					format, ## args);		\
+				preempt_enable();			\
+			}						\
+		}							\
+	} while (0)
+
+extern void marker_update_probe_range(struct __mark_marker *begin,
+	struct __mark_marker *end, struct module *probe_module, int *refcount);
+#else /* !CONFIG_MARKERS */
+#define __trace_mark(generic, name, call_data, format, args...) \
+		__mark_check_format(format, ## args)
+static inline void marker_update_probe_range(struct __mark_marker *begin,
+	struct __mark_marker *end, struct module *probe_module, int *refcount)
+{ }
+#endif /* CONFIG_MARKERS */
+
+/**
+ * trace_mark - Marker using code patching
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker using optimized code patching technique (immediate_read())
+ * to be enabled.
+ */
+#define trace_mark(name, format, args...) \
+	__trace_mark(0, name, NULL, format, ## args)
+
+/**
+ * _trace_mark - Marker using variable read
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker using a standard memory read (_immediate_read()) to be
+ * enabled. Should be used for markers in __init and __exit functions and in
+ * lockdep code.
+ */
+#define _trace_mark(name, format, args...) \
+	__trace_mark(1, name, NULL, format, ## args)
+
+#define MARK_MAX_FORMAT_LEN	1024
+
+/**
+ * MARK_NOARGS - Format string for a marker with no argument.
+ */
+#define MARK_NOARGS " "
+
+/* To be used for string format validity checking with gcc */
+static inline void __attribute__ ((format (printf, 1, 2)))
+	__mark_check_format(const char *fmt, ...) { }
+
+extern marker_probe_func __mark_empty_function;
+
+/*
+ * Connect a probe to a marker.
+ * pdata must be a valid allocated memory address, or NULL.
+ */
+extern int marker_probe_register(const char *name, const char *format,
+				marker_probe_func *probe, void *pdata);
+
+/*
+ * Returns the pdata given to marker_probe_register.
+ */
+extern void *marker_probe_unregister(const char *name);
+/*
+ * Unregister a marker by providing the registered pdata.
+ */
+extern void *marker_probe_unregister_pdata(void *pdata);
+
+extern int marker_arm(const char *name);
+extern int marker_disarm(const char *name);
+
+struct marker_iter {
+	struct module *module;
+	struct __mark_marker *marker;
+};
+
+extern void marker_iter_start(struct marker_iter *iter);
+extern void marker_iter_next(struct marker_iter *iter);
+extern void marker_iter_stop(struct marker_iter *iter);
+extern void marker_iter_reset(struct marker_iter *iter);
+extern void *marker_get_pdata(const char *name);
+extern int marker_get_iter_range(struct __mark_marker **marker,
+	struct __mark_marker *begin,
+	struct __mark_marker *end);
+
+#endif
Index: linux-2.6-lttng/include/linux/module.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/module.h	2007-09-14 10:11:18.000000000 -0400
+++ linux-2.6-lttng/include/linux/module.h	2007-09-14 10:11:31.000000000 -0400
@@ -16,6 +16,7 @@
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
 #include <linux/immediate.h>
+#include <linux/marker.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -376,6 +377,10 @@ struct module
 	const struct __immediate *immediate;
 	unsigned int num_immediate;
 #endif
+#ifdef CONFIG_MARKERS
+	struct __mark_marker *markers;
+	unsigned int num_markers;
+#endif
 };
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
@@ -482,6 +487,9 @@ extern void print_modules(void);
 extern void _module_immediate_update(void);
 extern void module_immediate_update(void);
 
+extern void module_update_markers(struct module *probe_module, int *refcount);
+extern int module_get_iter_markers(struct marker_iter *iter);
+
 #else /* !CONFIG_MODULES... */
 #define EXPORT_SYMBOL(sym)
 #define EXPORT_SYMBOL_GPL(sym)
@@ -589,6 +597,16 @@ static inline void module_immediate_upda
 {
 }
 
+static inline void module_update_markers(struct module *probe_module,
+		int *refcount)
+{
+}
+
+static inline int module_get_iter_markers(struct marker_iter *iter)
+{
+	return 0;
+}
+
 #endif /* CONFIG_MODULES */
 
 struct device_driver;
Index: linux-2.6-lttng/kernel/module.c
===================================================================
--- linux-2.6-lttng.orig/kernel/module.c	2007-09-14 10:11:30.000000000 -0400
+++ linux-2.6-lttng/kernel/module.c	2007-09-14 10:11:31.000000000 -0400
@@ -1720,6 +1720,8 @@ static struct module *load_module(void _
 	unsigned int unusedgplindex;
 	unsigned int unusedgplcrcindex;
 	unsigned int immediateindex;
+	unsigned int markersindex;
+	unsigned int markersstringsindex;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1972,6 +1974,8 @@ static struct module *load_module(void _
 	mod->num_immediate =
 		sechdrs[immediateindex].sh_size / sizeof(*mod->immediate);
 #endif
+	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
+ 	markersstringsindex = find_sec(hdr, sechdrs, secstrings, "__markers_strings");
 
 	mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
 	if (unusedcrcindex)
@@ -2013,6 +2017,11 @@ static struct module *load_module(void _
 		if (err < 0)
 			goto cleanup;
 	}
+#ifdef CONFIG_MARKERS
+	mod->markers = (void *)sechdrs[markersindex].sh_addr;
+	mod->num_markers =
+		sechdrs[markersindex].sh_size / sizeof(*mod->markers);
+#endif
 
         /* Find duplicate symbols */
 	err = verify_export_symbols(mod);
@@ -2037,12 +2046,16 @@ static struct module *load_module(void _
 		 goto nomodsectinfo;
 #endif
 
+	if (!mod->taints) {
 #ifdef CONFIG_IMMEDIATE
-	if (!mod->taints)
 		immediate_update_range(mod->immediate,
 			mod->immediate + mod->num_immediate);
 #endif
-
+#ifdef CONFIG_MARKERS
+		marker_update_probe_range(mod->markers,
+			mod->markers + mod->num_markers, NULL, NULL);
+#endif
+	}
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
@@ -2693,3 +2706,52 @@ void module_immediate_update(void)
 }
 EXPORT_SYMBOL_GPL(module_immediate_update);
 #endif
+
+#ifdef CONFIG_MARKERS
+void module_update_markers(struct module *probe_module, int *refcount)
+{
+	struct module *mod;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(mod, &modules, list)
+		if (!mod->taints)
+			marker_update_probe_range(mod->markers,
+				mod->markers + mod->num_markers,
+				probe_module, refcount);
+	mutex_unlock(&module_mutex);
+}
+EXPORT_SYMBOL_GPL(module_update_markers);
+
+/*
+ * Returns 0 if current not found.
+ * Returns 1 if current found.
+ */
+int module_get_iter_markers(struct marker_iter *iter)
+{
+	struct module *iter_mod;
+	int found = 0;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(iter_mod, &modules, list) {
+		if (!iter_mod->taints) {
+			/*
+			 * Sorted module list
+			 */
+			if (iter_mod < iter->module)
+				continue;
+			else if (iter_mod > iter->module)
+				iter->marker = NULL;
+			found = marker_get_iter_range(&iter->marker,
+				iter_mod->markers,
+				iter_mod->markers + iter_mod->num_markers);
+			if (found) {
+				iter->module = iter_mod;
+				break;
+			}
+		}
+	}
+	mutex_unlock(&module_mutex);
+	return found;
+}
+EXPORT_SYMBOL_GPL(module_get_iter_markers);
+#endif
Index: linux-2.6-lttng/kernel/marker.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/kernel/marker.c	2007-09-14 10:11:31.000000000 -0400
@@ -0,0 +1,608 @@
+/*
+ * Copyright (C) 2007 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/marker.h>
+#include <linux/err.h>
+#include <linux/immediate.h>
+
+extern struct __mark_marker __start___markers[];
+extern struct __mark_marker __stop___markers[];
+
+/*
+ * module_mutex nests inside markers_mutex. Markers mutex protects the builtin
+ * and module markers, the hash table and deferred_sync.
+ */
+DEFINE_MUTEX(markers_mutex);
+
+/*
+ * Marker deferred synchronization.
+ * Upon marker probe_unregister, we delay call to synchronize_sched() to
+ * accelerate mass unregistration (only when there is no more reference to a
+ * given module do we call synchronize_sched()). However, we need to make sure
+ * every critical region has ended before we re-arm a marker that has been
+ * unregistered and then registered back with a different probe data.
+ */
+static int deferred_sync;
+
+/*
+ * Marker hash table, containing the active markers.
+ * Protected by module_mutex.
+ */
+#define MARKER_HASH_BITS 6
+#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
+
+struct marker_entry {
+	struct hlist_node hlist;
+	char *format;
+	marker_probe_func *probe;
+	void *pdata;
+	int refcount;	/* Number of times armed. 0 if disarmed. */
+	char name[0];	/* Contains name'\0'format'\0' */
+};
+
+static struct hlist_head marker_table[MARKER_TABLE_SIZE];
+
+/**
+ * __mark_empty_function - Empty probe callback
+ * @mdata: pointer of type const struct __mark_marker
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Empty callback provided as a probe to the markers. By providing this to a
+ * disabled marker, we make sure the  execution flow is always valid even
+ * though the function pointer change and the marker enabling are two distinct
+ * operations that modifies the execution flow of preemptible code.
+ */
+void __mark_empty_function(const struct __mark_marker *mdata,
+	void *private_data,
+	const char *fmt, ...)
+{ }
+EXPORT_SYMBOL_GPL(__mark_empty_function);
+
+/*
+ * Get marker if the marker is present in the marker hash table.
+ * Must be called with markers_mutex held.
+ * Returns NULL if not present.
+ */
+static struct marker_entry *get_marker(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	u32 hash = jhash(name, strlen(name), 0);
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name))
+			return e;
+	}
+	return NULL;
+}
+
+/*
+ * Add the marker to the marker hash table. Must be called with markers_mutex
+ * held.
+ */
+static int add_marker(const char *name,
+	const char *format, marker_probe_func *probe, void *pdata)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t name_len = strlen(name) + 1;
+	size_t format_len = 0;
+	u32 hash = jhash(name, name_len-1, 0);
+
+	if (format)
+		format_len = strlen(format) + 1;
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			printk(KERN_NOTICE
+				"Marker %s busy, probe %p already installed\n",
+				name, e->probe);
+			return -EBUSY;	/* Already there */
+		}
+	}
+	/*
+	 * Using kmalloc here to allocate a variable length element. Could
+	 * cause some memory fragmentation if overused.
+	 */
+	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+			GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	memcpy(&e->name[0], name, name_len);
+	if (format) {
+		e->format = &e->name[name_len];
+		memcpy(e->format, format, format_len);
+		trace_mark(core_marker_format, "name %s format %s",
+				e->name, e->format);
+	} else
+		e->format = NULL;
+	e->probe = probe;
+	e->pdata = pdata;
+	e->refcount = 0;
+	hlist_add_head(&e->hlist, head);
+	return 0;
+}
+
+/*
+ * Remove the marker from the marker hash table. Must be called with mutex_lock
+ * held.
+ */
+static void *remove_marker(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	int found = 0;
+	size_t len = strlen(name) + 1;
+	void *pdata = NULL;
+	u32 hash = jhash(name, len-1, 0);
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			break;
+		}
+	}
+	if (found) {
+		pdata = e->pdata;
+		hlist_del(&e->hlist);
+		kfree(e);
+	}
+	return pdata;
+}
+
+/*
+ * Set the mark_entry format to the format found in the element.
+ */
+static int marker_set_format(struct marker_entry **entry, const char *format)
+{
+	struct marker_entry *e;
+	size_t name_len = strlen((*entry)->name) + 1;
+	size_t format_len = strlen(format) + 1;
+
+	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+			GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	memcpy(&e->name[0], (*entry)->name, name_len);
+	e->format = &e->name[name_len];
+	memcpy(e->format, format, format_len);
+	e->probe = (*entry)->probe;
+	e->pdata = (*entry)->pdata;
+	e->refcount = (*entry)->refcount;
+	hlist_add_before(&e->hlist, &(*entry)->hlist);
+	hlist_del(&(*entry)->hlist);
+	kfree(*entry);
+	*entry = e;
+	trace_mark(core_marker_format, "name %s format %s",
+			e->name, e->format);
+	return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one marker.
+ */
+static int set_marker(struct marker_entry **entry,
+			struct __mark_marker *elem)
+{
+	int ret;
+	BUG_ON(strcmp((*entry)->name, elem->name) != 0);
+
+	if ((*entry)->format) {
+		if (strcmp((*entry)->format, elem->format) != 0) {
+			printk(KERN_NOTICE
+				"Format mismatch for probe %s "
+				"(%s), marker (%s)\n",
+				(*entry)->name,
+				(*entry)->format,
+				elem->format);
+			return -EPERM;
+		}
+	} else {
+		ret = marker_set_format(entry, elem->format);
+		if (ret)
+			return ret;
+	}
+	elem->call = (*entry)->probe;
+	elem->pdata = (*entry)->pdata;
+	_immediate_set(elem->state, 1);
+	return 0;
+}
+
+/*
+ * Disable a marker and its probe callback.
+ * Note: only after a synchronize_sched() issued after setting elem->call to the
+ * empty function insures that the original callback is not used anymore. This
+ * insured by preemption disabling around the call site.
+ */
+static void disable_marker(struct __mark_marker *elem)
+{
+	_immediate_set(elem->state, 0);
+	elem->call = __mark_empty_function;
+	/*
+	 * Leave the pdata and id there, because removal is racy and should be
+	 * done only after a synchronize_sched(). These are never used until
+	 * the next initialization anyway.
+	 */
+}
+
+/**
+ * marker_update_probe_range - Update a probe range
+ * @begin: beginning of the range
+ * @end: end of the range
+ * @probe_module: module address of the probe being updated
+ * @refcount: number of references left to the given probe_module (out)
+ *
+ * Updates the probe callback corresponding to a range of markers.
+ * Must be called with markers_mutex held.
+ */
+void marker_update_probe_range(
+	struct __mark_marker *begin,
+	struct __mark_marker *end,
+	struct module *probe_module,
+	int *refcount)
+{
+	struct __mark_marker *iter;
+	struct marker_entry *mark_entry;
+
+	for (iter = begin; iter < end; iter++) {
+		mark_entry = get_marker(iter->name);
+		if (mark_entry && mark_entry->refcount) {
+			set_marker(&mark_entry, iter);
+			/*
+			 * ignore error, continue
+			 */
+			if (probe_module)
+				if (probe_module ==
+			__module_text_address((unsigned long)mark_entry->probe))
+					(*refcount)++;
+		} else {
+			disable_marker(iter);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(marker_update_probe_range);
+
+/*
+ * Update probes, removing the faulty probes.
+ * Issues a synchronize_sched() when no reference to the module passed
+ * as parameter is found in the probes so the probe module can be
+ * safely unloaded from now on.
+ */
+static inline void marker_update_probes(struct module *probe_module)
+{
+	int refcount = 0;
+
+	mutex_lock(&markers_mutex);
+	/* Core kernel markers */
+	marker_update_probe_range(__start___markers,
+			__stop___markers, probe_module, &refcount);
+	/* Markers in modules. */
+	module_update_markers(probe_module, &refcount);
+	if (probe_module && refcount == 0) {
+		synchronize_sched();
+		deferred_sync = 0;
+	}
+	mutex_unlock(&markers_mutex);
+}
+
+/**
+ * marker_probe_register -  Connect a probe to a marker
+ * @name: marker name
+ * @format: format string
+ * @probe: probe handler
+ * @pdata: probe private data
+ *
+ * pdata must be a valid allocated memory address, or NULL.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_probe_register(const char *name, const char *format,
+			marker_probe_func *probe, void *pdata)
+{
+	struct marker_entry *entry;
+	int ret = 0, need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (entry && entry->refcount) {
+		ret = -EBUSY;
+		goto end;
+	}
+	if (deferred_sync) {
+		synchronize_sched();
+		deferred_sync = 0;
+	}
+	ret = add_marker(name, format, probe, pdata);
+	if (ret)
+		goto end;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(NULL);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_probe_register);
+
+/**
+ * marker_probe_unregister -  Disconnect a probe from a marker
+ * @name: marker name
+ *
+ * Returns the pdata given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister(const char *name)
+{
+	struct module *probe_module;
+	struct marker_entry *entry;
+	void *pdata;
+	int need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (!entry) {
+		pdata = ERR_PTR(-ENOENT);
+		goto end;
+	}
+	entry->refcount = 0;
+	/* In what module is the probe handler ? */
+	probe_module = __module_text_address((unsigned long)entry->probe);
+	pdata = remove_marker(name);
+	deferred_sync = 1;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(probe_module);
+	return pdata;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister);
+
+/**
+ * marker_probe_unregister_pdata -  Disconnect a probe from a marker
+ * @pdata: probe private data
+ *
+ * Unregister a marker by providing the registered pdata.
+ * Returns the pdata given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister_pdata(void *pdata)
+{
+	struct module *probe_module;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *entry;
+	int found = 0;
+	unsigned int i;
+	int need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	for (i = 0; i < MARKER_TABLE_SIZE; i++) {
+		head = &marker_table[i];
+		hlist_for_each_entry(entry, node, head, hlist) {
+			if (entry->pdata == pdata) {
+				found = 1;
+				goto iter_end;
+			}
+		}
+	}
+iter_end:
+	if (!found) {
+		pdata = ERR_PTR(-ENOENT);
+		goto end;
+	}
+	entry->refcount = 0;
+	/* In what module is the probe handler ? */
+	probe_module = __module_text_address((unsigned long)entry->probe);
+	pdata = remove_marker(entry->name);
+	deferred_sync = 1;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(probe_module);
+	return pdata;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister_pdata);
+
+/**
+ * marker_arm - Arm a marker
+ * @name: marker name
+ *
+ * Activate a marker. It keeps a reference count of the number of
+ * arming/disarming done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_arm(const char *name)
+{
+	struct marker_entry * entry;
+	int ret = 0, need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (!entry) {
+		ret = -ENOENT;
+		goto end;
+	}
+	/*
+	 * Only need to update probes when refcount passes from 0 to 1.
+	 */
+	if (entry->refcount++)
+		goto end;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(NULL);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_arm);
+
+/**
+ * marker_disarm - Disarm a marker
+ * @name: marker name
+ *
+ * Disarm a marker. It keeps a reference count of the number of arming/disarming
+ * done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_disarm(const char *name)
+{
+	struct marker_entry * entry;
+	int ret = 0, need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (!entry) {
+		ret = -ENOENT;
+		goto end;
+	}
+	/*
+	 * Only permit decrement refcount if higher than 0.
+	 * Do probe update only on 1 -> 0 transition.
+	 */
+	if (entry->refcount) {
+		if (--entry->refcount)
+			goto end;
+	} else {
+		ret = -EPERM;
+		goto end;
+	}
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(NULL);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_disarm);
+
+/**
+ * marker_get_pdata - Get a marker's probe private data
+ * @name: marker name
+ *
+ * Returns the pdata pointer, or an ERR_PTR.
+ * The pdata pointer should _only_ be dereferenced if the caller is the owner of
+ * the data, or its content could vanish. This is mostly used to confirm that a
+ * caller is the owner of a registered probe.
+ */
+void *marker_get_pdata(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t name_len = strlen(name) + 1;
+	u32 hash = jhash(name, name_len-1, 0);
+	int found = 0;
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			return e->pdata;
+		}
+	}
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(marker_get_pdata);
+
+/**
+ * marker_get_iter_range - Get a next marker iterator given a range.
+ * @marker: current markers (in), next marker (out)
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Returns whether a next marker has been found (1) or not (0).
+ * Will return the first marker in the range if the input marker is NULL.
+ */
+int marker_get_iter_range(struct __mark_marker **marker,
+	struct __mark_marker *begin,
+	struct __mark_marker *end)
+{
+	int found = 0;
+
+	if (!*marker && begin != end) {
+		found = 1;
+		*marker = begin;
+	} else if (*marker >= begin && *marker < end) {
+		found = 1;
+		/*
+		 * *marker is known to be a valid marker from now on.
+		 */
+	}
+	return found;
+}
+EXPORT_SYMBOL_GPL(marker_get_iter_range);
+
+static inline void marker_get_iter(struct marker_iter *iter)
+{
+	int found = 0;
+
+	/* Core kernel markers */
+	if (!iter->module) {
+		found = marker_get_iter_range(&iter->marker,
+				__start___markers, __stop___markers);
+		if (found)
+			goto end;
+	}
+	/* Markers in modules. */
+	found = module_get_iter_markers(iter);
+end:
+	if (!found)
+		marker_iter_reset(iter);
+}
+
+void marker_iter_start(struct marker_iter *iter)
+{
+	mutex_lock(&markers_mutex);
+	marker_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(marker_iter_start);
+
+void marker_iter_next(struct marker_iter *iter)
+{
+	iter->marker++;
+	/*
+	 * iter->marker may be invalid because we blindly incremented it.
+	 * Make sure it is valid by marshalling on the markers, getting the
+	 * markers from following modules if necessary.
+	 */
+	marker_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(marker_iter_next);
+
+void marker_iter_stop(struct marker_iter *iter)
+{
+	mutex_unlock(&markers_mutex);
+}
+EXPORT_SYMBOL_GPL(marker_iter_stop);
+
+void marker_iter_reset(struct marker_iter *iter)
+{
+	iter->module = NULL;
+	iter->marker = NULL;
+}
+EXPORT_SYMBOL_GPL(marker_iter_reset);

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [patch 2/4] Linux Kernel Markers - Use instrumentation kconfig menu
  2007-09-18 21:13 [patch 0/4] Linux Kernel Markers for 2.6.23-rc6-mm1 Mathieu Desnoyers
  2007-09-18 21:13 ` [patch 1/4] Linux Kernel Markers - Architecture Independent Code Mathieu Desnoyers
@ 2007-09-18 21:13 ` Mathieu Desnoyers
  2007-09-18 21:13 ` [patch 3/4] Linux Kernel Markers - Documentation Mathieu Desnoyers
  2007-09-18 21:13 ` [patch 4/4] Port of blktrace to the Linux Kernel Markers Mathieu Desnoyers
  3 siblings, 0 replies; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-09-18 21:13 UTC (permalink / raw)
  To: akpm, linux-kernel
  Cc: Mathieu Desnoyers, Frank Ch. Eigler, Christoph Hellwig, Adrian Bunk

[-- Attachment #1: linux-kernel-markers-instrumentation-menu.patch --]
[-- Type: text/plain, Size: 11226 bytes --]

With the increasing complexity of today's user-space application and the wide
deployment of SMP systems, the users need an increasing understanding of the
behavior and performance of a system across multiple processes/different
execution contexts/multiple CPUs.  In applications such as large clusters
(Google, IBM), video acquisition (Autodesk), embedded real-time systems (Wind
River, Monta Vista, Sony) or sysadmin/programmer-type tasks (SystemTAP from
Redhat), a tool that permits tracing of kernel-user space interaction becomes
necessary.

Usage of such tools have been made to successfully pinpoint problems such as:
latency issues in a user-space video acquisition application, slowdown
problems in large clusters due to a switch to a different filesystems with a
different cache size, abnormal Linux scheduler latency (just to name a few
that I have personally investigated).

The currently existing solutions does not give a system-wide overview of what
- and when - things are happening on the system.  Ptracing a program works
with few processes, but quickly becomes useless when it comes to keeping track
of many processes.

Bugs occuring because of bad interaction of such complex systems can be very
hard to find due to the fact that they occur rarely (sometimes once a week on
hundreds of machines).  One can therefore only hope at having the best
conditions to statistically reproduce the bug while extracting information
from the system.  Some bugs have been successfully found at Google using their
ktrace tracer only because they could enable it on production machines and
therefore recreate the same context where the bug happened.

Therefore, it makes sense to offer an instrumentation set of the most relevant
events occurring in the Linux that can have the smallest performance cost
possible when not active while not requiring a reboot of a production system
to activate.  This is essentially what the markers are providing.

Since we cannot limit the growth of the Linux kernel, nor can we pre-determine
each and every "interesting" instrumentation within each subsystem and driver,
it is sensible to let this task to the persons who knows the best their code. 
Adding instrumentation should therefore be as easy as adding and maintaining a
"printk" in the kernel code from the developer's point of view.

Towards a complete tracing mechanism in the Linux kernel, the markers are only
one step forward.  The following step is to connect probes to those markers
that will record the tracing information in buffers exported to user-space,
organized in timestamped "events".  Probe callbacks are responsible for
serializing the information passed as parameter to the markers (described by
the format string) into the events.  A control mechanism to activate/stop the
tracing is required, as well as a daemon that maps the buffers to write them
to disk or send them through the network.

Keeping track of the events also requires a centralized infrastructure : the
idea is to assign a unique ID to each event so they can be later recognized in
the trace.  Keeping in mind that recording the complete instrumentation site
name string for each event would be more that inefficient, assigning a numeric
unique identifier makes sense.

Finally, support for gathering events coming from user-space, with a minimal
performance impact, is very useful to see the interaction between the system's
execution contexts.

The last steps are currently implemented in Linux Trace Toolkit Next
Generation (LTTng).

The SystemTAP project could clearly benefit from such an infrastructure for
tracing.  In addition, they would be providing support for dynamic addition of
kernel probes through breakpoints/jumps when possible, with the associated
restrictions (accessing local variables, reentrancy, speed).




This marker infrastructure is a hook-callback mechanism.  It is meant to have
an impact as low as possible on the system performances when no callback
(probe) is connected so markers (hooks) can be compiled into a production
kernel without noticeable slowdown.

Why use the markers instead of kprobes?

The rationale behind this mechanism the following :

1 - It makes sense to have instrumentation (for tracing, profiling)
    within the kernel source tree so that it can follow its evolution.
    Other options, such as kprobes, imply maintaining an external set of
    instrumentation that must be adapted to each kernel version.
    Although it may make sense for distributions, it is not well suited
    for kernel developers, since they rarely work on a major
    distribution image.
2 - kprobes, although being a very good attempt at providing a dynamic
    hooking mechanism that has no impact when disabled, suffers from
    important limitations :
  a - It cannot access local variables of a function at a particular
      point within its body that will be consistent thorough the kernel
      versions without involving a lot of recurrent hair-pulling.
  b - Kprobes is slow, since it involves going though a trap each time
      a probe site is executed. Even though the djprobes project made a
      good effort to make things faster, it cannot currently instrument
      fully-preemptible kernels and does not solve (1), (2a) and (2c).
  c - On the reentrancy side, going though a trap (thus playing with
      interrupt enable/disable) and taking spinlocks are not suited to
      some code paths, i.e. :
      kernel/lockdep.c, printk (within the lockdep_on()/lockdep_off()).
      It must be understood that some code paths interesting for
      instrumentation often present a particular reentrancy challenge.

Some more details :

The probe callback connection to its markers is done dynamically.  A predicted
branch (see the immediate values infrastructure) is used to skip the hook stack
setup and function call when the marker is "disabled" (no probe is connected).
Further optimizations can be implemented for each architecture to make this
branch faster.

Instrumentation of a subsystem becomes therefore a straightforward task.  One
has to add instrumentation within the key locations of the kernel code in the
following form :

trace_mark(subsystem_event, "%d %p", myint, myptr);


Jim Keniston <jkenisto@us.ibm.com> adds:

kprobes remains a vital foundation for SystemTap.  But markers are attactive
as an alternate source of trace/debug info.  Here's why:

1. Markers will live in the kernel and presumably be kept up to date by
   the maintainers of the enclosing code.  We have a growing set of tapsets
   (probe libraries), each of which "knows" the source code for a certain area
   of the kernel.  Whenever the underlying kernel code changes (e.g., a
   function or one of its args disappears or is renamed), there's a chance
   that the tapset will become invalid until we bring it back in sync with the
   kernel.  As you can imagine, maintaining tapsets separate from the kernel
   source is a maintenance headache.  Markers could mitigate this.

2. Because the kernel code is highly optimized, the kernel's dwarf info
   doesn't always accurately reflect which variables have which values on
   which lines (sometimes even upon entry to a function).  A marker is a way
   to ensure that values of interest are available to SystemTap at marked
   points.

3. Sometimes the overhead of a kprobe probepoint is too much (either in
   terms of time or locking) for the particular hotspot we want to probe.


In OLS2006 proceedings, vol. 1
http://www.linuxsymposium.org/2006/linuxsymposium_procv1.pdf

Frank C. Eigler, from SystemTAP, presents its "static probing markers"
(pp. 261-268) in his paper "Problem Solving With Systemtap".

He explains the advantages :

"In exchange for this effort, systemtap marker-based probes are faster and
 more precise than kprobes.  The better precision comes from not having to
 covet the compiler's favours.  Such fickle favours include retaining
 clean boundaries in the instruction stream between interesting statements,
 and precisely describing positions of variables in the stack frame.  Since
 markers don't rely on debugging information, neither favour is required,
 and the compiler can channel its charms into unabated optimization.  The
 speed advantage comes from using direct call instructions rather than int 3
 breakpoints to dispatch to the systemtap handlers.  We will see below just
 how big a difference this makes."

He does a comparison of his "simple" marker solution with kprobes (his simple
solution looks like my generic markers, but with a major race condition).  I
also posted numbers about the markers performance impact a few months ago in
the initial thread.  I can dig into my emails to find them for you if you
consider it important for the Changelog.

He concludes with :

"To the extent that is true, we propose that these groups consider using a
 shared pool of static markers as the basic kernel-side instrumentation
 mechanism.  If they prove to have as low dormant cost and as high active
 performance as initial experience suggests, perhaps this could motivate the
 various tracing efforts and kernel subsystem developers to finally join
 forces.  Let's designate standard trace/probe points once and for all. 
 Tracing backends can attach to these markers the same way systemtap would. 
 There would be no need for them to maintain kernel patches any more. 
 Let's think about it."


This patch:

Add Kconfig menus for the marker code.

[bunk@stusta.de: Never ever select MODULES]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: "Frank Ch. Eigler" <fche@redhat.com>
CC: Christoph Hellwig <hch@infradead.org>
CC: Adrian Bunk <bunk@stusta.de>
---
 kernel/Kconfig.instrumentation |    6 ++++++
 kernel/Makefile                |    1 +
 2 files changed, 7 insertions(+)

Index: linux-2.6-lttng/kernel/Kconfig.instrumentation
===================================================================
--- linux-2.6-lttng.orig/kernel/Kconfig.instrumentation	2007-09-12 10:46:55.000000000 -0400
+++ linux-2.6-lttng/kernel/Kconfig.instrumentation	2007-09-12 10:48:45.000000000 -0400
@@ -37,4 +37,10 @@ config KPROBES
 	  for kernel debugging, non-intrusive instrumentation and testing.
 	  If in doubt, say "N".
 
+config MARKERS
+	bool "Activate markers"
+	help
+	  Place an empty function call at each marker site. Can be
+	  dynamically changed for a probe function.
+
 endif # INSTRUMENTATION
Index: linux-2.6-lttng/kernel/Makefile
===================================================================
--- linux-2.6-lttng.orig/kernel/Makefile	2007-09-12 10:51:52.000000000 -0400
+++ linux-2.6-lttng/kernel/Makefile	2007-09-12 10:52:07.000000000 -0400
@@ -62,6 +62,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayac
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
 obj-$(CONFIG_IMMEDIATE) += immediate.o
+obj-$(CONFIG_MARKERS) += marker.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [patch 3/4] Linux Kernel Markers - Documentation
  2007-09-18 21:13 [patch 0/4] Linux Kernel Markers for 2.6.23-rc6-mm1 Mathieu Desnoyers
  2007-09-18 21:13 ` [patch 1/4] Linux Kernel Markers - Architecture Independent Code Mathieu Desnoyers
  2007-09-18 21:13 ` [patch 2/4] Linux Kernel Markers - Use instrumentation kconfig menu Mathieu Desnoyers
@ 2007-09-18 21:13 ` Mathieu Desnoyers
  2007-09-18 23:22   ` Randy Dunlap
  2007-09-18 21:13 ` [patch 4/4] Port of blktrace to the Linux Kernel Markers Mathieu Desnoyers
  3 siblings, 1 reply; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-09-18 21:13 UTC (permalink / raw)
  To: akpm, linux-kernel; +Cc: Mathieu Desnoyers, Frank Ch. Eigler, Christoph Hellwig

[-- Attachment #1: linux-kernel-markers-documentation.patch --]
[-- Type: text/plain, Size: 9442 bytes --]

Here is some documentation explaining what is/how to use the Linux
Kernel Markers.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: "Frank Ch. Eigler" <fche@redhat.com>
CC: Christoph Hellwig <hch@infradead.org>
---

 Documentation/markers/markers.txt          |   93 +++++++++++++++++++++++++++
 Documentation/markers/src/Makefile         |    7 ++
 Documentation/markers/src/marker-example.c |   55 ++++++++++++++++
 Documentation/markers/src/probe-example.c  |   98 +++++++++++++++++++++++++++++
 4 files changed, 253 insertions(+)

Index: linux-2.6-lttng/Documentation/markers/markers.txt
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/Documentation/markers/markers.txt	2007-09-07 09:17:45.000000000 -0400
@@ -0,0 +1,93 @@
+ 	             Using the Linux Kernel Markers
+
+			    Mathieu Desnoyers
+
+
+This document introduces Linux Kernel Markers and their use. It provides
+examples of how to insert markers in the kernel and connect probe functions to
+them and provides some examples of probe functions.
+
+
+* Purpose of markers
+
+A marker placed in code provides a hook to call a function (probe) that you can
+provide at runtime. A marker can be "on" (a probe is connected to it) or "off"
+(no probe is attached). When a marker is "off" it has no effect, except for
+adding a tiny time penalty (checking a condition for a branch) and space
+penalty (adding a few bytes for the function call at the end of the
+instrumented function and adds a data structure in a separate section). The
+immediate values are used to minimize the impact on data cache, encoding the
+condition in the instruction stream. When a marker is "on", the function you
+provide is called each time the marker is executed, in the execution context of
+the caller. When the function provided ends its execution, it returns to the
+caller (continuing from the marker site).
+
+You can put markers at important locations in the code. Markers are
+lightweight hooks that can pass an arbitrary number of parameters,
+described in a printk-like format string, to the attached probe function.
+
+They can be used for tracing and performance accounting.
+
+
+* Usage
+
+In order to use the macro trace_mark, you should include linux/marker.h.
+
+#include <linux/marker.h>
+
+And,
+
+trace_mark(subsystem_event, "%d %s", someint, somestring);
+Where :
+- subsystem_event is an identifier unique to your event
+    - subsystem is the name of your subsystem.
+    - event is the name of the event to mark.
+- "%d %s" is the formatted string for the serializer.
+- someint is an integer.
+- somestring is a char pointer.
+
+Connecting a function (probe) to a marker is done by providing a probe (function
+to call) for the specific marker through marker_probe_register() and can be
+activated by calling marker_arm(). Marker deactivation can be done by calling
+marker_disarm() as many times as marker_arm() has been called. Removing a probe
+is done through marker_probe_unregister(); it will disarm the probe and make
+sure there is no caller left using the probe when it returns. Probe removal is
+preempt-safe because preemption is disabled around the probe call. See the
+"Probe example" section below for a sample probe module.
+
+The marker mechanism supports inserting multiple instances of the same marker.
+Markers can be put in inline functions, inlined static functions, and
+unrolled loops.
+
+The naming scheme "subsystem_event" is suggested here as a convention intended
+to limit collisions. Marker names are global to the kernel: they are considered
+as being the same whether they are in the core kernel image or in modules.
+Conflicting format strings for markers with the same name will cause the markers
+to be detected to have a different format string not to be armed and will output
+a printk warning which identifies the inconsistency:
+
+"Format mismatch for probe probe_name (format), marker (format)"
+
+
+* Optimization for a given architecture
+
+One can implement optimized markers for a given architecture by replacing
+asm-$ARCH/marker.h.
+
+To force use of a non-optimized version of the markers, _trace_mark() should be
+used. It takes the same parameters as the normal markers, but it does not use
+the immediate values based on code patching.
+
+
+* Probe / marker example
+
+See the example provided in Documentation/markers/markers/src
+
+Run, as root :
+
+make
+insmod marker-example.ko (insmod order is not important)
+insmod probe-example.ko
+cat /proc/marker-example (returns an expected error)
+rmmod marker-example probe-example
+dmesg
Index: linux-2.6-lttng/Documentation/markers/src/Makefile
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/Documentation/markers/src/Makefile	2007-09-07 09:14:38.000000000 -0400
@@ -0,0 +1,7 @@
+obj-m := probe-example.o marker-example.o
+KDIR := /lib/modules/$(shell uname -r)/build
+PWD := $(shell pwd)
+default:
+	$(MAKE) -C $(KDIR) SUBDIRS=$(PWD) modules
+clean:
+	rm -f *.mod.c *.ko *.o
Index: linux-2.6-lttng/Documentation/markers/src/marker-example.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/Documentation/markers/src/marker-example.c	2007-09-07 09:16:27.000000000 -0400
@@ -0,0 +1,55 @@
+/* marker-example.c
+ *
+ * Executes a marker when /proc/marker-example is opened.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/marker.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+
+struct proc_dir_entry *pentry_example = NULL;
+
+static int my_open(struct inode *inode, struct file *file)
+{
+	int i;
+
+	trace_mark(subsystem_event, "%d %s", 123, "example string");
+	for (i=0; i<10; i++) {
+		trace_mark(subsystem_eventb, MARK_NOARGS);
+	}
+	return -EPERM;
+}
+
+static struct file_operations mark_ops = {
+	.open = my_open,
+};
+
+static int example_init(void)
+{
+	printk(KERN_ALERT "example init\n");
+	pentry_example = create_proc_entry("marker-example", 0444, NULL);
+	if (pentry_example)
+		pentry_example->proc_fops = &mark_ops;
+	else
+		return -EPERM;
+	return 0;
+}
+
+static void example_exit(void)
+{
+	printk(KERN_ALERT "example exit\n");
+	remove_proc_entry("marker-example", NULL);
+}
+
+module_init(example_init)
+module_exit(example_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Marker example");
Index: linux-2.6-lttng/Documentation/markers/src/probe-example.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/Documentation/markers/src/probe-example.c	2007-09-07 09:18:55.000000000 -0400
@@ -0,0 +1,98 @@
+/* probe-example.c
+ *
+ * Connects two functions to marker call sites.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/marker.h>
+#include <asm/atomic.h>
+
+struct probe_data {
+	const char *name;
+	const char *format;
+	marker_probe_func *probe_func;
+};
+
+void probe_subsystem_event(const struct __mark_marker *mdata,
+		void *private, const char *format, ...)
+{
+	va_list ap;
+	/* Declare args */
+	unsigned int value;
+	const char *mystr;
+
+	/* Assign args */
+	va_start(ap, format);
+	value = va_arg(ap, typeof(value));
+	mystr = va_arg(ap, typeof(mystr));
+
+	/* Call printk */
+	printk("Value %u, string %s\n", value, mystr);
+
+	/* or count, check rights, serialize data in a buffer */
+
+	va_end(ap);
+}
+
+atomic_t eventb_count = ATOMIC_INIT(0);
+
+void probe_subsystem_eventb(const struct __mark_marker *mdata,
+	void *private, const char *format, ...)
+{
+	/* Increment counter */
+	atomic_inc(&eventb_count);
+}
+
+static struct probe_data probe_array[] =
+{
+	{	.name = "subsystem_event",
+		.format = "%d %s",
+		.probe_func = probe_subsystem_event },
+	{	.name = "subsystem_eventb",
+		.format = MARK_NOARGS,
+		.probe_func = probe_subsystem_eventb },
+};
+
+static int __init probe_init(void)
+{
+	int result;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(probe_array); i++) {
+		result = marker_probe_register(probe_array[i].name,
+				probe_array[i].format,
+				probe_array[i].probe_func, &probe_array[i]);
+		if (result)
+			printk(KERN_INFO "Unable to register probe %s\n",
+				probe_array[i].name);
+		result = marker_arm(probe_array[i].name);
+		if (result)
+			printk(KERN_INFO "Unable to arm probe %s\n",
+				probe_array[i].name);
+	}
+	return 0;
+}
+
+static void __exit probe_fini(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(probe_array); i++) {
+		marker_probe_unregister(probe_array[i].name);
+	}
+	printk("Number of event b : %u\n", atomic_read(&eventb_count));
+}
+
+module_init(probe_init);
+module_exit(probe_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("SUBSYSTEM Probe");

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [patch 4/4] Port of blktrace to the Linux Kernel Markers.
  2007-09-18 21:13 [patch 0/4] Linux Kernel Markers for 2.6.23-rc6-mm1 Mathieu Desnoyers
                   ` (2 preceding siblings ...)
  2007-09-18 21:13 ` [patch 3/4] Linux Kernel Markers - Documentation Mathieu Desnoyers
@ 2007-09-18 21:13 ` Mathieu Desnoyers
  2007-09-21  1:03   ` Steven Rostedt
  3 siblings, 1 reply; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-09-18 21:13 UTC (permalink / raw)
  To: akpm, linux-kernel; +Cc: Mathieu Desnoyers, Frank Ch. Eigler, Jens Axboe

[-- Attachment #1: linux-kernel-markers-port-blktrace-to-markers.patch --]
[-- Type: text/plain, Size: 27084 bytes --]

Here is the first stage of a port of blktrace to the Linux Kernel Markers. The
advantage of this port is that it minimizes the impact on the running when
blktrace is not active.

A few remarks : this patch has the positive effect of removing some code
from the block io tracing hot paths, minimizing the i-cache impact in a
system where the io tracing is compiled in but inactive.

It also moves the blk tracing code from a header (and therefore from the
body of the instrumented functions) to a separate C file.

There, as soon as one device has to be traced, all devices have to
execute the tracing function call when they pass by the instrumentation site.
This is slower than the previous inline function which tested the condition
quickly.

It does not make the code smaller, since I left all the specialized
tracing functions for requests, bio, generic, remap, which would go away
once a generic infrastructure is in place to serialize the information
passed to the marker. This is mostly why I consider it as a step towards the
full improvements that could bring the markers.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: "Frank Ch. Eigler" <fche@redhat.com>
CC: Jens Axboe <jens.axboe@oracle.com>
---

 block/Kconfig                |    1 
 block/blktrace.c             |  343 ++++++++++++++++++++++++++++++++++++++++++-
 block/elevator.c             |    6 
 block/ll_rw_blk.c            |   35 ++--
 drivers/block/cciss.c        |    4 
 drivers/md/dm.c              |   14 -
 fs/bio.c                     |    6 
 include/linux/blktrace_api.h |  145 +-----------------
 mm/bounce.c                  |    4 
 mm/highmem.c                 |    2 
 10 files changed, 388 insertions(+), 172 deletions(-)

Index: linux-2.6-lttng/block/elevator.c
===================================================================
--- linux-2.6-lttng.orig/block/elevator.c	2007-09-18 10:08:11.000000000 -0400
+++ linux-2.6-lttng/block/elevator.c	2007-09-18 13:18:26.000000000 -0400
@@ -32,7 +32,7 @@
 #include <linux/init.h>
 #include <linux/compiler.h>
 #include <linux/delay.h>
-#include <linux/blktrace_api.h>
+#include <linux/marker.h>
 #include <linux/hash.h>
 
 #include <asm/uaccess.h>
@@ -548,7 +548,7 @@ void elv_insert(struct request_queue *q,
 	unsigned ordseq;
 	int unplug_it = 1;
 
-	blk_add_trace_rq(q, rq, BLK_TA_INSERT);
+	trace_mark(blk_request_insert, "%p %p", q, rq);
 
 	rq->q = q;
 
@@ -735,7 +735,7 @@ struct request *elv_next_request(struct 
 			 * not be passed by new incoming requests
 			 */
 			rq->cmd_flags |= REQ_STARTED;
-			blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+			trace_mark(blk_request_issue, "%p %p", q, rq);
 		}
 
 		if (!q->boundary_rq || q->boundary_rq == rq) {
Index: linux-2.6-lttng/block/ll_rw_blk.c
===================================================================
--- linux-2.6-lttng.orig/block/ll_rw_blk.c	2007-09-18 10:09:51.000000000 -0400
+++ linux-2.6-lttng/block/ll_rw_blk.c	2007-09-18 13:18:26.000000000 -0400
@@ -28,6 +28,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/interrupt.h>
 #include <linux/cpu.h>
+#include <linux/marker.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
 #include <linux/scatterlist.h>
@@ -1570,7 +1571,7 @@ void blk_plug_device(struct request_queu
 
 	if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
 		mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
-		blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
+		trace_mark(blk_plug_device, "%p %p %d", q, NULL, 0);
 	}
 }
 
@@ -1636,7 +1637,7 @@ static void blk_backing_dev_unplug(struc
 	 * devices don't necessarily have an ->unplug_fn defined
 	 */
 	if (q->unplug_fn) {
-		blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
+		trace_mark(blk_pdu_unplug_io, "%p %p %d", q, NULL,
 					q->rq.count[READ] + q->rq.count[WRITE]);
 
 		q->unplug_fn(q);
@@ -1648,7 +1649,7 @@ static void blk_unplug_work(struct work_
 	struct request_queue *q =
 		container_of(work, struct request_queue, unplug_work);
 
-	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
+	trace_mark(blk_pdu_unplug_io, "%p %p %d", q, NULL,
 				q->rq.count[READ] + q->rq.count[WRITE]);
 
 	q->unplug_fn(q);
@@ -1658,7 +1659,7 @@ static void blk_unplug_timeout(unsigned 
 {
 	struct request_queue *q = (struct request_queue *)data;
 
-	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
+	trace_mark(blk_pdu_unplug_timer, "%p %p %d", q, NULL,
 				q->rq.count[READ] + q->rq.count[WRITE]);
 
 	kblockd_schedule_work(&q->unplug_work);
@@ -2178,7 +2179,7 @@ rq_starved:
 	
 	rq_init(q, rq);
 
-	blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
+	trace_mark(blk_get_request, "%p %p %d", q, bio, rw);
 out:
 	return rq;
 }
@@ -2208,7 +2209,7 @@ static struct request *get_request_wait(
 		if (!rq) {
 			struct io_context *ioc;
 
-			blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
+			trace_mark(blk_sleep_request, "%p %p %d", q, bio, rw);
 
 			__generic_unplug_device(q);
 			spin_unlock_irq(q->queue_lock);
@@ -2282,7 +2283,7 @@ EXPORT_SYMBOL(blk_start_queueing);
  */
 void blk_requeue_request(struct request_queue *q, struct request *rq)
 {
-	blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
+	trace_mark(blk_requeue, "%p %p", q, rq);
 
 	if (blk_rq_tagged(rq))
 		blk_queue_end_tag(q, rq);
@@ -3005,7 +3006,7 @@ static int __make_request(struct request
 			if (!ll_back_merge_fn(q, req, bio))
 				break;
 
-			blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
+			trace_mark(blk_bio_backmerge, "%p %p", q, bio);
 
 			req->biotail->bi_next = bio;
 			req->biotail = bio;
@@ -3022,7 +3023,7 @@ static int __make_request(struct request
 			if (!ll_front_merge_fn(q, req, bio))
 				break;
 
-			blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
+			trace_mark(blk_bio_frontmerge, "%p %p", q, bio);
 
 			bio->bi_next = req->bio;
 			req->bio = bio;
@@ -3105,9 +3106,10 @@ static inline void blk_partition_remap(s
 		bio->bi_sector += p->start_sect;
 		bio->bi_bdev = bdev->bd_contains;
 
-		blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio,
-				    bdev->bd_dev, bio->bi_sector,
-				    bio->bi_sector - p->start_sect);
+		trace_mark(blk_remap, "%p %p %llu %llu %llu",
+				    bdev_get_queue(bio->bi_bdev), bio,
+				    (u64)bdev->bd_dev, (u64)bio->bi_sector,
+				    (u64)bio->bi_sector - p->start_sect);
 	}
 }
 
@@ -3272,10 +3274,11 @@ end_io:
 		blk_partition_remap(bio);
 
 		if (old_sector != -1)
-			blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
-					    old_sector);
+			trace_mark(blk_remap, "%p %p %llu %llu %llu",
+				q, bio, (u64)old_dev,
+				(u64)bio->bi_sector, (u64)old_sector);
 
-		blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+		trace_mark(blk_bio_queue, "%p %p", q, bio);
 
 		old_sector = bio->bi_sector;
 		old_dev = bio->bi_bdev->bd_dev;
@@ -3464,7 +3467,7 @@ static int __end_that_request_first(stru
 	int total_bytes, bio_nbytes, error, next_idx = 0;
 	struct bio *bio;
 
-	blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
+	trace_mark(blk_request_complete, "%p %p", req->q, req);
 
 	/*
 	 * extend uptodate bool to allow < 0 value to be direct io error
Index: linux-2.6-lttng/block/Kconfig
===================================================================
--- linux-2.6-lttng.orig/block/Kconfig	2007-09-18 10:04:27.000000000 -0400
+++ linux-2.6-lttng/block/Kconfig	2007-09-18 13:18:26.000000000 -0400
@@ -32,6 +32,7 @@ config BLK_DEV_IO_TRACE
 	depends on SYSFS
 	select RELAY
 	select DEBUG_FS
+	select MARKERS
 	help
 	  Say Y here, if you want to be able to trace the block layer actions
 	  on a given queue. Tracing allows you to see any traffic happening
Index: linux-2.6-lttng/block/blktrace.c
===================================================================
--- linux-2.6-lttng.orig/block/blktrace.c	2007-09-18 10:08:31.000000000 -0400
+++ linux-2.6-lttng/block/blktrace.c	2007-09-18 13:18:26.000000000 -0400
@@ -23,11 +23,19 @@
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
 #include <linux/time.h>
+#include <linux/marker.h>
 #include <asm/uaccess.h>
 
 static DEFINE_PER_CPU(unsigned long long, blk_trace_cpu_offset) = { 0, };
 static unsigned int blktrace_seq __read_mostly = 1;
 
+/* Global reference count of probes */
+static DEFINE_MUTEX(blk_probe_mutex);
+static int blk_probes_ref;
+
+int blk_probe_arm(void);
+void blk_probe_disarm(void);
+
 /*
  * Send out a notify message.
  */
@@ -179,7 +187,7 @@ void __blk_add_trace(struct blk_trace *b
 EXPORT_SYMBOL_GPL(__blk_add_trace);
 
 static struct dentry *blk_tree_root;
-static struct mutex blk_tree_mutex;
+static DEFINE_MUTEX(blk_tree_mutex);
 static unsigned int root_users;
 
 static inline void blk_remove_root(void)
@@ -229,6 +237,10 @@ static void blk_trace_cleanup(struct blk
 	blk_remove_tree(bt->dir);
 	free_percpu(bt->sequence);
 	kfree(bt);
+	mutex_lock(&blk_probe_mutex);
+	if (--blk_probes_ref == 0)
+		blk_probe_disarm();
+	mutex_unlock(&blk_probe_mutex);
 }
 
 static int blk_trace_remove(struct request_queue *q)
@@ -386,6 +398,11 @@ static int blk_trace_setup(struct reques
 		goto err;
 	}
 
+	mutex_lock(&blk_probe_mutex);
+	if (!blk_probes_ref++)
+		blk_probe_arm();
+	mutex_unlock(&blk_probe_mutex);
+
 	return 0;
 err:
 	if (dir)
@@ -549,9 +566,331 @@ static void blk_trace_set_ht_offsets(voi
 #endif
 }
 
+/**
+ * blk_add_trace_rq - Add a trace for a request oriented action
+ * Expected variable arguments :
+ * @q:		queue the io is for
+ * @rq:		the source request
+ *
+ * Description:
+ *     Records an action against a request. Will log the bio offset + size.
+ *
+ **/
+static void blk_add_trace_rq(const struct __mark_marker *mdata,
+	void *private_data, const char *fmt, ...)
+{
+	va_list args;
+	u32 what;
+	struct blk_trace *bt;
+	int rw;
+	struct blk_probe_data *pinfo = mdata->pdata;
+	struct request_queue *q;
+	struct request *rq;
+
+	va_start(args, fmt);
+	q = va_arg(args, struct request_queue *);
+	rq = va_arg(args, struct request *);
+	va_end(args);
+
+	what = pinfo->flags;
+	bt = q->blk_trace;
+	rw = rq->cmd_flags & 0x03;
+
+	if (likely(!bt))
+		return;
+
+	if (blk_pc_request(rq)) {
+		what |= BLK_TC_ACT(BLK_TC_PC);
+		__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd);
+	} else  {
+		what |= BLK_TC_ACT(BLK_TC_FS);
+		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL);
+	}
+}
+
+/**
+ * blk_add_trace_bio - Add a trace for a bio oriented action
+ * Expected variable arguments :
+ * @q:		queue the io is for
+ * @bio:	the source bio
+ *
+ * Description:
+ *     Records an action against a bio. Will log the bio offset + size.
+ *
+ **/
+static void blk_add_trace_bio(const struct __mark_marker *mdata,
+	void *private_data, const char *fmt, ...)
+{
+	va_list args;
+	u32 what;
+	struct blk_trace *bt;
+	struct blk_probe_data *pinfo = mdata->pdata;
+	struct request_queue *q;
+	struct bio *bio;
+
+	va_start(args, fmt);
+	q = va_arg(args, struct request_queue *);
+	bio = va_arg(args, struct bio *);
+	va_end(args);
+
+	what = pinfo->flags;
+	bt = q->blk_trace;
+
+	if (likely(!bt))
+		return;
+
+	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
+}
+
+/**
+ * blk_add_trace_generic - Add a trace for a generic action
+ * Expected variable arguments :
+ * @q:		queue the io is for
+ * @bio:	the source bio
+ * @rw:		the data direction
+ *
+ * Description:
+ *     Records a simple trace
+ *
+ **/
+static void blk_add_trace_generic(const struct __mark_marker *mdata,
+	void *private_data, const char *fmt, ...)
+{
+	va_list args;
+	struct blk_trace *bt;
+	u32 what;
+	struct blk_probe_data *pinfo = mdata->pdata;
+	struct request_queue *q;
+	struct bio *bio;
+	int rw;
+
+	va_start(args, fmt);
+	q = va_arg(args, struct request_queue *);
+	bio = va_arg(args, struct bio *);
+	rw = va_arg(args, int);
+	va_end(args);
+
+	what = pinfo->flags;
+	bt = q->blk_trace;
+
+	if (likely(!bt))
+		return;
+
+	if (bio)
+		blk_add_trace_bio(mdata, "%p %p", NULL, q, bio);
+	else
+		__blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL);
+}
+
+/**
+ * blk_add_trace_pdu_ll - Add a trace for a bio with any integer payload
+ * Expected variable arguments :
+ * @q:		queue the io is for
+ * @bio:	the source bio
+ * @pdu:	the long long integer payload
+ *
+ **/
+static inline void blk_trace_integer(struct request_queue *q, struct bio *bio, unsigned long long pdu,
+					u32 what)
+{
+	struct blk_trace *bt;
+	__be64 rpdu;
+
+	bt = q->blk_trace;
+	rpdu = cpu_to_be64(pdu);
+
+	if (likely(!bt))
+		return;
+
+	if (bio)
+		__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
+					!bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu);
+	else
+		__blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
+}
+
+/**
+ * blk_add_trace_pdu_ll - Add a trace for a bio with an long long integer
+ * payload
+ * Expected variable arguments :
+ * @q:		queue the io is for
+ * @bio:	the source bio
+ * @pdu:	the long long integer payload
+ *
+ * Description:
+ *     Adds a trace with some long long integer payload. This might be an unplug
+ *     option given as the action, with the depth at unplug time given as the
+ *     payload
+ *
+ **/
+static void blk_add_trace_pdu_ll(const struct __mark_marker *mdata,
+	void *private_data, const char *fmt, ...)
+{
+	va_list args;
+	struct blk_probe_data *pinfo = mdata->pdata;
+	struct request_queue *q;
+	struct bio *bio;
+	unsigned long long pdu;
+	u32 what;
+
+	what = pinfo->flags;
+
+	va_start(args, fmt);
+	q = va_arg(args, struct request_queue *);
+	bio = va_arg(args, struct bio *);
+	pdu = va_arg(args, unsigned long long);
+	va_end(args);
+
+	blk_trace_integer(q, bio, pdu, what);
+}
+
+
+/**
+ * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload
+ * Expected variable arguments :
+ * @q:		queue the io is for
+ * @bio:	the source bio
+ * @pdu:	the integer payload
+ *
+ * Description:
+ *     Adds a trace with some integer payload. This might be an unplug
+ *     option given as the action, with the depth at unplug time given
+ *     as the payload
+ *
+ **/
+static void blk_add_trace_pdu_int(const struct __mark_marker *mdata,
+	void *private_data, const char *fmt, ...)
+{
+	va_list args;
+	struct blk_probe_data *pinfo = mdata->pdata;
+	struct request_queue *q;
+	struct bio *bio;
+	unsigned int pdu;
+	u32 what;
+
+	what = pinfo->flags;
+
+	va_start(args, fmt);
+	q = va_arg(args, struct request_queue *);
+	bio = va_arg(args, struct bio *);
+	pdu = va_arg(args, unsigned int);
+	va_end(args);
+
+	blk_trace_integer(q, bio, pdu, what);
+}
+
+/**
+ * blk_add_trace_remap - Add a trace for a remap operation
+ * Expected variable arguments :
+ * @q:		queue the io is for
+ * @bio:	the source bio
+ * @dev:	target device
+ * @from:	source sector
+ * @to:		target sector
+ *
+ * Description:
+ *     Device mapper or raid target sometimes need to split a bio because
+ *     it spans a stripe (or similar). Add a trace for that action.
+ *
+ **/
+static void blk_add_trace_remap(const struct __mark_marker *mdata,
+	void *private_data, const char *fmt, ...)
+{
+	va_list args;
+	struct blk_trace *bt;
+	struct blk_io_trace_remap r;
+	u32 what;
+	struct blk_probe_data *pinfo = mdata->pdata;
+	struct request_queue *q;
+	struct bio *bio;
+	u64 dev, from, to;
+
+	va_start(args, fmt);
+	q = va_arg(args, struct request_queue *);
+	bio = va_arg(args, struct bio *);
+	dev = va_arg(args, u64);
+	from = va_arg(args, u64);
+	to = va_arg(args, u64);
+	va_end(args);
+
+	what = pinfo->flags;
+	bt = q->blk_trace;
+
+	if (likely(!bt))
+		return;
+
+	r.device = cpu_to_be32(dev);
+	r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
+	r.sector = cpu_to_be64(to);
+
+	__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
+}
+
+#define FACILITY_NAME "blk"
+
+static struct blk_probe_data probe_array[] =
+{
+	{ "blk_bio_queue", "%p %p", BLK_TA_QUEUE, blk_add_trace_bio },
+	{ "blk_bio_backmerge", "%p %p", BLK_TA_BACKMERGE, blk_add_trace_bio },
+	{ "blk_bio_frontmerge", "%p %p", BLK_TA_FRONTMERGE, blk_add_trace_bio },
+	{ "blk_get_request", "%p %p %d", BLK_TA_GETRQ, blk_add_trace_generic },
+	{ "blk_sleep_request", "%p %p %d", BLK_TA_SLEEPRQ,
+		blk_add_trace_generic },
+	{ "blk_requeue", "%p %p", BLK_TA_REQUEUE, blk_add_trace_rq },
+	{ "blk_request_issue", "%p %p", BLK_TA_ISSUE, blk_add_trace_rq },
+	{ "blk_request_complete", "%p %p", BLK_TA_COMPLETE, blk_add_trace_rq },
+	{ "blk_plug_device", "%p %p %d", BLK_TA_PLUG, blk_add_trace_generic },
+	{ "blk_pdu_unplug_io", "%p %p %d", BLK_TA_UNPLUG_IO,
+		blk_add_trace_pdu_int },
+	{ "blk_pdu_unplug_timer", "%p %p %d", BLK_TA_UNPLUG_TIMER,
+		blk_add_trace_pdu_int },
+	{ "blk_request_insert", "%p %p", BLK_TA_INSERT,
+		blk_add_trace_rq },
+	{ "blk_pdu_split", "%p %p %llu", BLK_TA_SPLIT,
+		blk_add_trace_pdu_ll },
+	{ "blk_bio_bounce", "%p %p", BLK_TA_BOUNCE, blk_add_trace_bio },
+	{ "blk_remap", "%p %p %llu %llu %llu", BLK_TA_REMAP,
+		blk_add_trace_remap },
+};
+
+
+int blk_probe_arm(void)
+{
+	int result;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(probe_array); i++) {
+		result = marker_probe_register(probe_array[i].name,
+				probe_array[i].format,
+				probe_array[i].callback, &probe_array[i]);
+		if (result)
+			printk(KERN_INFO
+				"blktrace unable to register probe %s\n",
+				probe_array[i].name);
+		result = marker_arm(probe_array[i].name);
+		if (result)
+			printk(KERN_INFO
+				"blktrace unable to arm probe %s\n",
+				probe_array[i].name);
+	}
+	return 0;
+}
+
+void blk_probe_disarm(void)
+{
+	int i, err;
+
+	for (i = 0; i < ARRAY_SIZE(probe_array); i++) {
+		err = marker_disarm(probe_array[i].name);
+		BUG_ON(err);
+		err = IS_ERR(marker_probe_unregister(probe_array[i].name));
+		BUG_ON(err);
+	}
+}
+
+
 static __init int blk_trace_init(void)
 {
-	mutex_init(&blk_tree_mutex);
 	on_each_cpu(blk_trace_check_cpu_time, NULL, 1, 1);
 	blk_trace_set_ht_offsets();
 
Index: linux-2.6-lttng/include/linux/blktrace_api.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/blktrace_api.h	2007-09-18 10:05:15.000000000 -0400
+++ linux-2.6-lttng/include/linux/blktrace_api.h	2007-09-18 13:18:26.000000000 -0400
@@ -3,6 +3,7 @@
 
 #include <linux/blkdev.h>
 #include <linux/relay.h>
+#include <linux/marker.h>
 
 /*
  * Trace categories
@@ -142,150 +143,22 @@ struct blk_user_trace_setup {
 	u32 pid;
 };
 
+/* Probe data used for probe-marker connection */
+struct blk_probe_data {
+	const char *name;
+	const char *format;
+	u32 flags;
+	marker_probe_func *callback;
+};
+
 #if defined(CONFIG_BLK_DEV_IO_TRACE)
 extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
 extern void blk_trace_shutdown(struct request_queue *);
 extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
 
-/**
- * blk_add_trace_rq - Add a trace for a request oriented action
- * @q:		queue the io is for
- * @rq:		the source request
- * @what:	the action
- *
- * Description:
- *     Records an action against a request. Will log the bio offset + size.
- *
- **/
-static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq,
-				    u32 what)
-{
-	struct blk_trace *bt = q->blk_trace;
-	int rw = rq->cmd_flags & 0x03;
-
-	if (likely(!bt))
-		return;
-
-	if (blk_pc_request(rq)) {
-		what |= BLK_TC_ACT(BLK_TC_PC);
-		__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd);
-	} else  {
-		what |= BLK_TC_ACT(BLK_TC_FS);
-		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL);
-	}
-}
-
-/**
- * blk_add_trace_bio - Add a trace for a bio oriented action
- * @q:		queue the io is for
- * @bio:	the source bio
- * @what:	the action
- *
- * Description:
- *     Records an action against a bio. Will log the bio offset + size.
- *
- **/
-static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
-				     u32 what)
-{
-	struct blk_trace *bt = q->blk_trace;
-
-	if (likely(!bt))
-		return;
-
-	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
-}
-
-/**
- * blk_add_trace_generic - Add a trace for a generic action
- * @q:		queue the io is for
- * @bio:	the source bio
- * @rw:		the data direction
- * @what:	the action
- *
- * Description:
- *     Records a simple trace
- *
- **/
-static inline void blk_add_trace_generic(struct request_queue *q,
-					 struct bio *bio, int rw, u32 what)
-{
-	struct blk_trace *bt = q->blk_trace;
-
-	if (likely(!bt))
-		return;
-
-	if (bio)
-		blk_add_trace_bio(q, bio, what);
-	else
-		__blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL);
-}
-
-/**
- * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload
- * @q:		queue the io is for
- * @what:	the action
- * @bio:	the source bio
- * @pdu:	the integer payload
- *
- * Description:
- *     Adds a trace with some integer payload. This might be an unplug
- *     option given as the action, with the depth at unplug time given
- *     as the payload
- *
- **/
-static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what,
-					 struct bio *bio, unsigned int pdu)
-{
-	struct blk_trace *bt = q->blk_trace;
-	__be64 rpdu = cpu_to_be64(pdu);
-
-	if (likely(!bt))
-		return;
-
-	if (bio)
-		__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu);
-	else
-		__blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
-}
-
-/**
- * blk_add_trace_remap - Add a trace for a remap operation
- * @q:		queue the io is for
- * @bio:	the source bio
- * @dev:	target device
- * @from:	source sector
- * @to:		target sector
- *
- * Description:
- *     Device mapper or raid target sometimes need to split a bio because
- *     it spans a stripe (or similar). Add a trace for that action.
- *
- **/
-static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
-				       dev_t dev, sector_t from, sector_t to)
-{
-	struct blk_trace *bt = q->blk_trace;
-	struct blk_io_trace_remap r;
-
-	if (likely(!bt))
-		return;
-
-	r.device = cpu_to_be32(dev);
-	r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
-	r.sector = cpu_to_be64(to);
-
-	__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
-}
-
 #else /* !CONFIG_BLK_DEV_IO_TRACE */
 #define blk_trace_ioctl(bdev, cmd, arg)		(-ENOTTY)
 #define blk_trace_shutdown(q)			do { } while (0)
-#define blk_add_trace_rq(q, rq, what)		do { } while (0)
-#define blk_add_trace_bio(q, rq, what)		do { } while (0)
-#define blk_add_trace_generic(q, rq, rw, what)	do { } while (0)
-#define blk_add_trace_pdu_int(q, what, bio, pdu)	do { } while (0)
-#define blk_add_trace_remap(q, bio, dev, f, t)	do {} while (0)
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 
 #endif
Index: linux-2.6-lttng/mm/bounce.c
===================================================================
--- linux-2.6-lttng.orig/mm/bounce.c	2007-09-18 10:08:12.000000000 -0400
+++ linux-2.6-lttng/mm/bounce.c	2007-09-18 13:18:26.000000000 -0400
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/highmem.h>
-#include <linux/blktrace_api.h>
+#include <linux/marker.h>
 #include <asm/tlbflush.h>
 
 #define POOL_SIZE	64
@@ -237,7 +237,7 @@ static void __blk_queue_bounce(struct re
 	if (!bio)
 		return;
 
-	blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
+	trace_mark(blk_bio_bounce, "%p %p", q, *bio_orig);
 
 	/*
 	 * at least one page was bounced, fill in possible non-highmem
Index: linux-2.6-lttng/mm/highmem.c
===================================================================
--- linux-2.6-lttng.orig/mm/highmem.c	2007-09-18 10:05:23.000000000 -0400
+++ linux-2.6-lttng/mm/highmem.c	2007-09-18 13:18:26.000000000 -0400
@@ -26,7 +26,7 @@
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/highmem.h>
-#include <linux/blktrace_api.h>
+#include <linux/marker.h>
 #include <asm/tlbflush.h>
 
 /*
Index: linux-2.6-lttng/fs/bio.c
===================================================================
--- linux-2.6-lttng.orig/fs/bio.c	2007-09-18 10:08:12.000000000 -0400
+++ linux-2.6-lttng/fs/bio.c	2007-09-18 13:18:26.000000000 -0400
@@ -25,7 +25,7 @@
 #include <linux/module.h>
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
-#include <linux/blktrace_api.h>
+#include <linux/marker.h>
 #include <scsi/sg.h>		/* for struct sg_iovec */
 
 #define BIO_POOL_SIZE 2
@@ -1072,8 +1072,8 @@ struct bio_pair *bio_split(struct bio *b
 	if (!bp)
 		return bp;
 
-	blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi,
-				bi->bi_sector + first_sectors);
+	trace_mark(blk_pdu_split, "%p %p %llu", bdev_get_queue(bi->bi_bdev), bi,
+				(u64)bi->bi_sector + first_sectors);
 
 	BUG_ON(bi->bi_vcnt != 1);
 	BUG_ON(bi->bi_idx != 0);
Index: linux-2.6-lttng/drivers/block/cciss.c
===================================================================
--- linux-2.6-lttng.orig/drivers/block/cciss.c	2007-09-18 10:09:12.000000000 -0400
+++ linux-2.6-lttng/drivers/block/cciss.c	2007-09-18 13:18:26.000000000 -0400
@@ -37,7 +37,7 @@
 #include <linux/hdreg.h>
 #include <linux/spinlock.h>
 #include <linux/compat.h>
-#include <linux/blktrace_api.h>
+#include <linux/marker.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
@@ -2545,7 +2545,7 @@ after_error_processing:
 	}
 	cmd->rq->data_len = 0;
 	cmd->rq->completion_data = cmd;
-	blk_add_trace_rq(cmd->rq->q, cmd->rq, BLK_TA_COMPLETE);
+	trace_mark(blk_request_complete, "%p %p", cmd->rq->q, cmd->rq);
 	blk_complete_request(cmd->rq);
 }
 
Index: linux-2.6-lttng/drivers/md/dm.c
===================================================================
--- linux-2.6-lttng.orig/drivers/md/dm.c	2007-09-18 10:08:11.000000000 -0400
+++ linux-2.6-lttng/drivers/md/dm.c	2007-09-18 13:18:26.000000000 -0400
@@ -19,7 +19,7 @@
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/hdreg.h>
-#include <linux/blktrace_api.h>
+#include <linux/marker.h>
 #include <linux/smp_lock.h>
 
 #define DM_MSG_PREFIX "core"
@@ -481,8 +481,8 @@ static void dec_pending(struct dm_io *io
 			wake_up(&io->md->wait);
 
 		if (io->error != DM_ENDIO_REQUEUE) {
-			blk_add_trace_bio(io->md->queue, io->bio,
-					  BLK_TA_COMPLETE);
+			trace_mark(blk_request_complete, "%p %p",
+				io->md->queue, io->bio);
 
 			bio_endio(io->bio, io->bio->bi_size, io->error);
 		}
@@ -578,10 +578,10 @@ static void __map_bio(struct dm_target *
 	r = ti->type->map(ti, clone, &tio->info);
 	if (r == DM_MAPIO_REMAPPED) {
 		/* the bio has been remapped so dispatch it */
-
-		blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone,
-				    tio->io->bio->bi_bdev->bd_dev,
-				    clone->bi_sector, sector);
+		trace_mark(blk_remap, "%p %p %llu %llu %llu",
+			bdev_get_queue(clone->bi_bdev), clone,
+			(u64)tio->io->bio->bi_bdev->bd_dev, (u64)sector,
+			(u64)clone->bi_sector);
 
 		generic_make_request(clone);
 	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 3/4] Linux Kernel Markers - Documentation
  2007-09-18 21:13 ` [patch 3/4] Linux Kernel Markers - Documentation Mathieu Desnoyers
@ 2007-09-18 23:22   ` Randy Dunlap
  2007-09-19 11:18     ` Mathieu Desnoyers
  0 siblings, 1 reply; 43+ messages in thread
From: Randy Dunlap @ 2007-09-18 23:22 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: akpm, linux-kernel, Frank Ch. Eigler, Christoph Hellwig

On Tue, 18 Sep 2007 17:13:27 -0400 Mathieu Desnoyers wrote:

> Here is some documentation explaining what is/how to use the Linux
> Kernel Markers.
> 
> ---
> 
>  Documentation/markers/markers.txt          |   93 +++++++++++++++++++++++++++
>  Documentation/markers/src/Makefile         |    7 ++
>  Documentation/markers/src/marker-example.c |   55 ++++++++++++++++
>  Documentation/markers/src/probe-example.c  |   98 +++++++++++++++++++++++++++++
>  4 files changed, 253 insertions(+)
> 
> Index: linux-2.6-lttng/Documentation/markers/markers.txt
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6-lttng/Documentation/markers/markers.txt	2007-09-07 09:17:45.000000000 -0400
> @@ -0,0 +1,93 @@

> +The marker mechanism supports inserting multiple instances of the same marker.
> +Markers can be put in inline functions, inlined static functions, and
> +unrolled loops.

as well as regular functions ?

> +* Probe / marker example
> +
> +See the example provided in Documentation/markers/markers/src

                                       drop one of ^^^ "markers/"

> +Run, as root :
> +
> +make
> +insmod marker-example.ko (insmod order is not important)
> +insmod probe-example.ko
> +cat /proc/marker-example (returns an expected error)
> +rmmod marker-example probe-example
> +dmesg


---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 3/4] Linux Kernel Markers - Documentation
  2007-09-18 23:22   ` Randy Dunlap
@ 2007-09-19 11:18     ` Mathieu Desnoyers
  0 siblings, 0 replies; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-09-19 11:18 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: akpm, linux-kernel, Frank Ch. Eigler, Christoph Hellwig

* Randy Dunlap (randy.dunlap@oracle.com) wrote:
> On Tue, 18 Sep 2007 17:13:27 -0400 Mathieu Desnoyers wrote:
> 
> > Here is some documentation explaining what is/how to use the Linux
> > Kernel Markers.
> > 
> > ---
> > 
> >  Documentation/markers/markers.txt          |   93 +++++++++++++++++++++++++++
> >  Documentation/markers/src/Makefile         |    7 ++
> >  Documentation/markers/src/marker-example.c |   55 ++++++++++++++++
> >  Documentation/markers/src/probe-example.c  |   98 +++++++++++++++++++++++++++++
> >  4 files changed, 253 insertions(+)
> > 
> > Index: linux-2.6-lttng/Documentation/markers/markers.txt
> > ===================================================================
> > --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> > +++ linux-2.6-lttng/Documentation/markers/markers.txt	2007-09-07 09:17:45.000000000 -0400
> > @@ -0,0 +1,93 @@
> 
> > +The marker mechanism supports inserting multiple instances of the same marker.
> > +Markers can be put in inline functions, inlined static functions, and
> > +unrolled loops.
> 
> as well as regular functions ?
> 

Oh, yes, forgot the trivial case ;)

> > +* Probe / marker example
> > +
> > +See the example provided in Documentation/markers/markers/src
> 
>                                        drop one of ^^^ "markers/"
> 

Ok.

Will fix, thanks!

Mathieu

> > +Run, as root :
> > +
> > +make
> > +insmod marker-example.ko (insmod order is not important)
> > +insmod probe-example.ko
> > +cat /proc/marker-example (returns an expected error)
> > +rmmod marker-example probe-example
> > +dmesg
> 
> 
> ---
> ~Randy
> *** Remember to use Documentation/SubmitChecklist when testing your code ***

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-09-18 21:13 ` [patch 1/4] Linux Kernel Markers - Architecture Independent Code Mathieu Desnoyers
@ 2007-09-19 11:37   ` Mathieu Desnoyers
  2007-09-19 13:53     ` Frank Ch. Eigler
  2007-09-19 17:32     ` [patch 1/4] Linux Kernel Markers - Architecture Independent Code Denys Vlasenko
  2007-09-21  0:58   ` Steven Rostedt
  1 sibling, 2 replies; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-09-19 11:37 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Frank Ch. Eigler, Christoph Hellwig, Rusty Russell, akpm, linux-kernel

* Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) wrote:
> The marker activation functions sits in kernel/marker.c. A hash table is used
> to keep track of the registered probes and armed markers, so the markers within
> a newly loaded module that should be active can be activated at module load
> time.
> 
> marker_query has been removed. marker_get_first, marker_get_next and
> marker_release should be used as iterators on the markers.
> 
> Changelog:
> - markers_mutex now nests inside module_mutex rather than the opposite.
> - Iteration on modules is now done in module.c.
> - module_mutex is not exported anymore.
> 
 [...]
> Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
> ===================================================================
> --- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h	2007-09-14 10:11:18.000000000 -0400
> +++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h	2007-09-14 10:11:31.000000000 -0400
> @@ -12,7 +12,11 @@
>  /* .data section */
>  #define DATA_DATA							\
>  	*(.data)							\
> -	*(.data.init.refok)
> +	*(.data.init.refok)						\
> +	. = ALIGN(8);							\
> +	VMLINUX_SYMBOL(__start___markers) = .;				\
> +	*(__markers)							\
> +	VMLINUX_SYMBOL(__stop___markers) = .;
>  
>  #define RO_DATA(align)							\
>  	. = ALIGN((align));						\
> @@ -129,6 +133,11 @@
>  		VMLINUX_SYMBOL(__stop___immediate) = .;			\
>  	}								\
>  									\
> +	/* Markers: strings */						\
> +        __markers_strings : AT(ADDR(__markers_strings) - LOAD_OFFSET) {	\
> +		*(__markers_strings)					\
> + 	}								\
> +									\
>  	/* Kernel symbol table: strings */				\
>          __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {	\
>  		*(__ksymtab_strings)					\
[...]

Do you think I should also remove the __markers_strings section from here ?

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-09-19 11:37   ` Mathieu Desnoyers
@ 2007-09-19 13:53     ` Frank Ch. Eigler
  2007-09-19 20:32       ` Denys Vlasenko
  2007-09-19 17:32     ` [patch 1/4] Linux Kernel Markers - Architecture Independent Code Denys Vlasenko
  1 sibling, 1 reply; 43+ messages in thread
From: Frank Ch. Eigler @ 2007-09-19 13:53 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Denys Vlasenko, dsmith, Christoph Hellwig, Rusty Russell, akpm,
	linux-kernel

Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:

> [...]  Do you think I should also remove the __markers_strings
> section from here ?

Current systemtap marker support code relies on the __markers_strings
section.

- FChE

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-09-19 11:37   ` Mathieu Desnoyers
  2007-09-19 13:53     ` Frank Ch. Eigler
@ 2007-09-19 17:32     ` Denys Vlasenko
  2007-09-19 18:46       ` Mathieu Desnoyers
  1 sibling, 1 reply; 43+ messages in thread
From: Denys Vlasenko @ 2007-09-19 17:32 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Frank Ch. Eigler, Christoph Hellwig, Rusty Russell, akpm, linux-kernel

On Wednesday 19 September 2007 12:37, Mathieu Desnoyers wrote:
> > Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
> > ===================================================================
> > --- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h	2007-09-14 10:11:18.000000000 -0400
> > +++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h	2007-09-14 10:11:31.000000000 -0400
> > @@ -129,6 +133,11 @@
> >  		VMLINUX_SYMBOL(__stop___immediate) = .;			\
> >  	}								\
> >  									\
> > +	/* Markers: strings */						\
> > +        __markers_strings : AT(ADDR(__markers_strings) - LOAD_OFFSET) {	\
> > +		*(__markers_strings)					\
> > + 	}								\
> > +									\
> >  	/* Kernel symbol table: strings */				\
> >          __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {	\
> >  		*(__ksymtab_strings)					\
> [...]
> 
> Do you think I should also remove the __markers_strings section from here ?

Yes.

It will be beneficial if one can read include/asm-generic/vmlinux.lds.h and
arch/$ARCH/kernel/vmlinux.lds.S and understand which sections in resulting
vmlinux serve what purpose. A comment atop each section explaining
its role will be nice. Even more so that not many people are fluent
in ld script language.

Currently, one will need to grep around (and not only in kernel tree -
you need to read depmod.c source too) in order to understand the role
of various sections in vmlinux.

There are dearth of comments in ld scripts, and some sections
are created "just because I felt like it". For example, there are
".data.page_aligned" and ".data.percpu" sections - can you
easily tell which one has to be a section, and which does not need
to be one (can be merged with ".data")? Maybe both must be sections?
Or none of them?
--
vda

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-09-19 17:32     ` [patch 1/4] Linux Kernel Markers - Architecture Independent Code Denys Vlasenko
@ 2007-09-19 18:46       ` Mathieu Desnoyers
  2007-09-19 18:50         ` Mathieu Desnoyers
  0 siblings, 1 reply; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-09-19 18:46 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Frank Ch. Eigler, Christoph Hellwig, Rusty Russell, akpm, linux-kernel

* Denys Vlasenko (vda.linux@googlemail.com) wrote:
> On Wednesday 19 September 2007 12:37, Mathieu Desnoyers wrote:
> > > Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
> > > ===================================================================
> > > --- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h	2007-09-14 10:11:18.000000000 -0400
> > > +++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h	2007-09-14 10:11:31.000000000 -0400
> > > @@ -129,6 +133,11 @@
> > >  		VMLINUX_SYMBOL(__stop___immediate) = .;			\
> > >  	}								\
> > >  									\
> > > +	/* Markers: strings */						\
> > > +        __markers_strings : AT(ADDR(__markers_strings) - LOAD_OFFSET) {	\
> > > +		*(__markers_strings)					\
> > > + 	}								\
> > > +									\
> > >  	/* Kernel symbol table: strings */				\
> > >          __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {	\
> > >  		*(__ksymtab_strings)					\
> > [...]
> > 
> > Do you think I should also remove the __markers_strings section from here ?
> 
> Yes.
> 
> It will be beneficial if one can read include/asm-generic/vmlinux.lds.h and
> arch/$ARCH/kernel/vmlinux.lds.S and understand which sections in resulting
> vmlinux serve what purpose. A comment atop each section explaining
> its role will be nice. Even more so that not many people are fluent
> in ld script language.
> 
> Currently, one will need to grep around (and not only in kernel tree -
> you need to read depmod.c source too) in order to understand the role
> of various sections in vmlinux.
> 
> There are dearth of comments in ld scripts, and some sections
> are created "just because I felt like it". For example, there are
> ".data.page_aligned" and ".data.percpu" sections - can you
> easily tell which one has to be a section, and which does not need
> to be one (can be merged with ".data")? Maybe both must be sections?
> Or none of them?
> --
> vda

Oh, wait.. I need it in module.c:

immediateindex = find_sec(hdr, sechdrs, secstrings, "__immediate");

I'll leave the section there then.

Mathieu
-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-09-19 18:46       ` Mathieu Desnoyers
@ 2007-09-19 18:50         ` Mathieu Desnoyers
  0 siblings, 0 replies; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-09-19 18:50 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Frank Ch. Eigler, Christoph Hellwig, Rusty Russell, akpm, linux-kernel

* Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) wrote:
> * Denys Vlasenko (vda.linux@googlemail.com) wrote:
> > On Wednesday 19 September 2007 12:37, Mathieu Desnoyers wrote:
> > > > Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
> > > > ===================================================================
> > > > --- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h	2007-09-14 10:11:18.000000000 -0400
> > > > +++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h	2007-09-14 10:11:31.000000000 -0400
> > > > @@ -129,6 +133,11 @@
> > > >  		VMLINUX_SYMBOL(__stop___immediate) = .;			\
> > > >  	}								\
> > > >  									\
> > > > +	/* Markers: strings */						\
> > > > +        __markers_strings : AT(ADDR(__markers_strings) - LOAD_OFFSET) {	\
> > > > +		*(__markers_strings)					\
> > > > + 	}								\
> > > > +									\
> > > >  	/* Kernel symbol table: strings */				\
> > > >          __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {	\
> > > >  		*(__ksymtab_strings)					\
> > > [...]
> > > 
> > > Do you think I should also remove the __markers_strings section from here ?
> > 
> > Yes.
> > 
> > It will be beneficial if one can read include/asm-generic/vmlinux.lds.h and
> > arch/$ARCH/kernel/vmlinux.lds.S and understand which sections in resulting
> > vmlinux serve what purpose. A comment atop each section explaining
> > its role will be nice. Even more so that not many people are fluent
> > in ld script language.
> > 
> > Currently, one will need to grep around (and not only in kernel tree -
> > you need to read depmod.c source too) in order to understand the role
> > of various sections in vmlinux.
> > 
> > There are dearth of comments in ld scripts, and some sections
> > are created "just because I felt like it". For example, there are
> > ".data.page_aligned" and ".data.percpu" sections - can you
> > easily tell which one has to be a section, and which does not need
> > to be one (can be merged with ".data")? Maybe both must be sections?
> > Or none of them?
> > --
> > vda
> 
> Oh, wait.. I need it in module.c:
> 
> immediateindex = find_sec(hdr, sechdrs, secstrings, "__immediate");
> 
> I'll leave the section there then.
> 
> Mathieu

Sorry, let me take this back. It applies to what is linked in the core
image, but I believe it does not apply to the kernel modules.

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-09-19 13:53     ` Frank Ch. Eigler
@ 2007-09-19 20:32       ` Denys Vlasenko
  2007-09-21 12:58         ` Mathieu Desnoyers
  0 siblings, 1 reply; 43+ messages in thread
From: Denys Vlasenko @ 2007-09-19 20:32 UTC (permalink / raw)
  To: Frank Ch. Eigler
  Cc: Mathieu Desnoyers, dsmith, Christoph Hellwig, Rusty Russell,
	akpm, linux-kernel

On Wednesday 19 September 2007 14:53, Frank Ch. Eigler wrote:
> Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:
> 
> > [...]  Do you think I should also remove the __markers_strings
> > section from here ?
> 
> Current systemtap marker support code relies on the __markers_strings
> section.

Let users know that in comment above section definition in ld script.

Thanks!
--
vda

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-09-18 21:13 ` [patch 1/4] Linux Kernel Markers - Architecture Independent Code Mathieu Desnoyers
  2007-09-19 11:37   ` Mathieu Desnoyers
@ 2007-09-21  0:58   ` Steven Rostedt
  2007-09-21 13:45     ` Mathieu Desnoyers
  1 sibling, 1 reply; 43+ messages in thread
From: Steven Rostedt @ 2007-09-21  0:58 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: akpm, linux-kernel, Frank Ch. Eigler, Christoph Hellwig, Rusty Russell

On Tue, Sep 18, 2007 at 05:13:25PM -0400, Mathieu Desnoyers wrote:
> +/*
> + * Sets the probe callback corresponding to one marker.
> + */
> +static int set_marker(struct marker_entry **entry,
> +			struct __mark_marker *elem)
> +{
> +	int ret;
> +	BUG_ON(strcmp((*entry)->name, elem->name) != 0);

Can you switch this at least to WARN_ON?  Killing a system with X
running where the user just sees a freeze is not that nice. But a nasty
message in dmesg is very noticable.

-- Steve

> +
> +	if ((*entry)->format) {
> +		if (strcmp((*entry)->format, elem->format) != 0) {
> +			printk(KERN_NOTICE
> +				"Format mismatch for probe %s "
> +				"(%s), marker (%s)\n",
> +				(*entry)->name,
> +				(*entry)->format,
> +				elem->format);
> +			return -EPERM;
> +		}
> +	} else {
> +		ret = marker_set_format(entry, elem->format);
> +		if (ret)
> +			return ret;
> +	}
> +	elem->call = (*entry)->probe;
> +	elem->pdata = (*entry)->pdata;
> +	_immediate_set(elem->state, 1);
> +	return 0;
> +}

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 4/4] Port of blktrace to the Linux Kernel Markers.
  2007-09-18 21:13 ` [patch 4/4] Port of blktrace to the Linux Kernel Markers Mathieu Desnoyers
@ 2007-09-21  1:03   ` Steven Rostedt
  2007-09-21 13:46     ` Mathieu Desnoyers
  0 siblings, 1 reply; 43+ messages in thread
From: Steven Rostedt @ 2007-09-21  1:03 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: akpm, linux-kernel, Frank Ch. Eigler, Jens Axboe

On Tue, Sep 18, 2007 at 05:13:28PM -0400, Mathieu Desnoyers wrote:
> +void blk_probe_disarm(void)
> +{
> +	int i, err;
> +
> +	for (i = 0; i < ARRAY_SIZE(probe_array); i++) {
> +		err = marker_disarm(probe_array[i].name);
> +		BUG_ON(err);
> +		err = IS_ERR(marker_probe_unregister(probe_array[i].name));
> +		BUG_ON(err);
> +	}
> +}

As well as changing these to WARN_ON.

-- Steve


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-09-19 20:32       ` Denys Vlasenko
@ 2007-09-21 12:58         ` Mathieu Desnoyers
  2007-09-21 13:07           ` Christoph Hellwig
  2007-09-21 13:30           ` Frank Ch. Eigler
  0 siblings, 2 replies; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-09-21 12:58 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Frank Ch. Eigler, dsmith, Christoph Hellwig, Rusty Russell, akpm,
	linux-kernel

* Denys Vlasenko (vda.linux@googlemail.com) wrote:
> On Wednesday 19 September 2007 14:53, Frank Ch. Eigler wrote:
> > Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:
> > 
> > > [...]  Do you think I should also remove the __markers_strings
> > > section from here ?
> > 
> > Current systemtap marker support code relies on the __markers_strings
> > section.
> 
> Let users know that in comment above section definition in ld script.
> 

Ok, I am changing it to:


        /* Markers: strings (used by SystemTAP) */                      \
        __markers_strings : AT(ADDR(__markers_strings) - LOAD_OFFSET) { \
                *(__markers_strings)                                    \
        }                                                               \

Mathieu

> Thanks!
> --
> vda

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-09-21 12:58         ` Mathieu Desnoyers
@ 2007-09-21 13:07           ` Christoph Hellwig
  2007-09-21 13:30           ` Frank Ch. Eigler
  1 sibling, 0 replies; 43+ messages in thread
From: Christoph Hellwig @ 2007-09-21 13:07 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Denys Vlasenko, Frank Ch. Eigler, dsmith, Christoph Hellwig,
	Rusty Russell, akpm, linux-kernel

On Fri, Sep 21, 2007 at 08:58:19AM -0400, Mathieu Desnoyers wrote:
> Ok, I am changing it to:

As I mentioned before pleae just kill this gunk entirely as it's not needed
at all intree.  markers are already getting far too complex, I'd rather
want a simple useable version in now than trying to cater for every possible
use-case.  Once the systemtap people are ready for merging their stuff into
the kernel tree we can cater towards their needs.


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-09-21 12:58         ` Mathieu Desnoyers
  2007-09-21 13:07           ` Christoph Hellwig
@ 2007-09-21 13:30           ` Frank Ch. Eigler
  2007-09-21 13:38             ` Mathieu Desnoyers
  1 sibling, 1 reply; 43+ messages in thread
From: Frank Ch. Eigler @ 2007-09-21 13:30 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Denys Vlasenko, dsmith, Christoph Hellwig, Rusty Russell, akpm,
	linux-kernel

Hi -

On Fri, Sep 21, 2007 at 08:58:19AM -0400, Mathieu Desnoyers wrote:
> [...]
> > > Current systemtap marker support code relies on the __markers_strings
> > > section.
> > Let users know that in comment above section definition in ld script.
> [...]
>         /* Markers: strings (used by SystemTAP) */                      \
> [...]

I did not mean to imply that this was a necessary state of affairs.

The marker metadata must be stored in at least one place in the kernel
image - this just happens to be a convenient one that David Smith's
recent systemtap code used.  Without it, we'd probably have to do a
more complicated search, following the pointers within the __markers
structs.  That could work, but it hasn't been built/tested.

So, this proposed change (removal of this section) would break
systemtap, and we have to jump through more hoops to make it work
again.  Is the change worth it?

- FChE

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-09-21 13:30           ` Frank Ch. Eigler
@ 2007-09-21 13:38             ` Mathieu Desnoyers
  2007-10-15 19:41               ` Frank Ch. Eigler
  0 siblings, 1 reply; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-09-21 13:38 UTC (permalink / raw)
  To: Frank Ch. Eigler
  Cc: Denys Vlasenko, dsmith, Christoph Hellwig, Rusty Russell, akpm,
	linux-kernel

* Frank Ch. Eigler (fche@redhat.com) wrote:
> Hi -
> 
> On Fri, Sep 21, 2007 at 08:58:19AM -0400, Mathieu Desnoyers wrote:
> > [...]
> > > > Current systemtap marker support code relies on the __markers_strings
> > > > section.
> > > Let users know that in comment above section definition in ld script.
> > [...]
> >         /* Markers: strings (used by SystemTAP) */                      \
> > [...]
> 
> I did not mean to imply that this was a necessary state of affairs.
> 
> The marker metadata must be stored in at least one place in the kernel
> image - this just happens to be a convenient one that David Smith's
> recent systemtap code used.  Without it, we'd probably have to do a
> more complicated search, following the pointers within the __markers
> structs.  That could work, but it hasn't been built/tested.
> 
> So, this proposed change (removal of this section) would break
> systemtap, and we have to jump through more hoops to make it work
> again.  Is the change worth it?
> 

I guess so. Getting the markers as clean as we can is very important for
kernel inclusion.

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-09-21  0:58   ` Steven Rostedt
@ 2007-09-21 13:45     ` Mathieu Desnoyers
  0 siblings, 0 replies; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-09-21 13:45 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: akpm, linux-kernel, Frank Ch. Eigler, Christoph Hellwig, Rusty Russell

* Steven Rostedt (rostedt@goodmis.org) wrote:
> On Tue, Sep 18, 2007 at 05:13:25PM -0400, Mathieu Desnoyers wrote:
> > +/*
> > + * Sets the probe callback corresponding to one marker.
> > + */
> > +static int set_marker(struct marker_entry **entry,
> > +			struct __mark_marker *elem)
> > +{
> > +	int ret;
> > +	BUG_ON(strcmp((*entry)->name, elem->name) != 0);
> 
> Can you switch this at least to WARN_ON?  Killing a system with X
> running where the user just sees a freeze is not that nice. But a nasty
> message in dmesg is very noticable.
> 
Sure.

> -- Steve
> 
> > +
> > +	if ((*entry)->format) {
> > +		if (strcmp((*entry)->format, elem->format) != 0) {
> > +			printk(KERN_NOTICE
> > +				"Format mismatch for probe %s "
> > +				"(%s), marker (%s)\n",
> > +				(*entry)->name,
> > +				(*entry)->format,
> > +				elem->format);
> > +			return -EPERM;
> > +		}
> > +	} else {
> > +		ret = marker_set_format(entry, elem->format);
> > +		if (ret)
> > +			return ret;
> > +	}
> > +	elem->call = (*entry)->probe;
> > +	elem->pdata = (*entry)->pdata;
> > +	_immediate_set(elem->state, 1);
> > +	return 0;
> > +}

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 4/4] Port of blktrace to the Linux Kernel Markers.
  2007-09-21  1:03   ` Steven Rostedt
@ 2007-09-21 13:46     ` Mathieu Desnoyers
  0 siblings, 0 replies; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-09-21 13:46 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: akpm, linux-kernel, Frank Ch. Eigler, Jens Axboe

* Steven Rostedt (rostedt@goodmis.org) wrote:
> On Tue, Sep 18, 2007 at 05:13:28PM -0400, Mathieu Desnoyers wrote:
> > +void blk_probe_disarm(void)
> > +{
> > +	int i, err;
> > +
> > +	for (i = 0; i < ARRAY_SIZE(probe_array); i++) {
> > +		err = marker_disarm(probe_array[i].name);
> > +		BUG_ON(err);
> > +		err = IS_ERR(marker_probe_unregister(probe_array[i].name));
> > +		BUG_ON(err);
> > +	}
> > +}
> 
> As well as changing these to WARN_ON.
> 
Yep.

> -- Steve
> 

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-09-21 13:38             ` Mathieu Desnoyers
@ 2007-10-15 19:41               ` Frank Ch. Eigler
  2007-10-15 23:12                 ` Mathieu Desnoyers
  0 siblings, 1 reply; 43+ messages in thread
From: Frank Ch. Eigler @ 2007-10-15 19:41 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Denys Vlasenko, systemtap, Christoph Hellwig, Rusty Russell,
	akpm, linux-kernel

Hi -

I wrote:

> [...]
> > The marker metadata must be stored in at least one place in the kernel
> > image - this just happens to be a convenient one that David Smith's
> > recent systemtap code used.  Without it, we'd probably have to do a
> > more complicated search, following the pointers within the __markers
> > structs.  [...]

Our team is farther along adapting to this change against 2.6.23-mm1,
and we have run into a complication.  It's more of a distribution
issue.

We would prefer to retain systemtap's capability to build
instrumentation for a kernel other than the currently running one.
Such instrumentation can be then copied and run on a distinct machine.
This has meant relying on development data: make install_headers +
Makefiles (as packaged by Fedora/RHEL), and to a lesser extent
separated debugging information.

Markers are attractive partly because they don't require debugging
information, so the data needs to be found in an executable image.
But we prefer not to force the executable image itself to be
installed, for example because /boot is relatively small.  So we would
prefer something in between: something small that we can put into the
development package.

If there exists sympathy to this problem, Roland McGrath supposes we
could implement a standardized solution, a file like Module.symvers,
containing the marker names & format strings extracted at build time.
Any opinions?


PS. I wonder why the marker name/format strings are put into a
__markers_strings object section at all, considering that the only
place where that is used again appears to be this code in
kernel/module.c:

        markersstringsindex = find_sec(hdr, sechdrs, secstrings,
                                        "__markers_strings");

and the "markersstringsindex" variable is never used.


- FChE

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-10-15 19:41               ` Frank Ch. Eigler
@ 2007-10-15 23:12                 ` Mathieu Desnoyers
  2007-10-15 23:50                   ` Roland McGrath
  0 siblings, 1 reply; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-10-15 23:12 UTC (permalink / raw)
  To: Frank Ch. Eigler
  Cc: Denys Vlasenko, systemtap, Christoph Hellwig, Rusty Russell,
	akpm, linux-kernel

* Frank Ch. Eigler (fche@redhat.com) wrote:
> Hi -
> 
> I wrote:
> 
> > [...]
> > > The marker metadata must be stored in at least one place in the kernel
> > > image - this just happens to be a convenient one that David Smith's
> > > recent systemtap code used.  Without it, we'd probably have to do a
> > > more complicated search, following the pointers within the __markers
> > > structs.  [...]
> 
> Our team is farther along adapting to this change against 2.6.23-mm1,
> and we have run into a complication.  It's more of a distribution
> issue.
> 
> We would prefer to retain systemtap's capability to build
> instrumentation for a kernel other than the currently running one.
> Such instrumentation can be then copied and run on a distinct machine.
> This has meant relying on development data: make install_headers +
> Makefiles (as packaged by Fedora/RHEL), and to a lesser extent
> separated debugging information.
> 
> Markers are attractive partly because they don't require debugging
> information, so the data needs to be found in an executable image.
> But we prefer not to force the executable image itself to be
> installed, for example because /boot is relatively small.  So we would
> prefer something in between: something small that we can put into the
> development package.
> 
> If there exists sympathy to this problem, Roland McGrath supposes we
> could implement a standardized solution, a file like Module.symvers,
> containing the marker names & format strings extracted at build time.
> Any opinions?
> 

Hi Frank,

I think the main issue with the solution you propose is that it doesn't
deal with markers in modules, am I right ?

I will soon come with a marker iterator and a module that provides a
userspace -and in kernel- interface to enable/disable markers. Actually,
I already have the code ready in my LTTng snapshots. I can provide a
link if you want to have a look.

> 
> PS. I wonder why the marker name/format strings are put into a
> __markers_strings object section at all, considering that the only
> place where that is used again appears to be this code in
> kernel/module.c:
> 
>         markersstringsindex = find_sec(hdr, sechdrs, secstrings,
>                                         "__markers_strings");
> 
> and the "markersstringsindex" variable is never used.
> 

Considering that  I want to minimize the impact on the system, I put the
marker strings in their own memory location rather than clobbering the
memory containing the kernel strings (which will likely be used more
often than markers). It makes sure that I don't pollute cachelines
otherwise containing useful kernel strings.

Mathieu

> 
> - FChE

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-10-15 23:12                 ` Mathieu Desnoyers
@ 2007-10-15 23:50                   ` Roland McGrath
  2007-10-25 19:17                     ` Mathieu Desnoyers
  0 siblings, 1 reply; 43+ messages in thread
From: Roland McGrath @ 2007-10-15 23:50 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Frank Ch. Eigler, Denys Vlasenko, systemtap, Christoph Hellwig,
	Rusty Russell, akpm, linux-kernel

> I think the main issue with the solution you propose is that it doesn't
> deal with markers in modules, am I right ?

My suggestion applies as well to modules as anything else.  
What "like Module.symvers" means is something like:

name1	vmlinux		%s
name2	fs/nfs/nfs	%d

All the modules built by the same kernel build go into this one file.

Modules packaged separately for the same kernel could provide additional
files of the same kind.

> I will soon come with a marker iterator and a module that provides a
> userspace -and in kernel- interface to enable/disable markers. Actually,
> I already have the code ready in my LTTng snapshots. I can provide a
> link if you want to have a look.

That's clearly straightforward to do given the basic markers data structures.

It does not address the need for an offline list of markers available in a
particular kernel build or set of modules that you are not running right now.
The approach now available for that is grovelling through the markers data
structures extracted from vmlinux and .ko ELF files offline.  That is more
work than one should have to do, and has lots of problems with coping with
different packaging details, etc.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-10-15 23:50                   ` Roland McGrath
@ 2007-10-25 19:17                     ` Mathieu Desnoyers
  2007-10-26 14:28                       ` Frank Ch. Eigler
  0 siblings, 1 reply; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-10-25 19:17 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Frank Ch. Eigler, Denys Vlasenko, systemtap, Christoph Hellwig,
	Rusty Russell, akpm, linux-kernel

* Roland McGrath (roland@redhat.com) wrote:
> > I think the main issue with the solution you propose is that it doesn't
> > deal with markers in modules, am I right ?
> 
> My suggestion applies as well to modules as anything else.  
> What "like Module.symvers" means is something like:
> 
> name1	vmlinux		%s
> name2	fs/nfs/nfs	%d
> 
> All the modules built by the same kernel build go into this one file.
> 
> Modules packaged separately for the same kernel could provide additional
> files of the same kind.
> 
> > I will soon come with a marker iterator and a module that provides a
> > userspace -and in kernel- interface to enable/disable markers. Actually,
> > I already have the code ready in my LTTng snapshots. I can provide a
> > link if you want to have a look.
> 
> That's clearly straightforward to do given the basic markers data structures.
> 
> It does not address the need for an offline list of markers available in a
> particular kernel build or set of modules that you are not running right now.
> The approach now available for that is grovelling through the markers data
> structures extracted from vmlinux and .ko ELF files offline.  That is more
> work than one should have to do, and has lots of problems with coping with
> different packaging details, etc.
> 

Since gcc is required to build the systemtap probes on the development
marchine, I don't see why it would be much harder to also require prople
to install drawf ? Or maybe the "crash" tool ?

I guess you must already need to extract the symbols for your kprobes.
Do you use kallsyms for this ? The way I see it, you could maybe extract
kallsyms symbols corresponding to the markers data structures quite
easily.

I would rather prefer not to implement superfluous built-time data
extraction in the kernel build system just to make userspace simpler. If
we can leverage what currently exists, that would be better.

Mathieu

> 
> Thanks,
> Roland

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-10-25 19:17                     ` Mathieu Desnoyers
@ 2007-10-26 14:28                       ` Frank Ch. Eigler
  2007-11-01  1:06                         ` [PATCH] markers: modpost Roland McGrath
  0 siblings, 1 reply; 43+ messages in thread
From: Frank Ch. Eigler @ 2007-10-26 14:28 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Roland McGrath, Frank Ch. Eigler, Denys Vlasenko, systemtap,
	Christoph Hellwig, Rusty Russell, akpm, linux-kernel

Hi -

On Thu, Oct 25, 2007 at 03:17:22PM -0400, Mathieu Desnoyers wrote:
> [...]
> Since gcc is required to build the systemtap probes on the development
> marchine, I don't see why it would be much harder to also require prople
> to install drawf ? Or maybe the "crash" tool ?

The crash tool requires the dwarf data to work.  The dwarf data for an
entire kernel (including all the modules) is on the order of hundreds
of megabytes.  The symbol & marker list would be one thousandth the
size.  You can see the deployment attractiveness of the latter.

> I guess you must already need to extract the symbols for your kprobes.
> Do you use kallsyms for this? 

Nope.  /proc/kallsyms is a another run-time-only source of data, and
so is not applicable for off-line (ahead-of-time) mapping.

> I would rather prefer not to implement superfluous built-time data
> extraction in the kernel build system just to make userspace
> simpler. [...]

It is not superfluous, as it would solve a real distribution problem.


- FChE

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH] markers: modpost
  2007-10-26 14:28                       ` Frank Ch. Eigler
@ 2007-11-01  1:06                         ` Roland McGrath
  2007-11-01  2:46                           ` Mathieu Desnoyers
  0 siblings, 1 reply; 43+ messages in thread
From: Roland McGrath @ 2007-11-01  1:06 UTC (permalink / raw)
  To: Andrew Morton, Mathieu Desnoyers; +Cc: linux-kernel, systemtap


This adds some new magic in the MODPOST phase for CONFIG_MARKERS.
Analogous to the Module.symvers file, the build will now write a
Module.markers file when CONFIG_MARKERS=y is set.  This file lists
the name, defining module, and format string of each marker,
separated by \t characters.  This simple text file can be used by
offline build procedures for instrumentation code, analogous to
how System.map and Module.symvers can be useful to have for
kernels other than the one you are running right now.

The method of extracting the strings is somewhat crude, but is very
simple and should work fine in practice for the foreseeable future.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 scripts/Makefile.modpost |   11 +++
 scripts/mod/modpost.c    |  174 +++++++++++++++++++++++++++++++++++++++++++++-
 scripts/mod/modpost.h    |    3 +
 3 files changed, 187 insertions(+), 1 deletions(-)

diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index d988f5d..6321870 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -13,6 +13,7 @@
 # 2) modpost is then used to
 # 3)  create one <module>.mod.c file pr. module
 # 4)  create one Module.symvers file with CRC for all exported symbols
+# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
 # 5) compile all <module>.mod.c files
 # 6) final link of the module to a <module.ko> file
 
@@ -45,6 +46,10 @@ include scripts/Makefile.lib
 
 kernelsymfile := $(objtree)/Module.symvers
 modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers
+kernelmarkersfile := $(objtree)/Module.markers
+modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
+
+markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
 
 # Step 1), find all modules listed in $(MODVERDIR)/
 __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
@@ -62,6 +67,8 @@ modpost = scripts/mod/modpost                    \
  $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile)   \
  $(if $(KBUILD_EXTMOD),-I $(modulesymfile))      \
  $(if $(KBUILD_EXTMOD),-o $(modulesymfile))      \
+ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
+ $(if $(CONFIG_MARKERS),-M $(markersfile))	 \
  $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w)
 
 quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
@@ -81,6 +88,10 @@ vmlinux.o: FORCE
 $(symverfile):         __modpost ;
 $(modules:.ko=.mod.c): __modpost ;
 
+ifdef CONFIG_MARKERS
+$(markersfile):	       __modpost ;
+endif
+
 
 # Step 5), compile all *.mod.c files
 
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 93ac52a..df80bfc 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -11,6 +11,8 @@
  * Usage: modpost vmlinux module1.o module2.o ...
  */
 
+#define _GNU_SOURCE
+#include <stdio.h>
 #include <ctype.h>
 #include "modpost.h"
 #include "../../include/linux/license.h"
@@ -424,6 +426,8 @@ static int parse_elf(struct elf_info *info, const char *filename)
 			info->export_unused_gpl_sec = i;
 		else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
 			info->export_gpl_future_sec = i;
+		else if (strcmp(secname, "__markers_strings") == 0)
+			info->markers_strings_sec = i;
 
 		if (sechdrs[i].sh_type != SHT_SYMTAB)
 			continue;
@@ -1249,6 +1253,73 @@ static int exit_section_ref_ok(const char *name)
 	return 0;
 }
 
+static size_t strlen_with_padding(const char *start, const char *limit)
+{
+	const char *p = memchr(start, '\0', limit - start);
+	if (p == NULL)
+		return 0;
+	do
+		++p;
+	while (p < limit && *p == '\0');
+	return p - start;
+}
+
+static void get_markers(struct elf_info *info, struct module *mod)
+{
+	const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec];
+	const char *strings;
+	const char *strings_end;
+	const char *p;
+	size_t n, i;
+
+	if (!info->markers_strings_sec)
+		return;
+
+	strings = (const char *) info->hdr + sh->sh_offset;
+	strings_end = strings + sh->sh_size;
+
+	/*
+	 * First count the strings.  They come in pairs of name, format.
+	 */
+	for (n = 0, p = strings; p < strings_end; ++n) {
+		size_t len = strlen_with_padding(p, strings_end);
+		if (len == 0)
+			break;
+		p += len;
+	}
+	if (n % 2 != 0 || p != strings_end) {
+		warn("%s.ko has bad __markers_strings, ignoring it\n",
+		     mod->name);
+		return;
+	}
+
+	if (n == 0)
+		return;
+
+	/*
+	 * Now collect each pair into a formatted line for the output.
+	 * Lines look like:
+	 *	marker_name	vmlinux	marker %s format %d
+	 * The format string after the second \t can use whitespace.
+	 */
+	mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n / 2));
+	mod->nmarkers = n / 2;
+
+	p = strings;
+	for (i = 0; i < n; i += 2) {
+		const char *name, *fmt;
+		name = p;
+		p += strlen_with_padding(p, strings_end);
+		fmt = p;
+		p += strlen_with_padding(p, strings_end);
+
+		mod->markers[i / 2] = NULL;
+		asprintf(&mod->markers[i / 2], "%s\t%s\t%s\n",
+			 name, mod->name, fmt);
+		NOFAIL(mod->markers[i / 2]);
+	}
+}
+
 static void read_symbols(char *modname)
 {
 	const char *symname;
@@ -1301,6 +1372,8 @@ static void read_symbols(char *modname)
 		get_src_version(modname, mod->srcversion,
 				sizeof(mod->srcversion)-1);
 
+	get_markers(&info, mod);
+
 	parse_elf_finish(&info);
 
 	/* Our trick to get versioning for struct_module - it's
@@ -1649,6 +1722,91 @@ static void write_dump(const char *fname)
 	write_if_changed(&buf, fname);
 }
 
+static void add_marker(struct module *mod, const char *name, const char *fmt)
+{
+	char *line = NULL;
+	asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
+	NOFAIL(line);
+
+	mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) *
+						     sizeof mod->markers[0])));
+	mod->markers[mod->nmarkers++] = line;
+}
+
+static void read_markers(const char *fname)
+{
+	unsigned long size, pos = 0;
+	void *file = grab_file(fname, &size);
+	char *line;
+
+        if (!file)
+		/* No old markers, silently ignore */
+		return;
+
+	while ((line = get_next_line(&pos, file, size))) {
+		char *marker, *modname, *fmt;
+		struct module *mod;
+
+		marker = line;
+		if (!(modname = strchr(marker, '\t')))
+			goto fail;
+		*modname++ = '\0';
+		if (!(fmt = strchr(modname, '\t')))
+			goto fail;
+		*fmt++ = '\0';
+		if (*marker == '\0' || *modname == '\0')
+			goto fail;
+
+		if (!(mod = find_module(modname))) {
+			if (is_vmlinux(modname)) {
+				have_vmlinux = 1;
+			}
+			mod = new_module(NOFAIL(strdup(modname)));
+			mod->skip = 1;
+		}
+
+		add_marker(mod, marker, fmt);
+	}
+	return;
+fail:
+	fatal("parse error in markers list file\n");
+}
+
+static int compare_strings(const void *a, const void *b)
+{
+	return strcmp(*(const char **) a, *(const char **) b);
+}
+
+static void write_markers(const char *fname)
+{
+	struct buffer buf = { };
+	struct module *mod;
+	size_t i;
+
+	for (mod = modules; mod; mod = mod->next)
+		if ((!external_module || !mod->skip) && mod->markers != NULL) {
+			/*
+			 * Sort the strings so we can skip duplicates when
+			 * we write them out.
+			 */
+			qsort(mod->markers, mod->nmarkers,
+			      sizeof mod->markers[0], &compare_strings);
+			for (i = 0; i < mod->nmarkers; ++i) {
+				char *line = mod->markers[i];
+				buf_write(&buf, line, strlen(line));
+				while (i + 1 < mod->nmarkers &&
+				       !strcmp(mod->markers[i],
+					       mod->markers[i + 1]))
+					free(mod->markers[i++]);
+				free(mod->markers[i]);
+			}
+			free(mod->markers);
+			mod->markers = NULL;
+		}
+
+	write_if_changed(&buf, fname);
+}
+
 int main(int argc, char **argv)
 {
 	struct module *mod;
@@ -1656,10 +1814,12 @@ int main(int argc, char **argv)
 	char fname[SZ];
 	char *kernel_read = NULL, *module_read = NULL;
 	char *dump_write = NULL;
+	char *markers_read = NULL;
+	char *markers_write = NULL;
 	int opt;
 	int err;
 
-	while ((opt = getopt(argc, argv, "i:I:mso:aw")) != -1) {
+	while ((opt = getopt(argc, argv, "i:I:mso:awM:K:")) != -1) {
 		switch(opt) {
 			case 'i':
 				kernel_read = optarg;
@@ -1683,6 +1843,12 @@ int main(int argc, char **argv)
 			case 'w':
 				warn_unresolved = 1;
 				break;
+			case 'M':
+				markers_write = optarg;
+				break;
+			case 'K':
+				markers_read = optarg;
+				break;
 			default:
 				exit(1);
 		}
@@ -1724,5 +1890,11 @@ int main(int argc, char **argv)
 	if (dump_write)
 		write_dump(dump_write);
 
+	if (markers_read)
+		read_markers(markers_read);
+
+	if (markers_write)
+		write_markers(markers_write);
+
 	return err;
 }
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 0ffed17..175301a 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -110,6 +110,8 @@ struct module {
 	int has_init;
 	int has_cleanup;
 	struct buffer dev_table_buf;
+	char **markers;
+	size_t nmarkers;
 	char	     srcversion[25];
 };
 
@@ -124,6 +126,7 @@ struct elf_info {
 	Elf_Section  export_gpl_sec;
 	Elf_Section  export_unused_gpl_sec;
 	Elf_Section  export_gpl_future_sec;
+	Elf_Section  markers_strings_sec;
 	const char   *strtab;
 	char	     *modinfo;
 	unsigned int modinfo_len;

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] markers: modpost
  2007-11-01  1:06                         ` [PATCH] markers: modpost Roland McGrath
@ 2007-11-01  2:46                           ` Mathieu Desnoyers
  2007-11-01  9:37                             ` Roland McGrath
  0 siblings, 1 reply; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-11-01  2:46 UTC (permalink / raw)
  To: Roland McGrath; +Cc: Andrew Morton, linux-kernel, systemtap

* Roland McGrath (roland@redhat.com) wrote:
> 
> This adds some new magic in the MODPOST phase for CONFIG_MARKERS.
> Analogous to the Module.symvers file, the build will now write a
> Module.markers file when CONFIG_MARKERS=y is set.  This file lists
> the name, defining module, and format string of each marker,
> separated by \t characters.  This simple text file can be used by
> offline build procedures for instrumentation code, analogous to
> how System.map and Module.symvers can be useful to have for
> kernels other than the one you are running right now.
> 
> The method of extracting the strings is somewhat crude, but is very
> simple and should work fine in practice for the foreseeable future.
> 

Hi Roland,

I'm ok with the idea of extracting such information, but I doubt one can
assume the strings will always be layed out in the same order in the
__markers_strings section. We also shouldn't assume that a marker name
will be a neighbor of its format string.

If we want to do it safely, I think we should iterate from
__start___markers to __stop___markers symbols of vmlinux and get the
pointers to the name/format string pairs.

The same can then be done with modules using the __markers section.

Or maybe is there some reason not to do that ?

Mathieu


> Signed-off-by: Roland McGrath <roland@redhat.com>
> ---
>  scripts/Makefile.modpost |   11 +++
>  scripts/mod/modpost.c    |  174 +++++++++++++++++++++++++++++++++++++++++++++-
>  scripts/mod/modpost.h    |    3 +
>  3 files changed, 187 insertions(+), 1 deletions(-)
> 
> diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
> index d988f5d..6321870 100644
> --- a/scripts/Makefile.modpost
> +++ b/scripts/Makefile.modpost
> @@ -13,6 +13,7 @@
>  # 2) modpost is then used to
>  # 3)  create one <module>.mod.c file pr. module
>  # 4)  create one Module.symvers file with CRC for all exported symbols
> +# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
>  # 5) compile all <module>.mod.c files
>  # 6) final link of the module to a <module.ko> file
>  
> @@ -45,6 +46,10 @@ include scripts/Makefile.lib
>  
>  kernelsymfile := $(objtree)/Module.symvers
>  modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers
> +kernelmarkersfile := $(objtree)/Module.markers
> +modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
> +
> +markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
>  
>  # Step 1), find all modules listed in $(MODVERDIR)/
>  __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
> @@ -62,6 +67,8 @@ modpost = scripts/mod/modpost                    \
>   $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile)   \
>   $(if $(KBUILD_EXTMOD),-I $(modulesymfile))      \
>   $(if $(KBUILD_EXTMOD),-o $(modulesymfile))      \
> + $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
> + $(if $(CONFIG_MARKERS),-M $(markersfile))	 \
>   $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w)
>  
>  quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
> @@ -81,6 +88,10 @@ vmlinux.o: FORCE
>  $(symverfile):         __modpost ;
>  $(modules:.ko=.mod.c): __modpost ;
>  
> +ifdef CONFIG_MARKERS
> +$(markersfile):	       __modpost ;
> +endif
> +
>  
>  # Step 5), compile all *.mod.c files
>  
> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
> index 93ac52a..df80bfc 100644
> --- a/scripts/mod/modpost.c
> +++ b/scripts/mod/modpost.c
> @@ -11,6 +11,8 @@
>   * Usage: modpost vmlinux module1.o module2.o ...
>   */
>  
> +#define _GNU_SOURCE
> +#include <stdio.h>
>  #include <ctype.h>
>  #include "modpost.h"
>  #include "../../include/linux/license.h"
> @@ -424,6 +426,8 @@ static int parse_elf(struct elf_info *info, const char *filename)
>  			info->export_unused_gpl_sec = i;
>  		else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
>  			info->export_gpl_future_sec = i;
> +		else if (strcmp(secname, "__markers_strings") == 0)
> +			info->markers_strings_sec = i;
>  
>  		if (sechdrs[i].sh_type != SHT_SYMTAB)
>  			continue;
> @@ -1249,6 +1253,73 @@ static int exit_section_ref_ok(const char *name)
>  	return 0;
>  }
>  
> +static size_t strlen_with_padding(const char *start, const char *limit)
> +{
> +	const char *p = memchr(start, '\0', limit - start);
> +	if (p == NULL)
> +		return 0;
> +	do
> +		++p;
> +	while (p < limit && *p == '\0');
> +	return p - start;
> +}
> +
> +static void get_markers(struct elf_info *info, struct module *mod)
> +{
> +	const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec];
> +	const char *strings;
> +	const char *strings_end;
> +	const char *p;
> +	size_t n, i;
> +
> +	if (!info->markers_strings_sec)
> +		return;
> +
> +	strings = (const char *) info->hdr + sh->sh_offset;
> +	strings_end = strings + sh->sh_size;
> +
> +	/*
> +	 * First count the strings.  They come in pairs of name, format.
> +	 */
> +	for (n = 0, p = strings; p < strings_end; ++n) {
> +		size_t len = strlen_with_padding(p, strings_end);
> +		if (len == 0)
> +			break;
> +		p += len;
> +	}
> +	if (n % 2 != 0 || p != strings_end) {
> +		warn("%s.ko has bad __markers_strings, ignoring it\n",
> +		     mod->name);
> +		return;
> +	}
> +
> +	if (n == 0)
> +		return;
> +
> +	/*
> +	 * Now collect each pair into a formatted line for the output.
> +	 * Lines look like:
> +	 *	marker_name	vmlinux	marker %s format %d
> +	 * The format string after the second \t can use whitespace.
> +	 */
> +	mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n / 2));
> +	mod->nmarkers = n / 2;
> +
> +	p = strings;
> +	for (i = 0; i < n; i += 2) {
> +		const char *name, *fmt;
> +		name = p;
> +		p += strlen_with_padding(p, strings_end);
> +		fmt = p;
> +		p += strlen_with_padding(p, strings_end);
> +
> +		mod->markers[i / 2] = NULL;
> +		asprintf(&mod->markers[i / 2], "%s\t%s\t%s\n",
> +			 name, mod->name, fmt);
> +		NOFAIL(mod->markers[i / 2]);
> +	}
> +}
> +
>  static void read_symbols(char *modname)
>  {
>  	const char *symname;
> @@ -1301,6 +1372,8 @@ static void read_symbols(char *modname)
>  		get_src_version(modname, mod->srcversion,
>  				sizeof(mod->srcversion)-1);
>  
> +	get_markers(&info, mod);
> +
>  	parse_elf_finish(&info);
>  
>  	/* Our trick to get versioning for struct_module - it's
> @@ -1649,6 +1722,91 @@ static void write_dump(const char *fname)
>  	write_if_changed(&buf, fname);
>  }
>  
> +static void add_marker(struct module *mod, const char *name, const char *fmt)
> +{
> +	char *line = NULL;
> +	asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
> +	NOFAIL(line);
> +
> +	mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) *
> +						     sizeof mod->markers[0])));
> +	mod->markers[mod->nmarkers++] = line;
> +}
> +
> +static void read_markers(const char *fname)
> +{
> +	unsigned long size, pos = 0;
> +	void *file = grab_file(fname, &size);
> +	char *line;
> +
> +        if (!file)
> +		/* No old markers, silently ignore */
> +		return;
> +
> +	while ((line = get_next_line(&pos, file, size))) {
> +		char *marker, *modname, *fmt;
> +		struct module *mod;
> +
> +		marker = line;
> +		if (!(modname = strchr(marker, '\t')))
> +			goto fail;
> +		*modname++ = '\0';
> +		if (!(fmt = strchr(modname, '\t')))
> +			goto fail;
> +		*fmt++ = '\0';
> +		if (*marker == '\0' || *modname == '\0')
> +			goto fail;
> +
> +		if (!(mod = find_module(modname))) {
> +			if (is_vmlinux(modname)) {
> +				have_vmlinux = 1;
> +			}
> +			mod = new_module(NOFAIL(strdup(modname)));
> +			mod->skip = 1;
> +		}
> +
> +		add_marker(mod, marker, fmt);
> +	}
> +	return;
> +fail:
> +	fatal("parse error in markers list file\n");
> +}
> +
> +static int compare_strings(const void *a, const void *b)
> +{
> +	return strcmp(*(const char **) a, *(const char **) b);
> +}
> +
> +static void write_markers(const char *fname)
> +{
> +	struct buffer buf = { };
> +	struct module *mod;
> +	size_t i;
> +
> +	for (mod = modules; mod; mod = mod->next)
> +		if ((!external_module || !mod->skip) && mod->markers != NULL) {
> +			/*
> +			 * Sort the strings so we can skip duplicates when
> +			 * we write them out.
> +			 */
> +			qsort(mod->markers, mod->nmarkers,
> +			      sizeof mod->markers[0], &compare_strings);
> +			for (i = 0; i < mod->nmarkers; ++i) {
> +				char *line = mod->markers[i];
> +				buf_write(&buf, line, strlen(line));
> +				while (i + 1 < mod->nmarkers &&
> +				       !strcmp(mod->markers[i],
> +					       mod->markers[i + 1]))
> +					free(mod->markers[i++]);
> +				free(mod->markers[i]);
> +			}
> +			free(mod->markers);
> +			mod->markers = NULL;
> +		}
> +
> +	write_if_changed(&buf, fname);
> +}
> +
>  int main(int argc, char **argv)
>  {
>  	struct module *mod;
> @@ -1656,10 +1814,12 @@ int main(int argc, char **argv)
>  	char fname[SZ];
>  	char *kernel_read = NULL, *module_read = NULL;
>  	char *dump_write = NULL;
> +	char *markers_read = NULL;
> +	char *markers_write = NULL;
>  	int opt;
>  	int err;
>  
> -	while ((opt = getopt(argc, argv, "i:I:mso:aw")) != -1) {
> +	while ((opt = getopt(argc, argv, "i:I:mso:awM:K:")) != -1) {
>  		switch(opt) {
>  			case 'i':
>  				kernel_read = optarg;
> @@ -1683,6 +1843,12 @@ int main(int argc, char **argv)
>  			case 'w':
>  				warn_unresolved = 1;
>  				break;
> +			case 'M':
> +				markers_write = optarg;
> +				break;
> +			case 'K':
> +				markers_read = optarg;
> +				break;
>  			default:
>  				exit(1);
>  		}
> @@ -1724,5 +1890,11 @@ int main(int argc, char **argv)
>  	if (dump_write)
>  		write_dump(dump_write);
>  
> +	if (markers_read)
> +		read_markers(markers_read);
> +
> +	if (markers_write)
> +		write_markers(markers_write);
> +
>  	return err;
>  }
> diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
> index 0ffed17..175301a 100644
> --- a/scripts/mod/modpost.h
> +++ b/scripts/mod/modpost.h
> @@ -110,6 +110,8 @@ struct module {
>  	int has_init;
>  	int has_cleanup;
>  	struct buffer dev_table_buf;
> +	char **markers;
> +	size_t nmarkers;
>  	char	     srcversion[25];
>  };
>  
> @@ -124,6 +126,7 @@ struct elf_info {
>  	Elf_Section  export_gpl_sec;
>  	Elf_Section  export_unused_gpl_sec;
>  	Elf_Section  export_gpl_future_sec;
> +	Elf_Section  markers_strings_sec;
>  	const char   *strtab;
>  	char	     *modinfo;
>  	unsigned int modinfo_len;

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] markers: modpost
  2007-11-01  2:46                           ` Mathieu Desnoyers
@ 2007-11-01  9:37                             ` Roland McGrath
  2007-11-01 11:24                               ` Mathieu Desnoyers
  0 siblings, 1 reply; 43+ messages in thread
From: Roland McGrath @ 2007-11-01  9:37 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: Andrew Morton, linux-kernel, systemtap

> If we want to do it safely, I think we should iterate from
> __start___markers to __stop___markers symbols of vmlinux and get the
> pointers to the name/format string pairs.
> 
> The same can then be done with modules using the __markers section.
> 
> Or maybe is there some reason not to do that ?

It's just rather a pain in the ass, a whole lot more fiddly work.
cf "somewhat crude" and "foreseeable future" in my patch's log entry.
Knock yourself out if you're looking for more tedious hacking to do in
modpost.c, but I say fix it when it breaks.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] markers: modpost
  2007-11-01  9:37                             ` Roland McGrath
@ 2007-11-01 11:24                               ` Mathieu Desnoyers
  2007-11-08 19:31                                 ` David Smith
  0 siblings, 1 reply; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-11-01 11:24 UTC (permalink / raw)
  To: Roland McGrath; +Cc: Andrew Morton, linux-kernel, systemtap

* Roland McGrath (roland@redhat.com) wrote:
> > If we want to do it safely, I think we should iterate from
> > __start___markers to __stop___markers symbols of vmlinux and get the
> > pointers to the name/format string pairs.
> > 
> > The same can then be done with modules using the __markers section.
> > 
> > Or maybe is there some reason not to do that ?
> 
> It's just rather a pain in the ass, a whole lot more fiddly work.
> cf "somewhat crude" and "foreseeable future" in my patch's log entry.
> Knock yourself out if you're looking for more tedious hacking to do in
> modpost.c, but I say fix it when it breaks.
> 

Hmmmm, I have rarely seen code go into mainline without addressing valid
technical criticism first. Please fix.

I'll look into it if I find the time.

Mathieu

> 
> Thanks,
> Roland

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] markers: modpost
  2007-11-01 11:24                               ` Mathieu Desnoyers
@ 2007-11-08 19:31                                 ` David Smith
  2007-11-08 19:36                                   ` Mathieu Desnoyers
  0 siblings, 1 reply; 43+ messages in thread
From: David Smith @ 2007-11-08 19:31 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: Roland McGrath, Andrew Morton, linux-kernel, systemtap

Mathieu Desnoyers wrote:
> * Roland McGrath (roland@redhat.com) wrote:
>>> If we want to do it safely, I think we should iterate from
>>> __start___markers to __stop___markers symbols of vmlinux and get the
>>> pointers to the name/format string pairs.
>>>
>>> The same can then be done with modules using the __markers section.
>>>
>>> Or maybe is there some reason not to do that ?
>> It's just rather a pain in the ass, a whole lot more fiddly work.
>> cf "somewhat crude" and "foreseeable future" in my patch's log entry.
>> Knock yourself out if you're looking for more tedious hacking to do in
>> modpost.c, but I say fix it when it breaks.
>>
> 
> Hmmmm, I have rarely seen code go into mainline without addressing valid
> technical criticism first. Please fix.
> 
> I'll look into it if I find the time.
> 
> Mathieu

Mathieu,

Here's an updated patch, written by Roland (that I tested for him), that
looks for all marker symbols in the __markers_strings section.  It doesn't
get the pointers from the __markers section because that is very difficult
to do in modpost (having to handle the architecture-dependent relocations
applied to those pointers).

See what you think.

---
This adds some new magic in the MODPOST phase for CONFIG_MARKERS.
Analogous to the Module.symvers file, the build will now write a
Module.markers file when CONFIG_MARKERS=y is set.  This file lists
the name, defining module, and format string of each marker,
separated by \t characters.  This simple text file can be used by
offline build procedures for instrumentation code, analogous to
how System.map and Module.symvers can be useful to have for
kernels other than the one you are running right now.

The method of extracting the strings is somewhat crude, but is pretty
simple and should work fine in practice for the foreseeable future.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 scripts/Makefile.modpost |   11 +++
 scripts/mod/modpost.c    |  184 +++++++++++++++++++++++++++++++++++++++++++++-
 scripts/mod/modpost.h    |    3 +
 3 files changed, 197 insertions(+), 1 deletions(-)

diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index d988f5d..6321870 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -13,6 +13,7 @@
 # 2) modpost is then used to
 # 3)  create one <module>.mod.c file pr. module
 # 4)  create one Module.symvers file with CRC for all exported symbols
+# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
 # 5) compile all <module>.mod.c files
 # 6) final link of the module to a <module.ko> file
 
@@ -45,6 +46,10 @@ include scripts/Makefile.lib
 
 kernelsymfile := $(objtree)/Module.symvers
 modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers
+kernelmarkersfile := $(objtree)/Module.markers
+modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
+
+markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
 
 # Step 1), find all modules listed in $(MODVERDIR)/
 __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
@@ -62,6 +67,8 @@ modpost = scripts/mod/modpost                    \
  $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile)   \
  $(if $(KBUILD_EXTMOD),-I $(modulesymfile))      \
  $(if $(KBUILD_EXTMOD),-o $(modulesymfile))      \
+ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
+ $(if $(CONFIG_MARKERS),-M $(markersfile))	 \
  $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w)
 
 quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
@@ -81,6 +88,10 @@ vmlinux.o: FORCE
 $(symverfile):         __modpost ;
 $(modules:.ko=.mod.c): __modpost ;
 
+ifdef CONFIG_MARKERS
+$(markersfile):	       __modpost ;
+endif
+
 
 # Step 5), compile all *.mod.c files
 
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 93ac52a..bbaf26d 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -11,6 +11,8 @@
  * Usage: modpost vmlinux module1.o module2.o ...
  */
 
+#define _GNU_SOURCE
+#include <stdio.h>
 #include <ctype.h>
 #include "modpost.h"
 #include "../../include/linux/license.h"
@@ -424,6 +426,8 @@ static int parse_elf(struct elf_info *info, const char *filename)
 			info->export_unused_gpl_sec = i;
 		else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
 			info->export_gpl_future_sec = i;
+		else if (strcmp(secname, "__markers_strings") == 0)
+			info->markers_strings_sec = i;
 
 		if (sechdrs[i].sh_type != SHT_SYMTAB)
 			continue;
@@ -1249,6 +1253,83 @@ static int exit_section_ref_ok(const char *name)
 	return 0;
 }
 
+static void get_markers(struct elf_info *info, struct module *mod)
+{
+	const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec];
+	const char *strings = (const char *) info->hdr + sh->sh_offset;
+	const Elf_Sym *sym, *first_sym, *last_sym;
+	const char *name;
+	size_t n;
+
+	if (!info->markers_strings_sec)
+		return;
+
+	/*
+	 * First count the strings.  They come in pairs of name, format.
+	 * We look for all the symbols defined in the __markers_strings
+	 * section.  They are named __mstrtab_name_* and __mstrtab_format_*
+	 * in matching pairs.  For these local names, the compiler puts
+	 * a random .NNN suffix on, so the names don't correspond exactly.
+	 */
+	first_sym = last_sym = NULL;
+	n = 0;
+	for (sym = info->symtab_start; sym < info->symtab_stop; sym++)
+		if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
+		    sym->st_shndx == info->markers_strings_sec) {
+			if (first_sym == NULL)
+				first_sym = sym;
+			last_sym = sym;
+			++n;
+			name = info->strtab + sym->st_name;
+			if (n % 2 == 0 ?
+			    !strncmp(name, "__mstrtab_name_",
+				     sizeof "__mstrtab_name_" - 1) :
+			    !strncmp(name, "__mstrtab_format_",
+				     sizeof "__mstrtab_format_" - 1)) {
+				warn("%s.ko has unexpected symbol \"%s\"\n",
+				     mod->name, name);
+				first_sym = NULL;
+			}
+		}
+
+	if (n % 2 != 0 || first_sym == NULL) {
+		warn("%s.ko has bad __markers_strings, ignoring it\n",
+		     mod->name);
+		return;
+	}
+
+	if (n == 0)
+		return;
+
+	/*
+	 * Now collect each pair into a formatted line for the output.
+	 * Lines look like:
+	 *	marker_name	vmlinux	marker %s format %d
+	 * The format string after the second \t can use whitespace.
+	 */
+	n /= 2;
+	mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n));
+	mod->nmarkers = n;
+
+	name = NULL;
+	n = 0;
+	for (sym = first_sym; sym <= last_sym; sym++)
+		if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
+		    sym->st_shndx == info->markers_strings_sec) {
+			const char *str = strings + sym->st_value;
+			if (name == NULL)
+				name = str;
+			else {
+				char *line = NULL;
+				asprintf(&line, "%s\t%s\t%s\n",
+					 name, mod->name, str);
+				NOFAIL(line);
+				mod->markers[n++] = line;
+				name = NULL;
+			}
+		}
+}
+
 static void read_symbols(char *modname)
 {
 	const char *symname;
@@ -1301,6 +1382,8 @@ static void read_symbols(char *modname)
 		get_src_version(modname, mod->srcversion,
 				sizeof(mod->srcversion)-1);
 
+	get_markers(&info, mod);
+
 	parse_elf_finish(&info);
 
 	/* Our trick to get versioning for struct_module - it's
@@ -1649,6 +1732,91 @@ static void write_dump(const char *fname)
 	write_if_changed(&buf, fname);
 }
 
+static void add_marker(struct module *mod, const char *name, const char *fmt)
+{
+	char *line = NULL;
+	asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
+	NOFAIL(line);
+
+	mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) *
+						     sizeof mod->markers[0])));
+	mod->markers[mod->nmarkers++] = line;
+}
+
+static void read_markers(const char *fname)
+{
+	unsigned long size, pos = 0;
+	void *file = grab_file(fname, &size);
+	char *line;
+
+        if (!file)
+		/* No old markers, silently ignore */
+		return;
+
+	while ((line = get_next_line(&pos, file, size))) {
+		char *marker, *modname, *fmt;
+		struct module *mod;
+
+		marker = line;
+		if (!(modname = strchr(marker, '\t')))
+			goto fail;
+		*modname++ = '\0';
+		if (!(fmt = strchr(modname, '\t')))
+			goto fail;
+		*fmt++ = '\0';
+		if (*marker == '\0' || *modname == '\0')
+			goto fail;
+
+		if (!(mod = find_module(modname))) {
+			if (is_vmlinux(modname)) {
+				have_vmlinux = 1;
+			}
+			mod = new_module(NOFAIL(strdup(modname)));
+			mod->skip = 1;
+		}
+
+		add_marker(mod, marker, fmt);
+	}
+	return;
+fail:
+	fatal("parse error in markers list file\n");
+}
+
+static int compare_strings(const void *a, const void *b)
+{
+	return strcmp(*(const char **) a, *(const char **) b);
+}
+
+static void write_markers(const char *fname)
+{
+	struct buffer buf = { };
+	struct module *mod;
+	size_t i;
+
+	for (mod = modules; mod; mod = mod->next)
+		if ((!external_module || !mod->skip) && mod->markers != NULL) {
+			/*
+			 * Sort the strings so we can skip duplicates when
+			 * we write them out.
+			 */
+			qsort(mod->markers, mod->nmarkers,
+			      sizeof mod->markers[0], &compare_strings);
+			for (i = 0; i < mod->nmarkers; ++i) {
+				char *line = mod->markers[i];
+				buf_write(&buf, line, strlen(line));
+				while (i + 1 < mod->nmarkers &&
+				       !strcmp(mod->markers[i],
+					       mod->markers[i + 1]))
+					free(mod->markers[i++]);
+				free(mod->markers[i]);
+			}
+			free(mod->markers);
+			mod->markers = NULL;
+		}
+
+	write_if_changed(&buf, fname);
+}
+
 int main(int argc, char **argv)
 {
 	struct module *mod;
@@ -1656,10 +1824,12 @@ int main(int argc, char **argv)
 	char fname[SZ];
 	char *kernel_read = NULL, *module_read = NULL;
 	char *dump_write = NULL;
+	char *markers_read = NULL;
+	char *markers_write = NULL;
 	int opt;
 	int err;
 
-	while ((opt = getopt(argc, argv, "i:I:mso:aw")) != -1) {
+	while ((opt = getopt(argc, argv, "i:I:mso:awM:K:")) != -1) {
 		switch(opt) {
 			case 'i':
 				kernel_read = optarg;
@@ -1683,6 +1853,12 @@ int main(int argc, char **argv)
 			case 'w':
 				warn_unresolved = 1;
 				break;
+			case 'M':
+				markers_write = optarg;
+				break;
+			case 'K':
+				markers_read = optarg;
+				break;
 			default:
 				exit(1);
 		}
@@ -1724,5 +1900,11 @@ int main(int argc, char **argv)
 	if (dump_write)
 		write_dump(dump_write);
 
+	if (markers_read)
+		read_markers(markers_read);
+
+	if (markers_write)
+		write_markers(markers_write);
+
 	return err;
 }
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 0ffed17..175301a 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -110,6 +110,8 @@ struct module {
 	int has_init;
 	int has_cleanup;
 	struct buffer dev_table_buf;
+	char **markers;
+	size_t nmarkers;
 	char	     srcversion[25];
 };
 
@@ -124,6 +126,7 @@ struct elf_info {
 	Elf_Section  export_gpl_sec;
 	Elf_Section  export_unused_gpl_sec;
 	Elf_Section  export_gpl_future_sec;
+	Elf_Section  markers_strings_sec;
 	const char   *strtab;
 	char	     *modinfo;
 	unsigned int modinfo_len;






^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] markers: modpost
  2007-11-08 19:31                                 ` David Smith
@ 2007-11-08 19:36                                   ` Mathieu Desnoyers
  2007-11-08 19:45                                     ` David Smith
  2007-11-09 16:36                                     ` David Smith
  0 siblings, 2 replies; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-11-08 19:36 UTC (permalink / raw)
  To: David Smith; +Cc: Roland McGrath, Andrew Morton, linux-kernel, systemtap

* David Smith (dsmith@redhat.com) wrote:
> Mathieu Desnoyers wrote:
> > * Roland McGrath (roland@redhat.com) wrote:
> >>> If we want to do it safely, I think we should iterate from
> >>> __start___markers to __stop___markers symbols of vmlinux and get the
> >>> pointers to the name/format string pairs.
> >>>
> >>> The same can then be done with modules using the __markers section.
> >>>
> >>> Or maybe is there some reason not to do that ?
> >> It's just rather a pain in the ass, a whole lot more fiddly work.
> >> cf "somewhat crude" and "foreseeable future" in my patch's log entry.
> >> Knock yourself out if you're looking for more tedious hacking to do in
> >> modpost.c, but I say fix it when it breaks.
> >>
> > 
> > Hmmmm, I have rarely seen code go into mainline without addressing valid
> > technical criticism first. Please fix.
> > 
> > I'll look into it if I find the time.
> > 
> > Mathieu
> 
> Mathieu,
> 
> Here's an updated patch, written by Roland (that I tested for him), that
> looks for all marker symbols in the __markers_strings section.  It doesn't
> get the pointers from the __markers section because that is very difficult
> to do in modpost (having to handle the architecture-dependent relocations
> applied to those pointers).
> 

Hrm, what would happen if a gcc optimization eventually decides to mix
the memory layout of the strings ? Is there something that specifies
that they won't ?

> See what you think.
> 
> ---
> This adds some new magic in the MODPOST phase for CONFIG_MARKERS.
> Analogous to the Module.symvers file, the build will now write a
> Module.markers file when CONFIG_MARKERS=y is set.  This file lists
> the name, defining module, and format string of each marker,
> separated by \t characters.  This simple text file can be used by
> offline build procedures for instrumentation code, analogous to
> how System.map and Module.symvers can be useful to have for
> kernels other than the one you are running right now.
> 
> The method of extracting the strings is somewhat crude, but is pretty
> simple and should work fine in practice for the foreseeable future.
> 
> Signed-off-by: Roland McGrath <roland@redhat.com>
> ---
>  scripts/Makefile.modpost |   11 +++
>  scripts/mod/modpost.c    |  184 +++++++++++++++++++++++++++++++++++++++++++++-
>  scripts/mod/modpost.h    |    3 +
>  3 files changed, 197 insertions(+), 1 deletions(-)
> 
> diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
> index d988f5d..6321870 100644
> --- a/scripts/Makefile.modpost
> +++ b/scripts/Makefile.modpost
> @@ -13,6 +13,7 @@
>  # 2) modpost is then used to
>  # 3)  create one <module>.mod.c file pr. module
>  # 4)  create one Module.symvers file with CRC for all exported symbols
> +# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
>  # 5) compile all <module>.mod.c files
>  # 6) final link of the module to a <module.ko> file
>  
> @@ -45,6 +46,10 @@ include scripts/Makefile.lib
>  
>  kernelsymfile := $(objtree)/Module.symvers
>  modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers
> +kernelmarkersfile := $(objtree)/Module.markers
> +modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
> +
> +markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
>  
>  # Step 1), find all modules listed in $(MODVERDIR)/
>  __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
> @@ -62,6 +67,8 @@ modpost = scripts/mod/modpost                    \
>   $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile)   \
>   $(if $(KBUILD_EXTMOD),-I $(modulesymfile))      \
>   $(if $(KBUILD_EXTMOD),-o $(modulesymfile))      \
> + $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
> + $(if $(CONFIG_MARKERS),-M $(markersfile))	 \
>   $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w)
>  
>  quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
> @@ -81,6 +88,10 @@ vmlinux.o: FORCE
>  $(symverfile):         __modpost ;
>  $(modules:.ko=.mod.c): __modpost ;
>  
> +ifdef CONFIG_MARKERS
> +$(markersfile):	       __modpost ;
> +endif
> +
>  
>  # Step 5), compile all *.mod.c files
>  
> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
> index 93ac52a..bbaf26d 100644
> --- a/scripts/mod/modpost.c
> +++ b/scripts/mod/modpost.c
> @@ -11,6 +11,8 @@
>   * Usage: modpost vmlinux module1.o module2.o ...
>   */
>  
> +#define _GNU_SOURCE
> +#include <stdio.h>
>  #include <ctype.h>
>  #include "modpost.h"
>  #include "../../include/linux/license.h"
> @@ -424,6 +426,8 @@ static int parse_elf(struct elf_info *info, const char *filename)
>  			info->export_unused_gpl_sec = i;
>  		else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
>  			info->export_gpl_future_sec = i;
> +		else if (strcmp(secname, "__markers_strings") == 0)
> +			info->markers_strings_sec = i;
>  
>  		if (sechdrs[i].sh_type != SHT_SYMTAB)
>  			continue;
> @@ -1249,6 +1253,83 @@ static int exit_section_ref_ok(const char *name)
>  	return 0;
>  }
>  
> +static void get_markers(struct elf_info *info, struct module *mod)
> +{
> +	const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec];
> +	const char *strings = (const char *) info->hdr + sh->sh_offset;
> +	const Elf_Sym *sym, *first_sym, *last_sym;
> +	const char *name;
> +	size_t n;
> +
> +	if (!info->markers_strings_sec)
> +		return;
> +
> +	/*
> +	 * First count the strings.  They come in pairs of name, format.
> +	 * We look for all the symbols defined in the __markers_strings
> +	 * section.  They are named __mstrtab_name_* and __mstrtab_format_*
> +	 * in matching pairs.  For these local names, the compiler puts
> +	 * a random .NNN suffix on, so the names don't correspond exactly.
> +	 */
> +	first_sym = last_sym = NULL;
> +	n = 0;
> +	for (sym = info->symtab_start; sym < info->symtab_stop; sym++)
> +		if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
> +		    sym->st_shndx == info->markers_strings_sec) {
> +			if (first_sym == NULL)
> +				first_sym = sym;
> +			last_sym = sym;
> +			++n;
> +			name = info->strtab + sym->st_name;
> +			if (n % 2 == 0 ?
> +			    !strncmp(name, "__mstrtab_name_",
> +				     sizeof "__mstrtab_name_" - 1) :
> +			    !strncmp(name, "__mstrtab_format_",
> +				     sizeof "__mstrtab_format_" - 1)) {
> +				warn("%s.ko has unexpected symbol \"%s\"\n",
> +				     mod->name, name);
> +				first_sym = NULL;
> +			}
> +		}
> +
> +	if (n % 2 != 0 || first_sym == NULL) {
> +		warn("%s.ko has bad __markers_strings, ignoring it\n",
> +		     mod->name);
> +		return;
> +	}
> +
> +	if (n == 0)
> +		return;
> +
> +	/*
> +	 * Now collect each pair into a formatted line for the output.
> +	 * Lines look like:
> +	 *	marker_name	vmlinux	marker %s format %d
> +	 * The format string after the second \t can use whitespace.
> +	 */
> +	n /= 2;
> +	mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n));
> +	mod->nmarkers = n;
> +
> +	name = NULL;
> +	n = 0;
> +	for (sym = first_sym; sym <= last_sym; sym++)
> +		if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
> +		    sym->st_shndx == info->markers_strings_sec) {
> +			const char *str = strings + sym->st_value;
> +			if (name == NULL)
> +				name = str;
> +			else {
> +				char *line = NULL;
> +				asprintf(&line, "%s\t%s\t%s\n",
> +					 name, mod->name, str);
> +				NOFAIL(line);
> +				mod->markers[n++] = line;
> +				name = NULL;
> +			}
> +		}
> +}
> +
>  static void read_symbols(char *modname)
>  {
>  	const char *symname;
> @@ -1301,6 +1382,8 @@ static void read_symbols(char *modname)
>  		get_src_version(modname, mod->srcversion,
>  				sizeof(mod->srcversion)-1);
>  
> +	get_markers(&info, mod);
> +
>  	parse_elf_finish(&info);
>  
>  	/* Our trick to get versioning for struct_module - it's
> @@ -1649,6 +1732,91 @@ static void write_dump(const char *fname)
>  	write_if_changed(&buf, fname);
>  }
>  
> +static void add_marker(struct module *mod, const char *name, const char *fmt)
> +{
> +	char *line = NULL;
> +	asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
> +	NOFAIL(line);
> +
> +	mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) *
> +						     sizeof mod->markers[0])));
> +	mod->markers[mod->nmarkers++] = line;
> +}
> +
> +static void read_markers(const char *fname)
> +{
> +	unsigned long size, pos = 0;
> +	void *file = grab_file(fname, &size);
> +	char *line;
> +
> +        if (!file)
> +		/* No old markers, silently ignore */
> +		return;
> +
> +	while ((line = get_next_line(&pos, file, size))) {
> +		char *marker, *modname, *fmt;
> +		struct module *mod;
> +
> +		marker = line;
> +		if (!(modname = strchr(marker, '\t')))
> +			goto fail;
> +		*modname++ = '\0';
> +		if (!(fmt = strchr(modname, '\t')))
> +			goto fail;
> +		*fmt++ = '\0';
> +		if (*marker == '\0' || *modname == '\0')
> +			goto fail;
> +
> +		if (!(mod = find_module(modname))) {
> +			if (is_vmlinux(modname)) {
> +				have_vmlinux = 1;
> +			}
> +			mod = new_module(NOFAIL(strdup(modname)));
> +			mod->skip = 1;
> +		}
> +
> +		add_marker(mod, marker, fmt);
> +	}
> +	return;
> +fail:
> +	fatal("parse error in markers list file\n");
> +}
> +
> +static int compare_strings(const void *a, const void *b)
> +{
> +	return strcmp(*(const char **) a, *(const char **) b);
> +}
> +
> +static void write_markers(const char *fname)
> +{
> +	struct buffer buf = { };
> +	struct module *mod;
> +	size_t i;
> +
> +	for (mod = modules; mod; mod = mod->next)
> +		if ((!external_module || !mod->skip) && mod->markers != NULL) {
> +			/*
> +			 * Sort the strings so we can skip duplicates when
> +			 * we write them out.
> +			 */
> +			qsort(mod->markers, mod->nmarkers,
> +			      sizeof mod->markers[0], &compare_strings);
> +			for (i = 0; i < mod->nmarkers; ++i) {
> +				char *line = mod->markers[i];
> +				buf_write(&buf, line, strlen(line));
> +				while (i + 1 < mod->nmarkers &&
> +				       !strcmp(mod->markers[i],
> +					       mod->markers[i + 1]))
> +					free(mod->markers[i++]);
> +				free(mod->markers[i]);
> +			}
> +			free(mod->markers);
> +			mod->markers = NULL;
> +		}
> +
> +	write_if_changed(&buf, fname);
> +}
> +
>  int main(int argc, char **argv)
>  {
>  	struct module *mod;
> @@ -1656,10 +1824,12 @@ int main(int argc, char **argv)
>  	char fname[SZ];
>  	char *kernel_read = NULL, *module_read = NULL;
>  	char *dump_write = NULL;
> +	char *markers_read = NULL;
> +	char *markers_write = NULL;
>  	int opt;
>  	int err;
>  
> -	while ((opt = getopt(argc, argv, "i:I:mso:aw")) != -1) {
> +	while ((opt = getopt(argc, argv, "i:I:mso:awM:K:")) != -1) {
>  		switch(opt) {
>  			case 'i':
>  				kernel_read = optarg;
> @@ -1683,6 +1853,12 @@ int main(int argc, char **argv)
>  			case 'w':
>  				warn_unresolved = 1;
>  				break;
> +			case 'M':
> +				markers_write = optarg;
> +				break;
> +			case 'K':
> +				markers_read = optarg;
> +				break;
>  			default:
>  				exit(1);
>  		}
> @@ -1724,5 +1900,11 @@ int main(int argc, char **argv)
>  	if (dump_write)
>  		write_dump(dump_write);
>  
> +	if (markers_read)
> +		read_markers(markers_read);
> +
> +	if (markers_write)
> +		write_markers(markers_write);
> +
>  	return err;
>  }
> diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
> index 0ffed17..175301a 100644
> --- a/scripts/mod/modpost.h
> +++ b/scripts/mod/modpost.h
> @@ -110,6 +110,8 @@ struct module {
>  	int has_init;
>  	int has_cleanup;
>  	struct buffer dev_table_buf;
> +	char **markers;
> +	size_t nmarkers;
>  	char	     srcversion[25];
>  };
>  
> @@ -124,6 +126,7 @@ struct elf_info {
>  	Elf_Section  export_gpl_sec;
>  	Elf_Section  export_unused_gpl_sec;
>  	Elf_Section  export_gpl_future_sec;
> +	Elf_Section  markers_strings_sec;
>  	const char   *strtab;
>  	char	     *modinfo;
>  	unsigned int modinfo_len;
> 
> 
> 
> 
> 

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] markers: modpost
  2007-11-08 19:36                                   ` Mathieu Desnoyers
@ 2007-11-08 19:45                                     ` David Smith
  2007-11-09 16:36                                     ` David Smith
  1 sibling, 0 replies; 43+ messages in thread
From: David Smith @ 2007-11-08 19:45 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: Roland McGrath, Andrew Morton, linux-kernel, systemtap

Mathieu Desnoyers wrote:
> * David Smith (dsmith@redhat.com) wrote:
>> Mathieu Desnoyers wrote:
>>> * Roland McGrath (roland@redhat.com) wrote:
>>>>> If we want to do it safely, I think we should iterate from
>>>>> __start___markers to __stop___markers symbols of vmlinux and get the
>>>>> pointers to the name/format string pairs.
>>>>>
>>>>> The same can then be done with modules using the __markers section.
>>>>>
>>>>> Or maybe is there some reason not to do that ?
>>>> It's just rather a pain in the ass, a whole lot more fiddly work.
>>>> cf "somewhat crude" and "foreseeable future" in my patch's log entry.
>>>> Knock yourself out if you're looking for more tedious hacking to do in
>>>> modpost.c, but I say fix it when it breaks.
>>>>
>>> Hmmmm, I have rarely seen code go into mainline without addressing valid
>>> technical criticism first. Please fix.
>>>
>>> I'll look into it if I find the time.
>>>
>>> Mathieu
>> Mathieu,
>>
>> Here's an updated patch, written by Roland (that I tested for him), that
>> looks for all marker symbols in the __markers_strings section.  It doesn't
>> get the pointers from the __markers section because that is very difficult
>> to do in modpost (having to handle the architecture-dependent relocations
>> applied to those pointers).
>>
> 
> Hrm, what would happen if a gcc optimization eventually decides to mix
> the memory layout of the strings ? Is there something that specifies
> that they won't ?

I don't believe there is anything in gcc that specifies that the strings
won't get mixed around.  But, I believe this code is good for the
foreseeable future.  We could fix this code if the future breakage does
happen.

-- 
David Smith
dsmith@redhat.com
Red Hat
http://www.redhat.com
256.217.0141 (direct)
256.837.0057 (fax)

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] markers: modpost
  2007-11-08 19:36                                   ` Mathieu Desnoyers
  2007-11-08 19:45                                     ` David Smith
@ 2007-11-09 16:36                                     ` David Smith
  2007-11-11 23:24                                       ` Mathieu Desnoyers
  1 sibling, 1 reply; 43+ messages in thread
From: David Smith @ 2007-11-09 16:36 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: Roland McGrath, Andrew Morton, linux-kernel, systemtap

Mathieu Desnoyers wrote:
> Hrm, what would happen if a gcc optimization eventually decides to mix
> the memory layout of the strings ? Is there something that specifies
> that they won't ?

Here's another patch that Roland wrote and I tested that
attempts to solve the potential problem of string ordering
by merging the name and format strings.

---
This adds some new magic in the MODPOST phase for CONFIG_MARKERS.
Analogous to the Module.symvers file, the build will now write a
Module.markers file when CONFIG_MARKERS=y is set.  This file lists
the name, defining module, and format string of each marker,
separated by \t characters.  This simple text file can be used by
offline build procedures for instrumentation code, analogous to
how System.map and Module.symvers can be useful to have for
kernels other than the one you are running right now.

The strings are made easy to extract by having the __trace_mark macro
define the name and format together in a single array called __mstrtab_*
in the __markers_strings section.  This is straightforward and reliable
as long as the marker structs are always defined by this macro.  It is
an unreasonable amount of hairy work to extract the string pointers from
the __markers section structs, which entails handling a relocation type
for every machine under the sun.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 include/linux/marker.h   |    9 +--
 scripts/Makefile.modpost |   11 +++
 scripts/mod/modpost.c    |  163 +++++++++++++++++++++++++++++++++++++++++++++-
 scripts/mod/modpost.h    |    3 +
 4 files changed, 179 insertions(+), 7 deletions(-)

diff --git a/include/linux/marker.h b/include/linux/marker.h
index 5f36cf9..b978bbe 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -51,15 +51,12 @@ struct marker {
  */
 #define __trace_mark(generic, name, call_data, format, args...)		\
 	do {								\
-		static const char __mstrtab_name_##name[]		\
+		static const char __mstrtab_##name[]			\
 		__attribute__((section("__markers_strings")))		\
-		= #name;						\
-		static const char __mstrtab_format_##name[]		\
-		__attribute__((section("__markers_strings")))		\
-		= format;						\
+ 		= #name "\0" format;					\
 		static struct marker __mark_##name			\
 		__attribute__((section("__markers"), aligned(8))) =	\
-		{ __mstrtab_name_##name, __mstrtab_format_##name,	\
+		{ __mstrtab_##name, &__mstrtab_##name[sizeof(#name)],	\
 		0, __mark_empty_function, NULL };			\
 		__mark_check_format(format, ## args);			\
 		if (!generic) {						\
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index d988f5d..6321870 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -13,6 +13,7 @@
 # 2) modpost is then used to
 # 3)  create one <module>.mod.c file pr. module
 # 4)  create one Module.symvers file with CRC for all exported symbols
+# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
 # 5) compile all <module>.mod.c files
 # 6) final link of the module to a <module.ko> file
 
@@ -45,6 +46,10 @@ include scripts/Makefile.lib
 
 kernelsymfile := $(objtree)/Module.symvers
 modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers
+kernelmarkersfile := $(objtree)/Module.markers
+modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
+
+markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
 
 # Step 1), find all modules listed in $(MODVERDIR)/
 __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
@@ -62,6 +67,8 @@ modpost = scripts/mod/modpost                    \
  $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile)   \
  $(if $(KBUILD_EXTMOD),-I $(modulesymfile))      \
  $(if $(KBUILD_EXTMOD),-o $(modulesymfile))      \
+ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
+ $(if $(CONFIG_MARKERS),-M $(markersfile))	 \
  $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w)
 
 quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
@@ -81,6 +88,10 @@ vmlinux.o: FORCE
 $(symverfile):         __modpost ;
 $(modules:.ko=.mod.c): __modpost ;
 
+ifdef CONFIG_MARKERS
+$(markersfile):	       __modpost ;
+endif
+
 
 # Step 5), compile all *.mod.c files
 
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 93ac52a..53887e8 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -11,6 +11,8 @@
  * Usage: modpost vmlinux module1.o module2.o ...
  */
 
+#define _GNU_SOURCE
+#include <stdio.h>
 #include <ctype.h>
 #include "modpost.h"
 #include "../../include/linux/license.h"
@@ -424,6 +426,8 @@ static int parse_elf(struct elf_info *info, const char *filename)
 			info->export_unused_gpl_sec = i;
 		else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
 			info->export_gpl_future_sec = i;
+		else if (strcmp(secname, "__markers_strings") == 0)
+			info->markers_strings_sec = i;
 
 		if (sechdrs[i].sh_type != SHT_SYMTAB)
 			continue;
@@ -1249,6 +1253,62 @@ static int exit_section_ref_ok(const char *name)
 	return 0;
 }
 
+static void get_markers(struct elf_info *info, struct module *mod)
+{
+	const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec];
+	const char *strings = (const char *) info->hdr + sh->sh_offset;
+	const Elf_Sym *sym, *first_sym, *last_sym;
+	size_t n;
+
+	if (!info->markers_strings_sec)
+		return;
+
+	/*
+	 * First count the strings.  We look for all the symbols defined
+	 * in the __markers_strings section named __mstrtab_*.  For
+	 * these local names, the compiler puts a random .NNN suffix on,
+	 * so the names don't correspond exactly.
+	 */
+	first_sym = last_sym = NULL;
+	n = 0;
+	for (sym = info->symtab_start; sym < info->symtab_stop; sym++)
+		if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
+		    sym->st_shndx == info->markers_strings_sec &&
+		    !strncmp(info->strtab + sym->st_name,
+			     "__mstrtab_", sizeof "__mstrtab_" - 1)) {
+			if (first_sym == NULL)
+				first_sym = sym;
+			last_sym = sym;
+			++n;
+		}
+
+	if (n == 0)
+		return;
+
+	/*
+	 * Now collect each name and format into a line for the output.
+	 * Lines look like:
+	 *	marker_name	vmlinux	marker %s format %d
+	 * The format string after the second \t can use whitespace.
+	 */
+	mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n));
+	mod->nmarkers = n;
+
+	n = 0;
+	for (sym = first_sym; sym <= last_sym; sym++)
+		if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
+		    sym->st_shndx == info->markers_strings_sec &&
+		    !strncmp(info->strtab + sym->st_name,
+			     "__mstrtab_", sizeof "__mstrtab_" - 1)) {
+			const char *name = strings + sym->st_value;
+			const char *fmt = strchr(name, '\0') + 1;
+			char *line = NULL;
+			asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
+			NOFAIL(line);
+			mod->markers[n++] = line;
+		}
+}
+
 static void read_symbols(char *modname)
 {
 	const char *symname;
@@ -1301,6 +1361,8 @@ static void read_symbols(char *modname)
 		get_src_version(modname, mod->srcversion,
 				sizeof(mod->srcversion)-1);
 
+	get_markers(&info, mod);
+
 	parse_elf_finish(&info);
 
 	/* Our trick to get versioning for struct_module - it's
@@ -1649,6 +1711,91 @@ static void write_dump(const char *fname)
 	write_if_changed(&buf, fname);
 }
 
+static void add_marker(struct module *mod, const char *name, const char *fmt)
+{
+	char *line = NULL;
+	asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
+	NOFAIL(line);
+
+	mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) *
+						     sizeof mod->markers[0])));
+	mod->markers[mod->nmarkers++] = line;
+}
+
+static void read_markers(const char *fname)
+{
+	unsigned long size, pos = 0;
+	void *file = grab_file(fname, &size);
+	char *line;
+
+        if (!file)
+		/* No old markers, silently ignore */
+		return;
+
+	while ((line = get_next_line(&pos, file, size))) {
+		char *marker, *modname, *fmt;
+		struct module *mod;
+
+		marker = line;
+		if (!(modname = strchr(marker, '\t')))
+			goto fail;
+		*modname++ = '\0';
+		if (!(fmt = strchr(modname, '\t')))
+			goto fail;
+		*fmt++ = '\0';
+		if (*marker == '\0' || *modname == '\0')
+			goto fail;
+
+		if (!(mod = find_module(modname))) {
+			if (is_vmlinux(modname)) {
+				have_vmlinux = 1;
+			}
+			mod = new_module(NOFAIL(strdup(modname)));
+			mod->skip = 1;
+		}
+
+		add_marker(mod, marker, fmt);
+	}
+	return;
+fail:
+	fatal("parse error in markers list file\n");
+}
+
+static int compare_strings(const void *a, const void *b)
+{
+	return strcmp(*(const char **) a, *(const char **) b);
+}
+
+static void write_markers(const char *fname)
+{
+	struct buffer buf = { };
+	struct module *mod;
+	size_t i;
+
+	for (mod = modules; mod; mod = mod->next)
+		if ((!external_module || !mod->skip) && mod->markers != NULL) {
+			/*
+			 * Sort the strings so we can skip duplicates when
+			 * we write them out.
+			 */
+			qsort(mod->markers, mod->nmarkers,
+			      sizeof mod->markers[0], &compare_strings);
+			for (i = 0; i < mod->nmarkers; ++i) {
+				char *line = mod->markers[i];
+				buf_write(&buf, line, strlen(line));
+				while (i + 1 < mod->nmarkers &&
+				       !strcmp(mod->markers[i],
+					       mod->markers[i + 1]))
+					free(mod->markers[i++]);
+				free(mod->markers[i]);
+			}
+			free(mod->markers);
+			mod->markers = NULL;
+		}
+
+	write_if_changed(&buf, fname);
+}
+
 int main(int argc, char **argv)
 {
 	struct module *mod;
@@ -1656,10 +1803,12 @@ int main(int argc, char **argv)
 	char fname[SZ];
 	char *kernel_read = NULL, *module_read = NULL;
 	char *dump_write = NULL;
+	char *markers_read = NULL;
+	char *markers_write = NULL;
 	int opt;
 	int err;
 
-	while ((opt = getopt(argc, argv, "i:I:mso:aw")) != -1) {
+	while ((opt = getopt(argc, argv, "i:I:mso:awM:K:")) != -1) {
 		switch(opt) {
 			case 'i':
 				kernel_read = optarg;
@@ -1683,6 +1832,12 @@ int main(int argc, char **argv)
 			case 'w':
 				warn_unresolved = 1;
 				break;
+			case 'M':
+				markers_write = optarg;
+				break;
+			case 'K':
+				markers_read = optarg;
+				break;
 			default:
 				exit(1);
 		}
@@ -1724,5 +1879,11 @@ int main(int argc, char **argv)
 	if (dump_write)
 		write_dump(dump_write);
 
+	if (markers_read)
+		read_markers(markers_read);
+
+	if (markers_write)
+		write_markers(markers_write);
+
 	return err;
 }
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 0ffed17..175301a 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -110,6 +110,8 @@ struct module {
 	int has_init;
 	int has_cleanup;
 	struct buffer dev_table_buf;
+	char **markers;
+	size_t nmarkers;
 	char	     srcversion[25];
 };
 
@@ -124,6 +126,7 @@ struct elf_info {
 	Elf_Section  export_gpl_sec;
 	Elf_Section  export_unused_gpl_sec;
 	Elf_Section  export_gpl_future_sec;
+	Elf_Section  markers_strings_sec;
 	const char   *strtab;
 	char	     *modinfo;
 	unsigned int modinfo_len;



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] markers: modpost
  2007-11-09 16:36                                     ` David Smith
@ 2007-11-11 23:24                                       ` Mathieu Desnoyers
  0 siblings, 0 replies; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-11-11 23:24 UTC (permalink / raw)
  To: David Smith; +Cc: Roland McGrath, Andrew Morton, linux-kernel, systemtap

* David Smith (dsmith@redhat.com) wrote:
> Mathieu Desnoyers wrote:
> > Hrm, what would happen if a gcc optimization eventually decides to mix
> > the memory layout of the strings ? Is there something that specifies
> > that they won't ?
> 
> Here's another patch that Roland wrote and I tested that
> attempts to solve the potential problem of string ordering
> by merging the name and format strings.
> 

Yup, it looks good. I'll give it a try.

Thanks!

Mathieu

> ---
> This adds some new magic in the MODPOST phase for CONFIG_MARKERS.
> Analogous to the Module.symvers file, the build will now write a
> Module.markers file when CONFIG_MARKERS=y is set.  This file lists
> the name, defining module, and format string of each marker,
> separated by \t characters.  This simple text file can be used by
> offline build procedures for instrumentation code, analogous to
> how System.map and Module.symvers can be useful to have for
> kernels other than the one you are running right now.
> 
> The strings are made easy to extract by having the __trace_mark macro
> define the name and format together in a single array called __mstrtab_*
> in the __markers_strings section.  This is straightforward and reliable
> as long as the marker structs are always defined by this macro.  It is
> an unreasonable amount of hairy work to extract the string pointers from
> the __markers section structs, which entails handling a relocation type
> for every machine under the sun.
> 
> Signed-off-by: Roland McGrath <roland@redhat.com>
> ---
>  include/linux/marker.h   |    9 +--
>  scripts/Makefile.modpost |   11 +++
>  scripts/mod/modpost.c    |  163 +++++++++++++++++++++++++++++++++++++++++++++-
>  scripts/mod/modpost.h    |    3 +
>  4 files changed, 179 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/marker.h b/include/linux/marker.h
> index 5f36cf9..b978bbe 100644
> --- a/include/linux/marker.h
> +++ b/include/linux/marker.h
> @@ -51,15 +51,12 @@ struct marker {
>   */
>  #define __trace_mark(generic, name, call_data, format, args...)		\
>  	do {								\
> -		static const char __mstrtab_name_##name[]		\
> +		static const char __mstrtab_##name[]			\
>  		__attribute__((section("__markers_strings")))		\
> -		= #name;						\
> -		static const char __mstrtab_format_##name[]		\
> -		__attribute__((section("__markers_strings")))		\
> -		= format;						\
> + 		= #name "\0" format;					\
>  		static struct marker __mark_##name			\
>  		__attribute__((section("__markers"), aligned(8))) =	\
> -		{ __mstrtab_name_##name, __mstrtab_format_##name,	\
> +		{ __mstrtab_##name, &__mstrtab_##name[sizeof(#name)],	\
>  		0, __mark_empty_function, NULL };			\
>  		__mark_check_format(format, ## args);			\
>  		if (!generic) {						\
> diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
> index d988f5d..6321870 100644
> --- a/scripts/Makefile.modpost
> +++ b/scripts/Makefile.modpost
> @@ -13,6 +13,7 @@
>  # 2) modpost is then used to
>  # 3)  create one <module>.mod.c file pr. module
>  # 4)  create one Module.symvers file with CRC for all exported symbols
> +# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
>  # 5) compile all <module>.mod.c files
>  # 6) final link of the module to a <module.ko> file
>  
> @@ -45,6 +46,10 @@ include scripts/Makefile.lib
>  
>  kernelsymfile := $(objtree)/Module.symvers
>  modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers
> +kernelmarkersfile := $(objtree)/Module.markers
> +modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
> +
> +markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
>  
>  # Step 1), find all modules listed in $(MODVERDIR)/
>  __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
> @@ -62,6 +67,8 @@ modpost = scripts/mod/modpost                    \
>   $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile)   \
>   $(if $(KBUILD_EXTMOD),-I $(modulesymfile))      \
>   $(if $(KBUILD_EXTMOD),-o $(modulesymfile))      \
> + $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
> + $(if $(CONFIG_MARKERS),-M $(markersfile))	 \
>   $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w)
>  
>  quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
> @@ -81,6 +88,10 @@ vmlinux.o: FORCE
>  $(symverfile):         __modpost ;
>  $(modules:.ko=.mod.c): __modpost ;
>  
> +ifdef CONFIG_MARKERS
> +$(markersfile):	       __modpost ;
> +endif
> +
>  
>  # Step 5), compile all *.mod.c files
>  
> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
> index 93ac52a..53887e8 100644
> --- a/scripts/mod/modpost.c
> +++ b/scripts/mod/modpost.c
> @@ -11,6 +11,8 @@
>   * Usage: modpost vmlinux module1.o module2.o ...
>   */
>  
> +#define _GNU_SOURCE
> +#include <stdio.h>
>  #include <ctype.h>
>  #include "modpost.h"
>  #include "../../include/linux/license.h"
> @@ -424,6 +426,8 @@ static int parse_elf(struct elf_info *info, const char *filename)
>  			info->export_unused_gpl_sec = i;
>  		else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
>  			info->export_gpl_future_sec = i;
> +		else if (strcmp(secname, "__markers_strings") == 0)
> +			info->markers_strings_sec = i;
>  
>  		if (sechdrs[i].sh_type != SHT_SYMTAB)
>  			continue;
> @@ -1249,6 +1253,62 @@ static int exit_section_ref_ok(const char *name)
>  	return 0;
>  }
>  
> +static void get_markers(struct elf_info *info, struct module *mod)
> +{
> +	const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec];
> +	const char *strings = (const char *) info->hdr + sh->sh_offset;
> +	const Elf_Sym *sym, *first_sym, *last_sym;
> +	size_t n;
> +
> +	if (!info->markers_strings_sec)
> +		return;
> +
> +	/*
> +	 * First count the strings.  We look for all the symbols defined
> +	 * in the __markers_strings section named __mstrtab_*.  For
> +	 * these local names, the compiler puts a random .NNN suffix on,
> +	 * so the names don't correspond exactly.
> +	 */
> +	first_sym = last_sym = NULL;
> +	n = 0;
> +	for (sym = info->symtab_start; sym < info->symtab_stop; sym++)
> +		if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
> +		    sym->st_shndx == info->markers_strings_sec &&
> +		    !strncmp(info->strtab + sym->st_name,
> +			     "__mstrtab_", sizeof "__mstrtab_" - 1)) {
> +			if (first_sym == NULL)
> +				first_sym = sym;
> +			last_sym = sym;
> +			++n;
> +		}
> +
> +	if (n == 0)
> +		return;
> +
> +	/*
> +	 * Now collect each name and format into a line for the output.
> +	 * Lines look like:
> +	 *	marker_name	vmlinux	marker %s format %d
> +	 * The format string after the second \t can use whitespace.
> +	 */
> +	mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n));
> +	mod->nmarkers = n;
> +
> +	n = 0;
> +	for (sym = first_sym; sym <= last_sym; sym++)
> +		if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
> +		    sym->st_shndx == info->markers_strings_sec &&
> +		    !strncmp(info->strtab + sym->st_name,
> +			     "__mstrtab_", sizeof "__mstrtab_" - 1)) {
> +			const char *name = strings + sym->st_value;
> +			const char *fmt = strchr(name, '\0') + 1;
> +			char *line = NULL;
> +			asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
> +			NOFAIL(line);
> +			mod->markers[n++] = line;
> +		}
> +}
> +
>  static void read_symbols(char *modname)
>  {
>  	const char *symname;
> @@ -1301,6 +1361,8 @@ static void read_symbols(char *modname)
>  		get_src_version(modname, mod->srcversion,
>  				sizeof(mod->srcversion)-1);
>  
> +	get_markers(&info, mod);
> +
>  	parse_elf_finish(&info);
>  
>  	/* Our trick to get versioning for struct_module - it's
> @@ -1649,6 +1711,91 @@ static void write_dump(const char *fname)
>  	write_if_changed(&buf, fname);
>  }
>  
> +static void add_marker(struct module *mod, const char *name, const char *fmt)
> +{
> +	char *line = NULL;
> +	asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
> +	NOFAIL(line);
> +
> +	mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) *
> +						     sizeof mod->markers[0])));
> +	mod->markers[mod->nmarkers++] = line;
> +}
> +
> +static void read_markers(const char *fname)
> +{
> +	unsigned long size, pos = 0;
> +	void *file = grab_file(fname, &size);
> +	char *line;
> +
> +        if (!file)
> +		/* No old markers, silently ignore */
> +		return;
> +
> +	while ((line = get_next_line(&pos, file, size))) {
> +		char *marker, *modname, *fmt;
> +		struct module *mod;
> +
> +		marker = line;
> +		if (!(modname = strchr(marker, '\t')))
> +			goto fail;
> +		*modname++ = '\0';
> +		if (!(fmt = strchr(modname, '\t')))
> +			goto fail;
> +		*fmt++ = '\0';
> +		if (*marker == '\0' || *modname == '\0')
> +			goto fail;
> +
> +		if (!(mod = find_module(modname))) {
> +			if (is_vmlinux(modname)) {
> +				have_vmlinux = 1;
> +			}
> +			mod = new_module(NOFAIL(strdup(modname)));
> +			mod->skip = 1;
> +		}
> +
> +		add_marker(mod, marker, fmt);
> +	}
> +	return;
> +fail:
> +	fatal("parse error in markers list file\n");
> +}
> +
> +static int compare_strings(const void *a, const void *b)
> +{
> +	return strcmp(*(const char **) a, *(const char **) b);
> +}
> +
> +static void write_markers(const char *fname)
> +{
> +	struct buffer buf = { };
> +	struct module *mod;
> +	size_t i;
> +
> +	for (mod = modules; mod; mod = mod->next)
> +		if ((!external_module || !mod->skip) && mod->markers != NULL) {
> +			/*
> +			 * Sort the strings so we can skip duplicates when
> +			 * we write them out.
> +			 */
> +			qsort(mod->markers, mod->nmarkers,
> +			      sizeof mod->markers[0], &compare_strings);
> +			for (i = 0; i < mod->nmarkers; ++i) {
> +				char *line = mod->markers[i];
> +				buf_write(&buf, line, strlen(line));
> +				while (i + 1 < mod->nmarkers &&
> +				       !strcmp(mod->markers[i],
> +					       mod->markers[i + 1]))
> +					free(mod->markers[i++]);
> +				free(mod->markers[i]);
> +			}
> +			free(mod->markers);
> +			mod->markers = NULL;
> +		}
> +
> +	write_if_changed(&buf, fname);
> +}
> +
>  int main(int argc, char **argv)
>  {
>  	struct module *mod;
> @@ -1656,10 +1803,12 @@ int main(int argc, char **argv)
>  	char fname[SZ];
>  	char *kernel_read = NULL, *module_read = NULL;
>  	char *dump_write = NULL;
> +	char *markers_read = NULL;
> +	char *markers_write = NULL;
>  	int opt;
>  	int err;
>  
> -	while ((opt = getopt(argc, argv, "i:I:mso:aw")) != -1) {
> +	while ((opt = getopt(argc, argv, "i:I:mso:awM:K:")) != -1) {
>  		switch(opt) {
>  			case 'i':
>  				kernel_read = optarg;
> @@ -1683,6 +1832,12 @@ int main(int argc, char **argv)
>  			case 'w':
>  				warn_unresolved = 1;
>  				break;
> +			case 'M':
> +				markers_write = optarg;
> +				break;
> +			case 'K':
> +				markers_read = optarg;
> +				break;
>  			default:
>  				exit(1);
>  		}
> @@ -1724,5 +1879,11 @@ int main(int argc, char **argv)
>  	if (dump_write)
>  		write_dump(dump_write);
>  
> +	if (markers_read)
> +		read_markers(markers_read);
> +
> +	if (markers_write)
> +		write_markers(markers_write);
> +
>  	return err;
>  }
> diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
> index 0ffed17..175301a 100644
> --- a/scripts/mod/modpost.h
> +++ b/scripts/mod/modpost.h
> @@ -110,6 +110,8 @@ struct module {
>  	int has_init;
>  	int has_cleanup;
>  	struct buffer dev_table_buf;
> +	char **markers;
> +	size_t nmarkers;
>  	char	     srcversion[25];
>  };
>  
> @@ -124,6 +126,7 @@ struct elf_info {
>  	Elf_Section  export_gpl_sec;
>  	Elf_Section  export_unused_gpl_sec;
>  	Elf_Section  export_gpl_future_sec;
> +	Elf_Section  markers_strings_sec;
>  	const char   *strtab;
>  	char	     *modinfo;
>  	unsigned int modinfo_len;
> 
> 

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-09-17 18:46 [patch 0/4] " Mathieu Desnoyers
@ 2007-09-17 18:46 ` Mathieu Desnoyers
  0 siblings, 0 replies; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-09-17 18:46 UTC (permalink / raw)
  To: akpm, linux-kernel
  Cc: Mathieu Desnoyers, Frank Ch. Eigler, Christoph Hellwig, Rusty Russell

[-- Attachment #1: linux-kernel-markers-architecture-independent-code.patch --]
[-- Type: text/plain, Size: 29168 bytes --]

The marker activation functions sits in kernel/marker.c. A hash table is used
to keep track of the registered probes and armed markers, so the markers within
a newly loaded module that should be active can be activated at module load
time.

marker_query has been removed. marker_get_first, marker_get_next and
marker_release should be used as iterators on the markers.

Changelog:
- markers_mutex now nests inside module_mutex rather than the opposite.
- Iteration on modules is now done in module.c.
- module_mutex is not exported anymore.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: "Frank Ch. Eigler" <fche@redhat.com>
CC: Christoph Hellwig <hch@infradead.org>
CC: Rusty Russell <rusty@rustcorp.com.au>
---

 include/asm-generic/vmlinux.lds.h |   11 
 include/linux/marker.h            |  175 ++++++++++
 include/linux/module.h            |   18 +
 kernel/marker.c                   |  608 ++++++++++++++++++++++++++++++++++++++
 kernel/module.c                   |   66 ++++
 5 files changed, 875 insertions(+), 3 deletions(-)

Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h	2007-09-14 10:11:18.000000000 -0400
+++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h	2007-09-14 10:11:31.000000000 -0400
@@ -12,7 +12,11 @@
 /* .data section */
 #define DATA_DATA							\
 	*(.data)							\
-	*(.data.init.refok)
+	*(.data.init.refok)						\
+	. = ALIGN(8);							\
+	VMLINUX_SYMBOL(__start___markers) = .;				\
+	*(__markers)							\
+	VMLINUX_SYMBOL(__stop___markers) = .;
 
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
@@ -129,6 +133,11 @@
 		VMLINUX_SYMBOL(__stop___immediate) = .;			\
 	}								\
 									\
+	/* Markers: strings */						\
+        __markers_strings : AT(ADDR(__markers_strings) - LOAD_OFFSET) {	\
+		*(__markers_strings)					\
+ 	}								\
+									\
 	/* Kernel symbol table: strings */				\
         __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {	\
 		*(__ksymtab_strings)					\
Index: linux-2.6-lttng/include/linux/marker.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/linux/marker.h	2007-09-17 12:43:54.000000000 -0400
@@ -0,0 +1,175 @@
+#ifndef _LINUX_MARKER_H
+#define _LINUX_MARKER_H
+
+/*
+ * Code markup for dynamic and static tracing.
+ *
+ * See Documentation/marker.txt.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/immediate.h>
+#include <linux/types.h>
+
+struct module;
+struct __mark_marker;
+
+/**
+ * marker_probe_func - Type of a marker probe function
+ * @mdata: pointer of type struct __mark_marker
+ * @private_data: caller site private data
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Type of marker probe functions. They receive the mdata and need to parse the
+ * format string to recover the variable argument list.
+ */
+typedef void marker_probe_func(const struct __mark_marker *mdata,
+	void *private_data, const char *fmt, ...);
+
+struct __mark_marker {
+	const char *name;	/* Marker name */
+	const char *format;	/* Marker format string, describing the
+				 * variable argument list.
+				 */
+	const char *args;	/* List of arguments litteraly transformed
+				 * into a string: "arg1, arg2, arg3".
+				 */
+	DEFINE_IMMEDIATE(char, state);	/* Immediate value state. */
+	marker_probe_func *call;/* Probe handler function pointer */
+	void *pdata;		/* Private probe data */
+} __attribute__((aligned(8)));
+
+#ifdef CONFIG_MARKERS
+
+/*
+ * Generic marker flavor always available.
+ * Note : the empty asm volatile with read constraint is used here instead of a
+ * "used" attribute to fix a gcc 4.1.x bug.
+ * Make sure the alignment of the structure in the __markers section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ */
+#define __trace_mark(generic, name, call_data, format, args...)		\
+	do {								\
+		static const char __mstrtab_name_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= #name;						\
+		static const char __mstrtab_format_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= format;						\
+		static const char __mstrtab_args_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= #args;						\
+		static struct __mark_marker __mark_##name		\
+		__attribute__((section("__markers"))) =			\
+		{ __mstrtab_name_##name, __mstrtab_format_##name,	\
+		__mstrtab_args_##name, 0,				\
+		__mark_empty_function, NULL };				\
+		asm volatile ( "" : : "i" (&__mark_##name));		\
+		__mark_check_format(format, ## args);			\
+		if (!generic) {						\
+			if (unlikely(immediate_read(__mark_##name.state))) { \
+				preempt_disable();			\
+				(*__mark_##name.call)			\
+					(&__mark_##name, call_data,	\
+					format, ## args);		\
+				preempt_enable();			\
+			}						\
+		} else {						\
+			if (unlikely(_immediate_read(__mark_##name.state))) { \
+				preempt_disable();			\
+				(*__mark_##name.call)			\
+					(&__mark_##name, call_data,	\
+					format, ## args);		\
+				preempt_enable();			\
+			}						\
+		}							\
+	} while (0)
+
+extern void marker_update_probe_range(struct __mark_marker *begin,
+	struct __mark_marker *end, struct module *probe_module, int *refcount);
+#else /* !CONFIG_MARKERS */
+#define __trace_mark(generic, name, call_data, format, args...) \
+		__mark_check_format(format, ## args)
+static inline void marker_update_probe_range(struct __mark_marker *begin,
+	struct __mark_marker *end, struct module *probe_module, int *refcount)
+{ }
+#endif /* CONFIG_MARKERS */
+
+/**
+ * trace_mark - Marker using code patching
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker using optimized code patching technique (immediate_read())
+ * to be enabled.
+ */
+#define trace_mark(name, format, args...) \
+	__trace_mark(0, name, NULL, format, ## args)
+
+/**
+ * _trace_mark - Marker using variable read
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker using a standard memory read (_immediate_read()) to be
+ * enabled. Should be used for markers in __init and __exit functions and in
+ * lockdep code.
+ */
+#define _trace_mark(name, format, args...) \
+	__trace_mark(1, name, NULL, format, ## args)
+
+#define MARK_MAX_FORMAT_LEN	1024
+
+/**
+ * MARK_NOARGS - Format string for a marker with no argument.
+ */
+#define MARK_NOARGS " "
+
+/* To be used for string format validity checking with gcc */
+static inline void __attribute__ ((format (printf, 1, 2)))
+	__mark_check_format(const char *fmt, ...) { }
+
+extern marker_probe_func __mark_empty_function;
+
+/*
+ * Connect a probe to a marker.
+ * pdata must be a valid allocated memory address, or NULL.
+ */
+extern int marker_probe_register(const char *name, const char *format,
+				marker_probe_func *probe, void *pdata);
+
+/*
+ * Returns the pdata given to marker_probe_register.
+ */
+extern void *marker_probe_unregister(const char *name);
+/*
+ * Unregister a marker by providing the registered pdata.
+ */
+extern void *marker_probe_unregister_pdata(void *pdata);
+
+extern int marker_arm(const char *name);
+extern int marker_disarm(const char *name);
+
+struct marker_iter {
+	struct module *module;
+	struct __mark_marker *marker;
+};
+
+extern void marker_iter_start(struct marker_iter *iter);
+extern void marker_iter_next(struct marker_iter *iter);
+extern void marker_iter_stop(struct marker_iter *iter);
+extern void marker_iter_reset(struct marker_iter *iter);
+extern void *marker_get_pdata(const char *name);
+extern int marker_get_iter_range(struct __mark_marker **marker,
+	struct __mark_marker *begin,
+	struct __mark_marker *end);
+
+#endif
Index: linux-2.6-lttng/include/linux/module.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/module.h	2007-09-14 10:11:18.000000000 -0400
+++ linux-2.6-lttng/include/linux/module.h	2007-09-14 10:11:31.000000000 -0400
@@ -16,6 +16,7 @@
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
 #include <linux/immediate.h>
+#include <linux/marker.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -376,6 +377,10 @@ struct module
 	const struct __immediate *immediate;
 	unsigned int num_immediate;
 #endif
+#ifdef CONFIG_MARKERS
+	struct __mark_marker *markers;
+	unsigned int num_markers;
+#endif
 };
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
@@ -482,6 +487,9 @@ extern void print_modules(void);
 extern void _module_immediate_update(void);
 extern void module_immediate_update(void);
 
+extern void module_update_markers(struct module *probe_module, int *refcount);
+extern int module_get_iter_markers(struct marker_iter *iter);
+
 #else /* !CONFIG_MODULES... */
 #define EXPORT_SYMBOL(sym)
 #define EXPORT_SYMBOL_GPL(sym)
@@ -589,6 +597,16 @@ static inline void module_immediate_upda
 {
 }
 
+static inline void module_update_markers(struct module *probe_module,
+		int *refcount)
+{
+}
+
+static inline int module_get_iter_markers(struct marker_iter *iter)
+{
+	return 0;
+}
+
 #endif /* CONFIG_MODULES */
 
 struct device_driver;
Index: linux-2.6-lttng/kernel/module.c
===================================================================
--- linux-2.6-lttng.orig/kernel/module.c	2007-09-14 10:11:30.000000000 -0400
+++ linux-2.6-lttng/kernel/module.c	2007-09-14 10:11:31.000000000 -0400
@@ -1720,6 +1720,8 @@ static struct module *load_module(void _
 	unsigned int unusedgplindex;
 	unsigned int unusedgplcrcindex;
 	unsigned int immediateindex;
+	unsigned int markersindex;
+	unsigned int markersstringsindex;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1972,6 +1974,8 @@ static struct module *load_module(void _
 	mod->num_immediate =
 		sechdrs[immediateindex].sh_size / sizeof(*mod->immediate);
 #endif
+	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
+ 	markersstringsindex = find_sec(hdr, sechdrs, secstrings, "__markers_strings");
 
 	mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
 	if (unusedcrcindex)
@@ -2013,6 +2017,11 @@ static struct module *load_module(void _
 		if (err < 0)
 			goto cleanup;
 	}
+#ifdef CONFIG_MARKERS
+	mod->markers = (void *)sechdrs[markersindex].sh_addr;
+	mod->num_markers =
+		sechdrs[markersindex].sh_size / sizeof(*mod->markers);
+#endif
 
         /* Find duplicate symbols */
 	err = verify_export_symbols(mod);
@@ -2037,12 +2046,16 @@ static struct module *load_module(void _
 		 goto nomodsectinfo;
 #endif
 
+	if (!mod->taints) {
 #ifdef CONFIG_IMMEDIATE
-	if (!mod->taints)
 		immediate_update_range(mod->immediate,
 			mod->immediate + mod->num_immediate);
 #endif
-
+#ifdef CONFIG_MARKERS
+		marker_update_probe_range(mod->markers,
+			mod->markers + mod->num_markers, NULL, NULL);
+#endif
+	}
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
@@ -2693,3 +2706,52 @@ void module_immediate_update(void)
 }
 EXPORT_SYMBOL_GPL(module_immediate_update);
 #endif
+
+#ifdef CONFIG_MARKERS
+void module_update_markers(struct module *probe_module, int *refcount)
+{
+	struct module *mod;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(mod, &modules, list)
+		if (!mod->taints)
+			marker_update_probe_range(mod->markers,
+				mod->markers + mod->num_markers,
+				probe_module, refcount);
+	mutex_unlock(&module_mutex);
+}
+EXPORT_SYMBOL_GPL(module_update_markers);
+
+/*
+ * Returns 0 if current not found.
+ * Returns 1 if current found.
+ */
+int module_get_iter_markers(struct marker_iter *iter)
+{
+	struct module *iter_mod;
+	int found = 0;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(iter_mod, &modules, list) {
+		if (!iter_mod->taints) {
+			/*
+			 * Sorted module list
+			 */
+			if (iter_mod < iter->module)
+				continue;
+			else if (iter_mod > iter->module)
+				iter->marker = NULL;
+			found = marker_get_iter_range(&iter->marker,
+				iter_mod->markers,
+				iter_mod->markers + iter_mod->num_markers);
+			if (found) {
+				iter->module = iter_mod;
+				break;
+			}
+		}
+	}
+	mutex_unlock(&module_mutex);
+	return found;
+}
+EXPORT_SYMBOL_GPL(module_get_iter_markers);
+#endif
Index: linux-2.6-lttng/kernel/marker.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/kernel/marker.c	2007-09-14 10:11:31.000000000 -0400
@@ -0,0 +1,608 @@
+/*
+ * Copyright (C) 2007 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/marker.h>
+#include <linux/err.h>
+#include <linux/immediate.h>
+
+extern struct __mark_marker __start___markers[];
+extern struct __mark_marker __stop___markers[];
+
+/*
+ * module_mutex nests inside markers_mutex. Markers mutex protects the builtin
+ * and module markers, the hash table and deferred_sync.
+ */
+DEFINE_MUTEX(markers_mutex);
+
+/*
+ * Marker deferred synchronization.
+ * Upon marker probe_unregister, we delay call to synchronize_sched() to
+ * accelerate mass unregistration (only when there is no more reference to a
+ * given module do we call synchronize_sched()). However, we need to make sure
+ * every critical region has ended before we re-arm a marker that has been
+ * unregistered and then registered back with a different probe data.
+ */
+static int deferred_sync;
+
+/*
+ * Marker hash table, containing the active markers.
+ * Protected by module_mutex.
+ */
+#define MARKER_HASH_BITS 6
+#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
+
+struct marker_entry {
+	struct hlist_node hlist;
+	char *format;
+	marker_probe_func *probe;
+	void *pdata;
+	int refcount;	/* Number of times armed. 0 if disarmed. */
+	char name[0];	/* Contains name'\0'format'\0' */
+};
+
+static struct hlist_head marker_table[MARKER_TABLE_SIZE];
+
+/**
+ * __mark_empty_function - Empty probe callback
+ * @mdata: pointer of type const struct __mark_marker
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Empty callback provided as a probe to the markers. By providing this to a
+ * disabled marker, we make sure the  execution flow is always valid even
+ * though the function pointer change and the marker enabling are two distinct
+ * operations that modifies the execution flow of preemptible code.
+ */
+void __mark_empty_function(const struct __mark_marker *mdata,
+	void *private_data,
+	const char *fmt, ...)
+{ }
+EXPORT_SYMBOL_GPL(__mark_empty_function);
+
+/*
+ * Get marker if the marker is present in the marker hash table.
+ * Must be called with markers_mutex held.
+ * Returns NULL if not present.
+ */
+static struct marker_entry *get_marker(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	u32 hash = jhash(name, strlen(name), 0);
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name))
+			return e;
+	}
+	return NULL;
+}
+
+/*
+ * Add the marker to the marker hash table. Must be called with markers_mutex
+ * held.
+ */
+static int add_marker(const char *name,
+	const char *format, marker_probe_func *probe, void *pdata)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t name_len = strlen(name) + 1;
+	size_t format_len = 0;
+	u32 hash = jhash(name, name_len-1, 0);
+
+	if (format)
+		format_len = strlen(format) + 1;
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			printk(KERN_NOTICE
+				"Marker %s busy, probe %p already installed\n",
+				name, e->probe);
+			return -EBUSY;	/* Already there */
+		}
+	}
+	/*
+	 * Using kmalloc here to allocate a variable length element. Could
+	 * cause some memory fragmentation if overused.
+	 */
+	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+			GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	memcpy(&e->name[0], name, name_len);
+	if (format) {
+		e->format = &e->name[name_len];
+		memcpy(e->format, format, format_len);
+		trace_mark(core_marker_format, "name %s format %s",
+				e->name, e->format);
+	} else
+		e->format = NULL;
+	e->probe = probe;
+	e->pdata = pdata;
+	e->refcount = 0;
+	hlist_add_head(&e->hlist, head);
+	return 0;
+}
+
+/*
+ * Remove the marker from the marker hash table. Must be called with mutex_lock
+ * held.
+ */
+static void *remove_marker(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	int found = 0;
+	size_t len = strlen(name) + 1;
+	void *pdata = NULL;
+	u32 hash = jhash(name, len-1, 0);
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			break;
+		}
+	}
+	if (found) {
+		pdata = e->pdata;
+		hlist_del(&e->hlist);
+		kfree(e);
+	}
+	return pdata;
+}
+
+/*
+ * Set the mark_entry format to the format found in the element.
+ */
+static int marker_set_format(struct marker_entry **entry, const char *format)
+{
+	struct marker_entry *e;
+	size_t name_len = strlen((*entry)->name) + 1;
+	size_t format_len = strlen(format) + 1;
+
+	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+			GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	memcpy(&e->name[0], (*entry)->name, name_len);
+	e->format = &e->name[name_len];
+	memcpy(e->format, format, format_len);
+	e->probe = (*entry)->probe;
+	e->pdata = (*entry)->pdata;
+	e->refcount = (*entry)->refcount;
+	hlist_add_before(&e->hlist, &(*entry)->hlist);
+	hlist_del(&(*entry)->hlist);
+	kfree(*entry);
+	*entry = e;
+	trace_mark(core_marker_format, "name %s format %s",
+			e->name, e->format);
+	return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one marker.
+ */
+static int set_marker(struct marker_entry **entry,
+			struct __mark_marker *elem)
+{
+	int ret;
+	BUG_ON(strcmp((*entry)->name, elem->name) != 0);
+
+	if ((*entry)->format) {
+		if (strcmp((*entry)->format, elem->format) != 0) {
+			printk(KERN_NOTICE
+				"Format mismatch for probe %s "
+				"(%s), marker (%s)\n",
+				(*entry)->name,
+				(*entry)->format,
+				elem->format);
+			return -EPERM;
+		}
+	} else {
+		ret = marker_set_format(entry, elem->format);
+		if (ret)
+			return ret;
+	}
+	elem->call = (*entry)->probe;
+	elem->pdata = (*entry)->pdata;
+	_immediate_set(elem->state, 1);
+	return 0;
+}
+
+/*
+ * Disable a marker and its probe callback.
+ * Note: only after a synchronize_sched() issued after setting elem->call to the
+ * empty function insures that the original callback is not used anymore. This
+ * insured by preemption disabling around the call site.
+ */
+static void disable_marker(struct __mark_marker *elem)
+{
+	_immediate_set(elem->state, 0);
+	elem->call = __mark_empty_function;
+	/*
+	 * Leave the pdata and id there, because removal is racy and should be
+	 * done only after a synchronize_sched(). These are never used until
+	 * the next initialization anyway.
+	 */
+}
+
+/**
+ * marker_update_probe_range - Update a probe range
+ * @begin: beginning of the range
+ * @end: end of the range
+ * @probe_module: module address of the probe being updated
+ * @refcount: number of references left to the given probe_module (out)
+ *
+ * Updates the probe callback corresponding to a range of markers.
+ * Must be called with markers_mutex held.
+ */
+void marker_update_probe_range(
+	struct __mark_marker *begin,
+	struct __mark_marker *end,
+	struct module *probe_module,
+	int *refcount)
+{
+	struct __mark_marker *iter;
+	struct marker_entry *mark_entry;
+
+	for (iter = begin; iter < end; iter++) {
+		mark_entry = get_marker(iter->name);
+		if (mark_entry && mark_entry->refcount) {
+			set_marker(&mark_entry, iter);
+			/*
+			 * ignore error, continue
+			 */
+			if (probe_module)
+				if (probe_module ==
+			__module_text_address((unsigned long)mark_entry->probe))
+					(*refcount)++;
+		} else {
+			disable_marker(iter);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(marker_update_probe_range);
+
+/*
+ * Update probes, removing the faulty probes.
+ * Issues a synchronize_sched() when no reference to the module passed
+ * as parameter is found in the probes so the probe module can be
+ * safely unloaded from now on.
+ */
+static inline void marker_update_probes(struct module *probe_module)
+{
+	int refcount = 0;
+
+	mutex_lock(&markers_mutex);
+	/* Core kernel markers */
+	marker_update_probe_range(__start___markers,
+			__stop___markers, probe_module, &refcount);
+	/* Markers in modules. */
+	module_update_markers(probe_module, &refcount);
+	if (probe_module && refcount == 0) {
+		synchronize_sched();
+		deferred_sync = 0;
+	}
+	mutex_unlock(&markers_mutex);
+}
+
+/**
+ * marker_probe_register -  Connect a probe to a marker
+ * @name: marker name
+ * @format: format string
+ * @probe: probe handler
+ * @pdata: probe private data
+ *
+ * pdata must be a valid allocated memory address, or NULL.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_probe_register(const char *name, const char *format,
+			marker_probe_func *probe, void *pdata)
+{
+	struct marker_entry *entry;
+	int ret = 0, need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (entry && entry->refcount) {
+		ret = -EBUSY;
+		goto end;
+	}
+	if (deferred_sync) {
+		synchronize_sched();
+		deferred_sync = 0;
+	}
+	ret = add_marker(name, format, probe, pdata);
+	if (ret)
+		goto end;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(NULL);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_probe_register);
+
+/**
+ * marker_probe_unregister -  Disconnect a probe from a marker
+ * @name: marker name
+ *
+ * Returns the pdata given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister(const char *name)
+{
+	struct module *probe_module;
+	struct marker_entry *entry;
+	void *pdata;
+	int need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (!entry) {
+		pdata = ERR_PTR(-ENOENT);
+		goto end;
+	}
+	entry->refcount = 0;
+	/* In what module is the probe handler ? */
+	probe_module = __module_text_address((unsigned long)entry->probe);
+	pdata = remove_marker(name);
+	deferred_sync = 1;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(probe_module);
+	return pdata;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister);
+
+/**
+ * marker_probe_unregister_pdata -  Disconnect a probe from a marker
+ * @pdata: probe private data
+ *
+ * Unregister a marker by providing the registered pdata.
+ * Returns the pdata given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister_pdata(void *pdata)
+{
+	struct module *probe_module;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *entry;
+	int found = 0;
+	unsigned int i;
+	int need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	for (i = 0; i < MARKER_TABLE_SIZE; i++) {
+		head = &marker_table[i];
+		hlist_for_each_entry(entry, node, head, hlist) {
+			if (entry->pdata == pdata) {
+				found = 1;
+				goto iter_end;
+			}
+		}
+	}
+iter_end:
+	if (!found) {
+		pdata = ERR_PTR(-ENOENT);
+		goto end;
+	}
+	entry->refcount = 0;
+	/* In what module is the probe handler ? */
+	probe_module = __module_text_address((unsigned long)entry->probe);
+	pdata = remove_marker(entry->name);
+	deferred_sync = 1;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(probe_module);
+	return pdata;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister_pdata);
+
+/**
+ * marker_arm - Arm a marker
+ * @name: marker name
+ *
+ * Activate a marker. It keeps a reference count of the number of
+ * arming/disarming done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_arm(const char *name)
+{
+	struct marker_entry * entry;
+	int ret = 0, need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (!entry) {
+		ret = -ENOENT;
+		goto end;
+	}
+	/*
+	 * Only need to update probes when refcount passes from 0 to 1.
+	 */
+	if (entry->refcount++)
+		goto end;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(NULL);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_arm);
+
+/**
+ * marker_disarm - Disarm a marker
+ * @name: marker name
+ *
+ * Disarm a marker. It keeps a reference count of the number of arming/disarming
+ * done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_disarm(const char *name)
+{
+	struct marker_entry * entry;
+	int ret = 0, need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (!entry) {
+		ret = -ENOENT;
+		goto end;
+	}
+	/*
+	 * Only permit decrement refcount if higher than 0.
+	 * Do probe update only on 1 -> 0 transition.
+	 */
+	if (entry->refcount) {
+		if (--entry->refcount)
+			goto end;
+	} else {
+		ret = -EPERM;
+		goto end;
+	}
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(NULL);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_disarm);
+
+/**
+ * marker_get_pdata - Get a marker's probe private data
+ * @name: marker name
+ *
+ * Returns the pdata pointer, or an ERR_PTR.
+ * The pdata pointer should _only_ be dereferenced if the caller is the owner of
+ * the data, or its content could vanish. This is mostly used to confirm that a
+ * caller is the owner of a registered probe.
+ */
+void *marker_get_pdata(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t name_len = strlen(name) + 1;
+	u32 hash = jhash(name, name_len-1, 0);
+	int found = 0;
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			return e->pdata;
+		}
+	}
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(marker_get_pdata);
+
+/**
+ * marker_get_iter_range - Get a next marker iterator given a range.
+ * @marker: current markers (in), next marker (out)
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Returns whether a next marker has been found (1) or not (0).
+ * Will return the first marker in the range if the input marker is NULL.
+ */
+int marker_get_iter_range(struct __mark_marker **marker,
+	struct __mark_marker *begin,
+	struct __mark_marker *end)
+{
+	int found = 0;
+
+	if (!*marker && begin != end) {
+		found = 1;
+		*marker = begin;
+	} else if (*marker >= begin && *marker < end) {
+		found = 1;
+		/*
+		 * *marker is known to be a valid marker from now on.
+		 */
+	}
+	return found;
+}
+EXPORT_SYMBOL_GPL(marker_get_iter_range);
+
+static inline void marker_get_iter(struct marker_iter *iter)
+{
+	int found = 0;
+
+	/* Core kernel markers */
+	if (!iter->module) {
+		found = marker_get_iter_range(&iter->marker,
+				__start___markers, __stop___markers);
+		if (found)
+			goto end;
+	}
+	/* Markers in modules. */
+	found = module_get_iter_markers(iter);
+end:
+	if (!found)
+		marker_iter_reset(iter);
+}
+
+void marker_iter_start(struct marker_iter *iter)
+{
+	mutex_lock(&markers_mutex);
+	marker_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(marker_iter_start);
+
+void marker_iter_next(struct marker_iter *iter)
+{
+	iter->marker++;
+	/*
+	 * iter->marker may be invalid because we blindly incremented it.
+	 * Make sure it is valid by marshalling on the markers, getting the
+	 * markers from following modules if necessary.
+	 */
+	marker_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(marker_iter_next);
+
+void marker_iter_stop(struct marker_iter *iter)
+{
+	mutex_unlock(&markers_mutex);
+}
+EXPORT_SYMBOL_GPL(marker_iter_stop);
+
+void marker_iter_reset(struct marker_iter *iter)
+{
+	iter->module = NULL;
+	iter->marker = NULL;
+}
+EXPORT_SYMBOL_GPL(marker_iter_reset);

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-08-27 16:05 [patch 0/4] Linux Kernel Markers Mathieu Desnoyers
@ 2007-08-27 16:05 ` Mathieu Desnoyers
  0 siblings, 0 replies; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-08-27 16:05 UTC (permalink / raw)
  To: akpm, linux-kernel; +Cc: Mathieu Desnoyers

[-- Attachment #1: linux-kernel-markers-architecture-independent-code.patch --]
[-- Type: text/plain, Size: 28513 bytes --]

The marker activation functions sits in kernel/marker.c. A hash table is used
to keep track of the registered probes and armed markers, so the markers within
a newly loaded module that should be active can be activated at module load
time.

marker_query has been removed. marker_get_first, marker_get_next and
marker_release should be used as iterators on the markers.

Changelog:
- markers_mutex now nests inside module_mutex rather than the opposite.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Reviewed-by: Christoph Hellwig <hch@infradead.org>
Reviewed-by: Rusty Russell <rusty@rustcorp.com.au>
Reviewed-by: "Frank Ch. Eigler" <fche@redhat.com>
---

 include/asm-generic/vmlinux.lds.h |   11 
 include/linux/marker.h            |  169 +++++++++
 include/linux/module.h            |    5 
 kernel/marker.c                   |  699 ++++++++++++++++++++++++++++++++++++++
 kernel/module.c                   |   13 
 5 files changed, 896 insertions(+), 1 deletion(-)

Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h	2007-08-25 14:41:21.000000000 -0400
+++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h	2007-08-25 14:41:22.000000000 -0400
@@ -12,7 +12,11 @@
 /* .data section */
 #define DATA_DATA							\
 	*(.data)							\
-	*(.data.init.refok)
+	*(.data.init.refok)						\
+	. = ALIGN(8);							\
+	VMLINUX_SYMBOL(__start___markers) = .;				\
+	*(__markers)							\
+	VMLINUX_SYMBOL(__stop___markers) = .;
 
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
@@ -129,6 +133,11 @@
 		VMLINUX_SYMBOL(__stop___immediate) = .;			\
 	}								\
 									\
+	/* Markers: strings */						\
+        __markers_strings : AT(ADDR(__markers_strings) - LOAD_OFFSET) {	\
+		*(__markers_strings)					\
+ 	}								\
+									\
 	/* Kernel symbol table: strings */				\
         __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {	\
 		*(__ksymtab_strings)					\
Index: linux-2.6-lttng/include/linux/marker.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/linux/marker.h	2007-08-25 14:41:22.000000000 -0400
@@ -0,0 +1,169 @@
+#ifndef _LINUX_MARKER_H
+#define _LINUX_MARKER_H
+
+/*
+ * Code markup for dynamic and static tracing.
+ *
+ * See Documentation/marker.txt.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/immediate.h>
+#include <linux/types.h>
+
+struct module;
+struct __mark_marker;
+
+/**
+ * marker_probe_func - Type of a marker probe function
+ * @mdata: pointer of type struct __mark_marker
+ * @private_data: caller site private data
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Type of marker probe functions. They receive the mdata and need to parse the
+ * format string to recover the variable argument list.
+ */
+typedef void marker_probe_func(const struct __mark_marker *mdata,
+	void *private_data, const char *fmt, ...);
+
+struct __mark_marker {
+	const char *name;	/* Marker name */
+	const char *format;	/* Marker format string, describing the
+				 * variable argument list.
+				 */
+	const char *args;	/* List of arguments litteraly transformed
+				 * into a string: "arg1, arg2, arg3".
+				 */
+	immediate_char_t state;	/* Immediate value state. */
+	marker_probe_func *call;/* Probe handler function pointer */
+	void *pdata;		/* Private probe data */
+} __attribute__((aligned(8)));
+
+#ifdef CONFIG_MARKERS
+
+/*
+ * Generic marker flavor always available.
+ * Note : the empty asm volatile with read constraint is used here instead of a
+ * "used" attribute to fix a gcc 4.1.x bug.
+ * Make sure the alignment of the structure in the __markers section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ */
+#define __trace_mark(generic, name, call_data, format, args...)		\
+	do {								\
+		static const char __mstrtab_name_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= #name;						\
+		static const char __mstrtab_format_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= format;						\
+		static const char __mstrtab_args_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= #args;						\
+		static struct __mark_marker __mark_##name		\
+		__attribute__((section("__markers"))) =			\
+		{ __mstrtab_name_##name, __mstrtab_format_##name,	\
+		__mstrtab_args_##name, { 0 },				\
+		__mark_empty_function, NULL };				\
+		asm volatile ( "" : : "i" (&__mark_##name));		\
+		__mark_check_format(format, ## args);			\
+		if (!generic) {						\
+			immediate_if (&__mark_##name.state) { 		\
+				preempt_disable();			\
+				(*__mark_##name.call)			\
+					(&__mark_##name, call_data,	\
+					format, ## args);		\
+				preempt_enable();			\
+			}						\
+		} else {						\
+			_immediate_if (&__mark_##name.state) { 		\
+				preempt_disable();			\
+				(*__mark_##name.call)			\
+					(&__mark_##name, call_data,	\
+					format, ## args);		\
+				preempt_enable();			\
+			}						\
+		}							\
+	} while (0)
+
+extern void module_marker_update(struct module *mod);
+#else /* !CONFIG_MARKERS */
+#define __trace_mark(generic, name, call_data, format, args...) \
+		__mark_check_format(format, ## args)
+static inline void module_marker_update(struct module *mod) { }
+#endif /* CONFIG_MARKERS */
+
+/**
+ * trace_mark - Marker using code patching
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker using optimized code patching technique (immediate_if ())
+ * to be enabled.
+ */
+#define trace_mark(name, format, args...) \
+	__trace_mark(0, name, NULL, format, ## args)
+
+/**
+ * _trace_mark - Marker using variable read
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker using a standard memory read (_immediate_if ()) to be
+ * enabled. Should be used for markers in __init and __exit functions and in
+ * lockdep code.
+ */
+#define _trace_mark(name, format, args...) \
+	__trace_mark(1, name, NULL, format, ## args)
+
+#define MARK_MAX_FORMAT_LEN	1024
+
+/**
+ * MARK_NOARGS - Format string for a marker with no argument.
+ */
+#define MARK_NOARGS " "
+
+/* To be used for string format validity checking with gcc */
+static inline void __attribute__ ((format (printf, 1, 2)))
+	__mark_check_format(const char *fmt, ...) { }
+
+extern marker_probe_func __mark_empty_function;
+
+/*
+ * Connect a probe to a markers.
+ * pdata must be a valid allocated memory address, or NULL.
+ */
+extern int marker_probe_register(const char *name, const char *format,
+				marker_probe_func *probe, void *pdata);
+
+/*
+ * Returns the pdata given to marker_probe_register.
+ */
+extern void *marker_probe_unregister(const char *name);
+/*
+ * Unregister a marker by providing the registered pdata.
+ */
+extern void *marker_probe_unregister_pdata(void *pdata);
+
+extern int marker_arm(const char *name);
+extern int marker_disarm(const char *name);
+
+struct marker_iter {
+	struct module *module;
+	struct __mark_marker *marker;
+};
+
+extern void marker_iter_start(struct marker_iter *iter);
+extern void marker_iter_next(struct marker_iter *iter);
+extern void marker_iter_stop(struct marker_iter *iter);
+extern void marker_iter_reset(struct marker_iter *iter);
+extern void *marker_get_pdata(const char *name);
+
+#endif
Index: linux-2.6-lttng/include/linux/module.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/module.h	2007-08-25 14:41:21.000000000 -0400
+++ linux-2.6-lttng/include/linux/module.h	2007-08-25 17:26:00.000000000 -0400
@@ -16,6 +16,7 @@
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
 #include <linux/immediate.h>
+#include <linux/marker.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -380,6 +381,10 @@ struct module
 	const struct __immediate *immediate;
 	unsigned int num_immediate;
 #endif
+#ifdef CONFIG_MARKERS
+	struct __mark_marker *markers;
+	unsigned int num_markers;
+#endif
 };
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
Index: linux-2.6-lttng/kernel/module.c
===================================================================
--- linux-2.6-lttng.orig/kernel/module.c	2007-08-25 14:41:22.000000000 -0400
+++ linux-2.6-lttng/kernel/module.c	2007-08-25 17:26:00.000000000 -0400
@@ -1722,6 +1722,8 @@ static struct module *load_module(void _
 	unsigned int unusedgplindex;
 	unsigned int unusedgplcrcindex;
 	unsigned int immediateindex = 0;
+	unsigned int markersindex = 0;
+	unsigned int markersstringsindex = 0;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1821,6 +1823,10 @@ static struct module *load_module(void _
 #ifdef CONFIG_IMMEDIATE
 	immediateindex = find_sec(hdr, sechdrs, secstrings, "__immediate");
 #endif
+#ifdef CONFIG_MARKERS
+	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
+ 	markersstringsindex = find_sec(hdr, sechdrs, secstrings, "__markers_strings");
+#endif
 
 	/* Don't keep modinfo section */
 	sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -2017,6 +2023,11 @@ static struct module *load_module(void _
 		if (err < 0)
 			goto cleanup;
 	}
+#ifdef CONFIG_MARKERS
+	mod->markers = (void *)sechdrs[markersindex].sh_addr;
+	mod->num_markers =
+		sechdrs[markersindex].sh_size / sizeof(*mod->markers);
+#endif
 
         /* Find duplicate symbols */
 	err = verify_export_symbols(mod);
@@ -2042,6 +2053,8 @@ static struct module *load_module(void _
 	 }
 #endif
 
+	module_marker_update(mod);
+
 	module_immediate_setup(mod);
 
 	err = module_finalize(hdr, sechdrs, mod);
Index: linux-2.6-lttng/kernel/marker.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/kernel/marker.c	2007-08-25 17:25:57.000000000 -0400
@@ -0,0 +1,699 @@
+/*
+ * Copyright (C) 2007 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/marker.h>
+#include <linux/err.h>
+#include <linux/immediate.h>
+
+extern struct __mark_marker __start___markers[];
+extern struct __mark_marker __stop___markers[];
+
+/*
+ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
+ * and module markers, the hash table and deferred_sync.
+ */
+DEFINE_MUTEX(markers_mutex);
+
+/*
+ * Marker deferred synchronization.
+ * Upon marker probe_unregister, we delay call to synchronize_sched() to
+ * accelerate mass unregistration (only when there is no more reference to a
+ * give module do we call synchronize_sched()). However, we need to make sure
+ * every critical region have ended before we re-arm a marker that has been
+ * unregistered and then registered back with a different probe data.
+ */
+static int deferred_sync;
+
+/*
+ * Marker hash table, containing the active markers.
+ * Protected by module_mutex.
+ */
+#define MARKER_HASH_BITS 6
+#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
+
+struct marker_entry {
+	struct hlist_node hlist;
+	char *format;
+	marker_probe_func *probe;
+	void *pdata;
+	int refcount;	/* Number of times armed. 0 if disarmed. */
+	char name[0];	/* Contains name'\0'format'\0' */
+};
+
+static struct hlist_head marker_table[MARKER_TABLE_SIZE];
+
+/**
+ * __mark_empty_function - Empty probe callback
+ * @mdata: pointer of type const struct __mark_marker
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Empty callback provided as a probe to the markers. By providing this to a
+ * disabled marker, we makes sure the  execution flow is always valid even
+ * though the function pointer change and the marker enabling are two distinct
+ * operations that modifies the execution flow of preemptible code.
+ */
+void __mark_empty_function(const struct __mark_marker *mdata,
+	void *private_data,
+	const char *fmt, ...)
+{ }
+EXPORT_SYMBOL_GPL(__mark_empty_function);
+
+/*
+ * Get marker if the marker is present in the marker hash table.
+ * Must be called with markers_mutex held.
+ * Returns NULL if not present.
+ */
+static struct marker_entry *get_marker(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	u32 hash = jhash(name, strlen(name), 0);
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name))
+			return e;
+	}
+	return NULL;
+}
+
+/*
+ * Add the marker to the marker hash table. Must be called with markers_mutex
+ * held.
+ */
+static int add_marker(const char *name,
+	const char *format, marker_probe_func *probe, void *pdata)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t name_len = strlen(name) + 1;
+	size_t format_len = 0;
+	u32 hash = jhash(name, name_len-1, 0);
+
+	if (format)
+		format_len = strlen(format) + 1;
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			printk(KERN_NOTICE
+				"Marker %s busy, probe %p already installed\n",
+				name, e->probe);
+			return -EBUSY;	/* Already there */
+		}
+	}
+	/*
+	 * Using kmalloc here to allocate a variable length element. Could
+	 * cause some memory fragmentation if overused.
+	 */
+	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+			GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	memcpy(&e->name[0], name, name_len);
+	if (format) {
+		e->format = &e->name[name_len];
+		memcpy(e->format, format, format_len);
+		trace_mark(core_marker_format, "name %s format %s",
+				e->name, e->format);
+	} else
+		e->format = NULL;
+	e->probe = probe;
+	e->pdata = pdata;
+	e->refcount = 0;
+	hlist_add_head(&e->hlist, head);
+	return 0;
+}
+
+/*
+ * Remove the marker from the marker hash table. Must be called with mutex_lock
+ * held.
+ */
+static void *remove_marker(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	int found = 0;
+	size_t len = strlen(name) + 1;
+	void *pdata = NULL;
+	u32 hash = jhash(name, len-1, 0);
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			break;
+		}
+	}
+	if (found) {
+		pdata = e->pdata;
+		hlist_del(&e->hlist);
+		kfree(e);
+	}
+	return pdata;
+}
+
+/*
+ * Set the mark_entry format to the format found in the element.
+ */
+static int marker_set_format(struct marker_entry **entry, const char *format)
+{
+	struct marker_entry *e;
+	size_t name_len = strlen((*entry)->name) + 1;
+	size_t format_len = strlen(format) + 1;
+
+	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+			GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	memcpy(&e->name[0], (*entry)->name, name_len);
+	e->format = &e->name[name_len];
+	memcpy(e->format, format, format_len);
+	e->probe = (*entry)->probe;
+	e->pdata = (*entry)->pdata;
+	e->refcount = (*entry)->refcount;
+	hlist_add_before(&e->hlist, &(*entry)->hlist);
+	hlist_del(&(*entry)->hlist);
+	kfree(*entry);
+	*entry = e;
+	trace_mark(core_marker_format, "name %s format %s",
+			e->name, e->format);
+	return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one marker.
+ */
+static int set_marker(struct marker_entry **entry,
+			struct __mark_marker *elem)
+{
+	int ret;
+	BUG_ON(strcmp((*entry)->name, elem->name) != 0);
+
+	if ((*entry)->format) {
+		if (strcmp((*entry)->format, elem->format) != 0) {
+			printk(KERN_NOTICE
+				"Format mismatch for probe %s "
+				"(%s), marker (%s)\n",
+				(*entry)->name,
+				(*entry)->format,
+				elem->format);
+			return -EPERM;
+		}
+	} else {
+		ret = marker_set_format(entry, elem->format);
+		if (ret)
+			return ret;
+	}
+	elem->call = (*entry)->probe;
+	elem->pdata = (*entry)->pdata;
+	_immediate_set(&elem->state, 1);
+	return 0;
+}
+
+/*
+ * Disable a marker and its probe callback.
+ * Note: only after a synchronize_sched() issued after setting elem->call to the
+ * empty function insures that the original callback is not used anymore. This
+ * insured by preemption disabling around the call site.
+ */
+static void disable_marker(struct __mark_marker *elem)
+{
+	_immediate_set(&elem->state, 0);
+	elem->call = __mark_empty_function;
+	/*
+	 * Leave the pdata and id there, because removal is racy and should be
+	 * done only after a synchronize_sched(). There are never used until
+	 * the next initialization anyway.
+	 */
+}
+
+/*
+ * Updates the probe callback corresponding to a range of markers.
+ * Must be called with markers_mutex held.
+ */
+static void marker_update_probe_range(
+	struct __mark_marker *begin,
+	struct __mark_marker *end,
+	struct module *probe_module,
+	int *refcount)
+{
+	struct __mark_marker *iter;
+	struct marker_entry *mark_entry;
+
+	for (iter = begin; iter < end; iter++) {
+		mark_entry = get_marker(iter->name);
+		if (mark_entry && mark_entry->refcount) {
+			set_marker(&mark_entry, iter);
+			/*
+			 * ignore error, continue
+			 */
+			if (probe_module)
+				if (probe_module ==
+			__module_text_address((unsigned long)mark_entry->probe))
+					(*refcount)++;
+		} else {
+			disable_marker(iter);
+		}
+	}
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Update module probes.
+ * Must be called with markers_mutex held.
+ */
+static inline void marker_update_probes_modules(struct module *probe_module,
+	int *refcount)
+{
+	struct module *mod;
+
+	list_for_each_entry(mod, &modules, list) {
+		if (!mod->taints) {
+			marker_update_probe_range(mod->markers,
+				mod->markers+mod->num_markers,
+				probe_module, refcount);
+		}
+	}
+}
+#else
+static inline void marker_update_probes_modules(struct module *probe_module,
+	int *refcount)
+{ }
+#endif
+
+/*
+ * Update probes, removing the faulty probes.
+ * Issues a synchronize_sched() when no reference to the module passed
+ * as parameter is found in the probes so the probe module can be
+ * safely unloaded from now on.
+ */
+static inline void _marker_update_probes(struct module *probe_module)
+{
+	int refcount = 0;
+
+	/* Core kernel markers */
+	marker_update_probe_range(__start___markers,
+			__stop___markers, probe_module, &refcount);
+	/* Markers in modules. */
+	marker_update_probes_modules(probe_module, &refcount);
+	if (probe_module && refcount == 0) {
+		synchronize_sched();
+		deferred_sync = 0;
+	}
+}
+
+#ifdef CONFIG_MODULES
+/**
+ * module_marker_update - Update module's markers
+ * @mod: pointer of type struct module identifying the target module
+ *
+ * Setup the marker according to the data present in the marker hash table
+ * upon module load. Must be called with module_mutex held.  Since the
+ * probe_module parameter is NULL, it is safe for refcount to be NULL.
+ */
+void module_marker_update(struct module *mod)
+{
+	if (!mod->taints) {
+		mutex_lock(&markers_mutex);
+		marker_update_probe_range(mod->markers,
+			mod->markers+mod->num_markers, NULL, NULL);
+		mutex_unlock(&markers_mutex);
+	}
+}
+
+/*
+ * Update the system wide probes, with modules.
+ * Called with module_mutex taken.
+ */
+static inline void marker_update_probes(struct module *probe_module)
+{
+	mutex_lock(&module_mutex);
+	mutex_lock(&markers_mutex);
+	_marker_update_probes(probe_module);
+	mutex_unlock(&markers_mutex);
+	mutex_unlock(&module_mutex);
+}
+#else
+/*
+ * Update the system wide probes, without modules.
+ */
+static inline void marker_update_probes(struct module *probe_module)
+{
+	mutex_lock(&markers_mutex);
+	_marker_update_probes(probe_module);
+	mutex_unlock(&markers_mutex);
+}
+#endif
+
+/**
+ * marker_probe_register -  Connect a probe to a marker
+ * @name: marker name
+ * @format: format string
+ * @probe: probe handler
+ * @pdata: probe private data
+ *
+ * pdata must be a valid allocated memory address, or NULL.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_probe_register(const char *name, const char *format,
+			marker_probe_func *probe, void *pdata)
+{
+	struct marker_entry *entry;
+	int ret = 0, need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (entry && entry->refcount) {
+		ret = -EBUSY;
+		goto end;
+	}
+	if (deferred_sync) {
+		synchronize_sched();
+		deferred_sync = 0;
+	}
+	ret = add_marker(name, format, probe, pdata);
+	if (ret)
+		goto end;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(NULL);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_probe_register);
+
+/**
+ * marker_probe_unregister -  Disconnect a probe from a marker
+ * @name: marker name
+ *
+ * Returns the pdata given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister(const char *name)
+{
+	struct module *probe_module;
+	struct marker_entry *entry;
+	void *pdata;
+	int need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (!entry) {
+		pdata = ERR_PTR(-ENOENT);
+		goto end;
+	}
+	entry->refcount = 0;
+	/* In what module is the probe handler ? */
+	probe_module = __module_text_address((unsigned long)entry->probe);
+	pdata = remove_marker(name);
+	deferred_sync = 1;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(probe_module);
+	return pdata;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister);
+
+/**
+ * marker_probe_unregister -  Disconnect a probe from a marker
+ * @pdata: probe private data
+ *
+ * Unregister a marker by providing the registered pdata.
+ * Returns the pdata given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister_pdata(void *pdata)
+{
+	struct module *probe_module;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *entry;
+	int found = 0;
+	unsigned int i;
+	int need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	for (i = 0; i < MARKER_TABLE_SIZE; i++) {
+		head = &marker_table[i];
+		hlist_for_each_entry(entry, node, head, hlist) {
+			if (entry->pdata == pdata) {
+				found = 1;
+				goto iter_end;
+			}
+		}
+	}
+iter_end:
+	if (!found) {
+		pdata = ERR_PTR(-ENOENT);
+		goto end;
+	}
+	entry->refcount = 0;
+	/* In what module is the probe handler ? */
+	probe_module = __module_text_address((unsigned long)entry->probe);
+	pdata = remove_marker(entry->name);
+	deferred_sync = 1;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(probe_module);
+	return pdata;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister_pdata);
+
+/**
+ * marker_arm - Arm a marker
+ * @name: marker name
+ *
+ * Activate a marker. It keeps a reference count of the number of
+ * arming/disarming done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_arm(const char *name)
+{
+	struct marker_entry * entry;
+	int ret = 0, need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (!entry) {
+		ret = -ENOENT;
+		goto end;
+	}
+	/*
+	 * Only need to update probes when refcount passes from 0 to 1.
+	 */
+	if (entry->refcount++)
+		goto end;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(NULL);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_arm);
+
+/**
+ * marker_disarm - Disarm a marker
+ * @name: marker name
+ *
+ * Disarm a marker. It keeps a reference count of the number of arming/disarming
+ * done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_disarm(const char *name)
+{
+	struct marker_entry * entry;
+	int ret = 0, need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (!entry) {
+		ret = -ENOENT;
+		goto end;
+	}
+	/*
+	 * Only permit decrement refcount if higher than 0.
+	 * Do probe update only on 1 -> 0 transition.
+	 */
+	if (entry->refcount) {
+		if (--entry->refcount)
+			goto end;
+	} else {
+		ret = -EPERM;
+		goto end;
+	}
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(NULL);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_disarm);
+
+/**
+ * marker_get_pdata - Get a marker's probe private data
+ * @name: marker name
+ *
+ * Returns the pdata pointer, or an ERR_PTR.
+ * The pdata pointer should _only_ be dereferenced if the caller is the owner of
+ * the data, or its content could vanish. This is mostly used to confirm that a
+ * caller is the owner of a registered probe.
+ */
+void *marker_get_pdata(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t name_len = strlen(name) + 1;
+	u32 hash = jhash(name, name_len-1, 0);
+	int found = 0;
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			return e->pdata;
+		}
+	}
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(marker_get_pdata);
+
+static inline int marker_get_iter_range(struct __mark_marker **marker,
+	struct __mark_marker *begin,
+	struct __mark_marker *end)
+{
+	int found = 0;
+
+	if (!*marker && begin != end) {
+		found = 1;
+		*marker = begin;
+	} else if (*marker >= begin && *marker < end) {
+		found = 1;
+		/*
+		 * *marker is known to be a valid marker from now on.
+		 */
+	}
+	return found;
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Returns 0 if current not found.
+ * Returns 1 if current found.
+ * Should be called with module_mutex taken.
+ */
+static inline int marker_get_iter_modules(struct marker_iter *iter)
+{
+	struct module *iter_mod;
+	int found = 0;
+
+	list_for_each_entry(iter_mod, &modules, list) {
+		if (!iter_mod->taints) {
+			/*
+			 * Sorted module list
+			 */
+			if (iter_mod < iter->module) {
+				continue;
+			} else if (iter_mod > iter->module) {
+				iter->marker = NULL;
+			} else {
+			}
+			found = marker_get_iter_range(&iter->marker,
+				iter_mod->markers,
+				iter_mod->markers + iter_mod->num_markers);
+			if (found) {
+				iter->module = iter_mod;
+				break;
+			}
+
+		}
+	}
+	return found;
+}
+#else
+static inline int marker_get_iter_modules(struct marker_iter *iter)
+{
+	return 0;
+}
+#endif
+
+static inline void marker_get_iter(struct marker_iter *iter)
+{
+	int found = 0;
+
+	/* Core kernel markers */
+	if (!iter->module) {
+		found = marker_get_iter_range(&iter->marker,
+				__start___markers, __stop___markers);
+		if (found)
+			goto end;
+	}
+	/* Markers in modules. */
+	found = marker_get_iter_modules(iter);
+end:
+	if (!found)
+		marker_iter_reset(iter);
+}
+
+void marker_iter_start(struct marker_iter *iter)
+{
+	mutex_lock(&module_mutex);
+	mutex_lock(&markers_mutex);
+	marker_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(marker_iter_start);
+
+void marker_iter_next(struct marker_iter *iter)
+{
+	iter->marker++;
+	/*
+	 * iter->marker may be invalid because we blindly incremented it.
+	 * Make sure it is valid by marshalling on the markers, getting the
+	 * markers from following modules if necessary.
+	 */
+	marker_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(marker_iter_next);
+
+void marker_iter_stop(struct marker_iter *iter)
+{
+	mutex_unlock(&markers_mutex);
+	mutex_unlock(&module_mutex);
+}
+EXPORT_SYMBOL_GPL(marker_iter_stop);
+
+void marker_iter_reset(struct marker_iter *iter)
+{
+	iter->module = NULL;
+	iter->marker = NULL;
+}
+EXPORT_SYMBOL_GPL(marker_iter_reset);

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-08-25 20:49       ` Rusty Russell
@ 2007-08-25 21:26         ` Mathieu Desnoyers
  0 siblings, 0 replies; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-08-25 21:26 UTC (permalink / raw)
  To: Rusty Russell; +Cc: akpm, linux-kernel

* Rusty Russell (rusty@rustcorp.com.au) wrote:
> On Fri, 2007-08-24 at 12:26 -0400, Mathieu Desnoyers wrote:
> > * Rusty Russell (rusty@rustcorp.com.au) wrote:
> > > On Mon, 2007-08-20 at 16:27 -0400, Mathieu Desnoyers wrote:
> > > > +{
> > > > +	struct hlist_head *head;
> > > > +	struct hlist_node *node;
> > > > +	struct marker_entry *e;
> > > > +	size_t len = strlen(name) + 1;
> > > > +	u32 hash = jhash(name, len-1, 0);
> > > > +
> > > > +	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
> > > > +	hlist_for_each_entry(e, node, head, hlist) {
> > > > +		if (!strcmp(name, e->name))
> > > > +			return e;
> > > > +	}
> > > > +	return NULL;
> > > > +}
> > > 
> > > OK, don't understand the strlen, len, len-1 dance here?
> > > 
> > 
> > Let's say we have abc\0 for marker name as name input.
> > 
> > len = 3 + 1 = 4 (including \0)
> > hash is done only on the 3 first chars, excluding the \0 (therefore the
> >                                                           len-1 there)
> > 
> > Actually, it's like this only for a matter of consistency between
> > add_marker and remove_marker, which are quite similar, but add_marker
> > needs name_len to include the \0 value. It would be odd to change the
> > logic between the two functions to one including the \0 and the other
> > excluding it.
> 
> Sure, but that doesn't really explain why the code does:
> 
> 	size_t len = strlen(name) + 1;
> 	u32 hash = jhash(name, len-1, 0);
> 
> Rather than:
> 
> 	u32 hash = jhash(name, strlen(name), 0);
> 

Yup, good point. Fixed.

Thanks,

Mathieu

> > Thanks for the review,
> 
> That's fine, just some light reading...
> 
> Cheers,
> Rusty.
> > 
> 

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-08-24 16:26     ` Mathieu Desnoyers
@ 2007-08-25 20:49       ` Rusty Russell
  2007-08-25 21:26         ` Mathieu Desnoyers
  0 siblings, 1 reply; 43+ messages in thread
From: Rusty Russell @ 2007-08-25 20:49 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: akpm, linux-kernel

On Fri, 2007-08-24 at 12:26 -0400, Mathieu Desnoyers wrote:
> * Rusty Russell (rusty@rustcorp.com.au) wrote:
> > On Mon, 2007-08-20 at 16:27 -0400, Mathieu Desnoyers wrote:
> > > +{
> > > +	struct hlist_head *head;
> > > +	struct hlist_node *node;
> > > +	struct marker_entry *e;
> > > +	size_t len = strlen(name) + 1;
> > > +	u32 hash = jhash(name, len-1, 0);
> > > +
> > > +	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
> > > +	hlist_for_each_entry(e, node, head, hlist) {
> > > +		if (!strcmp(name, e->name))
> > > +			return e;
> > > +	}
> > > +	return NULL;
> > > +}
> > 
> > OK, don't understand the strlen, len, len-1 dance here?
> > 
> 
> Let's say we have abc\0 for marker name as name input.
> 
> len = 3 + 1 = 4 (including \0)
> hash is done only on the 3 first chars, excluding the \0 (therefore the
>                                                           len-1 there)
> 
> Actually, it's like this only for a matter of consistency between
> add_marker and remove_marker, which are quite similar, but add_marker
> needs name_len to include the \0 value. It would be odd to change the
> logic between the two functions to one including the \0 and the other
> excluding it.

Sure, but that doesn't really explain why the code does:

	size_t len = strlen(name) + 1;
	u32 hash = jhash(name, len-1, 0);

Rather than:

	u32 hash = jhash(name, strlen(name), 0);

> Thanks for the review,

That's fine, just some light reading...

Cheers,
Rusty.
> 


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-08-21  0:43   ` Rusty Russell
@ 2007-08-24 16:26     ` Mathieu Desnoyers
  2007-08-25 20:49       ` Rusty Russell
  0 siblings, 1 reply; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-08-24 16:26 UTC (permalink / raw)
  To: Rusty Russell; +Cc: akpm, linux-kernel

* Rusty Russell (rusty@rustcorp.com.au) wrote:
> On Mon, 2007-08-20 at 16:27 -0400, Mathieu Desnoyers wrote:
> > The marker activation functions sits in kernel/marker.c. A hash table is used
> > to keep track of the registered probes and armed markers, so the markers within
> > a newly loaded module that should be active can be activated at module load
> > time.
> 
> Hi Mathieu!
> 
> 	Just reading through this patch, a couple of comments:
> 

Hi Rusty,

> > +/* To be used for string format validity checking with gcc */
> > +static inline void __mark_check_format(const char *fmt, ...)
> > +		__attribute__ ((format (printf, 1, 2)));
> > +static inline void __mark_check_format(const char *fmt, ...) { }
> 
> If you place the __attribute__() before the function name, you can do
> this in the definition.
> 

Ok, will fix.

> > ===================================================================
> > --- linux-2.6-lttng.orig/kernel/module.c	2007-08-10 19:44:18.000000000 -0400
> > +++ linux-2.6-lttng/kernel/module.c	2007-08-10 23:54:38.000000000 -0400
> > @@ -1980,6 +1986,10 @@ static struct module *load_module(void _
> >  			sechdrs[immediateindex].sh_size / sizeof(*mod->immediate);
> >  	}
> >  #endif
> > +	if (markersindex)
> > +		sechdrs[markersindex].sh_flags |= SHF_ALLOC;
> > +	if (markersstringsindex)
> > +		sechdrs[markersstringsindex].sh_flags |= SHF_ALLOC;
> >  
> 
> Perhaps I'm missing something, but I don't see why these sections
> wouldn't be SHF_ALLOC already.
> 

Declaring variables with __attribute__((section("__markers_strings")))
will likely put them in an allocated section, you are right. Will fix.

The same applies to immediate values with .section __immediate, \"a\",
@progbits, which is explicitely allocated.

> >  	mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
> >  	if (unusedcrcindex)
> > @@ -2021,6 +2031,13 @@ static struct module *load_module(void _
> >  		if (err < 0)
> >  			goto cleanup;
> >  	}
> > +#ifdef CONFIG_MARKERS
> > +	if (markersindex) {
> > +		mod->markers = (void *)sechdrs[markersindex].sh_addr;
> > +		mod->num_markers =
> > +			sechdrs[markersindex].sh_size / sizeof(*mod->markers);
> > +	}
> > +#endif
> 
> Because of the wonders of ELF, section 0 has sh_addr and sh_size 0.  So
> the if (markersindex) is unnecessary here.
> 

Ok. Will apply to immediate values too.

> > +/*
> > + * Get marker if the marker is present in the marker hash table.
> > + * Must be called with markers_mutex held.
> > + * Returns NULL if not present.
> > + */
> > +static struct marker_entry *_get_marker(const char *name)
> 
> You seem to really enjoy underscores, yet I'm having trouble
> understanding why this would have an underscore in it.
> 

Just they are internal functiona meant to be called with markers_mutex
held. But I guess having a static prefix and not being exported is
enough. Will remove. I'll just keep
_marker_update_probes/marker_update_probes to differentiate between
locked/non-locked version.

> > +{
> > +	struct hlist_head *head;
> > +	struct hlist_node *node;
> > +	struct marker_entry *e;
> > +	size_t len = strlen(name) + 1;
> > +	u32 hash = jhash(name, len-1, 0);
> > +
> > +	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
> > +	hlist_for_each_entry(e, node, head, hlist) {
> > +		if (!strcmp(name, e->name))
> > +			return e;
> > +	}
> > +	return NULL;
> > +}
> 
> OK, don't understand the strlen, len, len-1 dance here?
> 

Let's say we have abc\0 for marker name as name input.

len = 3 + 1 = 4 (including \0)
hash is done only on the 3 first chars, excluding the \0 (therefore the
                                                          len-1 there)

Actually, it's like this only for a matter of consistency between
add_marker and remove_marker, which are quite similar, but add_marker
needs name_len to include the \0 value. It would be odd to change the
logic between the two functions to one including the \0 and the other
excluding it.

> > +/*
> > + * Updates the probe callback corresponding to a range of markers.
> > + * Must be called with markers_mutex held.
> > + */
> > +static void _marker_update_probe_range(
> 
> And yet:
> 
> > +void module_marker_update(struct module *mod)
> > +{
> > +	if (!mod->taints)
> > +		_marker_update_probe_range(mod->markers,
> > +			mod->markers+mod->num_markers, NULL, NULL);
> > +}
> 
> This doesn't hold the markers_mutex.
> 

Since this function is only meant to be called upon module load, before
the module is added to the module list, this marker range cannot be seen
by other calls to marker_update_probe_range. But you are right. It also
protects the hash table, so I should take the mutex there.  Fixing.

Thanks for the review,

Mathieu

> Cheers,
> Rusty.
> 

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-08-20 20:27 ` [patch 1/4] Linux Kernel Markers - Architecture Independent Code Mathieu Desnoyers
@ 2007-08-21  0:43   ` Rusty Russell
  2007-08-24 16:26     ` Mathieu Desnoyers
  0 siblings, 1 reply; 43+ messages in thread
From: Rusty Russell @ 2007-08-21  0:43 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: akpm, linux-kernel

On Mon, 2007-08-20 at 16:27 -0400, Mathieu Desnoyers wrote:
> The marker activation functions sits in kernel/marker.c. A hash table is used
> to keep track of the registered probes and armed markers, so the markers within
> a newly loaded module that should be active can be activated at module load
> time.

Hi Mathieu!

	Just reading through this patch, a couple of comments:

> +/* To be used for string format validity checking with gcc */
> +static inline void __mark_check_format(const char *fmt, ...)
> +		__attribute__ ((format (printf, 1, 2)));
> +static inline void __mark_check_format(const char *fmt, ...) { }

If you place the __attribute__() before the function name, you can do
this in the definition.

> ===================================================================
> --- linux-2.6-lttng.orig/kernel/module.c	2007-08-10 19:44:18.000000000 -0400
> +++ linux-2.6-lttng/kernel/module.c	2007-08-10 23:54:38.000000000 -0400
> @@ -1980,6 +1986,10 @@ static struct module *load_module(void _
>  			sechdrs[immediateindex].sh_size / sizeof(*mod->immediate);
>  	}
>  #endif
> +	if (markersindex)
> +		sechdrs[markersindex].sh_flags |= SHF_ALLOC;
> +	if (markersstringsindex)
> +		sechdrs[markersstringsindex].sh_flags |= SHF_ALLOC;
>  

Perhaps I'm missing something, but I don't see why these sections
wouldn't be SHF_ALLOC already.

>  	mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
>  	if (unusedcrcindex)
> @@ -2021,6 +2031,13 @@ static struct module *load_module(void _
>  		if (err < 0)
>  			goto cleanup;
>  	}
> +#ifdef CONFIG_MARKERS
> +	if (markersindex) {
> +		mod->markers = (void *)sechdrs[markersindex].sh_addr;
> +		mod->num_markers =
> +			sechdrs[markersindex].sh_size / sizeof(*mod->markers);
> +	}
> +#endif

Because of the wonders of ELF, section 0 has sh_addr and sh_size 0.  So
the if (markersindex) is unnecessary here.

> +/*
> + * Get marker if the marker is present in the marker hash table.
> + * Must be called with markers_mutex held.
> + * Returns NULL if not present.
> + */
> +static struct marker_entry *_get_marker(const char *name)

You seem to really enjoy underscores, yet I'm having trouble
understanding why this would have an underscore in it.

> +{
> +	struct hlist_head *head;
> +	struct hlist_node *node;
> +	struct marker_entry *e;
> +	size_t len = strlen(name) + 1;
> +	u32 hash = jhash(name, len-1, 0);
> +
> +	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
> +	hlist_for_each_entry(e, node, head, hlist) {
> +		if (!strcmp(name, e->name))
> +			return e;
> +	}
> +	return NULL;
> +}

OK, don't understand the strlen, len, len-1 dance here?

> +/*
> + * Updates the probe callback corresponding to a range of markers.
> + * Must be called with markers_mutex held.
> + */
> +static void _marker_update_probe_range(

And yet:

> +void module_marker_update(struct module *mod)
> +{
> +	if (!mod->taints)
> +		_marker_update_probe_range(mod->markers,
> +			mod->markers+mod->num_markers, NULL, NULL);
> +}

This doesn't hold the markers_mutex.

Cheers,
Rusty.


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-08-20 20:27 [patch 0/4] Linux Kernel Markers Mathieu Desnoyers
@ 2007-08-20 20:27 ` Mathieu Desnoyers
  2007-08-21  0:43   ` Rusty Russell
  0 siblings, 1 reply; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-08-20 20:27 UTC (permalink / raw)
  To: akpm, linux-kernel; +Cc: Mathieu Desnoyers

[-- Attachment #1: linux-kernel-markers-architecture-independent-code.patch --]
[-- Type: text/plain, Size: 28224 bytes --]

The marker activation functions sits in kernel/marker.c. A hash table is used
to keep track of the registered probes and armed markers, so the markers within
a newly loaded module that should be active can be activated at module load
time.

marker_query has been removed. marker_get_first, marker_get_next and
marker_release should be used as iterators on the markers.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---

 include/asm-generic/vmlinux.lds.h |   11 
 include/linux/marker.h            |  170 +++++++++
 include/linux/module.h            |    5 
 kernel/marker.c                   |  678 ++++++++++++++++++++++++++++++++++++++
 kernel/module.c                   |   19 +
 5 files changed, 882 insertions(+), 1 deletion(-)

Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h	2007-08-10 19:44:17.000000000 -0400
+++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h	2007-08-10 19:44:18.000000000 -0400
@@ -12,7 +12,11 @@
 /* .data section */
 #define DATA_DATA							\
 	*(.data)							\
-	*(.data.init.refok)
+	*(.data.init.refok)						\
+	. = ALIGN(8);							\
+	VMLINUX_SYMBOL(__start___markers) = .;				\
+	*(__markers)							\
+	VMLINUX_SYMBOL(__stop___markers) = .;
 
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
@@ -129,6 +133,11 @@
 		VMLINUX_SYMBOL(__stop___immediate) = .;			\
 	}								\
 									\
+	/* Markers: strings */						\
+        __markers_strings : AT(ADDR(__markers_strings) - LOAD_OFFSET) {	\
+		*(__markers_strings)					\
+ 	}								\
+									\
 	/* Kernel symbol table: strings */				\
         __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {	\
 		*(__ksymtab_strings)					\
Index: linux-2.6-lttng/include/linux/marker.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/linux/marker.h	2007-08-10 23:54:35.000000000 -0400
@@ -0,0 +1,170 @@
+#ifndef _LINUX_MARKER_H
+#define _LINUX_MARKER_H
+
+/*
+ * Code markup for dynamic and static tracing.
+ *
+ * See Documentation/marker.txt.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/immediate.h>
+#include <linux/types.h>
+
+struct module;
+struct __mark_marker;
+
+/**
+ * marker_probe_func - Type of a marker probe function
+ * @mdata: pointer of type struct __mark_marker
+ * @private_data: caller site private data
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Type of marker probe functions. They receive the mdata and need to parse the
+ * format string to recover the variable argument list.
+ */
+typedef void marker_probe_func(const struct __mark_marker *mdata,
+	void *private_data, const char *fmt, ...);
+
+struct __mark_marker {
+	const char *name;	/* Marker name */
+	const char *format;	/* Marker format string, describing the
+				 * variable argument list.
+				 */
+	const char *args;	/* List of arguments litteraly transformed
+				 * into a string: "arg1, arg2, arg3".
+				 */
+	immediate_char_t state;	/* Immediate value state. */
+	marker_probe_func *call;/* Probe handler function pointer */
+	void *pdata;		/* Private probe data */
+} __attribute__((aligned(8)));
+
+#ifdef CONFIG_MARKERS
+
+/*
+ * Generic marker flavor always available.
+ * Note : the empty asm volatile with read constraint is used here instead of a
+ * "used" attribute to fix a gcc 4.1.x bug.
+ * Make sure the alignment of the structure in the __markers section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ */
+#define __trace_mark(generic, name, call_data, format, args...)		\
+	do {								\
+		static const char __mstrtab_name_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= #name;						\
+		static const char __mstrtab_format_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= format;						\
+		static const char __mstrtab_args_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= #args;						\
+		static struct __mark_marker __mark_##name		\
+		__attribute__((section("__markers"))) =			\
+		{ __mstrtab_name_##name, __mstrtab_format_##name,	\
+		__mstrtab_args_##name, { 0 },				\
+		__mark_empty_function, NULL };				\
+		asm volatile ( "" : : "i" (&__mark_##name));		\
+		__mark_check_format(format, ## args);			\
+		if (!generic) {						\
+			immediate_if (&__mark_##name.state) { 		\
+				preempt_disable();			\
+				(*__mark_##name.call)			\
+					(&__mark_##name, call_data,	\
+					format, ## args);		\
+				preempt_enable();			\
+			}						\
+		} else {						\
+			_immediate_if (&__mark_##name.state) { 		\
+				preempt_disable();			\
+				(*__mark_##name.call)			\
+					(&__mark_##name, call_data,	\
+					format, ## args);		\
+				preempt_enable();			\
+			}						\
+		}							\
+	} while (0)
+
+extern void module_marker_update(struct module *mod);
+#else /* !CONFIG_MARKERS */
+#define __trace_mark(generic, name, call_data, format, args...) \
+		__mark_check_format(format, ## args)
+static inline void module_marker_update(struct module *mod) { }
+#endif /* CONFIG_MARKERS */
+
+/**
+ * trace_mark - Marker using code patching
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker using optimized code patching technique (immediate_if ())
+ * to be enabled.
+ */
+#define trace_mark(name, format, args...) \
+	__trace_mark(0, name, NULL, format, ## args)
+
+/**
+ * _trace_mark - Marker using variable read
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker using a standard memory read (_immediate_if ()) to be
+ * enabled. Should be used for markers in __init and __exit functions and in
+ * lockdep code.
+ */
+#define _trace_mark(name, format, args...) \
+	__trace_mark(1, name, NULL, format, ## args)
+
+#define MARK_MAX_FORMAT_LEN	1024
+
+/**
+ * MARK_NOARGS - Format string for a marker with no argument.
+ */
+#define MARK_NOARGS " "
+
+/* To be used for string format validity checking with gcc */
+static inline void __mark_check_format(const char *fmt, ...)
+		__attribute__ ((format (printf, 1, 2)));
+static inline void __mark_check_format(const char *fmt, ...) { }
+
+extern marker_probe_func __mark_empty_function;
+
+/*
+ * Connect a probe to a markers.
+ * pdata must be a valid allocated memory address, or NULL.
+ */
+extern int marker_probe_register(const char *name, const char *format,
+				marker_probe_func *probe, void *pdata);
+
+/*
+ * Returns the pdata given to marker_probe_register.
+ */
+extern void *marker_probe_unregister(const char *name);
+/*
+ * Unregister a marker by providing the registered pdata.
+ */
+extern void *marker_probe_unregister_pdata(void *pdata);
+
+extern int marker_arm(const char *name);
+extern int marker_disarm(const char *name);
+
+struct marker_iter {
+	struct module *module;
+	struct __mark_marker *marker;
+};
+
+extern void marker_iter_start(struct marker_iter *iter);
+extern void marker_iter_next(struct marker_iter *iter);
+extern void marker_iter_stop(struct marker_iter *iter);
+extern void marker_iter_reset(struct marker_iter *iter);
+extern void *marker_get_pdata(const char *name);
+
+#endif
Index: linux-2.6-lttng/include/linux/module.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/module.h	2007-08-10 19:44:17.000000000 -0400
+++ linux-2.6-lttng/include/linux/module.h	2007-08-10 23:54:38.000000000 -0400
@@ -16,6 +16,7 @@
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
 #include <linux/immediate.h>
+#include <linux/marker.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -380,6 +381,10 @@ struct module
 	const struct __immediate *immediate;
 	unsigned int num_immediate;
 #endif
+#ifdef CONFIG_MARKERS
+	struct __mark_marker *markers;
+	unsigned int num_markers;
+#endif
 };
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
Index: linux-2.6-lttng/kernel/module.c
===================================================================
--- linux-2.6-lttng.orig/kernel/module.c	2007-08-10 19:44:18.000000000 -0400
+++ linux-2.6-lttng/kernel/module.c	2007-08-10 23:54:38.000000000 -0400
@@ -1722,6 +1722,8 @@ static struct module *load_module(void _
 	unsigned int unusedgplindex;
 	unsigned int unusedgplcrcindex;
 	unsigned int immediateindex = 0;
+	unsigned int markersindex = 0;
+	unsigned int markersstringsindex = 0;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1821,6 +1823,10 @@ static struct module *load_module(void _
 #ifdef CONFIG_IMMEDIATE
 	immediateindex = find_sec(hdr, sechdrs, secstrings, "__immediate");
 #endif
+#ifdef CONFIG_MARKERS
+	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
+ 	markersstringsindex = find_sec(hdr, sechdrs, secstrings, "__markers_strings");
+#endif
 
 	/* Don't keep modinfo section */
 	sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -1980,6 +1986,10 @@ static struct module *load_module(void _
 			sechdrs[immediateindex].sh_size / sizeof(*mod->immediate);
 	}
 #endif
+	if (markersindex)
+		sechdrs[markersindex].sh_flags |= SHF_ALLOC;
+	if (markersstringsindex)
+		sechdrs[markersstringsindex].sh_flags |= SHF_ALLOC;
 
 	mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
 	if (unusedcrcindex)
@@ -2021,6 +2031,13 @@ static struct module *load_module(void _
 		if (err < 0)
 			goto cleanup;
 	}
+#ifdef CONFIG_MARKERS
+	if (markersindex) {
+		mod->markers = (void *)sechdrs[markersindex].sh_addr;
+		mod->num_markers =
+			sechdrs[markersindex].sh_size / sizeof(*mod->markers);
+	}
+#endif
 
         /* Find duplicate symbols */
 	err = verify_export_symbols(mod);
@@ -2046,6 +2063,8 @@ static struct module *load_module(void _
 	 }
 #endif
 
+	module_marker_update(mod);
+
 	module_immediate_setup(mod);
 
 	err = module_finalize(hdr, sechdrs, mod);
Index: linux-2.6-lttng/kernel/marker.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/kernel/marker.c	2007-08-10 19:44:18.000000000 -0400
@@ -0,0 +1,678 @@
+/*
+ * Copyright (C) 2007 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/marker.h>
+#include <linux/err.h>
+#include <linux/immediate.h>
+
+extern struct __mark_marker __start___markers[];
+extern struct __mark_marker __stop___markers[];
+
+/*
+ * module_mutex nests inside markers_mutex. Markers mutex protects the builtin
+ * and module markers, the hash table and deferred_sync.
+ */
+DEFINE_MUTEX(markers_mutex);
+
+/*
+ * Marker deferred synchronization.
+ * Upon marker probe_unregister, we delay call to synchronize_sched() to
+ * accelerate mass unregistration (only when there is no more reference to a
+ * give module do we call synchronize_sched()). However, we need to make sure
+ * every critical region have ended before we re-arm a marker that has been
+ * unregistered and then registered back with a different probe data.
+ */
+static int deferred_sync;
+
+/*
+ * Marker hash table, containing the active markers.
+ * Protected by module_mutex.
+ */
+#define MARKER_HASH_BITS 6
+#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
+
+struct marker_entry {
+	struct hlist_node hlist;
+	char *format;
+	marker_probe_func *probe;
+	void *pdata;
+	int refcount;	/* Number of times armed. 0 if disarmed. */
+	char name[0];	/* Contains name'\0'format'\0' */
+};
+
+static struct hlist_head marker_table[MARKER_TABLE_SIZE];
+
+/**
+ * __mark_empty_function - Empty probe callback
+ * @mdata: pointer of type const struct __mark_marker
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Empty callback provided as a probe to the markers. By providing this to a
+ * disabled marker, we makes sure the  execution flow is always valid even
+ * though the function pointer change and the marker enabling are two distinct
+ * operations that modifies the execution flow of preemptible code.
+ */
+void __mark_empty_function(const struct __mark_marker *mdata,
+	void *private_data,
+	const char *fmt, ...)
+{ }
+EXPORT_SYMBOL_GPL(__mark_empty_function);
+
+/*
+ * Get marker if the marker is present in the marker hash table.
+ * Must be called with markers_mutex held.
+ * Returns NULL if not present.
+ */
+static struct marker_entry *_get_marker(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t len = strlen(name) + 1;
+	u32 hash = jhash(name, len-1, 0);
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name))
+			return e;
+	}
+	return NULL;
+}
+
+/*
+ * Add the marker to the marker hash table. Must be called with markers_mutex
+ * held.
+ */
+static int _add_marker(const char *name,
+	const char *format, marker_probe_func *probe, void *pdata)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t name_len = strlen(name) + 1;
+	size_t format_len = 0;
+	u32 hash = jhash(name, name_len-1, 0);
+
+	if (format)
+		format_len = strlen(format) + 1;
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			printk(KERN_NOTICE
+				"Marker %s busy, probe %p already installed\n",
+				name, e->probe);
+			return -EBUSY;	/* Already there */
+		}
+	}
+	/*
+	 * Using kmalloc here to allocate a variable length element. Could
+	 * cause some memory fragmentation if overused.
+	 */
+	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+			GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	memcpy(&e->name[0], name, name_len);
+	if (format) {
+		e->format = &e->name[name_len];
+		memcpy(e->format, format, format_len);
+		trace_mark(core_marker_format, "name %s format %s",
+				e->name, e->format);
+	} else
+		e->format = NULL;
+	e->probe = probe;
+	e->pdata = pdata;
+	e->refcount = 0;
+	hlist_add_head(&e->hlist, head);
+	return 0;
+}
+
+/*
+ * Remove the marker from the marker hash table. Must be called with mutex_lock
+ * held.
+ */
+static void *_remove_marker(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	int found = 0;
+	size_t len = strlen(name) + 1;
+	void *pdata = NULL;
+	u32 hash = jhash(name, len-1, 0);
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			break;
+		}
+	}
+	if (found) {
+		pdata = e->pdata;
+		hlist_del(&e->hlist);
+		kfree(e);
+	}
+	return pdata;
+}
+
+/*
+ * Set the mark_entry format to the format found in the element.
+ */
+static int _marker_set_format(struct marker_entry **entry, const char *format)
+{
+	struct marker_entry *e;
+	size_t name_len = strlen((*entry)->name) + 1;
+	size_t format_len = strlen(format) + 1;
+
+	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+			GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	memcpy(&e->name[0], (*entry)->name, name_len);
+	e->format = &e->name[name_len];
+	memcpy(e->format, format, format_len);
+	e->probe = (*entry)->probe;
+	e->pdata = (*entry)->pdata;
+	e->refcount = (*entry)->refcount;
+	hlist_add_before(&e->hlist, &(*entry)->hlist);
+	hlist_del(&(*entry)->hlist);
+	kfree(*entry);
+	*entry = e;
+	trace_mark(core_marker_format, "name %s format %s",
+			e->name, e->format);
+	return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one marker.
+ */
+static int _set_marker(struct marker_entry **entry,
+			struct __mark_marker *elem)
+{
+	int ret;
+	BUG_ON(strcmp((*entry)->name, elem->name) != 0);
+
+	if ((*entry)->format) {
+		if (strcmp((*entry)->format, elem->format) != 0) {
+			printk(KERN_NOTICE
+				"Format mismatch for probe %s "
+				"(%s), marker (%s)\n",
+				(*entry)->name,
+				(*entry)->format,
+				elem->format);
+			return -EPERM;
+		}
+	} else {
+		ret = _marker_set_format(entry, elem->format);
+		if (ret)
+			return ret;
+	}
+	elem->call = (*entry)->probe;
+	elem->pdata = (*entry)->pdata;
+	_immediate_set(&elem->state, 1);
+	return 0;
+}
+
+/*
+ * Disable a marker and its probe callback.
+ * Note: only after a synchronize_sched() issued after setting elem->call to the
+ * empty function insures that the original callback is not used anymore. This
+ * insured by preemption disabling around the call site.
+ */
+static void _disable_marker(struct __mark_marker *elem)
+{
+	_immediate_set(&elem->state, 0);
+	elem->call = __mark_empty_function;
+	/*
+	 * Leave the pdata and id there, because removal is racy and should be
+	 * done only after a synchronize_sched(). There are never used until
+	 * the next initialization anyway.
+	 */
+}
+
+/*
+ * Updates the probe callback corresponding to a range of markers.
+ * Must be called with markers_mutex held.
+ */
+static void _marker_update_probe_range(
+	struct __mark_marker *begin,
+	struct __mark_marker *end,
+	struct module *probe_module,
+	int *refcount)
+{
+	struct __mark_marker *iter;
+	struct marker_entry *mark_entry;
+
+	for (iter = begin; iter < end; iter++) {
+		mark_entry = _get_marker(iter->name);
+		if (mark_entry && mark_entry->refcount) {
+			_set_marker(&mark_entry, iter);
+			/*
+			 * ignore error, continue
+			 */
+			if (probe_module)
+				if (probe_module ==
+			__module_text_address((unsigned long)mark_entry->probe))
+					(*refcount)++;
+		} else {
+			_disable_marker(iter);
+		}
+	}
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Update module probes.
+ * Must be called with markers_mutex held.
+ */
+static inline void __marker_update_probes_modules(struct module *probe_module,
+	int *refcount)
+{
+	struct module *mod;
+
+	list_for_each_entry(mod, &modules, list) {
+		if (!mod->taints) {
+			_marker_update_probe_range(mod->markers,
+				mod->markers+mod->num_markers,
+				probe_module, refcount);
+		}
+	}
+}
+#else
+static inline void __marker_update_probes_modules(struct module *probe_module,
+	int *refcount)
+{ }
+#endif
+
+/*
+ * Update probes, removing the faulty probes.
+ * Issues a synchronize_sched() when no reference to the module passed
+ * as parameter is found in the probes so the probe module can be
+ * safely unloaded from now on.
+ */
+static inline void __marker_update_probes(struct module *probe_module)
+{
+	int refcount = 0;
+
+	/* Core kernel markers */
+	_marker_update_probe_range(__start___markers,
+			__stop___markers, probe_module, &refcount);
+	/* Markers in modules. */
+	__marker_update_probes_modules(probe_module, &refcount);
+	if (probe_module && refcount == 0) {
+		synchronize_sched();
+		deferred_sync = 0;
+	}
+}
+
+#ifdef CONFIG_MODULES
+/**
+ * module_marker_update - Update module's markers
+ * @mod: pointer of type struct module identifying the target module
+ *
+ * Setup the marker according to the data present in the marker hash table
+ * upon module load. Must be called with module_mutex held.  Since the
+ * probe_module parameter is NULL, it is safe for refcount to be NULL.
+ */
+void module_marker_update(struct module *mod)
+{
+	if (!mod->taints)
+		_marker_update_probe_range(mod->markers,
+			mod->markers+mod->num_markers, NULL, NULL);
+}
+
+/*
+ * Update the system wide probes, with modules.
+ */
+static inline void _marker_update_probes(struct module *probe_module)
+{
+	mutex_lock(&module_mutex);
+	__marker_update_probes(probe_module);
+	mutex_unlock(&module_mutex);
+}
+#else
+/*
+ * Update the system wide probes, without modules.
+ */
+static inline void _marker_update_probes(struct module *probe_module)
+{
+	__marker_update_probes(probe_module);
+}
+#endif
+
+/**
+ * marker_probe_register -  Connect a probe to a marker
+ * @name: marker name
+ * @format: format string
+ * @probe: probe handler
+ * @pdata: probe private data
+ *
+ * pdata must be a valid allocated memory address, or NULL.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_probe_register(const char *name, const char *format,
+			marker_probe_func *probe, void *pdata)
+{
+	struct marker_entry *entry;
+	int ret = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = _get_marker(name);
+	if (entry && entry->refcount) {
+		ret = -EBUSY;
+		goto end;
+	}
+	if (deferred_sync) {
+		synchronize_sched();
+		deferred_sync = 0;
+	}
+	ret = _add_marker(name, format, probe, pdata);
+	if (ret)
+		goto end;
+	_marker_update_probes(NULL);
+end:
+	mutex_unlock(&markers_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_probe_register);
+
+/**
+ * marker_probe_unregister -  Disconnect a probe from a marker
+ * @name: marker name
+ *
+ * Returns the pdata given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister(const char *name)
+{
+	struct module *probe_module;
+	struct marker_entry *entry;
+	void *pdata;
+
+	mutex_lock(&markers_mutex);
+	entry = _get_marker(name);
+	if (!entry) {
+		pdata = ERR_PTR(-ENOENT);
+		goto end;
+	}
+	entry->refcount = 0;
+	/* In what module is the probe handler ? */
+	probe_module = __module_text_address((unsigned long)entry->probe);
+	pdata = _remove_marker(name);
+	deferred_sync = 1;
+	_marker_update_probes(probe_module);
+end:
+	mutex_unlock(&markers_mutex);
+	return pdata;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister);
+
+/**
+ * marker_probe_unregister -  Disconnect a probe from a marker
+ * @pdata: probe private data
+ *
+ * Unregister a marker by providing the registered pdata.
+ * Returns the pdata given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister_pdata(void *pdata)
+{
+	struct module *probe_module;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *entry;
+	int found = 0;
+	unsigned int i;
+
+	mutex_lock(&markers_mutex);
+	for (i = 0; i < MARKER_TABLE_SIZE; i++) {
+		head = &marker_table[i];
+		hlist_for_each_entry(entry, node, head, hlist) {
+			if (entry->pdata == pdata) {
+				found = 1;
+				goto iter_end;
+			}
+		}
+	}
+iter_end:
+	if (!found) {
+		pdata = ERR_PTR(-ENOENT);
+		goto end;
+	}
+	entry->refcount = 0;
+	/* In what module is the probe handler ? */
+	probe_module = __module_text_address((unsigned long)entry->probe);
+	pdata = _remove_marker(entry->name);
+	deferred_sync = 1;
+	_marker_update_probes(probe_module);
+end:
+	mutex_unlock(&markers_mutex);
+	return pdata;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister_pdata);
+
+/**
+ * marker_arm - Arm a marker
+ * @name: marker name
+ *
+ * Activate a marker. It keeps a reference count of the number of
+ * arming/disarming done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_arm(const char *name)
+{
+	struct marker_entry * entry;
+	int ret = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = _get_marker(name);
+	if (!entry) {
+		ret = -ENOENT;
+		goto end;
+	}
+	/*
+	 * Only need to update probes when refcount passes from 0 to 1.
+	 */
+	if (entry->refcount++)
+		goto end;
+	_marker_update_probes(NULL);
+end:
+	mutex_unlock(&markers_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_arm);
+
+/**
+ * marker_disarm - Disarm a marker
+ * @name: marker name
+ *
+ * Disarm a marker. It keeps a reference count of the number of arming/disarming
+ * done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_disarm(const char *name)
+{
+	struct marker_entry * entry;
+	int ret = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = _get_marker(name);
+	if (!entry) {
+		ret = -ENOENT;
+		goto end;
+	}
+	/*
+	 * Only permit decrement refcount if higher than 0.
+	 * Do probe update only on 1 -> 0 transition.
+	 */
+	if (entry->refcount) {
+		if (--entry->refcount)
+			goto end;
+	} else {
+		ret = -EPERM;
+		goto end;
+	}
+	_marker_update_probes(NULL);
+end:
+	mutex_unlock(&markers_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_disarm);
+
+/**
+ * marker_get_pdata - Get a marker's probe private data
+ * @name: marker name
+ *
+ * Returns the pdata pointer, or an ERR_PTR.
+ * The pdata pointer should _only_ be dereferenced if the caller is the owner of
+ * the data, or its content could vanish. This is mostly used to confirm that a
+ * caller is the owner of a registered probe.
+ */
+void *marker_get_pdata(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t name_len = strlen(name) + 1;
+	u32 hash = jhash(name, name_len-1, 0);
+	int found = 0;
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			return e->pdata;
+		}
+	}
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(marker_get_pdata);
+
+static inline int _marker_get_iter_range(struct __mark_marker **marker,
+	struct __mark_marker *begin,
+	struct __mark_marker *end)
+{
+	int found = 0;
+
+	if (!*marker && begin != end) {
+		found = 1;
+		*marker = begin;
+	} else if (*marker >= begin && *marker < end) {
+		found = 1;
+		/*
+		 * *marker is known to be a valid marker from now on.
+		 */
+	}
+	return found;
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Returns 0 if current not foud.
+ * Returns 1 if current found.
+ * Should be called with module_mutex taken.
+ */
+static inline int marker_get_iter_modules(struct marker_iter *iter)
+{
+	struct module *iter_mod;
+	int found = 0;
+
+	list_for_each_entry(iter_mod, &modules, list) {
+		if (!iter_mod->taints) {
+			/*
+			 * Sorted module list
+			 */
+			if (iter_mod < iter->module) {
+				continue;
+			} else if (iter_mod > iter->module) {
+				iter->marker = NULL;
+			} else {
+			}
+			found = _marker_get_iter_range(&iter->marker,
+				iter_mod->markers,
+				iter_mod->markers + iter_mod->num_markers);
+			if (found) {
+				iter->module = iter_mod;
+				break;
+			}
+
+		}
+	}
+	return found;
+}
+#else
+static inline int marker_get_iter_modules(struct marker_iter *iter)
+{
+	return 0;
+}
+#endif
+
+static inline void marker_get_iter(struct marker_iter *iter)
+{
+	int found = 0;
+
+	/* Core kernel markers */
+	if (!iter->module) {
+		found = _marker_get_iter_range(&iter->marker,
+				__start___markers, __stop___markers);
+		if (found)
+			goto end;
+	}
+	/* Markers in modules. */
+	found = marker_get_iter_modules(iter);
+end:
+	if (!found)
+		marker_iter_reset(iter);
+}
+
+void marker_iter_start(struct marker_iter *iter)
+{
+	mutex_lock(&module_mutex);
+	marker_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(marker_iter_start);
+
+void marker_iter_next(struct marker_iter *iter)
+{
+	iter->marker++;
+	/*
+	 * iter->marker may be invalid because we blindly incremented it.
+	 * Make sure it is valid by marshalling on the markers, getting the
+	 * markers from following modules if necessary.
+	 */
+	marker_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(marker_iter_next);
+
+void marker_iter_stop(struct marker_iter *iter)
+{
+	mutex_unlock(&module_mutex);
+}
+EXPORT_SYMBOL_GPL(marker_iter_stop);
+
+void marker_iter_reset(struct marker_iter *iter)
+{
+	iter->module = NULL;
+	iter->marker = NULL;
+}
+EXPORT_SYMBOL_GPL(marker_iter_reset);

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [patch 1/4] Linux Kernel Markers - Architecture Independent Code
  2007-08-12 15:10 [patch 0/4] Linux Kernel Markers Mathieu Desnoyers
@ 2007-08-12 15:10 ` Mathieu Desnoyers
  0 siblings, 0 replies; 43+ messages in thread
From: Mathieu Desnoyers @ 2007-08-12 15:10 UTC (permalink / raw)
  To: akpm, linux-kernel; +Cc: Mathieu Desnoyers

[-- Attachment #1: linux-kernel-markers-architecture-independent-code.patch --]
[-- Type: text/plain, Size: 28224 bytes --]

The marker activation functions sits in kernel/marker.c. A hash table is used
to keep track of the registered probes and armed markers, so the markers within
a newly loaded module that should be active can be activated at module load
time.

marker_query has been removed. marker_get_first, marker_get_next and
marker_release should be used as iterators on the markers.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---

 include/asm-generic/vmlinux.lds.h |   11 
 include/linux/marker.h            |  170 +++++++++
 include/linux/module.h            |    5 
 kernel/marker.c                   |  678 ++++++++++++++++++++++++++++++++++++++
 kernel/module.c                   |   19 +
 5 files changed, 882 insertions(+), 1 deletion(-)

Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h	2007-08-10 19:44:17.000000000 -0400
+++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h	2007-08-10 19:44:18.000000000 -0400
@@ -12,7 +12,11 @@
 /* .data section */
 #define DATA_DATA							\
 	*(.data)							\
-	*(.data.init.refok)
+	*(.data.init.refok)						\
+	. = ALIGN(8);							\
+	VMLINUX_SYMBOL(__start___markers) = .;				\
+	*(__markers)							\
+	VMLINUX_SYMBOL(__stop___markers) = .;
 
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
@@ -129,6 +133,11 @@
 		VMLINUX_SYMBOL(__stop___immediate) = .;			\
 	}								\
 									\
+	/* Markers: strings */						\
+        __markers_strings : AT(ADDR(__markers_strings) - LOAD_OFFSET) {	\
+		*(__markers_strings)					\
+ 	}								\
+									\
 	/* Kernel symbol table: strings */				\
         __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {	\
 		*(__ksymtab_strings)					\
Index: linux-2.6-lttng/include/linux/marker.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/linux/marker.h	2007-08-10 23:54:35.000000000 -0400
@@ -0,0 +1,170 @@
+#ifndef _LINUX_MARKER_H
+#define _LINUX_MARKER_H
+
+/*
+ * Code markup for dynamic and static tracing.
+ *
+ * See Documentation/marker.txt.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/immediate.h>
+#include <linux/types.h>
+
+struct module;
+struct __mark_marker;
+
+/**
+ * marker_probe_func - Type of a marker probe function
+ * @mdata: pointer of type struct __mark_marker
+ * @private_data: caller site private data
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Type of marker probe functions. They receive the mdata and need to parse the
+ * format string to recover the variable argument list.
+ */
+typedef void marker_probe_func(const struct __mark_marker *mdata,
+	void *private_data, const char *fmt, ...);
+
+struct __mark_marker {
+	const char *name;	/* Marker name */
+	const char *format;	/* Marker format string, describing the
+				 * variable argument list.
+				 */
+	const char *args;	/* List of arguments litteraly transformed
+				 * into a string: "arg1, arg2, arg3".
+				 */
+	immediate_char_t state;	/* Immediate value state. */
+	marker_probe_func *call;/* Probe handler function pointer */
+	void *pdata;		/* Private probe data */
+} __attribute__((aligned(8)));
+
+#ifdef CONFIG_MARKERS
+
+/*
+ * Generic marker flavor always available.
+ * Note : the empty asm volatile with read constraint is used here instead of a
+ * "used" attribute to fix a gcc 4.1.x bug.
+ * Make sure the alignment of the structure in the __markers section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ */
+#define __trace_mark(generic, name, call_data, format, args...)		\
+	do {								\
+		static const char __mstrtab_name_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= #name;						\
+		static const char __mstrtab_format_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= format;						\
+		static const char __mstrtab_args_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= #args;						\
+		static struct __mark_marker __mark_##name		\
+		__attribute__((section("__markers"))) =			\
+		{ __mstrtab_name_##name, __mstrtab_format_##name,	\
+		__mstrtab_args_##name, { 0 },				\
+		__mark_empty_function, NULL };				\
+		asm volatile ( "" : : "i" (&__mark_##name));		\
+		__mark_check_format(format, ## args);			\
+		if (!generic) {						\
+			immediate_if (&__mark_##name.state) { 		\
+				preempt_disable();			\
+				(*__mark_##name.call)			\
+					(&__mark_##name, call_data,	\
+					format, ## args);		\
+				preempt_enable();			\
+			}						\
+		} else {						\
+			_immediate_if (&__mark_##name.state) { 		\
+				preempt_disable();			\
+				(*__mark_##name.call)			\
+					(&__mark_##name, call_data,	\
+					format, ## args);		\
+				preempt_enable();			\
+			}						\
+		}							\
+	} while (0)
+
+extern void module_marker_update(struct module *mod);
+#else /* !CONFIG_MARKERS */
+#define __trace_mark(generic, name, call_data, format, args...) \
+		__mark_check_format(format, ## args)
+static inline void module_marker_update(struct module *mod) { }
+#endif /* CONFIG_MARKERS */
+
+/**
+ * trace_mark - Marker using code patching
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker using optimized code patching technique (immediate_if ())
+ * to be enabled.
+ */
+#define trace_mark(name, format, args...) \
+	__trace_mark(0, name, NULL, format, ## args)
+
+/**
+ * _trace_mark - Marker using variable read
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker using a standard memory read (_immediate_if ()) to be
+ * enabled. Should be used for markers in __init and __exit functions and in
+ * lockdep code.
+ */
+#define _trace_mark(name, format, args...) \
+	__trace_mark(1, name, NULL, format, ## args)
+
+#define MARK_MAX_FORMAT_LEN	1024
+
+/**
+ * MARK_NOARGS - Format string for a marker with no argument.
+ */
+#define MARK_NOARGS " "
+
+/* To be used for string format validity checking with gcc */
+static inline void __mark_check_format(const char *fmt, ...)
+		__attribute__ ((format (printf, 1, 2)));
+static inline void __mark_check_format(const char *fmt, ...) { }
+
+extern marker_probe_func __mark_empty_function;
+
+/*
+ * Connect a probe to a markers.
+ * pdata must be a valid allocated memory address, or NULL.
+ */
+extern int marker_probe_register(const char *name, const char *format,
+				marker_probe_func *probe, void *pdata);
+
+/*
+ * Returns the pdata given to marker_probe_register.
+ */
+extern void *marker_probe_unregister(const char *name);
+/*
+ * Unregister a marker by providing the registered pdata.
+ */
+extern void *marker_probe_unregister_pdata(void *pdata);
+
+extern int marker_arm(const char *name);
+extern int marker_disarm(const char *name);
+
+struct marker_iter {
+	struct module *module;
+	struct __mark_marker *marker;
+};
+
+extern void marker_iter_start(struct marker_iter *iter);
+extern void marker_iter_next(struct marker_iter *iter);
+extern void marker_iter_stop(struct marker_iter *iter);
+extern void marker_iter_reset(struct marker_iter *iter);
+extern void *marker_get_pdata(const char *name);
+
+#endif
Index: linux-2.6-lttng/include/linux/module.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/module.h	2007-08-10 19:44:17.000000000 -0400
+++ linux-2.6-lttng/include/linux/module.h	2007-08-10 23:54:38.000000000 -0400
@@ -16,6 +16,7 @@
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
 #include <linux/immediate.h>
+#include <linux/marker.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -380,6 +381,10 @@ struct module
 	const struct __immediate *immediate;
 	unsigned int num_immediate;
 #endif
+#ifdef CONFIG_MARKERS
+	struct __mark_marker *markers;
+	unsigned int num_markers;
+#endif
 };
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
Index: linux-2.6-lttng/kernel/module.c
===================================================================
--- linux-2.6-lttng.orig/kernel/module.c	2007-08-10 19:44:18.000000000 -0400
+++ linux-2.6-lttng/kernel/module.c	2007-08-10 23:54:38.000000000 -0400
@@ -1722,6 +1722,8 @@ static struct module *load_module(void _
 	unsigned int unusedgplindex;
 	unsigned int unusedgplcrcindex;
 	unsigned int immediateindex = 0;
+	unsigned int markersindex = 0;
+	unsigned int markersstringsindex = 0;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1821,6 +1823,10 @@ static struct module *load_module(void _
 #ifdef CONFIG_IMMEDIATE
 	immediateindex = find_sec(hdr, sechdrs, secstrings, "__immediate");
 #endif
+#ifdef CONFIG_MARKERS
+	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
+ 	markersstringsindex = find_sec(hdr, sechdrs, secstrings, "__markers_strings");
+#endif
 
 	/* Don't keep modinfo section */
 	sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -1980,6 +1986,10 @@ static struct module *load_module(void _
 			sechdrs[immediateindex].sh_size / sizeof(*mod->immediate);
 	}
 #endif
+	if (markersindex)
+		sechdrs[markersindex].sh_flags |= SHF_ALLOC;
+	if (markersstringsindex)
+		sechdrs[markersstringsindex].sh_flags |= SHF_ALLOC;
 
 	mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
 	if (unusedcrcindex)
@@ -2021,6 +2031,13 @@ static struct module *load_module(void _
 		if (err < 0)
 			goto cleanup;
 	}
+#ifdef CONFIG_MARKERS
+	if (markersindex) {
+		mod->markers = (void *)sechdrs[markersindex].sh_addr;
+		mod->num_markers =
+			sechdrs[markersindex].sh_size / sizeof(*mod->markers);
+	}
+#endif
 
         /* Find duplicate symbols */
 	err = verify_export_symbols(mod);
@@ -2046,6 +2063,8 @@ static struct module *load_module(void _
 	 }
 #endif
 
+	module_marker_update(mod);
+
 	module_immediate_setup(mod);
 
 	err = module_finalize(hdr, sechdrs, mod);
Index: linux-2.6-lttng/kernel/marker.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/kernel/marker.c	2007-08-10 19:44:18.000000000 -0400
@@ -0,0 +1,678 @@
+/*
+ * Copyright (C) 2007 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/marker.h>
+#include <linux/err.h>
+#include <linux/immediate.h>
+
+extern struct __mark_marker __start___markers[];
+extern struct __mark_marker __stop___markers[];
+
+/*
+ * module_mutex nests inside markers_mutex. Markers mutex protects the builtin
+ * and module markers, the hash table and deferred_sync.
+ */
+DEFINE_MUTEX(markers_mutex);
+
+/*
+ * Marker deferred synchronization.
+ * Upon marker probe_unregister, we delay call to synchronize_sched() to
+ * accelerate mass unregistration (only when there is no more reference to a
+ * give module do we call synchronize_sched()). However, we need to make sure
+ * every critical region have ended before we re-arm a marker that has been
+ * unregistered and then registered back with a different probe data.
+ */
+static int deferred_sync;
+
+/*
+ * Marker hash table, containing the active markers.
+ * Protected by module_mutex.
+ */
+#define MARKER_HASH_BITS 6
+#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
+
+struct marker_entry {
+	struct hlist_node hlist;
+	char *format;
+	marker_probe_func *probe;
+	void *pdata;
+	int refcount;	/* Number of times armed. 0 if disarmed. */
+	char name[0];	/* Contains name'\0'format'\0' */
+};
+
+static struct hlist_head marker_table[MARKER_TABLE_SIZE];
+
+/**
+ * __mark_empty_function - Empty probe callback
+ * @mdata: pointer of type const struct __mark_marker
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Empty callback provided as a probe to the markers. By providing this to a
+ * disabled marker, we makes sure the  execution flow is always valid even
+ * though the function pointer change and the marker enabling are two distinct
+ * operations that modifies the execution flow of preemptible code.
+ */
+void __mark_empty_function(const struct __mark_marker *mdata,
+	void *private_data,
+	const char *fmt, ...)
+{ }
+EXPORT_SYMBOL_GPL(__mark_empty_function);
+
+/*
+ * Get marker if the marker is present in the marker hash table.
+ * Must be called with markers_mutex held.
+ * Returns NULL if not present.
+ */
+static struct marker_entry *_get_marker(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t len = strlen(name) + 1;
+	u32 hash = jhash(name, len-1, 0);
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name))
+			return e;
+	}
+	return NULL;
+}
+
+/*
+ * Add the marker to the marker hash table. Must be called with markers_mutex
+ * held.
+ */
+static int _add_marker(const char *name,
+	const char *format, marker_probe_func *probe, void *pdata)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t name_len = strlen(name) + 1;
+	size_t format_len = 0;
+	u32 hash = jhash(name, name_len-1, 0);
+
+	if (format)
+		format_len = strlen(format) + 1;
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			printk(KERN_NOTICE
+				"Marker %s busy, probe %p already installed\n",
+				name, e->probe);
+			return -EBUSY;	/* Already there */
+		}
+	}
+	/*
+	 * Using kmalloc here to allocate a variable length element. Could
+	 * cause some memory fragmentation if overused.
+	 */
+	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+			GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	memcpy(&e->name[0], name, name_len);
+	if (format) {
+		e->format = &e->name[name_len];
+		memcpy(e->format, format, format_len);
+		trace_mark(core_marker_format, "name %s format %s",
+				e->name, e->format);
+	} else
+		e->format = NULL;
+	e->probe = probe;
+	e->pdata = pdata;
+	e->refcount = 0;
+	hlist_add_head(&e->hlist, head);
+	return 0;
+}
+
+/*
+ * Remove the marker from the marker hash table. Must be called with mutex_lock
+ * held.
+ */
+static void *_remove_marker(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	int found = 0;
+	size_t len = strlen(name) + 1;
+	void *pdata = NULL;
+	u32 hash = jhash(name, len-1, 0);
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			break;
+		}
+	}
+	if (found) {
+		pdata = e->pdata;
+		hlist_del(&e->hlist);
+		kfree(e);
+	}
+	return pdata;
+}
+
+/*
+ * Set the mark_entry format to the format found in the element.
+ */
+static int _marker_set_format(struct marker_entry **entry, const char *format)
+{
+	struct marker_entry *e;
+	size_t name_len = strlen((*entry)->name) + 1;
+	size_t format_len = strlen(format) + 1;
+
+	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+			GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	memcpy(&e->name[0], (*entry)->name, name_len);
+	e->format = &e->name[name_len];
+	memcpy(e->format, format, format_len);
+	e->probe = (*entry)->probe;
+	e->pdata = (*entry)->pdata;
+	e->refcount = (*entry)->refcount;
+	hlist_add_before(&e->hlist, &(*entry)->hlist);
+	hlist_del(&(*entry)->hlist);
+	kfree(*entry);
+	*entry = e;
+	trace_mark(core_marker_format, "name %s format %s",
+			e->name, e->format);
+	return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one marker.
+ */
+static int _set_marker(struct marker_entry **entry,
+			struct __mark_marker *elem)
+{
+	int ret;
+	BUG_ON(strcmp((*entry)->name, elem->name) != 0);
+
+	if ((*entry)->format) {
+		if (strcmp((*entry)->format, elem->format) != 0) {
+			printk(KERN_NOTICE
+				"Format mismatch for probe %s "
+				"(%s), marker (%s)\n",
+				(*entry)->name,
+				(*entry)->format,
+				elem->format);
+			return -EPERM;
+		}
+	} else {
+		ret = _marker_set_format(entry, elem->format);
+		if (ret)
+			return ret;
+	}
+	elem->call = (*entry)->probe;
+	elem->pdata = (*entry)->pdata;
+	_immediate_set(&elem->state, 1);
+	return 0;
+}
+
+/*
+ * Disable a marker and its probe callback.
+ * Note: only after a synchronize_sched() issued after setting elem->call to the
+ * empty function insures that the original callback is not used anymore. This
+ * insured by preemption disabling around the call site.
+ */
+static void _disable_marker(struct __mark_marker *elem)
+{
+	_immediate_set(&elem->state, 0);
+	elem->call = __mark_empty_function;
+	/*
+	 * Leave the pdata and id there, because removal is racy and should be
+	 * done only after a synchronize_sched(). There are never used until
+	 * the next initialization anyway.
+	 */
+}
+
+/*
+ * Updates the probe callback corresponding to a range of markers.
+ * Must be called with markers_mutex held.
+ */
+static void _marker_update_probe_range(
+	struct __mark_marker *begin,
+	struct __mark_marker *end,
+	struct module *probe_module,
+	int *refcount)
+{
+	struct __mark_marker *iter;
+	struct marker_entry *mark_entry;
+
+	for (iter = begin; iter < end; iter++) {
+		mark_entry = _get_marker(iter->name);
+		if (mark_entry && mark_entry->refcount) {
+			_set_marker(&mark_entry, iter);
+			/*
+			 * ignore error, continue
+			 */
+			if (probe_module)
+				if (probe_module ==
+			__module_text_address((unsigned long)mark_entry->probe))
+					(*refcount)++;
+		} else {
+			_disable_marker(iter);
+		}
+	}
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Update module probes.
+ * Must be called with markers_mutex held.
+ */
+static inline void __marker_update_probes_modules(struct module *probe_module,
+	int *refcount)
+{
+	struct module *mod;
+
+	list_for_each_entry(mod, &modules, list) {
+		if (!mod->taints) {
+			_marker_update_probe_range(mod->markers,
+				mod->markers+mod->num_markers,
+				probe_module, refcount);
+		}
+	}
+}
+#else
+static inline void __marker_update_probes_modules(struct module *probe_module,
+	int *refcount)
+{ }
+#endif
+
+/*
+ * Update probes, removing the faulty probes.
+ * Issues a synchronize_sched() when no reference to the module passed
+ * as parameter is found in the probes so the probe module can be
+ * safely unloaded from now on.
+ */
+static inline void __marker_update_probes(struct module *probe_module)
+{
+	int refcount = 0;
+
+	/* Core kernel markers */
+	_marker_update_probe_range(__start___markers,
+			__stop___markers, probe_module, &refcount);
+	/* Markers in modules. */
+	__marker_update_probes_modules(probe_module, &refcount);
+	if (probe_module && refcount == 0) {
+		synchronize_sched();
+		deferred_sync = 0;
+	}
+}
+
+#ifdef CONFIG_MODULES
+/**
+ * module_marker_update - Update module's markers
+ * @mod: pointer of type struct module identifying the target module
+ *
+ * Setup the marker according to the data present in the marker hash table
+ * upon module load. Must be called with module_mutex held.  Since the
+ * probe_module parameter is NULL, it is safe for refcount to be NULL.
+ */
+void module_marker_update(struct module *mod)
+{
+	if (!mod->taints)
+		_marker_update_probe_range(mod->markers,
+			mod->markers+mod->num_markers, NULL, NULL);
+}
+
+/*
+ * Update the system wide probes, with modules.
+ */
+static inline void _marker_update_probes(struct module *probe_module)
+{
+	mutex_lock(&module_mutex);
+	__marker_update_probes(probe_module);
+	mutex_unlock(&module_mutex);
+}
+#else
+/*
+ * Update the system wide probes, without modules.
+ */
+static inline void _marker_update_probes(struct module *probe_module)
+{
+	__marker_update_probes(probe_module);
+}
+#endif
+
+/**
+ * marker_probe_register -  Connect a probe to a marker
+ * @name: marker name
+ * @format: format string
+ * @probe: probe handler
+ * @pdata: probe private data
+ *
+ * pdata must be a valid allocated memory address, or NULL.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_probe_register(const char *name, const char *format,
+			marker_probe_func *probe, void *pdata)
+{
+	struct marker_entry *entry;
+	int ret = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = _get_marker(name);
+	if (entry && entry->refcount) {
+		ret = -EBUSY;
+		goto end;
+	}
+	if (deferred_sync) {
+		synchronize_sched();
+		deferred_sync = 0;
+	}
+	ret = _add_marker(name, format, probe, pdata);
+	if (ret)
+		goto end;
+	_marker_update_probes(NULL);
+end:
+	mutex_unlock(&markers_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_probe_register);
+
+/**
+ * marker_probe_unregister -  Disconnect a probe from a marker
+ * @name: marker name
+ *
+ * Returns the pdata given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister(const char *name)
+{
+	struct module *probe_module;
+	struct marker_entry *entry;
+	void *pdata;
+
+	mutex_lock(&markers_mutex);
+	entry = _get_marker(name);
+	if (!entry) {
+		pdata = ERR_PTR(-ENOENT);
+		goto end;
+	}
+	entry->refcount = 0;
+	/* In what module is the probe handler ? */
+	probe_module = __module_text_address((unsigned long)entry->probe);
+	pdata = _remove_marker(name);
+	deferred_sync = 1;
+	_marker_update_probes(probe_module);
+end:
+	mutex_unlock(&markers_mutex);
+	return pdata;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister);
+
+/**
+ * marker_probe_unregister -  Disconnect a probe from a marker
+ * @pdata: probe private data
+ *
+ * Unregister a marker by providing the registered pdata.
+ * Returns the pdata given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister_pdata(void *pdata)
+{
+	struct module *probe_module;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *entry;
+	int found = 0;
+	unsigned int i;
+
+	mutex_lock(&markers_mutex);
+	for (i = 0; i < MARKER_TABLE_SIZE; i++) {
+		head = &marker_table[i];
+		hlist_for_each_entry(entry, node, head, hlist) {
+			if (entry->pdata == pdata) {
+				found = 1;
+				goto iter_end;
+			}
+		}
+	}
+iter_end:
+	if (!found) {
+		pdata = ERR_PTR(-ENOENT);
+		goto end;
+	}
+	entry->refcount = 0;
+	/* In what module is the probe handler ? */
+	probe_module = __module_text_address((unsigned long)entry->probe);
+	pdata = _remove_marker(entry->name);
+	deferred_sync = 1;
+	_marker_update_probes(probe_module);
+end:
+	mutex_unlock(&markers_mutex);
+	return pdata;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister_pdata);
+
+/**
+ * marker_arm - Arm a marker
+ * @name: marker name
+ *
+ * Activate a marker. It keeps a reference count of the number of
+ * arming/disarming done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_arm(const char *name)
+{
+	struct marker_entry * entry;
+	int ret = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = _get_marker(name);
+	if (!entry) {
+		ret = -ENOENT;
+		goto end;
+	}
+	/*
+	 * Only need to update probes when refcount passes from 0 to 1.
+	 */
+	if (entry->refcount++)
+		goto end;
+	_marker_update_probes(NULL);
+end:
+	mutex_unlock(&markers_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_arm);
+
+/**
+ * marker_disarm - Disarm a marker
+ * @name: marker name
+ *
+ * Disarm a marker. It keeps a reference count of the number of arming/disarming
+ * done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_disarm(const char *name)
+{
+	struct marker_entry * entry;
+	int ret = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = _get_marker(name);
+	if (!entry) {
+		ret = -ENOENT;
+		goto end;
+	}
+	/*
+	 * Only permit decrement refcount if higher than 0.
+	 * Do probe update only on 1 -> 0 transition.
+	 */
+	if (entry->refcount) {
+		if (--entry->refcount)
+			goto end;
+	} else {
+		ret = -EPERM;
+		goto end;
+	}
+	_marker_update_probes(NULL);
+end:
+	mutex_unlock(&markers_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_disarm);
+
+/**
+ * marker_get_pdata - Get a marker's probe private data
+ * @name: marker name
+ *
+ * Returns the pdata pointer, or an ERR_PTR.
+ * The pdata pointer should _only_ be dereferenced if the caller is the owner of
+ * the data, or its content could vanish. This is mostly used to confirm that a
+ * caller is the owner of a registered probe.
+ */
+void *marker_get_pdata(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t name_len = strlen(name) + 1;
+	u32 hash = jhash(name, name_len-1, 0);
+	int found = 0;
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			return e->pdata;
+		}
+	}
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(marker_get_pdata);
+
+static inline int _marker_get_iter_range(struct __mark_marker **marker,
+	struct __mark_marker *begin,
+	struct __mark_marker *end)
+{
+	int found = 0;
+
+	if (!*marker && begin != end) {
+		found = 1;
+		*marker = begin;
+	} else if (*marker >= begin && *marker < end) {
+		found = 1;
+		/*
+		 * *marker is known to be a valid marker from now on.
+		 */
+	}
+	return found;
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Returns 0 if current not foud.
+ * Returns 1 if current found.
+ * Should be called with module_mutex taken.
+ */
+static inline int marker_get_iter_modules(struct marker_iter *iter)
+{
+	struct module *iter_mod;
+	int found = 0;
+
+	list_for_each_entry(iter_mod, &modules, list) {
+		if (!iter_mod->taints) {
+			/*
+			 * Sorted module list
+			 */
+			if (iter_mod < iter->module) {
+				continue;
+			} else if (iter_mod > iter->module) {
+				iter->marker = NULL;
+			} else {
+			}
+			found = _marker_get_iter_range(&iter->marker,
+				iter_mod->markers,
+				iter_mod->markers + iter_mod->num_markers);
+			if (found) {
+				iter->module = iter_mod;
+				break;
+			}
+
+		}
+	}
+	return found;
+}
+#else
+static inline int marker_get_iter_modules(struct marker_iter *iter)
+{
+	return 0;
+}
+#endif
+
+static inline void marker_get_iter(struct marker_iter *iter)
+{
+	int found = 0;
+
+	/* Core kernel markers */
+	if (!iter->module) {
+		found = _marker_get_iter_range(&iter->marker,
+				__start___markers, __stop___markers);
+		if (found)
+			goto end;
+	}
+	/* Markers in modules. */
+	found = marker_get_iter_modules(iter);
+end:
+	if (!found)
+		marker_iter_reset(iter);
+}
+
+void marker_iter_start(struct marker_iter *iter)
+{
+	mutex_lock(&module_mutex);
+	marker_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(marker_iter_start);
+
+void marker_iter_next(struct marker_iter *iter)
+{
+	iter->marker++;
+	/*
+	 * iter->marker may be invalid because we blindly incremented it.
+	 * Make sure it is valid by marshalling on the markers, getting the
+	 * markers from following modules if necessary.
+	 */
+	marker_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(marker_iter_next);
+
+void marker_iter_stop(struct marker_iter *iter)
+{
+	mutex_unlock(&module_mutex);
+}
+EXPORT_SYMBOL_GPL(marker_iter_stop);
+
+void marker_iter_reset(struct marker_iter *iter)
+{
+	iter->module = NULL;
+	iter->marker = NULL;
+}
+EXPORT_SYMBOL_GPL(marker_iter_reset);

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 43+ messages in thread

end of thread, other threads:[~2007-11-11 23:24 UTC | newest]

Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-09-18 21:13 [patch 0/4] Linux Kernel Markers for 2.6.23-rc6-mm1 Mathieu Desnoyers
2007-09-18 21:13 ` [patch 1/4] Linux Kernel Markers - Architecture Independent Code Mathieu Desnoyers
2007-09-19 11:37   ` Mathieu Desnoyers
2007-09-19 13:53     ` Frank Ch. Eigler
2007-09-19 20:32       ` Denys Vlasenko
2007-09-21 12:58         ` Mathieu Desnoyers
2007-09-21 13:07           ` Christoph Hellwig
2007-09-21 13:30           ` Frank Ch. Eigler
2007-09-21 13:38             ` Mathieu Desnoyers
2007-10-15 19:41               ` Frank Ch. Eigler
2007-10-15 23:12                 ` Mathieu Desnoyers
2007-10-15 23:50                   ` Roland McGrath
2007-10-25 19:17                     ` Mathieu Desnoyers
2007-10-26 14:28                       ` Frank Ch. Eigler
2007-11-01  1:06                         ` [PATCH] markers: modpost Roland McGrath
2007-11-01  2:46                           ` Mathieu Desnoyers
2007-11-01  9:37                             ` Roland McGrath
2007-11-01 11:24                               ` Mathieu Desnoyers
2007-11-08 19:31                                 ` David Smith
2007-11-08 19:36                                   ` Mathieu Desnoyers
2007-11-08 19:45                                     ` David Smith
2007-11-09 16:36                                     ` David Smith
2007-11-11 23:24                                       ` Mathieu Desnoyers
2007-09-19 17:32     ` [patch 1/4] Linux Kernel Markers - Architecture Independent Code Denys Vlasenko
2007-09-19 18:46       ` Mathieu Desnoyers
2007-09-19 18:50         ` Mathieu Desnoyers
2007-09-21  0:58   ` Steven Rostedt
2007-09-21 13:45     ` Mathieu Desnoyers
2007-09-18 21:13 ` [patch 2/4] Linux Kernel Markers - Use instrumentation kconfig menu Mathieu Desnoyers
2007-09-18 21:13 ` [patch 3/4] Linux Kernel Markers - Documentation Mathieu Desnoyers
2007-09-18 23:22   ` Randy Dunlap
2007-09-19 11:18     ` Mathieu Desnoyers
2007-09-18 21:13 ` [patch 4/4] Port of blktrace to the Linux Kernel Markers Mathieu Desnoyers
2007-09-21  1:03   ` Steven Rostedt
2007-09-21 13:46     ` Mathieu Desnoyers
  -- strict thread matches above, loose matches on Subject: below --
2007-09-17 18:46 [patch 0/4] " Mathieu Desnoyers
2007-09-17 18:46 ` [patch 1/4] Linux Kernel Markers - Architecture Independent Code Mathieu Desnoyers
2007-08-27 16:05 [patch 0/4] Linux Kernel Markers Mathieu Desnoyers
2007-08-27 16:05 ` [patch 1/4] Linux Kernel Markers - Architecture Independent Code Mathieu Desnoyers
2007-08-20 20:27 [patch 0/4] Linux Kernel Markers Mathieu Desnoyers
2007-08-20 20:27 ` [patch 1/4] Linux Kernel Markers - Architecture Independent Code Mathieu Desnoyers
2007-08-21  0:43   ` Rusty Russell
2007-08-24 16:26     ` Mathieu Desnoyers
2007-08-25 20:49       ` Rusty Russell
2007-08-25 21:26         ` Mathieu Desnoyers
2007-08-12 15:10 [patch 0/4] Linux Kernel Markers Mathieu Desnoyers
2007-08-12 15:10 ` [patch 1/4] Linux Kernel Markers - Architecture Independent Code Mathieu Desnoyers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).