* [PATCH][RFC] trace: profile likely and unlikely annotations
From: Steven Rostedt @ 2008-10-28  4:12 UTC
To: LKML
Cc: Mike Snitzer, Theodore Tso, Andrew Morton, Ingo Molnar, Thomas Gleixner,
    Peter Zijlstra, Arjan van de Ven, Frederic Weisbecker

Andrew Morton recently suggested having an in-kernel way to profile
likely and unlikely macros. This patch achieves that goal.

When configured, every(*) likely and unlikely macro gets a counter
attached to it. When the condition is hit, the hits and misses of that
condition are recorded. These numbers can later be retrieved by:

  /debugfs/tracing/profile_likely    - All likely markers
  /debugfs/tracing/profile_unlikely  - All unlikely markers.

# cat /debug/tracing/profile_unlikely | \
    awk '{ if ((($1 + 1.0)/ ($2 + 1.0)) > 0.5) { print $0; }}' | head -15
 Hit    Missed     Function
 ---    ------     --------
  36278   44749   __switch_to+0x129/0x55d
  70064   10964   __switch_to+0xcd/0x55d
  70060   10964   __switch_to+0x63/0x55d
   2628       0   syscall_trace_leave+0x0/0x154
      1       0   native_smp_prepare_cpus+0x3e9/0x485
 287349   98809   place_entity+0xac/0x11c
 141308  236748   calc_delta_mine+0x8b/0x109
  55091       0   enqueue_task+0x0/0x9b
  71724   52117   pick_next_task_fair+0x0/0xc1
     11       5   yield_task_fair+0x0/0x13b
   1269       0   pre_schedule_rt+0x0/0x98
  46392    1265   pick_next_task_rt+0x0/0x12f
  51523       0   dequeue_task+0x57/0x114

# cat /debug/tracing/profile_likely | \
    awk '{ if ((($2 + 1.0)/ ($1 + 1.0)) > 0.5) { print $0; }}'
 Hit    Missed     Function
 ---    ------     --------
  43316   37700   schedule+0x6e7/0xdb2
  23032   12997   schedule+0xa9b/0xdb2
      0       7   __cancel_work_timer+0x4b/0x292
    885    1683   update_wall_time+0x27b/0x4fd
      0  369948   audit_syscall_exit+0x131/0x403
     19  370851   audit_free+0x0/0x27e
      0  368981   audit_syscall_entry+0x1a4/0x226
     19     900   audit_free+0xf0/0x27e
      0     973   audit_alloc+0xa6/0x13c
      0       6   move_masked_irq+0x2b/0x174
  12878  223023   page_evictable+0x9a/0x1f1
      9       7   flush_cpu_slab+0x0/0x143
 634634  324186   kmem_cache_free+0x79/0x139
   3142   61979   dentry_lru_del_init+0x0/0x89
    948    1166   load_elf_binary+0xa00/0x18ad
    102      82   disk_part_iter_next+0x0/0xf1
      0      11   tcp_mss_split_point+0x0/0xbd

As you can see by the above, there's a bit of work to do in rethinking
the use of some unlikelys and likelys.

Note: the unlikely case had 60 hits that were more than 50%.

(*) Not every unlikely is recorded; those that are used by vsyscalls
(a few of them) had to be marked as unlikely_notrace().
Signed-off-by: Steven Rostedt <srostedt@redhat.com> --- arch/x86/kernel/vsyscall_64.c | 5 - include/asm-generic/vmlinux.lds.h | 14 +++ include/linux/compiler.h | 38 ++++++++ include/linux/seqlock.h | 3 kernel/trace/Kconfig | 16 +++ kernel/trace/Makefile | 2 kernel/trace/trace_unlikely.c | 163 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 237 insertions(+), 4 deletions(-) Index: linux-tip.git/kernel/trace/Makefile =================================================================== --- linux-tip.git.orig/kernel/trace/Makefile 2008-10-27 23:11:52.000000000 -0400 +++ linux-tip.git/kernel/trace/Makefile 2008-10-27 23:12:08.000000000 -0400 @@ -26,4 +26,6 @@ obj-$(CONFIG_FUNCTION_PROFILER) += trace obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o obj-$(CONFIG_BOOT_TRACER) += trace_boot.o +obj-$(CONFIG_TRACE_UNLIKELY_PROFILE) += trace_unlikely.o + libftrace-y := ftrace.o Index: linux-tip.git/arch/x86/kernel/vsyscall_64.c =================================================================== --- linux-tip.git.orig/arch/x86/kernel/vsyscall_64.c 2008-10-27 23:11:52.000000000 -0400 +++ linux-tip.git/arch/x86/kernel/vsyscall_64.c 2008-10-27 23:12:44.000000000 -0400 @@ -124,7 +124,8 @@ static __always_inline void do_vgettimeo seq = read_seqbegin(&__vsyscall_gtod_data.lock); vread = __vsyscall_gtod_data.clock.vread; - if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) { + if (unlikely_notrace(!__vsyscall_gtod_data.sysctl_enabled || + !vread)) { gettimeofday(tv,NULL); return; } @@ -165,7 +166,7 @@ time_t __vsyscall(1) vtime(time_t *t) { struct timeval tv; time_t result; - if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) + if (unlikely_notrace(!__vsyscall_gtod_data.sysctl_enabled)) return time_syscall(t); vgettimeofday(&tv, NULL); Index: linux-tip.git/include/asm-generic/vmlinux.lds.h =================================================================== --- linux-tip.git.orig/include/asm-generic/vmlinux.lds.h 2008-10-27 23:11:52.000000000 -0400 +++ linux-tip.git/include/asm-generic/vmlinux.lds.h 2008-10-27 23:12:08.000000000 -0400 @@ -45,6 +45,17 @@ #define MCOUNT_REC() #endif +#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_likely_profile) = .; \ + *(_ftrace_likely) \ + VMLINUX_SYMBOL(__stop_likely_profile) = .; \ + VMLINUX_SYMBOL(__start_unlikely_profile) = .; \ + *(_ftrace_unlikely) \ + VMLINUX_SYMBOL(__stop_unlikely_profile) = .; +#else +#define LIKELY_PROFILE() +#endif + /* .data section */ #define DATA_DATA \ *(.data) \ @@ -62,7 +73,8 @@ VMLINUX_SYMBOL(__stop___markers) = .; \ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ - VMLINUX_SYMBOL(__stop___tracepoints) = .; + VMLINUX_SYMBOL(__stop___tracepoints) = .; \ + LIKELY_PROFILE() #define RO_DATA(align) \ . 
= ALIGN((align)); \ Index: linux-tip.git/include/linux/compiler.h =================================================================== --- linux-tip.git.orig/include/linux/compiler.h 2008-10-27 23:11:52.000000000 -0400 +++ linux-tip.git/include/linux/compiler.h 2008-10-28 00:00:50.000000000 -0400 @@ -59,8 +59,46 @@ extern void __chk_io_ptr(const volatile * specific implementations come from the above header files */ +#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +struct ftrace_likely_data { + unsigned long ip; + unsigned long hit; + unsigned long missed; +}; +void ftrace_likely_update(struct ftrace_likely_data *f, int val); + +#define likely_notrace(x) __builtin_expect(!!(x), 1) +#define unlikely_notrace(x) __builtin_expect(!!(x), 0) + +/* Can't use _THIS_IP_ because it screws up header dependencies */ +#define __THIS_IP__ ({ __label__ __here; __here: (unsigned long)&&__here; }) + +#define likely(x) ({ \ + int ______r; \ + static struct ftrace_likely_data ______f \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_likely"))); \ + if (unlikely_notrace(!______f.ip)) \ + ______f.ip = __THIS_IP__; \ + ______r = likely_notrace(x); \ + ftrace_likely_update(&______f, ______r); \ + ______r; \ + }) +#define unlikely(x) ({ \ + int ______r; \ + static struct ftrace_likely_data ______f \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_unlikely"))); \ + if (unlikely_notrace(!______f.ip)) \ + ______f.ip = __THIS_IP__; \ + ______r = unlikely_notrace(x); \ + ftrace_likely_update(&______f, ______r); \ + ______r; \ + }) +#else #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) +#endif /* Optimization barrier */ #ifndef barrier Index: linux-tip.git/kernel/trace/Kconfig =================================================================== --- linux-tip.git.orig/kernel/trace/Kconfig 2008-10-27 23:11:52.000000000 -0400 +++ linux-tip.git/kernel/trace/Kconfig 2008-10-27 23:12:08.000000000 -0400 @@ -138,6 +138,22 @@ config BOOT_TRACER selected, because the self-tests are an initcall as well and that would invalidate the boot trace. ) +config TRACE_UNLIKELY_PROFILE + bool "Trace likely/unlikely profiler" + depends on DEBUG_KERNEL + select TRACING + help + This tracer profiles all the the likely and unlikely macros + in the kernel. It will display the results in: + + /debugfs/tracing/profile_likely + /debugfs/tracing/profile_unlikely + + Note: this will add a significant overhead, only turn this + on if you need to profile the system's use of these macros. + + Say N if unsure. + config STACK_TRACER bool "Trace max stack" depends on HAVE_FUNCTION_TRACER Index: linux-tip.git/kernel/trace/trace_unlikely.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-tip.git/kernel/trace/trace_unlikely.c 2008-10-27 23:13:53.000000000 -0400 @@ -0,0 +1,163 @@ +/* + * unlikely profiler + * + * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> + */ +#include <linux/kallsyms.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/ftrace.h> +#include <linux/hash.h> +#include <linux/fs.h> +#include <asm/local.h> +#include "trace.h" + +void ftrace_likely_update(struct ftrace_likely_data *f, int val) +{ + /* FIXME: Make this atomic! 
*/ + if (val) + f->hit++; + else + f->missed++; +} +EXPORT_SYMBOL(ftrace_likely_update); + +struct ftrace_pointer { + void *start; + void *stop; +}; + +static void * +t_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct ftrace_pointer *f = m->private; + struct ftrace_likely_data *p = v; + + (*pos)++; + + if (v == (void *)1) + return f->start; + + ++p; + + if ((void *)p >= (void *)f->stop) + return NULL; + + return p; +} + +static void *t_start(struct seq_file *m, loff_t *pos) +{ + void *t = (void *)1; + loff_t l = 0; + + for (; t && l < *pos; t = t_next(m, t, &l)) + ; + + return t; +} + +static void t_stop(struct seq_file *m, void *p) +{ +} + +static int trace_lookup_sym(struct seq_file *m, unsigned long addr) +{ +#ifdef CONFIG_KALLSYMS + char str[KSYM_SYMBOL_LEN]; + + sprint_symbol(str, addr); + + return seq_printf(m, "%s\n", str); +#else + return seq_printf(m, "%p\n", (void *)addr); +#endif +} + +static int t_show(struct seq_file *m, void *v) +{ + struct ftrace_likely_data *p = v; + + if (v == (void *)1) { + seq_printf(m, " Hit Missed Function\n" + " --- ------ --------\n"); + return 0; + } + + /* skip null records */ + if (!p->ip) + return 0; + + seq_printf(m, " %8lu %8lu ", p->hit, p->missed); + trace_lookup_sym(m, p->ip); + return 0; +} + +static struct seq_operations tracing_likely_seq_ops = { + .start = t_start, + .next = t_next, + .stop = t_stop, + .show = t_show, +}; + +static int tracing_likely_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = seq_open(file, &tracing_likely_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = (void *)inode->i_private; + } + + return ret; +} + +static struct file_operations tracing_likely_fops = { + .open = tracing_likely_open, + .read = seq_read, + .llseek = seq_lseek, +}; + +extern unsigned long __start_likely_profile[]; +extern unsigned long __stop_likely_profile[]; +extern unsigned long __start_unlikely_profile[]; +extern unsigned long __stop_unlikely_profile[]; + +static struct ftrace_pointer ftrace_likely_pos = { + .start = __start_likely_profile, + .stop = __stop_likely_profile, +}; + +static struct ftrace_pointer ftrace_unlikely_pos = { + .start = __start_unlikely_profile, + .stop = __stop_unlikely_profile, +}; + +static __init int ftrace_unlikely_init(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + + entry = debugfs_create_file("profile_likely", 0444, d_tracer, + &ftrace_likely_pos, + &tracing_likely_fops); + if (!entry) + pr_warning("Could not create debugfs 'profile_likely' entry\n"); + + entry = debugfs_create_file("profile_unlikely", 0444, d_tracer, + &ftrace_unlikely_pos, + &tracing_likely_fops); + if (!entry) + pr_warning("Could not create debugfs" + " 'profile_unlikely' entry\n"); + + return 0; +} + +device_initcall(ftrace_unlikely_init); Index: linux-tip.git/include/linux/seqlock.h =================================================================== --- linux-tip.git.orig/include/linux/seqlock.h 2008-10-27 23:11:52.000000000 -0400 +++ linux-tip.git/include/linux/seqlock.h 2008-10-27 23:12:08.000000000 -0400 @@ -90,7 +90,8 @@ static __always_inline unsigned read_seq repeat: ret = sl->sequence; smp_rmb(); - if (unlikely(ret & 1)) { + /* Used in vsyscall, need notrace version */ + if (unlikely_notrace(ret & 1)) { cpu_relax(); goto repeat; } ^ permalink raw reply [flat|nested] 17+ messages in thread
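[Editor's note: stripped of the kernel plumbing, the mechanism in the
patch above is just a counter bolted onto __builtin_expect(). A minimal
userspace sketch of the idea follows; unlikely_profiled and struct
likely_site are invented names, and the real patch auto-collects its
records via the _ftrace_unlikely ELF section rather than passing them
by hand:

#include <stdio.h>

/* Hypothetical userspace stand-in for the patch's per-site record;
 * the kernel version lives in a dedicated ELF section instead. */
struct likely_site {
	const char *func;
	int line;
	unsigned long hit;
	unsigned long missed;
};

/* Count how often the annotated branch is actually taken, then still
 * hand the compiler the usual static-prediction hint. */
#define unlikely_profiled(x, site) ({			\
	int __r = !!(x);				\
	if (__r)					\
		(site)->hit++;				\
	else						\
		(site)->missed++;			\
	__builtin_expect(__r, 0);			\
})

int main(void)
{
	static struct likely_site site = { "main", __LINE__, 0, 0 };
	unsigned long i;

	for (i = 0; i < 100; i++)
		if (unlikely_profiled(i % 10 == 0, &site))
			;	/* the "unlikely" path: taken 10 of 100 times */

	printf(" Hit    Missed   Function\n");
	printf("%5lu  %7lu   %s:%d\n", site.hit, site.missed,
	       site.func, site.line);
	return 0;
}

Compiled with gcc, this prints "10  90  main:<line>", i.e. the
annotation here was correct 90% of the time.]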
* Re: [PATCH][RFC] trace: profile likely and unlikely annotations
From: Arjan van de Ven @ 2008-10-28  4:23 UTC
To: Steven Rostedt
Cc: LKML, Mike Snitzer, Theodore Tso, Andrew Morton, Ingo Molnar,
    Thomas Gleixner, Peter Zijlstra, Frederic Weisbecker

On Tue, 28 Oct 2008 00:12:48 -0400 (EDT)
Steven Rostedt <rostedt@goodmis.org> wrote:

>
> Andrew Morton recently suggested having an in-kernel way to profile
> likely and unlikely macros. This patch achieves that goal.

alternatively we remove likely/unlikely ;)
or maybe only ban it from drivers or something.

--
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings,
visit http://www.lesswatts.org
* Re: [PATCH][RFC] trace: profile likely and unlikely annotations
From: Andrew Morton @ 2008-10-28  4:39 UTC
To: Steven Rostedt
Cc: LKML, Mike Snitzer, Theodore Tso, Ingo Molnar, Thomas Gleixner,
    Peter Zijlstra, Arjan van de Ven, Frederic Weisbecker

On Tue, 28 Oct 2008 00:12:48 -0400 (EDT) Steven Rostedt <rostedt@goodmis.org> wrote:

> (*) Not every unlikely is recorded; those that are used by vsyscalls
> (a few of them) had to be marked as unlikely_notrace().

Yes, it took continuous maintenance to weed this stuff out of
profile-likely-unlikely-macros.patch.

I'd suggest that you take a peek at this patch and make sure that
you've addressed all the sites which had to be fixed up while
maintaining this one.

(I've been maintaining this for 2-1/2 years.  Sniff)


From: Daniel Walker <dwalker@mvista.com>

-ENOCHANGELOG!

Creates /proc/likely_prof.

[randy.dunlap@oracle.com: profile_likely: export do_check_likely]
[akpm@linux-foundation.org: fixes and general maintenance]
[dwalker@mvista.com: likely_prof changed to use proc_create]
[12o3l@tiscali.nl: likeliness accounting change and cleanup]
[adobriyan@gmail.com: proc: remove proc_root from drivers]
[12o3l@tiscali.nl: update to test_and_set_bit_lock / clear_bit_unlock]
[dwalker@mvista.com: likely-profiling: disable ftrace]
Signed-off-by: Daniel Walker <dwalker@mvista.com>
Signed-off-by: Hua Zhong <hzhong@gmail.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Roel Kluin <12o3l@tiscali.nl>
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 arch/x86/kernel/vsyscall_64.c  |    2
 arch/x86/vdso/vclock_gettime.c |    2
 include/linux/compiler.h       |   36 +++++++
 lib/Kconfig.debug              |    8 +
 lib/Makefile                   |    5 +
 lib/likely_prof.c              |  150 +++++++++++++++++++++++++++++++
 6 files changed, 203 insertions(+)

diff -puN arch/x86/kernel/vsyscall_64.c~profile-likely-unlikely-macros arch/x86/kernel/vsyscall_64.c
--- a/arch/x86/kernel/vsyscall_64.c~profile-likely-unlikely-macros
+++ a/arch/x86/kernel/vsyscall_64.c
@@ -17,6 +17,8 @@
  * want per guest time just set the kernel.vsyscall64 sysctl to 0.
  */
+#define SUPPRESS_LIKELY_PROFILING
+
 #include <linux/time.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
diff -puN arch/x86/vdso/vclock_gettime.c~profile-likely-unlikely-macros arch/x86/vdso/vclock_gettime.c
--- a/arch/x86/vdso/vclock_gettime.c~profile-likely-unlikely-macros
+++ a/arch/x86/vdso/vclock_gettime.c
@@ -9,6 +9,8 @@
  * Also alternative() doesn't work.
*/ +#define SUPPRESS_LIKELY_PROFILING + #include <linux/kernel.h> #include <linux/posix-timers.h> #include <linux/time.h> diff -puN include/linux/compiler.h~profile-likely-unlikely-macros include/linux/compiler.h --- a/include/linux/compiler.h~profile-likely-unlikely-macros +++ a/include/linux/compiler.h @@ -53,6 +53,41 @@ extern void __chk_io_ptr(const volatile # include <linux/compiler-intel.h> #endif +#if defined(CONFIG_PROFILE_LIKELY) && !defined(SUPPRESS_LIKELY_PROFILING) && \ + !(defined(CONFIG_MODULE_UNLOAD) && defined(MODULE)) +struct likeliness { + char *const file; + unsigned long caller; + unsigned int count[2]; + struct likeliness *next; + unsigned int label; +}; + +extern int do_check_likely(struct likeliness *likeliness, unsigned int exp); + +#define LP_IS_EXPECTED 1 +#define LP_UNSEEN 4 +#define LP_LINE_SHIFT 3 + +#define __check_likely(exp, is_likely) \ + ({ \ + static struct likeliness likeliness = { \ + .file = __FILE__, \ + .label = __LINE__ << LP_LINE_SHIFT | \ + LP_UNSEEN | is_likely, \ + }; \ + do_check_likely(&likeliness, !!(exp)); \ + }) + +/* + * We check for constant values with __builtin_constant_p() since + * it's not interesting to profile them, and there is a compiler + * bug in gcc 3.x which blows up during constant evalution when + * CONFIG_PROFILE_LIKELY is turned on. + */ +#define likely(x) (__builtin_constant_p(x) ? (!!(x)) : __check_likely((x), 1)) +#define unlikely(x) (__builtin_constant_p(x) ? (!!(x)) : __check_likely((x), 0)) +#else /* * Generic compiler-dependent macros required for kernel * build go below this comment. Actual compiler/compiler version @@ -61,6 +96,7 @@ extern void __chk_io_ptr(const volatile #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) +#endif /* Optimization barrier */ #ifndef barrier diff -puN lib/Kconfig.debug~profile-likely-unlikely-macros lib/Kconfig.debug --- a/lib/Kconfig.debug~profile-likely-unlikely-macros +++ a/lib/Kconfig.debug @@ -568,6 +568,14 @@ config DEBUG_SYNCHRO_TEST See Documentation/synchro-test.txt. +config PROFILE_LIKELY + bool "Record return values from likely/unlikely macros" + default n + help + Adds profiling on likely/unlikly macros . To see the + results of the profiling you can view the following, + /proc/likely_prof + config BOOT_PRINTK_DELAY bool "Delay each boot printk message by N milliseconds" depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY diff -puN lib/Makefile~profile-likely-unlikely-macros lib/Makefile --- a/lib/Makefile~profile-likely-unlikely-macros +++ a/lib/Makefile @@ -22,6 +22,9 @@ obj-y += bcd.o div64.o sort.o parser.o h bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ string_helpers.o +# likely profiling can cause recursion in ftrace, so don't trace it. +CFLAGS_REMOVE_likely_prof.o = -pg + ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG CFLAGS_kobject_uevent.o += -DDEBUG @@ -82,6 +85,8 @@ obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += sys obj-$(CONFIG_DYNAMIC_PRINTK_DEBUG) += dynamic_printk.o +obj-$(CONFIG_PROFILE_LIKELY) += likely_prof.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff -puN /dev/null lib/likely_prof.c --- /dev/null +++ a/lib/likely_prof.c @@ -0,0 +1,150 @@ +/* + * This code should enable profiling the likely and unlikely macros. 
+ * + * Output goes in /proc/likely_prof + * + * Authors: + * Daniel Walker <dwalker@mvista.com> + * Hua Zhong <hzhong@gmail.com> + * Andrew Morton <akpm@osdl.org> + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <linux/proc_fs.h> +#include <linux/kallsyms.h> + +#include <asm/bug.h> +#include <asm/atomic.h> + +static struct likeliness *likeliness_head; + +int do_check_likely(struct likeliness *likeliness, unsigned int ret) +{ + static unsigned long likely_lock; + + likeliness->count[ret]++; + + if (likeliness->label & LP_UNSEEN) { + /* + * We don't simply use a spinlock because internally to the + * spinlock there is a call to unlikely which causes recursion. + * We opted for this method because we didn't need a preempt/irq + * disable and it was a bit cleaner then using internal __raw + * spinlock calls. + */ + if (!test_and_set_bit_lock(0, &likely_lock)) { + if (likeliness->label & LP_UNSEEN) { + likeliness->label &= (~LP_UNSEEN); + likeliness->next = likeliness_head; + likeliness_head = likeliness; + likeliness->caller = (unsigned long) + __builtin_return_address(0); + } + clear_bit_unlock(0, &likely_lock); + } + } + + return ret; +} +EXPORT_SYMBOL(do_check_likely); + +static void * lp_seq_start(struct seq_file *out, loff_t *pos) +{ + + if (!*pos) { + + seq_printf(out, "Likely Profiling Results\n"); + seq_printf(out, " --------------------------------------------" + "------------------------\n"); + seq_printf(out, "[+- ]Type | # True | # False | Function@" + "Filename:Line\n"); + + out->private = likeliness_head; + } + + return out->private; +} + +static void *lp_seq_next(struct seq_file *out, void *p, loff_t *pos) +{ + struct likeliness *entry = p; + + if (entry->next) { + ++(*pos); + out->private = entry->next; + } else + out->private = NULL; + + return out->private; +} + +static int lp_seq_show(struct seq_file *out, void *p) +{ + struct likeliness *entry = p; + unsigned int pos = entry->count[1]; + unsigned int neg = entry->count[0]; + char function[KSYM_SYMBOL_LEN]; + + /* + * Balanced if the suggestion was false in less than 5% of the tests + */ + if (!(entry->label & LP_IS_EXPECTED)) { + if (pos + neg < 20 * pos) + seq_printf(out, "+"); + else + seq_printf(out, " "); + + seq_printf(out, "unlikely "); + } else { + if (pos + neg < 20 * neg) + seq_printf(out, "-"); + else + seq_printf(out, " "); + + seq_printf(out, "likely "); + } + + sprint_symbol(function, entry->caller); + seq_printf(out, "|%9u|%9u|\t%s@%s:%u\n", pos, neg, function, + entry->file, entry->label >> LP_LINE_SHIFT); + + return 0; +} + +static void lp_seq_stop(struct seq_file *m, void *p) +{ +} + +static struct seq_operations likely_profiling_ops = { + .start = lp_seq_start, + .next = lp_seq_next, + .stop = lp_seq_stop, + .show = lp_seq_show +}; + +static int lp_results_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &likely_profiling_ops); +} + +static struct file_operations proc_likely_operations = { + .open = lp_results_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init init_likely(void) +{ + struct proc_dir_entry *entry = + proc_create("likely_prof", 0, NULL, &proc_likely_operations); + if (!entry) + return 1; + + return 0; +} +__initcall(init_likely); _ ^ permalink raw reply [flat|nested] 17+ messages in thread
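[Editor's note: do_check_likely() above cannot take a normal spinlock,
because spin_lock() itself contains an unlikely() and would recurse, so
first-time registration of a record is done under a bare test-and-set
bit. A compilable userspace sketch of that one-shot registration
pattern, with C11 atomic_flag standing in for test_and_set_bit_lock()/
clear_bit_unlock() (all names here are invented for illustration):

#include <stdatomic.h>
#include <stdio.h>

struct rec {
	const char *name;
	struct rec *next;
	int seen;
};

static struct rec *head;
static atomic_flag list_lock = ATOMIC_FLAG_INIT;

/* Link a record into the global list exactly once, without a
 * conventional lock.  If the flag is already held, registration is
 * simply skipped this time; a later call will retry. */
static void register_once(struct rec *r)
{
	if (r->seen)
		return;
	if (!atomic_flag_test_and_set(&list_lock)) {
		if (!r->seen) {		/* re-check under the "lock" */
			r->seen = 1;
			r->next = head;
			head = r;
		}
		atomic_flag_clear(&list_lock);
	}
}

int main(void)
{
	static struct rec a = { "site-a" }, b = { "site-b" };
	struct rec *p;

	register_once(&a);
	register_once(&b);
	register_once(&a);	/* second call is a no-op */

	for (p = head; p; p = p->next)
		printf("%s\n", p->name);
	return 0;
}

The unlocked r->seen test followed by a re-check inside the critical
section mirrors the double LP_UNSEEN test in the patch above.]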
* Re: [PATCH][RFC] trace: profile likely and unlikely annotations
From: Theodore Tso @ 2008-10-28 14:37 UTC
To: Steven Rostedt
Cc: LKML, Mike Snitzer, Andrew Morton, Ingo Molnar, Thomas Gleixner,
    Peter Zijlstra, Arjan van de Ven, Frederic Weisbecker

On Tue, Oct 28, 2008 at 12:12:48AM -0400, Steven Rostedt wrote:
>
> Andrew Morton recently suggested having an in-kernel way to profile
> likely and unlikely macros. This patch achieves that goal.

Maybe I'm confused, but when I read through the patch, it looks like
'hit' is incremented whenever the condition is true, and 'missed' is
incremented whenever the condition is false, correct?

Is that what you intended?  So for profile_unlikely, "missed" is good,
and "hit" is bad, and for profile_likely, "hit" is good, and "missed"
is bad.  That seems horribly confusing.

If that wasn't what you intended, the meaning of "hit" and "missed"
seems to be highly confusing either way.  Can we perhaps use some
other terminology?  Simply using "True" and "False" would be better,
since there's no possible confusion about what the labels mean.

> +#define unlikely(x) ({					\
> +	int ______r;					\
> +	static struct ftrace_likely_data ______f	\
> +		__attribute__((__aligned__(4)))		\
> +		__attribute__((section("_ftrace_unlikely"))); \
> +	if (unlikely_notrace(!______f.ip))		\
> +		______f.ip = __THIS_IP__;		\
> +	______r = unlikely_notrace(x);			\
> +	ftrace_likely_update(&______f, ______r);	\
> +	______r;					\
> +	})

Note that unlikely(x) calls ftrace_likely_update(), which does this:

> +void ftrace_likely_update(struct ftrace_likely_data *f, int val)
> +{
> +	/* FIXME: Make this atomic! */
> +	if (val)
> +		f->hit++;
> +	else
> +		f->missed++;
> +}
> +EXPORT_SYMBOL(ftrace_likely_update);

So that seems to mean that if unlikely(x) is false, then ______r is 0,
which means we increment f->missed.  Or am I missing something?

I would have thought that if unlikely(x) is false, that's *good*,
since it means the unlikely label was correct.  And normally, when
people think about cache hits vs cache misses, hits are good and
misses are bad.  Which is why I think the terminology is highly
confusing...

					- Ted
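[Editor's note: the thread converges on exactly this change in v2
below: pass the expected value into the update function and count
prediction accuracy instead of raw truth values. Side by side (struct
site is a stand-in for ftrace_likely_data; both function bodies are
taken from the v1 and v2 patches in this thread):

struct site {
	unsigned long hit, missed;		/* v1 column names */
	unsigned long correct, incorrect;	/* v2 column names */
};

/* v1: records the condition's truth value, so the meaning of the
 * columns flips between profile_likely and profile_unlikely. */
static void update_v1(struct site *f, int val)
{
	if (val)
		f->hit++;
	else
		f->missed++;
}

/* v2: records whether the annotation's prediction matched, so
 * "correct" means the same thing in both files. */
static void update_v2(struct site *f, int val, int expect)
{
	if (val == expect)
		f->correct++;
	else
		f->incorrect++;
}
]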
* Re: [PATCH][RFC] trace: profile likely and unlikely annotations
From: Arjan van de Ven @ 2008-10-28 14:48 UTC
To: Theodore Tso
Cc: Steven Rostedt, LKML, Mike Snitzer, Andrew Morton, Ingo Molnar,
    Thomas Gleixner, Peter Zijlstra, Frederic Weisbecker

On Tue, 28 Oct 2008 10:37:20 -0400
Theodore Tso <tytso@mit.edu> wrote:

> [...]
> If that wasn't what you intended, the meaning of "hit" and "missed"
> seems to be highly confusing either way.  Can we perhaps use some
> other terminology?  Simply using "True" and "False" would be better,
> since there's no possible confusion about what the labels mean.

or "correct" and "incorrect"

--
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings,
visit http://www.lesswatts.org
* Re: [PATCH][RFC] trace: profile likely and unlikely annotations
From: Steven Rostedt @ 2008-10-28 14:51 UTC
To: Arjan van de Ven
Cc: Theodore Tso, LKML, Mike Snitzer, Andrew Morton, Ingo Molnar,
    Thomas Gleixner, Peter Zijlstra, Frederic Weisbecker

On Tue, 28 Oct 2008, Arjan van de Ven wrote:
> On Tue, 28 Oct 2008 10:37:20 -0400
> Theodore Tso <tytso@mit.edu> wrote:
> > [...]
> > If that wasn't what you intended, the meaning of "hit" and "missed"
> > seems to be highly confusing either way.  Can we perhaps use some
> > other terminology?  Simply using "True" and "False" would be better,
> > since there's no possible confusion about what the labels mean.
>
> or "correct" and "incorrect"

This means that the code will need to be different for both. Or at
least a way to differentiate between the two. Not that hard, but I
wanted to make the code as trivial as possible.

-- Steve
* [PATCH 1/2 v2][RFC] trace: profile likely and unlikely annotations
From: Steven Rostedt @ 2008-10-29 16:35 UTC
To: LKML
Cc: Arjan van de Ven, Theodore Tso, Mike Snitzer, Andrew Morton, Ingo Molnar,
    Thomas Gleixner, Peter Zijlstra, Frederic Weisbecker, Daniel Walker

Andrew Morton recently suggested having an in-kernel way to profile
likely and unlikely macros. This patch achieves that goal.

When configured, every(*) likely and unlikely macro gets a counter
attached to it. When the condition is hit, the hits and misses of that
condition are recorded. These numbers can later be retrieved by:

  /debugfs/tracing/profile_likely    - All likely markers
  /debugfs/tracing/profile_unlikely  - All unlikely markers.

# cat /debug/tracing/profile_unlikely | \
    awk '{if (($1 || $2) && (($2+0.1)/($1+0.1) > 0.25)) { print $0; }}' | \
    head -20
 correct incorrect  Function                       File                 Line
 ------- ---------  --------                       ----                 ----
   46894     38216  __switch_to                    process_64.c          624
   11951     73138  __switch_to                    process_64.c          594
   11951     73097  __switch_to                    process_64.c          590
  117873     35049  need_resched                   sched.h              2103
    1345       711  syscall_get_error              syscall.h              51
       0      2510  syscall_trace_leave            ptrace.c             1568
       0         1  native_smp_prepare_cpus        smpboot.c            1231
   91635    276125  calc_delta_fair                sched_fair.c          411
  219028    113449  calc_delta_mine                sched.c              1279
       0     58048  sched_info_queued              sched_stats.h         222
   55449     71591  pick_next_task_fair            sched_fair.c         1394
      10         5  yield_task_fair                sched_fair.c          983
    7117      2553  rt_policy                      sched.c               144
       0      1220  pre_schedule_rt                sched_rt.c           1260
    1216     52402  pick_next_task_rt              sched_rt.c            884
       0     48009  sched_info_dequeued            sched_stats.h         177
       0        14  sched_move_task                sched.c              8707
       0        14  sched_move_task                sched.c              8697

# cat /debug/tracing/profile_likely | \
    awk '{if (($1 || $2) && (($2+0.1)/($1+0.1) > 0.25)) { print $0; }}'
 correct incorrect  Function                       File                 Line
 ------- ---------  --------                       ----                 ----
   42987     40240  pick_next_task                 sched.c              4407
   21419     18103  switch_mm                      mmu_context_64.h       18
       0         7  __cancel_work_timer            workqueue.c           560
     861      3960  clocksource_adjust             timekeeping.c         456
     390      1913  clocksource_adjust             timekeeping.c         450
       0    363683  audit_syscall_exit             auditsc.c            1570
      30    364573  audit_get_context              auditsc.c             732
       0    362744  audit_syscall_entry            auditsc.c            1541
      30       879  audit_free                     auditsc.c            1446
       0       952  audit_alloc                    auditsc.c             862
    2606       952  audit_alloc                    auditsc.c             858
       0         6  move_masked_irq                migration.c            20
    4465      2091  __grab_cache_page              filemap.c            2212
    9495      2787  iov_iter_advance               filemap.c            1884
    3121       899  __set_page_dirty               page-writeback.c     1134
   12874    221371  mapping_unevictable            pagemap.h              50
       4        10  __flush_cpu_slab               slub.c               1466
  635239    320760  slab_free                      slub.c               1731
    3105     61055  dentry_lru_del_init            dcache.c              153
     942      1156  load_elf_binary                binfmt_elf.c          904
     102        82  disk_put_part                  genhd.h               206
       0         1  tcp_mss_split_point            tcp_output.c         1126

As you can see by the above, there's a bit of work to do in rethinking
the use of some unlikelys and likelys.

Note: the unlikely case had 76 hits that were more than 25% incorrect.

Note: After submitting my first version of this patch, Andrew Morton
showed me a version written by Daniel Walker, from which I picked up
the following ideas:

1) Using __builtin_constant_p to avoid profiling fixed values (see the
   sketch after this list).
2) Using __FILE__ instead of instruction pointers.
3) Using the preprocessor to stop all profiling of likely annotations
   from vsyscall_64.c.
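[Editor's note: idea 1) is worth a closer look. Wrapping the profiled
macro in __builtin_constant_p() means annotations on compile-time-
constant conditions fold away entirely and never touch a counter. A
small userspace demonstration of the guard (check() and profiled() are
invented names; argc is used so the second condition cannot be
constant-folded):

#include <stdio.h>

static int profiled(int v)
{
	puts("counted");	/* stands in for the counter update */
	return !!v;
}

/* Same guard as likely_check()/unlikely_check() in the patch. */
#define check(x) (__builtin_constant_p(x) ? !!(x) : profiled(x))

int main(int argc, char **argv)
{
	(void)argv;

	if (check(1))		/* constant: folds away, profiled() never runs */
		;
	if (check(argc > 0))	/* not constant: goes through profiled() */
		puts("taken");
	return 0;
}

This prints "counted" and "taken" once each; the constant condition
leaves no trace.]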
Thanks to Andrew Morton, Arjan van de Ven, Theodore Tso and Ingo Molnar for their feed back on this patch. (*) Not ever unlikely is recorded, those that are used by vsyscalls (a few of them) had to have profiling disabled. Signed-off-by: Steven Rostedt <srostedt@redhat.com> --- arch/x86/kernel/vsyscall_64.c | 8 + include/asm-generic/vmlinux.lds.h | 14 +++ include/linux/compiler.h | 61 ++++++++++++++ kernel/trace/Kconfig | 16 +++ kernel/trace/Makefile | 2 kernel/trace/trace_unlikely.c | 157 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 255 insertions(+), 3 deletions(-) Index: linux-tip.git/kernel/trace/Makefile =================================================================== --- linux-tip.git.orig/kernel/trace/Makefile 2008-10-29 11:30:05.000000000 -0400 +++ linux-tip.git/kernel/trace/Makefile 2008-10-29 12:32:40.000000000 -0400 @@ -25,4 +25,6 @@ obj-$(CONFIG_STACK_TRACER) += trace_stac obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o obj-$(CONFIG_BOOT_TRACER) += trace_boot.o +obj-$(CONFIG_TRACE_UNLIKELY_PROFILE) += trace_unlikely.o + libftrace-y := ftrace.o Index: linux-tip.git/arch/x86/kernel/vsyscall_64.c =================================================================== --- linux-tip.git.orig/arch/x86/kernel/vsyscall_64.c 2008-10-29 11:30:05.000000000 -0400 +++ linux-tip.git/arch/x86/kernel/vsyscall_64.c 2008-10-29 11:30:12.000000000 -0400 @@ -17,6 +17,14 @@ * want per guest time just set the kernel.vsyscall64 sysctl to 0. */ +/* Protect userspace from profiling */ +#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +# undef likely +# undef unlikely +# define likely(x) likely_notrace(x) +# define unlikely(x) unlikely_notrace(x) +#endif + #include <linux/time.h> #include <linux/init.h> #include <linux/kernel.h> Index: linux-tip.git/include/asm-generic/vmlinux.lds.h =================================================================== --- linux-tip.git.orig/include/asm-generic/vmlinux.lds.h 2008-10-29 11:30:06.000000000 -0400 +++ linux-tip.git/include/asm-generic/vmlinux.lds.h 2008-10-29 11:30:12.000000000 -0400 @@ -45,6 +45,17 @@ #define MCOUNT_REC() #endif +#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_likely_profile) = .; \ + *(_ftrace_likely) \ + VMLINUX_SYMBOL(__stop_likely_profile) = .; \ + VMLINUX_SYMBOL(__start_unlikely_profile) = .; \ + *(_ftrace_unlikely) \ + VMLINUX_SYMBOL(__stop_unlikely_profile) = .; +#else +#define LIKELY_PROFILE() +#endif + /* .data section */ #define DATA_DATA \ *(.data) \ @@ -62,7 +73,8 @@ VMLINUX_SYMBOL(__stop___markers) = .; \ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ - VMLINUX_SYMBOL(__stop___tracepoints) = .; + VMLINUX_SYMBOL(__stop___tracepoints) = .; \ + LIKELY_PROFILE() #define RO_DATA(align) \ . 
= ALIGN((align)); \ Index: linux-tip.git/include/linux/compiler.h =================================================================== --- linux-tip.git.orig/include/linux/compiler.h 2008-10-29 11:30:06.000000000 -0400 +++ linux-tip.git/include/linux/compiler.h 2008-10-29 11:32:05.000000000 -0400 @@ -59,8 +59,65 @@ extern void __chk_io_ptr(const volatile * specific implementations come from the above header files */ -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) +#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +struct ftrace_likely_data { + const char *func; + const char *file; + unsigned line; + unsigned long correct; + unsigned long incorrect; +}; +void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect); + +#define likely_notrace(x) __builtin_expect(!!(x), 1) +#define unlikely_notrace(x) __builtin_expect(!!(x), 0) + +#define likely_check(x) ({ \ + int ______r; \ + static struct ftrace_likely_data \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_likely"))) \ + ______f = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + ______f.line = __LINE__; \ + ______r = likely_notrace(x); \ + ftrace_likely_update(&______f, ______r, 1); \ + ______r; \ + }) +#define unlikely_check(x) ({ \ + int ______r; \ + static struct ftrace_likely_data \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_unlikely"))) \ + ______f = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + ______f.line = __LINE__; \ + ______r = unlikely_notrace(x); \ + ftrace_likely_update(&______f, ______r, 0); \ + ______r; \ + }) + +/* + * Using __builtin_constant_p(x) to ignore cases where the return + * value is always the same. This idea is taken from a similar patch + * written by Daniel Walker. + */ +# ifndef likely +# define likely(x) (__builtin_constant_p(x) ? !!(x) : likely_check(x)) +# endif +# ifndef unlikely +# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : unlikely_check(x)) +# endif +#else +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +#endif /* Optimization barrier */ #ifndef barrier Index: linux-tip.git/kernel/trace/Kconfig =================================================================== --- linux-tip.git.orig/kernel/trace/Kconfig 2008-10-29 11:30:10.000000000 -0400 +++ linux-tip.git/kernel/trace/Kconfig 2008-10-29 12:32:40.000000000 -0400 @@ -138,6 +138,22 @@ config BOOT_TRACER selected, because the self-tests are an initcall as well and that would invalidate the boot trace. ) +config TRACE_UNLIKELY_PROFILE + bool "Trace likely/unlikely profiler" + depends on DEBUG_KERNEL + select TRACING + help + This tracer profiles all the the likely and unlikely macros + in the kernel. It will display the results in: + + /debugfs/tracing/profile_likely + /debugfs/tracing/profile_unlikely + + Note: this will add a significant overhead, only turn this + on if you need to profile the system's use of these macros. + + Say N if unsure. 
+ config STACK_TRACER bool "Trace max stack" depends on HAVE_FUNCTION_TRACER Index: linux-tip.git/kernel/trace/trace_unlikely.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-tip.git/kernel/trace/trace_unlikely.c 2008-10-29 12:32:40.000000000 -0400 @@ -0,0 +1,157 @@ +/* + * unlikely profiler + * + * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> + */ +#include <linux/kallsyms.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/ftrace.h> +#include <linux/hash.h> +#include <linux/fs.h> +#include <asm/local.h> +#include "trace.h" + +void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect) +{ + /* FIXME: Make this atomic! */ + if (val == expect) + f->correct++; + else + f->incorrect++; +} +EXPORT_SYMBOL(ftrace_likely_update); + +struct ftrace_pointer { + void *start; + void *stop; +}; + +static void * +t_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct ftrace_pointer *f = m->private; + struct ftrace_likely_data *p = v; + + (*pos)++; + + if (v == (void *)1) + return f->start; + + ++p; + + if ((void *)p >= (void *)f->stop) + return NULL; + + return p; +} + +static void *t_start(struct seq_file *m, loff_t *pos) +{ + void *t = (void *)1; + loff_t l = 0; + + for (; t && l < *pos; t = t_next(m, t, &l)) + ; + + return t; +} + +static void t_stop(struct seq_file *m, void *p) +{ +} + +static int t_show(struct seq_file *m, void *v) +{ + struct ftrace_likely_data *p = v; + const char *f; + + if (v == (void *)1) { + seq_printf(m, " correct incorrect" + " Function " + " File Line\n" + " ------- ---------" + " -------- " + " ---- ----\n"); + return 0; + } + + /* Only print the file, not the path */ + f = p->file + strlen(p->file); + while (f >= p->file && *f != '/') + f--; + f++; + + seq_printf(m, " %8lu %8lu ", p->correct, p->incorrect); + seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line); + return 0; +} + +static struct seq_operations tracing_likely_seq_ops = { + .start = t_start, + .next = t_next, + .stop = t_stop, + .show = t_show, +}; + +static int tracing_likely_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = seq_open(file, &tracing_likely_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = (void *)inode->i_private; + } + + return ret; +} + +static struct file_operations tracing_likely_fops = { + .open = tracing_likely_open, + .read = seq_read, + .llseek = seq_lseek, +}; + +extern unsigned long __start_likely_profile[]; +extern unsigned long __stop_likely_profile[]; +extern unsigned long __start_unlikely_profile[]; +extern unsigned long __stop_unlikely_profile[]; + +static struct ftrace_pointer ftrace_likely_pos = { + .start = __start_likely_profile, + .stop = __stop_likely_profile, +}; + +static struct ftrace_pointer ftrace_unlikely_pos = { + .start = __start_unlikely_profile, + .stop = __stop_unlikely_profile, +}; + +static __init int ftrace_unlikely_init(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + + entry = debugfs_create_file("profile_likely", 0444, d_tracer, + &ftrace_likely_pos, + &tracing_likely_fops); + if (!entry) + pr_warning("Could not create debugfs 'profile_likely' entry\n"); + + entry = debugfs_create_file("profile_unlikely", 0444, d_tracer, + &ftrace_unlikely_pos, + &tracing_likely_fops); + if (!entry) + pr_warning("Could not create debugfs" + " 
'profile_unlikely' entry\n"); + + return 0; +} + +device_initcall(ftrace_unlikely_init); ^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 2/2 v2][RFC] ftrace: unlikely annotation tracer 2008-10-29 16:35 ` [PATCH 1/2 v2][RFC] " Steven Rostedt @ 2008-10-29 16:38 ` Steven Rostedt 2008-10-29 16:40 ` [PATCH 1/2 v2][RFC] trace: profile likely and unlikely annotations Arjan van de Ven 2008-10-29 22:39 ` [PATCH 1/2 v3][RFC] " Steven Rostedt 2 siblings, 0 replies; 17+ messages in thread From: Steven Rostedt @ 2008-10-29 16:38 UTC (permalink / raw) To: LKML Cc: Arjan van de Ven, Theodore Tso, Mike Snitzer, Andrew Morton, Ingo Molnar, Thomas Gleixner, Peter Zijlstra, Frederic Weisbecker, Daniel Walker This patch adds a way to record the instances of the likely and unlikely condition annotations. When "unlikely" is set in /debugfs/tracing/iter_ctrl the unlikely conditions will be added to any of the ftrace tracers. For example: irqbalance-2662 [002] 41127.102858: [correct] debug_mutex_free_waiter 51 <idle>-0 [001] 41127.102858: [correct] add_preempt_count 4307 irqbalance-2662 [002] 41127.102859: [INCORRECT] debug_smp_processor_id 15 <idle>-0 [001] 41127.102860: [correct] add_preempt_count 4316 irqbalance-2662 [002] 41127.102861: [correct] slab_alloc 1608 Which shows the normal tracer heading, as well as whether the condition was correct "[correct]" or was mistaken "[INCORRECT]", followed by the function and line number. Signed-off-by: Steven Rostedt <srostedt@redhat.com> --- kernel/Makefile | 5 + kernel/trace/Kconfig | 22 +++++++ kernel/trace/Makefile | 5 + kernel/trace/ring_buffer.c | 11 +++ kernel/trace/trace.c | 37 +++++++++++ kernel/trace/trace.h | 43 +++++++++++++ kernel/trace/trace_unlikely.c | 130 ++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 253 insertions(+) Index: linux-tip.git/kernel/trace/Kconfig =================================================================== --- linux-tip.git.orig/kernel/trace/Kconfig 2008-10-29 12:32:30.000000000 -0400 +++ linux-tip.git/kernel/trace/Kconfig 2008-10-29 12:32:35.000000000 -0400 @@ -154,6 +154,28 @@ config TRACE_UNLIKELY_PROFILE Say N if unsure. +config TRACING_UNLIKELY + bool + help + Selected by tracers that will trace the likely and unlikely + conditions. This prevents the tracers themselves from being + profiled. Profiling the tracing infrastructure can only happen + when the likelys and unlikelys are not being traced. + +config UNLIKELY_TRACER + bool "Trace likely/unlikely instances" + depends on TRACE_UNLIKELY_PROFILE + select TRACING_UNLIKELY + help + This is traces the events of likely and unlikely condition + calls in the kernel. The difference between this and the + "Trace likely/unlikely profiler" is that this is not a + histogram of the callers, but actually places the calling + events into a running buffer to see when and where the + events happened, as well as their results. + + Say N if unsure. 
+ config STACK_TRACER bool "Trace max stack" depends on HAVE_FUNCTION_TRACER Index: linux-tip.git/kernel/Makefile =================================================================== --- linux-tip.git.orig/kernel/Makefile 2008-10-29 12:32:30.000000000 -0400 +++ linux-tip.git/kernel/Makefile 2008-10-29 12:32:35.000000000 -0400 @@ -24,6 +24,11 @@ CFLAGS_REMOVE_sched_clock.o = -pg CFLAGS_REMOVE_sched.o = -mno-spe -pg endif +ifdef CONFIG_TRACING_UNLIKELY +# Do not profile tracepoint if we are tracing the unlikely conditions +CFLAGS_tracepoint.o += '-Dunlikely(x)=unlikely_notrace(x)' +endif + obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o Index: linux-tip.git/kernel/trace/Makefile =================================================================== --- linux-tip.git.orig/kernel/trace/Makefile 2008-10-29 12:32:30.000000000 -0400 +++ linux-tip.git/kernel/trace/Makefile 2008-10-29 12:32:35.000000000 -0400 @@ -10,6 +10,11 @@ CFLAGS_trace_selftest_dynamic.o = -pg obj-y += trace_selftest_dynamic.o endif +ifdef CONFIG_TRACING_UNLIKELY +KBUILD_CFLAGS += '-Dlikely(x)=likely_notrace(x)' +KBUILD_CFLAGS += '-Dunlikely(x)=unlikely_notrace(x)' +endif + obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o obj-$(CONFIG_RING_BUFFER) += ring_buffer.o Index: linux-tip.git/kernel/trace/trace_unlikely.c =================================================================== --- linux-tip.git.orig/kernel/trace/trace_unlikely.c 2008-10-29 12:32:30.000000000 -0400 +++ linux-tip.git/kernel/trace/trace_unlikely.c 2008-10-29 12:32:35.000000000 -0400 @@ -15,8 +15,138 @@ #include <asm/local.h> #include "trace.h" +#ifdef CONFIG_UNLIKELY_TRACER + +static int unlikely_tracing_enabled __read_mostly; +static DEFINE_MUTEX(unlikely_tracing_mutex); +static DEFINE_PER_CPU(atomic_t, unlikely_trace_disabled); +static struct trace_array *unlikely_tracer; + +void trace_unlikely_stop(void) +{ + /* preemption should be disabled */ + int cpu = smp_processor_id(); + + atomic_inc(&per_cpu(unlikely_trace_disabled, cpu)); +} + +void trace_unlikely_start(void) +{ + /* preemption should be disabled */ + int cpu = smp_processor_id(); + + atomic_dec(&per_cpu(unlikely_trace_disabled, cpu)); +} + +static void +probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) +{ + struct trace_array *tr = unlikely_tracer; + struct ring_buffer_event *event; + struct trace_unlikely *entry; + unsigned long flags, irq_flags; + int cpu, resched, pc; + + if (unlikely(!tr)) + return; + + pc = preempt_count(); + /* Can be called within schedule */ + resched = need_resched(); + preempt_disable_notrace(); + cpu = raw_smp_processor_id(); + if (atomic_inc_return(&per_cpu(unlikely_trace_disabled, cpu)) != 1) + goto out; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out; + + local_save_flags(flags); + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_UNLIKELY; + + /* + * I would love to save just the ftrace_likely_data pointer, but + * this code can also be used by modules. Ugly things can happen + * if the module is unloaded, and then we go and read the + * pointer. This is slower, but much safer. 
+ */ + + strncpy(entry->func, f->func, TRACE_FUNC_SIZE); + strncpy(entry->file, f->file, TRACE_FILE_SIZE); + entry->func[TRACE_FUNC_SIZE] = 0; + entry->file[TRACE_FILE_SIZE] = 0; + entry->line = f->line; + entry->correct = val == expect; + + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + out: + atomic_dec(&per_cpu(unlikely_trace_disabled, cpu)); + if (resched) + preempt_enable_no_resched_notrace(); + else + preempt_enable_notrace(); +} + +static inline +void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect) +{ + if (!unlikely_tracing_enabled) + return; + + probe_likely_condition(f, val, expect); +} + +int enable_unlikely_tracing(struct trace_array *tr) +{ + int ret = 0; + + mutex_lock(&unlikely_tracing_mutex); + unlikely_tracer = tr; + /* + * Must be seen before enabling. The reader is a condition + * where we do not need a matching rmb() + */ + smp_wmb(); + unlikely_tracing_enabled++; + mutex_unlock(&unlikely_tracing_mutex); + + return ret; +} + +void disable_unlikely_tracing(void) +{ + mutex_lock(&unlikely_tracing_mutex); + + if (!unlikely_tracing_enabled) + goto out_unlock; + + unlikely_tracing_enabled--; + + out_unlock: + mutex_unlock(&unlikely_tracing_mutex); +} +#else +static inline +void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect) +{ +} +#endif /* CONFIG_UNLIKELY_TRACER */ + void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect) { + /* + * I would love to have a trace point here instead, but the + * trace point code is so inundated with unlikely and likely + * conditions that the recursive nightmare that exists is too + * much to try to get working. At least for now. + */ + trace_likely_condition(f, val, expect); + /* FIXME: Make this atomic! */ if (val == expect) f->correct++; Index: linux-tip.git/kernel/trace/trace.c =================================================================== --- linux-tip.git.orig/kernel/trace/trace.c 2008-10-29 12:32:30.000000000 -0400 +++ linux-tip.git/kernel/trace/trace.c 2008-10-29 12:32:35.000000000 -0400 @@ -235,6 +235,9 @@ static const char *trace_options[] = { "stacktrace", "sched-tree", "ftrace_printk", +#ifdef CONFIG_UNLIKELY_TRACER + "unlikely", +#endif NULL }; @@ -1457,6 +1460,17 @@ print_lat_fmt(struct trace_iterator *ite trace_seq_print_cont(s, iter); break; } + case TRACE_UNLIKELY: { + struct trace_unlikely *field; + + trace_assign_type(field, entry); + + trace_seq_printf(s, "[%s] %-30.30s %d\n", + field->correct ? "correct" : "INCORRECT", + field->func, + field->line); + break; + } default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1593,6 +1607,17 @@ static enum print_line_t print_trace_fmt trace_seq_print_cont(s, iter); break; } + case TRACE_UNLIKELY: { + struct trace_unlikely *field; + + trace_assign_type(field, entry); + + trace_seq_printf(s, "[%s] %-30.30s %d\n", + field->correct ? 
"correct" : "INCORRECT", + field->func, + field->line); + break; + } } return TRACE_TYPE_HANDLED; } @@ -1675,6 +1700,10 @@ static enum print_line_t print_raw_fmt(s trace_seq_print_cont(s, iter); break; } + case TRACE_UNLIKELY: { + /* IGNORE */ + break; + }; } return TRACE_TYPE_HANDLED; } @@ -2362,8 +2391,14 @@ tracing_ctrl_write(struct file *filp, co tr->ctrl = val; + if (!val) + trace_unlikely_disable(); + if (current_trace && current_trace->ctrl_update) current_trace->ctrl_update(tr); + + if (val) + trace_unlikely_enable(tr); } mutex_unlock(&trace_types_lock); @@ -2425,6 +2460,7 @@ tracing_set_trace_write(struct file *fil if (t == current_trace) goto out; + trace_unlikely_disable(); if (current_trace && current_trace->reset) current_trace->reset(tr); @@ -2432,6 +2468,7 @@ tracing_set_trace_write(struct file *fil if (t->init) t->init(tr); + trace_unlikely_enable(tr); out: mutex_unlock(&trace_types_lock); Index: linux-tip.git/kernel/trace/trace.h =================================================================== --- linux-tip.git.orig/kernel/trace/trace.h 2008-10-29 12:32:30.000000000 -0400 +++ linux-tip.git/kernel/trace/trace.h 2008-10-29 12:32:35.000000000 -0400 @@ -21,6 +21,7 @@ enum trace_type { TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, + TRACE_UNLIKELY, TRACE_BOOT, __TRACE_LAST_TYPE @@ -117,6 +118,16 @@ struct trace_boot { struct boot_trace initcall; }; +#define TRACE_FUNC_SIZE 30 +#define TRACE_FILE_SIZE 20 +struct trace_unlikely { + struct trace_entry ent; + unsigned line; + char func[TRACE_FUNC_SIZE+1]; + char file[TRACE_FILE_SIZE+1]; + char correct; +}; + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: @@ -217,6 +228,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ TRACE_MMIO_MAP); \ IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \ + IF_ASSIGN(var, ent, struct trace_unlikely, TRACE_UNLIKELY); \ __ftrace_bad_type(); \ } while (0) @@ -413,8 +425,39 @@ enum trace_iterator_flags { TRACE_ITER_STACKTRACE = 0x100, TRACE_ITER_SCHED_TREE = 0x200, TRACE_ITER_PRINTK = 0x400, +#ifdef CONFIG_UNLIKELY_TRACER + TRACE_ITER_UNLIKELY = 0x800, +#endif }; extern struct tracer nop_trace; +#ifdef CONFIG_UNLIKELY_TRACER +extern int enable_unlikely_tracing(struct trace_array *tr); +extern void disable_unlikely_tracing(void); +static inline int trace_unlikely_enable(struct trace_array *tr) +{ + if (trace_flags & TRACE_ITER_UNLIKELY) + return enable_unlikely_tracing(tr); + return 0; +} +static inline void trace_unlikely_disable(void) +{ + /* due to races, always disable */ + disable_unlikely_tracing(); +} +extern void trace_unlikely_stop(void); +extern void trace_unlikely_start(void); +#else +static inline int trace_unlikely_enable(struct trace_array *tr) +{ + return 0; +} +static inline void trace_unlikely_disable(void) +{ +} +static inline void trace_unlikely_stop(void) { } +static inline void trace_unlikely_start(void) { } +#endif /* CONFIG_UNLIKELY_TRACER */ + #endif /* _LINUX_KERNEL_TRACE_H */ Index: linux-tip.git/kernel/trace/ring_buffer.c =================================================================== --- linux-tip.git.orig/kernel/trace/ring_buffer.c 2008-10-29 12:32:30.000000000 -0400 +++ linux-tip.git/kernel/trace/ring_buffer.c 2008-10-29 12:32:35.000000000 -0400 @@ -16,6 +16,8 @@ #include <linux/list.h> #include <linux/fs.h> +#include "trace.h" + /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 @@ -1139,7 +1141,14 @@ 
ring_buffer_lock_reserve(struct ring_buf if (length > BUF_PAGE_SIZE) goto out; + /* + * Ugly, but the unlikely tracing will enter back into + * the ring buffer code. Namely through the timestamp, causing + * a triggering of the "again" loop, that will be infinite. + */ + trace_unlikely_stop(); event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length); + trace_unlikely_start(); if (!event) goto out; @@ -1251,8 +1260,10 @@ int ring_buffer_write(struct ring_buffer goto out; event_length = rb_calculate_event_length(length); + trace_unlikely_stop(); event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, event_length); + trace_unlikely_start(); if (!event) goto out; ^ permalink raw reply [flat|nested] 17+ messages in thread
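[Editor's note: the trace_unlikely_stop()/trace_unlikely_start() calls
and the per-CPU unlikely_trace_disabled counter in the patch above
guard against a real hazard: the ring-buffer code itself contains
likely()/unlikely() annotations, so an unguarded probe would recurse
forever. A single-threaded sketch of that guard (the kernel version
uses per-CPU atomics and disables preemption; names are simplified):

#include <stdio.h>

static int trace_disabled;	/* per-CPU atomic in the real patch */

static void record(const char *what);

static void probe(const char *what)
{
	/* First entry wins; re-entrant calls bail out immediately. */
	if (++trace_disabled != 1)
		goto out;
	record(what);
out:
	--trace_disabled;
}

static void record(const char *what)
{
	printf("event: %s\n", what);
	probe("recursive");	/* swallowed by the guard above */
}

int main(void)
{
	probe("outer");		/* prints only "event: outer" */
	return 0;
}
]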
* Re: [PATCH 1/2 v2][RFC] trace: profile likely and unlikely annotations
From: Arjan van de Ven @ 2008-10-29 16:40 UTC
To: Steven Rostedt
Cc: LKML, Theodore Tso, Mike Snitzer, Andrew Morton, Ingo Molnar,
    Thomas Gleixner, Peter Zijlstra, Frederic Weisbecker, Daniel Walker

On Wed, 29 Oct 2008 12:35:44 -0400 (EDT)
Steven Rostedt <rostedt@goodmis.org> wrote:

>
> Andrew Morton recently suggested having an in-kernel way to profile
> likely and unlikely macros. This patch achieves that goal.
>
> When configured, every(*) likely and unlikely macro gets a counter
> attached to it. When the condition is hit, the hits and misses of
> that condition are recorded. These numbers can later be retrieved by:
>
>   /debugfs/tracing/profile_likely    - All likely markers
>   /debugfs/tracing/profile_unlikely  - All unlikely markers.

I like the general approach.

It'd be very convenient if the debugfs files also had a
percentage-incorrect (or percentage-correct, either way) listed, so
that we humans can focus straight on which ones are outliers rather
than having to do math in our heads first...

--
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings,
visit http://www.lesswatts.org
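[Editor's note: v3 below picks this suggestion up and adds a % column.
One detail a sketch makes obvious: the division needs a zero guard for
sites that were never executed. This is illustrative arithmetic only,
not the literal v3 code, which is truncated in this capture:

/* How often the annotation was wrong, as an integer 0-100. */
static unsigned long percent_incorrect(unsigned long correct,
				       unsigned long incorrect)
{
	unsigned long total = correct + incorrect;

	return total ? incorrect * 100 / total : 0;
}
]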
* [PATCH 1/2 v3][RFC] trace: profile likely and unlikely annotations 2008-10-29 16:35 ` [PATCH 1/2 v2][RFC] " Steven Rostedt 2008-10-29 16:38 ` [PATCH 2/2 v2][RFC] ftrace: unlikely annotation tracer Steven Rostedt 2008-10-29 16:40 ` [PATCH 1/2 v2][RFC] trace: profile likely and unlikely annotations Arjan van de Ven @ 2008-10-29 22:39 ` Steven Rostedt 2008-10-29 22:40 ` [PATCH 2/2 v3][RFC] ftrace: unlikely annotation tracer Steven Rostedt 2008-10-30 14:32 ` [PATCH 1/2 v3][RFC] trace: profile likely and unlikely annotations Jörn Engel 2 siblings, 2 replies; 17+ messages in thread From: Steven Rostedt @ 2008-10-29 22:39 UTC (permalink / raw) To: LKML Cc: Arjan van de Ven, Theodore Tso, Mike Snitzer, Andrew Morton, Ingo Molnar, Thomas Gleixner, Peter Zijlstra, Frederic Weisbecker, Daniel Walker, Linus Torvalds [ changes since v2: Added Arjan's recommendation about calculating a percentage of failures in the output. Question: Do people prefer the "func file line" notation or the kallsyms "func+0x<offset>" notation? ] Andrew Morton recently suggested having an in-kernel way to profile likely and unlikely macros. This patch achieves that goal. When configured, every(*) likely and unlikely macro gets a counter attached to it. When the condition is hit, the hit and misses of that condition are recorded. These numbers can later be retrieved by: /debugfs/tracing/profile_likely - All likely markers /debugfs/tracing/profile_unlikely - All unlikely markers. # cat /debug/tracing/profile_unlikely | head correct incorrect % Function File Line ------- --------- - -------- ---- ---- 2494 0 0 do_arch_prctl process_64.c 832 0 0 0 do_arch_prctl process_64.c 804 2988 0 0 IS_ERR err.h 34 124668 6133 4 __switch_to process_64.c 673 130811 0 0 __switch_to process_64.c 639 69768 61064 87 __switch_to process_64.c 624 15557 115251 100 __switch_to process_64.c 594 15555 115227 100 __switch_to process_64.c 590 # cat /debug/tracing/profile_unlikely | \ awk '{ if ($3 > 25) print $0; }' |head -20 59851 49675 82 __switch_to process_64.c 624 12688 96800 100 __switch_to process_64.c 594 12688 96778 100 __switch_to process_64.c 590 152290 49467 32 need_resched sched.h 2103 1385 1034 74 syscall_get_error syscall.h 51 0 2967 100 syscall_trace_leave ptrace.c 1568 0 1 100 native_smp_prepare_cpus smpboot.c 1231 304770 92199 30 update_curr sched_fair.c 489 113195 297065 100 calc_delta_fair sched_fair.c 411 227018 123309 54 calc_delta_mine sched.c 1279 0 70299 100 sched_info_queued sched_stats.h 222 67434 100268 100 pick_next_task_fair sched_fair.c 1394 7 9 100 yield_task_fair sched_fair.c 983 7215 3321 46 rt_policy sched.c 144 0 1611 100 pre_schedule_rt sched_rt.c 1260 1607 66013 100 pick_next_task_rt sched_rt.c 884 0 60198 100 sched_info_dequeued sched_stats.h 177 0 15 100 sched_move_task sched.c 8707 0 15 100 sched_move_task sched.c 8697 68373 50822 74 schedule sched.c 4467 # cat /debug/tracing/profile_likely | awk '{ if ($3 > 25) print $0; }' 54665 53649 98 pick_next_task sched.c 4407 25344 23614 93 switch_mm mmu_context_64.h 18 0 7 100 __cancel_work_timer workqueue.c 560 1202 1354 100 clocksource_adjust timekeeping.c 456 0 441888 100 audit_syscall_exit auditsc.c 1570 22 442776 100 audit_get_context auditsc.c 732 0 440930 100 audit_syscall_entry auditsc.c 1541 22 889 100 audit_free auditsc.c 1446 0 962 100 audit_alloc auditsc.c 862 2614 962 36 audit_alloc auditsc.c 858 0 6 100 move_masked_irq migration.c 20 4482 2110 47 __grab_cache_page filemap.c 2212 9575 2787 29 iov_iter_advance filemap.c 1884 3121 905 28 __set_page_dirty 
page-writeback.c 1134 12882 222991 100 mapping_unevictable pagemap.h 50 4 11 100 __flush_cpu_slab slub.c 1466 664688 320446 48 slab_free slub.c 1731 2962 61920 100 dentry_lru_del_init dcache.c 153 950 1157 100 load_elf_binary binfmt_elf.c 904 102 82 80 disk_put_part genhd.h 206 0 4 100 tcp_mss_split_point tcp_output.c 1126 As you can see by the above, there's a bit of work to do in rethinking the use of some unlikelys and likelys. Note: the unlikely case had 77 hits that were more than 25%. Note: After submitting my first version of this patch, Andrew Morton showed me a version written by Daniel Walker, from which I picked up the following ideas: 1) Using __builtin_constant_p to avoid profiling fixed values. 2) Using __FILE__ instead of instruction pointers. 3) Using the preprocessor to stop all profiling of likely annotations from vsyscall_64.c. Thanks to Andrew Morton, Arjan van de Ven, Theodore Tso and Ingo Molnar for their feedback on this patch. (*) Not every unlikely is recorded; those that are used by vsyscalls (a few of them) had to have profiling disabled. Signed-off-by: Steven Rostedt <srostedt@redhat.com> --- arch/x86/kernel/vsyscall_64.c | 8 + include/asm-generic/vmlinux.lds.h | 14 ++- include/linux/compiler.h | 61 +++++++++++++ kernel/trace/Kconfig | 16 +++ kernel/trace/Makefile | 2 kernel/trace/trace_unlikely.c | 167 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 265 insertions(+), 3 deletions(-) Index: linux-tip.git/kernel/trace/Makefile =================================================================== --- linux-tip.git.orig/kernel/trace/Makefile 2008-10-29 12:38:54.000000000 -0400 +++ linux-tip.git/kernel/trace/Makefile 2008-10-29 18:31:26.000000000 -0400 @@ -25,4 +25,6 @@ obj-$(CONFIG_STACK_TRACER) += trace_stac obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o obj-$(CONFIG_BOOT_TRACER) += trace_boot.o +obj-$(CONFIG_TRACE_UNLIKELY_PROFILE) += trace_unlikely.o + libftrace-y := ftrace.o Index: linux-tip.git/arch/x86/kernel/vsyscall_64.c =================================================================== --- linux-tip.git.orig/arch/x86/kernel/vsyscall_64.c 2008-10-29 12:38:54.000000000 -0400 +++ linux-tip.git/arch/x86/kernel/vsyscall_64.c 2008-10-29 16:13:00.000000000 -0400 @@ -17,6 +17,14 @@ * want per guest time just set the kernel.vsyscall64 sysctl to 0.
*/ +/* Protect userspace from profiling */ +#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +# undef likely +# undef unlikely +# define likely(x) likely_notrace(x) +# define unlikely(x) unlikely_notrace(x) +#endif + #include <linux/time.h> #include <linux/init.h> #include <linux/kernel.h> Index: linux-tip.git/include/asm-generic/vmlinux.lds.h =================================================================== --- linux-tip.git.orig/include/asm-generic/vmlinux.lds.h 2008-10-29 12:38:54.000000000 -0400 +++ linux-tip.git/include/asm-generic/vmlinux.lds.h 2008-10-29 16:13:00.000000000 -0400 @@ -45,6 +45,17 @@ #define MCOUNT_REC() #endif +#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_likely_profile) = .; \ + *(_ftrace_likely) \ + VMLINUX_SYMBOL(__stop_likely_profile) = .; \ + VMLINUX_SYMBOL(__start_unlikely_profile) = .; \ + *(_ftrace_unlikely) \ + VMLINUX_SYMBOL(__stop_unlikely_profile) = .; +#else +#define LIKELY_PROFILE() +#endif + /* .data section */ #define DATA_DATA \ *(.data) \ @@ -62,7 +73,8 @@ VMLINUX_SYMBOL(__stop___markers) = .; \ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ - VMLINUX_SYMBOL(__stop___tracepoints) = .; + VMLINUX_SYMBOL(__stop___tracepoints) = .; \ + LIKELY_PROFILE() #define RO_DATA(align) \ . = ALIGN((align)); \ Index: linux-tip.git/include/linux/compiler.h =================================================================== --- linux-tip.git.orig/include/linux/compiler.h 2008-10-29 12:38:54.000000000 -0400 +++ linux-tip.git/include/linux/compiler.h 2008-10-29 16:13:00.000000000 -0400 @@ -59,8 +59,65 @@ extern void __chk_io_ptr(const volatile * specific implementations come from the above header files */ -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) +#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +struct ftrace_likely_data { + const char *func; + const char *file; + unsigned line; + unsigned long correct; + unsigned long incorrect; +}; +void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect); + +#define likely_notrace(x) __builtin_expect(!!(x), 1) +#define unlikely_notrace(x) __builtin_expect(!!(x), 0) + +#define likely_check(x) ({ \ + int ______r; \ + static struct ftrace_likely_data \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_likely"))) \ + ______f = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + ______f.line = __LINE__; \ + ______r = likely_notrace(x); \ + ftrace_likely_update(&______f, ______r, 1); \ + ______r; \ + }) +#define unlikely_check(x) ({ \ + int ______r; \ + static struct ftrace_likely_data \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_unlikely"))) \ + ______f = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + ______f.line = __LINE__; \ + ______r = unlikely_notrace(x); \ + ftrace_likely_update(&______f, ______r, 0); \ + ______r; \ + }) + +/* + * Using __builtin_constant_p(x) to ignore cases where the return + * value is always the same. This idea is taken from a similar patch + * written by Daniel Walker. + */ +# ifndef likely +# define likely(x) (__builtin_constant_p(x) ? !!(x) : likely_check(x)) +# endif +# ifndef unlikely +# define unlikely(x) (__builtin_constant_p(x) ? 
!!(x) : unlikely_check(x)) +# endif +#else +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +#endif /* Optimization barrier */ #ifndef barrier Index: linux-tip.git/kernel/trace/Kconfig =================================================================== --- linux-tip.git.orig/kernel/trace/Kconfig 2008-10-29 16:12:53.000000000 -0400 +++ linux-tip.git/kernel/trace/Kconfig 2008-10-29 18:31:26.000000000 -0400 @@ -138,6 +138,22 @@ config BOOT_TRACER selected, because the self-tests are an initcall as well and that would invalidate the boot trace. ) +config TRACE_UNLIKELY_PROFILE + bool "Trace likely/unlikely profiler" + depends on DEBUG_KERNEL + select TRACING + help + This tracer profiles all the likely and unlikely macros + in the kernel. It will display the results in: + + /debugfs/tracing/profile_likely + /debugfs/tracing/profile_unlikely + + Note: this will add a significant overhead; only turn this + on if you need to profile the system's use of these macros. + + Say N if unsure. + config STACK_TRACER bool "Trace max stack" depends on HAVE_FUNCTION_TRACER Index: linux-tip.git/kernel/trace/trace_unlikely.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-tip.git/kernel/trace/trace_unlikely.c 2008-10-29 18:31:26.000000000 -0400 @@ -0,0 +1,167 @@ +/* + * unlikely profiler + * + * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> + */ +#include <linux/kallsyms.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/ftrace.h> +#include <linux/hash.h> +#include <linux/fs.h> +#include <asm/local.h> +#include "trace.h" + +void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect) +{ + /* FIXME: Make this atomic! */ + if (val == expect) + f->correct++; + else + f->incorrect++; +} +EXPORT_SYMBOL(ftrace_likely_update); + +struct ftrace_pointer { + void *start; + void *stop; +}; + +static void * +t_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct ftrace_pointer *f = m->private; + struct ftrace_likely_data *p = v; + + (*pos)++; + + if (v == (void *)1) + return f->start; + + ++p; + + if ((void *)p >= (void *)f->stop) + return NULL; + + return p; +} + +static void *t_start(struct seq_file *m, loff_t *pos) +{ + void *t = (void *)1; + loff_t l = 0; + + for (; t && l < *pos; t = t_next(m, t, &l)) + ; + + return t; +} + +static void t_stop(struct seq_file *m, void *p) +{ +} + +static int t_show(struct seq_file *m, void *v) +{ + struct ftrace_likely_data *p = v; + const char *f; + unsigned long percent; + + if (v == (void *)1) { + seq_printf(m, " correct incorrect %% " + " Function " + " File Line\n" + " ------- --------- - " + " -------- " + " ---- ----\n"); + return 0; + } + + /* Only print the file, not the path */ + f = p->file + strlen(p->file); + while (f >= p->file && *f != '/') + f--; + f++; + + if (p->correct) { + percent = p->incorrect * 100; + percent /= p->correct; + /* No need to see huge numbers */ + if (percent > 100) + percent = 100; + } else + percent = p->incorrect ?
100 : 0; + + seq_printf(m, "%8lu %8lu %3lu ", p->correct, p->incorrect, percent); + seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line); + return 0; +} + +static struct seq_operations tracing_likely_seq_ops = { + .start = t_start, + .next = t_next, + .stop = t_stop, + .show = t_show, +}; + +static int tracing_likely_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = seq_open(file, &tracing_likely_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = (void *)inode->i_private; + } + + return ret; +} + +static struct file_operations tracing_likely_fops = { + .open = tracing_likely_open, + .read = seq_read, + .llseek = seq_lseek, +}; + +extern unsigned long __start_likely_profile[]; +extern unsigned long __stop_likely_profile[]; +extern unsigned long __start_unlikely_profile[]; +extern unsigned long __stop_unlikely_profile[]; + +static struct ftrace_pointer ftrace_likely_pos = { + .start = __start_likely_profile, + .stop = __stop_likely_profile, +}; + +static struct ftrace_pointer ftrace_unlikely_pos = { + .start = __start_unlikely_profile, + .stop = __stop_unlikely_profile, +}; + +static __init int ftrace_unlikely_init(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + + entry = debugfs_create_file("profile_likely", 0444, d_tracer, + &ftrace_likely_pos, + &tracing_likely_fops); + if (!entry) + pr_warning("Could not create debugfs 'profile_likely' entry\n"); + + entry = debugfs_create_file("profile_unlikely", 0444, d_tracer, + &ftrace_unlikely_pos, + &tracing_likely_fops); + if (!entry) + pr_warning("Could not create debugfs" + " 'profile_unlikely' entry\n"); + + return 0; +} + +device_initcall(ftrace_unlikely_init); ^ permalink raw reply [flat|nested] 17+ messages in thread
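The counting mechanism in the patch above can also be tried out as a self-contained userspace experiment. The sketch below is only an illustration, not the kernel code: the names (branch_site, profile_expect, my_likely, my_unlikely) are made up, and a plain registration array stands in for the linker-section walk done by the seq_file code. Each annotation site gets its own static record, initialized with __func__/__FILE__/__LINE__, and every evaluation bumps a correct or incorrect counter depending on whether the outcome matched the prediction:

#include <stdio.h>

struct branch_site {
	const char *func;
	const char *file;
	unsigned line;
	unsigned long correct;
	unsigned long incorrect;
	int registered;
};

#define MAX_SITES 64
static struct branch_site *sites[MAX_SITES];
static int nsites;

/* expect is 1 for a likely() site, 0 for an unlikely() site */
static void branch_update(struct branch_site *s, int val, int expect)
{
	if (!s->registered && nsites < MAX_SITES) {
		sites[nsites++] = s;	/* stands in for the _ftrace_* section */
		s->registered = 1;
	}
	if (val == expect)
		s->correct++;
	else
		s->incorrect++;
}

#define profile_expect(x, expect) ({				\
	static struct branch_site ___s = {			\
		.func = __func__,				\
		.file = __FILE__,				\
		.line = __LINE__,				\
	};							\
	int ___r = !!(x);					\
	branch_update(&___s, ___r, (expect));			\
	___r;							\
})
#define my_likely(x)	__builtin_expect(profile_expect(x, 1), 1)
#define my_unlikely(x)	__builtin_expect(profile_expect(x, 0), 0)

int main(void)
{
	long sum = 0;
	int i;

	for (i = 0; i < 1000; i++) {
		if (my_unlikely(i % 10 == 0))	/* true 10% of the time */
			sum += i;
		if (my_likely(i < 990))		/* true 99% of the time */
			sum++;
	}
	/* dump all sites, roughly like the profile_* files above */
	for (i = 0; i < nsites; i++)
		printf("%8lu %8lu  %s %s %u\n",
		       sites[i]->correct, sites[i]->incorrect,
		       sites[i]->func, sites[i]->file, sites[i]->line);
	return sum ? 0 : 1;
}

Built with gcc (statement expressions are a GCC extension, just as in the kernel macros), this prints 900/100 for the unlikely site and 990/10 for the likely one.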
* [PATCH 2/2 v3][RFC] ftrace: unlikely annotation tracer 2008-10-29 22:39 ` [PATCH 1/2 v3][RFC] " Steven Rostedt @ 2008-10-29 22:40 ` Steven Rostedt 2008-10-30 14:32 ` [PATCH 1/2 v3][RFC] trace: profile likely and unlikely annotations Jörn Engel 1 sibling, 0 replies; 17+ messages in thread From: Steven Rostedt @ 2008-10-29 22:40 UTC (permalink / raw) To: LKML Cc: Arjan van de Ven, Theodore Tso, Mike Snitzer, Andrew Morton, Ingo Molnar, Thomas Gleixner, Peter Zijlstra, Frederic Weisbecker, Daniel Walker, Linus Torvalds [ Changes since v2: Changed to a "func:file:line-num" format. ] This patch adds a way to record the instances of the likely and unlikely condition annotations. When "unlikely" is set in /debugfs/tracing/iter_ctrl the unlikely conditions will be added to any of the ftrace tracers. For example: bash-3471 [003] 357.014755: [INCORRECT] sched_info_dequeued:sched_stats.h:177 bash-3471 [003] 357.014756: [correct] update_curr:sched_fair.c:489 bash-3471 [003] 357.014758: [correct] calc_delta_fair:sched_fair.c:411 bash-3471 [003] 357.014759: [correct] account_group_exec_runtime:sched_stats.h:356 bash-3471 [003] 357.014761: [correct] update_curr:sched_fair.c:489 bash-3471 [003] 357.014763: [INCORRECT] calc_delta_fair:sched_fair.c:411 bash-3471 [003] 357.014765: [correct] calc_delta_mine:sched.c:1279 This shows the normal tracer heading, as well as whether the condition was correct ("[correct]") or mistaken ("[INCORRECT]"), followed by the function, file name and line number. Signed-off-by: Steven Rostedt <srostedt@redhat.com> --- kernel/Makefile | 5 + kernel/trace/Kconfig | 22 ++++++ kernel/trace/Makefile | 5 + kernel/trace/ring_buffer.c | 11 +++ kernel/trace/trace.c | 39 +++++++++++ kernel/trace/trace.h | 43 +++++++++++ kernel/trace/trace_unlikely.c | 137 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 262 insertions(+) Index: linux-tip.git/kernel/trace/Kconfig =================================================================== --- linux-tip.git.orig/kernel/trace/Kconfig 2008-10-29 17:01:11.000000000 -0400 +++ linux-tip.git/kernel/trace/Kconfig 2008-10-29 17:46:37.000000000 -0400 @@ -154,6 +154,28 @@ config TRACE_UNLIKELY_PROFILE Say N if unsure. +config TRACING_UNLIKELY + bool + help + Selected by tracers that will trace the likely and unlikely + conditions. This prevents the tracers themselves from being + profiled. Profiling the tracing infrastructure can only happen + when the likelys and unlikelys are not being traced. + +config UNLIKELY_TRACER + bool "Trace likely/unlikely instances" + depends on TRACE_UNLIKELY_PROFILE + select TRACING_UNLIKELY + help + This traces the events of likely and unlikely condition + calls in the kernel. The difference between this and the + "Trace likely/unlikely profiler" is that this is not a + histogram of the callers, but actually places the calling + events into a running buffer to see when and where the + events happened, as well as their results. + + Say N if unsure.
+ config STACK_TRACER bool "Trace max stack" depends on HAVE_FUNCTION_TRACER Index: linux-tip.git/kernel/Makefile =================================================================== --- linux-tip.git.orig/kernel/Makefile 2008-10-29 17:01:11.000000000 -0400 +++ linux-tip.git/kernel/Makefile 2008-10-29 17:46:37.000000000 -0400 @@ -24,6 +24,11 @@ CFLAGS_REMOVE_sched_clock.o = -pg CFLAGS_REMOVE_sched.o = -mno-spe -pg endif +ifdef CONFIG_TRACING_UNLIKELY +# Do not profile tracepoint if we are tracing the unlikely conditions +CFLAGS_tracepoint.o += '-Dunlikely(x)=unlikely_notrace(x)' +endif + obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o Index: linux-tip.git/kernel/trace/Makefile =================================================================== --- linux-tip.git.orig/kernel/trace/Makefile 2008-10-29 17:01:11.000000000 -0400 +++ linux-tip.git/kernel/trace/Makefile 2008-10-29 17:46:37.000000000 -0400 @@ -10,6 +10,11 @@ CFLAGS_trace_selftest_dynamic.o = -pg obj-y += trace_selftest_dynamic.o endif +ifdef CONFIG_TRACING_UNLIKELY +KBUILD_CFLAGS += '-Dlikely(x)=likely_notrace(x)' +KBUILD_CFLAGS += '-Dunlikely(x)=unlikely_notrace(x)' +endif + obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o obj-$(CONFIG_RING_BUFFER) += ring_buffer.o Index: linux-tip.git/kernel/trace/trace_unlikely.c =================================================================== --- linux-tip.git.orig/kernel/trace/trace_unlikely.c 2008-10-29 17:25:31.000000000 -0400 +++ linux-tip.git/kernel/trace/trace_unlikely.c 2008-10-29 18:03:30.000000000 -0400 @@ -15,8 +15,145 @@ #include <asm/local.h> #include "trace.h" +#ifdef CONFIG_UNLIKELY_TRACER + +static int unlikely_tracing_enabled __read_mostly; +static DEFINE_MUTEX(unlikely_tracing_mutex); +static DEFINE_PER_CPU(atomic_t, unlikely_trace_disabled); +static struct trace_array *unlikely_tracer; + +void trace_unlikely_stop(void) +{ + /* preemption should be disabled */ + int cpu = smp_processor_id(); + + atomic_inc(&per_cpu(unlikely_trace_disabled, cpu)); +} + +void trace_unlikely_start(void) +{ + /* preemption should be disabled */ + int cpu = smp_processor_id(); + + atomic_dec(&per_cpu(unlikely_trace_disabled, cpu)); +} + +static void +probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) +{ + struct trace_array *tr = unlikely_tracer; + struct ring_buffer_event *event; + struct trace_unlikely *entry; + unsigned long flags, irq_flags; + int cpu, resched, pc; + const char *p; + + /* + * I would love to save just the ftrace_likely_data pointer, but + * this code can also be used by modules. Ugly things can happen + * if the module is unloaded, and then we go and read the + * pointer. This is slower, but much safer. 
+ */ + + if (unlikely(!tr)) + return; + + pc = preempt_count(); + /* Can be called within schedule */ + resched = need_resched(); + preempt_disable_notrace(); + cpu = raw_smp_processor_id(); + if (atomic_inc_return(&per_cpu(unlikely_trace_disabled, cpu)) != 1) + goto out; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out; + + local_save_flags(flags); + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_UNLIKELY; + + /* Strip off the path, only save the file */ + p = f->file + strlen(f->file); + while (p >= f->file && *p != '/') + p--; + p++; + + strncpy(entry->func, f->func, TRACE_FUNC_SIZE); + strncpy(entry->file, p, TRACE_FILE_SIZE); + entry->func[TRACE_FUNC_SIZE] = 0; + entry->file[TRACE_FILE_SIZE] = 0; + entry->line = f->line; + entry->correct = val == expect; + + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + out: + atomic_dec(&per_cpu(unlikely_trace_disabled, cpu)); + if (resched) + preempt_enable_no_resched_notrace(); + else + preempt_enable_notrace(); +} + +static inline +void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect) +{ + if (!unlikely_tracing_enabled) + return; + + probe_likely_condition(f, val, expect); +} + +int enable_unlikely_tracing(struct trace_array *tr) +{ + int ret = 0; + + mutex_lock(&unlikely_tracing_mutex); + unlikely_tracer = tr; + /* + * Must be seen before enabling. The reader is a condition + * where we do not need a matching rmb() + */ + smp_wmb(); + unlikely_tracing_enabled++; + mutex_unlock(&unlikely_tracing_mutex); + + return ret; +} + +void disable_unlikely_tracing(void) +{ + mutex_lock(&unlikely_tracing_mutex); + + if (!unlikely_tracing_enabled) + goto out_unlock; + + unlikely_tracing_enabled--; + + out_unlock: + mutex_unlock(&unlikely_tracing_mutex); +} +#else +static inline +void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect) +{ +} +#endif /* CONFIG_UNLIKELY_TRACER */ + void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect) { + /* + * I would love to have a trace point here instead, but the + * trace point code is so inundated with unlikely and likely + * conditions that the recursive nightmare that exists is too + * much to try to get working. At least for now. + */ + trace_likely_condition(f, val, expect); + /* FIXME: Make this atomic! */ if (val == expect) f->correct++; Index: linux-tip.git/kernel/trace/trace.c =================================================================== --- linux-tip.git.orig/kernel/trace/trace.c 2008-10-29 17:01:11.000000000 -0400 +++ linux-tip.git/kernel/trace/trace.c 2008-10-29 18:04:04.000000000 -0400 @@ -235,6 +235,9 @@ static const char *trace_options[] = { "stacktrace", "sched-tree", "ftrace_printk", +#ifdef CONFIG_UNLIKELY_TRACER + "unlikely", +#endif NULL }; @@ -1457,6 +1460,18 @@ print_lat_fmt(struct trace_iterator *ite trace_seq_print_cont(s, iter); break; } + case TRACE_UNLIKELY: { + struct trace_unlikely *field; + + trace_assign_type(field, entry); + + trace_seq_printf(s, "[%s] %s:%s:%d\n", + field->correct ? 
"correct" : "INCORRECT", + field->func, + field->file, + field->line); + break; + } default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1593,6 +1608,18 @@ static enum print_line_t print_trace_fmt trace_seq_print_cont(s, iter); break; } + case TRACE_UNLIKELY: { + struct trace_unlikely *field; + + trace_assign_type(field, entry); + + trace_seq_printf(s, "[%s] %s:%s:%d\n", + field->correct ? "correct" : "INCORRECT", + field->func, + field->file, + field->line); + break; + } } return TRACE_TYPE_HANDLED; } @@ -1675,6 +1702,10 @@ static enum print_line_t print_raw_fmt(s trace_seq_print_cont(s, iter); break; } + case TRACE_UNLIKELY: { + /* IGNORE */ + break; + }; } return TRACE_TYPE_HANDLED; } @@ -2362,8 +2393,14 @@ tracing_ctrl_write(struct file *filp, co tr->ctrl = val; + if (!val) + trace_unlikely_disable(); + if (current_trace && current_trace->ctrl_update) current_trace->ctrl_update(tr); + + if (val) + trace_unlikely_enable(tr); } mutex_unlock(&trace_types_lock); @@ -2425,6 +2462,7 @@ tracing_set_trace_write(struct file *fil if (t == current_trace) goto out; + trace_unlikely_disable(); if (current_trace && current_trace->reset) current_trace->reset(tr); @@ -2432,6 +2470,7 @@ tracing_set_trace_write(struct file *fil if (t->init) t->init(tr); + trace_unlikely_enable(tr); out: mutex_unlock(&trace_types_lock); Index: linux-tip.git/kernel/trace/trace.h =================================================================== --- linux-tip.git.orig/kernel/trace/trace.h 2008-10-29 17:01:11.000000000 -0400 +++ linux-tip.git/kernel/trace/trace.h 2008-10-29 17:46:37.000000000 -0400 @@ -21,6 +21,7 @@ enum trace_type { TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, + TRACE_UNLIKELY, TRACE_BOOT, __TRACE_LAST_TYPE @@ -117,6 +118,16 @@ struct trace_boot { struct boot_trace initcall; }; +#define TRACE_FUNC_SIZE 30 +#define TRACE_FILE_SIZE 20 +struct trace_unlikely { + struct trace_entry ent; + unsigned line; + char func[TRACE_FUNC_SIZE+1]; + char file[TRACE_FILE_SIZE+1]; + char correct; +}; + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. 
These are: @@ -217,6 +228,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ TRACE_MMIO_MAP); \ IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \ + IF_ASSIGN(var, ent, struct trace_unlikely, TRACE_UNLIKELY); \ __ftrace_bad_type(); \ } while (0) @@ -413,8 +425,39 @@ enum trace_iterator_flags { TRACE_ITER_STACKTRACE = 0x100, TRACE_ITER_SCHED_TREE = 0x200, TRACE_ITER_PRINTK = 0x400, +#ifdef CONFIG_UNLIKELY_TRACER + TRACE_ITER_UNLIKELY = 0x800, +#endif }; extern struct tracer nop_trace; +#ifdef CONFIG_UNLIKELY_TRACER +extern int enable_unlikely_tracing(struct trace_array *tr); +extern void disable_unlikely_tracing(void); +static inline int trace_unlikely_enable(struct trace_array *tr) +{ + if (trace_flags & TRACE_ITER_UNLIKELY) + return enable_unlikely_tracing(tr); + return 0; +} +static inline void trace_unlikely_disable(void) +{ + /* due to races, always disable */ + disable_unlikely_tracing(); +} +extern void trace_unlikely_stop(void); +extern void trace_unlikely_start(void); +#else +static inline int trace_unlikely_enable(struct trace_array *tr) +{ + return 0; +} +static inline void trace_unlikely_disable(void) +{ +} +static inline void trace_unlikely_stop(void) { } +static inline void trace_unlikely_start(void) { } +#endif /* CONFIG_UNLIKELY_TRACER */ + #endif /* _LINUX_KERNEL_TRACE_H */ Index: linux-tip.git/kernel/trace/ring_buffer.c =================================================================== --- linux-tip.git.orig/kernel/trace/ring_buffer.c 2008-10-29 17:01:11.000000000 -0400 +++ linux-tip.git/kernel/trace/ring_buffer.c 2008-10-29 17:46:37.000000000 -0400 @@ -16,6 +16,8 @@ #include <linux/list.h> #include <linux/fs.h> +#include "trace.h" + /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 @@ -1151,7 +1153,14 @@ ring_buffer_lock_reserve(struct ring_buf if (length > BUF_PAGE_SIZE) goto out; + /* + * Ugly, but the unlikely tracing will enter back into + * the ring buffer code, namely through the timestamp code, + * which would trigger the "again" loop infinitely. + */ + trace_unlikely_stop(); event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length); + trace_unlikely_start(); if (!event) goto out; @@ -1263,8 +1272,10 @@ int ring_buffer_write(struct ring_buffer goto out; event_length = rb_calculate_event_length(length); + trace_unlikely_stop(); event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, event_length); + trace_unlikely_start(); if (!event) goto out; ^ permalink raw reply [flat|nested] 17+ messages in thread
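The reason for the inc-and-test guard in probe_likely_condition() above can be shown with a much smaller sketch. The kernel version uses a per-CPU atomic_t with preemption disabled; the stand-in below (hypothetical names, single-threaded for simplicity) keeps only the shape of the guard. Without it, the recording path, which itself evaluates annotated branches, would re-enter the probe forever:

#include <stdio.h>

static int trace_disabled;	/* models per_cpu(unlikely_trace_disabled, cpu) */

static void probe(const char *what);

static void record_event(const char *what)
{
	/* pretend the recording path hits an annotated branch,
	 * which re-enters the probe */
	probe("nested");
	printf("event: %s\n", what);
}

static void probe(const char *what)
{
	if (++trace_disabled != 1)	/* already inside the probe */
		goto out;
	record_event(what);
out:
	trace_disabled--;
}

int main(void)
{
	probe("outer");		/* prints "event: outer" exactly once */
	return 0;
}

The nested call sees the counter already above one and bails out, which is what the atomic_inc_return(...) != 1 test does per CPU in the patch.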
* Re: [PATCH 1/2 v3][RFC] trace: profile likely and unlikely annotations 2008-10-29 22:39 ` [PATCH 1/2 v3][RFC] " Steven Rostedt 2008-10-29 22:40 ` [PATCH 2/2 v3][RFC] ftrace: unlikely annotation tracer Steven Rostedt @ 2008-10-30 14:32 ` Jörn Engel 2008-10-30 14:55 ` Theodore Tso 1 sibling, 1 reply; 17+ messages in thread From: Jörn Engel @ 2008-10-30 14:32 UTC (permalink / raw) To: Steven Rostedt Cc: LKML, Arjan van de Ven, Theodore Tso, Mike Snitzer, Andrew Morton, Ingo Molnar, Thomas Gleixner, Peter Zijlstra, Frederic Weisbecker, Daniel Walker, Linus Torvalds On Wed, 29 October 2008 18:39:55 -0400, Steven Rostedt wrote: > > 69768 61064 87 __switch_to process_64.c 624 > 15557 115251 100 __switch_to process_64.c 594 > 15555 115227 100 __switch_to process_64.c 590 I may be out of school for a while, but that math sure looks odd. > + if (p->correct) { > + percent = p->incorrect * 100; > + percent /= p->correct; percent /= p->correct + p->incorrect; > + /* No need to see huge numbers */ > + if (percent > 100) > + percent = 100; > + } else > + percent = p->incorrect ? 100 : 0; Jörn -- To my face you have the audacity to advise me to become a thief - the worst kind of thief that is conceivable, a thief of spiritual things, a thief of ideas! It is insufferable, intolerable! -- M. Binet in Scarabouche ^ permalink raw reply [flat|nested] 17+ messages in thread
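Plugging one of the quoted __switch_to lines (15557 correct, 115251 incorrect) into both formulas shows how far off the clamped number is; a quick standalone check, assuming Jörn's divisor of correct plus incorrect:

#include <stdio.h>

int main(void)
{
	/* numbers from the quoted __switch_to line */
	unsigned long correct = 15557, incorrect = 115251;
	unsigned long old, fixed;

	old = incorrect * 100 / correct;		 /* 740, clamped to 100 */
	fixed = incorrect * 100 / (correct + incorrect); /* 88 */

	printf("old: %lu%%  fixed: %lu%%\n",
	       old > 100 ? 100UL : old, fixed);
	return 0;
}

So the rows shown as 100% are really around 88% incorrect once the division is taken over the total.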
* Re: [PATCH 1/2 v3][RFC] trace: profile likely and unlikely annotations 2008-10-30 14:32 ` [PATCH 1/2 v3][RFC] trace: profile likely and unlikely annotations Jörn Engel @ 2008-10-30 14:55 ` Theodore Tso 2008-10-30 15:10 ` Steven Rostedt 0 siblings, 1 reply; 17+ messages in thread From: Theodore Tso @ 2008-10-30 14:55 UTC (permalink / raw) To: Jörn Engel Cc: Steven Rostedt, LKML, Arjan van de Ven, Mike Snitzer, Andrew Morton, Ingo Molnar, Thomas Gleixner, Peter Zijlstra, Frederic Weisbecker, Daniel Walker, Linus Torvalds On Thu, Oct 30, 2008 at 03:32:39PM +0100, Jörn Engel wrote: > On Wed, 29 October 2008 18:39:55 -0400, Steven Rostedt wrote: > > > > 69768 61064 87 __switch_to process_64.c 624 > > 15557 115251 100 __switch_to process_64.c 594 > > 15555 115227 100 __switch_to process_64.c 590 > > I may be out of school for a while, but that math sure looks odd. > > > + if (p->correct) { > > + percent = p->incorrect * 100; > > + percent /= p->correct; > percent /= p->correct + p->incorrect; And once you do the above fix, I don't think what is below is necessary any more. :-) > > + /* No need to see huge numbers */ > > + if (percent > 100) > > + percent = 100; I would also calculate the percent correct rather than the percent incorrect, on the general theory that 100% good, 0% bad is easier for my little brain to understand, but that's just a minor thing... - Ted ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 1/2 v3][RFC] trace: profile likely and unlikely annotations 2008-10-30 14:55 ` Theodore Tso @ 2008-10-30 15:10 ` Steven Rostedt 0 siblings, 0 replies; 17+ messages in thread From: Steven Rostedt @ 2008-10-30 15:10 UTC (permalink / raw) To: Theodore Tso Cc: Jörn Engel, LKML, Arjan van de Ven, Mike Snitzer, Andrew Morton, Ingo Molnar, Thomas Gleixner, Peter Zijlstra, Frederic Weisbecker, Daniel Walker, Linus Torvalds On Thu, 30 Oct 2008, Theodore Tso wrote: > On Thu, Oct 30, 2008 at 03:32:39PM +0100, Jörn Engel wrote: > > On Wed, 29 October 2008 18:39:55 -0400, Steven Rostedt wrote: > > > > > > 69768 61064 87 __switch_to process_64.c 624 > > > 15557 115251 100 __switch_to process_64.c 594 > > > 15555 115227 100 __switch_to process_64.c 590 > > > > I may be out of school for a while, but that math sure looks odd. > > > > > + if (p->correct) { > > > + percent = p->incorrect * 100; > > > + percent /= p->correct; > > percent /= p->correct + p->incorrect; > > And once you do the above fix, I don't think what is below is > necessary any more. :-) Bah! Total brain-fart. Thanks ;-) > > > > + /* No need to see huge numbers */ > > > + if (percent > 100) > > > + percent = 100; > > I would also calculate the percent correct rather than the percent > incorrect, on the general theory that 100% good, 0% bad is easier for > my little brain to understand, but that's just a minor thing... I thought about whether to show percent correct or percent incorrect. I chose incorrect just because it is easier to spot the troublemakers. You can say "let's allow 25% incorrect", which is better than saying "let's have a minimum of 75%". I think the mind can pick out those that go over a number better than it can see those that are under a number. Thanks, -- Steve ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH][RFC] trace: profile likely and unlikely annotations 2008-10-28 14:37 ` Theodore Tso 2008-10-28 14:48 ` Arjan van de Ven @ 2008-10-28 14:49 ` Steven Rostedt 2008-10-28 18:29 ` Theodore Tso 1 sibling, 1 reply; 17+ messages in thread From: Steven Rostedt @ 2008-10-28 14:49 UTC (permalink / raw) To: Theodore Tso Cc: LKML, Mike Snitzer, Andrew Morton, Ingo Molnar, Thomas Gleixner, Peter Zijlstra, Arjan van de Ven, Frederic Weisbecker On Tue, 28 Oct 2008, Theodore Tso wrote: > On Tue, Oct 28, 2008 at 12:12:48AM -0400, Steven Rostedt wrote: > > > > Andrew Morton recently suggested having an in-kernel way to profile > > likely and unlikely macros. This patch achieves that goal. > > Maybe I'm confused, but when I read through the patch, it looks like > 'hit' is incremented whenever the condition is true, and 'missed' > is incremented whenever the condition is false, correct? Correct. > > Is that what you intended? So for profile_unlikely, "missed" is good, > and "hit" is bad, and for profile_likely, "hit" is good, and "missed" > is bad. That seems horribly confusing. Correct. Yeah, I figured I'd get complaints about this (hence the RFC). If you look at my awk example, you will also notice that I switched the $1 and $2 around when reading the other file. This can be confusing either way. I did this to reuse the code for both outputs. > > If that wasn't what you intended, the meaning of "hit" and "missed" > seems to be highly confusing, either way. Can we perhaps use some > other terminology? Simply using "True" and "False" would be better, > since there's no possible confusion about what the labels mean. So renaming 'hit' and 'miss' to 'True' and 'False' would be good enough? That is, it will still mean that a 'True' is bad for unlikely but good for a likely? > > > +#define unlikely(x) ({ \ > > + int ______r; \ > > + static struct ftrace_likely_data ______f \ > > + __attribute__((__aligned__(4))) \ > > + __attribute__((section("_ftrace_unlikely"))); \ > > + if (unlikely_notrace(!______f.ip)) \ > > + ______f.ip = __THIS_IP__; \ > > + ______r = unlikely_notrace(x); \ > > + ftrace_likely_update(&______f, ______r); \ > > + ______r; \ > > + }) > > Note that unlikely(x) calls ftrace_likely_update(), which does this: > > > +void ftrace_likely_update(struct ftrace_likely_data *f, int val) > > +{ > > + /* FIXME: Make this atomic! */ > > + if (val) > > + f->hit++; > > + else > > + f->missed++; > > +} > > +EXPORT_SYMBOL(ftrace_likely_update); > > > So that seems to mean that if unlikely(x) is false, then ______r is 0, > which means we increment f->missed. Or am I missing something? > > I would have thought that if unlikely(x) is false, that's *good*, > since it means the unlikely label was correct. And normally, when > people think about cache hits vs cache misses, hits are good and > misses are bad. Which is why I think the terminology is highly > confusing... OK, I'm fine with changing the terminology. v2 will do: s/hit/True/ s/missed/False/ Fine with you? -- Steve ^ permalink raw reply [flat|nested] 17+ messages in thread
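For comparison, the v2/v3 patches elsewhere in this thread settle this by passing the expected value into the update helper and counting correct/incorrect instead of raw true/false. A minimal sketch of that shape (hypothetical names, not the kernel code):

#include <stdio.h>

struct counts {
	unsigned long correct;
	unsigned long incorrect;
};

/* expect is 1 for likely(), 0 for unlikely() */
static void branch_update(struct counts *c, int val, int expect)
{
	if (val == expect)
		c->correct++;
	else
		c->incorrect++;
}

int main(void)
{
	struct counts un = { 0, 0 };
	int i;

	/* an unlikely() whose condition turns out true 1 time in 4 */
	for (i = 0; i < 8; i++)
		branch_update(&un, (i % 4) == 0, 0);

	/* prints "correct=6 incorrect=2"; no need to remember whether
	 * a "hit" is good or bad for this particular file */
	printf("correct=%lu incorrect=%lu\n", un.correct, un.incorrect);
	return 0;
}

With the expect parameter the column labels carry their own meaning, regardless of whether the file is profile_likely or profile_unlikely.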
* Re: [PATCH][RFC] trace: profile likely and unlikely annotations 2008-10-28 14:49 ` [PATCH][RFC] " Steven Rostedt @ 2008-10-28 18:29 ` Theodore Tso 2008-10-28 18:41 ` Steven Rostedt 0 siblings, 1 reply; 17+ messages in thread From: Theodore Tso @ 2008-10-28 18:29 UTC (permalink / raw) To: Steven Rostedt Cc: LKML, Mike Snitzer, Andrew Morton, Ingo Molnar, Thomas Gleixner, Peter Zijlstra, Arjan van de Ven, Frederic Weisbecker On Tue, Oct 28, 2008 at 10:49:16AM -0400, Steven Rostedt wrote: > > OK, I'm fine with changing the terminology. v2 will do: > > s/hit/True/ > s/missed/False/ > > Fine with you? I'm OK with either that, or with Arjan's suggestion of "Correct" and "Incorrect" --- although that would mean changing a line in the definition of #define unlikely(x): ftrace_likely_update(&______f, !______r); Either "True" / "False" or "Correct" / "Incorrect" has the advantage of being unambiguous. "Correct" / "Incorrect" has the advantage that people don't have to think about the fact that for /proc/profile_unlikely, high numbers of "False" are a good thing, whereas for /proc/profile_likely, high numbers of "True" are a good thing. With "Correct" / "Incorrect" it's easier to understand that high numbers of "Correct" are good. So I can see why Arjan suggested Correct/Incorrect, although I can live with either. - Ted ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH][RFC] trace: profile likely and unlikely annotations 2008-10-28 18:29 ` Theodore Tso @ 2008-10-28 18:41 ` Steven Rostedt 0 siblings, 0 replies; 17+ messages in thread From: Steven Rostedt @ 2008-10-28 18:41 UTC (permalink / raw) To: Theodore Tso Cc: LKML, Mike Snitzer, Andrew Morton, Ingo Molnar, Thomas Gleixner, Peter Zijlstra, Arjan van de Ven, Frederic Weisbecker On Tue, 28 Oct 2008, Theodore Tso wrote: > On Tue, Oct 28, 2008 at 10:49:16AM -0400, Steven Rostedt wrote: > > > > OK, I'm fine with changing the terminology. v2 will do: > > > > s/hit/True/ > > s/missed/False/ > > > > Fine with you? > > I'm OK with either that, or with Arjan's suggestion of "Correct" and > "Incorrect" --- although that would changing a line in the definition > of #define unlikely(x): > > ftrace_likely_update(&______f, !______r); > > Either "True" / "False" or "Correct" / "Incorrect" has the advantage > of being unambiguous. "Correct" / "Incorrect" has the advantage that > people don't have to think about the fact that for > /proc/profile_unlikely, high numbers of "False" is a good thing, where > as for /proc/profile_likely, high numbers of "True" is a good thing. > With "Correct" / "Incorrect" it's easier to undersatnd that high > numbers of "Correct" is good. > > So I can see why Arjan suggested Correct/Incorrect, although I can > live with either. I'm almost done with v2. Using Arjan's version (Correct/Incorrect) was actually a trivial change. -- Steve ^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2008-10-30 15:10 UTC | newest] Thread overview: 17+ messages [not found] <170fa0d20810271529g3c64ae89me29ed8b65a9c3b5e@mail.gmail.com> [not found] ` <alpine.DEB.1.10.0810271918390.19731@gandalf.stny.rr.com> [not found] ` <20081028001340.GB9797@mit.edu> 2008-10-28 4:12 ` [PATCH][RFC] trace: profile likely and unlikely annotations Steven Rostedt 2008-10-28 4:23 ` Arjan van de Ven 2008-10-28 4:39 ` Andrew Morton 2008-10-28 14:37 ` Theodore Tso 2008-10-28 14:48 ` Arjan van de Ven 2008-10-28 14:51 ` Steven Rostedt 2008-10-29 16:35 ` [PATCH 1/2 v2][RFC] " Steven Rostedt 2008-10-29 16:38 ` [PATCH 2/2 v2][RFC] ftrace: unlikely annotation tracer Steven Rostedt 2008-10-29 16:40 ` [PATCH 1/2 v2][RFC] trace: profile likely and unlikely annotations Arjan van de Ven 2008-10-29 22:39 ` [PATCH 1/2 v3][RFC] " Steven Rostedt 2008-10-29 22:40 ` [PATCH 2/2 v3][RFC] ftrace: unlikely annotation tracer Steven Rostedt 2008-10-30 14:32 ` [PATCH 1/2 v3][RFC] trace: profile likely and unlikely annotations Jörn Engel 2008-10-30 14:55 ` Theodore Tso 2008-10-30 15:10 ` Steven Rostedt 2008-10-28 14:49 ` [PATCH][RFC] " Steven Rostedt 2008-10-28 18:29 ` Theodore Tso 2008-10-28 18:41 ` Steven Rostedt