LKML Archive on lore.kernel.org
* [PATCH 0/2] ftrace: clean ups and sanity checks
From: Steven Rostedt
Date: 2008-10-21 16:40 UTC
To: linux-kernel
Cc: Ingo Molnar, Thomas Gleixner, Peter Zijlstra, Andrew Morton, David Miller, Linus Torvalds

Ingo,

Can you please pull in these two patches and run them through your tests?
I've tested them, but I know you have a more elaborate setup. Then can you
send them on to Linus for 2.6.28?

These are clean-up and robustness patches. No new features here.

Thanks,

-- Steve
* [PATCH 1/2] ftrace: make dynamic ftrace more robust
From: Steven Rostedt
Date: 2008-10-21 16:40 UTC
In-Reply-To: [PATCH 0/2] ftrace: clean ups and sanity checks
To: linux-kernel
Cc: Ingo Molnar, Thomas Gleixner, Peter Zijlstra, Andrew Morton, David Miller, Linus Torvalds, Steven Rostedt

[-- Attachment #1: ftrace-more-robust-cleanup.patch --]
[-- Type: text/plain, Size: 6906 bytes --]

This patch cleans up some of the ftrace code and adds more sanity checks.
If any of the sanity checks fail, a warning is printed and ftrace is
disabled.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 arch/x86/kernel/ftrace.c |   15 ++++++++++-----
 include/linux/ftrace.h   |    7 ++++++-
 include/linux/init.h     |   10 +++++-----
 kernel/trace/ftrace.c    |   35 +++++++++++++++++++++++++++++++----
 4 files changed, 52 insertions(+), 15 deletions(-)

Index: linux-compile.git/arch/x86/kernel/ftrace.c
===================================================================
--- linux-compile.git.orig/arch/x86/kernel/ftrace.c	2008-10-20 19:39:54.000000000 -0400
+++ linux-compile.git/arch/x86/kernel/ftrace.c	2008-10-20 19:42:02.000000000 -0400
@@ -66,19 +66,24 @@ ftrace_modify_code(unsigned long ip, uns
 	/*
 	 * Note: Due to modules and __init, code can
 	 * disappear and change, we need to protect against faulting
-	 * as well as code changing.
+	 * as well as code changing. We do this by using the
+	 * __copy_*_user functions.
 	 *
 	 * No real locking needed, this code is run through
 	 * kstop_machine, or before SMP starts.
 	 */
+
+	/* read the text we want to modify */
 	if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
-		return 1;
+		return FTRACE_CODE_FAILED_READ;
 
+	/* Make sure it is what we expect it to be */
 	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
-		return 2;
+		return FTRACE_CODE_FAILED_CMP;
 
-	WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code,
-					     MCOUNT_INSN_SIZE));
+	/* replace the text with the new text */
+	if (__copy_to_user_inatomic((char __user *)ip, new_code, MCOUNT_INSN_SIZE))
+		return FTRACE_CODE_FAILED_WRITE;
 
 	sync_core();

Index: linux-compile.git/include/linux/ftrace.h
===================================================================
--- linux-compile.git.orig/include/linux/ftrace.h	2008-10-20 19:39:54.000000000 -0400
+++ linux-compile.git/include/linux/ftrace.h	2008-10-20 19:47:23.000000000 -0400
@@ -232,6 +232,11 @@ static inline void start_boot_trace(void
 static inline void stop_boot_trace(void) { }
 #endif
 
-
+enum {
+	FTRACE_CODE_MODIFIED,
+	FTRACE_CODE_FAILED_READ,
+	FTRACE_CODE_FAILED_CMP,
+	FTRACE_CODE_FAILED_WRITE,
+};
 
 #endif /* _LINUX_FTRACE_H */

Index: linux-compile.git/include/linux/init.h
===================================================================
--- linux-compile.git.orig/include/linux/init.h	2008-10-20 19:39:54.000000000 -0400
+++ linux-compile.git/include/linux/init.h	2008-10-20 19:40:06.000000000 -0400
@@ -75,15 +75,15 @@
 
 #ifdef MODULE
-#define __exitused
+#define __exitused	notrace
 #else
-#define __exitused	__used
+#define __exitused	__used notrace
 #endif
 
 #define __exit		__section(.exit.text) __exitused __cold
 
 /* Used for HOTPLUG */
-#define __devinit	__section(.devinit.text) __cold
+#define __devinit	__section(.devinit.text) __cold notrace
 #define __devinitdata	__section(.devinit.data)
 #define __devinitconst	__section(.devinit.rodata)
 #define __devexit	__section(.devexit.text) __exitused __cold
@@ -91,7 +91,7 @@
 #define __devexitconst	__section(.devexit.rodata)
 
 /* Used for HOTPLUG_CPU */
-#define __cpuinit	__section(.cpuinit.text) __cold
+#define __cpuinit	__section(.cpuinit.text) __cold notrace
 #define __cpuinitdata	__section(.cpuinit.data)
 #define __cpuinitconst	__section(.cpuinit.rodata)
 #define __cpuexit	__section(.cpuexit.text) __exitused __cold
@@ -99,7 +99,7 @@
 #define __cpuexitconst	__section(.cpuexit.rodata)
 
 /* Used for MEMORY_HOTPLUG */
-#define __meminit	__section(.meminit.text) __cold
+#define __meminit	__section(.meminit.text) __cold notrace
 #define __meminitdata	__section(.meminit.data)
 #define __meminitconst	__section(.meminit.rodata)
 #define __memexit	__section(.memexit.text) __exitused __cold

Index: linux-compile.git/kernel/trace/ftrace.c
===================================================================
--- linux-compile.git.orig/kernel/trace/ftrace.c	2008-10-20 19:39:54.000000000 -0400
+++ linux-compile.git/kernel/trace/ftrace.c	2008-10-20 19:46:00.000000000 -0400
@@ -318,6 +318,14 @@ static inline void ftrace_del_hash(struc
 
 static void ftrace_free_rec(struct dyn_ftrace *rec)
 {
+	/*
+	 * No locking, only called from kstop_machine, or
+	 * from module unloading with module locks and interrupts
+	 * disabled to prevent kstop machine from running.
+	 */
+
+	WARN_ON(rec->flags & FTRACE_FL_FREE);
+
 	rec->ip = (unsigned long)ftrace_free_records;
 	ftrace_free_records = rec;
 	rec->flags |= FTRACE_FL_FREE;
@@ -346,7 +354,6 @@ void ftrace_release(void *start, unsigne
 		}
 	}
 	spin_unlock(&ftrace_lock);
-
 }
 
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -556,9 +563,12 @@ static void ftrace_replace_code(int enab
 
 		failed = __ftrace_replace_code(rec, old, new, enable);
 		if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
+			/* kprobes can cause this failure */
 			rec->flags |= FTRACE_FL_FAILED;
 			if ((system_state == SYSTEM_BOOTING) ||
 			    !core_kernel_text(rec->ip)) {
+				/* kprobes was not the fault */
+				ftrace_kill_atomic();
 				ftrace_del_hash(rec);
 				ftrace_free_rec(rec);
 			}
@@ -601,12 +611,12 @@ ftrace_code_disable(struct dyn_ftrace *r
 	failed = ftrace_modify_code(ip, call, nop);
 	if (failed) {
 		switch (failed) {
-		case 1:
+		case FTRACE_CODE_FAILED_READ:
 			WARN_ON_ONCE(1);
 			pr_info("ftrace faulted on modifying ");
 			print_ip_sym(ip);
 			break;
-		case 2:
+		case FTRACE_CODE_FAILED_CMP:
 			WARN_ON_ONCE(1);
 			pr_info("ftrace failed to modify ");
 			print_ip_sym(ip);
@@ -615,7 +625,18 @@ ftrace_code_disable(struct dyn_ftrace *r
 			print_ip_ins(" replace: ", nop);
 			printk(KERN_CONT "\n");
 			break;
+		case FTRACE_CODE_FAILED_WRITE:
+			WARN_ON_ONCE(1);
+			pr_info("ftrace failed to write ");
+			print_ip_sym(ip);
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			pr_info("ftrace unknown error ");
+			print_ip_sym(ip);
+			break;
 		}
+		ftrace_kill_atomic();
 
 		rec->flags |= FTRACE_FL_FAILED;
 		return 0;
@@ -789,6 +810,11 @@ static int __ftrace_update_code(void *ig
 
 	/* No locks needed, the machine is stopped! */
 	for (i = 0; i < FTRACE_HASHSIZE; i++) {
+
+		/* If something went wrong, bail without enabling anything */
+		if (unlikely(ftrace_disabled))
+			return;
+
 		INIT_HLIST_HEAD(&temp_list);
 		head = &ftrace_hash[i];
 
@@ -796,7 +822,8 @@ static int __ftrace_update_code(void *ig
 		hlist_for_each_entry_safe(p, t, n, head, node) {
 			/* Skip over failed records which have not been
 			 * freed. */
-			if (p->flags & FTRACE_FL_FAILED)
+			if (p->flags & FTRACE_FL_FAILED ||
+			    p->flags & FTRACE_FL_FREE)
 				continue;
 
 			/* Unconverted records are always at the head of the

-- 
* Re: [PATCH 1/2] ftrace: make dynamic ftrace more robust
From: Ingo Molnar
Date: 2008-10-22 6:53 UTC
To: Steven Rostedt
Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton, David Miller, Linus Torvalds, Steven Rostedt

* Steven Rostedt <rostedt@goodmis.org> wrote:

> +enum {
> +	FTRACE_CODE_MODIFIED,

i'd suggest naming it FTRACE_CODE_MODIFIED_OK here, to make it stand out
from the failure codes.

> +	FTRACE_CODE_FAILED_READ,
> +	FTRACE_CODE_FAILED_CMP,
> +	FTRACE_CODE_FAILED_WRITE,

but maybe we should just use the standard kernel return codes: 0 for
success, -EINVAL for the rest. Is there any real value in knowing exactly
why it failed? We just know the modification was fishy (this is an
exception situation), and we want to stop ftrace ASAP and then print a
warning so a kernel developer can debug it.

Complicating error handling by introducing similar-looking return code
names just makes it easier to mess up accidentally, hence it _reduces_
robustness.

> --- linux-compile.git.orig/include/linux/init.h	2008-10-20 19:39:54.000000000 -0400
> +++ linux-compile.git/include/linux/init.h	2008-10-20 19:40:06.000000000 -0400
> @@ -75,15 +75,15 @@
> 
>  #ifdef MODULE
> -#define __exitused
> +#define __exitused	notrace
>  #else
> -#define __exitused	__used
> +#define __exitused	__used notrace
>  #endif
> 
>  #define __exit		__section(.exit.text) __exitused __cold
> 
>  /* Used for HOTPLUG */
> -#define __devinit	__section(.devinit.text) __cold
> +#define __devinit	__section(.devinit.text) __cold notrace
>  #define __devinitdata	__section(.devinit.data)
>  #define __devinitconst	__section(.devinit.rodata)
>  #define __devexit	__section(.devexit.text) __exitused __cold
> @@ -91,7 +91,7 @@
>  #define __devexitconst	__section(.devexit.rodata)
> 
>  /* Used for HOTPLUG_CPU */
> -#define __cpuinit	__section(.cpuinit.text) __cold
> +#define __cpuinit	__section(.cpuinit.text) __cold notrace
>  #define __cpuinitdata	__section(.cpuinit.data)
>  #define __cpuinitconst	__section(.cpuinit.rodata)
>  #define __cpuexit	__section(.cpuexit.text) __exitused __cold
> @@ -99,7 +99,7 @@
>  #define __cpuexitconst	__section(.cpuexit.rodata)
> 
>  /* Used for MEMORY_HOTPLUG */
> -#define __meminit	__section(.meminit.text) __cold
> +#define __meminit	__section(.meminit.text) __cold notrace
>  #define __meminitdata	__section(.meminit.data)
>  #define __meminitconst	__section(.meminit.rodata)
>  #define __memexit	__section(.memexit.text) __exitused __cold

there's no justification given for this in the changelog, and the change
looks fishy.

>  static void ftrace_free_rec(struct dyn_ftrace *rec)
>  {
> +	/*
> +	 * No locking, only called from kstop_machine, or
> +	 * from module unloading with module locks and interrupts
> +	 * disabled to prevent kstop machine from running.
> +	 */
> +
> +	WARN_ON(rec->flags & FTRACE_FL_FREE);

this should _NOT_ be just a WARN_ON(). It should immediately stop ftrace
entirely, then print _one_ warning. Then it should never ever run again,
up to the next reboot.

this is a basic principle for instrumentation: if we detect a bug, we
disable ourselves immediately and print a _single_ warning. Do _not_
print possibly thousands of warnings and continue as if nothing
happened ...

> +				/* kprobes was not the fault */
> +				ftrace_kill_atomic();

while at it, ftrace_kill_atomic() is a misnomer. Please use something
more understandable and less ambiguous, like "ftrace_turn_off()". Both
'kill' and 'atomic' are heavily laden phrases used for many other things
in the kernel.

And any such facility must work from any context, because we might call
it from crash paths, etc. So don't name it _atomic() -- it must obviously
be atomic.

	Ingo
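For concreteness, here is a minimal sketch of the errno-style convention
Ingo suggests above. This is illustrative only and not code from the
thread; the specific errno values chosen (-EFAULT, -EINVAL, -EPERM) are
assumptions:

	/* Sketch: return 0 on success, a negative errno on any failure. */
	static int
	ftrace_modify_code(unsigned long ip, unsigned char *old_code,
			   unsigned char *new_code)
	{
		unsigned char replaced[MCOUNT_INSN_SIZE];

		/* read the text we want to modify */
		if (__copy_from_user_inatomic(replaced, (char __user *)ip,
					      MCOUNT_INSN_SIZE))
			return -EFAULT;		/* faulted on read */

		/* make sure it is what we expect it to be */
		if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
			return -EINVAL;		/* unexpected text */

		/* replace the text with the new text */
		if (__copy_to_user_inatomic((char __user *)ip, new_code,
					    MCOUNT_INSN_SIZE))
			return -EPERM;		/* faulted on write */

		sync_core();
		return 0;
	}

With this convention, a caller no longer needs a switch over custom
status codes: any non-zero return simply means "stop ftrace and warn
once".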
* Re: [PATCH 1/2] ftrace: make dynamic ftrace more robust
From: Steven Rostedt
Date: 2008-10-22 11:07 UTC
To: Ingo Molnar
Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton, David Miller, Linus Torvalds, Steven Rostedt

On Wed, 22 Oct 2008, Ingo Molnar wrote:

> * Steven Rostedt <rostedt@goodmis.org> wrote:
>
> > +enum {
> > +	FTRACE_CODE_MODIFIED,
>
> i'd suggest naming it FTRACE_CODE_MODIFIED_OK here, to make it stand
> out from the failure codes.
>
> > +	FTRACE_CODE_FAILED_READ,
> > +	FTRACE_CODE_FAILED_CMP,
> > +	FTRACE_CODE_FAILED_WRITE,
>
> but maybe we should just use the standard kernel return codes: 0 for
> success, -EINVAL for the rest. Is there any real value in knowing
> exactly why it failed? We just know the modification was fishy (this
> is an exception situation), and we want to stop ftrace ASAP and then
> print a warning so a kernel developer can debug it.

Yes, it is important to know the reason for the failure, since it helps
with diagnosing the issue.

> Complicating error handling by introducing similar-looking return code
> names just makes it easier to mess up accidentally, hence it _reduces_
> robustness.

I had in mind for 2.6.29 that I would let an arch add another non-error
code that says "FAIL NICELY". This is a way, for example, for an arch
that cannot yet modify a particular kind of call site -- like the
trampoline example -- to say: "I do not make this kind of change, but it
is not a bug (I didn't modify anything), simply ignore it," and have
ftrace remove the record and go on.

> > /* Used for HOTPLUG */
> > -#define __devinit	__section(.devinit.text) __cold
> > +#define __devinit	__section(.devinit.text) __cold notrace
[ rest of quoted init.h hunks snipped ]
>
> there's no justification given for this in the changelog, and the
> change looks fishy.

Sorry, I missed writing this. I had it in other patches but forgot to add
the changelog here. These are areas, just like the __init sections, where
I have no arch-independent way of finding out what to remove from the
ftrace records. So without these notraces, we are guaranteed to hit the
warnings above!

> >  static void ftrace_free_rec(struct dyn_ftrace *rec)
> >  {
> > +	/*
> > +	 * No locking, only called from kstop_machine, or
> > +	 * from module unloading with module locks and interrupts
> > +	 * disabled to prevent kstop machine from running.
> > +	 */
> > +
> > +	WARN_ON(rec->flags & FTRACE_FL_FREE);
>
> this should _NOT_ be just a WARN_ON(). It should immediately stop
> ftrace entirely, then print _one_ warning. Then it should never ever
> run again, up to the next reboot.
>
> this is a basic principle for instrumentation: if we detect a bug, we
> disable ourselves immediately and print a _single_ warning.
>
> Do _not_ print possibly thousands of warnings and continue as if
> nothing happened ...

Fine. I'll replace all WARN_ONs with FTRACE_WARN_ONs.

> > +				/* kprobes was not the fault */
> > +				ftrace_kill_atomic();
>
> while at it, ftrace_kill_atomic() is a misnomer.
>
> Please use something more understandable and less ambiguous, like
> "ftrace_turn_off()". Both 'kill' and 'atomic' are heavily laden
> phrases used for many other things in the kernel.
>
> And any such facility must work from any context, because we might
> call it from crash paths, etc. So don't name it _atomic() -- it must
> obviously be atomic.

The reason for the naming was that ftrace_kill() was used when I knew
something was wrong, but not seriously wrong -- just enough to disable
ftrace via kstop_machine. But fine, I'll fix it.

-- Steve
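A sketch of what such a macro might look like. The name FTRACE_WARN_ON
comes from Steven's reply above, but the body here is an assumption, not
code from the thread; ftrace_kill() stands in for whatever "stop ftrace
now, permanently" primitive results from the renaming discussion:

	/*
	 * Sketch: on a detected inconsistency, permanently disable
	 * ftrace and emit the warning. WARN_ON() evaluates to the
	 * truth value of its condition, so the kill only happens
	 * when the check actually fires.
	 */
	#define FTRACE_WARN_ON(cond)		\
	do {					\
		if (WARN_ON(cond))		\
			ftrace_kill();		\
	} while (0)

Because ftrace is shut down on the first failure, later checks are never
reached with tracing still active, which satisfies the "one warning, then
stay off until reboot" requirement.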
* Re: [PATCH 1/2] ftrace: make dynamic ftrace more robust
From: Steven Rostedt
Date: 2008-10-22 11:28 UTC
To: Ingo Molnar
Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton, David Miller, Linus Torvalds, Steven Rostedt

On Wed, 22 Oct 2008, Steven Rostedt wrote:

> > > +				/* kprobes was not the fault */
> > > +				ftrace_kill_atomic();
> >
> > while at it, ftrace_kill_atomic() is a misnomer.
> >
> > Please use something more understandable and less ambiguous, like
> > "ftrace_turn_off()". Both 'kill' and 'atomic' are heavily laden
> > phrases used for many other things in the kernel.

I agree about the "atomic" part, but not about "kill". Yes, it is
unfortunate that Unix used "kill" to send signals, but the Unix name is
the misnomer.

The problem with "ftrace_turn_off", or anything similar, is that people
will likely use it to temporarily disable ftrace when they need to run
some sensitive code that cannot allow tracing. Then they will be confused
when they cannot find an "ftrace_turn_on()".

We already use "disable" to shut down ftrace and put it back into the NOP
state. We have "stop" and "start" to stop function tracing quickly (just
a bit is set, no conversion of code). I figured "kill" is
self-explanatory: you use it when you want to kill ftrace and do not want
it to come back. We have no "ftrace_resurrect" (well, not yet ;-)

I think most developers know the "kill" meaning. If you do not like the
name, you can change it.

-- Steve
* Re: [PATCH 1/2] ftrace: make dynamic ftrace more robust
From: Ingo Molnar
Date: 2008-10-22 11:47 UTC
To: Steven Rostedt
Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton, David Miller, Linus Torvalds, Steven Rostedt

* Steven Rostedt <rostedt@goodmis.org> wrote:

> > i'd suggest naming it FTRACE_CODE_MODIFIED_OK here, to make it stand
> > out from the failure codes.
> >
> > > +	FTRACE_CODE_FAILED_READ,
> > > +	FTRACE_CODE_FAILED_CMP,
> > > +	FTRACE_CODE_FAILED_WRITE,
> >
> > but maybe we should just use the standard kernel return codes: 0 for
> > success, -EINVAL for the rest. Is there any real value in knowing
> > exactly why it failed? We just know the modification was fishy (this
> > is an exception situation), and we want to stop ftrace ASAP and then
> > print a warning so a kernel developer can debug it.
>
> Yes, it is important to know the reason for the failure, since it
> helps with diagnosing the issue.

we have everything we need: a warning message. We only add "reason
debugging" _if and only if_ problems are so frequent in an area of code
that it's absolutely needed. Otherwise we just fix the bugs whenever they
happen.

> > Complicating error handling by introducing similar-looking return
> > code names just makes it easier to mess up accidentally, hence it
> > _reduces_ robustness.
>
> I had in mind for 2.6.29 that I would let an arch add another
> non-error code that says "FAIL NICELY". [...]

no ... you are really thinking about robustness in the wrong way. This
code runs in the deepest guts of the kernel, hence it is playing with
fire and must be absolutely robust. Not "nicely diagnosable", not "fail
nicely", but utterly robust in stopping whatever it does early enough to
make the problem reportable, should it trigger. (Which it really should
not.)

> > > /* Used for MEMORY_HOTPLUG */
> > > -#define __meminit	__section(.meminit.text) __cold
> > > +#define __meminit	__section(.meminit.text) __cold notrace
[ rest of quoted init.h hunk snipped ]
> >
> > there's no justification given for this in the changelog, and the
> > change looks fishy.
>
> Sorry, I missed writing this. I had it in other patches but forgot to
> add the changelog here. These are areas, just like the __init
> sections, where I have no arch-independent way of finding out what to
> remove from the ftrace records. So without these notraces, we are
> guaranteed to hit the warnings above!

this is utterly fragile and might miss places that insert symbols into
some of these sections manually.

the robust approach is to make sure these things never end up in an
ftrace record to begin with: scripts/recordmcount.pl should be taught to
record only places it is _100% sure_ are traceable, not "everything, and
we'll sort out the stuff we think is not okay".

if that needs arch-dependent smarts, then so be it -- ftrace has to be
enabled per arch anyway.

	Ingo
* Re: [PATCH 1/2] ftrace: make dynamic ftrace more robust
From: Steven Rostedt
Date: 2008-10-22 12:07 UTC
To: Ingo Molnar
Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton, David Miller, Linus Torvalds, Steven Rostedt

On Wed, 22 Oct 2008, Ingo Molnar wrote:

> > > > /* Used for MEMORY_HOTPLUG */
> > > > -#define __meminit	__section(.meminit.text) __cold
> > > > +#define __meminit	__section(.meminit.text) __cold notrace
[ rest of quoted init.h hunk snipped ]
> > >
> > > there's no justification given for this in the changelog, and the
> > > change looks fishy.
> >
> > Sorry, I missed writing this. I had it in other patches but forgot
> > to add the changelog here. These are areas, just like the __init
> > sections, where I have no arch-independent way of finding out what
> > to remove from the ftrace records. So without these notraces, we
> > are guaranteed to hit the warnings above!
>
> this is utterly fragile and might miss places that insert symbols into
> some of these sections manually.
>
> the robust approach is to make sure these things never end up in an
> ftrace record to begin with: scripts/recordmcount.pl should be taught
> to record only places it is _100% sure_ are traceable, not
> "everything, and we'll sort out the stuff we think is not okay".
>
> if that needs arch-dependent smarts, then so be it -- ftrace has to be
> enabled per arch anyway.

In that case, the only reasonable patch until we do the above is this:

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/Kconfig |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

Index: linux-compile.git/kernel/trace/Kconfig
===================================================================
--- linux-compile.git.orig/kernel/trace/Kconfig	2008-10-21 17:07:47.000000000 -0400
+++ linux-compile.git/kernel/trace/Kconfig	2008-10-22 08:05:40.000000000 -0400
@@ -159,10 +159,11 @@ config STACK_TRACER
 	  Say N if unsure.
 
 config DYNAMIC_FTRACE
-	bool "enable/disable ftrace tracepoints dynamically"
+	bool "enable/disable ftrace tracepoints dynamically (BROKEN)"
 	depends on FTRACE
 	depends on HAVE_DYNAMIC_FTRACE
 	depends on DEBUG_KERNEL
+	depends on BROKEN
 	default y
 	help
 	  This option will modify all the calls to ftrace dynamically
* [PATCH 2/2] ftrace: release functions from hash
From: Steven Rostedt
Date: 2008-10-21 16:40 UTC
In-Reply-To: [PATCH 0/2] ftrace: clean ups and sanity checks
To: linux-kernel
Cc: Ingo Molnar, Thomas Gleixner, Peter Zijlstra, Andrew Morton, David Miller, Linus Torvalds, Steven Rostedt

[-- Attachment #1: ftrace-release-funcs-from-hash.patch --]
[-- Type: text/plain, Size: 3011 bytes --]

The x86 architecture uses a static recording of mcount caller locations
and is not affected by this patch.

For architectures still using the dynamic ftrace daemon, this patch is
critical. It removes the race between the recording of a function that
calls mcount, the unloading of a module, and the ftrace daemon updating
the call sites.

This patch adds the releasing of the hash of functions that the daemon
uses to update the mcount call sites. When a module is unloaded, not only
is the call-site table updated, but now so is the hash of recorded
functions that the ftrace daemon will use.

Again, architectures that implement MCOUNT_RECORD (currently only x86)
are not affected by this.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/ftrace.c |   44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

Index: linux-compile.git/kernel/trace/ftrace.c
===================================================================
--- linux-compile.git.orig/kernel/trace/ftrace.c	2008-10-21 09:34:51.000000000 -0400
+++ linux-compile.git/kernel/trace/ftrace.c	2008-10-21 10:31:24.000000000 -0400
@@ -164,10 +164,14 @@ static DEFINE_SPINLOCK(ftrace_hash_lock)
 #define ftrace_hash_lock(flags)	  spin_lock_irqsave(&ftrace_hash_lock, flags)
 #define ftrace_hash_unlock(flags) \
 			spin_unlock_irqrestore(&ftrace_hash_lock, flags)
+static void ftrace_release_hash(unsigned long start, unsigned long end);
 #else
 /* This is protected via the ftrace_lock with MCOUNT_RECORD. */
 #define ftrace_hash_lock(flags)   do { (void)(flags); } while (0)
 #define ftrace_hash_unlock(flags) do { } while(0)
+static inline void ftrace_release_hash(unsigned long start, unsigned long end)
+{
+}
 #endif
 
 /*
@@ -354,6 +358,8 @@ void ftrace_release(void *start, unsigne
 		}
 	}
 	spin_unlock(&ftrace_lock);
+
+	ftrace_release_hash(s, e);
 }
 
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -1686,6 +1692,44 @@ void __init ftrace_init(void)
 	ftrace_disabled = 1;
 }
 #else /* CONFIG_FTRACE_MCOUNT_RECORD */
+
+static void ftrace_release_hash(unsigned long start, unsigned long end)
+{
+	struct dyn_ftrace *rec;
+	struct hlist_node *t, *n;
+	struct hlist_head *head, temp_list;
+	unsigned long flags;
+	int i, cpu;
+
+	preempt_disable_notrace();
+
+	/* disable in case we call something that calls mcount */
+	cpu = raw_smp_processor_id();
+	per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
+
+	ftrace_hash_lock(flags);
+
+	for (i = 0; i < FTRACE_HASHSIZE; i++) {
+		INIT_HLIST_HEAD(&temp_list);
+		head = &ftrace_hash[i];
+
+		/* all CPUs are stopped, we are safe to modify code */
+		hlist_for_each_entry_safe(rec, t, n, head, node) {
+			if (rec->flags & FTRACE_FL_FREE)
+				continue;
+
+			if ((rec->ip >= start) && (rec->ip < end))
+				ftrace_free_rec(rec);
+		}
+	}
+
+	ftrace_hash_unlock(flags);
+
+	per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
+	preempt_enable_notrace();
+}
+
 static int ftraced(void *ignore)
 {
 	unsigned long usecs;

-- 
* Re: [PATCH 2/2] ftrace: release functions from hash
From: Steven Rostedt
Date: 2008-10-21 18:27 UTC
To: linux-kernel
Cc: Ingo Molnar, Thomas Gleixner, Peter Zijlstra, Andrew Morton, David Miller, Linus Torvalds, Steven Rostedt

Ingo,

I found a bug with this patch. Do not incorporate it. I'll come up with a
new patch to replace this one.

Thanks,

-- Steve

On Tue, 21 Oct 2008, Steven Rostedt wrote:

> The x86 architecture uses a static recording of mcount caller locations
> and is not affected by this patch.
>
> For architectures still using the dynamic ftrace daemon, this patch is
> critical. It removes the race between the recording of a function that
> calls mcount, the unloading of a module, and the ftrace daemon updating
> the call sites.
[ rest of quoted changelog and patch snipped ]
Thread overview: 9+ messages

2008-10-21 16:40 [PATCH 0/2] ftrace: clean ups and sanity checks  Steven Rostedt
2008-10-21 16:40 ` [PATCH 1/2] ftrace: make dynamic ftrace more robust  Steven Rostedt
2008-10-22  6:53   ` Ingo Molnar
2008-10-22 11:07     ` Steven Rostedt
2008-10-22 11:28       ` Steven Rostedt
2008-10-22 11:47       ` Ingo Molnar
2008-10-22 12:07         ` Steven Rostedt
2008-10-21 16:40 ` [PATCH 2/2] ftrace: release functions from hash  Steven Rostedt
2008-10-21 18:27   ` Steven Rostedt