LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Daniel Bristot de Oliveira <bristot@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: williams@redhat.com, daniel@bristot.me,
	"Steven Rostedt (VMware)" <rostedt@goodmis.org>,
	Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Matthias Kaehlcke <mka@chromium.org>,
	"Joel Fernandes (Google)" <joel@joelfernandes.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Yangtao Li <tiny.windzz@gmail.com>,
	Tommaso Cucinotta <tommaso.cucinotta@santannapisa.it>
Subject: [RFC 2/3] preempt_tracer: Disable IRQ while starting/stopping due to a preempt_counter change
Date: Tue, 28 May 2019 17:16:23 +0200	[thread overview]
Message-ID: <f2ca7336162b6dc45f413cfe4e0056e6aa32e7ed.1559051152.git.bristot@redhat.com> (raw)
In-Reply-To: <cover.1559051152.git.bristot@redhat.com>

The preempt_disable/enable tracepoint only traces in the disable <-> enable
case, which is correct. But think about this case:

---------------------------- %< ------------------------------
	THREAD					IRQ
	   |					 |
preempt_disable() {
    __preempt_count_add(1)
	------->	    smp_apic_timer_interrupt() {
				preempt_disable()
				    do not trace (preempt count >= 1)
				    ....
				preempt_enable()
				    do not trace (preempt count >= 1)
			    }
    trace_preempt_disable();
}
---------------------------- >% ------------------------------

The tracepoint will be skipped.

It is possible to observe this problem using this kernel module:

	http://bristot.me/files/efficient_verification/wip.tar.gz [*]

and doing the following trace:

	# cd /sys/kernel/debug/tracing/
	# echo 1 > snapshot
	# cat available_events | grep preempt_ > set_event
	# echo irq_vectors >> /sys/kernel/debug/tracing/set_event
	# insmod wip.ko
	/* wait for a snapshot creation */
	# tail -100 snapshot
		[...]
	    tail-5572  [001] ....1..  2888.401184: preempt_enable: caller=_raw_spin_unlock_irqrestore+0x2a/0x70 parent=          (null)
	    tail-5572  [001] ....1..  2888.401184: preempt_disable: caller=migrate_disable+0x8b/0x1e0 parent=migrate_disable+0x8b/0x1e0
	    tail-5572  [001] ....111  2888.401184: preempt_enable: caller=migrate_disable+0x12f/0x1e0 parent=migrate_disable+0x12f/0x1e0
	    tail-5572  [001] d..h212  2888.401189: local_timer_entry: vector=236
	    tail-5572  [001] dN.h512  2888.401192: process_event: event sched_waking not expected in the state preemptive
	    tail-5572  [001] dN.h512  2888.401200: <stack trace>
	 => process_event
	 => __handle_event
	 => ttwu_do_wakeup
	 => try_to_wake_up
	 => invoke_rcu_core
	 => rcu_check_callbacks
	 => update_process_times
	 => tick_sched_handle
	 => tick_sched_timer
	 => __hrtimer_run_queues
	 => hrtimer_interrupt
	 => smp_apic_timer_interrupt
	 => apic_timer_interrupt
	 => trace_event_raw_event_preemptirq_template
	 => trace_preempt_off
	 => get_page_from_freelist
	 => __alloc_pages_nodemask
	 => __handle_mm_fault
	 => handle_mm_fault
	 => __do_page_fault
	 => do_page_fault
	 => async_page_fault

To avoid skipping the trace, the change in the counter should be "atomic"
with the start/stop, w.r.t the interrupts.

Disable interrupts while the adding/starting stopping/subtracting.

Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Yangtao Li <tiny.windzz@gmail.com>
Cc: Tommaso Cucinotta <tommaso.cucinotta@santannapisa.it>
Cc: linux-kernel@vger.kernel.org
---
[*] with some luck we will talk about this at the Plumbers.

 kernel/sched/core.c | 48 +++++++++++++++++++++++++++++++++------------
 kernel/softirq.c    |  2 +-
 2 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8c0b414e45dc..be4117d7384f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3163,6 +3163,16 @@ void preempt_latency_start(int val)
 	}
 }
 
+static inline void preempt_add_start_latency(int val)
+{
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+	__preempt_count_add(val);
+	preempt_latency_start(val);
+	raw_local_irq_restore(flags);
+}
+
 void preempt_count_add(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
@@ -3172,7 +3182,7 @@ void preempt_count_add(int val)
 	if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
 		return;
 #endif
-	__preempt_count_add(val);
+	preempt_add_start_latency(val);
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Spinlock count overflowing soon?
@@ -3180,7 +3190,6 @@ void preempt_count_add(int val)
 	DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
 				PREEMPT_MASK - 10);
 #endif
-	preempt_latency_start(val);
 }
 EXPORT_SYMBOL(preempt_count_add);
 NOKPROBE_SYMBOL(preempt_count_add);
@@ -3195,6 +3204,16 @@ void preempt_latency_stop(int val)
 		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
 }
 
+static inline void preempt_sub_stop_latency(int val)
+{
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+	preempt_latency_stop(val);
+	__preempt_count_sub(val);
+	raw_local_irq_restore(flags);
+}
+
 void preempt_count_sub(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
@@ -3211,8 +3230,7 @@ void preempt_count_sub(int val)
 		return;
 #endif
 
-	preempt_latency_stop(val);
-	__preempt_count_sub(val);
+	preempt_sub_stop_latency(val);
 }
 EXPORT_SYMBOL(preempt_count_sub);
 NOKPROBE_SYMBOL(preempt_count_sub);
@@ -3220,6 +3238,16 @@ NOKPROBE_SYMBOL(preempt_count_sub);
 #else
 static inline void preempt_latency_start(int val) { }
 static inline void preempt_latency_stop(int val) { }
+
+static inline void preempt_sub_stop_latency(int val)
+{
+	__preempt_count_sub(val);
+}
+
+static inline void preempt_add_start_latency(int val)
+{
+	__preempt_count_add(val);
+}
 #endif
 
 static inline unsigned long get_preempt_disable_ip(struct task_struct *p)
@@ -3585,11 +3613,9 @@ static void __sched notrace preempt_schedule_common(void)
 		 * traced. The other to still record the preemption latency,
 		 * which can also be traced by the function tracer.
 		 */
-		preempt_disable_notrace();
-		preempt_latency_start(1);
+		preempt_add_start_latency(1);
 		__schedule(true);
-		preempt_latency_stop(1);
-		preempt_enable_no_resched_notrace();
+		preempt_sub_stop_latency(1);
 
 		/*
 		 * Check again in case we missed a preemption opportunity
@@ -3653,8 +3679,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 		 * traced. The other to still record the preemption latency,
 		 * which can also be traced by the function tracer.
 		 */
-		preempt_disable_notrace();
-		preempt_latency_start(1);
+		preempt_add_start_latency(1);
 		/*
 		 * Needs preempt disabled in case user_exit() is traced
 		 * and the tracer calls preempt_enable_notrace() causing
@@ -3664,8 +3689,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 		__schedule(true);
 		exception_exit(prev_ctx);
 
-		preempt_latency_stop(1);
-		preempt_enable_no_resched_notrace();
+		preempt_sub_stop_latency(1);
 	} while (need_resched());
 }
 EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index c9ad89c3dfed..9c64522ecc76 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -130,9 +130,9 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 	 */
 	if (softirq_count() == (cnt & SOFTIRQ_MASK))
 		trace_softirqs_off(ip);
-	raw_local_irq_restore(flags);
 
 	preempt_latency_start(cnt);
+	raw_local_irq_restore(flags);
 
 }
 EXPORT_SYMBOL(__local_bh_disable_ip);
-- 
2.20.1


  parent reply	other threads:[~2019-05-28 15:17 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-28 15:16 [RFC 0/3] preempt_tracer: Fix preempt_disable tracepoint Daniel Bristot de Oliveira
2019-05-28 15:16 ` [RFC 1/3] softirq: Use preempt_latency_stop/start to trace preemption Daniel Bristot de Oliveira
2019-05-29  8:29   ` Peter Zijlstra
2019-05-29  9:30   ` Joel Fernandes
2019-05-29 12:22     ` Steven Rostedt
2019-06-04 10:39       ` Daniel Bristot de Oliveira
2019-06-04 12:01         ` Steven Rostedt
2019-05-28 15:16 ` Daniel Bristot de Oliveira [this message]
2019-05-29  8:33   ` [RFC 2/3] preempt_tracer: Disable IRQ while starting/stopping due to a preempt_counter change Peter Zijlstra
2019-05-29  9:40     ` Daniel Bristot de Oliveira
2019-05-29 10:20       ` Peter Zijlstra
2019-05-29 12:39         ` Steven Rostedt
2019-05-29 13:19           ` Peter Zijlstra
2019-05-29 13:29             ` Peter Zijlstra
2019-05-29 13:42             ` Steven Rostedt
2019-05-29 13:49               ` Peter Zijlstra
2019-05-29 13:58                 ` Steven Rostedt
2019-05-29 13:51         ` Daniel Bristot de Oliveira
2019-05-29 18:21           ` Peter Zijlstra
2019-06-04 10:20             ` Daniel Bristot de Oliveira
2019-05-31  7:47       ` Joel Fernandes
2019-06-04 10:12         ` Daniel Bristot de Oliveira
2019-06-04 15:01           ` Steven Rostedt
2019-06-05 15:16             ` Joel Fernandes
2019-05-28 15:16 ` [RFC 3/3] preempt_tracer: Use a percpu variable to control traceble calls Daniel Bristot de Oliveira
2019-05-29  8:41   ` Peter Zijlstra
2019-05-29  9:48     ` Daniel Bristot de Oliveira

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f2ca7336162b6dc45f413cfe4e0056e6aa32e7ed.1559051152.git.bristot@redhat.com \
    --to=bristot@redhat.com \
    --cc=daniel@bristot.me \
    --cc=frederic@kernel.org \
    --cc=joel@joelfernandes.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=mka@chromium.org \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=tiny.windzz@gmail.com \
    --cc=tommaso.cucinotta@santannapisa.it \
    --cc=williams@redhat.com \
    --subject='Re: [RFC 2/3] preempt_tracer: Disable IRQ while starting/stopping due to a preempt_counter change' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).