LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH] ARM: Don't use complete() during __cpu_die
@ 2015-02-04 16:53 Krzysztof Kozlowski
  2015-02-04 17:57 ` Paul E. McKenney
  2015-02-04 22:42 ` Stephen Boyd
  0 siblings, 2 replies; 5+ messages in thread
From: Krzysztof Kozlowski @ 2015-02-04 16:53 UTC (permalink / raw)
  To: Russell King, linux-arm-kernel, linux-kernel
  Cc: paulmck, Fengguang Wu, Arnd Bergmann, Mark Rutland,
	Bartlomiej Zolnierkiewicz, Marek Szyprowski, Krzysztof Kozlowski

The complete() should not be used on offlined CPU. Rewrite the
wait-complete mechanism with wait_on_bit_timeout().

The CPU triggering hot unplug (e.g. CPU0) will loop until some bit is
cleared. In each iteration schedule_timeout() is used with initial sleep
time of 1 ms.  Later it is increased to 10 ms.

The dying CPU will clear the bit which is safe in that context.

This fixes following RCU warning on ARMv8 (Exynos 4412, Trats2) during
suspend to RAM:

[   31.113925] ===============================
[   31.113928] [ INFO: suspicious RCU usage. ]
[   31.113935] 3.19.0-rc7-next-20150203 #1914 Not tainted
[   31.113938] -------------------------------
[   31.113943] kernel/sched/fair.c:4740 suspicious rcu_dereference_check() usage!
[   31.113946]
[   31.113946] other info that might help us debug this:
[   31.113946]
[   31.113952]
[   31.113952] RCU used illegally from offline CPU!
[   31.113952] rcu_scheduler_active = 1, debug_locks = 0
[   31.113957] 3 locks held by swapper/1/0:
[   31.113988]  #0:  ((cpu_died).wait.lock){......}, at: [<c005a114>] complete+0x14/0x44
[   31.114012]  #1:  (&p->pi_lock){-.-.-.}, at: [<c004a790>] try_to_wake_up+0x28/0x300
[   31.114035]  #2:  (rcu_read_lock){......}, at: [<c004f1b8>] select_task_rq_fair+0x5c/0xa04
[   31.114038]
[   31.114038] stack backtrace:
[   31.114046] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.19.0-rc7-next-20150203 #1914
[   31.114050] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
[   31.114076] [<c0014ce4>] (unwind_backtrace) from [<c0011c30>] (show_stack+0x10/0x14)
[   31.114091] [<c0011c30>] (show_stack) from [<c04dc048>] (dump_stack+0x70/0xbc)
[   31.114105] [<c04dc048>] (dump_stack) from [<c004f83c>] (select_task_rq_fair+0x6e0/0xa04)
[   31.114118] [<c004f83c>] (select_task_rq_fair) from [<c004a83c>] (try_to_wake_up+0xd4/0x300)
[   31.114129] [<c004a83c>] (try_to_wake_up) from [<c00598a0>] (__wake_up_common+0x4c/0x80)
[   31.114140] [<c00598a0>] (__wake_up_common) from [<c00598e8>] (__wake_up_locked+0x14/0x1c)
[   31.114150] [<c00598e8>] (__wake_up_locked) from [<c005a134>] (complete+0x34/0x44)
[   31.114167] [<c005a134>] (complete) from [<c04d6ca4>] (cpu_die+0x24/0x84)
[   31.114179] [<c04d6ca4>] (cpu_die) from [<c005a508>] (cpu_startup_entry+0x328/0x358)
[   31.114189] [<c005a508>] (cpu_startup_entry) from [<40008784>] (0x40008784)
[   31.114226] CPU1: shutdown

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
---
 arch/arm/kernel/smp.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 49 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 86ef244c5a24..bb8ff465975f 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -26,6 +26,7 @@
 #include <linux/completion.h>
 #include <linux/cpufreq.h>
 #include <linux/irq_work.h>
+#include <linux/wait.h>
 
 #include <linux/atomic.h>
 #include <asm/smp.h>
@@ -76,6 +77,9 @@ enum ipi_msg_type {
 
 static DECLARE_COMPLETION(cpu_running);
 
+#define CPU_DIE_WAIT_BIT		0
+static unsigned long wait_cpu_die;
+
 static struct smp_operations smp_ops;
 
 void __init smp_set_ops(struct smp_operations *ops)
@@ -133,6 +137,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 		pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
 	}
 
+	set_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die);
+	smp_mb__after_atomic();
 
 	memset(&secondary_data, 0, sizeof(secondary_data));
 	return ret;
@@ -213,7 +219,40 @@ int __cpu_disable(void)
 	return 0;
 }
 
-static DECLARE_COMPLETION(cpu_died);
+/*
+ * Wait for 5000*1 ms for 'wait_cpu_die' bit to be cleared.
+ * Actually the real wait time will be longer because of schedule()
+ * called bit_wait_timeout.
+ *
+ * Returns 0 if bit was cleared (CPU died) or non-zero
+ * otherwise (1 or negative ERRNO).
+ */
+static int wait_for_cpu_die(void)
+{
+	int retries = 5000, sleep_ms = 1, ret = 0;
+
+	might_sleep();
+
+	smp_mb__before_atomic();
+	while (test_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die)) {
+		ret = out_of_line_wait_on_bit_timeout(&wait_cpu_die,
+				CPU_DIE_WAIT_BIT, bit_wait_timeout,
+				TASK_UNINTERRUPTIBLE,
+				msecs_to_jiffies(sleep_ms));
+		if (!ret || (--retries <= 0))
+			break;
+
+		if (retries < 4000) {
+			/* After ~1000 ms increase sleeping time to 10 ms */
+			retries = 400;
+			sleep_ms = 10;
+		}
+
+		smp_mb__before_atomic(); /* For next test_bit() in loop */
+	}
+
+	return ret;
+}
 
 /*
  * called on the thread which is asking for a CPU to be shutdown -
@@ -221,7 +260,7 @@ static DECLARE_COMPLETION(cpu_died);
  */
 void __cpu_die(unsigned int cpu)
 {
-	if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) {
+	if (wait_for_cpu_die()) {
 		pr_err("CPU%u: cpu didn't die\n", cpu);
 		return;
 	}
@@ -236,6 +275,10 @@ void __cpu_die(unsigned int cpu)
 	 */
 	if (!platform_cpu_kill(cpu))
 		pr_err("CPU%u: unable to kill\n", cpu);
+
+	/* Prepare the bit for some next CPU die */
+	set_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die);
+	smp_mb__after_atomic();
 }
 
 /*
@@ -250,6 +293,8 @@ void __ref cpu_die(void)
 {
 	unsigned int cpu = smp_processor_id();
 
+	WARN_ON(!test_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die));
+
 	idle_task_exit();
 
 	local_irq_disable();
@@ -267,7 +312,8 @@ void __ref cpu_die(void)
 	 * this returns, power and/or clocks can be removed at any point
 	 * from this CPU and its cache by platform_cpu_kill().
 	 */
-	complete(&cpu_died);
+	clear_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die);
+	smp_mb__after_atomic();
 
 	/*
 	 * Ensure that the cache lines associated with that completion are
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] ARM: Don't use complete() during __cpu_die
  2015-02-04 16:53 [PATCH] ARM: Don't use complete() during __cpu_die Krzysztof Kozlowski
@ 2015-02-04 17:57 ` Paul E. McKenney
  2015-02-05  7:46   ` Krzysztof Kozlowski
  2015-02-04 22:42 ` Stephen Boyd
  1 sibling, 1 reply; 5+ messages in thread
From: Paul E. McKenney @ 2015-02-04 17:57 UTC (permalink / raw)
  To: Krzysztof Kozlowski
  Cc: Russell King, linux-arm-kernel, linux-kernel, Fengguang Wu,
	Arnd Bergmann, Mark Rutland, Bartlomiej Zolnierkiewicz,
	Marek Szyprowski

On Wed, Feb 04, 2015 at 05:53:55PM +0100, Krzysztof Kozlowski wrote:
> The complete() should not be used on offlined CPU. Rewrite the
> wait-complete mechanism with wait_on_bit_timeout().
> 
> The CPU triggering hot unplug (e.g. CPU0) will loop until some bit is
> cleared. In each iteration schedule_timeout() is used with initial sleep
> time of 1 ms.  Later it is increased to 10 ms.
> 
> The dying CPU will clear the bit which is safe in that context.
> 
> This fixes following RCU warning on ARMv8 (Exynos 4412, Trats2) during
> suspend to RAM:
> 
> [   31.113925] ===============================
> [   31.113928] [ INFO: suspicious RCU usage. ]
> [   31.113935] 3.19.0-rc7-next-20150203 #1914 Not tainted
> [   31.113938] -------------------------------
> [   31.113943] kernel/sched/fair.c:4740 suspicious rcu_dereference_check() usage!
> [   31.113946]
> [   31.113946] other info that might help us debug this:
> [   31.113946]
> [   31.113952]
> [   31.113952] RCU used illegally from offline CPU!
> [   31.113952] rcu_scheduler_active = 1, debug_locks = 0
> [   31.113957] 3 locks held by swapper/1/0:
> [   31.113988]  #0:  ((cpu_died).wait.lock){......}, at: [<c005a114>] complete+0x14/0x44
> [   31.114012]  #1:  (&p->pi_lock){-.-.-.}, at: [<c004a790>] try_to_wake_up+0x28/0x300
> [   31.114035]  #2:  (rcu_read_lock){......}, at: [<c004f1b8>] select_task_rq_fair+0x5c/0xa04
> [   31.114038]
> [   31.114038] stack backtrace:
> [   31.114046] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.19.0-rc7-next-20150203 #1914
> [   31.114050] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
> [   31.114076] [<c0014ce4>] (unwind_backtrace) from [<c0011c30>] (show_stack+0x10/0x14)
> [   31.114091] [<c0011c30>] (show_stack) from [<c04dc048>] (dump_stack+0x70/0xbc)
> [   31.114105] [<c04dc048>] (dump_stack) from [<c004f83c>] (select_task_rq_fair+0x6e0/0xa04)
> [   31.114118] [<c004f83c>] (select_task_rq_fair) from [<c004a83c>] (try_to_wake_up+0xd4/0x300)
> [   31.114129] [<c004a83c>] (try_to_wake_up) from [<c00598a0>] (__wake_up_common+0x4c/0x80)
> [   31.114140] [<c00598a0>] (__wake_up_common) from [<c00598e8>] (__wake_up_locked+0x14/0x1c)
> [   31.114150] [<c00598e8>] (__wake_up_locked) from [<c005a134>] (complete+0x34/0x44)
> [   31.114167] [<c005a134>] (complete) from [<c04d6ca4>] (cpu_die+0x24/0x84)
> [   31.114179] [<c04d6ca4>] (cpu_die) from [<c005a508>] (cpu_startup_entry+0x328/0x358)
> [   31.114189] [<c005a508>] (cpu_startup_entry) from [<40008784>] (0x40008784)
> [   31.114226] CPU1: shutdown
> 
> Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>

One suggestion below, but either way:

Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

(If you would rather that I carried the patch, please let me know.)

> ---
>  arch/arm/kernel/smp.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 49 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
> index 86ef244c5a24..bb8ff465975f 100644
> --- a/arch/arm/kernel/smp.c
> +++ b/arch/arm/kernel/smp.c
> @@ -26,6 +26,7 @@
>  #include <linux/completion.h>
>  #include <linux/cpufreq.h>
>  #include <linux/irq_work.h>
> +#include <linux/wait.h>
> 
>  #include <linux/atomic.h>
>  #include <asm/smp.h>
> @@ -76,6 +77,9 @@ enum ipi_msg_type {
> 
>  static DECLARE_COMPLETION(cpu_running);
> 
> +#define CPU_DIE_WAIT_BIT		0
> +static unsigned long wait_cpu_die;
> +
>  static struct smp_operations smp_ops;
> 
>  void __init smp_set_ops(struct smp_operations *ops)
> @@ -133,6 +137,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
>  		pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
>  	}
> 
> +	set_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die);
> +	smp_mb__after_atomic();
> 
>  	memset(&secondary_data, 0, sizeof(secondary_data));
>  	return ret;
> @@ -213,7 +219,40 @@ int __cpu_disable(void)
>  	return 0;
>  }
> 
> -static DECLARE_COMPLETION(cpu_died);
> +/*
> + * Wait for 5000*1 ms for 'wait_cpu_die' bit to be cleared.
> + * Actually the real wait time will be longer because of schedule()
> + * called bit_wait_timeout.
> + *
> + * Returns 0 if bit was cleared (CPU died) or non-zero
> + * otherwise (1 or negative ERRNO).
> + */
> +static int wait_for_cpu_die(void)
> +{
> +	int retries = 5000, sleep_ms = 1, ret = 0;
> +
> +	might_sleep();
> +
> +	smp_mb__before_atomic();
> +	while (test_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die)) {
> +		ret = out_of_line_wait_on_bit_timeout(&wait_cpu_die,
> +				CPU_DIE_WAIT_BIT, bit_wait_timeout,
> +				TASK_UNINTERRUPTIBLE,
> +				msecs_to_jiffies(sleep_ms));
> +		if (!ret || (--retries <= 0))
> +			break;
> +
> +		if (retries < 4000) {
> +			/* After ~1000 ms increase sleeping time to 10 ms */
> +			retries = 400;
> +			sleep_ms = 10;
> +		}

Another approach that gets roughly the same response times with fewer
wakeups (and a bit less code) would be something like this:

	int ms_left = 5000, sleep_ms = 1, ret = 0;

	might_sleep();

	smp_mb__before_atomic();
	while (test_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die)) {
		ret = out_of_line_wait_on_bit_timeout(&wait_cpu_die,
				CPU_DIE_WAIT_BIT, bit_wait_timeout,
				TASK_UNINTERRUPTIBLE,
				msecs_to_jiffies(sleep_ms));
		ms_left -= sleep_ms;
		if (!ret || (ms_left <= 0))
			break;
		sleep_ms = DIV_ROUND_UP(sleep_ms * 11, 10);

This would result in less than 50 wakeups compared to more than 4000,
with little added latency in the common case.

But either way works for me.

> +
> +		smp_mb__before_atomic(); /* For next test_bit() in loop */
> +	}
> +
> +	return ret;
> +}
> 
>  /*
>   * called on the thread which is asking for a CPU to be shutdown -
> @@ -221,7 +260,7 @@ static DECLARE_COMPLETION(cpu_died);
>   */
>  void __cpu_die(unsigned int cpu)
>  {
> -	if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) {
> +	if (wait_for_cpu_die()) {
>  		pr_err("CPU%u: cpu didn't die\n", cpu);
>  		return;
>  	}
> @@ -236,6 +275,10 @@ void __cpu_die(unsigned int cpu)
>  	 */
>  	if (!platform_cpu_kill(cpu))
>  		pr_err("CPU%u: unable to kill\n", cpu);
> +
> +	/* Prepare the bit for some next CPU die */
> +	set_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die);
> +	smp_mb__after_atomic();
>  }
> 
>  /*
> @@ -250,6 +293,8 @@ void __ref cpu_die(void)
>  {
>  	unsigned int cpu = smp_processor_id();
> 
> +	WARN_ON(!test_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die));
> +
>  	idle_task_exit();
> 
>  	local_irq_disable();
> @@ -267,7 +312,8 @@ void __ref cpu_die(void)
>  	 * this returns, power and/or clocks can be removed at any point
>  	 * from this CPU and its cache by platform_cpu_kill().
>  	 */
> -	complete(&cpu_died);
> +	clear_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die);
> +	smp_mb__after_atomic();
> 
>  	/*
>  	 * Ensure that the cache lines associated with that completion are
> -- 
> 1.9.1
> 


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ARM: Don't use complete() during __cpu_die
  2015-02-04 16:53 [PATCH] ARM: Don't use complete() during __cpu_die Krzysztof Kozlowski
  2015-02-04 17:57 ` Paul E. McKenney
@ 2015-02-04 22:42 ` Stephen Boyd
  2015-02-05  7:54   ` Krzysztof Kozlowski
  1 sibling, 1 reply; 5+ messages in thread
From: Stephen Boyd @ 2015-02-04 22:42 UTC (permalink / raw)
  To: Krzysztof Kozlowski, Russell King, linux-arm-kernel, linux-kernel
  Cc: Mark Rutland, paulmck, Arnd Bergmann, Bartlomiej Zolnierkiewicz,
	Fengguang Wu, Marek Szyprowski

On 02/04/15 08:53, Krzysztof Kozlowski wrote:
> The complete() should not be used on offlined CPU. Rewrite the
> wait-complete mechanism with wait_on_bit_timeout().
>
> The CPU triggering hot unplug (e.g. CPU0) will loop until some bit is
> cleared. In each iteration schedule_timeout() is used with initial sleep
> time of 1 ms.  Later it is increased to 10 ms.
>
> The dying CPU will clear the bit which is safe in that context.
>
> This fixes following RCU warning on ARMv8 (Exynos 4412, Trats2) during
> suspend to RAM:
>
> [   31.113925] ===============================
> [   31.113928] [ INFO: suspicious RCU usage. ]
> [   31.113935] 3.19.0-rc7-next-20150203 #1914 Not tainted
> [   31.113938] -------------------------------
> [   31.113943] kernel/sched/fair.c:4740 suspicious rcu_dereference_check() usage!
> [   31.113946]
> [   31.113946] other info that might help us debug this:
> [   31.113946]
> [   31.113952]
> [   31.113952] RCU used illegally from offline CPU!
> [   31.113952] rcu_scheduler_active = 1, debug_locks = 0
> [   31.113957] 3 locks held by swapper/1/0:
> [   31.113988]  #0:  ((cpu_died).wait.lock){......}, at: [<c005a114>] complete+0x14/0x44
> [   31.114012]  #1:  (&p->pi_lock){-.-.-.}, at: [<c004a790>] try_to_wake_up+0x28/0x300
> [   31.114035]  #2:  (rcu_read_lock){......}, at: [<c004f1b8>] select_task_rq_fair+0x5c/0xa04
> [   31.114038]
> [   31.114038] stack backtrace:
> [   31.114046] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.19.0-rc7-next-20150203 #1914
> [   31.114050] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
> [   31.114076] [<c0014ce4>] (unwind_backtrace) from [<c0011c30>] (show_stack+0x10/0x14)
> [   31.114091] [<c0011c30>] (show_stack) from [<c04dc048>] (dump_stack+0x70/0xbc)
> [   31.114105] [<c04dc048>] (dump_stack) from [<c004f83c>] (select_task_rq_fair+0x6e0/0xa04)
> [   31.114118] [<c004f83c>] (select_task_rq_fair) from [<c004a83c>] (try_to_wake_up+0xd4/0x300)
> [   31.114129] [<c004a83c>] (try_to_wake_up) from [<c00598a0>] (__wake_up_common+0x4c/0x80)
> [   31.114140] [<c00598a0>] (__wake_up_common) from [<c00598e8>] (__wake_up_locked+0x14/0x1c)
> [   31.114150] [<c00598e8>] (__wake_up_locked) from [<c005a134>] (complete+0x34/0x44)
> [   31.114167] [<c005a134>] (complete) from [<c04d6ca4>] (cpu_die+0x24/0x84)
> [   31.114179] [<c04d6ca4>] (cpu_die) from [<c005a508>] (cpu_startup_entry+0x328/0x358)
> [   31.114189] [<c005a508>] (cpu_startup_entry) from [<40008784>] (0x40008784)
> [   31.114226] CPU1: shutdown
>
> Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
> ---

Would it be better to use IPIs instead? The IPI handler could even call
complete() as long as we IPI off the dying CPU to the killing CPU. I
suppose this could be an extension of the current IPI_COMPLETION that we
already have.

>  arch/arm/kernel/smp.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 49 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
> index 86ef244c5a24..bb8ff465975f 100644
> --- a/arch/arm/kernel/smp.c
> +++ b/arch/arm/kernel/smp.c
> @@ -213,7 +219,40 @@ int __cpu_disable(void)
>  	return 0;
>  }
>  
> -static DECLARE_COMPLETION(cpu_died);
> +/*
> + * Wait for 5000*1 ms for 'wait_cpu_die' bit to be cleared.
> + * Actually the real wait time will be longer because of schedule()
> + * called bit_wait_timeout.
> + *
> + * Returns 0 if bit was cleared (CPU died) or non-zero
> + * otherwise (1 or negative ERRNO).
> + */
> +static int wait_for_cpu_die(void)
> +{
> +	int retries = 5000, sleep_ms = 1, ret = 0;
> +
> +	might_sleep();
> +
> +	smp_mb__before_atomic();
> +	while (test_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die)) {
> +		ret = out_of_line_wait_on_bit_timeout(&wait_cpu_die,
> +				CPU_DIE_WAIT_BIT, bit_wait_timeout,
> +				TASK_UNINTERRUPTIBLE,
> +				msecs_to_jiffies(sleep_ms));
> +		if (!ret || (--retries <= 0))
> +			break;
> +
> +		if (retries < 4000) {
> +			/* After ~1000 ms increase sleeping time to 10 ms */
> +			retries = 400;
> +			sleep_ms = 10;
> +		}
> +
> +		smp_mb__before_atomic(); /* For next test_bit() in loop */
> +	}
> +
> +	return ret;
> +}
>  

Is there any reason we test the bit before testing it again in
out_of_line_wait_on_bit_timeout()? Why can't we just call that function
in a loop and let it handle checking the bit?

> @@ -267,7 +312,8 @@ void __ref cpu_die(void)
>  	 * this returns, power and/or clocks can be removed at any point
>  	 * from this CPU and its cache by platform_cpu_kill().
>  	 */
> -	complete(&cpu_died);
> +	clear_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die);
> +	smp_mb__after_atomic();
>  
>  	/*
>  	 * Ensure that the cache lines associated with that completion are

This comment here should be updated because the completion is gone.

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ARM: Don't use complete() during __cpu_die
  2015-02-04 17:57 ` Paul E. McKenney
@ 2015-02-05  7:46   ` Krzysztof Kozlowski
  0 siblings, 0 replies; 5+ messages in thread
From: Krzysztof Kozlowski @ 2015-02-05  7:46 UTC (permalink / raw)
  To: paulmck
  Cc: Russell King, linux-arm-kernel, linux-kernel, Fengguang Wu,
	Arnd Bergmann, Mark Rutland, Bartlomiej Zolnierkiewicz,
	Marek Szyprowski

On śro, 2015-02-04 at 09:57 -0800, Paul E. McKenney wrote:
> On Wed, Feb 04, 2015 at 05:53:55PM +0100, Krzysztof Kozlowski wrote:
> > The complete() should not be used on offlined CPU. Rewrite the
> > wait-complete mechanism with wait_on_bit_timeout().
> > 
> > The CPU triggering hot unplug (e.g. CPU0) will loop until some bit is
> > cleared. In each iteration schedule_timeout() is used with initial sleep
> > time of 1 ms.  Later it is increased to 10 ms.
> > 
> > The dying CPU will clear the bit which is safe in that context.
> > 
> > This fixes following RCU warning on ARMv8 (Exynos 4412, Trats2) during
> > suspend to RAM:
> > 
> > [   31.113925] ===============================
> > [   31.113928] [ INFO: suspicious RCU usage. ]
> > [   31.113935] 3.19.0-rc7-next-20150203 #1914 Not tainted
> > [   31.113938] -------------------------------
> > [   31.113943] kernel/sched/fair.c:4740 suspicious rcu_dereference_check() usage!
> > [   31.113946]
> > [   31.113946] other info that might help us debug this:
> > [   31.113946]
> > [   31.113952]
> > [   31.113952] RCU used illegally from offline CPU!
> > [   31.113952] rcu_scheduler_active = 1, debug_locks = 0
> > [   31.113957] 3 locks held by swapper/1/0:
> > [   31.113988]  #0:  ((cpu_died).wait.lock){......}, at: [<c005a114>] complete+0x14/0x44
> > [   31.114012]  #1:  (&p->pi_lock){-.-.-.}, at: [<c004a790>] try_to_wake_up+0x28/0x300
> > [   31.114035]  #2:  (rcu_read_lock){......}, at: [<c004f1b8>] select_task_rq_fair+0x5c/0xa04
> > [   31.114038]
> > [   31.114038] stack backtrace:
> > [   31.114046] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.19.0-rc7-next-20150203 #1914
> > [   31.114050] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
> > [   31.114076] [<c0014ce4>] (unwind_backtrace) from [<c0011c30>] (show_stack+0x10/0x14)
> > [   31.114091] [<c0011c30>] (show_stack) from [<c04dc048>] (dump_stack+0x70/0xbc)
> > [   31.114105] [<c04dc048>] (dump_stack) from [<c004f83c>] (select_task_rq_fair+0x6e0/0xa04)
> > [   31.114118] [<c004f83c>] (select_task_rq_fair) from [<c004a83c>] (try_to_wake_up+0xd4/0x300)
> > [   31.114129] [<c004a83c>] (try_to_wake_up) from [<c00598a0>] (__wake_up_common+0x4c/0x80)
> > [   31.114140] [<c00598a0>] (__wake_up_common) from [<c00598e8>] (__wake_up_locked+0x14/0x1c)
> > [   31.114150] [<c00598e8>] (__wake_up_locked) from [<c005a134>] (complete+0x34/0x44)
> > [   31.114167] [<c005a134>] (complete) from [<c04d6ca4>] (cpu_die+0x24/0x84)
> > [   31.114179] [<c04d6ca4>] (cpu_die) from [<c005a508>] (cpu_startup_entry+0x328/0x358)
> > [   31.114189] [<c005a508>] (cpu_startup_entry) from [<40008784>] (0x40008784)
> > [   31.114226] CPU1: shutdown
> > 
> > Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
> 
> One suggestion below, but either way:
> 
> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Thanks!

> (If you would rather that I carried the patch, please let me know.)

I'll send the patch through Russell's patch system.
> 
> > ---
> >  arch/arm/kernel/smp.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 49 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
> > index 86ef244c5a24..bb8ff465975f 100644
> > --- a/arch/arm/kernel/smp.c
> > +++ b/arch/arm/kernel/smp.c
> > @@ -26,6 +26,7 @@
> >  #include <linux/completion.h>
> >  #include <linux/cpufreq.h>
> >  #include <linux/irq_work.h>
> > +#include <linux/wait.h>
> > 
> >  #include <linux/atomic.h>
> >  #include <asm/smp.h>
> > @@ -76,6 +77,9 @@ enum ipi_msg_type {
> > 
> >  static DECLARE_COMPLETION(cpu_running);
> > 
> > +#define CPU_DIE_WAIT_BIT		0
> > +static unsigned long wait_cpu_die;
> > +
> >  static struct smp_operations smp_ops;
> > 
> >  void __init smp_set_ops(struct smp_operations *ops)
> > @@ -133,6 +137,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
> >  		pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
> >  	}
> > 
> > +	set_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die);
> > +	smp_mb__after_atomic();
> > 
> >  	memset(&secondary_data, 0, sizeof(secondary_data));
> >  	return ret;
> > @@ -213,7 +219,40 @@ int __cpu_disable(void)
> >  	return 0;
> >  }
> > 
> > -static DECLARE_COMPLETION(cpu_died);
> > +/*
> > + * Wait for 5000*1 ms for 'wait_cpu_die' bit to be cleared.
> > + * Actually the real wait time will be longer because of schedule()
> > + * called bit_wait_timeout.
> > + *
> > + * Returns 0 if bit was cleared (CPU died) or non-zero
> > + * otherwise (1 or negative ERRNO).
> > + */
> > +static int wait_for_cpu_die(void)
> > +{
> > +	int retries = 5000, sleep_ms = 1, ret = 0;
> > +
> > +	might_sleep();
> > +
> > +	smp_mb__before_atomic();
> > +	while (test_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die)) {
> > +		ret = out_of_line_wait_on_bit_timeout(&wait_cpu_die,
> > +				CPU_DIE_WAIT_BIT, bit_wait_timeout,
> > +				TASK_UNINTERRUPTIBLE,
> > +				msecs_to_jiffies(sleep_ms));
> > +		if (!ret || (--retries <= 0))
> > +			break;
> > +
> > +		if (retries < 4000) {
> > +			/* After ~1000 ms increase sleeping time to 10 ms */
> > +			retries = 400;
> > +			sleep_ms = 10;
> > +		}
> 
> Another approach that gets roughly the same response times with fewer
> wakeups (and a bit less code) would be something like this:
> 
> 	int ms_left = 5000, sleep_ms = 1, ret = 0;
> 
> 	might_sleep();
> 
> 	smp_mb__before_atomic();
> 	while (test_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die)) {
> 		ret = out_of_line_wait_on_bit_timeout(&wait_cpu_die,
> 				CPU_DIE_WAIT_BIT, bit_wait_timeout,
> 				TASK_UNINTERRUPTIBLE,
> 				msecs_to_jiffies(sleep_ms));
> 		ms_left -= sleep_ms;
> 		if (!ret || (ms_left <= 0))
> 			break;
> 		sleep_ms = DIV_ROUND_UP(sleep_ms * 11, 10);
> 
> This would result in less than 50 wakeups compared to more than 4000,
> with little added latency in the common case.
> 
> But either way works for me.

It looks better. I'll use it although Stephen Boyd had an other idea
which could be smaller and more schedule-friendly.

Thanks for review,
Krzysztof

> 
> > +
> > +		smp_mb__before_atomic(); /* For next test_bit() in loop */
> > +	}
> > +
> > +	return ret;
> > +}
> > 
> >  /*
> >   * called on the thread which is asking for a CPU to be shutdown -
> > @@ -221,7 +260,7 @@ static DECLARE_COMPLETION(cpu_died);
> >   */
> >  void __cpu_die(unsigned int cpu)
> >  {
> > -	if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) {
> > +	if (wait_for_cpu_die()) {
> >  		pr_err("CPU%u: cpu didn't die\n", cpu);
> >  		return;
> >  	}
> > @@ -236,6 +275,10 @@ void __cpu_die(unsigned int cpu)
> >  	 */
> >  	if (!platform_cpu_kill(cpu))
> >  		pr_err("CPU%u: unable to kill\n", cpu);
> > +
> > +	/* Prepare the bit for some next CPU die */
> > +	set_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die);
> > +	smp_mb__after_atomic();
> >  }
> > 
> >  /*
> > @@ -250,6 +293,8 @@ void __ref cpu_die(void)
> >  {
> >  	unsigned int cpu = smp_processor_id();
> > 
> > +	WARN_ON(!test_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die));
> > +
> >  	idle_task_exit();
> > 
> >  	local_irq_disable();
> > @@ -267,7 +312,8 @@ void __ref cpu_die(void)
> >  	 * this returns, power and/or clocks can be removed at any point
> >  	 * from this CPU and its cache by platform_cpu_kill().
> >  	 */
> > -	complete(&cpu_died);
> > +	clear_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die);
> > +	smp_mb__after_atomic();
> > 
> >  	/*
> >  	 * Ensure that the cache lines associated with that completion are
> > -- 
> > 1.9.1
> > 


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ARM: Don't use complete() during __cpu_die
  2015-02-04 22:42 ` Stephen Boyd
@ 2015-02-05  7:54   ` Krzysztof Kozlowski
  0 siblings, 0 replies; 5+ messages in thread
From: Krzysztof Kozlowski @ 2015-02-05  7:54 UTC (permalink / raw)
  To: Stephen Boyd
  Cc: Russell King, linux-arm-kernel, linux-kernel, Mark Rutland,
	paulmck, Arnd Bergmann, Bartlomiej Zolnierkiewicz, Fengguang Wu,
	Marek Szyprowski

On śro, 2015-02-04 at 14:42 -0800, Stephen Boyd wrote:
> On 02/04/15 08:53, Krzysztof Kozlowski wrote:
> > The complete() should not be used on offlined CPU. Rewrite the
> > wait-complete mechanism with wait_on_bit_timeout().
> >
> > The CPU triggering hot unplug (e.g. CPU0) will loop until some bit is
> > cleared. In each iteration schedule_timeout() is used with initial sleep
> > time of 1 ms.  Later it is increased to 10 ms.
> >
> > The dying CPU will clear the bit which is safe in that context.
> >
> > This fixes following RCU warning on ARMv8 (Exynos 4412, Trats2) during
> > suspend to RAM:
> >
> > [   31.113925] ===============================
> > [   31.113928] [ INFO: suspicious RCU usage. ]
> > [   31.113935] 3.19.0-rc7-next-20150203 #1914 Not tainted
> > [   31.113938] -------------------------------
> > [   31.113943] kernel/sched/fair.c:4740 suspicious rcu_dereference_check() usage!
> > [   31.113946]
> > [   31.113946] other info that might help us debug this:
> > [   31.113946]
> > [   31.113952]
> > [   31.113952] RCU used illegally from offline CPU!
> > [   31.113952] rcu_scheduler_active = 1, debug_locks = 0
> > [   31.113957] 3 locks held by swapper/1/0:
> > [   31.113988]  #0:  ((cpu_died).wait.lock){......}, at: [<c005a114>] complete+0x14/0x44
> > [   31.114012]  #1:  (&p->pi_lock){-.-.-.}, at: [<c004a790>] try_to_wake_up+0x28/0x300
> > [   31.114035]  #2:  (rcu_read_lock){......}, at: [<c004f1b8>] select_task_rq_fair+0x5c/0xa04
> > [   31.114038]
> > [   31.114038] stack backtrace:
> > [   31.114046] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.19.0-rc7-next-20150203 #1914
> > [   31.114050] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
> > [   31.114076] [<c0014ce4>] (unwind_backtrace) from [<c0011c30>] (show_stack+0x10/0x14)
> > [   31.114091] [<c0011c30>] (show_stack) from [<c04dc048>] (dump_stack+0x70/0xbc)
> > [   31.114105] [<c04dc048>] (dump_stack) from [<c004f83c>] (select_task_rq_fair+0x6e0/0xa04)
> > [   31.114118] [<c004f83c>] (select_task_rq_fair) from [<c004a83c>] (try_to_wake_up+0xd4/0x300)
> > [   31.114129] [<c004a83c>] (try_to_wake_up) from [<c00598a0>] (__wake_up_common+0x4c/0x80)
> > [   31.114140] [<c00598a0>] (__wake_up_common) from [<c00598e8>] (__wake_up_locked+0x14/0x1c)
> > [   31.114150] [<c00598e8>] (__wake_up_locked) from [<c005a134>] (complete+0x34/0x44)
> > [   31.114167] [<c005a134>] (complete) from [<c04d6ca4>] (cpu_die+0x24/0x84)
> > [   31.114179] [<c04d6ca4>] (cpu_die) from [<c005a508>] (cpu_startup_entry+0x328/0x358)
> > [   31.114189] [<c005a508>] (cpu_startup_entry) from [<40008784>] (0x40008784)
> > [   31.114226] CPU1: shutdown
> >
> > Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
> > ---
> 
> Would it be better to use IPIs instead? The IPI handler could even call
> complete() as long as we IPI off the dying CPU to the killing CPU. I
> suppose this could be an extension of the current IPI_COMPLETION that we
> already have.

That sounds good. I wonder where register_ipi_completion() should be
put. This must execute before cpu_die() of dying CPU (if I get it
right). So it cannot be __cpu_die(). One candidate is __cpu_disable()
but it may not be called always. Any ideas?


> 
> >  arch/arm/kernel/smp.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 49 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
> > index 86ef244c5a24..bb8ff465975f 100644
> > --- a/arch/arm/kernel/smp.c
> > +++ b/arch/arm/kernel/smp.c
> > @@ -213,7 +219,40 @@ int __cpu_disable(void)
> >  	return 0;
> >  }
> >  
> > -static DECLARE_COMPLETION(cpu_died);
> > +/*
> > + * Wait for 5000*1 ms for 'wait_cpu_die' bit to be cleared.
> > + * Actually the real wait time will be longer because of schedule()
> > + * called bit_wait_timeout.
> > + *
> > + * Returns 0 if bit was cleared (CPU died) or non-zero
> > + * otherwise (1 or negative ERRNO).
> > + */
> > +static int wait_for_cpu_die(void)
> > +{
> > +	int retries = 5000, sleep_ms = 1, ret = 0;
> > +
> > +	might_sleep();
> > +
> > +	smp_mb__before_atomic();
> > +	while (test_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die)) {
> > +		ret = out_of_line_wait_on_bit_timeout(&wait_cpu_die,
> > +				CPU_DIE_WAIT_BIT, bit_wait_timeout,
> > +				TASK_UNINTERRUPTIBLE,
> > +				msecs_to_jiffies(sleep_ms));
> > +		if (!ret || (--retries <= 0))
> > +			break;
> > +
> > +		if (retries < 4000) {
> > +			/* After ~1000 ms increase sleeping time to 10 ms */
> > +			retries = 400;
> > +			sleep_ms = 10;
> > +		}
> > +
> > +		smp_mb__before_atomic(); /* For next test_bit() in loop */
> > +	}
> > +
> > +	return ret;
> > +}
> >  
> 
> Is there any reason we test the bit before testing it again in
> out_of_line_wait_on_bit_timeout()? Why can't we just call that function
> in a loop and let it handle checking the bit?

I think there is no strong reason. Maybe except that if bit was cleared
already then we skip the wait preparation steps. I followed the
convention of wait_on_bit in include/linux/wait.h.

I'll remove the test.

> 
> > @@ -267,7 +312,8 @@ void __ref cpu_die(void)
> >  	 * this returns, power and/or clocks can be removed at any point
> >  	 * from this CPU and its cache by platform_cpu_kill().
> >  	 */
> > -	complete(&cpu_died);
> > +	clear_bit(CPU_DIE_WAIT_BIT, &wait_cpu_die);
> > +	smp_mb__after_atomic();
> >  
> >  	/*
> >  	 * Ensure that the cache lines associated with that completion are
> 
> This comment here should be updated because the completion is gone.

Yes, indeed. I'll update it.

Thanks for feedback,
Krzysztof



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-02-05  7:54 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-04 16:53 [PATCH] ARM: Don't use complete() during __cpu_die Krzysztof Kozlowski
2015-02-04 17:57 ` Paul E. McKenney
2015-02-05  7:46   ` Krzysztof Kozlowski
2015-02-04 22:42 ` Stephen Boyd
2015-02-05  7:54   ` Krzysztof Kozlowski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).