LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [RFC PATCH] powerpc: Investigate static_call concept
@ 2021-08-27  9:45 Christophe Leroy
  2021-08-27 14:18 ` Peter Zijlstra
  0 siblings, 1 reply; 3+ messages in thread
From: Christophe Leroy @ 2021-08-27  9:45 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev, Peter Zijlstra, Josh Poimboeuf,
	Jason Baron, Steven Rostedt, Ard Biesheuvel

This RFC is to validate the concept of static_call on powerpc.

Highly copied from x86.

It replaces ppc_md.get_irq() which is called at every IRQ, by
a static call.

When updating the call, we just replace the instruction at the
trampoline address by a relative jump to the function.

For the time being, the case of out-of-range functions is not handled.

Note that even if we don't immediately use static calls in powerpc
code, supporting static calls would immediately benefit to core
functionnalities using it, like tracing.

Tested on powerpc 8xx.

With the patch:

	00000000 <__SCT__ppc_md_get_irq>:
	   0:	4e 80 00 20 	blr			<== Replaced by 'b <ppc_md.get_irq>' at runtime
...
	00000038 <__do_irq>:
	  38:	94 21 ff f0 	stwu    r1,-16(r1)
	  3c:	7c 08 02 a6 	mflr    r0
	  40:	90 01 00 14 	stw     r0,20(r1)
	  44:	48 00 00 01 	bl      44 <__do_irq+0xc>
				44: R_PPC_REL24	__SCT__ppc_md_get_irq
...

Before the patch:

	00000038 <__do_irq>:
	  38:	3d 20 00 00 	lis     r9,0
				3a: R_PPC_ADDR16_HA	ppc_md+0x20
	  3c:	94 21 ff f0 	stwu    r1,-16(r1)
	  40:	81 29 00 00 	lwz     r9,0(r9)
				42: R_PPC_ADDR16_LO	ppc_md+0x20
	  44:	7c 08 02 a6 	mflr    r0
	  48:	90 01 00 14 	stw     r0,20(r1)
	  4c:	7d 29 03 a6 	mtctr   r9
	  50:	4e 80 04 21 	bctrl
...

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/Kconfig                   |  1 +
 arch/powerpc/include/asm/machdep.h     |  3 +++
 arch/powerpc/include/asm/static_call.h | 25 +++++++++++++++++++++++++
 arch/powerpc/kernel/Makefile           |  2 +-
 arch/powerpc/kernel/irq.c              |  2 +-
 arch/powerpc/kernel/setup-common.c     |  4 ++++
 arch/powerpc/kernel/static_call.c      | 16 ++++++++++++++++
 7 files changed, 51 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/include/asm/static_call.h
 create mode 100644 arch/powerpc/kernel/static_call.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 36b72d972568..c3930ea63e59 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -247,6 +247,7 @@ config PPC
 	select HAVE_SOFTIRQ_ON_OWN_STACK
 	select HAVE_STACKPROTECTOR		if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
 	select HAVE_STACKPROTECTOR		if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
+	select HAVE_STATIC_CALL
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select HUGETLB_PAGE_SIZE_VARIABLE	if PPC_BOOK3S_64 && HUGETLB_PAGE
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 764f2732a821..ac9712312b76 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/dma-mapping.h>
 #include <linux/export.h>
+#include <linux/static_call_types.h>
 
 #include <asm/setup.h>
 
@@ -295,5 +296,7 @@ static inline void log_error(char *buf, unsigned int err_type, int fatal)
 #define machine_late_initcall(mach, fn)		__define_machine_initcall(mach, fn, 7)
 #define machine_late_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 7s)
 
+DECLARE_STATIC_CALL(ppc_md_get_irq, *ppc_md.get_irq);
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_MACHDEP_H */
diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h
new file mode 100644
index 000000000000..335ee4ceaef9
--- /dev/null
+++ b/arch/powerpc/include/asm/static_call.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_STATIC_CALL_H
+#define _ASM_POWERPC_STATIC_CALL_H
+
+#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)			\
+	asm(".pushsection .text, \"ax\"				\n"	\
+	    ".align 4						\n"	\
+	    ".globl " STATIC_CALL_TRAMP_STR(name) "		\n"	\
+	    STATIC_CALL_TRAMP_STR(name) ":			\n"	\
+	    "	b " #func "					\n"	\
+	    ".type " STATIC_CALL_TRAMP_STR(name) ", @function	\n"	\
+	    ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+	    ".popsection					\n")
+
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)			\
+	asm(".pushsection .text, \"ax\"				\n"	\
+	    ".align 4						\n"	\
+	    ".globl " STATIC_CALL_TRAMP_STR(name) "		\n"	\
+	    STATIC_CALL_TRAMP_STR(name) ":			\n"	\
+	    "	blr						\n"	\
+	    ".type " STATIC_CALL_TRAMP_STR(name) ", @function	\n"	\
+	    ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+	    ".popsection					\n")
+
+#endif /* _ASM_POWERPC_STATIC_CALL_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 7be36c1e1db6..08877252dff8 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -47,7 +47,7 @@ obj-y				:= cputable.o syscalls.o \
 				   udbg.o misc.o io.o misc_$(BITS).o \
 				   of_platform.o prom_parse.o firmware.o \
 				   hw_breakpoint_constraints.o interrupt.o \
-				   kdebugfs.o
+				   kdebugfs.o static_call.o
 obj-y				+= ptrace/
 obj-$(CONFIG_PPC64)		+= setup_64.o \
 				   paca.o nvram_64.o note.o
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 551b653228c4..872f46e20754 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -736,7 +736,7 @@ void __do_irq(struct pt_regs *regs)
 	 *
 	 * This will typically lower the interrupt line to the CPU
 	 */
-	irq = ppc_md.get_irq();
+	irq = static_call(ppc_md_get_irq)();
 
 	/* We can hard enable interrupts now to allow perf interrupts */
 	may_hard_irq_enable();
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index b1e43b69a559..57d06c163b7b 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -33,6 +33,7 @@
 #include <linux/of_platform.h>
 #include <linux/hugetlb.h>
 #include <linux/pgtable.h>
+#include <linux/static_call.h>
 #include <asm/io.h>
 #include <asm/paca.h>
 #include <asm/prom.h>
@@ -81,6 +82,8 @@ EXPORT_SYMBOL(ppc_md);
 struct machdep_calls *machine_id;
 EXPORT_SYMBOL(machine_id);
 
+DEFINE_STATIC_CALL_NULL(ppc_md_get_irq, *ppc_md.get_irq);
+
 int boot_cpuid = -1;
 EXPORT_SYMBOL_GPL(boot_cpuid);
 
@@ -613,6 +616,7 @@ void probe_machine(void)
 	     machine_id++) {
 		DBG("  %s ...", machine_id->name);
 		memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls));
+		static_call_update(ppc_md_get_irq, ppc_md.get_irq);
 		if (ppc_md.probe()) {
 			DBG(" match !\n");
 			break;
diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c
new file mode 100644
index 000000000000..a281f1759c39
--- /dev/null
+++ b/arch/powerpc/kernel/static_call.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/memory.h>
+#include <linux/static_call.h>
+
+#include <asm/code-patching.h>
+
+void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
+{
+	mutex_lock(&text_mutex);
+
+	if (tramp)
+		patch_branch(tramp, (unsigned long)func, 0);
+
+	mutex_unlock(&text_mutex);
+}
+EXPORT_SYMBOL_GPL(arch_static_call_transform);
-- 
2.25.0


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [RFC PATCH] powerpc: Investigate static_call concept
  2021-08-27  9:45 [RFC PATCH] powerpc: Investigate static_call concept Christophe Leroy
@ 2021-08-27 14:18 ` Peter Zijlstra
  2021-08-27 16:04   ` Segher Boessenkool
  0 siblings, 1 reply; 3+ messages in thread
From: Peter Zijlstra @ 2021-08-27 14:18 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	linux-kernel, linuxppc-dev, Josh Poimboeuf, Jason Baron,
	Steven Rostedt, Ard Biesheuvel

On Fri, Aug 27, 2021 at 09:45:37AM +0000, Christophe Leroy wrote:
> This RFC is to validate the concept of static_call on powerpc.
> 
> Highly copied from x86.
> 
> It replaces ppc_md.get_irq() which is called at every IRQ, by
> a static call.

The code looks saner, but does it actually improve performance? I'm
thinking the double branch also isn't free.

> When updating the call, we just replace the instruction at the
> trampoline address by a relative jump to the function.
> 
> For the time being, the case of out-of-range functions is not handled.

The paranoid in me would've made it:

	BUG_ON(patch_branch(...));

just to make sure to notice the target not fitting. Ohh, patch_branch()
doesn't return the create_branch() error, perhaps that wants to be
fixed?

Did you see the arm64 variant that deals with out-of-range functions in
their trampoline?

  https://lore.kernel.org/linux-arm-kernel/20201120082103.4840-1-ardb@kernel.org/

Not exactly 'nice' but supposedly that works.

> +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)			\
> +	asm(".pushsection .text, \"ax\"				\n"	\
> +	    ".align 4						\n"	\
> +	    ".globl " STATIC_CALL_TRAMP_STR(name) "		\n"	\
> +	    STATIC_CALL_TRAMP_STR(name) ":			\n"	\
> +	    "	blr						\n"	\
> +	    ".type " STATIC_CALL_TRAMP_STR(name) ", @function	\n"	\
> +	    ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
> +	    ".popsection					\n")
> +

Since you support CALL_NULL_TRAMP, your patch function below:

> +void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
> +{
> +	mutex_lock(&text_mutex);
> +
> +	if (tramp)
> +		patch_branch(tramp, (unsigned long)func, 0);
> +
> +	mutex_unlock(&text_mutex);
> +}
> +EXPORT_SYMBOL_GPL(arch_static_call_transform);

Ought to patch in "blr" when !func to be consistent :-)

I'm thinking that your core kernel text all fits in the native range and
only modules need out-of-range ?

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [RFC PATCH] powerpc: Investigate static_call concept
  2021-08-27 14:18 ` Peter Zijlstra
@ 2021-08-27 16:04   ` Segher Boessenkool
  0 siblings, 0 replies; 3+ messages in thread
From: Segher Boessenkool @ 2021-08-27 16:04 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Christophe Leroy, linux-kernel, Steven Rostedt, Jason Baron,
	Paul Mackerras, Josh Poimboeuf, linuxppc-dev, Ard Biesheuvel

On Fri, Aug 27, 2021 at 04:18:47PM +0200, Peter Zijlstra wrote:
> On Fri, Aug 27, 2021 at 09:45:37AM +0000, Christophe Leroy wrote:
> > This RFC is to validate the concept of static_call on powerpc.
> > 
> > Highly copied from x86.
> > 
> > It replaces ppc_md.get_irq() which is called at every IRQ, by
> > a static call.
> 
> The code looks saner, but does it actually improve performance? I'm
> thinking the double branch also isn't free.

It isn't, but it is very cheap, while the branch-to-count is not, even
*if* it is correctly predicted.

> The paranoid in me would've made it:
> 
> 	BUG_ON(patch_branch(...));
> 
> just to make sure to notice the target not fitting. Ohh, patch_branch()
> doesn't return the create_branch() error, perhaps that wants to be
> fixed?

Should that be allowed to fail ever?  I.e., should a failure be a fatal
error?  Sounds very fragile otherwise.


Segher

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-08-27 16:10 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-27  9:45 [RFC PATCH] powerpc: Investigate static_call concept Christophe Leroy
2021-08-27 14:18 ` Peter Zijlstra
2021-08-27 16:04   ` Segher Boessenkool

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).