LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Lai Jiangshan <jiangshanlai@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: Lai Jiangshan <laijs@linux.alibaba.com>,
	Andy Lutomirski <luto@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>,
	Joerg Roedel <jroedel@suse.de>,
	Youquan Song <youquan.song@intel.com>,
	Tony Luck <tony.luck@intel.com>
Subject: [PATCH 22/24] x86/entry: Implement and use do_paranoid_entry() and paranoid_exit()
Date: Wed,  1 Sep 2021 01:50:23 +0800	[thread overview]
Message-ID: <20210831175025.27570-23-jiangshanlai@gmail.com> (raw)
In-Reply-To: <20210831175025.27570-1-jiangshanlai@gmail.com>

From: Lai Jiangshan <laijs@linux.alibaba.com>

All the required helpers are now in place in traps.c, so the main body
of paranoid_entry() can be implemented in C as do_paranoid_entry(), and
paranoid_exit() can be implemented entirely in C.

paranoid_entry() needs to save two values, which are added to
struct ist_regs; paranoid_exit() uses them after the interrupt
is handled.

No functional change intended.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/entry/entry_64.S    | 128 +++--------------------------------
 arch/x86/entry/traps.c       |  62 +++++++++++++++++
 arch/x86/include/asm/traps.h |  22 ++++++
 3 files changed, 92 insertions(+), 120 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1ae10ca351f4..8b2e19e6c9e1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -437,6 +437,7 @@ SYM_CODE_START(\asmsym)
 
 	call	\cfunc
 
+	movq	%rsp, %rdi		/* ist_regs pointer */
 	call	paranoid_exit
 	addq	$IST_pt_regs, %rsp	/* put %rsp back to pt_regs */
 	jmp	restore_regs_and_return_to_kernel
@@ -516,6 +517,7 @@ SYM_CODE_START(\asmsym)
 	 * identical to the stack in the IRET frame or the VC fall-back stack,
 	 * so it is definitely mapped even with PTI enabled.
 	 */
+	movq	%rsp, %rdi		/* ist_regs pointer */
 	call	paranoid_exit
 	addq	$IST_pt_regs, %rsp	/* put %rsp back to pt_regs */
 	jmp	restore_regs_and_return_to_kernel
@@ -548,6 +550,7 @@ SYM_CODE_START(\asmsym)
 	movq	$-1, ORIG_RAX(%rdi)	/* no syscall to restart */
 	call	\cfunc
 
+	movq	%rsp, %rdi		/* ist_regs pointer */
 	call	paranoid_exit
 	addq	$IST_pt_regs, %rsp	/* put %rsp back to pt_regs */
 	jmp	restore_regs_and_return_to_kernel
@@ -840,14 +843,8 @@ SYM_CODE_END(xen_failsafe_callback)
 #endif /* CONFIG_XEN_PV */
 
 /*
- * Save all registers in pt_regs. Return GSBASE related information
- * in EBX depending on the availability of the FSGSBASE instructions:
- *
- * FSGSBASE	R/EBX
- *     N        0 -> SWAPGS on exit
- *              1 -> no SWAPGS on exit
- *
- *     Y        GSBASE value at entry, must be restored in paranoid_exit
+ * Save all registers and additional info in ist_regs.
+ * Switch CR3 and gsbase if needed.
  */
 SYM_CODE_START_LOCAL(paranoid_entry)
 	UNWIND_HINT_FUNC
@@ -856,124 +853,14 @@ SYM_CODE_START_LOCAL(paranoid_entry)
 	movq	RDI(%rsp), %rsi	/* temporarily store the return address in %rsi */
 	movq	%rdi, RDI(%rsp) /* put %rdi onto pt_regs */
 	subq	$IST_pt_regs, %rsp /* reserve room for ist_regs */
+	movq	%rsp, %rdi	/* ist_regs pointer */
 	pushq	%rsi		/* put the return address onto the stack */
 	ENCODE_FRAME_POINTER 8+IST_pt_regs
 
-	/*
-	 * Always stash CR3 in %r14.  This value will be restored,
-	 * verbatim, at exit.  Needed if paranoid_entry interrupted
-	 * another entry that already switched to the user CR3 value
-	 * but has not yet returned to userspace.
-	 *
-	 * This is also why CS (stashed in the "iret frame" by the
-	 * hardware at entry) can not be used: this may be a return
-	 * to kernel code, but with a user CR3 value.
-	 *
-	 * Switching CR3 does not depend on kernel GSBASE so it can
-	 * be done before switching to the kernel GSBASE. This is
-	 * required for FSGSBASE because the kernel GSBASE has to
-	 * be retrieved from a kernel internal table.
-	 */
-	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
-
-	/*
-	 * Handling GSBASE depends on the availability of FSGSBASE.
-	 *
-	 * Without FSGSBASE the kernel enforces that negative GSBASE
-	 * values indicate kernel GSBASE. With FSGSBASE no assumptions
-	 * can be made about the GSBASE value when entering from user
-	 * space.
-	 */
-	ALTERNATIVE "jmp .Lparanoid_entry_checkgs", "", X86_FEATURE_FSGSBASE
-
-	/*
-	 * Read the current GSBASE and store it in %rbx unconditionally,
-	 * retrieve and set the current CPUs kernel GSBASE. The stored value
-	 * has to be restored in paranoid_exit unconditionally.
-	 *
-	 * The unconditional write to GS base below ensures that no subsequent
-	 * loads based on a mispredicted GS base can happen, therefore no LFENCE
-	 * is needed here.
-	 */
-	SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
-	ret
-
-.Lparanoid_entry_checkgs:
-	/* EBX = 1 -> kernel GSBASE active, no restore required */
-	movl	$1, %ebx
-	/*
-	 * The kernel-enforced convention is a negative GSBASE indicates
-	 * a kernel value. No SWAPGS needed on entry and exit.
-	 */
-	movl	$MSR_GS_BASE, %ecx
-	rdmsr
-	testl	%edx, %edx
-	jns	.Lparanoid_entry_swapgs
-	ret
-
-.Lparanoid_entry_swapgs:
-	swapgs
-
-	/*
-	 * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
-	 * unconditional CR3 write, even in the PTI case.  So do an lfence
-	 * to prevent GS speculation, regardless of whether PTI is enabled.
-	 */
-	FENCE_SWAPGS_KERNEL_ENTRY
-
-	/* EBX = 0 -> SWAPGS required on exit */
-	xorl	%ebx, %ebx
+	call	do_paranoid_entry
 	ret
 SYM_CODE_END(paranoid_entry)
 
-/*
- * "Paranoid" exit path from exception stack.  This is invoked
- * only on return from IST interrupts that came from kernel space.
- *
- * We may be returning to very strange contexts (e.g. very early
- * in syscall entry), so checking for preemption here would
- * be complicated.  Fortunately, there's no good reason to try
- * to handle preemption here.
- *
- * R/EBX contains the GSBASE related information depending on the
- * availability of the FSGSBASE instructions:
- *
- * FSGSBASE	R/EBX
- *     N        0 -> SWAPGS on exit
- *              1 -> no SWAPGS on exit
- *
- *     Y        User space GSBASE, must be restored unconditionally
- */
-SYM_CODE_START_LOCAL(paranoid_exit)
-	UNWIND_HINT_REGS offset=8
-	/*
-	 * The order of operations is important. RESTORE_CR3 requires
-	 * kernel GSBASE.
-	 *
-	 * NB to anyone to try to optimize this code: this code does
-	 * not execute at all for exceptions from user mode. Those
-	 * exceptions go through error_exit instead.
-	 */
-	RESTORE_CR3	scratch_reg=%rax save_reg=%r14
-
-	/* Handle the three GSBASE cases */
-	ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE
-
-	/* With FSGSBASE enabled, unconditionally restore GSBASE */
-	wrgsbase	%rbx
-	ret
-
-.Lparanoid_exit_checkgs:
-	/* On non-FSGSBASE systems, conditionally do SWAPGS */
-	testl		%ebx, %ebx
-	jnz		.Lparanoid_exit_done
-
-	/* We are returning to a context with user GSBASE */
-	swapgs
-.Lparanoid_exit_done:
-	ret
-SYM_CODE_END(paranoid_exit)
-
 /*
  * Save all registers in pt_regs, and switch GS if needed.
  */
@@ -1308,6 +1195,7 @@ end_repeat_nmi:
 	 * Use paranoid_exit to handle SWAPGS and CR3, but no need to use
 	 * restore_regs_and_return_to_kernel as we must handle nested NMI.
 	 */
+	movq	%rsp, %rdi			/* ist_regs pointer */
 	call	paranoid_exit
 	addq	$IST_pt_regs, %rsp		/* put %rsp back to pt_regs */
 
diff --git a/arch/x86/entry/traps.c b/arch/x86/entry/traps.c
index b5c92b4e0cb5..52511db6baa6 100644
--- a/arch/x86/entry/traps.c
+++ b/arch/x86/entry/traps.c
@@ -1029,6 +1029,68 @@ static __always_inline unsigned long ist_switch_to_kernel_gsbase(void)
 	/* SWAPGS required on exit */
 	return 0;
 }
+
+asmlinkage __visible __entry_text
+void do_paranoid_entry(struct ist_regs *ist)
+{
+	/*
+	 * Always stash CR3 in ist->cr3.  This value will be restored,
+	 * verbatim, at exit.  Needed if paranoid_entry interrupted
+	 * another entry that already switched to the user CR3 value
+	 * but has not yet returned to userspace.
+	 *
+	 * This is also why CS (stashed in the "iret frame" by the
+	 * hardware at entry) can not be used: this may be a return
+	 * to kernel code, but with a user CR3 value.
+	 *
+	 * Switching CR3 does not depend on kernel GSBASE so it can
+	 * be done before switching to the kernel GSBASE. This is
+	 * required for FSGSBASE because the kernel GSBASE has to
+	 * be retrieved from a kernel internal table.
+	 */
+	ist->cr3 = ist_switch_to_kernel_cr3();
+
+	/* Handle GSBASE; keep the result in ist->gsbase for paranoid_exit(). */
+	ist->gsbase = ist_switch_to_kernel_gsbase();
+}
+
+/*
+ * "Paranoid" exit path from exception stack.  This is invoked
+ * only on return from IST interrupts that came from kernel space.
+ *
+ * We may be returning to very strange contexts (e.g. very early
+ * in syscall entry), so checking for preemption here would
+ * be complicated.  Fortunately, there's no good reason to try
+ * to handle preemption here.
+ */
+asmlinkage __visible __entry_text
+void paranoid_exit(struct ist_regs *ist)
+{
+	/*
+	 * Restore CR3 first: it may use the kernel GSBASE, which is still active.
+	 */
+	ist_restore_cr3(ist->cr3);
+
+	/*
+	 * Handle the three GSBASE cases.
+	 *
+	 * ist->gsbase contains the GSBASE related information depending
+	 * on the availability of the FSGSBASE instructions:
+	 *
+	 * FSGSBASE	ist->gsbase
+	 *     N        0 -> SWAPGS on exit
+	 *              1 -> no SWAPGS on exit
+	 *
+	 *     Y        User space GSBASE, must be restored unconditionally
+	 */
+	if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+		wrgsbase(ist->gsbase);
+		return;
+	}
+
+	if (!ist->gsbase)
+		native_swapgs();
+}
 #endif
 
 static bool is_sysenter_singlestep(struct pt_regs *regs)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index e24c63bbc30a..0bc7117a01cd 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -12,6 +12,26 @@
 
 #ifdef CONFIG_X86_64
 struct ist_regs {
+	/*
+	 * Always stash CR3 in cr3.  This value will be restored,
+	 * verbatim, at exit.  Needed if paranoid_entry interrupted
+	 * another entry that already switched to the user CR3 value
+	 * but has not yet returned to userspace.
+	 */
+	unsigned long cr3;
+
+	/*
+	 * gsbase contains the GSBASE related information depending on the
+	 * availability of the FSGSBASE instructions:
+	 *
+	 * FSGSBASE	gsbase
+	 *     N        0 -> SWAPGS on exit
+	 *              1 -> no SWAPGS on exit
+	 *
+	 *     Y        User space GSBASE, must be restored unconditionally
+	 */
+	unsigned long gsbase;
+
 	/*
 	 * ist specific fields must be defined before pt_regs
 	 * and they are located below pt_regs on the stacks.
@@ -20,6 +40,8 @@ struct ist_regs {
 };
 
 asmlinkage __visible notrace struct pt_regs *do_error_entry(struct pt_regs *eregs);
+asmlinkage __visible notrace void do_paranoid_entry(struct ist_regs *ist);
+asmlinkage __visible notrace void paranoid_exit(struct ist_regs *ist);
 void __init trap_init(void);
 asmlinkage __visible noinstr struct ist_regs *vc_switch_off_ist(struct ist_regs *ist);
 #endif
-- 
2.19.1.6.gb485710b


  parent reply	other threads:[~2021-08-31 17:52 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-31 17:50 [PATCH 00/24] x86/entry/64: Convert a bunch of ASM entry code into C code Lai Jiangshan
2021-08-31 17:50 ` [PATCH 01/24] x86/traps: Remove stack-protector from traps.c Lai Jiangshan
2021-08-31 17:50 ` [PATCH 02/24] x86/traps: Move arch/x86/kernel/traps.c to arch/x86/entry/ Lai Jiangshan
2021-09-02  8:09   ` Joerg Roedel
2021-09-02  9:21     ` Lai Jiangshan
2021-09-02 10:50       ` Peter Zijlstra
2021-09-02 11:54         ` Lai Jiangshan
2021-09-02 12:05           ` Peter Zijlstra
2021-09-02 13:34             ` Peter Zijlstra
2021-09-02 17:05               ` Nick Desaulniers
2021-09-02 17:19                 ` Miguel Ojeda
2021-09-02 17:23                   ` Nick Desaulniers
2021-09-03  7:36                 ` Martin Liška
2021-09-07 21:12                   ` Nick Desaulniers
2021-09-08  7:33                     ` Martin Liška
2021-08-31 17:50 ` [PATCH 03/24] x86/traps: Move declaration of native_irq_return_iret up Lai Jiangshan
2021-08-31 17:50 ` [PATCH 04/24] x86/entry: Expose the address of .Lgs_change to traps.c Lai Jiangshan
2021-09-02  9:14   ` Peter Zijlstra
2021-09-02  9:20     ` Lai Jiangshan
2021-08-31 17:50 ` [PATCH 05/24] x86/entry: Introduce __entry_text for entry code written in C Lai Jiangshan
2021-08-31 19:34   ` Peter Zijlstra
2021-09-01  0:23     ` Lai Jiangshan
2021-08-31 17:50 ` [PATCH 06/24] x86/entry: Move PTI_USER_* to arch/x86/include/asm/processor-flags.h Lai Jiangshan
2021-08-31 17:50 ` [PATCH 07/24] x86: Mark __native_read_cr3() & native_write_cr3() as __always_inline Lai Jiangshan
2021-08-31 17:50 ` [PATCH 08/24] x86/traps: Add C verion of SWITCH_TO_KERNEL_CR3 as switch_to_kernel_cr3() Lai Jiangshan
2021-08-31 17:50 ` [PATCH 09/24] x86/traps: Add fence_swapgs_{user,kernel}_entry() Lai Jiangshan
2021-09-02  9:25   ` Peter Zijlstra
2021-08-31 17:50 ` [PATCH 10/24] x86/traps: Move pt_regs only in fixup_bad_iret() Lai Jiangshan
2021-08-31 17:50 ` [PATCH 11/24] x86/entry: Replace the most of asm code of error_entry to C code Lai Jiangshan
2021-09-02 10:16   ` Peter Zijlstra
2021-09-02 12:08     ` Lai Jiangshan
2021-08-31 17:50 ` [PATCH 12/24] x86/traps: Reconstruct pt_regs on task stack directly in fixup_bad_iret() Lai Jiangshan
2021-08-31 17:50 ` [PATCH 13/24] x86/traps: Mark sync_regs() and fixup_bad_iret() as static __always_inline Lai Jiangshan
2021-08-31 17:50 ` [PATCH 14/24] x86/entry: Make paranoid_exit() callable Lai Jiangshan
2021-08-31 17:50 ` [PATCH 15/24] x86/entry: Call paranoid_exit() in asm_exc_nmi() Lai Jiangshan
2021-08-31 17:50 ` [PATCH 16/24] x86/entry: Use skip_rdi instead of save_ret for PUSH_AND_CLEAR_REGS Lai Jiangshan
2021-08-31 17:50 ` [PATCH 17/24] x86/entry: Introduce struct ist_regs Lai Jiangshan
2021-09-10  0:18   ` Lai Jiangshan
2021-09-10  0:36     ` Lai Jiangshan
2021-09-10  4:31     ` H. Peter Anvin
2021-09-10  7:13       ` Lai Jiangshan
2021-09-10  7:14         ` H. Peter Anvin
2021-09-10  4:50     ` H. Peter Anvin
2021-09-10  4:51       ` H. Peter Anvin
2021-08-31 17:50 ` [PATCH 18/24] x86/entry: Add the C version ist_switch_to_kernel_cr3() Lai Jiangshan
2021-08-31 17:50 ` [PATCH 19/24] x86/entry: Add the C version ist_restore_cr3() Lai Jiangshan
2021-08-31 17:50 ` [PATCH 20/24] x86/entry: Add the C version get_percpu_base() Lai Jiangshan
2021-08-31 17:50 ` [PATCH 21/24] x86/entry: Add the C version ist_switch_to_kernel_gsbase() Lai Jiangshan
2021-08-31 17:50 ` Lai Jiangshan [this message]
2021-09-02 10:33   ` [PATCH 22/24] x86/entry: Implement and use do_paranoid_entry() and paranoid_exit() Peter Zijlstra
2021-09-02 10:42     ` Lai Jiangshan
2021-09-02 12:02       ` Peter Zijlstra
2021-09-02 11:58     ` Lai Jiangshan
2021-09-02 12:29       ` Joerg Roedel
2021-08-31 17:50 ` [PATCH 23/24] x86/entry: Remove the unused ASM macros Lai Jiangshan
2021-08-31 17:50 ` [PATCH 24/24] x86/syscall/64: Move the checking for sysret to C code Lai Jiangshan
2021-09-10  7:20   ` Nikolay Borisov
2021-09-10  7:30     ` Lai Jiangshan
2021-08-31 20:44 ` [PATCH 00/24] x86/entry/64: Convert a bunch of ASM entry code into " Peter Zijlstra
2021-09-02  6:28   ` Lai Jiangshan
2021-09-02  7:44     ` Peter Zijlstra
2021-09-02 10:50 ` [PATCH 25/24] x86/traps: Rewrite native_load_gs_index in " Lai Jiangshan
2021-09-08  1:38   ` H. Peter Anvin
2021-09-08  4:42     ` H. Peter Anvin
2021-09-08  5:00       ` H. Peter Anvin
2021-09-08  7:12         ` Lai Jiangshan
2021-09-09 23:16           ` H. Peter Anvin
2021-09-13 20:01   ` Andy Lutomirski
2021-09-14  2:04     ` Lai Jiangshan
2021-09-14  8:14       ` Peter Zijlstra
2021-09-14  8:17         ` Borislav Petkov
2021-09-14  8:40         ` Lai Jiangshan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210831175025.27570-23-jiangshanlai@gmail.com \
    --to=jiangshanlai@gmail.com \
    --cc=bp@alien8.de \
    --cc=hpa@zytor.com \
    --cc=jroedel@suse.de \
    --cc=laijs@linux.alibaba.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=tony.luck@intel.com \
    --cc=x86@kernel.org \
    --cc=youquan.song@intel.com \
    --subject='Re: [PATCH 22/24] x86/entry: Implement and use do_paranoid_entry() and paranoid_exit()' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).