LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: 王贇 <yun.wang@linux.alibaba.com>
Cc: Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Mark Rutland <mark.rutland@arm.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
	Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Andrii Nakryiko <andrii@kernel.org>,
	Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
	Yonghong Song <yhs@fb.com>,
	John Fastabend <john.fastabend@gmail.com>,
	KP Singh <kpsingh@kernel.org>,
	"open list:PERFORMANCE EVENTS SUBSYSTEM" 
	<linux-perf-users@vger.kernel.org>,
	"open list:PERFORMANCE EVENTS SUBSYSTEM" 
	<linux-kernel@vger.kernel.org>,
	"open list:BPF (Safe dynamic programs and tools)" 
	<netdev@vger.kernel.org>,
	"open list:BPF (Safe dynamic programs and tools)" 
	<bpf@vger.kernel.org>,
	jroedel@suse.de, x86@kernel.org,
	Josh Poimboeuf <jpoimboe@redhat.com>
Subject: Re: [PATCH] x86/dumpstack/64: Add guard pages to stack_info
Date: Thu, 16 Sep 2021 12:02:21 +0200	[thread overview]
Message-ID: <YUMWLdijs8vSkRjo@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <YUL6R5AH6WNxu5sH@hirez.programming.kicks-ass.net>

On Thu, Sep 16, 2021 at 10:03:19AM +0200, Peter Zijlstra wrote:

> Oh, I'm an idiot... yes it tries to read regs the stack, but clearly
> that won't work for the guard page.

OK, extended it to also cover task and IRQ stacks. get_stack_info()
doesn't seem to know about SOFTIRQ stacks on 64bit, might have to look
into that next.

Andy, what's the story with page_fault_oops(), according to the comment
in exc_double_fault() actual stack overflows will always hit #DF.

---
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 3d52b094850a..c4e92462c2b4 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -61,6 +61,9 @@ enum exception_stack_ordering {
 #define CEA_ESTACK_OFFS(st)					\
 	offsetof(struct cea_exception_stacks, st## _stack)
 
+#define CEA_EGUARD_OFFS(st)					\
+	offsetof(struct cea_exception_stacks, st## _stack_guard)
+
 #define CEA_ESTACK_PAGES					\
 	(sizeof(struct cea_exception_stacks) / PAGE_SIZE)
 
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index f248eb2ac2d4..8ff346579330 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -14,13 +14,14 @@
 #include <asm/switch_to.h>
 
 enum stack_type {
-	STACK_TYPE_UNKNOWN,
+	STACK_TYPE_UNKNOWN = 0,
 	STACK_TYPE_TASK,
 	STACK_TYPE_IRQ,
 	STACK_TYPE_SOFTIRQ,
 	STACK_TYPE_ENTRY,
 	STACK_TYPE_EXCEPTION,
 	STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
+	STACK_TYPE_GUARD = 0x80,
 };
 
 struct stack_info {
@@ -31,6 +32,15 @@ struct stack_info {
 bool in_task_stack(unsigned long *stack, struct task_struct *task,
 		   struct stack_info *info);
 
+static __always_inline bool in_stack_guard(void *addr, void *begin, void *end)
+{
+#ifdef CONFIG_VMAP_STACK
+	if (addr > (begin - PAGE_SIZE))
+		return true;
+#endif
+	return false;
+}
+
 bool in_entry_stack(unsigned long *stack, struct stack_info *info);
 
 int get_stack_info(unsigned long *stack, struct task_struct *task,
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index ea4fe192189d..91b406fe2a39 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -32,12 +32,19 @@ static struct pt_regs exec_summary_regs;
 bool noinstr in_task_stack(unsigned long *stack, struct task_struct *task,
 			   struct stack_info *info)
 {
-	unsigned long *begin = task_stack_page(task);
-	unsigned long *end   = task_stack_page(task) + THREAD_SIZE;
-
-	if (stack < begin || stack >= end)
+	void *begin = task_stack_page(task);
+	void *end   = begin + THREAD_SIZE;
+	int type    = STACK_TYPE_TASK;
+
+	if ((void *)stack < begin || (void *)stack >= end) {
+		if (in_stack_guard(stack, begin, end)) {
+			type |= STACK_TYPE_GUARD;
+			goto fill_info;
+		}
 		return false;
+	}
 
+fill_info:
 	info->type	= STACK_TYPE_TASK;
 	info->begin	= begin;
 	info->end	= end;
@@ -50,14 +57,20 @@ bool noinstr in_task_stack(unsigned long *stack, struct task_struct *task,
 bool noinstr in_entry_stack(unsigned long *stack, struct stack_info *info)
 {
 	struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
-
+	int type = STACK_TYPE_ENTRY;
 	void *begin = ss;
 	void *end = ss + 1;
 
-	if ((void *)stack < begin || (void *)stack >= end)
+	if ((void *)stack < begin || (void *)stack >= end) {
+		if (in_stack_guard(stack, begin, end)) {
+			type |= STACK_TYPE_GUARD;
+			goto fill_info;
+		}
 		return false;
+	}
 
-	info->type	= STACK_TYPE_ENTRY;
+fill_info:
+	info->type	= type;
 	info->begin	= begin;
 	info->end	= end;
 	info->next_sp	= NULL;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 5601b95944fa..3634bdf9ab36 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -32,9 +32,15 @@ const char *stack_type_name(enum stack_type type)
 {
 	BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
 
+	if (type == STACK_TYPE_TASK)
+		return "TASK";
+
 	if (type == STACK_TYPE_IRQ)
 		return "IRQ";
 
+	if (type == STACK_TYPE_SOFTIRQ)
+		return "SOFTIRQ";
+
 	if (type == STACK_TYPE_ENTRY) {
 		/*
 		 * On 64-bit, we have a generic entry stack that we
@@ -63,6 +69,11 @@ struct estack_pages {
 };
 
 #define EPAGERANGE(st)							\
+	[PFN_DOWN(CEA_EGUARD_OFFS(st))] = {				\
+		.offs	= CEA_EGUARD_OFFS(st),				\
+		.size	= PAGE_SIZE,					\
+		.type	= STACK_TYPE_GUARD +				\
+			  STACK_TYPE_EXCEPTION + ESTACK_ ##st, },	\
 	[PFN_DOWN(CEA_ESTACK_OFFS(st)) ...				\
 	 PFN_DOWN(CEA_ESTACK_OFFS(st) + CEA_ESTACK_SIZE(st) - 1)] = {	\
 		.offs	= CEA_ESTACK_OFFS(st),				\
@@ -111,7 +122,7 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac
 	k = (stk - begin) >> PAGE_SHIFT;
 	/* Lookup the page descriptor */
 	ep = &estack_pages[k];
-	/* Guard page? */
+	/* unknown entry */
 	if (!ep->size)
 		return false;
 
@@ -122,7 +133,12 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac
 	info->type	= ep->type;
 	info->begin	= (unsigned long *)begin;
 	info->end	= (unsigned long *)end;
-	info->next_sp	= (unsigned long *)regs->sp;
+	info->next_sp	= NULL;
+
+	/* Can't read regs from a guard page. */
+	if (!(ep->type & STACK_TYPE_GUARD))
+		info->next_sp = (unsigned long *)regs->sp;
+
 	return true;
 }
 
@@ -130,6 +146,7 @@ static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info
 {
 	unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
 	unsigned long *begin;
+	int type = STACK_TYPE_IRQ;
 
 	/*
 	 * @end points directly to the top most stack entry to avoid a -8
@@ -144,19 +161,27 @@ static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info
 	 * final operation is 'popq %rsp' which means after that RSP points
 	 * to the original stack and not to @end.
 	 */
-	if (stack < begin || stack >= end)
+	if (stack < begin || stack >= end) {
+		if (in_stack_guard(stack, begin, end)) {
+			type |= STACK_TYPE_GUARD;
+			goto fill_info;
+		}
 		return false;
+	}
 
-	info->type	= STACK_TYPE_IRQ;
+fill_info:
+	info->type	= type;
 	info->begin	= begin;
 	info->end	= end;
+	info->next_sp	= NULL;
 
 	/*
 	 * The next stack pointer is stored at the top of the irq stack
 	 * before switching to the irq stack. Actual stack entries are all
 	 * below that.
 	 */
-	info->next_sp = (unsigned long *)*(end - 1);
+	if (!(type & STACK_TYPE_GUARD))
+		info->next_sp = (unsigned long *)*(end - 1);
 
 	return true;
 }
@@ -193,6 +218,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
 	if (!get_stack_info_noinstr(stack, task, info))
 		goto unknown;
 
+	if (info->type & STACK_TYPE_GUARD)
+		goto unknown;
+
 	/*
 	 * Make sure we don't iterate through any given stack more than once.
 	 * If it comes up a second time then there's something wrong going on:
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a58800973aed..80f6d8d735eb 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -353,6 +353,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
 
 #ifdef CONFIG_VMAP_STACK
 	unsigned long address = read_cr2();
+	struct stack_info info;
 #endif
 
 #ifdef CONFIG_X86_ESPFIX64
@@ -455,9 +456,11 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
 	 * stack even if the actual trigger for the double fault was
 	 * something else.
 	 */
-	if ((unsigned long)task_stack_page(tsk) - 1 - address < PAGE_SIZE) {
-		handle_stack_overflow("kernel stack overflow (double-fault)",
-				      regs, address);
+	if (get_stack_info_noinstr((void *)address, current, &info) &&
+	    info.type & STACK_TYPE_GUARD) {
+		const char *name = stack_type_name(info.type & ~STACK_TYPE_GUARD);
+		pr_emerg("BUG: %s stack guard hit at %p (stack is %p..%p)\n",
+			 name, (void *)address, info.begin, info.end);
 	}
 #endif
 
@@ -708,7 +711,9 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
 	sp    = regs->sp;
 	stack = (unsigned long *)sp;
 
-	if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY ||
+	if (!get_stack_info_noinstr(stack, current, &info) ||
+	    info.type & STACK_TYPE_GUARD ||
+	    info.type == STACK_TYPE_ENTRY ||
 	    info.type >= STACK_TYPE_EXCEPTION_LAST)
 		sp = __this_cpu_ist_top_va(VC2);
 

  reply	other threads:[~2021-09-16 10:04 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-09  3:13 [RFC PATCH] perf: fix panic by mark recursion inside perf_log_throttle 王贇
2021-09-09  6:10 ` 王贇
2021-09-10 15:38 ` Peter Zijlstra
2021-09-13  3:00   ` 王贇
2021-09-13  3:21     ` 王贇
2021-09-13 10:24     ` Peter Zijlstra
2021-09-13 10:36       ` Peter Zijlstra
2021-09-14  2:02         ` 王贇
2021-09-14  1:58       ` 王贇
2021-09-14 10:28         ` Peter Zijlstra
2021-09-15  1:51           ` 王贇
2021-09-15 15:17             ` [PATCH] x86/dumpstack/64: Add guard pages to stack_info Peter Zijlstra
2021-09-16  3:34               ` 王贇
2021-09-16  3:47               ` 王贇
2021-09-16  8:00                 ` Peter Zijlstra
2021-09-16  8:03                   ` Peter Zijlstra
2021-09-16 10:02                     ` Peter Zijlstra [this message]
2021-09-17  2:15                       ` 王贇
2021-09-17  3:02                       ` 王贇
2021-09-17 10:21                         ` Peter Zijlstra
2021-09-17 16:40                           ` Peter Zijlstra
2021-09-18  2:30                             ` 王贇
2021-09-18  6:56                               ` Peter Zijlstra
2021-09-18  2:38                             ` 王贇
2021-09-13  3:30 ` [PATCH] perf: fix panic by disable ftrace on fault.c 王贇
2021-09-13 14:49   ` Dave Hansen
2021-09-14  1:52     ` 王贇
2021-09-14  3:02       ` 王贇
2021-09-14  7:23         ` 王贇
2021-09-14 16:16           ` Dave Hansen
2021-09-15  1:56             ` 王贇
2021-09-15  3:27               ` Dave Hansen
2021-09-15  7:22                 ` 王贇
2021-09-15  7:34                   ` 王贇
2021-09-15 15:19                     ` [PATCH] x86: Increase exception stack sizes Peter Zijlstra
2021-09-16  3:42                       ` 王贇
2021-09-21  7:28                       ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-09-21 12:41                       ` tip-bot2 for Peter Zijlstra
2021-09-14  2:08     ` [PATCH] perf: fix panic by disable ftrace on fault.c 王贇

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YUMWLdijs8vSkRjo@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=acme@kernel.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@redhat.com \
    --cc=jpoimboe@redhat.com \
    --cc=jroedel@suse.de \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=songliubraving@fb.com \
    --cc=x86@kernel.org \
    --cc=yhs@fb.com \
    --cc=yun.wang@linux.alibaba.com \
    --subject='Re: [PATCH] x86/dumpstack/64: Add guard pages to stack_info' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).