LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: KP Singh <kpsingh@chromium.org>
To: linux-kernel@vger.kernel.org, bpf@vger.kernel.org,
	x86@kernel.org, linux-security-module@vger.kernel.org
Cc: Kees Cook <keescook@chromium.org>,
	"David S. Miller" <davem@davemloft.net>,
	Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>,
	Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>,
	Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
	Yonghong Song <yhs@fb.com>, Andrii Nakryiko <andriin@fb.com>,
	Thomas Garnier <thgarnie@chromium.org>,
	Florent Revest <revest@chromium.org>,
	Brendan Jackman <jackmanb@chromium.org>,
	Jann Horn <jannh@google.com>, Matthew Garrett <mjg59@google.com>,
	Michael Halcrow <mhalcrow@google.com>
Subject: [PATCH bpf-next] bpf: Make trampolines W^X
Date: Sat,  4 Jan 2020 00:47:25 +0100	[thread overview]
Message-ID: <20200103234725.22846-1-kpsingh@chromium.org> (raw)

From: KP Singh <kpsingh@google.com>

The image for the BPF trampolines is allocated with
bpf_jit_alloc_exe_page which marks this allocated page executable. This
means that the allocated memory is W and X at the same time making it
susceptible to WX based attacks.

Since the allocated memory is shared between two trampolines (the
current and the next), 2 pages must be allocated to adhere to W^X and
the following sequence is obeyed where trampolines are modified:

- Mark memory as non executable (set_memory_nx). While module_alloc for
  x86 allocates the memory as PAGE_KERNEL and not PAGE_KERNEL_EXEC, not
  all implementations of module_alloc do so
- Mark the memory as read/write (set_memory_rw)
- Modify the trampoline
- Mark the memory as read-only (set_memory_ro)
- Mark the memory as executable (set_memory_x)

There's a window between the modification of the trampoline and setting
the trampoline as read-only where an attacker and ~could~ change the
contents of the memory. This can be further improved by adding more
verification after the memory is marked as read-only.

Reported-by: Kees Cook <keescook@chromium.org>
Signed-off-by: KP Singh <kpsingh@google.com>
---
 arch/x86/net/bpf_jit_comp.c |  2 +-
 include/linux/bpf.h         |  1 -
 kernel/bpf/dispatcher.c     | 30 +++++++++++++++++++++++----
 kernel/bpf/trampoline.c     | 41 +++++++++++++++++++------------------
 4 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4c8a2d1f8470..a510f8260322 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1527,7 +1527,7 @@ int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags
 	 * Another half is an area for next trampoline.
 	 * Make sure the trampoline generation logic doesn't overflow.
 	 */
-	if (WARN_ON_ONCE(prog - (u8 *)image > PAGE_SIZE / 2 - BPF_INSN_SAFETY))
+	if (WARN_ON_ONCE(prog - (u8 *)image > PAGE_SIZE - BPF_INSN_SAFETY))
 		return -EFAULT;
 	return 0;
 }
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b14e51d56a82..3be8b1b0166d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -502,7 +502,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
 int bpf_trampoline_link_prog(struct bpf_prog *prog);
 int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
-void *bpf_jit_alloc_exec_page(void);
 #define BPF_DISPATCHER_INIT(name) {			\
 	.mutex = __MUTEX_INITIALIZER(name.mutex),	\
 	.func = &name##func,				\
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index 204ee61a3904..f4589da3bb34 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -93,13 +93,34 @@ int __weak arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs)
 static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image)
 {
 	s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0];
-	int i;
+	int i, ret;
 
 	for (i = 0; i < BPF_DISPATCHER_MAX; i++) {
 		if (d->progs[i].prog)
 			*ipsp++ = (s64)(uintptr_t)d->progs[i].prog->bpf_func;
 	}
-	return arch_prepare_bpf_dispatcher(image, &ips[0], d->num_progs);
+
+	/* First make the page non-executable and then make it RW to avoid it
+	 * from being W+X. While x86's implementation of module_alloc
+	 * allocates memory as non-executable, not all implementations do so.
+	 * Till these are fixed, explicitly mark the memory as NX.
+	 */
+	set_memory_nx((unsigned long)image, 1);
+	set_memory_rw((unsigned long)image, 1);
+
+	ret = arch_prepare_bpf_dispatcher(image, &ips[0], d->num_progs);
+	if (ret)
+		return ret;
+
+	/* First make the page read-only, and only then make it executable to
+	 * prevent it from being W+X in between.
+	 */
+	set_memory_ro((unsigned long)image, 1);
+	/* More checks can be done here to ensure that nothing was changed
+	 * between arch_prepare_bpf_dispatcher and set_memory_ro.
+	 */
+	set_memory_x((unsigned long)image, 1);
+	return 0;
 }
 
 static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
@@ -113,7 +134,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
 		noff = 0;
 	} else {
 		old = d->image + d->image_off;
-		noff = d->image_off ^ (PAGE_SIZE / 2);
+		noff = d->image_off ^ PAGE_SIZE;
 	}
 
 	new = d->num_progs ? d->image + noff : NULL;
@@ -140,10 +161,11 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
 
 	mutex_lock(&d->mutex);
 	if (!d->image) {
-		d->image = bpf_jit_alloc_exec_page();
+		d->image = bpf_jit_alloc_exec(2 * PAGE_SIZE);
 		if (!d->image)
 			goto out;
 	}
+	set_vm_flush_reset_perms(d->image);
 
 	prev_num_progs = d->num_progs;
 	changed |= bpf_dispatcher_remove_prog(d, from);
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 505f4e4b31d2..ff6a92ef8945 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -14,22 +14,6 @@ static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
 /* serializes access to trampoline_table */
 static DEFINE_MUTEX(trampoline_mutex);
 
-void *bpf_jit_alloc_exec_page(void)
-{
-	void *image;
-
-	image = bpf_jit_alloc_exec(PAGE_SIZE);
-	if (!image)
-		return NULL;
-
-	set_vm_flush_reset_perms(image);
-	/* Keep image as writeable. The alternative is to keep flipping ro/rw
-	 * everytime new program is attached or detached.
-	 */
-	set_memory_x((long)image, 1);
-	return image;
-}
-
 struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
 	struct bpf_trampoline *tr;
@@ -50,12 +34,13 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 		goto out;
 
 	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
-	image = bpf_jit_alloc_exec_page();
+	image = bpf_jit_alloc_exec(2 * PAGE_SIZE);
 	if (!image) {
 		kfree(tr);
 		tr = NULL;
 		goto out;
 	}
+	set_vm_flush_reset_perms(image);
 
 	tr->key = key;
 	INIT_HLIST_NODE(&tr->hlist);
@@ -125,14 +110,14 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 }
 
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
- * bytes on x86.  Pick a number to fit into PAGE_SIZE / 2
+ * bytes on x86.  Pick a number to fit into PAGE_SIZE.
  */
 #define BPF_MAX_TRAMP_PROGS 40
 
 static int bpf_trampoline_update(struct bpf_trampoline *tr)
 {
-	void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
-	void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
+	void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE;
+	void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE;
 	struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
 	int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
 	int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
@@ -160,6 +145,13 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
 	if (fexit_cnt)
 		flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
 
+
+	/* First make the page non-executable and then make it RW to avoid it
+	 * from being being W+X.
+	 */
+	set_memory_nx((unsigned long)new_image, 1);
+	set_memory_rw((unsigned long)new_image, 1);
+
 	err = arch_prepare_bpf_trampoline(new_image, &tr->func.model, flags,
 					  fentry, fentry_cnt,
 					  fexit, fexit_cnt,
@@ -167,6 +159,15 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
 	if (err)
 		goto out;
 
+	/* First make the page read-only, and only then make it executable to
+	 * prevent it from being W+X in between.
+	 */
+	set_memory_ro((unsigned long)new_image, 1);
+	/* More checks can be done here to ensure that nothing was changed
+	 * between arch_prepare_bpf_trampoline and set_memory_ro.
+	 */
+	set_memory_x((unsigned long)new_image, 1);
+
 	if (tr->selector)
 		/* progs already running at this address */
 		err = modify_fentry(tr, old_image, new_image);
-- 
2.20.1


             reply	other threads:[~2020-01-03 23:47 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-03 23:47 KP Singh [this message]
2020-01-04  0:49 ` [PATCH bpf-next] bpf: Make trampolines W^X Andy Lutomirski
2020-01-05  1:19   ` Justin Capella
2020-01-06  8:23   ` Peter Zijlstra
2020-01-06 22:25   ` Edgecombe, Rick P
2020-01-07  1:36     ` Andy Lutomirski
2020-01-07 19:01       ` Edgecombe, Rick P
2020-01-08  8:41         ` Andy Lutomirski
2020-01-08 20:52           ` Edgecombe, Rick P
2020-01-09  6:48             ` Andy Lutomirski
2020-01-10  1:00               ` Edgecombe, Rick P
2020-01-10 18:35                 ` Andy Lutomirski
     [not found] <CAMrEMU8Vsn8rfULqf1gfuYL_-ybqzit29CLYReskaZ8XUroZww@mail.gmail.com>
     [not found] ` <768BAF04-BEBF-489A-8737-B645816B262A@amacapital.net>
2020-01-06 22:13   ` Alexei Starovoitov
2020-01-07  9:11     ` Peter Zijlstra
2020-01-07 18:55       ` Alexei Starovoitov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200103234725.22846-1-kpsingh@chromium.org \
    --to=kpsingh@chromium.org \
    --cc=andriin@fb.com \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=jackmanb@chromium.org \
    --cc=jannh@google.com \
    --cc=kafai@fb.com \
    --cc=keescook@chromium.org \
    --cc=kuznet@ms2.inr.ac.ru \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-security-module@vger.kernel.org \
    --cc=mhalcrow@google.com \
    --cc=mjg59@google.com \
    --cc=revest@chromium.org \
    --cc=songliubraving@fb.com \
    --cc=thgarnie@chromium.org \
    --cc=x86@kernel.org \
    --cc=yhs@fb.com \
    --cc=yoshfuji@linux-ipv6.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).