LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: cgel.zte@gmail.com
To: keescook@chromium.org, christian.brauner@ubuntu.com,
	ktkhai@virtuozzo.com, jamorris@linux.microsoft.com,
	varad.gautam@suse.com, legion@kernel.org, dbueso@suse.de
Cc: linux-kernel@vger.kernel.org, Ran Xiaokai <ran.xiaokai@zte.com.cn>
Subject: [PATCH] ipc: add set_ownership() and permissions() callbacks for posix mqueue sysctl
Date: Wed, 28 Jul 2021 20:06:51 -0700	[thread overview]
Message-ID: <20210729030651.536326-1-ran.xiaokai@zte.com.cn> (raw)

From: Ran Xiaokai <ran.xiaokai@zte.com.cn>

When a non-root user process creates a user namespace and ipc namespace
with command "unshare -Ur -i", and map the root user inside
the user namesapce to the global owner of user namespace.
The newly created user namespace OWNS the ipc namespace,
So the root user inside the user namespace should have full access
rights to the ipc namespace resources.

$ id
uid=1200(u1200) gid=1200(u1200) groups=1200(u1200)
$ unshare -Ur -i
$ id
uid=0(root) gid=0(root) groups=0(root)
$ ls -l /proc/sys/fs/mqueue/
total 0
-rw-r--r-- 1 65534 65534 0 Jul 28 19:03 msg_default
-rw-r--r-- 1 65534 65534 0 Jul 28 19:03 msg_max
-rw-r--r-- 1 65534 65534 0 Jul 28 19:03 msgsize_default
-rw-r--r-- 1 65534 65534 0 Jul 28 19:03 msgsize_max
-rw-r--r-- 1 65534 65534 0 Jul 28 19:03 queues_max
-sh: /proc/sys/fs/mqueue/msg_max: Permission denied

As opposite, inside a net namespace,
1. sysctl files owners are set to the local root user
   insede the user namespace, not the GLOBAL_ROOT_UID;
2. sysctl files are writable when accessed by root user
   inside the user namespace.

$ id
uid=1200(u1200) gid=1200(u1200) groups=1200(u1200)
$ unshare -Ur -n
$ id
uid=0(root) gid=0(root) groups=0(root)
$ ls -l /proc/sys/net/ipv4/ip_forward
-rw-r--r-- 1 root root 0 Jul 28 19:04 /proc/sys/net/ipv4/ip_forward
$ echo 1 > /proc/sys/net/ipv4/ip_forward
$ cat /proc/sys/net/ipv4/ip_forward
1

This patch adds a ctl_table_set per ipc namespace, and also the
set_ownership() and permissions() callbacks for the new ctl_table_root
for ipc mqueue syscgtls.

Signed-off-by: Ran Xiaokai <ran.xiaokai@zte.com.cn>
---
 include/linux/ipc_namespace.h |  14 +++++
 ipc/mq_sysctl.c               | 116 ++++++++++++++++++++++++++++++++++++------
 ipc/mqueue.c                  |  10 +++-
 ipc/namespace.c               |   2 +
 4 files changed, 124 insertions(+), 18 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 05e2277..3e8e340 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -10,6 +10,7 @@
 #include <linux/ns_common.h>
 #include <linux/refcount.h>
 #include <linux/rhashtable-types.h>
+#include <linux/sysctl.h>
 
 struct user_namespace;
 
@@ -67,6 +68,11 @@ struct ipc_namespace {
 	struct user_namespace *user_ns;
 	struct ucounts *ucounts;
 
+#ifdef CONFIG_POSIX_MQUEUE_SYSCTL
+	struct ctl_table_set	mq_set;
+	struct ctl_table_header	*sysctls;
+#endif
+
 	struct llist_node mnt_llist;
 
 	struct ns_common ns;
@@ -155,7 +161,10 @@ static inline void put_ipc_ns(struct ipc_namespace *ns)
 #ifdef CONFIG_POSIX_MQUEUE_SYSCTL
 
 struct ctl_table_header;
+extern struct ctl_table_header *mq_sysctl_table;
 extern struct ctl_table_header *mq_register_sysctl_table(void);
+bool setup_mq_sysctls(struct ipc_namespace *ns);
+void retire_mq_sysctls(struct ipc_namespace *ns);
 
 #else /* CONFIG_POSIX_MQUEUE_SYSCTL */
 
@@ -163,6 +172,11 @@ static inline struct ctl_table_header *mq_register_sysctl_table(void)
 {
 	return NULL;
 }
+static inline bool setup_mq_sysctls(struct ipc_namespace *ns)
+{
+	return true;
+}
+static inline void retire_mq_sysctls(struct ipc_namespace *ns) { }
 
 #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */
 #endif
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
index 72a92a0..dbdd428 100644
--- a/ipc/mq_sysctl.c
+++ b/ipc/mq_sysctl.c
@@ -8,6 +8,11 @@
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
 #include <linux/sysctl.h>
+#include <linux/slab.h>
+#include <linux/user_namespace.h>
+#include <linux/capability.h>
+#include <linux/cred.h>
+#include <linux/stat.h>
 
 #ifdef CONFIG_PROC_SYSCTL
 static void *get_mq(struct ctl_table *table)
@@ -96,25 +101,104 @@ static struct ctl_table mq_sysctls[] = {
 	{}
 };
 
-static struct ctl_table mq_sysctl_dir[] = {
-	{
-		.procname	= "mqueue",
-		.mode		= 0555,
-		.child		= mq_sysctls,
-	},
-	{}
-};
+static int set_is_seen(struct ctl_table_set *set)
+{
+	return &current->nsproxy->ipc_ns->mq_set == set;
+}
 
-static struct ctl_table mq_sysctl_root[] = {
-	{
-		.procname	= "fs",
-		.mode		= 0555,
-		.child		= mq_sysctl_dir,
-	},
-	{}
+static struct ctl_table_set *
+set_lookup(struct ctl_table_root *root)
+{
+	return &current->nsproxy->ipc_ns->mq_set;
+}
+
+static int set_permissions(struct ctl_table_header *head,
+				struct ctl_table *table)
+{
+	struct ipc_namespace *ipc_ns =
+		container_of(head->set, struct ipc_namespace, mq_set);
+	struct user_namespace *user_ns = ipc_ns->user_ns;
+	int mode;
+
+	/* Allow users with CAP_SYS_RESOURCE unrestrained access */
+	if (ns_capable(user_ns, CAP_SYS_RESOURCE))
+		mode = (table->mode & S_IRWXU) >> 6;
+	else
+	/* Allow all others at most read-only access */
+		mode = table->mode & S_IROTH;
+	return (mode << 6) | (mode << 3) | mode;
+}
+
+static void set_ownership(struct ctl_table_header *head,
+				struct ctl_table *table,
+				kuid_t *uid, kgid_t *gid)
+{
+	struct ipc_namespace *ipc_ns =
+		container_of(head->set, struct ipc_namespace, mq_set);
+	struct user_namespace *user_ns = ipc_ns->user_ns;
+	kuid_t ns_root_uid;
+	kgid_t ns_root_gid;
+
+	ns_root_uid = make_kuid(user_ns, 0);
+	if (uid_valid(ns_root_uid))
+		*uid = ns_root_uid;
+
+	ns_root_gid = make_kgid(user_ns, 0);
+	if (gid_valid(ns_root_gid))
+		*gid = ns_root_gid;
+}
+
+static struct ctl_table_root mq_sysctl_root = {
+	.lookup = set_lookup,
+	.permissions = set_permissions,
+	.set_ownership = set_ownership,
 };
 
+bool setup_mq_sysctls(struct ipc_namespace *ns)
+{
+	struct ctl_table *tbl;
+
+	if (!mq_sysctl_table)
+		return false;
+
+	setup_sysctl_set(&ns->mq_set, &mq_sysctl_root, set_is_seen);
+	tbl = kmemdup(mq_sysctls, sizeof(mq_sysctls), GFP_KERNEL);
+	if (!tbl)
+		goto out;
+
+	ns->sysctls = __register_sysctl_table(&ns->mq_set, "fs/mqueue", tbl);
+	if (!ns->sysctls)
+		goto out1;
+
+	return true;
+
+out1:
+	kfree(tbl);
+	retire_sysctl_set(&ns->mq_set);
+out:
+	return false;
+}
+
+void retire_mq_sysctls(struct ipc_namespace *ns)
+{
+	struct ctl_table *tbl;
+
+	if (!ns->sysctls)
+		return;
+
+	tbl = ns->sysctls->ctl_table_arg;
+	unregister_sysctl_table(ns->sysctls);
+	retire_sysctl_set(&ns->mq_set);
+	kfree(tbl);
+}
+
 struct ctl_table_header *mq_register_sysctl_table(void)
 {
-	return register_sysctl_table(mq_sysctl_root);
+	static struct ctl_table empty[1];
+
+	/*
+	 * Register the fs/mqueue directory in the default set so that
+	 * registrations in the child sets work properly.
+	 */
+	return register_sysctl("fs/mqueue", empty);
 }
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 4e4e611..3b68564 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -163,7 +163,7 @@ static void remove_notification(struct mqueue_inode_info *info);
 
 static struct kmem_cache *mqueue_inode_cachep;
 
-static struct ctl_table_header *mq_sysctl_table;
+struct ctl_table_header *mq_sysctl_table;
 
 static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
 {
@@ -1713,6 +1713,10 @@ static int __init init_mqueue_fs(void)
 
 	/* ignore failures - they are not fatal */
 	mq_sysctl_table = mq_register_sysctl_table();
+	if (mq_sysctl_table && !setup_mq_sysctls(&init_ipc_ns)) {
+		unregister_sysctl_table(mq_sysctl_table);
+		mq_sysctl_table = NULL;
+	}
 
 	error = register_filesystem(&mqueue_fs_type);
 	if (error)
@@ -1729,8 +1733,10 @@ static int __init init_mqueue_fs(void)
 out_filesystem:
 	unregister_filesystem(&mqueue_fs_type);
 out_sysctl:
-	if (mq_sysctl_table)
+	if (mq_sysctl_table) {
+		retire_mq_sysctls(&init_ipc_ns);
 		unregister_sysctl_table(mq_sysctl_table);
+	}
 	kmem_cache_destroy(mqueue_inode_cachep);
 	return error;
 }
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 7bd0766..c891cc1 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -58,6 +58,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	err = mq_init_ns(ns);
 	if (err)
 		goto fail_put;
+	setup_mq_sysctls(ns);
 
 	sem_init_ns(ns);
 	msg_init_ns(ns);
@@ -121,6 +122,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
 	 * uses synchronize_rcu().
 	 */
 	mq_put_mnt(ns);
+	retire_mq_sysctls(ns);
 	sem_exit_ns(ns);
 	msg_exit_ns(ns);
 	shm_exit_ns(ns);
-- 
2.15.2



             reply	other threads:[~2021-07-29  3:06 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-29  3:06 cgel.zte [this message]
2021-07-29 14:53 ` Christian Brauner
2021-08-03 10:31   ` CGEL
2021-08-03 14:01     ` Christian Brauner
2021-08-11 15:51       ` CGEL
2021-08-23  3:29       ` [PATCH] tests: add mqueue sysctl tests for user namespace Ran Xiaokai
2021-08-23 15:26         ` Davidlohr Bueso
2021-08-24 12:05         ` Christian Brauner
2021-08-27  9:50           ` [PATCH V2] " CGEL
2021-08-27 10:12           ` [PATCH V2] ipc: add set_ownership() and permissions() callbacks for posix mqueue sysctl CGEL
2021-09-13 14:40             ` Christian Brauner
2021-09-13 19:42               ` Davidlohr Bueso
2021-09-16  1:49               ` CGEL
2021-10-04 10:53                 ` Christian Brauner
2021-12-01  7:14                   ` CGEL
2021-12-01 12:53                     ` Christian Brauner
2022-04-06  7:59                       ` cgel.zte
2021-07-30 15:09 ` [PATCH] " Davidlohr Bueso
2021-08-03 10:34   ` CGEL

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210729030651.536326-1-ran.xiaokai@zte.com.cn \
    --to=cgel.zte@gmail.com \
    --cc=christian.brauner@ubuntu.com \
    --cc=dbueso@suse.de \
    --cc=jamorris@linux.microsoft.com \
    --cc=keescook@chromium.org \
    --cc=ktkhai@virtuozzo.com \
    --cc=legion@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ran.xiaokai@zte.com.cn \
    --cc=varad.gautam@suse.com \
    --subject='Re: [PATCH] ipc: add set_ownership() and permissions() callbacks for posix mqueue sysctl' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).