LKML Archive on lore.kernel.org help / color / mirror / Atom feed
From: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com> To: linux-kernel@vger.kernel.org Cc: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>, "Eric W . Biederman" <ebiederm@xmission.com>, Manfred Spraul <manfred@colorfullife.com>, Andrew Morton <akpm@linux-foundation.org>, Davidlohr Bueso <dave@stgolabs.net>, Johannes Weiner <hannes@cmpxchg.org>, Michal Hocko <mhocko@suse.com>, Vladimir Davydov <vdavydov.dev@gmail.com>, Andrei Vagin <avagin@gmail.com>, Christian Brauner <christian@brauner.io>, Pavel Tikhomirov <ptikhomirov@virtuozzo.com>, Alexander Mikhalitsyn <alexander@mihalicyn.com> Subject: [RFC PATCH] shm: extend forced shm destroy to support objects from several IPC nses Date: Wed, 14 Jul 2021 20:30:05 +0300 [thread overview] Message-ID: <20210714173005.491941-1-alexander.mikhalitsyn@virtuozzo.com> (raw) In-Reply-To: <CAJqdLrov0VBzHamSvMRKBHSJX+NROUx0TUsRD9U0zEWUn5XxDA@mail.gmail.com> This is total rework of fix. Thanks to Eric Biederman for suggestions (but may be I've misunderstood some of them :)) I've tested it with reproducer of the original problem. But of course it needs detailed testing. I hope that I get some general comments about design and implementation. ToDo: remove unneeded "ns" argument from shm_destroy, shm_rmid and other functions. Fixes: ab602f79915 ("shm: make exit_shm work proportional to task activity") Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Manfred Spraul <manfred@colorfullife.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@suse.com> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> Cc: Andrei Vagin <avagin@gmail.com> Cc: Christian Brauner <christian@brauner.io> Cc: Pavel Tikhomirov <ptikhomirov@virtuozzo.com> Cc: Alexander Mikhalitsyn <alexander@mihalicyn.com> Cc: linux-kernel@vger.kernel.org Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com> --- include/linux/shm.h | 5 +- ipc/shm.c | 128 +++++++++++++++++++++++++++++++------------- 2 files changed, 95 insertions(+), 38 deletions(-) diff --git a/include/linux/shm.h b/include/linux/shm.h index d8e69aed3d32..8053ed1433df 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -11,14 +11,15 @@ struct file; #ifdef CONFIG_SYSVIPC struct sysv_shm { - struct list_head shm_clist; + spinlock_t shm_clist_lock; + struct list_head shm_clist; }; long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, unsigned long shmlba); bool is_file_shm_hugepages(struct file *file); void exit_shm(struct task_struct *task); -#define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist) +void shm_init_task(struct task_struct *task); #else struct sysv_shm { /* empty */ diff --git a/ipc/shm.c b/ipc/shm.c index 748933e376ca..a746886a0e00 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -65,6 +65,7 @@ struct shmid_kernel /* private to the kernel */ /* The task created the shm object. NULL if the task is dead. */ struct task_struct *shm_creator; struct list_head shm_clist; /* list by creator */ + struct ipc_namespace *ns; } __randomize_layout; /* shm_mode upper byte flags */ @@ -115,6 +116,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) struct shmid_kernel *shp; shp = container_of(ipcp, struct shmid_kernel, shm_perm); + BUG_ON(shp->ns && ns != shp->ns); if (shp->shm_nattch) { shp->shm_perm.mode |= SHM_DEST; @@ -225,9 +227,32 @@ static void shm_rcu_free(struct rcu_head *head) kfree(shp); } +static inline void task_shm_clist_lock(struct task_struct *task) +{ + spin_lock(&task->sysvshm.shm_clist_lock); +} + +static inline void task_shm_clist_unlock(struct task_struct *task) +{ + spin_unlock(&task->sysvshm.shm_clist_lock); +} + +void shm_clist_rm(struct shmid_kernel *shp) +{ + if (!shp->shm_creator) + return; + + task_shm_clist_lock(shp->shm_creator); + list_del_init(&shp->shm_clist); + task_shm_clist_unlock(shp->shm_creator); + shp->shm_creator = NULL; +} + static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) { - list_del(&s->shm_clist); + WARN_ON(s->ns && ns != s->ns); + //list_del_init(&s->shm_clist); + shm_clist_rm(s); ipc_rmid(&shm_ids(ns), &s->shm_perm); } @@ -306,10 +331,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) * * 2) sysctl kernel.shm_rmid_forced is set to 1. */ -static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) +static bool shm_may_destroy(struct shmid_kernel *shp) { return (shp->shm_nattch == 0) && - (ns->shm_rmid_forced || + (shp->ns->shm_rmid_forced || (shp->shm_perm.mode & SHM_DEST)); } @@ -340,7 +365,7 @@ static void shm_close(struct vm_area_struct *vma) ipc_update_pid(&shp->shm_lprid, task_tgid(current)); shp->shm_dtim = ktime_get_real_seconds(); shp->shm_nattch--; - if (shm_may_destroy(ns, shp)) + if (shm_may_destroy(shp)) shm_destroy(ns, shp); else shm_unlock(shp); @@ -361,10 +386,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) * * As shp->* are changed under rwsem, it's safe to skip shp locking. */ - if (shp->shm_creator != NULL) + if (!list_empty(&shp->shm_clist)) return 0; - if (shm_may_destroy(ns, shp)) { + if (shm_may_destroy(shp)) { shm_lock_by_ptr(shp); shm_destroy(ns, shp); } @@ -379,51 +404,77 @@ void shm_destroy_orphaned(struct ipc_namespace *ns) up_write(&shm_ids(ns).rwsem); } +void shm_init_task(struct task_struct *task) +{ + INIT_LIST_HEAD(&(task)->sysvshm.shm_clist); + spin_lock_init(&task->sysvshm.shm_clist_lock); +} + /* Locking assumes this will only be called with task == current */ void exit_shm(struct task_struct *task) { - struct ipc_namespace *ns = task->nsproxy->ipc_ns; + LIST_HEAD(tmp); struct shmid_kernel *shp, *n; if (list_empty(&task->sysvshm.shm_clist)) return; - /* - * If kernel.shm_rmid_forced is not set then only keep track of - * which shmids are orphaned, so that a later set of the sysctl - * can clean them up. - */ - if (!ns->shm_rmid_forced) { - down_read(&shm_ids(ns).rwsem); - list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist) - shp->shm_creator = NULL; - /* - * Only under read lock but we are only called on current - * so no entry on the list will be shared. - */ - list_del(&task->sysvshm.shm_clist); - up_read(&shm_ids(ns).rwsem); - return; - } + rcu_read_lock(); /* for refcount_inc_not_zero */ + task_shm_clist_lock(task); - /* - * Destroy all already created segments, that were not yet mapped, - * and mark any mapped as orphan to cover the sysctl toggling. - * Destroy is skipped if shm_may_destroy() returns false. - */ - down_write(&shm_ids(ns).rwsem); list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) { + struct ipc_namespace *ns = shp->ns; + + /* + * Remove shm from task list and nullify shm_creator which + * marks object as orphaned. + * + * If kernel.shm_rmid_forced is not set then only keep track of + * which shmids are orphaned, so that a later set of the sysctl + * can clean them up. + */ + list_del_init(&shp->shm_clist); shp->shm_creator = NULL; - if (shm_may_destroy(ns, shp)) { - shm_lock_by_ptr(shp); - shm_destroy(ns, shp); + printk("exit_shm() %px refcnt=%u, id=%d,key=%x\n", shp, + refcount_read(&shp->shm_perm.refcount), + shp->shm_perm.id, shp->shm_perm.key + ); + + /* + * Will destroy all already created segments, that were not yet mapped, + * and mark any mapped as orphan to cover the sysctl toggling. + * Destroy is skipped if shm_may_destroy() returns false. + */ + if (shp->ns->shm_rmid_forced && shm_may_destroy(shp)) { + /* + * We may race with shm_exit_ns() if refcounter + * already zero. Let's skip shm_destroy() of such + * shm object as it will be destroyed during shm_exit_ns() + */ + if (!refcount_inc_not_zero(&ns->ns.count)) /* get_ipc_ns */ + continue; + + list_add(&shp->shm_clist, &tmp); } } - /* Remove the list head from any segments still attached. */ list_del(&task->sysvshm.shm_clist); - up_write(&shm_ids(ns).rwsem); + task_shm_clist_unlock(task); + rcu_read_unlock(); + + list_for_each_entry_safe(shp, n, &tmp, shm_clist) { + struct ipc_namespace *ns = shp->ns; + + list_del_init(&shp->shm_clist); + + down_write(&shm_ids(ns).rwsem); + shm_lock_by_ptr(shp); + /* will do put_ipc_ns(shp->ns) */ + shm_destroy(ns, shp); + up_write(&shm_ids(ns).rwsem); + put_ipc_ns(ns); /* see refcount_inc_not_zero */ + } } static vm_fault_t shm_fault(struct vm_fault *vmf) @@ -681,6 +732,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) goto no_id; list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist); + shp->ns = ns; /* * shmid gets reported as "inode#" in /proc/pid/maps. @@ -1573,7 +1625,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, down_write(&shm_ids(ns).rwsem); shp = shm_lock(ns, shmid); shp->shm_nattch--; - if (shm_may_destroy(ns, shp)) +#if 0 + if (shp->shm_nattch) + list_del_init(&shp->shm_clist); +#endif + if (shm_may_destroy(shp)) shm_destroy(ns, shp); else shm_unlock(shp); -- 2.31.1
next prev parent reply other threads:[~2021-07-14 17:30 UTC|newest] Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-07-06 13:22 [PATCH 0/2] shm: omit forced shm destroy if task IPC namespace was changed Alexander Mikhalitsyn 2021-07-06 13:22 ` [PATCH 1/2] shm: skip shm_destroy " Alexander Mikhalitsyn 2021-07-06 13:22 ` [PATCH 2/2] ipc: WARN if trying to remove ipc object which is absent Alexander Mikhalitsyn 2021-07-10 1:12 ` [PATCH 0/2] shm: omit forced shm destroy if task IPC namespace was changed Andrew Morton 2021-07-10 10:55 ` Alexander Mihalicyn [not found] ` <CALgW_8VUk0us_umLncUv2DUMkOi3qixmT+YkHV4Dhpt_nNMZHw@mail.gmail.com> 2021-07-11 10:33 ` Alexander Mihalicyn 2021-07-11 11:46 ` Manfred Spraul 2021-07-12 9:54 ` Alexander Mihalicyn 2021-07-12 19:18 ` Eric W. Biederman 2021-07-12 19:27 ` Alexander Mihalicyn 2021-07-14 17:30 ` Alexander Mikhalitsyn [this message] 2021-07-21 6:32 ` [RFC PATCH] shm: extend forced shm destroy to support objects from several IPC nses Manfred Spraul 2021-07-22 18:46 ` Manfred Spraul 2021-07-30 14:52 ` Alexander Mihalicyn 2021-07-14 17:42 ` [PATCH 0/2] shm: omit forced shm destroy if task IPC namespace was changed Alexander Mihalicyn 2021-09-23 16:36 ` Manfred Spraul 2021-09-24 16:45 ` Eric W. Biederman
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20210714173005.491941-1-alexander.mikhalitsyn@virtuozzo.com \ --to=alexander.mikhalitsyn@virtuozzo.com \ --cc=akpm@linux-foundation.org \ --cc=alexander@mihalicyn.com \ --cc=avagin@gmail.com \ --cc=christian@brauner.io \ --cc=dave@stgolabs.net \ --cc=ebiederm@xmission.com \ --cc=hannes@cmpxchg.org \ --cc=linux-kernel@vger.kernel.org \ --cc=manfred@colorfullife.com \ --cc=mhocko@suse.com \ --cc=ptikhomirov@virtuozzo.com \ --cc=vdavydov.dev@gmail.com \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).