Linux-Fsdevel Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Waiman Long <longman@redhat.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@kernel.org>,
	Vladimir Davydov <vdavydov.dev@gmail.com>,
	Jonathan Corbet <corbet@lwn.net>,
	Alexey Dobriyan <adobriyan@gmail.com>,
	Ingo Molnar <mingo@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>
Cc: linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, cgroups@vger.kernel.org,
	linux-mm@kvack.org, Waiman Long <longman@redhat.com>
Subject: [RFC PATCH 2/8] memcg, mm: Return ENOMEM or delay if memcg_over_limit
Date: Mon, 17 Aug 2020 10:08:25 -0400	[thread overview]
Message-ID: <20200817140831.30260-3-longman@redhat.com> (raw)
In-Reply-To: <20200817140831.30260-1-longman@redhat.com>

The brk(), mmap(), mlock(), mlockall() and mprotect() syscalls are
modified to check the memcg_over_limit flag and to return ENOMEM when
the flag is set and the memory control action is PR_MEMACT_ENOMEM.

If the action is PR_MEMACT_SLOWDOWN, an artificial delay of 20ms is
added instead to slow down these memory allocation syscalls.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 include/linux/sched.h | 16 ++++++++++++++++
 kernel/fork.c         |  1 +
 mm/memcontrol.c       | 25 +++++++++++++++++++++++--
 mm/mlock.c            |  6 ++++++
 mm/mmap.c             | 12 ++++++++++++
 mm/mprotect.c         |  3 +++
 6 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c79d606d27ab..9ec1bd072334 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1477,6 +1477,22 @@ static inline char task_state_to_char(struct task_struct *tsk)
 	return task_index_to_char(task_state_index(tsk));
 }
 
+#ifdef CONFIG_MEMCG
+extern bool mem_cgroup_check_over_limit(void);
+
+static inline bool mem_over_memcg_limit(void)
+{
+	if (READ_ONCE(current->memcg_over_limit))
+		return mem_cgroup_check_over_limit();
+	return false;
+}
+#else
+static inline bool mem_over_memcg_limit(void)
+{
+	return false;
+}
+#endif
+
 /**
  * is_global_init - check if a task structure is init. Since init
  * is free to have sub-threads we need to check tgid.
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d32190861bd..61f9a9e5f857 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -940,6 +940,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 
 #ifdef CONFIG_MEMCG
 	tsk->active_memcg = NULL;
+	tsk->memcg_over_limit = false;
 #endif
 	return tsk;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1106dac024ac..5cad7bb26d13 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2646,7 +2646,9 @@ static bool __mem_cgroup_over_high_action(struct mem_cgroup *memcg, u8 action)
 	if (!mm)
 		return true;	/* No more check is needed */
 
-	current->memcg_over_limit = false;
+	if (READ_ONCE(current->memcg_over_limit))
+		WRITE_ONCE(current->memcg_over_limit, false);
+
 	if ((action == PR_MEMACT_SIGNAL) && !signal)
 		goto out;
 
@@ -2660,7 +2662,11 @@ static bool __mem_cgroup_over_high_action(struct mem_cgroup *memcg, u8 action)
 		WRITE_ONCE(current->memcg_over_limit, true);
 		break;
 	case PR_MEMACT_SLOWDOWN:
-		/* Slow down by yielding the cpu */
+		/*
+		 * Slow down by yielding the cpu & adding delay to
+		 * memory allocation syscalls.
+		 */
+		WRITE_ONCE(current->memcg_over_limit, true);
 		set_tsk_need_resched(current);
 		set_preempt_need_resched();
 		break;
@@ -2694,6 +2700,21 @@ static inline bool mem_cgroup_over_high_action(struct mem_cgroup *memcg)
 	return __mem_cgroup_over_high_action(memcg, action);
 }
 
+/*
+ * Called from memory allocation syscalls.
+ * Return true if ENOMEM should be returned, false otherwise.
+ */
+bool mem_cgroup_check_over_limit(void)
+{
+	u8 action = READ_ONCE(current->memcg_over_high_action);
+
+	if (action == PR_MEMACT_ENOMEM)
+		return true;
+	if (action == PR_MEMACT_SLOWDOWN)
+		msleep(20);	/* Artificial delay of 20ms */
+	return false;
+}
+
 static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		      unsigned int nr_pages)
 {
diff --git a/mm/mlock.c b/mm/mlock.c
index 93ca2bf30b4f..130d4b3fa0f5 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -678,6 +678,9 @@ static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla
 	if (!can_do_mlock())
 		return -EPERM;
 
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	len = PAGE_ALIGN(len + (offset_in_page(start)));
 	start &= PAGE_MASK;
 
@@ -807,6 +810,9 @@ SYSCALL_DEFINE1(mlockall, int, flags)
 	if (!can_do_mlock())
 		return -EPERM;
 
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	lock_limit = rlimit(RLIMIT_MEMLOCK);
 	lock_limit >>= PAGE_SHIFT;
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 40248d84ad5f..873ccf2560a6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -198,6 +198,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	bool downgraded = false;
 	LIST_HEAD(uf);
 
+	/* Too much memory used? */
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
@@ -1407,6 +1411,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 	if (mm->map_count > sysctl_max_map_count)
 		return -ENOMEM;
 
+	/* Too much memory used? */
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	/* Obtain the address to map to. we verify (or select) it and ensure
 	 * that it represents a valid section of the address space.
 	 */
@@ -1557,6 +1565,10 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 	struct file *file = NULL;
 	unsigned long retval;
 
+	/* Too much memory used? */
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	if (!(flags & MAP_ANONYMOUS)) {
 		audit_mmap_fd(fd, flags);
 		file = fget(fd);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ce8b8a5eacbb..b2c0f50bb0a0 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -519,6 +519,9 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
 	const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
 				(prot & PROT_READ);
 
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	start = untagged_addr(start);
 
 	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
-- 
2.18.1


  parent reply	other threads:[~2020-08-17 14:10 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-17 14:08 [RFC PATCH 0/8] memcg: Enable fine-grained per process memory control Waiman Long
2020-08-17 14:08 ` [RFC PATCH 1/8] memcg: Enable fine-grained control of over memory.high action Waiman Long
2020-08-17 14:30   ` Chris Down
2020-08-17 15:38     ` Waiman Long
2020-08-17 16:11       ` Chris Down
2020-08-17 16:44   ` Shakeel Butt
2020-08-17 16:56     ` Chris Down
2020-08-18 19:12       ` Waiman Long
2020-08-18 19:14     ` Waiman Long
2020-08-17 14:08 ` Waiman Long [this message]
2020-08-17 14:08 ` [RFC PATCH 3/8] memcg: Allow the use of task RSS memory as over-high action trigger Waiman Long
2020-08-17 14:08 ` [RFC PATCH 4/8] fs/proc: Support a new procfs memctl file Waiman Long
2020-08-17 14:08 ` [RFC PATCH 5/8] memcg: Allow direct per-task memory limit checking Waiman Long
2020-08-17 14:08 ` [RFC PATCH 6/8] memcg: Introduce additional memory control slowdown if needed Waiman Long
2020-08-17 14:08 ` [RFC PATCH 7/8] memcg: Enable logging of memory control mitigation action Waiman Long
2020-08-17 14:08 ` [RFC PATCH 8/8] memcg: Add over-high action prctl() documentation Waiman Long
2020-08-17 15:26 ` [RFC PATCH 0/8] memcg: Enable fine-grained per process memory control Michal Hocko
2020-08-17 15:55   ` Waiman Long
2020-08-17 19:26     ` Michal Hocko
2020-08-18 19:20       ` Waiman Long
2020-08-18  9:14 ` peterz
2020-08-18  9:26   ` Michal Hocko
2020-08-18  9:59     ` peterz
2020-08-18 10:05       ` Michal Hocko
2020-08-18 10:18         ` peterz
2020-08-18 10:30           ` Michal Hocko
2020-08-18 10:36             ` peterz
2020-08-18 13:49           ` Johannes Weiner
2020-08-21 19:37             ` Peter Zijlstra
2020-08-24 16:58               ` Johannes Weiner
2020-09-07 11:47                 ` Chris Down
2020-09-09 11:53                 ` Michal Hocko
2020-08-18 10:17       ` Chris Down
2020-08-18 10:26         ` peterz
2020-08-18 10:35           ` Chris Down
2020-08-23  2:49         ` Waiman Long
2020-08-18  9:27   ` Chris Down
2020-08-18 10:04     ` peterz
2020-08-18 12:55       ` Matthew Wilcox
2020-08-20  6:11         ` Dave Chinner
2020-08-18 19:30     ` Waiman Long
2020-08-18 19:27   ` Waiman Long

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200817140831.30260-3-longman@redhat.com \
    --to=longman@redhat.com \
    --cc=adobriyan@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=cgroups@vger.kernel.org \
    --cc=corbet@lwn.net \
    --cc=hannes@cmpxchg.org \
    --cc=juri.lelli@redhat.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=vdavydov.dev@gmail.com \
    --cc=vincent.guittot@linaro.org \
    --subject='Re: [RFC PATCH 2/8] memcg, mm: Return ENOMEM or delay if memcg_over_limit' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).