LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: balbir@linux.vnet.ibm.com
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"menage@google.com" <menage@google.com>,
	"nishimura@mxp.nes.nec.co.jp" <nishimura@mxp.nes.nec.co.jp>
Subject: [RFC][PATCH 7/6] memcg: add atribute (for change bahavior of rmdir)
Date: Thu, 6 Nov 2008 19:41:53 +0900	[thread overview]
Message-ID: <20081106194153.220157ec.kamezawa.hiroyu@jp.fujitsu.com> (raw)
In-Reply-To: <49129493.9070103@linux.vnet.ibm.com>

On Thu, 06 Nov 2008 12:24:11 +0530
Balbir Singh <balbir@linux.vnet.ibm.com> wrote:

> KAMEZAWA Hiroyuki wrote:
> > Weekly (RFC) update for memcg.
> > 
> > This set includes
> > 
> > 1. change force_empty to do move account rather than forget all
> 
> I would like this to be selectable, please. We don't want to break behaviour and
> not everyone would like to pay the cost of movement.

How about a patch like this? I'd like to move this to [2/7], if possible.
It obviously needs painful rework. If I find that too difficult, I'll schedule this as [7/7].

BTW, the cost of movement itself is not far from the cost of force_empty.

If you can't see why "forget" is bad, please think it over for one more day.

==
This patch adds an attribute file to the memory resource controller.

The memory.attribute file allows an attribute to be set or cleared as follows:
  #echo attribute option > memory.attribute

This patch implements one attribute:

 # echo on_rmdir [keep | drop] > memory.attribute

 When on_rmdir=keep, memory remaining in memcg will be moved up to parent
 at rmdir. This is fast.
 When on_rmdir=drop, memory remaining in memcg will be freed.

 Characteristics of keep:
  - Fast.
  - Doesn't cause unnecessary freeing of memory (page cache).
    (IOW, page cache for temporary files or other unnecessary pages will be kept.)
 Characteristics of drop:
  - Slow.
  - No influence on its parent; all page caches will be dropped.
    (IOW, page cache for libc or other important pages will be dropped.)

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

 Documentation/controllers/memory.txt |   22 +++++
 mm/memcontrol.c                      |  134 ++++++++++++++++++++++++++++++++++-
 2 files changed, 154 insertions(+), 2 deletions(-)

Index: mmotm-2.6.28-rc2-Oct30/mm/memcontrol.c
===================================================================
--- mmotm-2.6.28-rc2-Oct30.orig/mm/memcontrol.c
+++ mmotm-2.6.28-rc2-Oct30/mm/memcontrol.c
@@ -35,6 +35,7 @@
 #include <linux/vmalloc.h>
 #include <linux/mm_inline.h>
 #include <linux/page_cgroup.h>
+#include <linux/ctype.h>
 #include "internal.h"
 
 #include <asm/uaccess.h>
@@ -146,6 +147,10 @@ struct mem_cgroup {
 	 */
 	int	prev_priority;	/* for recording reclaim priority */
 	/*
+	 * attribute
+	 */
+	char		drop_on_rmdir;
+	/*
 	 * used for counting reference from swap_cgroup.
 	 */
 	int		obsolete;
@@ -182,6 +187,22 @@ pcg_default_flags[NR_CHARGE_TYPE] = {
 #define MEMFILE_TYPE(val)	(((val) >> 16) & 0xffff)
 #define MEMFILE_ATTR(val)	((val) & 0xffff)
 
+/*
+ *  The default value of each memcg attribute is inherited from its parent.
+ *  The root memcg defaults to on_rmdir=drop (drop_on_rmdir = 1).
+ */
+enum {
+	MEMCG_ATTR_ON_RMDIR, /* drop_all if true, default is true. */
+	MEMCG_LAST_ATTR,
+};
+/* we may have to check status under racy situation. use global mutex. */
+DEFINE_MUTEX(memcg_attr_mutex);
+
+static char *memcg_attribute_names[MEMCG_LAST_ATTR] = {
+	"on_rmdir",
+};
+
+
 static void mem_cgroup_forget_swapref(struct mem_cgroup *mem);
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
@@ -1294,6 +1315,10 @@ static int mem_cgroup_force_empty(struct
 	css_get(&mem->css);
 
 	shrink = 0;
+	/* If this is true, free all orphan pages on LRU as much as possible */
+	if (mem->drop_on_rmdir)
+		goto try_to_free;
+
 move_account:
 	while (mem->res.usage > 0) {
 		ret = -EBUSY;
@@ -1311,6 +1336,9 @@ move_account:
 		/* it seems parent cgroup doesn't have enough mem */
 		if (ret == -ENOMEM)
 			goto try_to_free;
+		ret = -EINTR;
+		if (signal_pending(current))
+			goto out;
 		cond_resched();
 	}
 	ret = 0;
@@ -1332,6 +1360,10 @@ try_to_free:
 						  GFP_HIGHUSER_MOVABLE, false);
 		if (!progress)
 			nr_retries--;
+		ret = -EINTR;
+		if (signal_pending(current))
+			goto out;
+		cond_resched();
 
 	}
 	lru_add_drain();
@@ -1475,6 +1507,95 @@ static int mem_control_stat_show(struct 
 	return 0;
 }
 
+
+/*
+ * Assumes
+ * #echo attribute [option] > memory.attribute
+ */
+static int
+parse_attr_option(char *buffer, char **attr, char **option, char **end)
+{
+	char *c = buffer;
+
+	*attr = NULL;
+	*option = NULL;
+	/* skip white space */
+	for (; *c && isspace(*c);c++);
+	/* found NULL ? */
+	if (!*c)
+		return -EINVAL;
+	*attr = c;
+	/* skip attribute */
+	for (; *c && !isspace(*c);c++);
+	/* skip space */
+	for (; *c && isspace(*c);c++);
+	/* pass pointer to option */
+	*option = c;
+	for (; *c; c++);
+	*end = c;
+	return 0;
+
+}
+
+static int mem_cgroup_write_attr(struct cgroup *cont,
+				     struct cftype *cft,
+				     const char *buffer)
+{
+	int i, len;
+	char *attr, *option, *end;
+	int ret = -EINVAL;
+	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+
+	mutex_lock(&memcg_attr_mutex);
+	/* parse attribute option */
+	ret = parse_attr_option((char*)buffer, &attr, &option, &end);
+	if (ret)
+		goto out;
+	for (i = 0; i < MEMCG_LAST_ATTR; i++) {
+
+		len = strlen(memcg_attribute_names[i]);
+		if ((option - attr) < len)
+			continue;
+		if (!strncmp(memcg_attribute_names[i], attr, len))
+			break;
+	}
+	ret = -EINVAL;
+	if (i == MEMCG_LAST_ATTR)
+		goto out;
+	switch(i) {
+	case MEMCG_ATTR_ON_RMDIR:
+		if ((end - option) < 4)
+			break;
+		ret = 0;
+		if (strncmp(option, "keep", 4) == 0)
+			mem->drop_on_rmdir = 0;
+		else if (strncmp(option, "drop", 4) == 0)
+			mem->drop_on_rmdir = 1;
+		else
+			ret = -EINVAL;
+		break;
+	}
+out:
+	mutex_unlock(&memcg_attr_mutex);
+	return ret;
+}
+
+static int mem_cgroup_read_attr(struct cgroup *cont, struct cftype *cft,
+				 struct seq_file *m)
+{
+	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+	char *s;
+
+	s = memcg_attribute_names[MEMCG_ATTR_ON_RMDIR];
+
+	if (mem->drop_on_rmdir)
+		seq_printf(m, "%s drop\n",s);
+	else
+		seq_printf(m, "%s keep\n",s);
+
+	return 0;
+}
+
 static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "usage_in_bytes",
@@ -1503,6 +1624,11 @@ static struct cftype mem_cgroup_files[] 
 		.name = "stat",
 		.read_map = mem_control_stat_show,
 	},
+	{
+		.name = "attribute",
+		.write_string = mem_cgroup_write_attr,
+		.read_seq_string = mem_cgroup_read_attr,
+	},
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 	{
 		.name = "memsw.usage_in_bytes",
@@ -1640,20 +1766,26 @@ static void __init enable_swap_cgroup(vo
 static struct cgroup_subsys_state *
 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 {
-	struct mem_cgroup *mem;
+	struct mem_cgroup *mem, *parent;
 	int node;
 
 	if (unlikely((cont->parent) == NULL)) {
 		mem = &init_mem_cgroup;
 		enable_swap_cgroup();
+		parent = NULL;
+		mem->drop_on_rmdir = 1; /* default is drop */
 	} else {
 		mem = mem_cgroup_alloc();
 		if (!mem)
 			return ERR_PTR(-ENOMEM);
+		parent = mem_cgroup_from_cont(cont->parent);
 	}
 
 	res_counter_init(&mem->res);
 	res_counter_init(&mem->memsw);
+	/* inherit */
+	if (parent)
+		mem->drop_on_rmdir = parent->drop_on_rmdir;
 
 	for_each_node_state(node, N_POSSIBLE)
 		if (alloc_mem_cgroup_per_zone_info(mem, node))
Index: mmotm-2.6.28-rc2-Oct30/Documentation/controllers/memory.txt
===================================================================
--- mmotm-2.6.28-rc2-Oct30.orig/Documentation/controllers/memory.txt
+++ mmotm-2.6.28-rc2-Oct30/Documentation/controllers/memory.txt
@@ -270,8 +270,28 @@ Charges recorded in swap information is 
 Recorded information is effectively discarded and a cgroup which uses swap
 (swapcache) will be charged as a new owner of it.
 
+5. Attributes
+memory.attribute file is provided to set per-memcg attribute.
+You can specify attribute by
+ #echo attribute option > memory.attribute
+
+5.1 on_rmdir
+Sets the behavior of memcg at rmdir (cgroup destruction). The default is "drop".
+
+  5.1.1 drop
+	#echo on_rmdir drop > memory.attribute
+	This is the default. All pages on this memcg will be freed.
+	If pages are locked or too busy, they will be moved up to the parent.
+	Useful when you want to drop (large) page caches used in this memcg.
+
+  5.1.2 keep
+	#echo on_rmdir keep > memory.attribute
+	All pages on this memcg will be moved to its parent.
+	Useful when you don't want to drop page caches used in this memcg.
+	You can keep page caches from some library or DB accessed by this
+	memcg on memory.
 
-5. TODO
+6. TODO
 
 1. Add support for accounting huge pages (as a separate controller)
 2. Make per-cgroup scanner reclaim not-shared pages first


  parent reply	other threads:[~2008-11-06 10:42 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-11-05  8:16 [RFC][PATCH 0/6] memcg updates (05/Nov) KAMEZAWA Hiroyuki
2008-11-05  8:18 ` [RFC][PATCH 1/6] memcg: move all accounts to parent at rmdir() KAMEZAWA Hiroyuki
2008-11-05  8:20 ` [RFC][PATCH 2/6] memcg: handle swap cache KAMEZAWA Hiroyuki
2008-11-07  8:53   ` Daisuke Nishimura
2008-11-07  9:13     ` KAMEZAWA Hiroyuki
2008-11-05  8:20 ` [RFC][PATCH 3/6] memcg : mem+swap controller kconfig KAMEZAWA Hiroyuki
2008-11-06 11:07   ` Daisuke Nishimura
2008-11-05  8:21 ` [RFC][PATCH 4/6] memcg : swap cgroup KAMEZAWA Hiroyuki
2008-11-06 11:25   ` Daisuke Nishimura
2008-11-06 12:44     ` KAMEZAWA Hiroyuki
2008-11-07  1:19       ` Daisuke Nishimura
2008-11-05  8:23 ` [RFC][PATCH 5/6] memcg: mem+swap controller KAMEZAWA Hiroyuki
2008-11-07  9:02   ` Daisuke Nishimura
2008-11-07  9:19     ` KAMEZAWA Hiroyuki
2008-11-07 13:30       ` Daisuke Nishimura
2008-11-07 13:21   ` Daisuke Nishimura
2008-11-10  4:30   ` Daisuke Nishimura
2008-11-10  7:03     ` KAMEZAWA Hiroyuki
2008-11-05  8:24 ` [RFC][PATCH 6/6] memcg: synchronized LRU KAMEZAWA Hiroyuki
2008-11-06  6:54 ` [RFC][PATCH 0/6] memcg updates (05/Nov) Balbir Singh
2008-11-06  7:03   ` KAMEZAWA Hiroyuki
2008-11-06 10:41   ` KAMEZAWA Hiroyuki [this message]
2008-11-06 11:59     ` [RFC][PATCH 7/6] memcg: add atribute (for change bahavior of rmdir) Hugh Dickins
2008-11-06 12:47       ` [RFC][PATCH 7/6] memcg: add atribute (for change bahavior ofrmdir) KAMEZAWA Hiroyuki
2008-11-06 13:46     ` [RFC][PATCH 7/6] memcg: add atribute (for change bahavior of rmdir) Balbir Singh
2008-11-06 14:30       ` [RFC][PATCH 7/6] memcg: add atribute (for change bahavior ofrmdir) KAMEZAWA Hiroyuki
2008-11-07  1:12         ` KAMEZAWA Hiroyuki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20081106194153.220157ec.kamezawa.hiroyu@jp.fujitsu.com \
    --to=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=menage@google.com \
    --cc=nishimura@mxp.nes.nec.co.jp \
    --subject='Re: [RFC][PATCH 7/6] memcg: add atribute (for change bahavior of rmdir)' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).