LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Balbir Singh <balbir@linux.vnet.ibm.com>
To: linux-mm@kvack.org
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>,
	Paul Menage <menage@google.com>,
	lizf@cn.fujitsu.com, linux-kernel@vger.kernel.org,
	Nick Piggin <nickpiggin@yahoo.com.au>,
	David Rientjes <rientjes@google.com>,
	Pavel Emelianov <xemul@openvz.org>,
	Dhaval Giani <dhaval@linux.vnet.ibm.com>,
	Balbir Singh <balbir@linux.vnet.ibm.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Subject: [mm] [PATCH 2/4] Memory cgroup resource counters for hierarchy
Date: Sun, 02 Nov 2008 00:18:37 +0530	[thread overview]
Message-ID: <20081101184837.2575.98059.sendpatchset@balbir-laptop> (raw)
In-Reply-To: <20081101184812.2575.68112.sendpatchset@balbir-laptop>


Add support for building hierarchies in resource counters. Cgroups allows us
to build a deep hierarchy, but we currently don't link the resource counters
belonging to the memory controller control groups, which are linked in
cgroup hiearchy. This patch provides the infrastructure for resource counters
that have the same hiearchy as their cgroup counter parts.

These set of patches are based on the resource counter hiearchy patches posted
by Pavel Emelianov.

NOTE: Building hiearchies is expensive, deeper hierarchies imply charging
the all the way up to the root. It is known that hiearchies are expensive,
so the user needs to be careful and aware of the trade-offs before creating
very deep ones.


Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---

 include/linux/res_counter.h |    8 ++++++--
 kernel/res_counter.c        |   42 ++++++++++++++++++++++++++++++++++--------
 mm/memcontrol.c             |    9 ++++++---
 3 files changed, 46 insertions(+), 13 deletions(-)

diff -puN include/linux/res_counter.h~resource-counters-hierarchy-support include/linux/res_counter.h
--- linux-2.6.28-rc2/include/linux/res_counter.h~resource-counters-hierarchy-support	2008-11-02 00:14:58.000000000 +0530
+++ linux-2.6.28-rc2-balbir/include/linux/res_counter.h	2008-11-02 00:14:58.000000000 +0530
@@ -43,6 +43,10 @@ struct res_counter {
 	 * the routines below consider this to be IRQ-safe
 	 */
 	spinlock_t lock;
+	/*
+	 * Parent counter, used for hierarchial resource accounting
+	 */
+	struct res_counter *parent;
 };
 
 /**
@@ -87,7 +91,7 @@ enum {
  * helpers for accounting
  */
 
-void res_counter_init(struct res_counter *counter);
+void res_counter_init(struct res_counter *counter, struct res_counter *parent);
 
 /*
  * charge - try to consume more resource.
@@ -103,7 +107,7 @@ void res_counter_init(struct res_counter
 int __must_check res_counter_charge_locked(struct res_counter *counter,
 		unsigned long val);
 int __must_check res_counter_charge(struct res_counter *counter,
-		unsigned long val);
+		unsigned long val, struct res_counter **limit_fail_at);
 
 /*
  * uncharge - tell that some portion of the resource is released
diff -puN kernel/res_counter.c~resource-counters-hierarchy-support kernel/res_counter.c
--- linux-2.6.28-rc2/kernel/res_counter.c~resource-counters-hierarchy-support	2008-11-02 00:14:58.000000000 +0530
+++ linux-2.6.28-rc2-balbir/kernel/res_counter.c	2008-11-02 00:14:58.000000000 +0530
@@ -15,10 +15,11 @@
 #include <linux/uaccess.h>
 #include <linux/mm.h>
 
-void res_counter_init(struct res_counter *counter)
+void res_counter_init(struct res_counter *counter, struct res_counter *parent)
 {
 	spin_lock_init(&counter->lock);
 	counter->limit = (unsigned long long)LLONG_MAX;
+	counter->parent = parent;
 }
 
 int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
@@ -34,14 +35,34 @@ int res_counter_charge_locked(struct res
 	return 0;
 }
 
-int res_counter_charge(struct res_counter *counter, unsigned long val)
+int res_counter_charge(struct res_counter *counter, unsigned long val,
+			struct res_counter **limit_fail_at)
 {
 	int ret;
 	unsigned long flags;
+	struct res_counter *c, *u;
 
-	spin_lock_irqsave(&counter->lock, flags);
-	ret = res_counter_charge_locked(counter, val);
-	spin_unlock_irqrestore(&counter->lock, flags);
+	*limit_fail_at = NULL;
+	local_irq_save(flags);
+	for (c = counter; c != NULL; c = c->parent) {
+		spin_lock(&c->lock);
+		ret = res_counter_charge_locked(c, val);
+		spin_unlock(&c->lock);
+		if (ret < 0) {
+			*limit_fail_at = c;
+			goto undo;
+		}
+	}
+	ret = 0;
+	goto done;
+undo:
+	for (u = counter; u != c; u = u->parent) {
+		spin_lock(&u->lock);
+		res_counter_uncharge_locked(u, val);
+		spin_unlock(&u->lock);
+	}
+done:
+	local_irq_restore(flags);
 	return ret;
 }
 
@@ -56,10 +77,15 @@ void res_counter_uncharge_locked(struct 
 void res_counter_uncharge(struct res_counter *counter, unsigned long val)
 {
 	unsigned long flags;
+	struct res_counter *c;
 
-	spin_lock_irqsave(&counter->lock, flags);
-	res_counter_uncharge_locked(counter, val);
-	spin_unlock_irqrestore(&counter->lock, flags);
+	local_irq_save(flags);
+	for (c = counter; c != NULL; c = c->parent) {
+		spin_lock(&c->lock);
+		res_counter_uncharge_locked(c, val);
+		spin_unlock(&c->lock);
+	}
+	local_irq_restore(flags);
 }
 
 
diff -puN mm/memcontrol.c~resource-counters-hierarchy-support mm/memcontrol.c
--- linux-2.6.28-rc2/mm/memcontrol.c~resource-counters-hierarchy-support	2008-11-02 00:14:58.000000000 +0530
+++ linux-2.6.28-rc2-balbir/mm/memcontrol.c	2008-11-02 00:14:58.000000000 +0530
@@ -485,6 +485,7 @@ int mem_cgroup_try_charge(struct mm_stru
 {
 	struct mem_cgroup *mem;
 	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+	struct res_counter *fail_res;
 	/*
 	 * We always charge the cgroup the mm_struct belongs to.
 	 * The mm_struct's mem_cgroup changes on task migration if the
@@ -510,7 +511,7 @@ int mem_cgroup_try_charge(struct mm_stru
 	}
 
 
-	while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) {
+	while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE, &fail_res))) {
 		if (!(gfp_mask & __GFP_WAIT))
 			goto nomem;
 
@@ -1175,18 +1176,20 @@ static void mem_cgroup_free(struct mem_c
 static struct cgroup_subsys_state *
 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 {
-	struct mem_cgroup *mem;
+	struct mem_cgroup *mem, *parent;
 	int node;
 
 	if (unlikely((cont->parent) == NULL)) {
 		mem = &init_mem_cgroup;
+		parent = NULL;
 	} else {
 		mem = mem_cgroup_alloc();
+		parent = mem_cgroup_from_cont(cont->parent);
 		if (!mem)
 			return ERR_PTR(-ENOMEM);
 	}
 
-	res_counter_init(&mem->res);
+	res_counter_init(&mem->res, parent ? &parent->res : NULL);
 
 	for_each_node_state(node, N_POSSIBLE)
 		if (alloc_mem_cgroup_per_zone_info(mem, node))
_

-- 
	Balbir

  parent reply	other threads:[~2008-11-01 18:49 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-11-01 18:48 [mm][PATCH 0/4] Memory cgroup hierarchy introduction Balbir Singh
2008-11-01 18:48 ` [mm] [PATCH 1/4] Memory cgroup hierarchy documentation Balbir Singh
2008-11-04  6:25   ` Paul Menage
2008-11-04  6:26     ` Paul Menage
2008-11-05 13:55       ` Balbir Singh
2008-11-01 18:48 ` Balbir Singh [this message]
2008-11-02  5:42   ` [mm] [PATCH 2/4] Memory cgroup resource counters for hierarchy KAMEZAWA Hiroyuki
2008-11-02  5:49     ` Balbir Singh
2008-11-02  5:56       ` KAMEZAWA Hiroyuki
2008-11-02 11:46         ` Balbir Singh
2008-11-01 18:48 ` [mm] [PATCH 3/4] Memory cgroup hierarchical reclaim Balbir Singh
2008-11-02  5:37   ` KAMEZAWA Hiroyuki
2008-11-02  5:44     ` Balbir Singh
2008-11-04  2:17       ` KAMEZAWA Hiroyuki
2008-11-05 13:34         ` Balbir Singh
2008-11-05 16:20           ` KAMEZAWA Hiroyuki
2008-11-06 14:00             ` Balbir Singh
2008-11-01 18:49 ` [mm] [PATCH 4/4] Memory cgroup hierarchy feature selector Balbir Singh
2008-11-02  5:38   ` KAMEZAWA Hiroyuki
2008-11-02  6:03     ` Balbir Singh
2008-11-02  6:24       ` KAMEZAWA Hiroyuki
2008-11-02 15:52         ` Balbir Singh
2008-11-04  6:37           ` Paul Menage
2008-11-06  7:00             ` Balbir Singh
2008-11-06  7:01               ` Balbir Singh
2008-11-06  6:56         ` Balbir Singh
2008-11-06  7:30           ` KAMEZAWA Hiroyuki
2008-11-04  0:15 ` [mm][PATCH 0/4] Memory cgroup hierarchy introduction KAMEZAWA Hiroyuki
2008-11-05 13:51   ` Balbir Singh
2008-11-05 16:33     ` KAMEZAWA Hiroyuki
2008-11-05 17:52       ` Balbir Singh
2008-11-06  0:22         ` KAMEZAWA Hiroyuki
2008-11-04  9:21 ` [patch 1/2] memcg: hierarchy, yet another one KAMEZAWA Hiroyuki
2008-11-04  9:25 ` KAMEZAWA Hiroyuki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20081101184837.2575.98059.sendpatchset@balbir-laptop \
    --to=balbir@linux.vnet.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=dhaval@linux.vnet.ibm.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lizf@cn.fujitsu.com \
    --cc=menage@google.com \
    --cc=nickpiggin@yahoo.com.au \
    --cc=rientjes@google.com \
    --cc=xemul@openvz.org \
    --cc=yamamoto@valinux.co.jp \
    --subject='Re: [mm] [PATCH 2/4] Memory cgroup resource counters for hierarchy' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).