* [PATCH 1/2] Cpuset hardwall flag: Switch cpusets to use the bulk cgroup_add_files() API
2008-03-05 7:52 [PATCH 0/2] Cpuset hardwall flag: Introduction menage
@ 2008-03-05 7:52 ` menage
2008-03-05 8:43 ` Li Zefan
2008-03-05 12:07 ` Paul Jackson
2008-03-05 7:52 ` [PATCH 2/2] Cpuset hardwall flag: Add a mem_hardwall flag to cpusets menage
2008-03-05 12:23 ` [PATCH 0/2] Cpuset hardwall flag: Introduction Paul Jackson
2 siblings, 2 replies; 8+ messages in thread
From: menage @ 2008-03-05 7:52 UTC (permalink / raw)
To: pj, akpm; +Cc: linux-kernel, linux-mm
[-- Attachment #1: cpuset_add_files.patch --]
[-- Type: text/plain, Size: 4831 bytes --]
This change tidies up the cpusets control file definitions, and
reduces the amount of boilerplate required to add/change control files
in the future.
Signed-off-by: Paul Menage <menage@google.com>
---
kernel/cpuset.c | 149 +++++++++++++++++++++++++-------------------------------
1 file changed, 68 insertions(+), 81 deletions(-)
Index: hardwall-2.6.25-rc3-mm1/kernel/cpuset.c
===================================================================
--- hardwall-2.6.25-rc3-mm1.orig/kernel/cpuset.c
+++ hardwall-2.6.25-rc3-mm1/kernel/cpuset.c
@@ -1397,46 +1397,69 @@ static u64 cpuset_read_u64(struct cgroup
* for the common functions, 'private' gives the type of file
*/
-static struct cftype cft_cpus = {
- .name = "cpus",
- .read = cpuset_common_file_read,
- .write = cpuset_common_file_write,
- .private = FILE_CPULIST,
-};
-
-static struct cftype cft_mems = {
- .name = "mems",
- .read = cpuset_common_file_read,
- .write = cpuset_common_file_write,
- .private = FILE_MEMLIST,
-};
-
-static struct cftype cft_cpu_exclusive = {
- .name = "cpu_exclusive",
- .read_u64 = cpuset_read_u64,
- .write_u64 = cpuset_write_u64,
- .private = FILE_CPU_EXCLUSIVE,
-};
-
-static struct cftype cft_mem_exclusive = {
- .name = "mem_exclusive",
- .read_u64 = cpuset_read_u64,
- .write_u64 = cpuset_write_u64,
- .private = FILE_MEM_EXCLUSIVE,
-};
-
-static struct cftype cft_sched_load_balance = {
- .name = "sched_load_balance",
- .read_u64 = cpuset_read_u64,
- .write_u64 = cpuset_write_u64,
- .private = FILE_SCHED_LOAD_BALANCE,
-};
-
-static struct cftype cft_memory_migrate = {
- .name = "memory_migrate",
- .read_u64 = cpuset_read_u64,
- .write_u64 = cpuset_write_u64,
- .private = FILE_MEMORY_MIGRATE,
+static struct cftype files[] = {
+ {
+ .name = "cpus",
+ .read = cpuset_common_file_read,
+ .write = cpuset_common_file_write,
+ .private = FILE_CPULIST,
+ },
+
+ {
+ .name = "mems",
+ .read = cpuset_common_file_read,
+ .write = cpuset_common_file_write,
+ .private = FILE_MEMLIST,
+ },
+
+ {
+ .name = "cpu_exclusive",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_CPU_EXCLUSIVE,
+ },
+
+ {
+ .name = "mem_exclusive",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_MEM_EXCLUSIVE,
+ },
+
+ {
+ .name = "sched_load_balance",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_SCHED_LOAD_BALANCE,
+ },
+
+ {
+ .name = "memory_migrate",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_MEMORY_MIGRATE,
+ },
+
+ {
+ .name = "memory_pressure",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_MEMORY_PRESSURE,
+ },
+
+ {
+ .name = "memory_spread_page",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_SPREAD_PAGE,
+ },
+
+ {
+ .name = "memory_spread_slab",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_SPREAD_SLAB,
+ },
};
static struct cftype cft_memory_pressure_enabled = {
@@ -1446,54 +1469,18 @@ static struct cftype cft_memory_pressure
.private = FILE_MEMORY_PRESSURE_ENABLED,
};
-static struct cftype cft_memory_pressure = {
- .name = "memory_pressure",
- .read_u64 = cpuset_read_u64,
- .write_u64 = cpuset_write_u64,
- .private = FILE_MEMORY_PRESSURE,
-};
-
-static struct cftype cft_spread_page = {
- .name = "memory_spread_page",
- .read_u64 = cpuset_read_u64,
- .write_u64 = cpuset_write_u64,
- .private = FILE_SPREAD_PAGE,
-};
-
-static struct cftype cft_spread_slab = {
- .name = "memory_spread_slab",
- .read_u64 = cpuset_read_u64,
- .write_u64 = cpuset_write_u64,
- .private = FILE_SPREAD_SLAB,
-};
-
static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
{
int err;
- if ((err = cgroup_add_file(cont, ss, &cft_cpus)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_mems)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_cpu_exclusive)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_mem_exclusive)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_memory_migrate)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_spread_slab)) < 0)
+ err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
+ if (err)
return err;
/* memory_pressure_enabled is in root cpuset only */
- if (err == 0 && !cont->parent)
+ if (!cont->parent)
err = cgroup_add_file(cont, ss,
- &cft_memory_pressure_enabled);
- return 0;
+ &cft_memory_pressure_enabled);
+ return err;
}
/*
--
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] Cpuset hardwall flag: Switch cpusets to use the bulk cgroup_add_files() API
2008-03-05 7:52 ` [PATCH 1/2] Cpuset hardwall flag: Switch cpusets to use the bulk cgroup_add_files() API menage
@ 2008-03-05 8:43 ` Li Zefan
2008-03-05 12:07 ` Paul Jackson
1 sibling, 0 replies; 8+ messages in thread
From: Li Zefan @ 2008-03-05 8:43 UTC (permalink / raw)
To: menage; +Cc: pj, akpm, linux-kernel, linux-mm
menage@google.com wrote:
> This change tidies up the cpusets control file definitions, and
> reduces the amount of boilerplate required to add/change control files
> in the future.
>
> Signed-off-by: Paul Menage <menage@google.com>
>
Actually I had already done this cleanup but didn't have time to post
the patch. :)
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] Cpuset hardwall flag: Switch cpusets to use the bulk cgroup_add_files() API
2008-03-05 7:52 ` [PATCH 1/2] Cpuset hardwall flag: Switch cpusets to use the bulk cgroup_add_files() API menage
2008-03-05 8:43 ` Li Zefan
@ 2008-03-05 12:07 ` Paul Jackson
1 sibling, 0 replies; 8+ messages in thread
From: Paul Jackson @ 2008-03-05 12:07 UTC (permalink / raw)
To: menage; +Cc: akpm, linux-kernel, linux-mm
Paul M wrote:
> This change tidies up the cpusets control file definitions,
Acked-by: Paul Jackson <pj@sgi.com>
--
I won't rest till it's the best ...
Programmer, Linux Scalability
Paul Jackson <pj@sgi.com> 1.940.382.4214
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 2/2] Cpuset hardwall flag: Add a mem_hardwall flag to cpusets
2008-03-05 7:52 [PATCH 0/2] Cpuset hardwall flag: Introduction menage
2008-03-05 7:52 ` [PATCH 1/2] Cpuset hardwall flag: Switch cpusets to use the bulk cgroup_add_files() API menage
@ 2008-03-05 7:52 ` menage
2008-03-05 12:17 ` Paul Jackson
2008-03-05 12:23 ` [PATCH 0/2] Cpuset hardwall flag: Introduction Paul Jackson
2 siblings, 1 reply; 8+ messages in thread
From: menage @ 2008-03-05 7:52 UTC (permalink / raw)
To: pj, akpm; +Cc: linux-kernel, linux-mm
[-- Attachment #1: hardwall.patch --]
[-- Type: text/plain, Size: 8254 bytes --]
This flag provides the hardwalling properties of mem_exclusive,
without enforcing the exclusivity. Either mem_hardwall or
mem_exclusive is sufficient to prevent GFP_KERNEL allocations from
passing outside the cpuset's assigned nodes.
Signed-off-by: Paul Menage <menage@google.com>
---
Documentation/cpusets.txt | 26 +++++++++++++-----------
kernel/cpuset.c | 48 ++++++++++++++++++++++++++++++----------------
2 files changed, 46 insertions(+), 28 deletions(-)
Index: hardwall-2.6.25-rc3-mm1/kernel/cpuset.c
===================================================================
--- hardwall-2.6.25-rc3-mm1.orig/kernel/cpuset.c
+++ hardwall-2.6.25-rc3-mm1/kernel/cpuset.c
@@ -124,6 +124,7 @@ struct cpuset_hotplug_scanner {
typedef enum {
CS_CPU_EXCLUSIVE,
CS_MEM_EXCLUSIVE,
+ CS_MEM_HARDWALL,
CS_MEMORY_MIGRATE,
CS_SCHED_LOAD_BALANCE,
CS_SPREAD_PAGE,
@@ -141,6 +142,11 @@ static inline int is_mem_exclusive(const
return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
}
+static inline int is_mem_hardwall(const struct cpuset *cs)
+{
+ return test_bit(CS_MEM_HARDWALL, &cs->flags);
+}
+
static inline int is_sched_load_balance(const struct cpuset *cs)
{
return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
@@ -1002,12 +1008,9 @@ int current_cpuset_is_being_rebound(void
/*
* update_flag - read a 0 or a 1 in a file and update associated flag
- * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
- * CS_SCHED_LOAD_BALANCE,
- * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE,
- * CS_SPREAD_PAGE, CS_SPREAD_SLAB)
- * cs: the cpuset to update
- * buf: the buffer where we read the 0 or 1
+ * bit: the bit to update (see cpuset_flagbits_t)
+ * cs: the cpuset to update
+ * turning_on: whether the flag is being set or cleared
*
* Call with cgroup_mutex held.
*/
@@ -1188,6 +1191,7 @@ typedef enum {
FILE_MEMLIST,
FILE_CPU_EXCLUSIVE,
FILE_MEM_EXCLUSIVE,
+ FILE_MEM_HARDWALL,
FILE_SCHED_LOAD_BALANCE,
FILE_MEMORY_PRESSURE_ENABLED,
FILE_MEMORY_PRESSURE,
@@ -1268,6 +1272,9 @@ static int cpuset_write_u64(struct cgrou
case FILE_MEM_EXCLUSIVE:
retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
break;
+ case FILE_MEM_HARDWALL:
+ retval = update_flag(CS_MEM_HARDWALL, cs, val);
+ break;
case FILE_SCHED_LOAD_BALANCE:
retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
break;
@@ -1375,6 +1382,8 @@ static u64 cpuset_read_u64(struct cgroup
return is_cpu_exclusive(cs);
case FILE_MEM_EXCLUSIVE:
return is_mem_exclusive(cs);
+ case FILE_MEM_HARDWALL:
+ return is_mem_hardwall(cs);
case FILE_SCHED_LOAD_BALANCE:
return is_sched_load_balance(cs);
case FILE_MEMORY_MIGRATE:
@@ -1427,6 +1436,13 @@ static struct cftype files[] = {
},
{
+ .name = "mem_hardwall",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_MEM_HARDWALL,
+ },
+
+ {
.name = "sched_load_balance",
.read_u64 = cpuset_read_u64,
.write_u64 = cpuset_write_u64,
@@ -1913,14 +1929,14 @@ int cpuset_nodemask_valid_mems_allowed(n
}
/*
- * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive
- * ancestor to the specified cpuset. Call holding callback_mutex.
- * If no ancestor is mem_exclusive (an unusual configuration), then
- * returns the root cpuset.
+ * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
+ * mem_hardwall ancestor to the specified cpuset. Call holding
+ * callback_mutex. If no ancestor is mem_exclusive or mem_hardwall
+ * (an unusual configuration), then returns the root cpuset.
*/
-static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
+static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs)
{
- while (!is_mem_exclusive(cs) && cs->parent)
+ while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && cs->parent)
cs = cs->parent;
return cs;
}
@@ -1934,7 +1950,7 @@ static const struct cpuset *nearest_excl
* __GFP_THISNODE is set, yes, we can always allocate. If zone
* z's node is in our tasks mems_allowed, yes. If it's not a
* __GFP_HARDWALL request and this zone's nodes is in the nearest
- * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
+ * hardwalled cpuset ancestor to this task's cpuset, yes.
* If the task has been OOM killed and has access to memory reserves
* as specified by the TIF_MEMDIE flag, yes.
* Otherwise, no.
@@ -1957,7 +1973,7 @@ static const struct cpuset *nearest_excl
* and do not allow allocations outside the current tasks cpuset
* unless the task has been OOM killed as is marked TIF_MEMDIE.
* GFP_KERNEL allocations are not so marked, so can escape to the
- * nearest enclosing mem_exclusive ancestor cpuset.
+ * nearest enclosing hardwalled ancestor cpuset.
*
* Scanning up parent cpusets requires callback_mutex. The
* __alloc_pages() routine only calls here with __GFP_HARDWALL bit
@@ -1980,7 +1996,7 @@ static const struct cpuset *nearest_excl
* in_interrupt - any node ok (current task context irrelevant)
* GFP_ATOMIC - any node ok
* TIF_MEMDIE - any node ok
- * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok
+ * GFP_KERNEL - any node in enclosing hardwalled cpuset ok
* GFP_USER - only nodes in current tasks mems allowed ok.
*
* Rule:
@@ -2017,7 +2033,7 @@ int __cpuset_zone_allowed_softwall(struc
mutex_lock(&callback_mutex);
task_lock(current);
- cs = nearest_exclusive_ancestor(task_cs(current));
+ cs = nearest_hardwall_ancestor(task_cs(current));
task_unlock(current);
allowed = node_isset(node, cs->mems_allowed);
Index: hardwall-2.6.25-rc3-mm1/Documentation/cpusets.txt
===================================================================
--- hardwall-2.6.25-rc3-mm1.orig/Documentation/cpusets.txt
+++ hardwall-2.6.25-rc3-mm1/Documentation/cpusets.txt
@@ -169,6 +169,7 @@ files describing that cpuset:
- memory_migrate flag: if set, move pages to cpusets nodes
- cpu_exclusive flag: is cpu placement exclusive?
- mem_exclusive flag: is memory placement exclusive?
+ - mem_hardwall flag: is memory allocation hardwalled?
- memory_pressure: measure of how much paging pressure in cpuset
In addition, the root cpuset only has the following file:
@@ -220,17 +221,18 @@ If a cpuset is cpu or mem exclusive, no
a direct ancestor or descendent, may share any of the same CPUs or
Memory Nodes.
-A cpuset that is mem_exclusive restricts kernel allocations for
-page, buffer and other data commonly shared by the kernel across
-multiple users. All cpusets, whether mem_exclusive or not, restrict
-allocations of memory for user space. This enables configuring a
-system so that several independent jobs can share common kernel data,
-such as file system pages, while isolating each jobs user allocation in
-its own cpuset. To do this, construct a large mem_exclusive cpuset to
-hold all the jobs, and construct child, non-mem_exclusive cpusets for
-each individual job. Only a small amount of typical kernel memory,
-such as requests from interrupt handlers, is allowed to be taken
-outside even a mem_exclusive cpuset.
+A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled",
+i.e. it restricts kernel allocations for page, buffer and other data
+commonly shared by the kernel across multiple users. All cpusets,
+whether hardwalled or not, restrict allocations of memory for user
+space. This enables configuring a system so that several independent
+jobs can share common kernel data, such as file system pages, while
+isolating each job's user allocation in its own cpuset. To do this,
+construct a large hardwalled cpuset to hold all the jobs, and
+construct child, non-hardwalled cpusets for each individual job.
+Only a small amount of typical kernel memory, such as requests from
+interrupt handlers, is allowed to be taken outside even a
+hardwalled cpuset.
1.5 What is memory_pressure ?
@@ -639,7 +641,7 @@ Now you want to do something with this c
In this directory you can find several files:
# ls
-cpus cpu_exclusive mems mem_exclusive tasks
+cpus cpu_exclusive mems mem_exclusive mem_hardwall tasks
Reading them will give you information about the state of this cpuset:
the CPUs and Memory Nodes it can use, the processes that are using
--
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 0/2] Cpuset hardwall flag: Introduction
2008-03-05 7:52 [PATCH 0/2] Cpuset hardwall flag: Introduction menage
2008-03-05 7:52 ` [PATCH 1/2] Cpuset hardwall flag: Switch cpusets to use the bulk cgroup_add_files() API menage
2008-03-05 7:52 ` [PATCH 2/2] Cpuset hardwall flag: Add a mem_hardwall flag to cpusets menage
@ 2008-03-05 12:23 ` Paul Jackson
2008-03-05 18:10 ` Andrew Morton
2 siblings, 1 reply; 8+ messages in thread
From: Paul Jackson @ 2008-03-05 12:23 UTC (permalink / raw)
To: menage; +Cc: akpm, linux-kernel, linux-mm
Paul M wrote:
> Currently the cpusets mem_exclusive flag is overloaded to mean both
> "no-overlapping" and "no GFP_KERNEL allocations outside this cpuset".
>
> These patches add a new mem_hardwall flag with just the allocation
> restriction part of the mem_exclusive semantics, without breaking
> backwards-compatibility for those who continue to use just
> mem_exclusive.
... too bad this nice comment wasn't included in PATCH 2/2, so that
it would automatically make it into the record of history - the source
control log message (as best I understand how Andrew's tools work,
comments off in their own, codeless patch "PATCH 0/N" don't make
it to the source control log, except when Andrew chooses to make a
special effort.)
--
I won't rest till it's the best ...
Programmer, Linux Scalability
Paul Jackson <pj@sgi.com> 1.940.382.4214
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 0/2] Cpuset hardwall flag: Introduction
2008-03-05 12:23 ` [PATCH 0/2] Cpuset hardwall flag: Introduction Paul Jackson
@ 2008-03-05 18:10 ` Andrew Morton
0 siblings, 0 replies; 8+ messages in thread
From: Andrew Morton @ 2008-03-05 18:10 UTC (permalink / raw)
To: Paul Jackson; +Cc: menage, linux-kernel, linux-mm
On Wed, 5 Mar 2008 06:23:18 -0600 Paul Jackson <pj@sgi.com> wrote:
> Paul M wrote:
> > Currently the cpusets mem_exclusive flag is overloaded to mean both
> > "no-overlapping" and "no GFP_KERNEL allocations outside this cpuset".
> >
> > These patches add a new mem_hardwall flag with just the allocation
> > restriction part of the mem_exclusive semantics, without breaking
> > backwards-compatibility for those who continue to use just
> > mem_exclusive.
>
> ... too bad this nice comment wasn't included in PATCH 2/2, so that
> it would automatically make it into the record of history - the source
> control log message (as best I understand how Andrew's tools work,
> comments off in their own, codeless patch "PATCH 0/N" don't make
> it to the source control log, except when Andrew chooses to make a
> special effort.)
I make that special effort almost 100% of the time. The changelog for the
first patch becomes:
<text from [0/n]>
This patch:
<text from [1/n]>
^ permalink raw reply [flat|nested] 8+ messages in thread