LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [patch -mm 1/4] mempolicy: move rebind functions
@ 2008-03-06 20:05 David Rientjes
  2008-03-06 20:05 ` [patch -mm 2/4] mempolicy: create mempolicy_operations structure David Rientjes
  0 siblings, 1 reply; 15+ messages in thread
From: David Rientjes @ 2008-03-06 20:05 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Paul Jackson, Christoph Lameter, Lee Schermerhorn, Andi Kleen,
	linux-kernel

Move the mpol_rebind_{policy,task,mm}() functions after mpol_new() to
avoid having to declare function prototypes.

Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: David Rientjes <rientjes@google.com>
---
 mm/mempolicy.c |  185 +++++++++++++++++++++++++++----------------------------
 1 files changed, 91 insertions(+), 94 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -110,9 +110,6 @@ struct mempolicy default_policy = {
 	.policy = MPOL_DEFAULT,
 };
 
-static void mpol_rebind_policy(struct mempolicy *pol,
-                               const nodemask_t *newmask);
-
 /* Check that the nodemask contains at least one populated zone */
 static int is_valid_nodemask(nodemask_t *nodemask)
 {
@@ -203,6 +200,97 @@ free:
 	return ERR_PTR(-EINVAL);
 }
 
+/* Migrate a policy to a different set of nodes */
+static void mpol_rebind_policy(struct mempolicy *pol,
+			       const nodemask_t *newmask)
+{
+	nodemask_t tmp;
+	int static_nodes;
+	int relative_nodes;
+
+	if (!pol)
+		return;
+	static_nodes = pol->flags & MPOL_F_STATIC_NODES;
+	relative_nodes = pol->flags & MPOL_F_RELATIVE_NODES;
+	if (!mpol_store_user_nodemask(pol) &&
+	    nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
+		return;
+
+	switch (pol->policy) {
+	case MPOL_DEFAULT:
+		break;
+	case MPOL_BIND:
+		/* Fall through */
+	case MPOL_INTERLEAVE:
+		if (static_nodes)
+			nodes_and(tmp, pol->w.user_nodemask, *newmask);
+		else if (relative_nodes)
+			mpol_relative_nodemask(&tmp, &pol->w.user_nodemask,
+					       newmask);
+		else {
+			nodes_remap(tmp, pol->v.nodes,
+				    pol->w.cpuset_mems_allowed, *newmask);
+			pol->w.cpuset_mems_allowed = *newmask;
+		}
+		pol->v.nodes = tmp;
+		if (!node_isset(current->il_next, tmp)) {
+			current->il_next = next_node(current->il_next, tmp);
+			if (current->il_next >= MAX_NUMNODES)
+				current->il_next = first_node(tmp);
+			if (current->il_next >= MAX_NUMNODES)
+				current->il_next = numa_node_id();
+		}
+		break;
+	case MPOL_PREFERRED:
+		if (static_nodes) {
+			int node = first_node(pol->w.user_nodemask);
+
+			if (node_isset(node, *newmask))
+				pol->v.preferred_node = node;
+			else
+				pol->v.preferred_node = -1;
+		} else if (relative_nodes) {
+			mpol_relative_nodemask(&tmp, &pol->w.user_nodemask,
+					       newmask);
+			pol->v.preferred_node = first_node(tmp);
+		} else {
+			pol->v.preferred_node = node_remap(pol->v.preferred_node,
+					pol->w.cpuset_mems_allowed, *newmask);
+			pol->w.cpuset_mems_allowed = *newmask;
+		}
+		break;
+	default:
+		BUG();
+		break;
+	}
+}
+
+/*
+ * Wrapper for mpol_rebind_policy() that just requires task
+ * pointer, and updates task mempolicy.
+ */
+
+void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
+{
+	mpol_rebind_policy(tsk->mempolicy, new);
+}
+
+/*
+ * Rebind each vma in mm to new nodemask.
+ *
+ * Call holding a reference to mm.  Takes mm->mmap_sem during call.
+ */
+
+void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
+{
+	struct vm_area_struct *vma;
+
+	down_write(&mm->mmap_sem);
+	for (vma = mm->mmap; vma; vma = vma->vm_next)
+		mpol_rebind_policy(vma->vm_policy, new);
+	up_write(&mm->mmap_sem);
+}
+
 static void gather_stats(struct page *, void *, int pte_dirty);
 static void migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags);
@@ -1745,97 +1833,6 @@ void numa_default_policy(void)
 	do_set_mempolicy(MPOL_DEFAULT, 0, NULL);
 }
 
-/* Migrate a policy to a different set of nodes */
-static void mpol_rebind_policy(struct mempolicy *pol,
-			       const nodemask_t *newmask)
-{
-	nodemask_t tmp;
-	int static_nodes;
-	int relative_nodes;
-
-	if (!pol)
-		return;
-	static_nodes = pol->flags & MPOL_F_STATIC_NODES;
-	relative_nodes = pol->flags & MPOL_F_RELATIVE_NODES;
-	if (!mpol_store_user_nodemask(pol) &&
-	    nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
-		return;
-
-	switch (pol->policy) {
-	case MPOL_DEFAULT:
-		break;
-	case MPOL_BIND:
-		/* Fall through */
-	case MPOL_INTERLEAVE:
-		if (static_nodes)
-			nodes_and(tmp, pol->w.user_nodemask, *newmask);
-		else if (relative_nodes)
-			mpol_relative_nodemask(&tmp, &pol->w.user_nodemask,
-					       newmask);
-		else {
-			nodes_remap(tmp, pol->v.nodes,
-				    pol->w.cpuset_mems_allowed, *newmask);
-			pol->w.cpuset_mems_allowed = *newmask;
-		}
-		pol->v.nodes = tmp;
-		if (!node_isset(current->il_next, tmp)) {
-			current->il_next = next_node(current->il_next, tmp);
-			if (current->il_next >= MAX_NUMNODES)
-				current->il_next = first_node(tmp);
-			if (current->il_next >= MAX_NUMNODES)
-				current->il_next = numa_node_id();
-		}
-		break;
-	case MPOL_PREFERRED:
-		if (static_nodes) {
-			int node = first_node(pol->w.user_nodemask);
-
-			if (node_isset(node, *newmask))
-				pol->v.preferred_node = node;
-			else
-				pol->v.preferred_node = -1;
-		} else if (relative_nodes) {
-			mpol_relative_nodemask(&tmp, &pol->w.user_nodemask,
-					       newmask);
-			pol->v.preferred_node = first_node(tmp);
-		} else {
-			pol->v.preferred_node = node_remap(pol->v.preferred_node,
-					pol->w.cpuset_mems_allowed, *newmask);
-			pol->w.cpuset_mems_allowed = *newmask;
-		}
-		break;
-	default:
-		BUG();
-		break;
-	}
-}
-
-/*
- * Wrapper for mpol_rebind_policy() that just requires task
- * pointer, and updates task mempolicy.
- */
-
-void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
-{
-	mpol_rebind_policy(tsk->mempolicy, new);
-}
-
-/*
- * Rebind each vma in mm to new nodemask.
- *
- * Call holding a reference to mm.  Takes mm->mmap_sem during call.
- */
-
-void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
-{
-	struct vm_area_struct *vma;
-
-	down_write(&mm->mmap_sem);
-	for (vma = mm->mmap; vma; vma = vma->vm_next)
-		mpol_rebind_policy(vma->vm_policy, new);
-	up_write(&mm->mmap_sem);
-}
-
 /*
  * Display pages allocated per node and memory policy via /proc.
  */

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch -mm 2/4] mempolicy: create mempolicy_operations structure
  2008-03-06 20:05 [patch -mm 1/4] mempolicy: move rebind functions David Rientjes
@ 2008-03-06 20:05 ` David Rientjes
  2008-03-06 20:05   ` [patch -mm 3/4] mempolicy: small header file cleanup David Rientjes
  2008-03-07 20:44   ` Regression: Re: [patch -mm 2/4] mempolicy: create mempolicy_operations structure Lee Schermerhorn
  0 siblings, 2 replies; 15+ messages in thread
From: David Rientjes @ 2008-03-06 20:05 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Paul Jackson, Christoph Lameter, Lee Schermerhorn, Andi Kleen,
	linux-kernel

Create a mempolicy_operations structure that currently points to two
functions[*] for the various modes:

	int (*create)(struct mempolicy *, const nodemask_t *);
	void (*rebind)(struct mempolicy *, const nodemask_t *);

This splits the implementation for the various modes out of two large
functions, mpol_new() and mpol_rebind_policy().  Eventually it may be
beneficial to add additional functions to accomodate the existing switch()
statements in mm/mempolicy.c.

 [*] The ->create() function for MPOL_DEFAULT is currently NULL since no
     struct mempolicy is dynamically allocated.

Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: David Rientjes <rientjes@google.com>
---
 mm/mempolicy.c |  191 ++++++++++++++++++++++++++++++++------------------------
 1 files changed, 110 insertions(+), 81 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -63,7 +63,6 @@
    grows down?
    make bind policy root only? It can trigger oom much faster and the
    kernel is not always grateful with that.
-   could replace all the switch()es with a mempolicy_ops structure.
 */
 
 #include <linux/mempolicy.h>
@@ -110,8 +109,13 @@ struct mempolicy default_policy = {
 	.policy = MPOL_DEFAULT,
 };
 
+static const struct mempolicy_operations {
+	int (*create)(struct mempolicy *pol, const nodemask_t *nodes);
+	void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes);
+} mpol_ops[MPOL_MAX];
+
 /* Check that the nodemask contains at least one populated zone */
-static int is_valid_nodemask(nodemask_t *nodemask)
+static int is_valid_nodemask(const nodemask_t *nodemask)
 {
 	int nd, k;
 
@@ -144,19 +148,45 @@ static void mpol_relative_nodemask(nodemask_t *ret, const nodemask_t *orig,
 	nodes_onto(*ret, tmp, *rel);
 }
 
+static int mpol_new_interleave(struct mempolicy *pol, const nodemask_t *nodes)
+{
+	if (nodes_empty(*nodes))
+		return -EINVAL;
+	pol->v.nodes = *nodes;
+	return 0;
+}
+
+static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes)
+{
+	if (nodes_empty(*nodes))
+		return -EINVAL;
+	pol->v.preferred_node = first_node(*nodes);
+	return 0;
+}
+
+static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes)
+{
+	if (!is_valid_nodemask(nodes))
+		return -EINVAL;
+	pol->v.nodes = *nodes;
+	return 0;
+}
+
 /* Create a new policy */
 static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
 				  nodemask_t *nodes)
 {
 	struct mempolicy *policy;
 	nodemask_t cpuset_context_nmask;
+	int ret;
 
 	pr_debug("setting mode %d flags %d nodes[0] %lx\n",
 		 mode, flags, nodes ? nodes_addr(*nodes)[0] : -1);
 
+	if (nodes && nodes_empty(*nodes) && mode != MPOL_PREFERRED)
+		return ERR_PTR(-EINVAL);
 	if (mode == MPOL_DEFAULT)
-		return (nodes && nodes_weight(*nodes)) ? ERR_PTR(-EINVAL) :
-							 NULL;
+		return NULL;
 	policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
 	if (!policy)
 		return ERR_PTR(-ENOMEM);
@@ -168,101 +198,83 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
 	else
 		nodes_and(cpuset_context_nmask, *nodes,
 			  cpuset_current_mems_allowed);
-	switch (mode) {
-	case MPOL_INTERLEAVE:
-		if (nodes_empty(*nodes) || nodes_empty(cpuset_context_nmask))
-			goto free;
-		policy->v.nodes = cpuset_context_nmask;
-		break;
-	case MPOL_PREFERRED:
-		policy->v.preferred_node = first_node(cpuset_context_nmask);
-		if (policy->v.preferred_node >= MAX_NUMNODES)
-			goto free;
-		break;
-	case MPOL_BIND:
-		if (!is_valid_nodemask(&cpuset_context_nmask))
-			goto free;
-		policy->v.nodes = cpuset_context_nmask;
-		break;
-	default:
-		BUG();
-	}
 	policy->policy = mode;
 	policy->flags = flags;
 	if (mpol_store_user_nodemask(policy))
 		policy->w.user_nodemask = *nodes;
 	else
 		policy->w.cpuset_mems_allowed = cpuset_mems_allowed(current);
+
+	ret = mpol_ops[mode].create(policy, &cpuset_context_nmask);
+	if (ret < 0) {
+		kmem_cache_free(policy_cache, policy);
+		return ERR_PTR(ret);
+	}
 	return policy;
+}
+
+static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes)
+{
+}
+
+static void mpol_rebind_nodemask(struct mempolicy *pol,
+				 const nodemask_t *nodes)
+{
+	nodemask_t tmp;
 
-free:
-	kmem_cache_free(policy_cache, policy);
-	return ERR_PTR(-EINVAL);
+	if (pol->flags & MPOL_F_STATIC_NODES)
+		nodes_and(tmp, pol->w.user_nodemask, *nodes);
+	else if (pol->flags & MPOL_F_RELATIVE_NODES)
+		mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
+	else {
+		nodes_remap(tmp, pol->v.nodes, pol->w.cpuset_mems_allowed,
+			    *nodes);
+		pol->w.cpuset_mems_allowed = *nodes;
+	}
+
+	pol->v.nodes = tmp;
+	if (!node_isset(current->il_next, tmp)) {
+		current->il_next = next_node(current->il_next, tmp);
+		if (current->il_next >= MAX_NUMNODES)
+			current->il_next = first_node(tmp);
+		if (current->il_next >= MAX_NUMNODES)
+			current->il_next = numa_node_id();
+	}
+}
+
+static void mpol_rebind_preferred(struct mempolicy *pol,
+				  const nodemask_t *nodes)
+{
+	nodemask_t tmp;
+
+	if (pol->flags & MPOL_F_STATIC_NODES) {
+		int node = first_node(pol->w.user_nodemask);
+
+		if (node_isset(node, *nodes))
+			pol->v.preferred_node = node;
+		else
+			pol->v.preferred_node = -1;
+	} else if (pol->flags & MPOL_F_RELATIVE_NODES) {
+		mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
+		pol->v.preferred_node = first_node(tmp);
+	} else {
+		pol->v.preferred_node = node_remap(pol->v.preferred_node,
+						   pol->w.cpuset_mems_allowed,
+						   *nodes);
+		pol->w.cpuset_mems_allowed = *nodes;
+	}
 }
 
 /* Migrate a policy to a different set of nodes */
 static void mpol_rebind_policy(struct mempolicy *pol,
 			       const nodemask_t *newmask)
 {
-	nodemask_t tmp;
-	int static_nodes;
-	int relative_nodes;
-
 	if (!pol)
 		return;
-	static_nodes = pol->flags & MPOL_F_STATIC_NODES;
-	relative_nodes = pol->flags & MPOL_F_RELATIVE_NODES;
 	if (!mpol_store_user_nodemask(pol) &&
 	    nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
 		return;
-
-	switch (pol->policy) {
-	case MPOL_DEFAULT:
-		break;
-	case MPOL_BIND:
-		/* Fall through */
-	case MPOL_INTERLEAVE:
-		if (static_nodes)
-			nodes_and(tmp, pol->w.user_nodemask, *newmask);
-		else if (relative_nodes)
-			mpol_relative_nodemask(&tmp, &pol->w.user_nodemask,
-					       newmask);
-		else {
-			nodes_remap(tmp, pol->v.nodes,
-				    pol->w.cpuset_mems_allowed, *newmask);
-			pol->w.cpuset_mems_allowed = *newmask;
-		}
-		pol->v.nodes = tmp;
-		if (!node_isset(current->il_next, tmp)) {
-			current->il_next = next_node(current->il_next, tmp);
-			if (current->il_next >= MAX_NUMNODES)
-				current->il_next = first_node(tmp);
-			if (current->il_next >= MAX_NUMNODES)
-				current->il_next = numa_node_id();
-		}
-		break;
-	case MPOL_PREFERRED:
-		if (static_nodes) {
-			int node = first_node(pol->w.user_nodemask);
-
-			if (node_isset(node, *newmask))
-				pol->v.preferred_node = node;
-			else
-				pol->v.preferred_node = -1;
-		} else if (relative_nodes) {
-			mpol_relative_nodemask(&tmp, &pol->w.user_nodemask,
-					       newmask);
-			pol->v.preferred_node = first_node(tmp);
-		} else {
-			pol->v.preferred_node = node_remap(pol->v.preferred_node,
-					pol->w.cpuset_mems_allowed, *newmask);
-			pol->w.cpuset_mems_allowed = *newmask;
-		}
-		break;
-	default:
-		BUG();
-		break;
-	}
+	mpol_ops[pol->policy].rebind(pol, newmask);
 }
 
 /*
@@ -291,6 +303,24 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 	up_write(&mm->mmap_sem);
 }
 
+static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
+	[MPOL_DEFAULT] = {
+		.rebind = mpol_rebind_default,
+	},
+	[MPOL_INTERLEAVE] = {
+		.create = mpol_new_interleave,
+		.rebind = mpol_rebind_nodemask,
+	},
+	[MPOL_PREFERRED] = {
+		.create = mpol_new_preferred,
+		.rebind = mpol_rebind_preferred,
+	},
+	[MPOL_BIND] = {
+		.create = mpol_new_bind,
+		.rebind = mpol_rebind_nodemask,
+	},
+};
+
 static void gather_stats(struct page *, void *, int pte_dirty);
 static void migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags);
@@ -1836,7 +1866,6 @@ void numa_default_policy(void)
 /*
  * Display pages allocated per node and memory policy via /proc.
  */
-
 static const char * const policy_types[] =
 	{ "default", "prefer", "bind", "interleave" };
 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch -mm 3/4] mempolicy: small header file cleanup
  2008-03-06 20:05 ` [patch -mm 2/4] mempolicy: create mempolicy_operations structure David Rientjes
@ 2008-03-06 20:05   ` David Rientjes
  2008-03-06 20:05     ` [patch -mm 4/4] mempolicy: remove includes for duplicate headers David Rientjes
  2008-03-07 20:44   ` Regression: Re: [patch -mm 2/4] mempolicy: create mempolicy_operations structure Lee Schermerhorn
  1 sibling, 1 reply; 15+ messages in thread
From: David Rientjes @ 2008-03-06 20:05 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Paul Jackson, Christoph Lameter, Lee Schermerhorn, Andi Kleen,
	linux-kernel

Removes forward definition of vm_area_struct in linux/mempolicy.h.  We
already get it from the linux/slab.h -> linux/gfp.h include.

Removes the unused mpol_set_vma_default() macro from linux/mempolicy.h.

Removes the extern definition of default_policy since it is only
referenced, as it should be, in mm/mempolicy.c.

Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: David Rientjes <rientjes@google.com>
---
 include/linux/mempolicy.h |    8 --------
 1 files changed, 0 insertions(+), 8 deletions(-)

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -52,7 +52,6 @@ enum {
 #include <linux/spinlock.h>
 #include <linux/nodemask.h>
 
-struct vm_area_struct;
 struct mm_struct;
 
 #ifdef CONFIG_NUMA
@@ -129,10 +128,6 @@ static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
 	return __mpol_equal(a, b);
 }
 
-/* Could later add inheritance of the process policy here. */
-
-#define mpol_set_vma_default(vma) ((vma)->vm_policy = NULL)
-
 /*
  * Tree of shared policies for a shared memory region.
  * Maintain the policies in a pseudo mm that contains vmas. The vmas
@@ -168,7 +163,6 @@ extern void mpol_rebind_task(struct task_struct *tsk,
 extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
 extern void mpol_fix_fork_child_flag(struct task_struct *p);
 
-extern struct mempolicy default_policy;
 extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 		unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol);
 extern unsigned slab_node(struct mempolicy *policy);
@@ -193,8 +187,6 @@ static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
 	return 1;
 }
 
-#define mpol_set_vma_default(vma) do {} while(0)
-
 static inline void mpol_free(struct mempolicy *p)
 {
 }

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch -mm 4/4] mempolicy: remove includes for duplicate headers
  2008-03-06 20:05   ` [patch -mm 3/4] mempolicy: small header file cleanup David Rientjes
@ 2008-03-06 20:05     ` David Rientjes
  2008-03-06 20:19       ` Paul Jackson
  0 siblings, 1 reply; 15+ messages in thread
From: David Rientjes @ 2008-03-06 20:05 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Paul Jackson, Christoph Lameter, Lee Schermerhorn, Andi Kleen,
	linux-kernel

Remove #includes for:

	linux/mempolicy.h	(already from linux/migrate.h)
	linux/mm.h		(already from linux/highmem.h)
	linux/kernel.h		(already from linux/nodemask.h)
	linux/nodemask.h	(already from linux/sched.h)
	linux/gfp.h		(already from linux/slab.h)
	linux/slab.h		(already from linux/mempolicy.h)
	linux/init.h		(already from linux/mmzone.h)

Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: David Rientjes <rientjes@google.com>
---
 mm/mempolicy.c |    7 -------
 1 files changed, 0 insertions(+), 7 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -65,21 +65,14 @@
    kernel is not always grateful with that.
 */
 
-#include <linux/mempolicy.h>
-#include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/hugetlb.h>
-#include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/nodemask.h>
 #include <linux/cpuset.h>
-#include <linux/gfp.h>
-#include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/module.h>
 #include <linux/nsproxy.h>
 #include <linux/interrupt.h>
-#include <linux/init.h>
 #include <linux/compat.h>
 #include <linux/swap.h>
 #include <linux/seq_file.h>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [patch -mm 4/4] mempolicy: remove includes for duplicate headers
  2008-03-06 20:05     ` [patch -mm 4/4] mempolicy: remove includes for duplicate headers David Rientjes
@ 2008-03-06 20:19       ` Paul Jackson
  2008-03-06 20:51         ` David Rientjes
  0 siblings, 1 reply; 15+ messages in thread
From: Paul Jackson @ 2008-03-06 20:19 UTC (permalink / raw)
  To: David Rientjes; +Cc: akpm, clameter, Lee.Schermerhorn, ak, linux-kernel

David R wrote:
> Remove #includes for:
> 
> 	linux/mempolicy.h	(already from linux/migrate.h)
> 	linux/mm.h		(already from linux/highmem.h)

I didn't see any problem offhand with the other patches 0 to 3 in this
set, but this patch 4/4 surprises me.

Perhaps I'm out of phase with what's customary in the kernel, but I
prefer directly including whatever headers I explicitly need.
Depending on some other header to drag in something you have explicit
need of anyway makes things more fragile.

-- 
                  I won't rest till it's the best ...
                  Programmer, Linux Scalability
                  Paul Jackson <pj@sgi.com> 1.940.382.4214

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [patch -mm 4/4] mempolicy: remove includes for duplicate headers
  2008-03-06 20:19       ` Paul Jackson
@ 2008-03-06 20:51         ` David Rientjes
  2008-03-06 21:01           ` Paul Jackson
  0 siblings, 1 reply; 15+ messages in thread
From: David Rientjes @ 2008-03-06 20:51 UTC (permalink / raw)
  To: Paul Jackson; +Cc: akpm, clameter, Lee.Schermerhorn, ak, linux-kernel

On Thu, 6 Mar 2008, Paul Jackson wrote:

> > Remove #includes for:
> > 
> > 	linux/mempolicy.h	(already from linux/migrate.h)
> > 	linux/mm.h		(already from linux/highmem.h)
> 
> I didn't see any problem offhand with the other patches 0 to 3 in this
> set, but this patch 4/4 surprises me.
> 
> Perhaps I'm out of phase with what's customary in the kernel, but I
> prefer directly including whatever headers I explicitly need.
> Depending on some other header to drag in something you have explicit
> need of anyway makes things more fragile.
> 

It simply decreases the remote chance later that we'll develop obscure 
header file dependencies for some archs like we saw with the memcontroller 
and the sparc build.  There are no strange dependencies that I am 
currently aware of, so this is not a bugfix.

The only way this would make things more fragile is if a header file 
removes an include for another header file without checking all the users 
of the former first and fixing it up in mm/mempolicy.c.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [patch -mm 4/4] mempolicy: remove includes for duplicate headers
  2008-03-06 20:51         ` David Rientjes
@ 2008-03-06 21:01           ` Paul Jackson
  2008-03-07  8:45             ` Andrew Morton
  0 siblings, 1 reply; 15+ messages in thread
From: Paul Jackson @ 2008-03-06 21:01 UTC (permalink / raw)
  To: David Rientjes; +Cc: akpm, clameter, Lee.Schermerhorn, ak, linux-kernel

David wrote:
> It simply decreases the remote chance later that ...
> 
> The only way this would make things more fragile is if ...

Does anyone lurking on this thread know if there is an
established convention in kernel code, whether to directly
include all headers that your code explicitly needs, or
whether it's ok to rely on indirect includes for such?

David and I could debate the fine points of which way is
best until the cows come home; the two of us are good at
that.  This is too minor an issue for that sort of effort.

-- 
                  I won't rest till it's the best ...
                  Programmer, Linux Scalability
                  Paul Jackson <pj@sgi.com> 1.940.382.4214

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [patch -mm 4/4] mempolicy: remove includes for duplicate headers
  2008-03-06 21:01           ` Paul Jackson
@ 2008-03-07  8:45             ` Andrew Morton
  0 siblings, 0 replies; 15+ messages in thread
From: Andrew Morton @ 2008-03-07  8:45 UTC (permalink / raw)
  To: Paul Jackson; +Cc: David Rientjes, clameter, Lee.Schermerhorn, ak, linux-kernel

On Thu, 6 Mar 2008 15:01:48 -0600 Paul Jackson <pj@sgi.com> wrote:

> David wrote:
> > It simply decreases the remote chance later that ...
> > 
> > The only way this would make things more fragile is if ...
> 
> Does anyone lurking on this thread know if there is an
> established convention in kernel code, whether to directly
> include all headers that your code explicitly needs, or
> whether it's ok to rely on indirect includes for such?
> 
> David and I could debate the fine points of which way is
> best until the cows come home; the two of us are good at
> that.  This is too minor an issue for that sort of effort.
> 

Directly including what you need is more robust than a) relying upon nested
inclusions by includees or b) relying upon preceding inclusions by
includers.

So we usually go that way, when we notice it and when we think about it.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Regression:  Re: [patch -mm 2/4] mempolicy: create mempolicy_operations structure
  2008-03-06 20:05 ` [patch -mm 2/4] mempolicy: create mempolicy_operations structure David Rientjes
  2008-03-06 20:05   ` [patch -mm 3/4] mempolicy: small header file cleanup David Rientjes
@ 2008-03-07 20:44   ` Lee Schermerhorn
  2008-03-07 21:48     ` David Rientjes
  1 sibling, 1 reply; 15+ messages in thread
From: Lee Schermerhorn @ 2008-03-07 20:44 UTC (permalink / raw)
  To: Andrew Morton, David Rientjes
  Cc: Paul Jackson, Christoph Lameter, Andi Kleen, linux-kernel,
	linux-mm, Eric Whitney

The subject patch causes a regression in the numactl package mempolicy
regression tests.

I'm using the numactl-1.0.2 package with the patches available at:

http://free.linux.hp.com/~lts/Patches/Numactl/numactl-1.0.2-patches-080226.tar.gz

The numastat and regress patches in that tarball are necessary for
recent kernels, else the tests will report false failures.

The following patch fixes the regression.

Lee

----------------------------------------------------

Against:  2.6.25-rc3-mm1 atop the following patches, which Andrew
has already added to the -mm tree:

[patch 1/6] mempolicy: convert MPOL constants to enum
[patch 2/6] mempolicy: support optional mode flags
[patch 3/6] mempolicy: add MPOL_F_STATIC_NODES flag
[patch 4/6] mempolicy: add bitmap_onto() and bitmap_fold() operations
[patch 5/6] mempolicy: add MPOL_F_RELATIVE_NODES flag
[patch 6/6] mempolicy: update NUMA memory policy documentation
[patch -mm 1/4] mempolicy: move rebind functions
[patch -mm 2/4] mempolicy: create mempolicy_operations structure
[patch -mm 3/4] mempolicy: small header file cleanup

Specifically this patch fixes problems introduced by the rework
of mpol_new() in patch 2/4 in the second series.  As a result,
we're no longer accepting NULL/empty nodemask with MPOL_PREFERRED
as "local" allocation.  This breaks the numactl regression tests.

"numactl --localalloc" <some command>" fails with invalid argument.

This patch fixes the regression by again treating NULL/empty nodemask
with MPOL_PREFERRED as "local allocation", and special casing this
condition, as needed, in mpol_new(), mpol_new_preferred() and 
mpol_rebind_preferred().

It also appears that the patch series listed above required a non-empty
nodemask with MPOL_DEFAULT.  However, I didn't test that.  With this
patch, MPOL_DEFAULT effectively ignores the nodemask--empty or not.
This is a change in behavior that I have argued against, but the
regression tests don't test this, so I'm not going to attempt to address
it with this patch.

Note:  I have no tests to test the 'STATIC_NODES and 'RELATIVE_NODES
behavior to ensure that this patch doesn't regress those.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>

 mm/mempolicy.c |   54 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 18 deletions(-)

Index: linux-2.6.25-rc3-mm1/mm/mempolicy.c
===================================================================
--- linux-2.6.25-rc3-mm1.orig/mm/mempolicy.c	2008-03-07 15:22:01.000000000 -0500
+++ linux-2.6.25-rc3-mm1/mm/mempolicy.c	2008-03-07 15:37:43.000000000 -0500
@@ -158,9 +158,12 @@ static int mpol_new_interleave(struct me
 
 static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes)
 {
-	if (nodes_empty(*nodes))
-		return -EINVAL;
-	pol->v.preferred_node = first_node(*nodes);
+	if (!nodes)
+		pol->v.preferred_node = -1;	/* local allocation */
+	else if (nodes_empty(*nodes))
+		return -EINVAL;			/*  no allowed nodes */
+	else
+		pol->v.preferred_node = first_node(*nodes);
 	return 0;
 }
 
@@ -178,34 +181,43 @@ static struct mempolicy *mpol_new(unsign
 {
 	struct mempolicy *policy;
 	nodemask_t cpuset_context_nmask;
+	int localalloc = 0;
 	int ret;
 
 	pr_debug("setting mode %d flags %d nodes[0] %lx\n",
 		 mode, flags, nodes ? nodes_addr(*nodes)[0] : -1);
 
-	if (nodes && nodes_empty(*nodes) && mode != MPOL_PREFERRED)
-		return ERR_PTR(-EINVAL);
 	if (mode == MPOL_DEFAULT)
 		return NULL;
+	if (!nodes || nodes_empty(*nodes)) {
+		if (mode != MPOL_PREFERRED)
+			return ERR_PTR(-EINVAL);
+		localalloc = 1;	/* special case:  no mode flags */
+	}
 	policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
 	if (!policy)
 		return ERR_PTR(-ENOMEM);
 	atomic_set(&policy->refcnt, 1);
-	cpuset_update_task_memory_state();
-	if (flags & MPOL_F_RELATIVE_NODES)
-		mpol_relative_nodemask(&cpuset_context_nmask, nodes,
-				       &cpuset_current_mems_allowed);
-	else
-		nodes_and(cpuset_context_nmask, *nodes,
-			  cpuset_current_mems_allowed);
 	policy->policy = mode;
-	policy->flags = flags;
-	if (mpol_store_user_nodemask(policy))
-		policy->w.user_nodemask = *nodes;
-	else
-		policy->w.cpuset_mems_allowed = cpuset_mems_allowed(current);
 
-	ret = mpol_ops[mode].create(policy, &cpuset_context_nmask);
+	if (!localalloc) {
+		policy->flags = flags;
+		cpuset_update_task_memory_state();
+		if (flags & MPOL_F_RELATIVE_NODES)
+			mpol_relative_nodemask(&cpuset_context_nmask, nodes,
+					       &cpuset_current_mems_allowed);
+		else
+			nodes_and(cpuset_context_nmask, *nodes,
+				  cpuset_current_mems_allowed);
+		if (mpol_store_user_nodemask(policy))
+			policy->w.user_nodemask = *nodes;
+		else
+			policy->w.cpuset_mems_allowed =
+						cpuset_mems_allowed(current);
+	}
+
+	ret = mpol_ops[mode].create(policy,
+				localalloc ? NULL : &cpuset_context_nmask);
 	if (ret < 0) {
 		kmem_cache_free(policy_cache, policy);
 		return ERR_PTR(ret);
@@ -247,6 +259,10 @@ static void mpol_rebind_preferred(struct
 {
 	nodemask_t tmp;
 
+	/*
+	 * check 'STATIC_NODES first, as preferred_node == -1 may be
+	 * a temporary, "fallback" state for this policy.
+	 */
 	if (pol->flags & MPOL_F_STATIC_NODES) {
 		int node = first_node(pol->w.user_nodemask);
 
@@ -254,6 +270,8 @@ static void mpol_rebind_preferred(struct
 			pol->v.preferred_node = node;
 		else
 			pol->v.preferred_node = -1;
+	} else if (pol->v.preferred_node == -1) {
+		return;	/* no remap required for explicit local alloc */
 	} else if (pol->flags & MPOL_F_RELATIVE_NODES) {
 		mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
 		pol->v.preferred_node = first_node(tmp);



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Regression:  Re: [patch -mm 2/4] mempolicy: create mempolicy_operations structure
  2008-03-07 20:44   ` Regression: Re: [patch -mm 2/4] mempolicy: create mempolicy_operations structure Lee Schermerhorn
@ 2008-03-07 21:48     ` David Rientjes
  2008-03-07 21:57       ` Paul Jackson
                         ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: David Rientjes @ 2008-03-07 21:48 UTC (permalink / raw)
  To: Lee Schermerhorn
  Cc: Andrew Morton, Paul Jackson, Christoph Lameter, Andi Kleen,
	linux-kernel, linux-mm, Eric Whitney

On Fri, 7 Mar 2008, Lee Schermerhorn wrote:

> It also appears that the patch series listed above required a non-empty
> nodemask with MPOL_DEFAULT.  However, I didn't test that.  With this
> patch, MPOL_DEFAULT effectively ignores the nodemask--empty or not.
> This is a change in behavior that I have argued against, but the
> regression tests don't test this, so I'm not going to attempt to address
> it with this patch.
> 

Excuse me, but there was significant discussion about this on LKML and I 
eventually did force MPOL_DEFAULT to require a non-empty nodemask 
specifically because of your demand that it should.  It didn't originally 
require this in my patchset, and now you're removing the exact same 
requirement that you demanded.

You said on February 13:

	1) we've discussed the issue of returning EINVAL for non-empty
	nodemasks with MPOL_DEFAULT.  By removing this restriction, we run
	the risk of breaking applications if we should ever want to define
	a semantic to non-empty node mask for MPOL_DEFAULT.

If you want to remove this requirement now (please get agreement from 
Paul) and are sure of your position, you'll at least need an update to 
Documentation/vm/numa-memory-policy.txt.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Regression:  Re: [patch -mm 2/4] mempolicy: create mempolicy_operations structure
  2008-03-07 21:48     ` David Rientjes
@ 2008-03-07 21:57       ` Paul Jackson
  2008-03-08 18:49       ` Lee Schermerhorn
  2008-03-12 19:33       ` [PATCH] Mempolicy: fix parsing of tmpfs mpol mount option Lee Schermerhorn
  2 siblings, 0 replies; 15+ messages in thread
From: Paul Jackson @ 2008-03-07 21:57 UTC (permalink / raw)
  To: David Rientjes
  Cc: Lee.Schermerhorn, akpm, clameter, ak, linux-kernel, linux-mm,
	eric.whitney

David wrote:
> If you want to remove this requirement now (please get agreement from Paul)

I'm ducking and running for cover ;).

Personally, I'm slightly in favor of not requiring the empty mask,
as I always that that empty mask check was a couple lines of non-
essential logic.  However I'm slightly in favor of not changing
this detail from what it has been for years, which would mean we
still checked for the empty mask.  And no doubt, if someone cares
to examine the record closely enough they will find where I took a
third position as well.

But I can't see where it actually matters enough to write home about.

So I'll quit writing, and agree to most anything.

-- 
                  I won't rest till it's the best ...
                  Programmer, Linux Scalability
                  Paul Jackson <pj@sgi.com> 1.940.382.4214

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Regression:  Re: [patch -mm 2/4] mempolicy: create mempolicy_operations structure
  2008-03-07 21:48     ` David Rientjes
  2008-03-07 21:57       ` Paul Jackson
@ 2008-03-08 18:49       ` Lee Schermerhorn
  2008-03-08 22:09         ` David Rientjes
  2008-03-12 19:33       ` [PATCH] Mempolicy: fix parsing of tmpfs mpol mount option Lee Schermerhorn
  2 siblings, 1 reply; 15+ messages in thread
From: Lee Schermerhorn @ 2008-03-08 18:49 UTC (permalink / raw)
  To: David Rientjes
  Cc: Andrew Morton, Paul Jackson, Christoph Lameter, Andi Kleen,
	linux-kernel, linux-mm, Eric Whitney

On Fri, 2008-03-07 at 13:48 -0800, David Rientjes wrote: 
> On Fri, 7 Mar 2008, Lee Schermerhorn wrote:
> 
> > It also appears that the patch series listed above required a non-empty
> > nodemask with MPOL_DEFAULT.  However, I didn't test that.  With this
> > patch, MPOL_DEFAULT effectively ignores the nodemask--empty or not.
> > This is a change in behavior that I have argued against, but the
> > regression tests don't test this, so I'm not going to attempt to address
> > it with this patch.
> > 
> 
> Excuse me, but there was significant discussion about this on LKML and I 
> eventually did force MPOL_DEFAULT to require a non-empty nodemask 
> specifically because of your demand that it should.  It didn't originally 
> require this in my patchset, and now you're removing the exact same 
> requirement that you demanded.
> 
> You said on February 13:
> 
> 	1) we've discussed the issue of returning EINVAL for non-empty
> 	nodemasks with MPOL_DEFAULT.  By removing this restriction, we run
> 	the risk of breaking applications if we should ever want to define
> 	a semantic to non-empty node mask for MPOL_DEFAULT.
> 
> If you want to remove this requirement now (please get agreement from 
> Paul) and are sure of your position, you'll at least need an update to 
> Documentation/vm/numa-memory-policy.txt.

Excuse me.  I thought that the discussion--my position, anyway--was
about preserving existing behavior for MPOL_DEFAULT which is to require
an EMPTY [or NULL--same effect] nodemask.  Not a NON-EMPTY one.  See:
http://www.kernel.org/doc/man-pages/online/pages/man2/set_mempolicy.2.html
It does appear that your patches now require a non-empty nodemask.  This
was intentional?

Is it, then, the case that our disagreement was based on the fact that
you thought I was advocating a non-empty nodemask with MPOL_DEFAULT?  No
wonder you said it didn't make sense. 

Since we can't seem to understand each other with ~English prose, I've
attached a little test program that demonstrates the behavior that I
expect.   This is not to belabor the point; just an attempt to establish
understanding.

Note:  in the subject patch, I didn't enforce this behavior because your
patch didn't [it enforced just the opposite], and I've pretty much given
up.  Although I prefer current behavior [before your series], if we
change it, we will need to change the man pages to remove the error
condition for non-empty nodemasks with MPOL_DEFAULT.

Later,
Lee


/*
 * test error returns for set_mempolicy(MPOL_DEFAULT, nodemask, maxnodes) for
 * null, empty and non-empty nodemasks.
 *
 * requires libnuma
 */
#include <sys/types.h>

#include <errno.h>
#include <numaif.h>
#include <numa.h>
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

void results(int ret, int ierr, int expected)
{
	if (ret) {
		printf("\tResults:   %s [%d]\n", strerror(ierr), ierr);
	} else {
		printf("\tResults:   No Error [0]\n");
	}
	printf("\tExpected:  %s [%d]\n",
			expected ? strerror(expected) : "No Error", expected);
}

int main(int argc, char *argv[])
{
	unsigned long nodemask;	/* hack:  single long word mask */
	int maxnodes = 4;	/* arbitrary max <= 8 * sizeof(nodemask) */
	int ret;

	printf("\n1: testing set_mempolicy(MPOL_DEFAULT, ...) with NULL nodemask:\n");
	ret = set_mempolicy(MPOL_DEFAULT, NULL, maxnodes);
	results(ret, errno, 0);	/* expect success */

	printf("\n2: testing set_mempolicy(MPOL_DEFAULT, ...) with non-NULL, "
			"but empty, nodemask:\n");
	nodemask = 0UL;
	ret = set_mempolicy(MPOL_DEFAULT, &nodemask, maxnodes);
	results(ret, errno, 0);	/* expect success */

	printf("\n2: testing set_mempolicy(MPOL_DEFAULT, ...) with non-NULL, "
			"non-empty nodemask:\n");
	nodemask = 1UL;
	ret = set_mempolicy(MPOL_DEFAULT, &nodemask, maxnodes);
	results(ret, errno, EINVAL);	/* expect EINVAL */

}





^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Regression:  Re: [patch -mm 2/4] mempolicy: create mempolicy_operations structure
  2008-03-08 18:49       ` Lee Schermerhorn
@ 2008-03-08 22:09         ` David Rientjes
  2008-03-10 14:58           ` Lee Schermerhorn
  0 siblings, 1 reply; 15+ messages in thread
From: David Rientjes @ 2008-03-08 22:09 UTC (permalink / raw)
  To: Lee Schermerhorn
  Cc: Andrew Morton, Paul Jackson, Christoph Lameter, Andi Kleen,
	linux-kernel, linux-mm, Eric Whitney

On Sat, 8 Mar 2008, Lee Schermerhorn wrote:

> > Excuse me, but there was significant discussion about this on LKML and I 
> > eventually did force MPOL_DEFAULT to require a non-empty nodemask 

Correction: s/non-empty/empty

> > specifically because of your demand that it should.  It didn't originally 
> > require this in my patchset, and now you're removing the exact same 
> > requirement that you demanded.
> > 
> > You said on February 13:
> > 
> > 	1) we've discussed the issue of returning EINVAL for non-empty
> > 	nodemasks with MPOL_DEFAULT.  By removing this restriction, we run
> > 	the risk of breaking applications if we should ever want to define
> > 	a semantic to non-empty node mask for MPOL_DEFAULT.
> > 
> > If you want to remove this requirement now (please get agreement from 
> > Paul) and are sure of your position, you'll at least need an update to 
> > Documentation/vm/numa-memory-policy.txt.
> 
> Excuse me.  I thought that the discussion--my position, anyway--was
> about preserving existing behavior for MPOL_DEFAULT which is to require
> an EMPTY [or NULL--same effect] nodemask.  Not a NON-EMPTY one.  See:
> http://www.kernel.org/doc/man-pages/online/pages/man2/set_mempolicy.2.html
> It does appear that your patches now require a non-empty nodemask.  This
> was intentional?
> 

The first and second set did not have this requirement, but the third set 
does (not currently in -mm), so I've changed it back.  Hopefully there's 
no confusion and we can settle on a solution without continuously 
revisiting the topic.

My position was originally to allow any type of nodemask to be passed with 
MPOL_DEFAULT since its not used.  You asked for strict argument checking 
and so after some debate I changed it to require an empty nodemask mainly 
because I didn't want the patchset to stall on such a minor point.  But in 
your regression fix, you expressed the desire once again to allow it to 
accept any nodemask because the testsuite does not check for it.

So if you'd like to do that, I'd encourage you to submit it as a separate 
patch and open it up for review.

What is currently in -mm and what I will be posting shortly is the updated 
regression fix.  All of these patches require that MPOL_DEFAULT include a 
NULL pointer or empty nodemask passed via the two syscalls.

> Note:  in the subject patch, I didn't enforce this behavior because your
> patch didn't [it enforced just the opposite], and I've pretty much given
> up.  Although I prefer current behavior [before your series], if we
> change it, we will need to change the man pages to remove the error
> condition for non-empty nodemasks with MPOL_DEFAULT.
> 

With my patches it still requires a NULL pointer or empty nodemask and 
I've updated Documentation/vm/numa_memory_policy.txt to explicitly say its 
an error if a non-empty nodemask is passed.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Regression:  Re: [patch -mm 2/4] mempolicy: create mempolicy_operations structure
  2008-03-08 22:09         ` David Rientjes
@ 2008-03-10 14:58           ` Lee Schermerhorn
  0 siblings, 0 replies; 15+ messages in thread
From: Lee Schermerhorn @ 2008-03-10 14:58 UTC (permalink / raw)
  To: David Rientjes
  Cc: Andrew Morton, Paul Jackson, Christoph Lameter, Andi Kleen,
	linux-kernel, linux-mm, Eric Whitney

On Sat, 2008-03-08 at 14:09 -0800, David Rientjes wrote: 
> On Sat, 8 Mar 2008, Lee Schermerhorn wrote:
> 
> > > Excuse me, but there was significant discussion about this on LKML and I 
> > > eventually did force MPOL_DEFAULT to require a non-empty nodemask 
> 
> Correction: s/non-empty/empty

That makes more sense.  I agree.  more below... 
> 
> > > specifically because of your demand that it should.  It didn't originally 
> > > require this in my patchset, and now you're removing the exact same 
> > > requirement that you demanded.
> > > 
> > > You said on February 13:
> > > 
> > > 	1) we've discussed the issue of returning EINVAL for non-empty
> > > 	nodemasks with MPOL_DEFAULT.  By removing this restriction, we run
> > > 	the risk of breaking applications if we should ever want to define
> > > 	a semantic to non-empty node mask for MPOL_DEFAULT.
> > > 
> > > If you want to remove this requirement now (please get agreement from 
> > > Paul) and are sure of your position, you'll at least need an update to 
> > > Documentation/vm/numa-memory-policy.txt.
> > 
> > Excuse me.  I thought that the discussion--my position, anyway--was
> > about preserving existing behavior for MPOL_DEFAULT which is to require
> > an EMPTY [or NULL--same effect] nodemask.  Not a NON-EMPTY one.  See:
> > http://www.kernel.org/doc/man-pages/online/pages/man2/set_mempolicy.2.html
> > It does appear that your patches now require a non-empty nodemask.  This
> > was intentional?
> > 
> 
> The first and second set did not have this requirement, but the third set 
> does (not currently in -mm), so I've changed it back.  Hopefully there's 
> no confusion and we can settle on a solution without continuously 
> revisiting the topic.
> 
> My position was originally to allow any type of nodemask to be passed with 
> MPOL_DEFAULT since its not used.  You asked for strict argument checking 
> and so after some debate I changed it to require an empty nodemask mainly 
> because I didn't want the patchset to stall on such a minor point.  But in 
> your regression fix, you expressed the desire once again to allow it to 
> accept any nodemask because the testsuite does not check for it.

Not a desire.  Just that when I fixed the MPOL_PREFERRED with empty node
mask regression, I also fixed mpol_new() not to require a non-empty
nodemask with MPOL_DEFAULT.  I didn't go the extra step to require an
empty one.  I'm tiring of the subject, as I think you are, and didn't
want to argue it anymore.  So, I was willing to "cave" on that point.

> 
> So if you'd like to do that, I'd encourage you to submit it as a separate 
> patch and open it up for review.

No, I'm quite happy if, after your patches, the APIs retain the previous
behavior w/rt nodemask error checking.

> 
> What is currently in -mm and what I will be posting shortly is the updated 
> regression fix.  All of these patches require that MPOL_DEFAULT include a 
> NULL pointer or empty nodemask passed via the two syscalls.
> 
> > Note:  in the subject patch, I didn't enforce this behavior because your
> > patch didn't [it enforced just the opposite], and I've pretty much given
> > up.  Although I prefer current behavior [before your series], if we
> > change it, we will need to change the man pages to remove the error
> > condition for non-empty nodemasks with MPOL_DEFAULT.
> > 
> 
> With my patches it still requires a NULL pointer or empty nodemask and 
> I've updated Documentation/vm/numa_memory_policy.txt to explicitly say its 
> an error if a non-empty nodemask is passed.

Good.

Do you intend for your patch entitled "[patch -mm v2] mempolicy:
disallow static or relative flags for local preferred mode" to replace
the patch that I sent in to repair the regression?  Looks that way.
I'll replace it in my tree and retest.

Lee





^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH] Mempolicy:  fix parsing of tmpfs mpol mount option
  2008-03-07 21:48     ` David Rientjes
  2008-03-07 21:57       ` Paul Jackson
  2008-03-08 18:49       ` Lee Schermerhorn
@ 2008-03-12 19:33       ` Lee Schermerhorn
  2 siblings, 0 replies; 15+ messages in thread
From: Lee Schermerhorn @ 2008-03-12 19:33 UTC (permalink / raw)
  To: Andrew Morton
  Cc: DavidRientjes, Paul Jackson, Christoph Lameter, Andi Kleen,
	linux-kernel, linux-mm, Eric Whitney

Mempolicy:  fix parsing of tmpfs mpol= mount option.

Against:  2.6.25-rc5-mm1 atop:
+ mempolicy-disallow-static-or-relative-flags-for-local-preferred-mode
+ mempolicy-support-optional-mode-flags-fix [Hugh]

Parsing of new mode flags in the tmpfs mpol mount option is
slightly broken:

Setting a valid flag works OK:
	#mount -o remount,mpol=bind=static:1-2 /dev/shm
	#mount
	...
	tmpfs on /dev/shm type tmpfs (rw,mpol=bind=static:1-2)
	...

However, we can't remove them or change them, once we've
set a valid flag:

	#mount -o remount,mpol=bind:1-2 /dev/shm
	#mount
	...
	tmpfs on /dev/shm type tmpfs (rw,mpol=bind:1-2)
	...

It SAYS it removed it, but that's just a copy of the input
string.  If we now try to set it to a different flag, we
get:

	#mount -o remount,mpol=bind=relative:1-2 /dev/shm
	mount: /dev/shm not mounted already, or bad option

And on the console, we see:
	tmpfs: Bad value 'bind' for mount option 'mpol'
	                      ^ lost remainder of string

Furthermore, bogus flags are accepted with out error.
Granted, they are a no-op:

	#mount -o remount,mpol=interleave=foo:0-3 /dev/shm
	#mount
	...
	tmpfs on /dev/shm type tmpfs (rw,mpol=interleave=foo:0-3)

Again, that's just a copy of the input string shown by the
mount command.

This patch fixes the behavior by pre-zeroing the flags so that
only one of the mutually exclusive flags can be set at one time.
It also reports an error when an unrecognized flag is specified.

The check for both flags being set is removed because it can't
happen with this implementation.  If we ever want to support
multiple non-exclusive flags, this area will need rework and we
will need to check that any mutually exclusive flags aren't
specified.

Signed-off-by:  Lee Schermerhorn <lee.schermerhorn@hp.com>

 mm/shmem.c |   16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

Index: linux-2.6.25-rc5-mm1/mm/shmem.c
===================================================================
--- linux-2.6.25-rc5-mm1.orig/mm/shmem.c	2008-03-12 14:15:27.000000000 -0400
+++ linux-2.6.25-rc5-mm1/mm/shmem.c	2008-03-12 14:18:09.000000000 -0400
@@ -1128,20 +1128,26 @@ static int shmem_parse_mpol(char *value,
 			*policy_nodes = node_states[N_HIGH_MEMORY];
 		err = 0;
 	}
+
+	*mode_flags = 0;
 	if (flags) {
+		/*
+		 * Currently, we only support two mutually exclusive
+		 * mode flags.
+		 */
 		if (!strcmp(flags, "static"))
 			*mode_flags |= MPOL_F_STATIC_NODES;
-		if (!strcmp(flags, "relative"))
+		else if (!strcmp(flags, "relative"))
 			*mode_flags |= MPOL_F_RELATIVE_NODES;
-
-		if ((*mode_flags & MPOL_F_STATIC_NODES) &&
-		    (*mode_flags & MPOL_F_RELATIVE_NODES))
-			err = 1;
+		else
+			err = 1;	/* unrecognized flag */
 	}
 out:
 	/* Restore string for error message */
 	if (nodelist)
 		*--nodelist = ':';
+	if (flags)
+		*--flags = '=';
 	return err;
 }
 



^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2008-03-12 19:33 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-03-06 20:05 [patch -mm 1/4] mempolicy: move rebind functions David Rientjes
2008-03-06 20:05 ` [patch -mm 2/4] mempolicy: create mempolicy_operations structure David Rientjes
2008-03-06 20:05   ` [patch -mm 3/4] mempolicy: small header file cleanup David Rientjes
2008-03-06 20:05     ` [patch -mm 4/4] mempolicy: remove includes for duplicate headers David Rientjes
2008-03-06 20:19       ` Paul Jackson
2008-03-06 20:51         ` David Rientjes
2008-03-06 21:01           ` Paul Jackson
2008-03-07  8:45             ` Andrew Morton
2008-03-07 20:44   ` Regression: Re: [patch -mm 2/4] mempolicy: create mempolicy_operations structure Lee Schermerhorn
2008-03-07 21:48     ` David Rientjes
2008-03-07 21:57       ` Paul Jackson
2008-03-08 18:49       ` Lee Schermerhorn
2008-03-08 22:09         ` David Rientjes
2008-03-10 14:58           ` Lee Schermerhorn
2008-03-12 19:33       ` [PATCH] Mempolicy: fix parsing of tmpfs mpol mount option Lee Schermerhorn

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).