LKML Archive on lore.kernel.org
* [PATCH] slab: cache alloc cleanups
@ 2007-01-02 13:47 Pekka J Enberg
2007-01-02 14:29 ` Andy Whitcroft
` (4 more replies)
0 siblings, 5 replies; 16+ messages in thread
From: Pekka J Enberg @ 2007-01-02 13:47 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, apw, hch, manfred, christoph, pj
[Andrew, I have been unable to find a NUMA-capable tester for this patch,
so can we please put this in to -mm for some exposure?]
From: Pekka Enberg <penberg@cs.helsinki.fi>
This patch cleans up __cache_alloc and __cache_alloc_node functions. We no
longer need to do NUMA_BUILD tricks and the UMA allocation path is much
simpler. Note: we now do alternate_node_alloc() for kmem_cache_alloc_node as
well.
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Christoph Lameter <christoph@lameter.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
mm/slab.c | 165 +++++++++++++++++++++++++++++++-------------------------------
1 file changed, 84 insertions(+), 81 deletions(-)
Index: 2.6/mm/slab.c
===================================================================
--- 2.6.orig/mm/slab.c
+++ 2.6/mm/slab.c
@@ -3197,35 +3197,6 @@ static inline void *____cache_alloc(stru
return objp;
}
-static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
- gfp_t flags, void *caller)
-{
- unsigned long save_flags;
- void *objp = NULL;
-
- cache_alloc_debugcheck_before(cachep, flags);
-
- local_irq_save(save_flags);
-
- if (unlikely(NUMA_BUILD &&
- current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY)))
- objp = alternate_node_alloc(cachep, flags);
-
- if (!objp)
- objp = ____cache_alloc(cachep, flags);
- /*
- * We may just have run out of memory on the local node.
- * ____cache_alloc_node() knows how to locate memory on other nodes
- */
- if (NUMA_BUILD && !objp)
- objp = ____cache_alloc_node(cachep, flags, numa_node_id());
- local_irq_restore(save_flags);
- objp = cache_alloc_debugcheck_after(cachep, flags, objp,
- caller);
- prefetchw(objp);
- return objp;
-}
-
#ifdef CONFIG_NUMA
/*
* Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY.
@@ -3383,7 +3354,90 @@ must_grow:
done:
return obj;
}
-#endif
+
+/**
+ * __do_cache_alloc_node - Allocate an object on the specified node
+ * @cache: The cache to allocate from.
+ * @flags: See kmalloc().
+ * @nodeid: node number of the target node.
+ *
+ * Fallback to other node is possible if __GFP_THISNODE is not set.
+ */
+static __always_inline void *
+__do_cache_alloc_node(struct kmem_cache *cache, gfp_t flags, int nodeid)
+{
+ void *obj;
+
+ if (nodeid == -1 || nodeid == numa_node_id()) {
+ if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
+ obj = alternate_node_alloc(cache, flags);
+ if (obj)
+ goto out;
+ }
+
+ /*
+ * Use the locally cached objects if possible. However,
+ * ____cache_alloc does not allow fallback to other nodes.
+ * It may fail while we still have objects on other nodes
+ * available.
+ */
+ obj = ____cache_alloc(cache, flags);
+ if (obj)
+ goto out;
+
+ /* Fall back to other nodes. */
+ obj = ____cache_alloc_node(cache, flags, numa_node_id());
+ } else {
+ if (likely(cache->nodelists[nodeid]))
+ obj = ____cache_alloc_node(cache, flags, nodeid);
+ else {
+ /* Node is not bootstrapped yet. */
+ if (!(flags & __GFP_THISNODE))
+ obj = fallback_alloc(cache, flags);
+ else
+ obj = NULL;
+ }
+ }
+ out:
+ return obj;
+}
+
+#else
+
+static __always_inline void *
+__do_cache_alloc_node(struct kmem_cache *cache, gfp_t flags, int nodeid)
+{
+ /*
+ * For UMA, we always allocate from the local cache.
+ */
+ return ____cache_alloc(cache, flags);
+}
+#endif /* CONFIG_NUMA */
+
+static __always_inline void *
+__cache_alloc_node(struct kmem_cache *cache, gfp_t flags, int nodeid,
+ void *caller)
+{
+ unsigned long save_flags;
+ void *obj;
+
+ cache_alloc_debugcheck_before(cache, flags);
+ local_irq_save(save_flags);
+ obj = __do_cache_alloc_node(cache, flags, nodeid);
+ local_irq_restore(save_flags);
+ obj = cache_alloc_debugcheck_after(cache, flags, obj, caller);
+ return obj;
+}
+
+static __always_inline void *
+__cache_alloc(struct kmem_cache *cache, gfp_t flags, void *caller)
+{
+ void *obj;
+
+ obj = __cache_alloc_node(cache, flags, -1, caller);
+ prefetchw(obj);
+ return obj;
+}
/*
* Caller needs to acquire correct kmem_list's list_lock
@@ -3582,57 +3636,6 @@ out:
}
#ifdef CONFIG_NUMA
-/**
- * kmem_cache_alloc_node - Allocate an object on the specified node
- * @cachep: The cache to allocate from.
- * @flags: See kmalloc().
- * @nodeid: node number of the target node.
- * @caller: return address of caller, used for debug information
- *
- * Identical to kmem_cache_alloc but it will allocate memory on the given
- * node, which can improve the performance for cpu bound structures.
- *
- * Fallback to other node is possible if __GFP_THISNODE is not set.
- */
-static __always_inline void *
-__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
- int nodeid, void *caller)
-{
- unsigned long save_flags;
- void *ptr = NULL;
-
- cache_alloc_debugcheck_before(cachep, flags);
- local_irq_save(save_flags);
-
- if (unlikely(nodeid == -1))
- nodeid = numa_node_id();
-
- if (likely(cachep->nodelists[nodeid])) {
- if (nodeid == numa_node_id()) {
- /*
- * Use the locally cached objects if possible.
- * However ____cache_alloc does not allow fallback
- * to other nodes. It may fail while we still have
- * objects on other nodes available.
- */
- ptr = ____cache_alloc(cachep, flags);
- }
- if (!ptr) {
- /* ___cache_alloc_node can fall back to other nodes */
- ptr = ____cache_alloc_node(cachep, flags, nodeid);
- }
- } else {
- /* Node not bootstrapped yet */
- if (!(flags & __GFP_THISNODE))
- ptr = fallback_alloc(cachep, flags);
- }
-
- local_irq_restore(save_flags);
- ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
-
- return ptr;
-}
-
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
return __cache_alloc_node(cachep, flags, nodeid,
* Re: [PATCH] slab: cache alloc cleanups
2007-01-02 13:47 [PATCH] slab: cache alloc cleanups Pekka J Enberg
@ 2007-01-02 14:29 ` Andy Whitcroft
2007-01-02 16:25 ` Christoph Lameter
` (3 subsequent siblings)
4 siblings, 0 replies; 16+ messages in thread
From: Andy Whitcroft @ 2007-01-02 14:29 UTC (permalink / raw)
To: Pekka J Enberg; +Cc: akpm, linux-kernel, hch, manfred, christoph, pj
Pekka J Enberg wrote:
> [Andrew, I have been unable to find a NUMA-capable tester for this patch,
> so can we please put this in to -mm for some exposure?]
>
> From: Pekka Enberg <penberg@cs.helsinki.fi>
>
> This patch cleans up __cache_alloc and __cache_alloc_node functions. We no
> longer need to do NUMA_BUILD tricks and the UMA allocation path is much
> simpler. Note: we now do alternate_node_alloc() for kmem_cache_alloc_node as
> well.
I'll push this through our tests here if that helps. I need to rerun
the -rc2-mm1 tests by the looks of it ... looks like the test lab had
some time off over Christmas.
-apw
* Re: [PATCH] slab: cache alloc cleanups
2007-01-02 13:47 [PATCH] slab: cache alloc cleanups Pekka J Enberg
2007-01-02 14:29 ` Andy Whitcroft
@ 2007-01-02 16:25 ` Christoph Lameter
2007-01-02 20:27 ` Pekka Enberg
2007-01-02 20:22 ` Andrew Morton
` (2 subsequent siblings)
4 siblings, 1 reply; 16+ messages in thread
From: Christoph Lameter @ 2007-01-02 16:25 UTC (permalink / raw)
To: Pekka J Enberg; +Cc: akpm, linux-kernel, apw, hch, manfred, christoph, pj
On Tue, 2 Jan 2007, Pekka J Enberg wrote:
> +
> + if (nodeid == -1 || nodeid == numa_node_id()) {
> + if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
> + obj = alternate_node_alloc(cache, flags);
> + if (obj)
> + goto out;
> + }
This reintroduces a bug that was fixed a while ago:
kmalloc_node() must never obey memory policies, but
alternate_node_alloc() implements memory policies.
With this patch, kmalloc_node(..., numa_node_id()) would again get redirected
to other nodes if a memory policy is in effect.
* Re: [PATCH] slab: cache alloc cleanups
2007-01-02 16:25 ` Christoph Lameter
@ 2007-01-02 20:27 ` Pekka Enberg
0 siblings, 0 replies; 16+ messages in thread
From: Pekka Enberg @ 2007-01-02 20:27 UTC (permalink / raw)
To: Christoph Lameter; +Cc: akpm, linux-kernel, apw, hch, manfred, christoph, pj
On Tue, 2 Jan 2007, Pekka J Enberg wrote:
> > +
> > + if (nodeid == -1 || nodeid == numa_node_id()) {
> > + if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
> > + obj = alternate_node_alloc(cache, flags);
> > + if (obj)
> > + goto out;
> > + }
On 1/2/07, Christoph Lameter <clameter@sgi.com> wrote:
> This reintroduces a bug that was fixed a while ago.
Aah, well, we could add a can_mempolicy parameter, but I'm not sure
that would still be an improvement over the current version...
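For illustration, a rough sketch of what such a can_mempolicy parameter
could look like on top of the patched __do_cache_alloc_node() (hypothetical
code, not something posted in this thread or merged):

/*
 * Hypothetical sketch: kmem_cache_alloc() would pass can_mempolicy = true,
 * while kmalloc_node()/kmem_cache_alloc_node() would pass false, so an
 * explicit node request is never redirected by a memory policy.
 */
static __always_inline void *
__do_cache_alloc_node(struct kmem_cache *cache, gfp_t flags, int nodeid,
		      bool can_mempolicy)
{
	void *obj;

	if (nodeid == -1 || nodeid == numa_node_id()) {
		if (can_mempolicy &&
		    unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
			obj = alternate_node_alloc(cache, flags);
			if (obj)
				return obj;
		}
		obj = ____cache_alloc(cache, flags);
		if (obj)
			return obj;
		/* Local node is out of memory; try the other nodes. */
		return ____cache_alloc_node(cache, flags, numa_node_id());
	}

	if (likely(cache->nodelists[nodeid]))
		return ____cache_alloc_node(cache, flags, nodeid);

	/* Node is not bootstrapped yet. */
	return (flags & __GFP_THISNODE) ? NULL : fallback_alloc(cache, flags);
}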
* Re: [PATCH] slab: cache alloc cleanups
2007-01-02 13:47 [PATCH] slab: cache alloc cleanups Pekka J Enberg
2007-01-02 14:29 ` Andy Whitcroft
2007-01-02 16:25 ` Christoph Lameter
@ 2007-01-02 20:22 ` Andrew Morton
2007-01-04 21:15 ` Christoph Hellwig
2007-03-22 22:34 ` [PATCH] slab: NUMA kmem_cache diet Eric Dumazet
4 siblings, 0 replies; 16+ messages in thread
From: Andrew Morton @ 2007-01-02 20:22 UTC (permalink / raw)
To: Pekka J Enberg; +Cc: linux-kernel, apw, hch, manfred, christoph, pj
On Tue, 2 Jan 2007 15:47:06 +0200 (EET)
Pekka J Enberg <penberg@cs.helsinki.fi> wrote:
> I have been unable to find a NUMA-capable tester for this patch,
Any x86_64 box can be used to test NUMA code via the numa=fake=N boot option.
fake-numa is somewhat sick in mainline and you might find that it doesn't
work right on some machines, or that it fails with high values of N, but
works OK with N=2. There are fixes to address this problem in -mm.
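For example, after booting with numa=fake=2, a quick libnuma check like the
one below (my illustration, not from this thread; assumes libnuma is
installed, build with "gcc check_nodes.c -lnuma") confirms the fake nodes
are visible to user space:

/* check_nodes.c: report how many NUMA nodes the kernel exposes. */
#include <stdio.h>
#include <numa.h>

int main(void)
{
	if (numa_available() < 0) {
		printf("NUMA not available\n");
		return 1;
	}
	/* With numa=fake=2 this should report a max node id of 1. */
	printf("max node id: %d\n", numa_max_node());
	return 0;
}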
* Re: [PATCH] slab: cache alloc cleanups
2007-01-02 13:47 [PATCH] slab: cache alloc cleanups Pekka J Enberg
` (2 preceding siblings ...)
2007-01-02 20:22 ` Andrew Morton
@ 2007-01-04 21:15 ` Christoph Hellwig
2007-01-04 21:23 ` Pekka Enberg
2007-03-22 22:34 ` [PATCH] slab: NUMA kmem_cache diet Eric Dumazet
4 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2007-01-04 21:15 UTC (permalink / raw)
To: Pekka J Enberg; +Cc: akpm, linux-kernel, apw, hch, manfred, christoph, pj
On Tue, Jan 02, 2007 at 03:47:06PM +0200, Pekka J Enberg wrote:
> [Andrew, I have been unable to find a NUMA-capable tester for this patch,
> so can we please put this in to -mm for some exposure?]
>
> From: Pekka Enberg <penberg@cs.helsinki.fi>
>
> This patch cleans up __cache_alloc and __cache_alloc_node functions. We no
> longer need to do NUMA_BUILD tricks and the UMA allocation path is much
> simpler. Note: we now do alternate_node_alloc() for kmem_cache_alloc_node as
> well.
Seems to work nicely on my 2-node Cell blade.
* [PATCH] slab: NUMA kmem_cache diet
2007-01-02 13:47 [PATCH] slab: cache alloc cleanups Pekka J Enberg
` (3 preceding siblings ...)
2007-01-04 21:15 ` Christoph Hellwig
@ 2007-03-22 22:34 ` Eric Dumazet
2007-03-23 7:09 ` Pekka J Enberg
2007-03-23 13:00 ` Andy Whitcroft
4 siblings, 2 replies; 16+ messages in thread
From: Eric Dumazet @ 2007-03-22 22:34 UTC (permalink / raw)
To: akpm; +Cc: Pekka J Enberg, linux-kernel, apw, hch, manfred, christoph, pj
[-- Attachment #1: Type: text/plain, Size: 684 bytes --]
Some NUMA machines have a big MAX_NUMNODES (possibly 1024), but fewer possible
nodes. This patch dynamically sizes the 'struct kmem_cache' to allocate only
needed space.
I moved the nodelists[] field to the end of struct kmem_cache, and use the
following computation in kmem_cache_init():
cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
nr_node_ids * sizeof(struct kmem_list3 *);
On my two-node x86_64 machine, kmem_cache.obj_size is now 192 instead of 704
(this is because on x86_64, MAX_NUMNODES is 64).
On bigger NUMA setups, this might reduce the gfporder of "cache_cache"
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
[-- Attachment #2: kmem_cache_diet.patch --]
[-- Type: text/plain, Size: 1772 bytes --]
diff --git a/mm/slab.c b/mm/slab.c
index abf46ae..b187618 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -389,7 +389,6 @@ struct kmem_cache {
unsigned int buffer_size;
u32 reciprocal_buffer_size;
/* 3) touched by every alloc & free from the backend */
- struct kmem_list3 *nodelists[MAX_NUMNODES];
unsigned int flags; /* constant flags */
unsigned int num; /* # of objs per slab */
@@ -444,6 +443,17 @@ #if DEBUG
int obj_offset;
int obj_size;
#endif
+ /*
+ * We put nodelists[] at the end of kmem_cache, because we want to size
+ * this array to nr_node_ids slots instead of MAX_NUMNODES
+ * (see kmem_cache_init())
+ * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
+ * is statically defined, so we reserve the max number of nodes.
+ */
+ struct kmem_list3 *nodelists[MAX_NUMNODES];
+ /*
+ * Do not add fields after nodelists[]
+ */
};
#define CFLGS_OFF_SLAB (0x80000000UL)
@@ -678,9 +688,6 @@ static struct kmem_cache cache_cache = {
.shared = 1,
.buffer_size = sizeof(struct kmem_cache),
.name = "kmem_cache",
-#if DEBUG
- .obj_size = sizeof(struct kmem_cache),
-#endif
};
#define BAD_ALIEN_MAGIC 0x01020304ul
@@ -1437,6 +1444,15 @@ void __init kmem_cache_init(void)
cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE];
+ /*
+ * struct kmem_cache size depends on nr_node_ids, which
+ * can be less than MAX_NUMNODES.
+ */
+ cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
+ nr_node_ids * sizeof(struct kmem_list3 *);
+#if DEBUG
+ cache_cache.obj_size = cache_cache.buffer_size;
+#endif
cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
cache_line_size());
cache_cache.reciprocal_buffer_size =
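For illustration, the offsetof() sizing trick above in a stand-alone sketch
(made-up structure and variable names rather than the real kmem_cache):

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

#define MAX_NODES 64			/* plays the role of MAX_NUMNODES */

struct fake_cache {
	unsigned int buffer_size;
	/* Trailing array: only the first nr_node_ids slots are ever used. */
	void *nodelists[MAX_NODES];
};

int main(void)
{
	int nr_node_ids = 2;		/* e.g. a two-node machine */
	size_t trimmed = offsetof(struct fake_cache, nodelists) +
			 nr_node_ids * sizeof(void *);
	struct fake_cache *c = malloc(trimmed);	/* allocate only what is needed */

	printf("full: %zu bytes, trimmed: %zu bytes\n",
	       sizeof(struct fake_cache), trimmed);
	free(c);
	return 0;
}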
* Re: [PATCH] slab: NUMA kmem_cache diet
2007-03-22 22:34 ` [PATCH] slab: NUMA kmem_cache diet Eric Dumazet
@ 2007-03-23 7:09 ` Pekka J Enberg
2007-03-23 7:45 ` Eric Dumazet
2007-03-23 13:00 ` Andy Whitcroft
1 sibling, 1 reply; 16+ messages in thread
From: Pekka J Enberg @ 2007-03-23 7:09 UTC (permalink / raw)
To: Eric Dumazet; +Cc: akpm, linux-kernel, apw, hch, manfred, christoph, pj
(Please inline patches to the mail, makes it easier to review.)
On Thu, 22 Mar 2007, Eric Dumazet wrote:
> Some NUMA machines have a big MAX_NUMNODES (possibly 1024), but fewer possible
> nodes. This patch dynamically sizes the 'struct kmem_cache' to allocate only
> needed space.
>
> I moved the nodelists[] field to the end of struct kmem_cache, and use the
> following computation in kmem_cache_init():
Hmm, what seems a bit worrying is:
diff --git a/mm/slab.c b/mm/slab.c
index abf46ae..b187618 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -389,7 +389,6 @@ struct kmem_cache {
unsigned int buffer_size;
u32 reciprocal_buffer_size;
/* 3) touched by every alloc & free from the backend */
- struct kmem_list3 *nodelists[MAX_NUMNODES];
I think nodelists is placed at the beginning of the struct for a reason.
But I have no idea if it actually makes any difference...
Pekka
* Re: [PATCH] slab: NUMA kmem_cache diet
2007-03-23 7:09 ` Pekka J Enberg
@ 2007-03-23 7:45 ` Eric Dumazet
0 siblings, 0 replies; 16+ messages in thread
From: Eric Dumazet @ 2007-03-23 7:45 UTC (permalink / raw)
To: Pekka J Enberg; +Cc: akpm, linux-kernel, apw, hch, manfred, christoph, pj
Pekka J Enberg wrote:
> (Please inline patches to the mail, makes it easier to review.)
>
> On Thu, 22 Mar 2007, Eric Dumazet wrote:
>> Some NUMA machines have a big MAX_NUMNODES (possibly 1024), but fewer possible
>> nodes. This patch dynamically sizes the 'struct kmem_cache' to allocate only
>> needed space.
>>
>> I moved the nodelists[] field to the end of struct kmem_cache, and use the
>> following computation in kmem_cache_init():
>
> Hmm, what seems a bit worrying is:
>
> diff --git a/mm/slab.c b/mm/slab.c
> index abf46ae..b187618 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -389,7 +389,6 @@ struct kmem_cache {
> unsigned int buffer_size;
> u32 reciprocal_buffer_size;
> /* 3) touched by every alloc & free from the backend */
> - struct kmem_list3 *nodelists[MAX_NUMNODES];
>
> I think nodelists is placed at the beginning of the struct for a reason.
> But I have no idea if it actually makes any difference...
It might make a difference if STATS is on, because freehit/freemiss might
share a cache line with nodelists. Apart from that, a kmem_cache struct is
read-mostly: all changes are done outside of it, via array_cache or nodelists[].
Anyway, slab STATS is already an SMP/NUMA nightmare because of cache line
ping-pongs. We might place the STATS counters in one or more dedicated cache
lines...
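For what it's worth, a minimal stand-alone sketch of that "dedicated cache
line" idea, using plain C11 alignment and an assumed 64-byte line size rather
than the kernel's ____cacheline_aligned helpers:

#include <stdalign.h>
#include <stddef.h>
#include <stdio.h>

#define CACHE_LINE 64	/* assumed cache line size */

struct fake_cache {
	/* read-mostly fields stay packed together... */
	unsigned int buffer_size;
	void *nodelists[2];
	/* ...while frequently written counters start on their own line, so
	 * updating them does not invalidate the read-mostly data above. */
	alignas(CACHE_LINE) unsigned long freehit;
	unsigned long freemiss;
};

int main(void)
{
	printf("counters start at offset %zu\n",
	       offsetof(struct fake_cache, freehit));
	return 0;
}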
* Re: [PATCH] slab: NUMA kmem_cache diet
2007-03-22 22:34 ` [PATCH] slab: NUMA kmem_cache diet Eric Dumazet
2007-03-23 7:09 ` Pekka J Enberg
@ 2007-03-23 13:00 ` Andy Whitcroft
2007-03-23 14:52 ` Christoph Lameter
1 sibling, 1 reply; 16+ messages in thread
From: Andy Whitcroft @ 2007-03-23 13:00 UTC (permalink / raw)
To: Eric Dumazet
Cc: akpm, Pekka J Enberg, linux-kernel, hch, manfred, christoph, pj
Eric Dumazet wrote:
> Some NUMA machines have a big MAX_NUMNODES (possibly 1024), but fewer
> possible nodes. This patch dynamically sizes the 'struct kmem_cache' to
> allocate only needed space.
>
> I moved the nodelists[] field to the end of struct kmem_cache, and use the
> following computation in kmem_cache_init():
>
> cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
> nr_node_ids * sizeof(struct kmem_list3 *);
>
>
> On my two-node x86_64 machine, kmem_cache.obj_size is now 192 instead
> of 704
> (this is because on x86_64, MAX_NUMNODES is 64).
>
> On bigger NUMA setups, this might reduce the gfporder of "cache_cache"
That is a dramatic size difference, and I seem to have 128 slabs here, wow.
I'll try to find some time to test this on some of our NUMA kit.
>
> Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
>
>
> ------------------------------------------------------------------------
>
> diff --git a/mm/slab.c b/mm/slab.c
> index abf46ae..b187618 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -389,7 +389,6 @@ struct kmem_cache {
> unsigned int buffer_size;
> u32 reciprocal_buffer_size;
> /* 3) touched by every alloc & free from the backend */
> - struct kmem_list3 *nodelists[MAX_NUMNODES];
>
> unsigned int flags; /* constant flags */
> unsigned int num; /* # of objs per slab */
> @@ -444,6 +443,17 @@ #if DEBUG
> int obj_offset;
> int obj_size;
> #endif
> + /*
> + * We put nodelists[] at the end of kmem_cache, because we want to size
> + * this array to nr_node_ids slots instead of MAX_NUMNODES
> + * (see kmem_cache_init())
> + * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
> + * is statically defined, so we reserve the max number of nodes.
> + */
> + struct kmem_list3 *nodelists[MAX_NUMNODES];
> + /*
> + * Do not add fields after nodelists[]
> + */
> };
>
> #define CFLGS_OFF_SLAB (0x80000000UL)
> @@ -678,9 +688,6 @@ static struct kmem_cache cache_cache = {
> .shared = 1,
> .buffer_size = sizeof(struct kmem_cache),
> .name = "kmem_cache",
> -#if DEBUG
> - .obj_size = sizeof(struct kmem_cache),
> -#endif
Is there any reason not to initialise .obj_size here? You are
initialising both .buffer_size and .obj_size in kmem_cache_init() anyhow,
so I would expect either both or neither to be initialised statically in
your new scheme. Doing only one seems very strange.
> };
>
> #define BAD_ALIEN_MAGIC 0x01020304ul
> @@ -1437,6 +1444,15 @@ void __init kmem_cache_init(void)
> cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
> cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE];
>
> + /*
> + * struct kmem_cache size depends on nr_node_ids, which
> + * can be less than MAX_NUMNODES.
> + */
> + cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
> + nr_node_ids * sizeof(struct kmem_list3 *);
> +#if DEBUG
> + cache_cache.obj_size = cache_cache.buffer_size;
> +#endif
> cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
> cache_line_size());
> cache_cache.reciprocal_buffer_size =
-apw
* Re: [PATCH] slab: NUMA kmem_cache diet
2007-03-23 13:00 ` Andy Whitcroft
@ 2007-03-23 14:52 ` Christoph Lameter
0 siblings, 0 replies; 16+ messages in thread
From: Christoph Lameter @ 2007-03-23 14:52 UTC (permalink / raw)
To: Andy Whitcroft
Cc: Eric Dumazet, akpm, Pekka J Enberg, linux-kernel, hch, manfred, pj
On Fri, 23 Mar 2007, Andy Whitcroft wrote:
> > + /*
> > + * We put nodelists[] at the end of kmem_cache, because we want to size
> > + * this array to nr_node_ids slots instead of MAX_NUMNODES
> > + * (see kmem_cache_init())
> > + * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
> > + * is statically defined, so we reserve the max number of nodes.
> > + */
Good idea.
* [PATCH] slab: cache alloc cleanups
@ 2007-01-05 11:46 Pekka J Enberg
2007-01-05 19:05 ` Christoph Lameter
2007-01-05 19:50 ` Andrew Morton
0 siblings, 2 replies; 16+ messages in thread
From: Pekka J Enberg @ 2007-01-05 11:46 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, apw, hch, manfred, christoph, pj
From: Pekka Enberg <penberg@cs.helsinki.fi>
Clean up __cache_alloc and __cache_alloc_node functions a bit. We no
longer need to do NUMA_BUILD tricks and the UMA allocation path is much
simpler. No functional changes in this patch.
Note: saves a few kernel text bytes on an x86 NUMA build due to using gotos in
__cache_alloc_node() and moving the __GFP_THISNODE check into fallback_alloc().
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Christoph Lameter <christoph@lameter.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
diff --git a/mm/slab.c b/mm/slab.c
index 0d4e574..5edb7bf 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3197,35 +3197,6 @@ static inline void *____cache_alloc(stru
return objp;
}
-static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
- gfp_t flags, void *caller)
-{
- unsigned long save_flags;
- void *objp = NULL;
-
- cache_alloc_debugcheck_before(cachep, flags);
-
- local_irq_save(save_flags);
-
- if (unlikely(NUMA_BUILD &&
- current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY)))
- objp = alternate_node_alloc(cachep, flags);
-
- if (!objp)
- objp = ____cache_alloc(cachep, flags);
- /*
- * We may just have run out of memory on the local node.
- * ____cache_alloc_node() knows how to locate memory on other nodes
- */
- if (NUMA_BUILD && !objp)
- objp = ____cache_alloc_node(cachep, flags, numa_node_id());
- local_irq_restore(save_flags);
- objp = cache_alloc_debugcheck_after(cachep, flags, objp,
- caller);
- prefetchw(objp);
- return objp;
-}
-
#ifdef CONFIG_NUMA
/*
* Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY.
@@ -3257,14 +3228,20 @@ static void *alternate_node_alloc(struct
* allocator to do its reclaim / fallback magic. We then insert the
* slab into the proper nodelist and then allocate from it.
*/
-void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
+static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
{
- struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy))
- ->node_zonelists[gfp_zone(flags)];
+ struct zonelist *zonelist;
+ gfp_t local_flags;
struct zone **z;
void *obj = NULL;
int nid;
- gfp_t local_flags = (flags & GFP_LEVEL_MASK);
+
+ if (flags & __GFP_THISNODE)
+ return NULL;
+
+ zonelist = &NODE_DATA(slab_node(current->mempolicy))
+ ->node_zonelists[gfp_zone(flags)];
+ local_flags = (flags & GFP_LEVEL_MASK);
retry:
/*
@@ -3374,16 +3351,110 @@ must_grow:
if (x)
goto retry;
- if (!(flags & __GFP_THISNODE))
- /* Unable to grow the cache. Fall back to other nodes. */
- return fallback_alloc(cachep, flags);
-
- return NULL;
+ return fallback_alloc(cachep, flags);
done:
return obj;
}
-#endif
+
+/**
+ * kmem_cache_alloc_node - Allocate an object on the specified node
+ * @cachep: The cache to allocate from.
+ * @flags: See kmalloc().
+ * @nodeid: node number of the target node.
+ * @caller: return address of caller, used for debug information
+ *
+ * Identical to kmem_cache_alloc but it will allocate memory on the given
+ * node, which can improve the performance for cpu bound structures.
+ *
+ * Fallback to other node is possible if __GFP_THISNODE is not set.
+ */
+static __always_inline void *
+__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
+ void *caller)
+{
+ unsigned long save_flags;
+ void *ptr;
+
+ cache_alloc_debugcheck_before(cachep, flags);
+ local_irq_save(save_flags);
+
+ if (unlikely(nodeid == -1))
+ nodeid = numa_node_id();
+
+ if (unlikely(!cachep->nodelists[nodeid])) {
+ /* Node not bootstrapped yet */
+ ptr = fallback_alloc(cachep, flags);
+ goto out;
+ }
+
+ if (nodeid == numa_node_id()) {
+ /*
+ * Use the locally cached objects if possible.
+ * However ____cache_alloc does not allow fallback
+ * to other nodes. It may fail while we still have
+ * objects on other nodes available.
+ */
+ ptr = ____cache_alloc(cachep, flags);
+ if (ptr)
+ goto out;
+ }
+ /* ___cache_alloc_node can fall back to other nodes */
+ ptr = ____cache_alloc_node(cachep, flags, nodeid);
+ out:
+ local_irq_restore(save_flags);
+ ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
+
+ return ptr;
+}
+
+static __always_inline void *
+__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
+{
+ void *objp;
+
+ if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
+ objp = alternate_node_alloc(cache, flags);
+ if (objp)
+ goto out;
+ }
+ objp = ____cache_alloc(cache, flags);
+
+ /*
+ * We may just have run out of memory on the local node.
+ * ____cache_alloc_node() knows how to locate memory on other nodes
+ */
+ if (!objp)
+ objp = ____cache_alloc_node(cache, flags, numa_node_id());
+
+ out:
+ return objp;
+}
+#else
+
+static __always_inline void *
+__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
+{
+ return ____cache_alloc(cachep, flags);
+}
+
+#endif /* CONFIG_NUMA */
+
+static __always_inline void *
+__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
+{
+ unsigned long save_flags;
+ void *objp;
+
+ cache_alloc_debugcheck_before(cachep, flags);
+ local_irq_save(save_flags);
+ objp = __do_cache_alloc(cachep, flags);
+ local_irq_restore(save_flags);
+ objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
+ prefetchw(objp);
+
+ return objp;
+}
/*
* Caller needs to acquire correct kmem_list's list_lock
@@ -3582,57 +3653,6 @@ out:
}
#ifdef CONFIG_NUMA
-/**
- * kmem_cache_alloc_node - Allocate an object on the specified node
- * @cachep: The cache to allocate from.
- * @flags: See kmalloc().
- * @nodeid: node number of the target node.
- * @caller: return address of caller, used for debug information
- *
- * Identical to kmem_cache_alloc but it will allocate memory on the given
- * node, which can improve the performance for cpu bound structures.
- *
- * Fallback to other node is possible if __GFP_THISNODE is not set.
- */
-static __always_inline void *
-__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
- int nodeid, void *caller)
-{
- unsigned long save_flags;
- void *ptr = NULL;
-
- cache_alloc_debugcheck_before(cachep, flags);
- local_irq_save(save_flags);
-
- if (unlikely(nodeid == -1))
- nodeid = numa_node_id();
-
- if (likely(cachep->nodelists[nodeid])) {
- if (nodeid == numa_node_id()) {
- /*
- * Use the locally cached objects if possible.
- * However ____cache_alloc does not allow fallback
- * to other nodes. It may fail while we still have
- * objects on other nodes available.
- */
- ptr = ____cache_alloc(cachep, flags);
- }
- if (!ptr) {
- /* ___cache_alloc_node can fall back to other nodes */
- ptr = ____cache_alloc_node(cachep, flags, nodeid);
- }
- } else {
- /* Node not bootstrapped yet */
- if (!(flags & __GFP_THISNODE))
- ptr = fallback_alloc(cachep, flags);
- }
-
- local_irq_restore(save_flags);
- ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
-
- return ptr;
-}
-
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
return __cache_alloc_node(cachep, flags, nodeid,
* Re: [PATCH] slab: cache alloc cleanups
2007-01-05 11:46 [PATCH] slab: cache alloc cleanups Pekka J Enberg
@ 2007-01-05 19:05 ` Christoph Lameter
2007-01-05 19:50 ` Andrew Morton
1 sibling, 0 replies; 16+ messages in thread
From: Christoph Lameter @ 2007-01-05 19:05 UTC (permalink / raw)
To: Pekka J Enberg; +Cc: akpm, linux-kernel, apw, hch, manfred, christoph, pj
On Fri, 5 Jan 2007, Pekka J Enberg wrote:
> From: Pekka Enberg <penberg@cs.helsinki.fi>
>
> Clean up __cache_alloc and __cache_alloc_node functions a bit. We no
> longer need to do NUMA_BUILD tricks and the UMA allocation path is much
> simpler. No functional changes in this patch.
Looks good.
* Re: [PATCH] slab: cache alloc cleanups
2007-01-05 11:46 [PATCH] slab: cache alloc cleanups Pekka J Enberg
2007-01-05 19:05 ` Christoph Lameter
@ 2007-01-05 19:50 ` Andrew Morton
2007-01-05 22:12 ` Christoph Lameter
1 sibling, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2007-01-05 19:50 UTC (permalink / raw)
To: Pekka J Enberg; +Cc: linux-kernel, apw, hch, manfred, christoph, pj
On Fri, 5 Jan 2007 13:46:45 +0200 (EET)
Pekka J Enberg <penberg@cs.helsinki.fi> wrote:
> Clean up __cache_alloc and __cache_alloc_node functions a bit. We no
> longer need to do NUMA_BUILD tricks and the UMA allocation path is much
> simpler. No functional changes in this patch.
>
> Note: saves a few kernel text bytes on an x86 NUMA build due to using gotos in
> __cache_alloc_node() and moving the __GFP_THISNODE check into fallback_alloc().
Does this actually clean things up, or does it randomly move things around
while carefully retaining existing obscurity? Not sure..
* Re: [PATCH] slab: cache alloc cleanups
2007-01-05 19:50 ` Andrew Morton
@ 2007-01-05 22:12 ` Christoph Lameter
0 siblings, 0 replies; 16+ messages in thread
From: Christoph Lameter @ 2007-01-05 22:12 UTC (permalink / raw)
To: Andrew Morton
Cc: Pekka J Enberg, linux-kernel, apw, hch, manfred, christoph, pj
On Fri, 5 Jan 2007, Andrew Morton wrote:
> Does this actually clean things up, or does it randomly move things around
> while carefully retaining existing obscurity? Not sure..
Looks like a good cleanup to me.