LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH] slab: cache_grow cleanup
@ 2007-01-09 13:40 Pekka J Enberg
  2007-01-09 18:18 ` Christoph Lameter
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Pekka J Enberg @ 2007-01-09 13:40 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, hugh, clameter

From: Pekka Enberg <penberg@cs.helsinki.fi>

The current implementation of cache_grow() has to either (1) use pre-allocated
memory for the slab or (2) allocate the memory itself which makes the error
paths messy. Move __GFP_NO_GROW and __GFP_WAIT processing to kmem_getpages()
and introduce a new __cache_grow() variant that expects the memory for a new
slab to always be handed over in the 'objp' parameter.

Cc: Hugh Dickins <hugh@veritas.com>
Cc: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---

diff --git a/mm/slab.c b/mm/slab.c
index c610062..856856c 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1597,6 +1597,14 @@ static int __init cpucache_init(void)
 }
 __initcall(cpucache_init);
 
+static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
+{
+	if (flags & GFP_DMA)
+		BUG_ON(!(cachep->gfpflags & GFP_DMA));
+	else
+		BUG_ON(cachep->gfpflags & GFP_DMA);
+}
+
 /*
  * Interface to system's page allocator. No need to hold the cache-lock.
  *
@@ -1608,8 +1616,22 @@ static void *kmem_getpages(struct kmem_c
 {
 	struct page *page;
 	int nr_pages;
+	void *ret;
 	int i;
 
+	if (flags & __GFP_NO_GROW)
+		return NULL;
+
+	/*
+	 * The test for missing atomic flag is performed here, rather than
+	 * the more obvious place, simply to reduce the critical path length
+	 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
+	 * will eventually be caught here (where it matters).
+	 */
+	kmem_flagcheck(cachep, flags);
+	if (flags & __GFP_WAIT)
+		local_irq_enable();
+
 #ifndef CONFIG_MMU
 	/*
 	 * Nommu uses slab's for process anonymous memory allocations, and thus
@@ -1621,8 +1643,10 @@ #endif
 	flags |= cachep->gfpflags;
 
 	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
-	if (!page)
-		return NULL;
+	if (!page) {
+		ret = NULL;
+		goto out;
+	}
 
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1633,7 +1657,12 @@ #endif
 			NR_SLAB_UNRECLAIMABLE, nr_pages);
 	for (i = 0; i < nr_pages; i++)
 		__SetPageSlab(page + i);
-	return page_address(page);
+
+	ret = page_address(page);
+  out:
+	if (flags & __GFP_WAIT)
+		local_irq_disable();
+	return ret;
 }
 
 /*
@@ -2641,14 +2670,6 @@ #endif
 	slabp->free = 0;
 }
 
-static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
-{
-	if (flags & GFP_DMA)
-		BUG_ON(!(cachep->gfpflags & GFP_DMA));
-	else
-		BUG_ON(cachep->gfpflags & GFP_DMA);
-}
-
 static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
 				int nodeid)
 {
@@ -2714,7 +2735,7 @@ static void slab_map_pages(struct kmem_c
  * Grow (by 1) the number of slabs within a cache.  This is called by
  * kmem_cache_alloc() when there are no active objs left in a cache.
  */
-static int cache_grow(struct kmem_cache *cachep,
+static int __cache_grow(struct kmem_cache *cachep,
 		gfp_t flags, int nodeid, void *objp)
 {
 	struct slab *slabp;
@@ -2754,39 +2775,17 @@ static int cache_grow(struct kmem_cache 
 
 	offset *= cachep->colour_off;
 
-	if (local_flags & __GFP_WAIT)
-		local_irq_enable();
-
-	/*
-	 * The test for missing atomic flag is performed here, rather than
-	 * the more obvious place, simply to reduce the critical path length
-	 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
-	 * will eventually be caught here (where it matters).
-	 */
-	kmem_flagcheck(cachep, flags);
-
-	/*
-	 * Get mem for the objs.  Attempt to allocate a physical page from
-	 * 'nodeid'.
-	 */
-	if (!objp)
-		objp = kmem_getpages(cachep, flags, nodeid);
-	if (!objp)
-		goto failed;
-
 	/* Get slab management. */
 	slabp = alloc_slabmgmt(cachep, objp, offset,
 			local_flags & ~GFP_THISNODE, nodeid);
 	if (!slabp)
-		goto opps1;
+		return 0;
 
 	slabp->nodeid = nodeid;
 	slab_map_pages(cachep, slabp, objp);
 
 	cache_init_objs(cachep, slabp, ctor_flags);
 
-	if (local_flags & __GFP_WAIT)
-		local_irq_disable();
 	check_irq_off();
 	spin_lock(&l3->list_lock);
 
@@ -2796,12 +2795,25 @@ static int cache_grow(struct kmem_cache 
 	l3->free_objects += cachep->num;
 	spin_unlock(&l3->list_lock);
 	return 1;
-opps1:
-	kmem_freepages(cachep, objp);
-failed:
-	if (local_flags & __GFP_WAIT)
-		local_irq_disable();
-	return 0;
+}
+
+static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+{
+	void *objp;
+	int ret;
+
+	/*
+	 * Get mem for the objs.  Attempt to allocate a physical page from
+	 * 'nodeid'.
+	 */
+	objp = kmem_getpages(cachep, flags, nodeid);
+	if (!objp)
+		return 0;
+	
+	ret = __cache_grow(cachep, flags, nodeid, objp);
+	if (!ret)
+		kmem_freepages(cachep, objp);
+	return ret;
 }
 
 #if DEBUG
@@ -3014,7 +3026,7 @@ alloc_done:
 
 	if (unlikely(!ac->avail)) {
 		int x;
-		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
+		x = cache_grow(cachep, flags | GFP_THISNODE, node);
 
 		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = cpu_cache_get(cachep);
@@ -3264,7 +3276,6 @@ void *fallback_alloc(struct kmem_cache *
 	struct zone **z;
 	void *obj = NULL;
 	int nid;
-	gfp_t local_flags = (flags & GFP_LEVEL_MASK);
 
 retry:
 	/*
@@ -3288,18 +3299,13 @@ retry:
 		 * We may trigger various forms of reclaim on the allowed
 		 * set and go into memory reserves if necessary.
 		 */
-		if (local_flags & __GFP_WAIT)
-			local_irq_enable();
-		kmem_flagcheck(cache, flags);
 		obj = kmem_getpages(cache, flags, -1);
-		if (local_flags & __GFP_WAIT)
-			local_irq_disable();
 		if (obj) {
 			/*
 			 * Insert into the appropriate per node queues
 			 */
 			nid = page_to_nid(virt_to_page(obj));
-			if (cache_grow(cache, flags, nid, obj)) {
+			if (__cache_grow(cache, flags, nid, obj)) {
 				obj = ____cache_alloc_node(cache,
 					flags | GFP_THISNODE, nid);
 				if (!obj)
@@ -3310,7 +3316,7 @@ retry:
 					 */
 					goto retry;
 			} else {
-				/* cache_grow already freed obj */
+				kmem_freepages(cache, obj);
 				obj = NULL;
 			}
 		}
@@ -3370,7 +3376,7 @@ retry:
 
 must_grow:
 	spin_unlock(&l3->list_lock);
-	x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
+	x = cache_grow(cachep, flags | GFP_THISNODE, nodeid);
 	if (x)
 		goto retry;
 

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] slab: cache_grow cleanup
  2007-01-09 13:40 [PATCH] slab: cache_grow cleanup Pekka J Enberg
@ 2007-01-09 18:18 ` Christoph Lameter
  2007-01-10  7:12   ` Pekka J Enberg
  2007-01-10 14:44   ` [PATCH] " Pekka Enberg
  2007-01-12  9:22 ` Pekka Enberg
  2007-01-28  0:12 ` Andrew Morton
  2 siblings, 2 replies; 7+ messages in thread
From: Christoph Lameter @ 2007-01-09 18:18 UTC (permalink / raw)
  To: Pekka J Enberg; +Cc: akpm, linux-kernel, hugh

On Tue, 9 Jan 2007, Pekka J Enberg wrote:

> The current implementation of cache_grow() has to either (1) use pre-allocated
> memory for the slab or (2) allocate the memory itself which makes the error
> paths messy. Move __GFP_NO_GROW and __GFP_WAIT processing to kmem_getpages()
> and introduce a new __cache_grow() variant that expects the memory for a new
> slab to always be handed over in the 'objp' parameter.

I am loosing track of these. What is the difference to earlier versions?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: slab: cache_grow cleanup
  2007-01-09 18:18 ` Christoph Lameter
@ 2007-01-10  7:12   ` Pekka J Enberg
  2007-01-10 14:44   ` [PATCH] " Pekka Enberg
  1 sibling, 0 replies; 7+ messages in thread
From: Pekka J Enberg @ 2007-01-10  7:12 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: akpm, linux-kernel, hugh

Christoph Lameter writes:
> I am loosing track of these. What is the difference to earlier versions?

Nothing but a rediff on top of Linus' tree as Hugh's fix already went in. 

                Pekka 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] slab: cache_grow cleanup
  2007-01-09 18:18 ` Christoph Lameter
  2007-01-10  7:12   ` Pekka J Enberg
@ 2007-01-10 14:44   ` Pekka Enberg
  1 sibling, 0 replies; 7+ messages in thread
From: Pekka Enberg @ 2007-01-10 14:44 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: akpm, linux-kernel, hugh

On 1/9/07, Christoph Lameter <clameter@sgi.com> wrote:
> I am loosing track of these. What is the difference to earlier versions?

It is just a rediff on top of Linus' tree as Hugh's fix already went in.

                                  Pekka

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] slab: cache_grow cleanup
  2007-01-09 13:40 [PATCH] slab: cache_grow cleanup Pekka J Enberg
  2007-01-09 18:18 ` Christoph Lameter
@ 2007-01-12  9:22 ` Pekka Enberg
  2007-01-28  0:12 ` Andrew Morton
  2 siblings, 0 replies; 7+ messages in thread
From: Pekka Enberg @ 2007-01-12  9:22 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, hugh, clameter

Hi Andrew,

On 1/9/07, Pekka J Enberg <penberg@cs.helsinki.fi> wrote:
> From: Pekka Enberg <penberg@cs.helsinki.fi>
>
> The current implementation of cache_grow() has to either (1) use pre-allocated
> memory for the slab or (2) allocate the memory itself which makes the error
> paths messy. Move __GFP_NO_GROW and __GFP_WAIT processing to kmem_getpages()
> and introduce a new __cache_grow() variant that expects the memory for a new
> slab to always be handed over in the 'objp' parameter.
>
> Cc: Hugh Dickins <hugh@veritas.com>
> Cc: Christoph Lameter <clameter@sgi.com>
> Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

Can we get this into -mm, please?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] slab: cache_grow cleanup
  2007-01-09 13:40 [PATCH] slab: cache_grow cleanup Pekka J Enberg
  2007-01-09 18:18 ` Christoph Lameter
  2007-01-12  9:22 ` Pekka Enberg
@ 2007-01-28  0:12 ` Andrew Morton
  2007-01-28 11:14   ` Pekka Enberg
  2 siblings, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2007-01-28  0:12 UTC (permalink / raw)
  To: Pekka J Enberg; +Cc: linux-kernel, hugh, clameter

On Tue, 9 Jan 2007 15:40:03 +0200 (EET)
Pekka J Enberg <penberg@cs.helsinki.fi> wrote:

> The current implementation of cache_grow() has to either (1) use pre-allocated
> memory for the slab or (2) allocate the memory itself which makes the error
> paths messy. Move __GFP_NO_GROW and __GFP_WAIT processing to kmem_getpages()
> and introduce a new __cache_grow() variant that expects the memory for a new
> slab to always be handed over in the 'objp' parameter.

This gets its local interrupt state mucked up.

BUG: sleeping function called from invalid context at mm/slab.c:3038
in_atomic():0, irqs_disabled():1
no locks held by init/1.
irq event stamp: 656902
hardirqs last  enabled at (656901): [<c015f48b>] mod_zone_page_state+0x4b/0x50
hardirqs last disabled at (656902): [<c0171f54>] cache_alloc_refill+0x384/0x6a0
softirqs last  enabled at (650628): [<c03a6317>] unix_release_sock+0x67/0x220
softirqs last disabled at (650626): [<c03cfe8e>] _write_lock_bh+0xe/0x40
 [<c0103f7a>] show_trace_log_lvl+0x1a/0x30
 [<c0104682>] show_trace+0x12/0x20
 [<c0104736>] dump_stack+0x16/0x20
 [<c01175ea>] __might_sleep+0xba/0xd0
 [<c017262f>] kmem_cache_alloc+0xaf/0xd0
 [<c0172131>] cache_alloc_refill+0x561/0x6a0
 [<c0172574>] kmem_cache_zalloc+0xe4/0xf0
 [<c011cb49>] copy_process+0x89/0x1280
 [<c011dfcb>] do_fork+0x6b/0x1c0
 [<c0100e13>] sys_clone+0x33/0x40
 [<c0102edc>] sysenter_past_esp+0x5d/0x99
 =======================

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] slab: cache_grow cleanup
  2007-01-28  0:12 ` Andrew Morton
@ 2007-01-28 11:14   ` Pekka Enberg
  0 siblings, 0 replies; 7+ messages in thread
From: Pekka Enberg @ 2007-01-28 11:14 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, hugh, clameter

Hi Andrew,

On 1/28/07, Andrew Morton <akpm@osdl.org> wrote:
> This gets its local interrupt state mucked up.
>
> BUG: sleeping function called from invalid context at mm/slab.c:3038
> in_atomic():0, irqs_disabled():1
> no locks held by init/1.
> irq event stamp: 656902
> hardirqs last  enabled at (656901): [<c015f48b>] mod_zone_page_state+0x4b/0x50
> hardirqs last disabled at (656902): [<c0171f54>] cache_alloc_refill+0x384/0x6a0
> softirqs last  enabled at (650628): [<c03a6317>] unix_release_sock+0x67/0x220
> softirqs last disabled at (650626): [<c03cfe8e>] _write_lock_bh+0xe/0x40
>  [<c0103f7a>] show_trace_log_lvl+0x1a/0x30
>  [<c0104682>] show_trace+0x12/0x20
>  [<c0104736>] dump_stack+0x16/0x20
>  [<c01175ea>] __might_sleep+0xba/0xd0
>  [<c017262f>] kmem_cache_alloc+0xaf/0xd0
>  [<c0172131>] cache_alloc_refill+0x561/0x6a0
>  [<c0172574>] kmem_cache_zalloc+0xe4/0xf0
>  [<c011cb49>] copy_process+0x89/0x1280
>  [<c011dfcb>] do_fork+0x6b/0x1c0
>  [<c0100e13>] sys_clone+0x33/0x40
>  [<c0102edc>] sysenter_past_esp+0x5d/0x99

Hmm, alloc_slabmgmt calls kmem_cache_alloc with interrupts disabled
which triggers the warning. We won't deadlock though as we enable
interrupts in kmem_getpages in case we need to refill
cachep->slabp_cache. So the debug check is bogus when calling
kmem_cache_alloc within the slab allocator, I think.

I'll whack it some more and resend. Thanks Andrew!

                               Pekka

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2007-01-28 11:15 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-01-09 13:40 [PATCH] slab: cache_grow cleanup Pekka J Enberg
2007-01-09 18:18 ` Christoph Lameter
2007-01-10  7:12   ` Pekka J Enberg
2007-01-10 14:44   ` [PATCH] " Pekka Enberg
2007-01-12  9:22 ` Pekka Enberg
2007-01-28  0:12 ` Andrew Morton
2007-01-28 11:14   ` Pekka Enberg

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).