LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH] percpu_ida: Use _irqsave() instead of local_irq_save() + spin_lock
@ 2018-05-04 15:32 Sebastian Andrzej Siewior
  2018-05-04 23:22 ` Andrew Morton
  0 siblings, 1 reply; 8+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-05-04 15:32 UTC (permalink / raw)
  To: linux-kernel
  Cc: tglx, Nicholas Bellinger, Andrew Morton, Sebastian Andrzej Siewior

percpu_ida() decouples disabling interrupts from the locking operations.
This breaks some assumptions if the locking operations are replaced like
they are under -RT.
The same locking can be achieved by avoiding local_irq_save() and using
spin_lock_irqsave() instead. percpu_ida_alloc() gains one more
preemption point because after unlocking the fastpath and before the
pool lock is acquired, the interrupts are briefly enabled.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 lib/percpu_ida.c | 63 ++++++++++++++++--------------------------------
 1 file changed, 21 insertions(+), 42 deletions(-)

diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index 6016f1deb1f5..9bbd9c5d375a 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -112,18 +112,6 @@ static inline void alloc_global_tags(struct percpu_ida *pool,
 		  min(pool->nr_free, pool->percpu_batch_size));
 }
 
-static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags)
-{
-	int tag = -ENOSPC;
-
-	spin_lock(&tags->lock);
-	if (tags->nr_free)
-		tag = tags->freelist[--tags->nr_free];
-	spin_unlock(&tags->lock);
-
-	return tag;
-}
-
 /**
  * percpu_ida_alloc - allocate a tag
  * @pool: pool to allocate from
@@ -147,20 +135,22 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
 	DEFINE_WAIT(wait);
 	struct percpu_ida_cpu *tags;
 	unsigned long flags;
-	int tag;
+	int tag = -ENOSPC;
 
-	local_irq_save(flags);
-	tags = this_cpu_ptr(pool->tag_cpu);
+	tags = raw_cpu_ptr(pool->tag_cpu);
+	spin_lock_irqsave(&tags->lock, flags);
 
 	/* Fastpath */
-	tag = alloc_local_tag(tags);
-	if (likely(tag >= 0)) {
-		local_irq_restore(flags);
+	if (likely(tags->nr_free)) {
+		tag = tags->freelist[--tags->nr_free];
+		spin_unlock_irqrestore(&tags->lock, flags);
 		return tag;
 	}
+	spin_unlock_irqrestore(&tags->lock, flags);
 
 	while (1) {
-		spin_lock(&pool->lock);
+		spin_lock_irqsave(&pool->lock, flags);
+		tags = this_cpu_ptr(pool->tag_cpu);
 
 		/*
 		 * prepare_to_wait() must come before steal_tags(), in case
@@ -184,8 +174,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
 						&pool->cpus_have_tags);
 		}
 
-		spin_unlock(&pool->lock);
-		local_irq_restore(flags);
+		spin_unlock_irqrestore(&pool->lock, flags);
 
 		if (tag >= 0 || state == TASK_RUNNING)
 			break;
@@ -196,9 +185,6 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
 		}
 
 		schedule();
-
-		local_irq_save(flags);
-		tags = this_cpu_ptr(pool->tag_cpu);
 	}
 	if (state != TASK_RUNNING)
 		finish_wait(&pool->wait, &wait);
@@ -222,28 +208,24 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
 
 	BUG_ON(tag >= pool->nr_tags);
 
-	local_irq_save(flags);
-	tags = this_cpu_ptr(pool->tag_cpu);
+	tags = raw_cpu_ptr(pool->tag_cpu);
 
-	spin_lock(&tags->lock);
+	spin_lock_irqsave(&tags->lock, flags);
 	tags->freelist[tags->nr_free++] = tag;
 
 	nr_free = tags->nr_free;
-	spin_unlock(&tags->lock);
 
 	if (nr_free == 1) {
 		cpumask_set_cpu(smp_processor_id(),
 				&pool->cpus_have_tags);
 		wake_up(&pool->wait);
 	}
+	spin_unlock_irqrestore(&tags->lock, flags);
 
 	if (nr_free == pool->percpu_max_size) {
-		spin_lock(&pool->lock);
+		spin_lock_irqsave(&pool->lock, flags);
+		spin_lock(&tags->lock);
 
-		/*
-		 * Global lock held and irqs disabled, don't need percpu
-		 * lock
-		 */
 		if (tags->nr_free == pool->percpu_max_size) {
 			move_tags(pool->freelist, &pool->nr_free,
 				  tags->freelist, &tags->nr_free,
@@ -251,10 +233,9 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
 
 			wake_up(&pool->wait);
 		}
-		spin_unlock(&pool->lock);
+		spin_unlock(&tags->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 	}
-
-	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(percpu_ida_free);
 
@@ -346,29 +327,27 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
 	struct percpu_ida_cpu *remote;
 	unsigned cpu, i, err = 0;
 
-	local_irq_save(flags);
 	for_each_possible_cpu(cpu) {
 		remote = per_cpu_ptr(pool->tag_cpu, cpu);
-		spin_lock(&remote->lock);
+		spin_lock_irqsave(&remote->lock, flags);
 		for (i = 0; i < remote->nr_free; i++) {
 			err = fn(remote->freelist[i], data);
 			if (err)
 				break;
 		}
-		spin_unlock(&remote->lock);
+		spin_unlock_irqrestore(&remote->lock, flags);
 		if (err)
 			goto out;
 	}
 
-	spin_lock(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	for (i = 0; i < pool->nr_free; i++) {
 		err = fn(pool->freelist[i], data);
 		if (err)
 			break;
 	}
-	spin_unlock(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 out:
-	local_irq_restore(flags);
 	return err;
 }
 EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
-- 
2.17.0

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] percpu_ida: Use _irqsave() instead of local_irq_save() + spin_lock
  2018-05-04 15:32 [PATCH] percpu_ida: Use _irqsave() instead of local_irq_save() + spin_lock Sebastian Andrzej Siewior
@ 2018-05-04 23:22 ` Andrew Morton
  2018-05-05  3:51   ` Matthew Wilcox
  0 siblings, 1 reply; 8+ messages in thread
From: Andrew Morton @ 2018-05-04 23:22 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, tglx, Nicholas Bellinger, Matthew Wilcox,
	Shaohua Li, Kent Overstreet, Jens Axboe

On Fri,  4 May 2018 17:32:18 +0200 Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> percpu_ida() decouples disabling interrupts from the locking operations.
> This breaks some assumptions if the locking operations are replaced like
> they are under -RT.
> The same locking can be achieved by avoiding local_irq_save() and using
> spin_lock_irqsave() instead. percpu_ida_alloc() gains one more
> preemption point because after unlocking the fastpath and before the
> pool lock is acquired, the interrupts are briefly enabled.
> 

hmm..

> --- a/lib/percpu_ida.c
> +++ b/lib/percpu_ida.c
> @@ -147,20 +135,22 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
>  	DEFINE_WAIT(wait);
>  	struct percpu_ida_cpu *tags;
>  	unsigned long flags;
> -	int tag;
> +	int tag = -ENOSPC;
>  
> -	local_irq_save(flags);
> -	tags = this_cpu_ptr(pool->tag_cpu);
> +	tags = raw_cpu_ptr(pool->tag_cpu);
> +	spin_lock_irqsave(&tags->lock, flags);

I *guess* this is OK.  If a preemption and schedule occurs we could end
up playing with a different CPU's data, but taking that cpu's data's
lock should prevent problems.

Unless there's a CPU hotunplug and that CPU's data vanishes under our
feet, but this code doesn't handle hotplug - it assumes all possible
CPUs are always present.

(798ab48eecdf659d says "Note that there's no cpu hotplug notifier - we
don't care, because ...")

>  	/* Fastpath */
> -	tag = alloc_local_tag(tags);
> -	if (likely(tag >= 0)) {
> -		local_irq_restore(flags);
> +	if (likely(tags->nr_free >= 0)) {
> +		tag = tags->freelist[--tags->nr_free];
> +		spin_unlock_irqrestore(&tags->lock, flags);
>  		return tag;
>  	}
> +	spin_unlock_irqrestore(&tags->lock, flags);
>  
>  	while (1) {
> -		spin_lock(&pool->lock);
> +		spin_lock_irqsave(&pool->lock, flags);
> +		tags = this_cpu_ptr(pool->tag_cpu);
>  
>  		/*
>  		 * prepare_to_wait() must come before steal_tags(), in case
>
> ...
>
> @@ -346,29 +327,27 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
>  	struct percpu_ida_cpu *remote;
>  	unsigned cpu, i, err = 0;
>  
> -	local_irq_save(flags);
>  	for_each_possible_cpu(cpu) {
>  		remote = per_cpu_ptr(pool->tag_cpu, cpu);
> -		spin_lock(&remote->lock);
> +		spin_lock_irqsave(&remote->lock, flags);
>  		for (i = 0; i < remote->nr_free; i++) {
>  			err = fn(remote->freelist[i], data);
>  			if (err)
>  				break;
>  		}
> -		spin_unlock(&remote->lock);
> +		spin_unlock_irqrestore(&remote->lock, flags);
>  		if (err)
>  			goto out;
>  	}
>  
> -	spin_lock(&pool->lock);
> +	spin_lock_irqsave(&pool->lock, flags);
>  	for (i = 0; i < pool->nr_free; i++) {
>  		err = fn(pool->freelist[i], data);
>  		if (err)
>  			break;
>  	}
> -	spin_unlock(&pool->lock);
> +	spin_unlock_irqrestore(&pool->lock, flags);
>  out:
> -	local_irq_restore(flags);
>  	return err;
>  }
>  EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);

Unrelated to this patch, but..  Why are there two loops here?  Won't
the first loop process all the data which the second loop attempts to
process?

This function has no callers anyway so perhaps we should just delete
it.

I'm feeling a bit hostile toward lib/percpu_ida.c in general ;) It has
very few users and seems rather complicated (what's with that
schedule() in percpu_ida_alloc?).  I'm suspecting and hoping that if
someone can figure out what the requirements were, this could all be
zapped and reimplemented using something else which we already have.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] percpu_ida: Use _irqsave() instead of local_irq_save() + spin_lock
  2018-05-04 23:22 ` Andrew Morton
@ 2018-05-05  3:51   ` Matthew Wilcox
  2018-05-05 14:10     ` Jens Axboe
  0 siblings, 1 reply; 8+ messages in thread
From: Matthew Wilcox @ 2018-05-05  3:51 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Sebastian Andrzej Siewior, linux-kernel, tglx,
	Nicholas Bellinger, Shaohua Li, Kent Overstreet, Jens Axboe

On Fri, May 04, 2018 at 04:22:16PM -0700, Andrew Morton wrote:
> I'm feeling a bit hostile toward lib/percpu_ida.c in general ;) It has
> very few users and seems rather complicated (what's with that
> schedule() in percpu_ida_alloc?).  I'm suspecting and hoping that if
> someone can figure out what the requirements were, this could all be
> zapped and reimplemented using something else which we already have.

Note that I have no code in percpu_ida ... it's quite different from
the regular IDA.  But I have noticed the stunning similarity between the
percpu_ida and the code in lib/sbitmap.c.  I have no idea which one is
better, but they're essentially doing the same thing.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] percpu_ida: Use _irqsave() instead of local_irq_save() + spin_lock
  2018-05-05  3:51   ` Matthew Wilcox
@ 2018-05-05 14:10     ` Jens Axboe
  2018-05-05 14:42       ` Jens Axboe
  2018-05-05 15:52       ` Matthew Wilcox
  0 siblings, 2 replies; 8+ messages in thread
From: Jens Axboe @ 2018-05-05 14:10 UTC (permalink / raw)
  To: Matthew Wilcox, Andrew Morton
  Cc: Sebastian Andrzej Siewior, linux-kernel, tglx,
	Nicholas Bellinger, Shaohua Li, Kent Overstreet

On 5/4/18 9:51 PM, Matthew Wilcox wrote:
> On Fri, May 04, 2018 at 04:22:16PM -0700, Andrew Morton wrote:
>> I'm feeling a bit hostile toward lib/percpu_ida.c in general ;) It has
>> very few users and seems rather complicated (what's with that
>> schedule() in percpu_ida_alloc?).  I'm suspecting and hoping that if
>> someone can figure out what the requirements were, this could all be
>> zapped and reimplemented using something else which we already have.
> 
> Note that I have no code in percpu_ida ... it's quite different from
> the regular IDA.  But I have noticed the stunning similarity between the
> percpu_ida and the code in lib/sbitmap.c.  I have no idea which one is
> better, but they're essentially doing the same thing.

Not sure where you see that "stunning similarity"? The sbitmap code is
basically the blk-mq tagging sparse bitmaps, abstracted into a generally
usable form. The percpu_ida design works fine for lower utilization, but
it fell apart for the tagging use case where we can easily run at full
utilization. percpu_ida has percpu caches, sbitmap gets away with just
percpu hints. These caches are why it doesn't work well for > 50%
utilization. sbitmap also supports shallow operations, and online
resizing. Outside of the sharing the same basic functionality of "give
me some free ID", I really don't see a lot of similarities. In terms of
functionality, yes, I don't think it would be hard to get rid of
percpu_ida and just replace it with sbitmap. Probably a worthwhile
pursuit.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] percpu_ida: Use _irqsave() instead of local_irq_save() + spin_lock
  2018-05-05 14:10     ` Jens Axboe
@ 2018-05-05 14:42       ` Jens Axboe
  2018-05-05 15:52       ` Matthew Wilcox
  1 sibling, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2018-05-05 14:42 UTC (permalink / raw)
  To: Matthew Wilcox, Andrew Morton
  Cc: Sebastian Andrzej Siewior, linux-kernel, tglx,
	Nicholas Bellinger, Shaohua Li, Kent Overstreet

On 5/5/18 8:10 AM, Jens Axboe wrote:
> On 5/4/18 9:51 PM, Matthew Wilcox wrote:
>> On Fri, May 04, 2018 at 04:22:16PM -0700, Andrew Morton wrote:
>>> I'm feeling a bit hostile toward lib/percpu_ida.c in general ;) It has
>>> very few users and seems rather complicated (what's with that
>>> schedule() in percpu_ida_alloc?).  I'm suspecting and hoping that if
>>> someone can figure out what the requirements were, this could all be
>>> zapped and reimplemented using something else which we already have.
>>
>> Note that I have no code in percpu_ida ... it's quite different from
>> the regular IDA.  But I have noticed the stunning similarity between the
>> percpu_ida and the code in lib/sbitmap.c.  I have no idea which one is
>> better, but they're essentially doing the same thing.
> 
> Not sure where you see that "stunning similarity"? The sbitmap code is
> basically the blk-mq tagging sparse bitmaps, abstracted into a generally
> usable form. The percpu_ida design works fine for lower utilization, but
> it fell apart for the tagging use case where we can easily run at full
> utilization. percpu_ida has percpu caches, sbitmap gets away with just
> percpu hints. These caches are why it doesn't work well for > 50%
> utilization. sbitmap also supports shallow operations, and online
> resizing. Outside of the sharing the same basic functionality of "give
> me some free ID", I really don't see a lot of similarities. In terms of
> functionality, yes, I don't think it would be hard to get rid of
> percpu_ida and just replace it with sbitmap. Probably a worthwhile
> pursuit.

Looks like it's just the target code using it. I'll spin a patch
to convert it to sbitmap.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] percpu_ida: Use _irqsave() instead of local_irq_save() + spin_lock
  2018-05-05 14:10     ` Jens Axboe
  2018-05-05 14:42       ` Jens Axboe
@ 2018-05-05 15:52       ` Matthew Wilcox
  2018-05-07 13:47         ` Matthew Wilcox
  1 sibling, 1 reply; 8+ messages in thread
From: Matthew Wilcox @ 2018-05-05 15:52 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Andrew Morton, Sebastian Andrzej Siewior, linux-kernel, tglx,
	Nicholas Bellinger, Shaohua Li, Kent Overstreet

On Sat, May 05, 2018 at 08:10:20AM -0600, Jens Axboe wrote:
> On 5/4/18 9:51 PM, Matthew Wilcox wrote:
> > On Fri, May 04, 2018 at 04:22:16PM -0700, Andrew Morton wrote:
> >> I'm feeling a bit hostile toward lib/percpu_ida.c in general ;) It has
> >> very few users and seems rather complicated (what's with that
> >> schedule() in percpu_ida_alloc?).  I'm suspecting and hoping that if
> >> someone can figure out what the requirements were, this could all be
> >> zapped and reimplemented using something else which we already have.
> > 
> > Note that I have no code in percpu_ida ... it's quite different from
> > the regular IDA.  But I have noticed the stunning similarity between the
> > percpu_ida and the code in lib/sbitmap.c.  I have no idea which one is
> > better, but they're essentially doing the same thing.
> 
> Not sure where you see that "stunning similarity"? The sbitmap code is
> basically the blk-mq tagging sparse bitmaps, abstracted into a generally
> usable form. The percpu_ida design works fine for lower utilization, but
> it fell apart for the tagging use case where we can easily run at full
> utilization. percpu_ida has percpu caches, sbitmap gets away with just
> percpu hints. These caches are why it doesn't work well for > 50%
> utilization. sbitmap also supports shallow operations, and online
> resizing. Outside of the sharing the same basic functionality of "give
> me some free ID", I really don't see a lot of similarities. In terms of
> functionality, yes, I don't think it would be hard to get rid of
> percpu_ida and just replace it with sbitmap. Probably a worthwhile
> pursuit.

Yes, I meant stunning similarity in terms of functionality, rather than
implementation.  I didn't intend to imply that you'd filed off the serial
numbers, given it a fresh coat of paint and called it yours ;-)

I've been looking into what it'll take to replace percpu_ida with sbitmap.
The good news is that there's large chunks of the percpu_ida API that
aren't being used, and the better news is that there's actually only
one percpu_ida, although it gets used by a lot of target drivers.

Looking at the functions in the header file ...

percpu_ida_alloc - seven drivers, all sess_tag_pool
percpu_ida_free - seven drivers, all sess_tag_pool
percpu_ida_destroy - target_core_transport.c (sess_tag_pool)
percpu_ida_init - target_core_transport.c (sess_tag_pool)
percpu_ida_for_each_free - unused
percpu_ida_free_tags - unused

percpu_ida_alloc uses 'state' in a little bit of an unusual way.  It seems
to me that TASK_RUNNING means "Do not sleep", and any other value means
"sleep in this TASK_ state".  As I understand the sbitmap code, that
means we want an sbitmap_queue.

init and destroy seem to map to sbitmap_queue_init_node and
sbitmap_queue_free.  percpu_ida_free maps to sbitmap_queue_clear.
percpu_ida_alloc(x, TASK_RUNNING) maps to sbitmap_queue_get, and any
other state is going to involve the kind of code we see in blk_mq_get_tag.

Does all of that make sense, or have I missed something?

And, Kent, do you see any reason to keep percpu_ida around?  Is there
an important way in which it's superior to sbitmap?

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] percpu_ida: Use _irqsave() instead of local_irq_save() + spin_lock
  2018-05-05 15:52       ` Matthew Wilcox
@ 2018-05-07 13:47         ` Matthew Wilcox
  2018-05-07 21:34           ` Jens Axboe
  0 siblings, 1 reply; 8+ messages in thread
From: Matthew Wilcox @ 2018-05-07 13:47 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Andrew Morton, Sebastian Andrzej Siewior, linux-kernel, tglx,
	Nicholas Bellinger, Shaohua Li, Kent Overstreet

On Sat, May 05, 2018 at 08:52:02AM -0700, Matthew Wilcox wrote:
> init and destroy seem to map to sbitmap_queue_init_node and
> sbitmap_queue_free.  percpu_ida_free maps to sbitmap_queue_clear.

Hmm.

void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
                         unsigned int cpu)
{
        sbitmap_clear_bit_unlock(&sbq->sb, nr);
        sbq_wake_up(sbq);
        if (likely(!sbq->round_robin && nr < sbq->sb.depth))
                *per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
}
EXPORT_SYMBOL_GPL(sbitmap_queue_clear);

If we free a tag on a CPU other than the one it's allocated on, that seems
like it's going to guarantee a cacheline pingpong.  Is the alloc_hint
really that valuable?  I'd be tempted to maintain the alloc_hint (if it's
at all valuable) as being just a hint for which word to look at first,
and only update it on allocation, rather than updating it on free.
Then we can drop the 'cpu' argument to sbitmap_queue_clear(), which
would help this conversion because the percpu_ida users don't know what
CPU their tag was allocated on.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] percpu_ida: Use _irqsave() instead of local_irq_save() + spin_lock
  2018-05-07 13:47         ` Matthew Wilcox
@ 2018-05-07 21:34           ` Jens Axboe
  0 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2018-05-07 21:34 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Andrew Morton, Sebastian Andrzej Siewior, linux-kernel, tglx,
	Nicholas Bellinger, Shaohua Li, Kent Overstreet

On 5/7/18 7:47 AM, Matthew Wilcox wrote:
> On Sat, May 05, 2018 at 08:52:02AM -0700, Matthew Wilcox wrote:
>> init and destroy seem to map to sbitmap_queue_init_node and
>> sbitmap_queue_free.  percpu_ida_free maps to sbitmap_queue_clear.
> 
> Hmm.
> 
> void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
>                          unsigned int cpu)
> {
>         sbitmap_clear_bit_unlock(&sbq->sb, nr);
>         sbq_wake_up(sbq);
>         if (likely(!sbq->round_robin && nr < sbq->sb.depth))
>                 *per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
> }
> EXPORT_SYMBOL_GPL(sbitmap_queue_clear);
> 
> If we free a tag on a CPU other than the one it's allocated on, that seems
> like it's going to guarantee a cacheline pingpong.  Is the alloc_hint
> really that valuable?  I'd be tempted to maintain the alloc_hint (if it's
> at all valuable) as being just a hint for which word to look at first,
> and only update it on allocation, rather than updating it on free.

The point is to update it on free, so we know where to start the search.
Ideally it'd still be free. Keep in mind that some of this was driven
by blk-mq developments, on how to retain fast allocations without
adding per-cpu caches (which we can't afford, see previous rant on
tag space utilization). On blk-mq, the free is generally on the CPU
you allocated, or close to in terms of caching. Even for shared tag
cases, I haven't seen any bad behavior from bouncing. Doesn't mean
that we could not, but I'd be wary of changing any of it without
substantial performance testing.

> Then we can drop the 'cpu' argument to sbitmap_queue_clear(), which
> would help this conversion because the percpu_ida users don't know what
> CPU their tag was allocated on.

What you want is something around the sbitmap_queue_{get,clear} that
wraps the alloc hint and wait queue mechanism, so the caller doesn't
have to deal with it. We don't have that, since the blk-mq mechanism
works out better just implementing it. We have things like queue
runs that are done for the failure case.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-05-07 21:34 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-04 15:32 [PATCH] percpu_ida: Use _irqsave() instead of local_irq_save() + spin_lock Sebastian Andrzej Siewior
2018-05-04 23:22 ` Andrew Morton
2018-05-05  3:51   ` Matthew Wilcox
2018-05-05 14:10     ` Jens Axboe
2018-05-05 14:42       ` Jens Axboe
2018-05-05 15:52       ` Matthew Wilcox
2018-05-07 13:47         ` Matthew Wilcox
2018-05-07 21:34           ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).