LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
To: Yury Norov <yury.norov@gmail.com>
Cc: linux-kernel@vger.kernel.org, heukelum@mailshack.com,
	mita@miraclelinux.com, Andrew Morton <akpm@linux-foundation.org>
Subject: Re: [PATCH] lib: find_*_bit reimplementation
Date: Sat, 24 Jan 2015 01:45:45 +0100	[thread overview]
Message-ID: <87y4otknuu.fsf@rasmusvillemoes.dk> (raw)
In-Reply-To: <1421702706-10404-1-git-send-email-yury.norov@gmail.com> (Yury Norov's message of "Tue, 20 Jan 2015 00:25:06 +0300")

On Mon, Jan 19 2015, Yury Norov <yury.norov@gmail.com> wrote:

> New implementation takes less space, and, I hope, easier to understand.
>
> Signed-off-by: Yury Norov <yury.norov@gmail.com>
> ---
>  lib/find_next_bit.c | 265 +++++++++++++++-------------------------------------
>  1 file changed, 73 insertions(+), 192 deletions(-)
>

That diffstat is certainly nice. Do you also have numbers for the
size of the generated code, and do you know if there is a
measurable performance difference? Have you tested whether the new and
old code give the same results, also in corner cases?

Some remarks inline below.

> diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
> index 0cbfc0b..a5c915f 100644
> --- a/lib/find_next_bit.c
> +++ b/lib/find_next_bit.c
> @@ -11,10 +11,39 @@
>  
>  #include <linux/bitops.h>
>  #include <linux/export.h>
> +#include <linux/kernel.h>
>  #include <asm/types.h>
>  #include <asm/byteorder.h>
>  
> -#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
> +#define HIGH_BITS_MASK(nr)		(ULONG_MAX << (nr))
> +#define MIN(a, b)			((a) < (b) ? (a) : (b))
> +

Please don't duplicate min/max macros. kernel.h already provides everything you need.

> +#if !defined(find_next_bit) || !defined(find_next_zero_bit)
> +static unsigned long _find_next_bit(const unsigned long *addr,
> +		unsigned long end, unsigned long start, unsigned long flags)

Having two parameters called end and start appearing in that
order is slightly confusing. Why not keep the name 'size' for
end, or maybe 'nbits' to make the unit clear. Also, I think flags
should just be a bool and maybe renamed to something more meaningful.

> +{
> +	unsigned long tmp = flags ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +
> +	/* Handle 1st word. */
> +	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
> +		tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
> +		start = round_down(start, BITS_PER_LONG);
> +	}
> +
> +	do {
> +		if (tmp)
> +			return MIN(start + __ffs(tmp), end);
> +
> +		start += BITS_PER_LONG;
> +		if (start >= end)
> +			return end;
> +
> +		tmp = flags ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +	} while (1);
> +}
> +#endif
>  
>  #ifndef find_next_bit
>  /*
> @@ -23,86 +52,16 @@
>  unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
>  			    unsigned long offset)
>  {
> -	const unsigned long *p = addr + BITOP_WORD(offset);
> -	unsigned long result = offset & ~(BITS_PER_LONG-1);
> -	unsigned long tmp;
> -
> -	if (offset >= size)
> -		return size;

The previous versions handled this, but your code will always access
the word at addr[start/BITS_PER_LONG]. Are you sure no caller ever
passes start >= size?

> -	size -= result;
> -	offset %= BITS_PER_LONG;
> -	if (offset) {
> -		tmp = *(p++);
> -		tmp &= (~0UL << offset);
> -		if (size < BITS_PER_LONG)
> -			goto found_first;
> -		if (tmp)
> -			goto found_middle;
> -		size -= BITS_PER_LONG;
> -		result += BITS_PER_LONG;
> -	}
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if ((tmp = *(p++)))
> -			goto found_middle;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> -	}
> -	if (!size)
> -		return result;
> -	tmp = *p;
> -
> -found_first:
> -	tmp &= (~0UL >> (BITS_PER_LONG - size));
> -	if (tmp == 0UL)		/* Are any bits set? */
> -		return result + size;	/* Nope. */
> -found_middle:
> -	return result + __ffs(tmp);
> +	return _find_next_bit(addr, size, offset, 1);
>  }
>  EXPORT_SYMBOL(find_next_bit);
>  #endif
>  
>  #ifndef find_next_zero_bit
> -/*
> - * This implementation of find_{first,next}_zero_bit was stolen from
> - * Linus' asm-alpha/bitops.h.
> - */
>  unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
>  				 unsigned long offset)
>  {
> -	const unsigned long *p = addr + BITOP_WORD(offset);
> -	unsigned long result = offset & ~(BITS_PER_LONG-1);
> -	unsigned long tmp;
> -
> -	if (offset >= size)
> -		return size;
> -	size -= result;
> -	offset %= BITS_PER_LONG;
> -	if (offset) {
> -		tmp = *(p++);
> -		tmp |= ~0UL >> (BITS_PER_LONG - offset);
> -		if (size < BITS_PER_LONG)
> -			goto found_first;
> -		if (~tmp)
> -			goto found_middle;
> -		size -= BITS_PER_LONG;
> -		result += BITS_PER_LONG;
> -	}
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if (~(tmp = *(p++)))
> -			goto found_middle;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> -	}
> -	if (!size)
> -		return result;
> -	tmp = *p;
> -
> -found_first:
> -	tmp |= ~0UL << size;
> -	if (tmp == ~0UL)	/* Are any bits zero? */
> -		return result + size;	/* Nope. */
> -found_middle:
> -	return result + ffz(tmp);
> +	return _find_next_bit(addr, size, offset, 0);
>  }
>  EXPORT_SYMBOL(find_next_zero_bit);
>  #endif
> @@ -113,24 +72,14 @@ EXPORT_SYMBOL(find_next_zero_bit);
>   */
>  unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
>  {
> -	const unsigned long *p = addr;
> -	unsigned long result = 0;
> -	unsigned long tmp;
> +	unsigned int idx;
>  
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if ((tmp = *(p++)))
> -			goto found;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> +	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
> +		if (addr[idx])
> +			return idx * BITS_PER_LONG + __ffs(addr[idx]);
>  	}

I'm afraid this is wrong. It doesn't take garbage bits in the last
(partial) word into account. You either need to loop over the full
words and handle the last separately, masking off the garbage bits, or
wrap min(, size) around the above expression, just as you've done in
_find_next_bit. I think I prefer the latter.

Does anyone use insane size bitmaps with 2^32 bits or more? If so,
you'd have to watch out for overflow and use unsigned long for idx.

> -	if (!size)
> -		return result;
>  
> -	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
> -	if (tmp == 0UL)		/* Are any bits set? */
> -		return result + size;	/* Nope. */
> -found:
> -	return result + __ffs(tmp);
> +	return size;
>  }
>  EXPORT_SYMBOL(find_first_bit);
>  #endif
> @@ -141,24 +90,14 @@ EXPORT_SYMBOL(find_first_bit);
>   */
>  unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
>  {
> -	const unsigned long *p = addr;
> -	unsigned long result = 0;
> -	unsigned long tmp;
> +	unsigned int idx;
>  
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if (~(tmp = *(p++)))
> -			goto found;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> +	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
> +		if (addr[idx] != ULONG_MAX)
> +			return idx * BITS_PER_LONG + ffz(addr[idx]);
>  	}

The same of course applies here.

> -	if (!size)
> -		return result;
>  
> -	tmp = (*p) | (~0UL << size);
> -	if (tmp == ~0UL)	/* Are any bits zero? */
> -		return result + size;	/* Nope. */
> -found:
> -	return result + ffz(tmp);
> +	return size;
>  }
>  EXPORT_SYMBOL(find_first_zero_bit);
>  #endif
> @@ -166,18 +105,6 @@ EXPORT_SYMBOL(find_first_zero_bit);
>  #ifdef __BIG_ENDIAN
>  
>  /* include/linux/byteorder does not support "unsigned long" type */
> -static inline unsigned long ext2_swabp(const unsigned long * x)
> -{
> -#if BITS_PER_LONG == 64
> -	return (unsigned long) __swab64p((u64 *) x);
> -#elif BITS_PER_LONG == 32
> -	return (unsigned long) __swab32p((u32 *) x);
> -#else
> -#error BITS_PER_LONG not defined
> -#endif
> -}
> -
> -/* include/linux/byteorder doesn't support "unsigned long" type */
>  static inline unsigned long ext2_swab(const unsigned long y)
>  {
>  #if BITS_PER_LONG == 64
> @@ -189,48 +116,40 @@ static inline unsigned long ext2_swab(const unsigned long y)
>  #endif
>  }
>  
> -#ifndef find_next_zero_bit_le
> -unsigned long find_next_zero_bit_le(const void *addr, unsigned
> -		long size, unsigned long offset)
> +#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
> +static unsigned long _find_next_bit_le(const unsigned long *addr,
> +		unsigned long end, unsigned long start, unsigned long flags)
>  {
> -	const unsigned long *p = addr;
> -	unsigned long result = offset & ~(BITS_PER_LONG - 1);
> -	unsigned long tmp;
> -
> -	if (offset >= size)
> -		return size;
> -	p += BITOP_WORD(offset);
> -	size -= result;
> -	offset &= (BITS_PER_LONG - 1UL);
> -	if (offset) {
> -		tmp = ext2_swabp(p++);
> -		tmp |= (~0UL >> (BITS_PER_LONG - offset));
> -		if (size < BITS_PER_LONG)
> -			goto found_first;
> -		if (~tmp)
> -			goto found_middle;
> -		size -= BITS_PER_LONG;
> -		result += BITS_PER_LONG;
> +	unsigned long tmp = flags ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +
> +	/* Handle 1st word. */
> +	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
> +		tmp &= ext2_swab(HIGH_BITS_MASK(start
> +					% BITS_PER_LONG));

Nit: There's no reason to break that line.

> +		start = round_down(start, BITS_PER_LONG);
>  	}
>  
> -	while (size & ~(BITS_PER_LONG - 1)) {
> -		if (~(tmp = *(p++)))
> -			goto found_middle_swap;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> -	}
> -	if (!size)
> -		return result;
> -	tmp = ext2_swabp(p);
> -found_first:
> -	tmp |= ~0UL << size;
> -	if (tmp == ~0UL)	/* Are any bits zero? */
> -		return result + size; /* Nope. Skip ffz */
> -found_middle:
> -	return result + ffz(tmp);
> +	do {
> +		if (tmp)
> +			return MIN(start +
> +				__ffs(ext2_swab(tmp)), end);

Again, no reason to break the line.
  
> -found_middle_swap:
> -	return result + ffz(ext2_swab(tmp));
> +		start += BITS_PER_LONG;
> +		if (start >= end)
> +			return end;
> +
> +		tmp = flags ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +	} while (1);
> +}
> +#endif
> +
> +#ifndef find_next_zero_bit_le
> +unsigned long find_next_zero_bit_le(const void *addr, unsigned
> +		long size, unsigned long offset)
> +{
> +	return _find_next_bit_le(addr, size, offset, o);

I'm assuming you only compile-tested this on little-endian. The o
wants to be a 0.

Rasmus


  reply	other threads:[~2015-01-24  0:45 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-19 21:25 Yury Norov
2015-01-24  0:45 ` Rasmus Villemoes [this message]
2015-01-25 23:32   ` Yury

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87y4otknuu.fsf@rasmusvillemoes.dk \
    --to=linux@rasmusvillemoes.dk \
    --cc=akpm@linux-foundation.org \
    --cc=heukelum@mailshack.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mita@miraclelinux.com \
    --cc=yury.norov@gmail.com \
    --subject='Re: [PATCH] lib: find_*_bit reimplementation' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).