LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH] lib: find_*_bit reimplementation
@ 2015-01-19 21:25 Yury Norov
  2015-01-24  0:45 ` Rasmus Villemoes
  0 siblings, 1 reply; 3+ messages in thread
From: Yury Norov @ 2015-01-19 21:25 UTC (permalink / raw)
  To: linux-kernel; +Cc: heukelum, mita, Yury Norov

New implementation takes less space, and, I hope, easier to understand.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 lib/find_next_bit.c | 265 +++++++++++++++-------------------------------------
 1 file changed, 73 insertions(+), 192 deletions(-)

diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
index 0cbfc0b..a5c915f 100644
--- a/lib/find_next_bit.c
+++ b/lib/find_next_bit.c
@@ -11,10 +11,39 @@
 
 #include <linux/bitops.h>
 #include <linux/export.h>
+#include <linux/kernel.h>
 #include <asm/types.h>
 #include <asm/byteorder.h>
 
-#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
+#define HIGH_BITS_MASK(nr)		(ULONG_MAX << (nr))
+#define MIN(a, b)			((a) < (b) ? (a) : (b))
+
+#if !defined(find_next_bit) || !defined(find_next_zero_bit)
+static unsigned long _find_next_bit(const unsigned long *addr,
+		unsigned long end, unsigned long start, unsigned long flags)
+{
+	unsigned long tmp = flags ? addr[start / BITS_PER_LONG]
+			: ~addr[start / BITS_PER_LONG];
+
+	/* Handle 1st word. */
+	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
+		tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
+		start = round_down(start, BITS_PER_LONG);
+	}
+
+	do {
+		if (tmp)
+			return MIN(start + __ffs(tmp), end);
+
+		start += BITS_PER_LONG;
+		if (start >= end)
+			return end;
+
+		tmp = flags ? addr[start / BITS_PER_LONG]
+			: ~addr[start / BITS_PER_LONG];
+	} while (1);
+}
+#endif
 
 #ifndef find_next_bit
 /*
@@ -23,86 +52,16 @@
 unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
 			    unsigned long offset)
 {
-	const unsigned long *p = addr + BITOP_WORD(offset);
-	unsigned long result = offset & ~(BITS_PER_LONG-1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset %= BITS_PER_LONG;
-	if (offset) {
-		tmp = *(p++);
-		tmp &= (~0UL << offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found_middle;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-
-found_first:
-	tmp &= (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + __ffs(tmp);
+	return _find_next_bit(addr, size, offset, 1);
 }
 EXPORT_SYMBOL(find_next_bit);
 #endif
 
 #ifndef find_next_zero_bit
-/*
- * This implementation of find_{first,next}_zero_bit was stolen from
- * Linus' asm-alpha/bitops.h.
- */
 unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
 				 unsigned long offset)
 {
-	const unsigned long *p = addr + BITOP_WORD(offset);
-	unsigned long result = offset & ~(BITS_PER_LONG-1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset %= BITS_PER_LONG;
-	if (offset) {
-		tmp = *(p++);
-		tmp |= ~0UL >> (BITS_PER_LONG - offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (~tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-	while (size & ~(BITS_PER_LONG-1)) {
-		if (~(tmp = *(p++)))
-			goto found_middle;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-
-found_first:
-	tmp |= ~0UL << size;
-	if (tmp == ~0UL)	/* Are any bits zero? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + ffz(tmp);
+	return _find_next_bit(addr, size, offset, 0);
 }
 EXPORT_SYMBOL(find_next_zero_bit);
 #endif
@@ -113,24 +72,14 @@ EXPORT_SYMBOL(find_next_zero_bit);
  */
 unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
 {
-	const unsigned long *p = addr;
-	unsigned long result = 0;
-	unsigned long tmp;
+	unsigned int idx;
 
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
+	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+		if (addr[idx])
+			return idx * BITS_PER_LONG + __ffs(addr[idx]);
 	}
-	if (!size)
-		return result;
 
-	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found:
-	return result + __ffs(tmp);
+	return size;
 }
 EXPORT_SYMBOL(find_first_bit);
 #endif
@@ -141,24 +90,14 @@ EXPORT_SYMBOL(find_first_bit);
  */
 unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
 {
-	const unsigned long *p = addr;
-	unsigned long result = 0;
-	unsigned long tmp;
+	unsigned int idx;
 
-	while (size & ~(BITS_PER_LONG-1)) {
-		if (~(tmp = *(p++)))
-			goto found;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
+	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+		if (addr[idx] != ULONG_MAX)
+			return idx * BITS_PER_LONG + ffz(addr[idx]);
 	}
-	if (!size)
-		return result;
 
-	tmp = (*p) | (~0UL << size);
-	if (tmp == ~0UL)	/* Are any bits zero? */
-		return result + size;	/* Nope. */
-found:
-	return result + ffz(tmp);
+	return size;
 }
 EXPORT_SYMBOL(find_first_zero_bit);
 #endif
@@ -166,18 +105,6 @@ EXPORT_SYMBOL(find_first_zero_bit);
 #ifdef __BIG_ENDIAN
 
 /* include/linux/byteorder does not support "unsigned long" type */
-static inline unsigned long ext2_swabp(const unsigned long * x)
-{
-#if BITS_PER_LONG == 64
-	return (unsigned long) __swab64p((u64 *) x);
-#elif BITS_PER_LONG == 32
-	return (unsigned long) __swab32p((u32 *) x);
-#else
-#error BITS_PER_LONG not defined
-#endif
-}
-
-/* include/linux/byteorder doesn't support "unsigned long" type */
 static inline unsigned long ext2_swab(const unsigned long y)
 {
 #if BITS_PER_LONG == 64
@@ -189,48 +116,40 @@ static inline unsigned long ext2_swab(const unsigned long y)
 #endif
 }
 
-#ifndef find_next_zero_bit_le
-unsigned long find_next_zero_bit_le(const void *addr, unsigned
-		long size, unsigned long offset)
+#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
+static unsigned long _find_next_bit_le(const unsigned long *addr,
+		unsigned long end, unsigned long start, unsigned long flags)
 {
-	const unsigned long *p = addr;
-	unsigned long result = offset & ~(BITS_PER_LONG - 1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	p += BITOP_WORD(offset);
-	size -= result;
-	offset &= (BITS_PER_LONG - 1UL);
-	if (offset) {
-		tmp = ext2_swabp(p++);
-		tmp |= (~0UL >> (BITS_PER_LONG - offset));
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (~tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
+	unsigned long tmp = flags ? addr[start / BITS_PER_LONG]
+			: ~addr[start / BITS_PER_LONG];
+
+	/* Handle 1st word. */
+	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
+		tmp &= ext2_swab(HIGH_BITS_MASK(start
+					% BITS_PER_LONG));
+		start = round_down(start, BITS_PER_LONG);
 	}
 
-	while (size & ~(BITS_PER_LONG - 1)) {
-		if (~(tmp = *(p++)))
-			goto found_middle_swap;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = ext2_swabp(p);
-found_first:
-	tmp |= ~0UL << size;
-	if (tmp == ~0UL)	/* Are any bits zero? */
-		return result + size; /* Nope. Skip ffz */
-found_middle:
-	return result + ffz(tmp);
+	do {
+		if (tmp)
+			return MIN(start +
+				__ffs(ext2_swab(tmp)), end);
 
-found_middle_swap:
-	return result + ffz(ext2_swab(tmp));
+		start += BITS_PER_LONG;
+		if (start >= end)
+			return end;
+
+		tmp = flags ? addr[start / BITS_PER_LONG]
+			: ~addr[start / BITS_PER_LONG];
+	} while (1);
+}
+#endif
+
+#ifndef find_next_zero_bit_le
+unsigned long find_next_zero_bit_le(const void *addr, unsigned
+		long size, unsigned long offset)
+{
+	return _find_next_bit_le(addr, size, offset, o);
 }
 EXPORT_SYMBOL(find_next_zero_bit_le);
 #endif
@@ -239,45 +158,7 @@ EXPORT_SYMBOL(find_next_zero_bit_le);
 unsigned long find_next_bit_le(const void *addr, unsigned
 		long size, unsigned long offset)
 {
-	const unsigned long *p = addr;
-	unsigned long result = offset & ~(BITS_PER_LONG - 1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	p += BITOP_WORD(offset);
-	size -= result;
-	offset &= (BITS_PER_LONG - 1UL);
-	if (offset) {
-		tmp = ext2_swabp(p++);
-		tmp &= (~0UL << offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-
-	while (size & ~(BITS_PER_LONG - 1)) {
-		tmp = *(p++);
-		if (tmp)
-			goto found_middle_swap;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = ext2_swabp(p);
-found_first:
-	tmp &= (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size; /* Nope. */
-found_middle:
-	return result + __ffs(tmp);
-
-found_middle_swap:
-	return result + __ffs(ext2_swab(tmp));
+	return _find_next_bit_le(addr, size, offset, 1);
 }
 EXPORT_SYMBOL(find_next_bit_le);
 #endif
-- 
2.1.0


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] lib: find_*_bit reimplementation
  2015-01-19 21:25 [PATCH] lib: find_*_bit reimplementation Yury Norov
@ 2015-01-24  0:45 ` Rasmus Villemoes
  2015-01-25 23:32   ` Yury
  0 siblings, 1 reply; 3+ messages in thread
From: Rasmus Villemoes @ 2015-01-24  0:45 UTC (permalink / raw)
  To: Yury Norov; +Cc: linux-kernel, heukelum, mita, Andrew Morton

On Mon, Jan 19 2015, Yury Norov <yury.norov@gmail.com> wrote:

> New implementation takes less space, and, I hope, easier to understand.
>
> Signed-off-by: Yury Norov <yury.norov@gmail.com>
> ---
>  lib/find_next_bit.c | 265 +++++++++++++++-------------------------------------
>  1 file changed, 73 insertions(+), 192 deletions(-)
>

That diffstat is certainly nice. Do you also have numbers for the
size of the generated code, and do you know if there is a
measurable performance difference? Have you tested whether the new and
old code give the same results, also in corner cases?

Some remarks inline below.

> diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
> index 0cbfc0b..a5c915f 100644
> --- a/lib/find_next_bit.c
> +++ b/lib/find_next_bit.c
> @@ -11,10 +11,39 @@
>  
>  #include <linux/bitops.h>
>  #include <linux/export.h>
> +#include <linux/kernel.h>
>  #include <asm/types.h>
>  #include <asm/byteorder.h>
>  
> -#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
> +#define HIGH_BITS_MASK(nr)		(ULONG_MAX << (nr))
> +#define MIN(a, b)			((a) < (b) ? (a) : (b))
> +

Please don't duplicate min/max macros. kernel.h already provides everything you need.

> +#if !defined(find_next_bit) || !defined(find_next_zero_bit)
> +static unsigned long _find_next_bit(const unsigned long *addr,
> +		unsigned long end, unsigned long start, unsigned long flags)

Having two parameters called end and start appearing in that
order is slightly confusing. Why not keep the name 'size' for
end, or maybe 'nbits' to make the unit clear. Also, I think flags
should just be a bool and maybe renamed to something more meaningful.

> +{
> +	unsigned long tmp = flags ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +
> +	/* Handle 1st word. */
> +	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
> +		tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
> +		start = round_down(start, BITS_PER_LONG);
> +	}
> +
> +	do {
> +		if (tmp)
> +			return MIN(start + __ffs(tmp), end);
> +
> +		start += BITS_PER_LONG;
> +		if (start >= end)
> +			return end;
> +
> +		tmp = flags ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +	} while (1);
> +}
> +#endif
>  
>  #ifndef find_next_bit
>  /*
> @@ -23,86 +52,16 @@
>  unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
>  			    unsigned long offset)
>  {
> -	const unsigned long *p = addr + BITOP_WORD(offset);
> -	unsigned long result = offset & ~(BITS_PER_LONG-1);
> -	unsigned long tmp;
> -
> -	if (offset >= size)
> -		return size;

The previous versions handled this, but your code will always access
the word at addr[start/BITS_PER_LONG]. Are you sure no caller ever
passes start >= size?

> -	size -= result;
> -	offset %= BITS_PER_LONG;
> -	if (offset) {
> -		tmp = *(p++);
> -		tmp &= (~0UL << offset);
> -		if (size < BITS_PER_LONG)
> -			goto found_first;
> -		if (tmp)
> -			goto found_middle;
> -		size -= BITS_PER_LONG;
> -		result += BITS_PER_LONG;
> -	}
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if ((tmp = *(p++)))
> -			goto found_middle;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> -	}
> -	if (!size)
> -		return result;
> -	tmp = *p;
> -
> -found_first:
> -	tmp &= (~0UL >> (BITS_PER_LONG - size));
> -	if (tmp == 0UL)		/* Are any bits set? */
> -		return result + size;	/* Nope. */
> -found_middle:
> -	return result + __ffs(tmp);
> +	return _find_next_bit(addr, size, offset, 1);
>  }
>  EXPORT_SYMBOL(find_next_bit);
>  #endif
>  
>  #ifndef find_next_zero_bit
> -/*
> - * This implementation of find_{first,next}_zero_bit was stolen from
> - * Linus' asm-alpha/bitops.h.
> - */
>  unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
>  				 unsigned long offset)
>  {
> -	const unsigned long *p = addr + BITOP_WORD(offset);
> -	unsigned long result = offset & ~(BITS_PER_LONG-1);
> -	unsigned long tmp;
> -
> -	if (offset >= size)
> -		return size;
> -	size -= result;
> -	offset %= BITS_PER_LONG;
> -	if (offset) {
> -		tmp = *(p++);
> -		tmp |= ~0UL >> (BITS_PER_LONG - offset);
> -		if (size < BITS_PER_LONG)
> -			goto found_first;
> -		if (~tmp)
> -			goto found_middle;
> -		size -= BITS_PER_LONG;
> -		result += BITS_PER_LONG;
> -	}
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if (~(tmp = *(p++)))
> -			goto found_middle;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> -	}
> -	if (!size)
> -		return result;
> -	tmp = *p;
> -
> -found_first:
> -	tmp |= ~0UL << size;
> -	if (tmp == ~0UL)	/* Are any bits zero? */
> -		return result + size;	/* Nope. */
> -found_middle:
> -	return result + ffz(tmp);
> +	return _find_next_bit(addr, size, offset, 0);
>  }
>  EXPORT_SYMBOL(find_next_zero_bit);
>  #endif
> @@ -113,24 +72,14 @@ EXPORT_SYMBOL(find_next_zero_bit);
>   */
>  unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
>  {
> -	const unsigned long *p = addr;
> -	unsigned long result = 0;
> -	unsigned long tmp;
> +	unsigned int idx;
>  
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if ((tmp = *(p++)))
> -			goto found;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> +	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
> +		if (addr[idx])
> +			return idx * BITS_PER_LONG + __ffs(addr[idx]);
>  	}

I'm afraid this is wrong. It doesn't take garbage bits in the last
(partial) word into account. You either need to loop over the full
words and handle the last separately, masking off the garbage bits, or
wrap min(, size) around the above expression, just as you've done in
_find_next_bit. I think I prefer the latter.

Does anyone use insane size bitmaps with 2^32 bits or more? If so,
you'd have to watch out for overflow and use unsigned long for idx.

> -	if (!size)
> -		return result;
>  
> -	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
> -	if (tmp == 0UL)		/* Are any bits set? */
> -		return result + size;	/* Nope. */
> -found:
> -	return result + __ffs(tmp);
> +	return size;
>  }
>  EXPORT_SYMBOL(find_first_bit);
>  #endif
> @@ -141,24 +90,14 @@ EXPORT_SYMBOL(find_first_bit);
>   */
>  unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
>  {
> -	const unsigned long *p = addr;
> -	unsigned long result = 0;
> -	unsigned long tmp;
> +	unsigned int idx;
>  
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if (~(tmp = *(p++)))
> -			goto found;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> +	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
> +		if (addr[idx] != ULONG_MAX)
> +			return idx * BITS_PER_LONG + ffz(addr[idx]);
>  	}

The same of course applies here.

> -	if (!size)
> -		return result;
>  
> -	tmp = (*p) | (~0UL << size);
> -	if (tmp == ~0UL)	/* Are any bits zero? */
> -		return result + size;	/* Nope. */
> -found:
> -	return result + ffz(tmp);
> +	return size;
>  }
>  EXPORT_SYMBOL(find_first_zero_bit);
>  #endif
> @@ -166,18 +105,6 @@ EXPORT_SYMBOL(find_first_zero_bit);
>  #ifdef __BIG_ENDIAN
>  
>  /* include/linux/byteorder does not support "unsigned long" type */
> -static inline unsigned long ext2_swabp(const unsigned long * x)
> -{
> -#if BITS_PER_LONG == 64
> -	return (unsigned long) __swab64p((u64 *) x);
> -#elif BITS_PER_LONG == 32
> -	return (unsigned long) __swab32p((u32 *) x);
> -#else
> -#error BITS_PER_LONG not defined
> -#endif
> -}
> -
> -/* include/linux/byteorder doesn't support "unsigned long" type */
>  static inline unsigned long ext2_swab(const unsigned long y)
>  {
>  #if BITS_PER_LONG == 64
> @@ -189,48 +116,40 @@ static inline unsigned long ext2_swab(const unsigned long y)
>  #endif
>  }
>  
> -#ifndef find_next_zero_bit_le
> -unsigned long find_next_zero_bit_le(const void *addr, unsigned
> -		long size, unsigned long offset)
> +#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
> +static unsigned long _find_next_bit_le(const unsigned long *addr,
> +		unsigned long end, unsigned long start, unsigned long flags)
>  {
> -	const unsigned long *p = addr;
> -	unsigned long result = offset & ~(BITS_PER_LONG - 1);
> -	unsigned long tmp;
> -
> -	if (offset >= size)
> -		return size;
> -	p += BITOP_WORD(offset);
> -	size -= result;
> -	offset &= (BITS_PER_LONG - 1UL);
> -	if (offset) {
> -		tmp = ext2_swabp(p++);
> -		tmp |= (~0UL >> (BITS_PER_LONG - offset));
> -		if (size < BITS_PER_LONG)
> -			goto found_first;
> -		if (~tmp)
> -			goto found_middle;
> -		size -= BITS_PER_LONG;
> -		result += BITS_PER_LONG;
> +	unsigned long tmp = flags ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +
> +	/* Handle 1st word. */
> +	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
> +		tmp &= ext2_swab(HIGH_BITS_MASK(start
> +					% BITS_PER_LONG));

Nit: There's no reason to break that line.

> +		start = round_down(start, BITS_PER_LONG);
>  	}
>  
> -	while (size & ~(BITS_PER_LONG - 1)) {
> -		if (~(tmp = *(p++)))
> -			goto found_middle_swap;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> -	}
> -	if (!size)
> -		return result;
> -	tmp = ext2_swabp(p);
> -found_first:
> -	tmp |= ~0UL << size;
> -	if (tmp == ~0UL)	/* Are any bits zero? */
> -		return result + size; /* Nope. Skip ffz */
> -found_middle:
> -	return result + ffz(tmp);
> +	do {
> +		if (tmp)
> +			return MIN(start +
> +				__ffs(ext2_swab(tmp)), end);

Again, no reason to break the line.
  
> -found_middle_swap:
> -	return result + ffz(ext2_swab(tmp));
> +		start += BITS_PER_LONG;
> +		if (start >= end)
> +			return end;
> +
> +		tmp = flags ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +	} while (1);
> +}
> +#endif
> +
> +#ifndef find_next_zero_bit_le
> +unsigned long find_next_zero_bit_le(const void *addr, unsigned
> +		long size, unsigned long offset)
> +{
> +	return _find_next_bit_le(addr, size, offset, o);

I'm assuming you only compile-tested this on little-endian. The o
wants to be a 0.

Rasmus


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] lib: find_*_bit reimplementation
  2015-01-24  0:45 ` Rasmus Villemoes
@ 2015-01-25 23:32   ` Yury
  0 siblings, 0 replies; 3+ messages in thread
From: Yury @ 2015-01-25 23:32 UTC (permalink / raw)
  To: Rasmus Villemoes; +Cc: linux-kernel, heukelum, mita, Andrew Morton, rusty

On 24.01.2015 03:45, Rasmus Villemoes wrote:
> On Mon, Jan 19 2015, Yury Norov <yury.norov@gmail.com> wrote:
>
>> New implementation takes less space, and, I hope, easier to understand.
>>
>> Signed-off-by: Yury Norov <yury.norov@gmail.com>
>> ---
>>  lib/find_next_bit.c | 265 +++++++++++++++-------------------------------------
>>  1 file changed, 73 insertions(+), 192 deletions(-)
>>
> That diffstat is certainly nice. Do you also have numbers for the
> size of the generated code, and do you know if there is a
> measurable performance difference? Have you tested whether the new and
> old code give the same results, also in corner cases?
Hello, Rasmus. Thank you for your time.

> Do you also have numbers for the size of the generated code

Before text section is 817 bytes, after - 533.
Compared to this version, I have since modified the patch, so the numbers may differ slightly.

> Have you tested whether the new and old code give the same results, also in corner cases?

I tested the new version alongside the old one, and whenever the new one returned a different value,
it was printed to the system log. I didn't measure performance because I don't expect a
significant gain here. But now I think it's a good idea to write tests for performance and
corner cases. Maybe someone has already done it... Brief googling didn't help. Anyway, with the
new version of the patch I will show my measurements.
>
> Some remarks inline below.
>
>> diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
>> index 0cbfc0b..a5c915f 100644
>> --- a/lib/find_next_bit.c
>> +++ b/lib/find_next_bit.c
>> @@ -11,10 +11,39 @@
>>  
>>  #include <linux/bitops.h>
>>  #include <linux/export.h>
>> +#include <linux/kernel.h>
>>  #include <asm/types.h>
>>  #include <asm/byteorder.h>
>>  
>> -#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
>> +#define HIGH_BITS_MASK(nr)		(ULONG_MAX << (nr))
>> +#define MIN(a, b)			((a) < (b) ? (a) : (b))
>> +
> Please don't duplicate min/max macros. kernel.h already provides everything you need.
Ok.
>
>> +#if !defined(find_next_bit) || !defined(find_next_zero_bit)
>> +static unsigned long _find_next_bit(const unsigned long *addr,
>> +		unsigned long end, unsigned long start, unsigned long flags)
> Having two parameters called end and start appearing in that
> order is slightly confusing. Why not keep the name 'size' for
> end, or maybe 'nbits' to make the unit clear. Also, I think flags
> should just be a bool and maybe renamed to something more meaningful.
You're right, Something like this:
 static unsigned long _find_next_bit(const unsigned long *addr,
                        unsigned long nbits, unsigned long start_bit, bool set)
looks better.
>
>> +{
>> +	unsigned long tmp = flags ? addr[start / BITS_PER_LONG]
>> +			: ~addr[start / BITS_PER_LONG];
>> +
>> +	/* Handle 1st word. */
>> +	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
>> +		tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
>> +		start = round_down(start, BITS_PER_LONG);
>> +	}
>> +
>> +	do {
>> +		if (tmp)
>> +			return MIN(start + __ffs(tmp), end);
>> +
>> +		start += BITS_PER_LONG;
>> +		if (start >= end)
>> +			return end;
>> +
>> +		tmp = flags ? addr[start / BITS_PER_LONG]
>> +			: ~addr[start / BITS_PER_LONG];
>> +	} while (1);
>> +}
>> +#endif
>>  
>>  #ifndef find_next_bit
>>  /*
>> @@ -23,86 +52,16 @@
>>  unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
>>  			    unsigned long offset)
>>  {
>> -	const unsigned long *p = addr + BITOP_WORD(offset);
>> -	unsigned long result = offset & ~(BITS_PER_LONG-1);
>> -	unsigned long tmp;
>> -
>> -	if (offset >= size)
>> -		return size;
> The previous versions handled this, but your code will always access
> the word at addr[start/BITS_PER_LONG]. Are you sure no caller ever
> passes start >= size?
You're right. Fixed.
>
>> -	size -= result;
>> -	offset %= BITS_PER_LONG;
>> -	if (offset) {
>> -		tmp = *(p++);
>> -		tmp &= (~0UL << offset);
>> -		if (size < BITS_PER_LONG)
>> -			goto found_first;
>> -		if (tmp)
>> -			goto found_middle;
>> -		size -= BITS_PER_LONG;
>> -		result += BITS_PER_LONG;
>> -	}
>> -	while (size & ~(BITS_PER_LONG-1)) {
>> -		if ((tmp = *(p++)))
>> -			goto found_middle;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> -	}
>> -	if (!size)
>> -		return result;
>> -	tmp = *p;
>> -
>> -found_first:
>> -	tmp &= (~0UL >> (BITS_PER_LONG - size));
>> -	if (tmp == 0UL)		/* Are any bits set? */
>> -		return result + size;	/* Nope. */
>> -found_middle:
>> -	return result + __ffs(tmp);
>> +	return _find_next_bit(addr, size, offset, 1);
>>  }
>>  EXPORT_SYMBOL(find_next_bit);
>>  #endif
>>  
>>  #ifndef find_next_zero_bit
>> -/*
>> - * This implementation of find_{first,next}_zero_bit was stolen from
>> - * Linus' asm-alpha/bitops.h.
>> - */
>>  unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
>>  				 unsigned long offset)
>>  {
>> -	const unsigned long *p = addr + BITOP_WORD(offset);
>> -	unsigned long result = offset & ~(BITS_PER_LONG-1);
>> -	unsigned long tmp;
>> -
>> -	if (offset >= size)
>> -		return size;
>> -	size -= result;
>> -	offset %= BITS_PER_LONG;
>> -	if (offset) {
>> -		tmp = *(p++);
>> -		tmp |= ~0UL >> (BITS_PER_LONG - offset);
>> -		if (size < BITS_PER_LONG)
>> -			goto found_first;
>> -		if (~tmp)
>> -			goto found_middle;
>> -		size -= BITS_PER_LONG;
>> -		result += BITS_PER_LONG;
>> -	}
>> -	while (size & ~(BITS_PER_LONG-1)) {
>> -		if (~(tmp = *(p++)))
>> -			goto found_middle;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> -	}
>> -	if (!size)
>> -		return result;
>> -	tmp = *p;
>> -
>> -found_first:
>> -	tmp |= ~0UL << size;
>> -	if (tmp == ~0UL)	/* Are any bits zero? */
>> -		return result + size;	/* Nope. */
>> -found_middle:
>> -	return result + ffz(tmp);
>> +	return _find_next_bit(addr, size, offset, 0);
>>  }
>>  EXPORT_SYMBOL(find_next_zero_bit);
>>  #endif
>> @@ -113,24 +72,14 @@ EXPORT_SYMBOL(find_next_zero_bit);
>>   */
>>  unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
>>  {
>> -	const unsigned long *p = addr;
>> -	unsigned long result = 0;
>> -	unsigned long tmp;
>> +	unsigned int idx;
>>  
>> -	while (size & ~(BITS_PER_LONG-1)) {
>> -		if ((tmp = *(p++)))
>> -			goto found;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> +	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
>> +		if (addr[idx])
>> +			return idx * BITS_PER_LONG + __ffs(addr[idx]);
>>  	}
> I'm afraid this is wrong. It doesn't take garbage bits in the last
> (partial) word into account. You either need to loop over the full
> words and handle the last separately, masking off the garbage bits, or
> wrap min(, size) around the above expression, just as you've done in
> _find_next_bit. I think I prefer the latter.
Yes. Done.
>
> Does anyone use insane size bitmaps with 2^32 bits or more? If so,
> you'd have to watch out for overflow and use unsigned long for idx.
Yes.
>
>> -	if (!size)
>> -		return result;
>>  
>> -	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
>> -	if (tmp == 0UL)		/* Are any bits set? */
>> -		return result + size;	/* Nope. */
>> -found:
>> -	return result + __ffs(tmp);
>> +	return size;
>>  }
>>  EXPORT_SYMBOL(find_first_bit);
>>  #endif
>> @@ -141,24 +90,14 @@ EXPORT_SYMBOL(find_first_bit);
>>   */
>>  unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
>>  {
>> -	const unsigned long *p = addr;
>> -	unsigned long result = 0;
>> -	unsigned long tmp;
>> +	unsigned int idx;
>>  
>> -	while (size & ~(BITS_PER_LONG-1)) {
>> -		if (~(tmp = *(p++)))
>> -			goto found;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> +	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
>> +		if (addr[idx] != ULONG_MAX)
>> +			return idx * BITS_PER_LONG + ffz(addr[idx]);
>>  	}
> The same of course applies here.
>
>> -	if (!size)
>> -		return result;
>>  
>> -	tmp = (*p) | (~0UL << size);
>> -	if (tmp == ~0UL)	/* Are any bits zero? */
>> -		return result + size;	/* Nope. */
>> -found:
>> -	return result + ffz(tmp);
>> +	return size;
>>  }
>>  EXPORT_SYMBOL(find_first_zero_bit);
>>  #endif
>> @@ -166,18 +105,6 @@ EXPORT_SYMBOL(find_first_zero_bit);
>>  #ifdef __BIG_ENDIAN
>>  
>>  /* include/linux/byteorder does not support "unsigned long" type */
>> -static inline unsigned long ext2_swabp(const unsigned long * x)
>> -{
>> -#if BITS_PER_LONG == 64
>> -	return (unsigned long) __swab64p((u64 *) x);
>> -#elif BITS_PER_LONG == 32
>> -	return (unsigned long) __swab32p((u32 *) x);
>> -#else
>> -#error BITS_PER_LONG not defined
>> -#endif
>> -}
>> -
>> -/* include/linux/byteorder doesn't support "unsigned long" type */
>>  static inline unsigned long ext2_swab(const unsigned long y)
>>  {
>>  #if BITS_PER_LONG == 64
>> @@ -189,48 +116,40 @@ static inline unsigned long ext2_swab(const unsigned long y)
>>  #endif
>>  }
>>  
>> -#ifndef find_next_zero_bit_le
>> -unsigned long find_next_zero_bit_le(const void *addr, unsigned
>> -		long size, unsigned long offset)
>> +#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
>> +static unsigned long _find_next_bit_le(const unsigned long *addr,
>> +		unsigned long end, unsigned long start, unsigned long flags)
>>  {
>> -	const unsigned long *p = addr;
>> -	unsigned long result = offset & ~(BITS_PER_LONG - 1);
>> -	unsigned long tmp;
>> -
>> -	if (offset >= size)
>> -		return size;
>> -	p += BITOP_WORD(offset);
>> -	size -= result;
>> -	offset &= (BITS_PER_LONG - 1UL);
>> -	if (offset) {
>> -		tmp = ext2_swabp(p++);
>> -		tmp |= (~0UL >> (BITS_PER_LONG - offset));
>> -		if (size < BITS_PER_LONG)
>> -			goto found_first;
>> -		if (~tmp)
>> -			goto found_middle;
>> -		size -= BITS_PER_LONG;
>> -		result += BITS_PER_LONG;
>> +	unsigned long tmp = flags ? addr[start / BITS_PER_LONG]
>> +			: ~addr[start / BITS_PER_LONG];
>> +
>> +	/* Handle 1st word. */
>> +	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
>> +		tmp &= ext2_swab(HIGH_BITS_MASK(start
>> +					% BITS_PER_LONG));
> Nit: There's no reason to break that line.
>
>> +		start = round_down(start, BITS_PER_LONG);
>>  	}
>>  
>> -	while (size & ~(BITS_PER_LONG - 1)) {
>> -		if (~(tmp = *(p++)))
>> -			goto found_middle_swap;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> -	}
>> -	if (!size)
>> -		return result;
>> -	tmp = ext2_swabp(p);
>> -found_first:
>> -	tmp |= ~0UL << size;
>> -	if (tmp == ~0UL)	/* Are any bits zero? */
>> -		return result + size; /* Nope. Skip ffz */
>> -found_middle:
>> -	return result + ffz(tmp);
>> +	do {
>> +		if (tmp)
>> +			return MIN(start +
>> +				__ffs(ext2_swab(tmp)), end);
> Again, no reason to break the line.
The reason is the 80-character line-length limit, but anyway, I did it in a bad way
>   
>> -found_middle_swap:
>> -	return result + ffz(ext2_swab(tmp));
>> +		start += BITS_PER_LONG;
>> +		if (start >= end)
>> +			return end;
>> +
>> +		tmp = flags ? addr[start / BITS_PER_LONG]
>> +			: ~addr[start / BITS_PER_LONG];
>> +	} while (1);
>> +}
>> +#endif
>> +
>> +#ifndef find_next_zero_bit_le
>> +unsigned long find_next_zero_bit_le(const void *addr, unsigned
>> +		long size, unsigned long offset)
>> +{
>> +	return _find_next_bit_le(addr, size, offset, o);
> I'm assuming you only compile-tested this on little-endian. The o
> wants to be a 0.
Yes, shame on me. I think I'll bring up a SPARC or big-endian ARM environment
in QEMU before uploading the new version.
>
> Rasmus
>


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-01-25 23:33 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-19 21:25 [PATCH] lib: find_*_bit reimplementation Yury Norov
2015-01-24  0:45 ` Rasmus Villemoes
2015-01-25 23:32   ` Yury

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).