Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a366064c authored by Grant Grundler, committed by Kyle McMartin
Browse files

[PARISC] Update bitops from parisc tree



Optimize ext2_find_next_zero_bit. Gives about 25% perf improvement with a
rsync test with ext3.

Signed-off-by: Randolph Chung <tausq@parisc-linux.org>

fix ext3 performance - ext2_find_next_zero() was culprit.
Kudos to jejb for pointing out the possibility that ext2_test_bit
and ext2_find_next_zero() may in fact not be enumerating bits in
the bitmap because of endianness. Took sparc64 implementation and
adapted it to our tree. I suspect the real problem is ffz() wants
an unsigned long and was getting garbage in the top half of the
unsigned int. Not confirmed but that's what I suspect.

Signed-off-by: Grant Grundler <grundler@parisc-linux.org>

Fix find_next_bit for 32-bit
Make masking consistent for bitops

From: Joel Soete <soete.joel@tiscali.be>
Signed-off-by: Randolph Chung <tausq@parisc-linux.org>

Add back incorrectly removed ext2_find_first_zero_bit definition

Signed-off-by: James Bottomley <jejb@parisc-linux.org>

Fixup bitops.h to use volatile for *_bit() ops

Based on this email thread:
       http://marc.theaimsgroup.com/?t=108826637900003



In a nutshell:
        *_bit() want use of volatile.
        __*_bit() are "relaxed" and don't use spinlock or volatile.

other minor changes:
o replaces hweight64() macro with alias to generic_hweight64() (Joel Soete)
o cleanup ext2* macros so (a) it's obvious what the XOR magic is about
  and (b) one version that works for both 32/64-bit.
o replace 2 uses of CONFIG_64BIT with __LP64__. bitops.h used both.
  I think header files that might go to user space should use
  something userspace will know about (__LP64__).

Signed-off-by: Grant Grundler <grundler@parisc-linux.org>

Move SHIFT_PER_LONG to standard location for BITS_PER_LONG (asm/types.h)
and ditch the second definition of BITS_PER_LONG in bitops.h

Signed-off-by: Grant Grundler <grundler@parisc-linux.org>

Signed-off-by: Kyle McMartin <kyle@parisc-linux.org>
parent 2464212f
Loading
Loading
Loading
Loading
+132 −158
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@
#define _PARISC_BITOPS_H

#include <linux/compiler.h>
#include <asm/spinlock.h>
#include <asm/types.h>		/* for BITS_PER_LONG/SHIFT_PER_LONG */
#include <asm/byteorder.h>
#include <asm/atomic.h>

@@ -12,193 +12,157 @@
 * to include/asm-i386/bitops.h or kerneldoc
 */

#ifdef __LP64__
#   define SHIFT_PER_LONG 6
#ifndef BITS_PER_LONG
#   define BITS_PER_LONG 64
#endif
#else
#   define SHIFT_PER_LONG 5
#ifndef BITS_PER_LONG
#   define BITS_PER_LONG 32
#endif
#endif

#define CHOP_SHIFTCOUNT(x) ((x) & (BITS_PER_LONG - 1))
#define CHOP_SHIFTCOUNT(x) (((unsigned long) (x)) & (BITS_PER_LONG - 1))


#define smp_mb__before_clear_bit()      smp_mb()
#define smp_mb__after_clear_bit()       smp_mb()

static __inline__ void set_bit(int nr, volatile unsigned long * address)
/* See http://marc.theaimsgroup.com/?t=108826637900003 for discussion
 * on use of volatile and __*_bit() (set/clear/change):
 *	*_bit() want use of volatile.
 *	__*_bit() are "relaxed" and don't use spinlock or volatile.
 */

static __inline__ void set_bit(int nr, volatile unsigned long * addr)
{
	unsigned long mask;
	unsigned long *addr = (unsigned long *) address;
	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
	unsigned long flags;

	addr += (nr >> SHIFT_PER_LONG);
	mask = 1L << CHOP_SHIFTCOUNT(nr);
	_atomic_spin_lock_irqsave(addr, flags);
	*addr |= mask;
	_atomic_spin_unlock_irqrestore(addr, flags);
}

static __inline__ void __set_bit(int nr, volatile unsigned long * address)
static __inline__ void __set_bit(unsigned long nr, volatile unsigned long * addr)
{
	unsigned long mask;
	unsigned long *addr = (unsigned long *) address;
	unsigned long *m = (unsigned long *) addr + (nr >> SHIFT_PER_LONG);

	addr += (nr >> SHIFT_PER_LONG);
	mask = 1L << CHOP_SHIFTCOUNT(nr);
	*addr |= mask;
	*m |= 1UL << CHOP_SHIFTCOUNT(nr);
}

static __inline__ void clear_bit(int nr, volatile unsigned long * address)
static __inline__ void clear_bit(int nr, volatile unsigned long * addr)
{
	unsigned long mask;
	unsigned long *addr = (unsigned long *) address;
	unsigned long mask = ~(1UL << CHOP_SHIFTCOUNT(nr));
	unsigned long flags;

	addr += (nr >> SHIFT_PER_LONG);
	mask = 1L << CHOP_SHIFTCOUNT(nr);
	_atomic_spin_lock_irqsave(addr, flags);
	*addr &= ~mask;
	*addr &= mask;
	_atomic_spin_unlock_irqrestore(addr, flags);
}

static __inline__ void __clear_bit(unsigned long nr, volatile unsigned long * address)
static __inline__ void __clear_bit(unsigned long nr, volatile unsigned long * addr)
{
	unsigned long mask;
	unsigned long *addr = (unsigned long *) address;
	unsigned long *m = (unsigned long *) addr + (nr >> SHIFT_PER_LONG);

	addr += (nr >> SHIFT_PER_LONG);
	mask = 1L << CHOP_SHIFTCOUNT(nr);
	*addr &= ~mask;
	*m &= ~(1UL << CHOP_SHIFTCOUNT(nr));
}

static __inline__ void change_bit(int nr, volatile unsigned long * address)
static __inline__ void change_bit(int nr, volatile unsigned long * addr)
{
	unsigned long mask;
	unsigned long *addr = (unsigned long *) address;
	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
	unsigned long flags;

	addr += (nr >> SHIFT_PER_LONG);
	mask = 1L << CHOP_SHIFTCOUNT(nr);
	_atomic_spin_lock_irqsave(addr, flags);
	*addr ^= mask;
	_atomic_spin_unlock_irqrestore(addr, flags);
}

static __inline__ void __change_bit(int nr, volatile unsigned long * address)
static __inline__ void __change_bit(unsigned long nr, volatile unsigned long * addr)
{
	unsigned long mask;
	unsigned long *addr = (unsigned long *) address;
	unsigned long *m = (unsigned long *) addr + (nr >> SHIFT_PER_LONG);

	addr += (nr >> SHIFT_PER_LONG);
	mask = 1L << CHOP_SHIFTCOUNT(nr);
	*addr ^= mask;
	*m ^= 1UL << CHOP_SHIFTCOUNT(nr);
}

static __inline__ int test_and_set_bit(int nr, volatile unsigned long * address)
static __inline__ int test_and_set_bit(int nr, volatile unsigned long * addr)
{
	unsigned long mask;
	unsigned long *addr = (unsigned long *) address;
	int oldbit;
	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
	unsigned long oldbit;
	unsigned long flags;

	addr += (nr >> SHIFT_PER_LONG);
	mask = 1L << CHOP_SHIFTCOUNT(nr);
	_atomic_spin_lock_irqsave(addr, flags);
	oldbit = (*addr & mask) ? 1 : 0;
	*addr |= mask;
	oldbit = *addr;
	*addr = oldbit | mask;
	_atomic_spin_unlock_irqrestore(addr, flags);

	return oldbit;
	return (oldbit & mask) ? 1 : 0;
}

static __inline__ int __test_and_set_bit(int nr, volatile unsigned long * address)
{
	unsigned long mask;
	unsigned long *addr = (unsigned long *) address;
	int oldbit;
	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
	unsigned long oldbit;
	unsigned long *addr = (unsigned long *)address + (nr >> SHIFT_PER_LONG);

	addr += (nr >> SHIFT_PER_LONG);
	mask = 1L << CHOP_SHIFTCOUNT(nr);
	oldbit = (*addr & mask) ? 1 : 0;
	*addr |= mask;
	oldbit = *addr;
	*addr = oldbit | mask;

	return oldbit;
	return (oldbit & mask) ? 1 : 0;
}

static __inline__ int test_and_clear_bit(int nr, volatile unsigned long * address)
static __inline__ int test_and_clear_bit(int nr, volatile unsigned long * addr)
{
	unsigned long mask;
	unsigned long *addr = (unsigned long *) address;
	int oldbit;
	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
	unsigned long oldbit;
	unsigned long flags;

	addr += (nr >> SHIFT_PER_LONG);
	mask = 1L << CHOP_SHIFTCOUNT(nr);
	_atomic_spin_lock_irqsave(addr, flags);
	oldbit = (*addr & mask) ? 1 : 0;
	*addr &= ~mask;
	oldbit = *addr;
	*addr = oldbit & ~mask;
	_atomic_spin_unlock_irqrestore(addr, flags);

	return oldbit;
	return (oldbit & mask) ? 1 : 0;
}

static __inline__ int __test_and_clear_bit(int nr, volatile unsigned long * address)
{
	unsigned long mask;
	unsigned long *addr = (unsigned long *) address;
	int oldbit;
	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
	unsigned long *addr = (unsigned long *)address + (nr >> SHIFT_PER_LONG);
	unsigned long oldbit;

	addr += (nr >> SHIFT_PER_LONG);
	mask = 1L << CHOP_SHIFTCOUNT(nr);
	oldbit = (*addr & mask) ? 1 : 0;
	*addr &= ~mask;
	oldbit = *addr;
	*addr = oldbit & ~mask;

	return oldbit;
	return (oldbit & mask) ? 1 : 0;
}

static __inline__ int test_and_change_bit(int nr, volatile unsigned long * address)
static __inline__ int test_and_change_bit(int nr, volatile unsigned long * addr)
{
	unsigned long mask;
	unsigned long *addr = (unsigned long *) address;
	int oldbit;
	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
	unsigned long oldbit;
	unsigned long flags;

	addr += (nr >> SHIFT_PER_LONG);
	mask = 1L << CHOP_SHIFTCOUNT(nr);
	_atomic_spin_lock_irqsave(addr, flags);
	oldbit = (*addr & mask) ? 1 : 0;
	*addr ^= mask;
	oldbit = *addr;
	*addr = oldbit ^ mask;
	_atomic_spin_unlock_irqrestore(addr, flags);

	return oldbit;
	return (oldbit & mask) ? 1 : 0;
}

static __inline__ int __test_and_change_bit(int nr, volatile unsigned long * address)
{
	unsigned long mask;
	unsigned long *addr = (unsigned long *) address;
	int oldbit;
	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
	unsigned long *addr = (unsigned long *)address + (nr >> SHIFT_PER_LONG);
	unsigned long oldbit;

	addr += (nr >> SHIFT_PER_LONG);
	mask = 1L << CHOP_SHIFTCOUNT(nr);
	oldbit = (*addr & mask) ? 1 : 0;
	*addr ^= mask;
	oldbit = *addr;
	*addr = oldbit ^ mask;

	return oldbit;
	return (oldbit & mask) ? 1 : 0;
}

static __inline__ int test_bit(int nr, const volatile unsigned long *address)
{
	unsigned long mask;
	const unsigned long *addr = (const unsigned long *)address;
	
	addr += (nr >> SHIFT_PER_LONG);
	mask = 1L << CHOP_SHIFTCOUNT(nr);
	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
	const unsigned long *addr = (const unsigned long *)address + (nr >> SHIFT_PER_LONG);
	
	return !!(*addr & mask);
}
@@ -229,7 +193,7 @@ static __inline__ unsigned long __ffs(unsigned long x)
	unsigned long ret;

	__asm__(
#if BITS_PER_LONG > 32
#ifdef __LP64__
		" ldi       63,%1\n"
		" extrd,u,*<>  %0,63,32,%%r0\n"
		" extrd,u,*TR  %0,31,32,%0\n"	/* move top 32-bits down */
@@ -304,14 +268,7 @@ static __inline__ int fls(int x)
 * hweightN: returns the hamming weight (i.e. the number
 * of bits set) of a N-bit word
 */
#define hweight64(x)						\
({								\
	unsigned long __x = (x);				\
	unsigned int __w;					\
	__w = generic_hweight32((unsigned int) __x);		\
	__w += generic_hweight32((unsigned int) (__x>>32));	\
	__w;							\
})
#define hweight64(x) generic_hweight64(x)
#define hweight32(x) generic_hweight32(x)
#define hweight16(x) generic_hweight16(x)
#define hweight8(x) generic_hweight8(x)
@@ -324,7 +281,13 @@ static __inline__ int fls(int x)
 */
static inline int sched_find_first_bit(const unsigned long *b)
{
#ifndef __LP64__
#ifdef __LP64__
	if (unlikely(b[0]))
		return __ffs(b[0]);
	if (unlikely(b[1]))
		return __ffs(b[1]) + 64;
	return __ffs(b[2]) + 128;
#else
	if (unlikely(b[0]))
		return __ffs(b[0]);
	if (unlikely(b[1]))
@@ -334,14 +297,6 @@ static inline int sched_find_first_bit(const unsigned long *b)
	if (b[3])
		return __ffs(b[3]) + 96;
	return __ffs(b[4]) + 128;
#else
	if (unlikely(b[0]))
		return __ffs(b[0]);
	if (unlikely(((unsigned int)b[1])))
		return __ffs(b[1]) + 64;
	if (b[1] >> 32)
		return __ffs(b[1] >> 32) + 96;
	return __ffs(b[2]) + 128;
#endif
}

@@ -391,7 +346,7 @@ found_middle:

static __inline__ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + (offset >> 6);
	const unsigned long *p = addr + (offset >> SHIFT_PER_LONG);
	unsigned long result = offset & ~(BITS_PER_LONG-1);
	unsigned long tmp;

@@ -445,71 +400,90 @@ found_middle:
 * test_and_{set,clear}_bit guarantee atomicity without
 * disabling interrupts.
 */
#ifdef __LP64__
#define ext2_set_bit(nr, addr)		__test_and_set_bit((nr) ^ 0x38, (unsigned long *)addr)
#define ext2_set_bit_atomic(l,nr,addr)  test_and_set_bit((nr) ^ 0x38, (unsigned long *)addr)
#define ext2_clear_bit(nr, addr)	__test_and_clear_bit((nr) ^ 0x38, (unsigned long *)addr)
#define ext2_clear_bit_atomic(l,nr,addr) test_and_clear_bit((nr) ^ 0x38, (unsigned long *)addr)
#else
#define ext2_set_bit(nr, addr)		__test_and_set_bit((nr) ^ 0x18, (unsigned long *)addr)
#define ext2_set_bit_atomic(l,nr,addr)  test_and_set_bit((nr) ^ 0x18, (unsigned long *)addr)
#define ext2_clear_bit(nr, addr)	__test_and_clear_bit((nr) ^ 0x18, (unsigned long *)addr)
#define ext2_clear_bit_atomic(l,nr,addr) test_and_clear_bit((nr) ^ 0x18, (unsigned long *)addr)
#endif

#endif	/* __KERNEL__ */
/* '3' is bits per byte */
#define LE_BYTE_ADDR ((sizeof(unsigned long) - 1) << 3)

static __inline__ int ext2_test_bit(int nr, __const__ void * addr)
{
	__const__ unsigned char	*ADDR = (__const__ unsigned char *) addr;
#define ext2_test_bit(nr, addr) \
			test_bit((nr)	^ LE_BYTE_ADDR, (unsigned long *)addr)
#define ext2_set_bit(nr, addr)	\
		__test_and_set_bit((nr) ^ LE_BYTE_ADDR, (unsigned long *)addr)
#define ext2_clear_bit(nr, addr) \
		__test_and_clear_bit((nr) ^ LE_BYTE_ADDR, (unsigned long *)addr)

	return (ADDR[nr >> 3] >> (nr & 7)) & 1;
}
#define ext2_set_bit_atomic(l,nr,addr) \
		test_and_set_bit((nr)   ^ LE_BYTE_ADDR, (unsigned long *)addr)
#define ext2_clear_bit_atomic(l,nr,addr) \
		test_and_clear_bit( (nr) ^ LE_BYTE_ADDR, (unsigned long *)addr)

#endif	/* __KERNEL__ */

/*
 * This implementation of ext2_find_{first,next}_zero_bit was stolen from
 * Linus' asm-alpha/bitops.h and modified for a big-endian machine.
 */

#define ext2_find_first_zero_bit(addr, size) \
	ext2_find_next_zero_bit((addr), (size), 0)

extern __inline__ unsigned long ext2_find_next_zero_bit(void *addr,
	unsigned long size, unsigned long offset)
/*
 * ext2_swabp - read the unsigned long at @x through a fixed-width swab helper.
 *
 * include/linux/byteorder has no "unsigned long" variant, so select the
 * helper matching the width of long: __swab64p() when __LP64__ is defined,
 * __swab32p() otherwise.  The helper's result is converted back to
 * unsigned long and returned.
 */
static inline unsigned long ext2_swabp(unsigned long * x)
{
	unsigned long swapped;

#ifdef __LP64__
	swapped = (unsigned long) __swab64p((u64 *) x);
#else
	swapped = (unsigned long) __swab32p((u32 *) x);
#endif

	return swapped;
}

/*
 * ext2_swab - pass the value @y through a fixed-width swab helper.
 *
 * include/linux/byteorder has no "unsigned long" variant, so select the
 * helper matching the width of long: __swab64() when __LP64__ is defined,
 * __swab32() otherwise.  The helper's result is converted back to
 * unsigned long and returned.
 */
static inline unsigned long ext2_swab(unsigned long y)
{
	unsigned long swapped;

#ifdef __LP64__
	swapped = (unsigned long) __swab64((u64) y);
#else
	swapped = (unsigned long) __swab32((u32) y);
#endif

	return swapped;
}

static __inline__ unsigned long ext2_find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
{
	unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
	unsigned int result = offset & ~31UL;
	unsigned int tmp;
	unsigned long *p = (unsigned long *) addr + (offset >> SHIFT_PER_LONG);
	unsigned long result = offset & ~(BITS_PER_LONG - 1);
	unsigned long tmp;

	if (offset >= size)
		return size;
	size -= result;
	offset &= 31UL;
	offset &= (BITS_PER_LONG - 1UL);
	if (offset) {
		tmp = cpu_to_le32p(p++);
		tmp |= ~0UL >> (32-offset);
		if (size < 32)
		tmp = ext2_swabp(p++);
		tmp |= (~0UL >> (BITS_PER_LONG - offset));
		if (size < BITS_PER_LONG)
			goto found_first;
		if (tmp != ~0U)
		if (~tmp)
			goto found_middle;
		size -= 32;
		result += 32;
		size -= BITS_PER_LONG;
		result += BITS_PER_LONG;
	}
	while (size >= 32) {
		if ((tmp = cpu_to_le32p(p++)) != ~0U)
			goto found_middle;
		result += 32;
		size -= 32;

	while (size & ~(BITS_PER_LONG - 1)) {
		if (~(tmp = *(p++)))
			goto found_middle_swap;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
	}
	if (!size)
		return result;
	tmp = cpu_to_le32p(p);
	tmp = ext2_swabp(p);
found_first:
	tmp |= ~0U << size;
	tmp |= ~0UL << size;
	if (tmp == ~0UL)	/* Are any bits zero? */
		return result + size; /* Nope. Skip ffz */
found_middle:
	return result + ffz(tmp);

found_middle_swap:
	return result + ffz(ext2_swab(tmp));
}


/* Bitmap functions for the minix filesystem.  */
#define minix_test_and_set_bit(nr,addr) ext2_set_bit(nr,addr)
#define minix_set_bit(nr,addr) ((void)ext2_set_bit(nr,addr))
+2 −0
Original line number Diff line number Diff line
@@ -33,8 +33,10 @@ typedef unsigned long long __u64;

#ifdef __LP64__
#define BITS_PER_LONG 64
#define SHIFT_PER_LONG 6
#else
#define BITS_PER_LONG 32
#define SHIFT_PER_LONG 5
#endif

#ifndef __ASSEMBLY__