Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e5a2c899 authored by Hannes Frederic Sowa, committed by David S. Miller
Browse files

fast_hash: avoid indirect function calls



By default the arch_fast_hash hashing function pointers are initialized
to jhash(2). If during boot-up a CPU with SSE4.2 is detected they get
updated to the CRC32 ones. This dispatching scheme incurs a function
pointer lookup and indirect call for every hashing operation.

rhashtable, for example, is a user of arch_fast_hash and additionally
stores pointers to hashing functions in its own structure, causing two
indirect branches per hashing operation.

Using alternative_call we can eliminate one of those indirect branches.

Acked-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 2c99cd91
Loading
Loading
Loading
Loading
+46 −5
Original line number Diff line number Diff line
#ifndef _ASM_X86_HASH_H
#define _ASM_X86_HASH_H
#ifndef __ASM_X86_HASH_H
#define __ASM_X86_HASH_H

struct fast_hash_ops;
extern void setup_arch_fast_hash(struct fast_hash_ops *ops);
#include <linux/cpufeature.h>
#include <asm/alternative.h>

#endif /* _ASM_X86_HASH_H */
u32 __intel_crc4_2_hash(const void *data, u32 len, u32 seed);
u32 __intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed);

/*
 * non-inline versions of jhash so gcc does not need to generate
 * duplicate code in every object file
 */
u32 __jhash(const void *data, u32 len, u32 seed);
u32 __jhash2(const u32 *data, u32 len, u32 seed);

/*
 * for documentation of these functions please look into
 * <include/asm-generic/hash.h>
 */

/*
 * arch_fast_hash - hash a buffer, picking the implementation via alternatives
 * @data: buffer to hash
 * @len:  length argument forwarded unchanged to the selected implementation
 * @seed: start seed
 *
 * Emits a direct call to __jhash which the alternatives mechanism patches
 * into a call to __intel_crc4_2_hash when X86_FEATURE_XMM4_2 is set, so no
 * function pointer is involved.
 *
 * The preprocessor conditional is kept outside of the alternative_call()
 * invocation: directives inside a macro argument list are undefined
 * behaviour in ISO C and confuse static checkers.
 *
 * Because the asm statement performs a real function call, it must tell
 * the compiler what that call may touch: memory (the callee reads the
 * buffer), the flags, and the call-clobbered scratch registers that are not
 * already bound as operands. Without this an inlined call site may keep
 * live values in registers the callee overwrites.
 *
 * NOTE(review): the argument registers are declared as inputs only; a callee
 * is allowed to modify them. If that turns out to matter, they need to become
 * in/out operands with dummy outputs - confirm against alternative_call()'s
 * operand layout.
 *
 * Returns the 32bit hash left in the "a" register by the callee.
 */
static inline u32 arch_fast_hash(const void *data, u32 len, u32 seed)
{
	u32 hash;

#ifdef CONFIG_X86_64
	/* arguments in rdi, rsi, rdx; result in eax */
	alternative_call(__jhash, __intel_crc4_2_hash, X86_FEATURE_XMM4_2,
			 "=a" (hash), "D" (data), "S" (len), "d" (seed)
			 : "rcx", "r8", "r9", "r10", "r11", "cc", "memory");
#else
	/* arguments in eax, edx, ecx (presumably regparm(3)); result in eax */
	alternative_call(__jhash, __intel_crc4_2_hash, X86_FEATURE_XMM4_2,
			 "=a" (hash), "a" (data), "d" (len), "c" (seed)
			 : "cc", "memory");
#endif
	return hash;
}

/*
 * arch_fast_hash2 - hash a u32 array, picking the implementation via
 *		     alternatives
 * @data: array of 32bit words to hash
 * @len:  length argument forwarded unchanged to the selected implementation
 * @seed: start seed
 *
 * Emits a direct call to __jhash2 which the alternatives mechanism patches
 * into a call to __intel_crc4_2_hash2 when X86_FEATURE_XMM4_2 is set, so no
 * function pointer is involved.
 *
 * The preprocessor conditional is kept outside of the alternative_call()
 * invocation: directives inside a macro argument list are undefined
 * behaviour in ISO C and confuse static checkers.
 *
 * Because the asm statement performs a real function call, it must tell
 * the compiler what that call may touch: memory (the callee reads the
 * array), the flags, and the call-clobbered scratch registers that are not
 * already bound as operands. Without this an inlined call site may keep
 * live values in registers the callee overwrites.
 *
 * NOTE(review): the argument registers are declared as inputs only; a callee
 * is allowed to modify them. If that turns out to matter, they need to become
 * in/out operands with dummy outputs - confirm against alternative_call()'s
 * operand layout.
 *
 * Returns the 32bit hash left in the "a" register by the callee.
 */
static inline u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed)
{
	u32 hash;

#ifdef CONFIG_X86_64
	/* arguments in rdi, rsi, rdx; result in eax */
	alternative_call(__jhash2, __intel_crc4_2_hash2, X86_FEATURE_XMM4_2,
			 "=a" (hash), "D" (data), "S" (len), "d" (seed)
			 : "rcx", "r8", "r9", "r10", "r11", "cc", "memory");
#else
	/* arguments in eax, edx, ecx (presumably regparm(3)); result in eax */
	alternative_call(__jhash2, __intel_crc4_2_hash2, X86_FEATURE_XMM4_2,
			 "=a" (hash), "a" (data), "d" (len), "c" (seed)
			 : "cc", "memory");
#endif
	return hash;
}

#endif /* __ASM_X86_HASH_H */
+17 −12
Original line number Diff line number Diff line
@@ -31,13 +31,13 @@
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/hash.h>
#include <linux/init.h>

#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/hash.h>

#include <linux/hash.h>
#include <linux/jhash.h>

static inline u32 crc32_u32(u32 crc, u32 val)
{
#ifdef CONFIG_AS_CRC32
@@ -48,7 +48,7 @@ static inline u32 crc32_u32(u32 crc, u32 val)
	return crc;
}

static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed)
u32 __intel_crc4_2_hash(const void *data, u32 len, u32 seed)
{
	const u32 *p32 = (const u32 *) data;
	u32 i, tmp = 0;
@@ -71,22 +71,27 @@ static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed)

	return seed;
}
EXPORT_SYMBOL(__intel_crc4_2_hash);

static u32 intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed)
u32 __intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed)
{
	const u32 *p32 = (const u32 *) data;
	u32 i;

	for (i = 0; i < len; i++)
		seed = crc32_u32(seed, *p32++);
		seed = crc32_u32(seed, *data++);

	return seed;
}
EXPORT_SYMBOL(__intel_crc4_2_hash2);

void __init setup_arch_fast_hash(struct fast_hash_ops *ops)
u32 __jhash(const void *data, u32 len, u32 seed)
{
	if (cpu_has_xmm4_2) {
		ops->hash  = intel_crc4_2_hash;
		ops->hash2 = intel_crc4_2_hash2;
	return jhash(data, len, seed);
}
EXPORT_SYMBOL(__jhash);

/*
 * Out-of-line wrapper around jhash2(): gives the inline jhash2() a real
 * symbol that can be used as a direct call target.
 */
u32 __jhash2(const u32 *data, u32 len, u32 seed)
{
	const u32 hash = jhash2(data, len, seed);

	return hash;
}
EXPORT_SYMBOL(__jhash2);
+34 −2
Original line number Diff line number Diff line
#ifndef __ASM_GENERIC_HASH_H
#define __ASM_GENERIC_HASH_H

struct fast_hash_ops;
static inline void setup_arch_fast_hash(struct fast_hash_ops *ops)
#include <linux/jhash.h>

/**
 *	arch_fast_hash - Calculates a hash over a buffer of arbitrary size.
 *			 An architecture may override this with an optimized
 *			 hashing implementation that trades off distribution
 *			 for speed; this generic version uses jhash().
 *
 *	@data: buffer to hash
 *	@len: length of buffer in bytes
 *	@seed: start seed
 *
 *	Returns 32bit hash.
 */
static inline u32 arch_fast_hash(const void *data, u32 len, u32 seed)
{
	const u32 hash = jhash(data, len, seed);

	return hash;
}

/**
 *	arch_fast_hash2 - Calculates a hash over a buffer whose size is a
 *			  multiple of 32bit words. An architecture may
 *			  override this with an optimized hashing
 *			  implementation that trades off distribution for
 *			  speed; this generic version uses jhash2().
 *
 *	@data: buffer to hash (must be 32bit padded)
 *	@len: number of 32bit words
 *	@seed: start seed
 *
 *	Returns 32bit hash.
 */
static inline u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed)
{
	const u32 hash = jhash2(data, len, seed);

	return hash;
}

#endif /* __ASM_GENERIC_HASH_H */
+0 −34
Original line number Diff line number Diff line
@@ -84,38 +84,4 @@ static inline u32 hash32_ptr(const void *ptr)
	return (u32)val;
}

/*
 * Pair of hash function pointers that an architecture can fill in through
 * setup_arch_fast_hash(). Presumably these back arch_fast_hash() and
 * arch_fast_hash2() respectively - verify against the users of this struct.
 */
struct fast_hash_ops {
	/* hash over an untyped buffer: (data, len, seed) -> 32bit hash */
	u32 (*hash)(const void *data, u32 len, u32 seed);
	/* hash over an array of u32 words: (data, len, seed) -> 32bit hash */
	u32 (*hash2)(const u32 *data, u32 len, u32 seed);
};

/**
 *	arch_fast_hash - Calculates a hash over a given buffer that can have
 *			 arbitrary size. This function will eventually use an
 *			 architecture-optimized hashing implementation if
 *			 available, and trades off distribution for speed.
 *
 *	@data: buffer to hash
 *	@len: length of buffer in bytes
 *	@seed: start seed
 *
 *	Returns 32bit hash.
 */
extern u32 arch_fast_hash(const void *data, u32 len, u32 seed);

/**
 *	arch_fast_hash2 - Calculates a hash over a given buffer that has a
 *			  size that is of a multiple of 32bit words. This
 *			  function will eventually use an architecture-
 *			  optimized hashing implementation if available,
 *			  and trades off distribution for speed.
 *
 *	@data: buffer to hash (must be 32bit padded)
 *	@len: number of 32bit words
 *	@seed: start seed
 *
 *	Returns 32bit hash.
 */
extern u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed);

#endif /* _LINUX_HASH_H */
+1 −1
Original line number Diff line number Diff line
@@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
	 percpu-refcount.o percpu_ida.o hash.o rhashtable.o
	 percpu-refcount.o percpu_ida.o rhashtable.o
obj-y += string_helpers.o
obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
obj-y += kstrtox.o
Loading