summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHannes Frederic Sowa <hannes@stressinduktion.org>2014-11-05 00:23:04 +0100
committerDavid S. Miller <davem@davemloft.net>2014-11-05 22:01:21 -0500
commite5a2c899957659cd1a9f789bc462f9c0b35f5150 (patch)
tree4c9b8a6f89d961daf9ada9f5ee95f8b371ce3a04
parent2c99cd914d4fed9160d98849c9dd38034616768e (diff)
downloadlwn-e5a2c899957659cd1a9f789bc462f9c0b35f5150.tar.gz
lwn-e5a2c899957659cd1a9f789bc462f9c0b35f5150.zip
fast_hash: avoid indirect function calls
By default the arch_fast_hash hashing function pointers are initialized to jhash(2). If during boot-up a CPU with SSE4.2 is detected they get updated to the CRC32 ones. This dispatching scheme incurs a function pointer lookup and indirect call for every hashing operation. rhashtable as a user of arch_fast_hash e.g. stores pointers to hashing functions in its structure, too, causing two indirect branches per hashing operation. Using alternative_call we can get away with one of those indirect branches. Acked-by: Daniel Borkmann <dborkman@redhat.com> Cc: Thomas Graf <tgraf@suug.ch> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--arch/x86/include/asm/hash.h51
-rw-r--r--arch/x86/lib/hash.c29
-rw-r--r--include/asm-generic/hash.h36
-rw-r--r--include/linux/hash.h34
-rw-r--r--lib/Makefile2
-rw-r--r--lib/hash.c39
6 files changed, 98 insertions, 93 deletions
diff --git a/arch/x86/include/asm/hash.h b/arch/x86/include/asm/hash.h
index e8c58f88b1d4..a881d784f044 100644
--- a/arch/x86/include/asm/hash.h
+++ b/arch/x86/include/asm/hash.h
@@ -1,7 +1,48 @@
-#ifndef _ASM_X86_HASH_H
-#define _ASM_X86_HASH_H
+#ifndef __ASM_X86_HASH_H
+#define __ASM_X86_HASH_H
-struct fast_hash_ops;
-extern void setup_arch_fast_hash(struct fast_hash_ops *ops);
+#include <linux/cpufeature.h>
+#include <asm/alternative.h>
-#endif /* _ASM_X86_HASH_H */
+u32 __intel_crc4_2_hash(const void *data, u32 len, u32 seed);
+u32 __intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed);
+
+/*
+ * non-inline versions of jhash so gcc does not need to generate
+ * duplicate code in every object file
+ */
+u32 __jhash(const void *data, u32 len, u32 seed);
+u32 __jhash2(const u32 *data, u32 len, u32 seed);
+
+/*
+ * for documentation of these functions please look into
+ * <include/asm-generic/hash.h>
+ */
+
+static inline u32 arch_fast_hash(const void *data, u32 len, u32 seed)
+{
+ u32 hash;
+
+ alternative_call(__jhash, __intel_crc4_2_hash, X86_FEATURE_XMM4_2,
+#ifdef CONFIG_X86_64
+ "=a" (hash), "D" (data), "S" (len), "d" (seed));
+#else
+ "=a" (hash), "a" (data), "d" (len), "c" (seed));
+#endif
+ return hash;
+}
+
+static inline u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed)
+{
+ u32 hash;
+
+ alternative_call(__jhash2, __intel_crc4_2_hash2, X86_FEATURE_XMM4_2,
+#ifdef CONFIG_X86_64
+ "=a" (hash), "D" (data), "S" (len), "d" (seed));
+#else
+ "=a" (hash), "a" (data), "d" (len), "c" (seed));
+#endif
+ return hash;
+}
+
+#endif /* __ASM_X86_HASH_H */
diff --git a/arch/x86/lib/hash.c b/arch/x86/lib/hash.c
index ff4fa51a5b1f..e14327198835 100644
--- a/arch/x86/lib/hash.c
+++ b/arch/x86/lib/hash.c
@@ -31,13 +31,13 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <linux/hash.h>
-#include <linux/init.h>
-
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/hash.h>
+#include <linux/hash.h>
+#include <linux/jhash.h>
+
static inline u32 crc32_u32(u32 crc, u32 val)
{
#ifdef CONFIG_AS_CRC32
@@ -48,7 +48,7 @@ static inline u32 crc32_u32(u32 crc, u32 val)
return crc;
}
-static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed)
+u32 __intel_crc4_2_hash(const void *data, u32 len, u32 seed)
{
const u32 *p32 = (const u32 *) data;
u32 i, tmp = 0;
@@ -71,22 +71,27 @@ static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed)
return seed;
}
+EXPORT_SYMBOL(__intel_crc4_2_hash);
-static u32 intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed)
+u32 __intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed)
{
- const u32 *p32 = (const u32 *) data;
u32 i;
for (i = 0; i < len; i++)
- seed = crc32_u32(seed, *p32++);
+ seed = crc32_u32(seed, *data++);
return seed;
}
+EXPORT_SYMBOL(__intel_crc4_2_hash2);
-void __init setup_arch_fast_hash(struct fast_hash_ops *ops)
+u32 __jhash(const void *data, u32 len, u32 seed)
{
- if (cpu_has_xmm4_2) {
- ops->hash = intel_crc4_2_hash;
- ops->hash2 = intel_crc4_2_hash2;
- }
+ return jhash(data, len, seed);
+}
+EXPORT_SYMBOL(__jhash);
+
+u32 __jhash2(const u32 *data, u32 len, u32 seed)
+{
+ return jhash2(data, len, seed);
}
+EXPORT_SYMBOL(__jhash2);
diff --git a/include/asm-generic/hash.h b/include/asm-generic/hash.h
index b6312843dbd9..3c82760ff2a4 100644
--- a/include/asm-generic/hash.h
+++ b/include/asm-generic/hash.h
@@ -1,9 +1,41 @@
#ifndef __ASM_GENERIC_HASH_H
#define __ASM_GENERIC_HASH_H
-struct fast_hash_ops;
-static inline void setup_arch_fast_hash(struct fast_hash_ops *ops)
+#include <linux/jhash.h>
+
+/**
+ * arch_fast_hash - Calculates a hash over a given buffer that can have
+ * arbitrary size. This function will eventually use an
+ * architecture-optimized hashing implementation if
+ * available, and trades off distribution for speed.
+ *
+ * @data: buffer to hash
+ * @len: length of buffer in bytes
+ * @seed: start seed
+ *
+ * Returns 32bit hash.
+ */
+static inline u32 arch_fast_hash(const void *data, u32 len, u32 seed)
+{
+ return jhash(data, len, seed);
+}
+
+/**
+ * arch_fast_hash2 - Calculates a hash over a given buffer that has a
+ * size that is of a multiple of 32bit words. This
+ * function will eventually use an architecture-
+ * optimized hashing implementation if available,
+ * and trades off distribution for speed.
+ *
+ * @data: buffer to hash (must be 32bit padded)
+ * @len: number of 32bit words
+ * @seed: start seed
+ *
+ * Returns 32bit hash.
+ */
+static inline u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed)
{
+ return jhash2(data, len, seed);
}
#endif /* __ASM_GENERIC_HASH_H */
diff --git a/include/linux/hash.h b/include/linux/hash.h
index d0494c399392..6e8fb028848c 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -84,38 +84,4 @@ static inline u32 hash32_ptr(const void *ptr)
return (u32)val;
}
-struct fast_hash_ops {
- u32 (*hash)(const void *data, u32 len, u32 seed);
- u32 (*hash2)(const u32 *data, u32 len, u32 seed);
-};
-
-/**
- * arch_fast_hash - Caclulates a hash over a given buffer that can have
- * arbitrary size. This function will eventually use an
- * architecture-optimized hashing implementation if
- * available, and trades off distribution for speed.
- *
- * @data: buffer to hash
- * @len: length of buffer in bytes
- * @seed: start seed
- *
- * Returns 32bit hash.
- */
-extern u32 arch_fast_hash(const void *data, u32 len, u32 seed);
-
-/**
- * arch_fast_hash2 - Caclulates a hash over a given buffer that has a
- * size that is of a multiple of 32bit words. This
- * function will eventually use an architecture-
- * optimized hashing implementation if available,
- * and trades off distribution for speed.
- *
- * @data: buffer to hash (must be 32bit padded)
- * @len: number of 32bit words
- * @seed: start seed
- *
- * Returns 32bit hash.
- */
-extern u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed);
-
#endif /* _LINUX_HASH_H */
diff --git a/lib/Makefile b/lib/Makefile
index 7512dc978f18..04e53dd16070 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
- percpu-refcount.o percpu_ida.o hash.o rhashtable.o
+ percpu-refcount.o percpu_ida.o rhashtable.o
obj-y += string_helpers.o
obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
obj-y += kstrtox.o
diff --git a/lib/hash.c b/lib/hash.c
deleted file mode 100644
index fea973f4bd57..000000000000
--- a/lib/hash.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/* General purpose hashing library
- *
- * That's a start of a kernel hashing library, which can be extended
- * with further algorithms in future. arch_fast_hash{2,}() will
- * eventually resolve to an architecture optimized implementation.
- *
- * Copyright 2013 Francesco Fusco <ffusco@redhat.com>
- * Copyright 2013 Daniel Borkmann <dborkman@redhat.com>
- * Copyright 2013 Thomas Graf <tgraf@redhat.com>
- * Licensed under the GNU General Public License, version 2.0 (GPLv2)
- */
-
-#include <linux/jhash.h>
-#include <linux/hash.h>
-#include <linux/cache.h>
-
-static struct fast_hash_ops arch_hash_ops __read_mostly = {
- .hash = jhash,
- .hash2 = jhash2,
-};
-
-u32 arch_fast_hash(const void *data, u32 len, u32 seed)
-{
- return arch_hash_ops.hash(data, len, seed);
-}
-EXPORT_SYMBOL_GPL(arch_fast_hash);
-
-u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed)
-{
- return arch_hash_ops.hash2(data, len, seed);
-}
-EXPORT_SYMBOL_GPL(arch_fast_hash2);
-
-static int __init hashlib_init(void)
-{
- setup_arch_fast_hash(&arch_hash_ops);
- return 0;
-}
-early_initcall(hashlib_init);