--- /dev/null
+From a6e2f029ae34f41adb6ae3812c32c5d326e1abd2 Mon Sep 17 00:00:00 2001
+From: Chris Metcalf <cmetcalf@ezchip.com>
+Date: Wed, 29 Apr 2015 12:48:40 -0400
+Subject: Make asm/word-at-a-time.h available on all architectures
+
+From: Chris Metcalf <cmetcalf@ezchip.com>
+
+commit a6e2f029ae34f41adb6ae3812c32c5d326e1abd2 upstream.
+
+Added the x86 implementation of word-at-a-time to the
+generic version, which previously only supported big-endian.
+
+Omitted the x86-specific load_unaligned_zeropad(), which in
+any case is also not present for the existing BE-only
+implementation of word-at-a-time, and is only used under
+CONFIG_DCACHE_WORD_ACCESS.
+
+Added as a "generic-y" to the Kbuilds of all architectures
+that didn't previously have it.
+
+Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arc/include/asm/Kbuild | 1
+ arch/avr32/include/asm/Kbuild | 1
+ arch/blackfin/include/asm/Kbuild | 1
+ arch/c6x/include/asm/Kbuild | 1
+ arch/cris/include/asm/Kbuild | 1
+ arch/frv/include/asm/Kbuild | 1
+ arch/hexagon/include/asm/Kbuild | 1
+ arch/ia64/include/asm/Kbuild | 1
+ arch/m32r/include/asm/Kbuild | 1
+ arch/metag/include/asm/Kbuild | 1
+ arch/microblaze/include/asm/Kbuild | 1
+ arch/mips/include/asm/Kbuild | 1
+ arch/mn10300/include/asm/Kbuild | 1
+ arch/powerpc/include/asm/Kbuild | 1
+ arch/s390/include/asm/Kbuild | 1
+ arch/score/include/asm/Kbuild | 1
+ arch/tile/include/asm/Kbuild | 1
+ arch/um/include/asm/Kbuild | 1
+ arch/unicore32/include/asm/Kbuild | 1
+ arch/xtensa/include/asm/Kbuild | 1
+ include/asm-generic/word-at-a-time.h | 80 +++++++++++++++++++++++++++++++----
+ 21 files changed, 92 insertions(+), 8 deletions(-)
+
+--- a/arch/arc/include/asm/Kbuild
++++ b/arch/arc/include/asm/Kbuild
+@@ -50,4 +50,5 @@ generic-y += types.h
+ generic-y += ucontext.h
+ generic-y += user.h
+ generic-y += vga.h
++generic-y += word-at-a-time.h
+ generic-y += xor.h
+--- a/arch/avr32/include/asm/Kbuild
++++ b/arch/avr32/include/asm/Kbuild
+@@ -21,4 +21,5 @@ generic-y += sections.h
+ generic-y += topology.h
+ generic-y += trace_clock.h
+ generic-y += vga.h
++generic-y += word-at-a-time.h
+ generic-y += xor.h
+--- a/arch/blackfin/include/asm/Kbuild
++++ b/arch/blackfin/include/asm/Kbuild
+@@ -47,4 +47,5 @@ generic-y += types.h
+ generic-y += ucontext.h
+ generic-y += unaligned.h
+ generic-y += user.h
++generic-y += word-at-a-time.h
+ generic-y += xor.h
+--- a/arch/c6x/include/asm/Kbuild
++++ b/arch/c6x/include/asm/Kbuild
+@@ -59,4 +59,5 @@ generic-y += types.h
+ generic-y += ucontext.h
+ generic-y += user.h
+ generic-y += vga.h
++generic-y += word-at-a-time.h
+ generic-y += xor.h
+--- a/arch/cris/include/asm/Kbuild
++++ b/arch/cris/include/asm/Kbuild
+@@ -18,4 +18,5 @@ generic-y += scatterlist.h
+ generic-y += sections.h
+ generic-y += trace_clock.h
+ generic-y += vga.h
++generic-y += word-at-a-time.h
+ generic-y += xor.h
+--- a/arch/frv/include/asm/Kbuild
++++ b/arch/frv/include/asm/Kbuild
+@@ -8,3 +8,4 @@ generic-y += mcs_spinlock.h
+ generic-y += preempt.h
+ generic-y += scatterlist.h
+ generic-y += trace_clock.h
++generic-y += word-at-a-time.h
+--- a/arch/hexagon/include/asm/Kbuild
++++ b/arch/hexagon/include/asm/Kbuild
+@@ -59,4 +59,5 @@ generic-y += types.h
+ generic-y += ucontext.h
+ generic-y += unaligned.h
+ generic-y += vga.h
++generic-y += word-at-a-time.h
+ generic-y += xor.h
+--- a/arch/ia64/include/asm/Kbuild
++++ b/arch/ia64/include/asm/Kbuild
+@@ -9,3 +9,4 @@ generic-y += preempt.h
+ generic-y += scatterlist.h
+ generic-y += trace_clock.h
+ generic-y += vtime.h
++generic-y += word-at-a-time.h
+--- a/arch/m32r/include/asm/Kbuild
++++ b/arch/m32r/include/asm/Kbuild
+@@ -10,3 +10,4 @@ generic-y += preempt.h
+ generic-y += scatterlist.h
+ generic-y += sections.h
+ generic-y += trace_clock.h
++generic-y += word-at-a-time.h
+--- a/arch/metag/include/asm/Kbuild
++++ b/arch/metag/include/asm/Kbuild
+@@ -55,4 +55,5 @@ generic-y += ucontext.h
+ generic-y += unaligned.h
+ generic-y += user.h
+ generic-y += vga.h
++generic-y += word-at-a-time.h
+ generic-y += xor.h
+--- a/arch/microblaze/include/asm/Kbuild
++++ b/arch/microblaze/include/asm/Kbuild
+@@ -11,3 +11,4 @@ generic-y += preempt.h
+ generic-y += scatterlist.h
+ generic-y += syscalls.h
+ generic-y += trace_clock.h
++generic-y += word-at-a-time.h
+--- a/arch/mips/include/asm/Kbuild
++++ b/arch/mips/include/asm/Kbuild
+@@ -18,4 +18,5 @@ generic-y += serial.h
+ generic-y += trace_clock.h
+ generic-y += ucontext.h
+ generic-y += user.h
++generic-y += word-at-a-time.h
+ generic-y += xor.h
+--- a/arch/mn10300/include/asm/Kbuild
++++ b/arch/mn10300/include/asm/Kbuild
+@@ -10,3 +10,4 @@ generic-y += preempt.h
+ generic-y += scatterlist.h
+ generic-y += sections.h
+ generic-y += trace_clock.h
++generic-y += word-at-a-time.h
+--- a/arch/powerpc/include/asm/Kbuild
++++ b/arch/powerpc/include/asm/Kbuild
+@@ -8,3 +8,4 @@ generic-y += rwsem.h
+ generic-y += scatterlist.h
+ generic-y += trace_clock.h
+ generic-y += vtime.h
++generic-y += word-at-a-time.h
+--- a/arch/s390/include/asm/Kbuild
++++ b/arch/s390/include/asm/Kbuild
+@@ -7,3 +7,4 @@ generic-y += mcs_spinlock.h
+ generic-y += preempt.h
+ generic-y += scatterlist.h
+ generic-y += trace_clock.h
++generic-y += word-at-a-time.h
+--- a/arch/score/include/asm/Kbuild
++++ b/arch/score/include/asm/Kbuild
+@@ -14,3 +14,4 @@ generic-y += sections.h
+ generic-y += trace_clock.h
+ generic-y += xor.h
+ generic-y += serial.h
++generic-y += word-at-a-time.h
+--- a/arch/tile/include/asm/Kbuild
++++ b/arch/tile/include/asm/Kbuild
+@@ -41,4 +41,5 @@ generic-y += termbits.h
+ generic-y += termios.h
+ generic-y += trace_clock.h
+ generic-y += types.h
++generic-y += word-at-a-time.h
+ generic-y += xor.h
+--- a/arch/um/include/asm/Kbuild
++++ b/arch/um/include/asm/Kbuild
+@@ -27,4 +27,5 @@ generic-y += sections.h
+ generic-y += switch_to.h
+ generic-y += topology.h
+ generic-y += trace_clock.h
++generic-y += word-at-a-time.h
+ generic-y += xor.h
+--- a/arch/unicore32/include/asm/Kbuild
++++ b/arch/unicore32/include/asm/Kbuild
+@@ -63,4 +63,5 @@ generic-y += ucontext.h
+ generic-y += unaligned.h
+ generic-y += user.h
+ generic-y += vga.h
++generic-y += word-at-a-time.h
+ generic-y += xor.h
+--- a/arch/xtensa/include/asm/Kbuild
++++ b/arch/xtensa/include/asm/Kbuild
+@@ -30,4 +30,5 @@ generic-y += statfs.h
+ generic-y += termios.h
+ generic-y += topology.h
+ generic-y += trace_clock.h
++generic-y += word-at-a-time.h
+ generic-y += xor.h
+--- a/include/asm-generic/word-at-a-time.h
++++ b/include/asm-generic/word-at-a-time.h
+@@ -1,15 +1,10 @@
+ #ifndef _ASM_WORD_AT_A_TIME_H
+ #define _ASM_WORD_AT_A_TIME_H
+
+-/*
+- * This says "generic", but it's actually big-endian only.
+- * Little-endian can use more efficient versions of these
+- * interfaces, see for example
+- * arch/x86/include/asm/word-at-a-time.h
+- * for those.
+- */
+-
+ #include <linux/kernel.h>
++#include <asm/byteorder.h>
++
++#ifdef __BIG_ENDIAN
+
+ struct word_at_a_time {
+ const unsigned long high_bits, low_bits;
+@@ -53,4 +48,73 @@ static inline bool has_zero(unsigned lon
+ #define zero_bytemask(mask) (~1ul << __fls(mask))
+ #endif
+
++#else
++
++/*
++ * The optimal byte mask counting is probably going to be something
++ * that is architecture-specific. If you have a reliably fast
++ * bit count instruction, that might be better than the multiply
++ * and shift, for example.
++ */
++struct word_at_a_time {
++ const unsigned long one_bits, high_bits;
++};
++
++#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
++
++#ifdef CONFIG_64BIT
++
++/*
++ * Jan Achrenius on G+: microoptimized version of
++ * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
++ * that works for the bytemasks without having to
++ * mask them first.
++ */
++static inline long count_masked_bytes(unsigned long mask)
++{
++ return mask*0x0001020304050608ul >> 56;
++}
++
++#else /* 32-bit case */
++
++/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
++static inline long count_masked_bytes(long mask)
++{
++ /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
++ long a = (0x0ff0001+mask) >> 23;
++ /* Fix the 1 for 00 case */
++ return a & mask;
++}
++
++#endif
++
++/* Return nonzero if it has a zero */
++static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
++{
++ unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
++ *bits = mask;
++ return mask;
++}
++
++static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
++{
++ return bits;
++}
++
++static inline unsigned long create_zero_mask(unsigned long bits)
++{
++ bits = (bits - 1) & ~bits;
++ return bits >> 7;
++}
++
++/* The mask we created is directly usable as a bytemask */
++#define zero_bytemask(mask) (mask)
++
++static inline unsigned long find_zero(unsigned long mask)
++{
++ return count_masked_bytes(mask);
++}
++
++#endif /* __BIG_ENDIAN */
++
+ #endif /* _ASM_WORD_AT_A_TIME_H */