--- /dev/null
+From b492f7e4e07a28e706db26cf4943bb0911435426 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Thu, 3 Nov 2016 16:10:55 +1100
+Subject: powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold
+
+From: Paul Mackerras <paulus@ozlabs.org>
+
+commit b492f7e4e07a28e706db26cf4943bb0911435426 upstream.
+
+These functions compute an IP checksum by computing a 64-bit sum and
+folding it to 32 bits (the "nofold" in their names refers to folding
+down to 16 bits). However, doing (u32) (s + (s >> 32)) is not
+sufficient to fold a 64-bit sum to 32 bits correctly. The addition
+can produce a carry out from bit 31, which needs to be added in to
+the sum to produce the correct result.
+
+To fix this, we copy the from64to32() function from lib/checksum.c
+and use that.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/checksum.h | 17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+--- a/arch/powerpc/include/asm/checksum.h
++++ b/arch/powerpc/include/asm/checksum.h
+@@ -53,17 +53,25 @@ static inline __sum16 csum_fold(__wsum s
+ return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
+ }
+
++static inline u32 from64to32(u64 x)
++{
++ /* add up 32-bit and 32-bit for 32+c bit */
++ x = (x & 0xffffffff) + (x >> 32);
++ /* add up carry.. */
++ x = (x & 0xffffffff) + (x >> 32);
++ return (u32)x;
++}
++
+ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
+ {
+ #ifdef __powerpc64__
+- unsigned long s = (__force u32)sum;
++ u64 s = (__force u32)sum;
+
+ s += (__force u32)saddr;
+ s += (__force u32)daddr;
+ s += proto + len;
+- s += (s >> 32);
+- return (__force __wsum) s;
++ return (__force __wsum) from64to32(s);
+ #else
+ __asm__("\n\
+ addc %0,%0,%1 \n\
+@@ -123,8 +131,7 @@ static inline __wsum ip_fast_csum_nofold
+
+ for (i = 0; i < ihl - 1; i++, ptr++)
+ s += *ptr;
+- s += (s >> 32);
+- return (__force __wsum)s;
++ return (__force __wsum)from64to32(s);
+ #else
+ __wsum sum, tmp;
+