From dc0266a105b634e63840254a5b7320a1b59df13b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 17 May 2021 12:11:56 +0200
Subject: [PATCH] 5.12-stable patches

added patches:
	mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch
	mips-avoid-handcoded-divu-in-__div64_32-altogether.patch
	mips-reinstate-platform-__div64_32-handler.patch
	mm-fix-struct-page-layout-on-32-bit-systems.patch
---
 ...n-__div64_32-is-result-would-be-zero.patch |  43 +++++
 ...dcoded-divu-in-__div64_32-altogether.patch |  75 ++++++++
 ...einstate-platform-__div64_32-handler.patch | 161 ++++++++++++++++++
 ...struct-page-layout-on-32-bit-systems.patch | 116 +++++++++++++
 queue-5.12/series                             |   4 +
 5 files changed, 399 insertions(+)
 create mode 100644 queue-5.12/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch
 create mode 100644 queue-5.12/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch
 create mode 100644 queue-5.12/mips-reinstate-platform-__div64_32-handler.patch
 create mode 100644 queue-5.12/mm-fix-struct-page-layout-on-32-bit-systems.patch

diff --git a/queue-5.12/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch b/queue-5.12/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch
new file mode 100644
index 00000000000..1305d9937bd
--- /dev/null
+++ b/queue-5.12/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch
@@ -0,0 +1,43 @@
+From c1d337d45ec0a802299688e17d568c4e3a585895 Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Tue, 20 Apr 2021 04:50:48 +0200
+Subject: MIPS: Avoid DIVU in `__div64_32' is result would be zero
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit c1d337d45ec0a802299688e17d568c4e3a585895 upstream.
+
+We already check the high part of the divident against zero to avoid the
+costly DIVU instruction in that case, needed to reduce the high part of
+the divident, so we may well check against the divisor instead and set
+the high part of the quotient to zero right away.  We need to treat the
+high part the divident in that case though as the remainder that would
+be calculated by the DIVU instruction we avoided.
+
+This has passed correctness verification with test_div64 and reduced the
+module's average execution time down to 1.0445s and 0.2619s from 1.0668s
+and 0.2629s respectively for an R3400 CPU @40MHz and a 5Kc CPU @160MHz.
+
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/include/asm/div64.h |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/mips/include/asm/div64.h
++++ b/arch/mips/include/asm/div64.h
+@@ -68,9 +68,11 @@
+ 									\
+ 	__high = __div >> 32;						\
+ 	__low = __div;							\
+-	__upper = __high;						\
+ 									\
+-	if (__high) {							\
++	if (__high < __radix) {						\
++		__upper = __high;					\
++		__high = 0;						\
++	} else {							\
+ 		__asm__("divu	$0, %z1, %z2"				\
+ 		: "=x" (__modquot)					\
+ 		: "Jr" (__high), "Jr" (__radix));			\
diff --git a/queue-5.12/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch b/queue-5.12/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch
new file mode 100644
index 00000000000..6c01ec7853e
--- /dev/null
+++ b/queue-5.12/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch
@@ -0,0 +1,75 @@
+From 25ab14cbe9d1b66fda44c71a2db7582a31b6f5cd Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Thu, 22 Apr 2021 22:36:12 +0200
+Subject: MIPS: Avoid handcoded DIVU in `__div64_32' altogether
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit 25ab14cbe9d1b66fda44c71a2db7582a31b6f5cd upstream.
+
+Remove the inline asm with a DIVU instruction from `__div64_32' and use
+plain C code for the intended DIVMOD calculation instead.  GCC is smart
+enough to know that both the quotient and the remainder are calculated
+with single DIVU, so with ISAs up to R5 the same instruction is actually
+produced with overall similar code.
+
+For R6 compiled code will work, but separate DIVU and MODU instructions
+will be produced, which are also interlocked, so scalar implementations
+will likely not perform as well as older ISAs with their asynchronous MD
+unit.  Likely still faster then the generic algorithm though.
+
+This removes a compilation error for R6 however where the original DIVU
+instruction is not supported anymore and the MDU accumulator registers
+have been removed and consequently GCC complains as to a constraint it
+cannot find a register for:
+
+In file included from ./include/linux/math.h:5,
+                 from ./include/linux/kernel.h:13,
+                 from mm/page-writeback.c:15:
+./include/linux/math64.h: In function 'div_u64_rem':
+./arch/mips/include/asm/div64.h:76:17: error: inconsistent operand constraints in an 'asm'
+   76 |                 __asm__("divu   $0, %z1, %z2"                           \
+      |                 ^~~~~~~
+./include/asm-generic/div64.h:245:25: note: in expansion of macro '__div64_32'
+  245 |                 __rem = __div64_32(&(n), __base);       \
+      |                         ^~~~~~~~~~
+./include/linux/math64.h:91:22: note: in expansion of macro 'do_div'
+   91 |         *remainder = do_div(dividend, divisor);
+      |                      ^~~~~~
+
+This has passed correctness verification with test_div64 and reduced the
+module's average execution time down to 1.0404s from 1.0445s with R3400
+@40MHz.  The module's MIPS I machine code has also shrunk by 12 bytes or
+3 instructions.
+
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/include/asm/div64.h |    8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/arch/mips/include/asm/div64.h
++++ b/arch/mips/include/asm/div64.h
+@@ -58,7 +58,6 @@
+ 
+ #define __div64_32(n, base) ({						\
+ 	unsigned long __upper, __low, __high, __radix;			\
+-	unsigned long long __modquot;					\
+ 	unsigned long long __quot;					\
+ 	unsigned long long __div;					\
+ 	unsigned long __mod;						\
+@@ -73,11 +72,8 @@
+ 		__upper = __high;					\
+ 		__high = 0;						\
+ 	} else {							\
+-		__asm__("divu	$0, %z1, %z2"				\
+-		: "=x" (__modquot)					\
+-		: "Jr" (__high), "Jr" (__radix));			\
+-		__upper = __modquot >> 32;				\
+-		__high = __modquot;					\
++		__upper = __high % __radix;				\
++		__high /= __radix;					\
+ 	}								\
+ 									\
+ 	__mod = do_div64_32(__low, __upper, __low, __radix);		\
diff --git a/queue-5.12/mips-reinstate-platform-__div64_32-handler.patch b/queue-5.12/mips-reinstate-platform-__div64_32-handler.patch
new file mode 100644
index 00000000000..3a0267d6f28
--- /dev/null
+++ b/queue-5.12/mips-reinstate-platform-__div64_32-handler.patch
@@ -0,0 +1,161 @@
+From c49f71f60754acbff37505e1d16ca796bf8a8140 Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Tue, 20 Apr 2021 04:50:40 +0200
+Subject: MIPS: Reinstate platform `__div64_32' handler
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit c49f71f60754acbff37505e1d16ca796bf8a8140 upstream.
+
+Our current MIPS platform `__div64_32' handler is inactive, because it
+is incorrectly only enabled for 64-bit configurations, for which generic
+`do_div' code does not call it anyway.
+
+The handler is not suitable for being called from there though as it
+only calculates 32 bits of the quotient under the assumption the 64-bit
+divident has been suitably reduced.  Code for such reduction used to be
+there, however it has been incorrectly removed with commit c21004cd5b4c
+("MIPS: Rewrite <asm/div64.h> to work with gcc 4.4.0."), which should
+have only updated an obsoleted constraint for an inline asm involving
+$hi and $lo register outputs, while possibly wiring the original MIPS
+variant of the `do_div' macro as `__div64_32' handler for the generic
+`do_div' implementation
+
+Correct the handler as follows then:
+
+- Revert most of the commit referred, however retaining the current
+  formatting, except for the final two instructions of the inline asm
+  sequence, which the original commit missed.  Omit the original 64-bit
+  parts though.
+
+- Rename the original `do_div' macro to `__div64_32'.  Use the combined
+  `x' constraint referring to the MD accumulator as a whole, replacing
+  the original individual `h' and `l' constraints used for $hi and $lo
+  registers respectively, of which `h' has been obsoleted with GCC 4.4.
+  Update surrounding code accordingly.
+
+  We have since removed support for GCC versions before 4.9, so no need
+  for a special arrangement here; GCC has supported the `x' constraint
+  since forever anyway, or at least going back to 1991.
+
+- Rename the `__base' local variable in `__div64_32' to `__radix' to
+  avoid a conflict with a local variable in `do_div'.
+
+- Actually enable this code for 32-bit rather than 64-bit configurations
+  by qualifying it with BITS_PER_LONG being 32 instead of 64.  Include
+  <asm/bitsperlong.h> for this macro rather than <linux/types.h> as we
+  don't need anything else.
+
+- Finally include <asm-generic/div64.h> last rather than first.
+
+This has passed correctness verification with test_div64 and reduced the
+module's average execution time down to 1.0668s and 0.2629s from 2.1529s
+and 0.5647s respectively for an R3400 CPU @40MHz and a 5Kc CPU @160MHz.
+For a reference 64-bit `do_div' code where we have the DDIVU instruction
+available to do the whole calculation right away averages at 0.0660s for
+the latter CPU.
+
+Fixes: c21004cd5b4c ("MIPS: Rewrite <asm/div64.h> to work with gcc 4.4.0.")
+Reported-by: Huacai Chen <chenhuacai@kernel.org>
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Cc: stable@vger.kernel.org # v2.6.30+
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/include/asm/div64.h |   57 ++++++++++++++++++++++++++++++------------
+ 1 file changed, 41 insertions(+), 16 deletions(-)
+
+--- a/arch/mips/include/asm/div64.h
++++ b/arch/mips/include/asm/div64.h
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2000, 2004  Maciej W. Rozycki
++ * Copyright (C) 2000, 2004, 2021  Maciej W. Rozycki
+  * Copyright (C) 2003, 07 Ralf Baechle (ralf@linux-mips.org)
+  *
+  * This file is subject to the terms and conditions of the GNU General Public
+@@ -9,25 +9,18 @@
+ #ifndef __ASM_DIV64_H
+ #define __ASM_DIV64_H
+ 
+-#include <asm-generic/div64.h>
+-
+-#if BITS_PER_LONG == 64
++#include <asm/bitsperlong.h>
+ 
+-#include <linux/types.h>
++#if BITS_PER_LONG == 32
+ 
+ /*
+  * No traps on overflows for any of these...
+  */
+ 
+-#define __div64_32(n, base)						\
+-({									\
++#define do_div64_32(res, high, low, base) ({				\
+ 	unsigned long __cf, __tmp, __tmp2, __i;				\
+ 	unsigned long __quot32, __mod32;				\
+-	unsigned long __high, __low;					\
+-	unsigned long long __n;						\
+ 									\
+-	__high = *__n >> 32;						\
+-	__low = __n;							\
+ 	__asm__(							\
+ 	"	.set	push					\n"	\
+ 	"	.set	noat					\n"	\
+@@ -51,18 +44,50 @@
+ 	"	subu	%0, %0, %z6				\n"	\
+ 	"	addiu	%2, %2, 1				\n"	\
+ 	"3:							\n"	\
+-	"	bnez	%4, 0b\n\t"					\
+-	"	 srl	%5, %1, 0x1f\n\t"				\
++	"	bnez	%4, 0b					\n"	\
++	"	 srl	%5, %1, 0x1f				\n"	\
+ 	"	.set	pop"						\
+ 	: "=&r" (__mod32), "=&r" (__tmp),				\
+ 	  "=&r" (__quot32), "=&r" (__cf),				\
+ 	  "=&r" (__i), "=&r" (__tmp2)					\
+-	: "Jr" (base), "0" (__high), "1" (__low));			\
++	: "Jr" (base), "0" (high), "1" (low));				\
+ 									\
+-	(__n) = __quot32;						\
++	(res) = __quot32;						\
+ 	__mod32;							\
+ })
+ 
+-#endif /* BITS_PER_LONG == 64 */
++#define __div64_32(n, base) ({						\
++	unsigned long __upper, __low, __high, __radix;			\
++	unsigned long long __modquot;					\
++	unsigned long long __quot;					\
++	unsigned long long __div;					\
++	unsigned long __mod;						\
++									\
++	__div = (*n);							\
++	__radix = (base);						\
++									\
++	__high = __div >> 32;						\
++	__low = __div;							\
++	__upper = __high;						\
++									\
++	if (__high) {							\
++		__asm__("divu	$0, %z1, %z2"				\
++		: "=x" (__modquot)					\
++		: "Jr" (__high), "Jr" (__radix));			\
++		__upper = __modquot >> 32;				\
++		__high = __modquot;					\
++	}								\
++									\
++	__mod = do_div64_32(__low, __upper, __low, __radix);		\
++									\
++	__quot = __high;						\
++	__quot = __quot << 32 | __low;					\
++	(*n) = __quot;							\
++	__mod;								\
++})
++
++#endif /* BITS_PER_LONG == 32 */
++
++#include <asm-generic/div64.h>
+ 
+ #endif /* __ASM_DIV64_H */
diff --git a/queue-5.12/mm-fix-struct-page-layout-on-32-bit-systems.patch b/queue-5.12/mm-fix-struct-page-layout-on-32-bit-systems.patch
new file mode 100644
index 00000000000..dcb629d21f4
--- /dev/null
+++ b/queue-5.12/mm-fix-struct-page-layout-on-32-bit-systems.patch
@@ -0,0 +1,116 @@
+From 9ddb3c14afba8bc5950ed297f02d4ae05ff35cd1 Mon Sep 17 00:00:00 2001
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Date: Fri, 14 May 2021 17:27:24 -0700
+Subject: mm: fix struct page layout on 32-bit systems
+
+From: Matthew Wilcox (Oracle) <willy@infradead.org>
+
+commit 9ddb3c14afba8bc5950ed297f02d4ae05ff35cd1 upstream.
+
+32-bit architectures which expect 8-byte alignment for 8-byte integers and
+need 64-bit DMA addresses (arm, mips, ppc) had their struct page
+inadvertently expanded in 2019.  When the dma_addr_t was added, it forced
+the alignment of the union to 8 bytes, which inserted a 4 byte gap between
+'flags' and the union.
+
+Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
+This restores the alignment to that of an unsigned long.  We always
+store the low bits in the first word to prevent the PageTail bit from
+being inadvertently set on a big endian platform.  If that happened,
+get_user_pages_fast() racing against a page which was freed and
+reallocated to the page_pool could dereference a bogus compound_head(),
+which would be hard to trace back to this cause.
+
+Link: https://lkml.kernel.org/r/20210510153211.1504886-1-willy@infradead.org
+Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
+Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Tested-by: Matteo Croce <mcroce@linux.microsoft.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mm_types.h |    4 ++--
+ include/net/page_pool.h  |   12 +++++++++++-
+ net/core/page_pool.c     |   12 +++++++-----
+ 3 files changed, 20 insertions(+), 8 deletions(-)
+
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -97,10 +97,10 @@ struct page {
+ 		};
+ 		struct {	/* page_pool used by netstack */
+ 			/**
+-			 * @dma_addr: might require a 64-bit value even on
++			 * @dma_addr: might require a 64-bit value on
+ 			 * 32-bit architectures.
+ 			 */
+-			dma_addr_t dma_addr;
++			unsigned long dma_addr[2];
+ 		};
+ 		struct {	/* slab, slob and slub */
+ 			union {
+--- a/include/net/page_pool.h
++++ b/include/net/page_pool.h
+@@ -198,7 +198,17 @@ static inline void page_pool_recycle_dir
+ 
+ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
+ {
+-	return page->dma_addr;
++	dma_addr_t ret = page->dma_addr[0];
++	if (sizeof(dma_addr_t) > sizeof(unsigned long))
++		ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
++	return ret;
++}
++
++static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
++{
++	page->dma_addr[0] = addr;
++	if (sizeof(dma_addr_t) > sizeof(unsigned long))
++		page->dma_addr[1] = upper_32_bits(addr);
+ }
+ 
+ static inline bool is_page_pool_compiled_in(void)
+--- a/net/core/page_pool.c
++++ b/net/core/page_pool.c
+@@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_devic
+ 					  struct page *page,
+ 					  unsigned int dma_sync_size)
+ {
++	dma_addr_t dma_addr = page_pool_get_dma_addr(page);
++
+ 	dma_sync_size = min(dma_sync_size, pool->p.max_len);
+-	dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
++	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
+ 					 pool->p.offset, dma_sync_size,
+ 					 pool->p.dma_dir);
+ }
+@@ -226,7 +228,7 @@ static struct page *__page_pool_alloc_pa
+ 		put_page(page);
+ 		return NULL;
+ 	}
+-	page->dma_addr = dma;
++	page_pool_set_dma_addr(page, dma);
+ 
+ 	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
+ 		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
+@@ -294,13 +296,13 @@ void page_pool_release_page(struct page_
+ 		 */
+ 		goto skip_dma_unmap;
+ 
+-	dma = page->dma_addr;
++	dma = page_pool_get_dma_addr(page);
+ 
+-	/* When page is unmapped, it cannot be returned our pool */
++	/* When page is unmapped, it cannot be returned to our pool */
+ 	dma_unmap_page_attrs(pool->p.dev, dma,
+ 			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
+ 			     DMA_ATTR_SKIP_CPU_SYNC);
+-	page->dma_addr = 0;
++	page_pool_set_dma_addr(page, 0);
+ skip_dma_unmap:
+ 	/* This may be the last page returned, releasing the pool, so
+ 	 * it is not safe to reference pool afterwards.
diff --git a/queue-5.12/series b/queue-5.12/series
index 00a2d412427..c96d61ffd1f 100644
--- a/queue-5.12/series
+++ b/queue-5.12/series
@@ -332,3 +332,7 @@ kvm-x86-add-support-for-rdpid-without-rdtscp.patch
 kvm-nvmx-always-make-an-attempt-to-map-evmcs-after-migration.patch
 kvm-vmx-do-not-advertise-rdpid-if-enable_rdtscp-control-is-unsupported.patch
 kvm-vmx-disable-preemption-when-probing-user-return-msrs.patch
+mm-fix-struct-page-layout-on-32-bit-systems.patch
+mips-reinstate-platform-__div64_32-handler.patch
+mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch
+mips-avoid-handcoded-divu-in-__div64_32-altogether.patch
-- 
2.47.3