--- /dev/null
+From c1d337d45ec0a802299688e17d568c4e3a585895 Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Tue, 20 Apr 2021 04:50:48 +0200
+Subject: MIPS: Avoid DIVU in `__div64_32' is result would be zero
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit c1d337d45ec0a802299688e17d568c4e3a585895 upstream.
+
+We already check the high part of the dividend against zero to avoid the
+costly DIVU instruction in that case, needed to reduce the high part of
+the dividend, so we may well check against the divisor instead and set
+the high part of the quotient to zero right away. We need to treat the
+high part of the dividend in that case though as the remainder that would
+be calculated by the DIVU instruction we avoided.
+
+This has passed correctness verification with test_div64 and reduced the
+module's average execution time down to 1.0445s and 0.2619s from 1.0668s
+and 0.2629s respectively for an R3400 CPU @40MHz and a 5Kc CPU @160MHz.
+
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/include/asm/div64.h | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/mips/include/asm/div64.h
++++ b/arch/mips/include/asm/div64.h
+@@ -68,9 +68,11 @@
+ \
+ __high = __div >> 32; \
+ __low = __div; \
+- __upper = __high; \
+ \
+- if (__high) { \
++ if (__high < __radix) { \
++ __upper = __high; \
++ __high = 0; \
++ } else { \
+ __asm__("divu $0, %z1, %z2" \
+ : "=x" (__modquot) \
+ : "Jr" (__high), "Jr" (__radix)); \
--- /dev/null
+From 25ab14cbe9d1b66fda44c71a2db7582a31b6f5cd Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Thu, 22 Apr 2021 22:36:12 +0200
+Subject: MIPS: Avoid handcoded DIVU in `__div64_32' altogether
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit 25ab14cbe9d1b66fda44c71a2db7582a31b6f5cd upstream.
+
+Remove the inline asm with a DIVU instruction from `__div64_32' and use
+plain C code for the intended DIVMOD calculation instead. GCC is smart
+enough to know that both the quotient and the remainder are calculated
+with single DIVU, so with ISAs up to R5 the same instruction is actually
+produced with overall similar code.
+
+For R6 compiled code will work, but separate DIVU and MODU instructions
+will be produced, which are also interlocked, so scalar implementations
+will likely not perform as well as older ISAs with their asynchronous MD
+unit. Likely still faster than the generic algorithm though.
+
+This removes a compilation error for R6 however where the original DIVU
+instruction is not supported anymore and the MDU accumulator registers
+have been removed and consequently GCC complains as to a constraint it
+cannot find a register for:
+
+In file included from ./include/linux/math.h:5,
+ from ./include/linux/kernel.h:13,
+ from mm/page-writeback.c:15:
+./include/linux/math64.h: In function 'div_u64_rem':
+./arch/mips/include/asm/div64.h:76:17: error: inconsistent operand constraints in an 'asm'
+ 76 | __asm__("divu $0, %z1, %z2" \
+ | ^~~~~~~
+./include/asm-generic/div64.h:245:25: note: in expansion of macro '__div64_32'
+ 245 | __rem = __div64_32(&(n), __base); \
+ | ^~~~~~~~~~
+./include/linux/math64.h:91:22: note: in expansion of macro 'do_div'
+ 91 | *remainder = do_div(dividend, divisor);
+ | ^~~~~~
+
+This has passed correctness verification with test_div64 and reduced the
+module's average execution time down to 1.0404s from 1.0445s with R3400
+@40MHz. The module's MIPS I machine code has also shrunk by 12 bytes or
+3 instructions.
+
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/include/asm/div64.h | 8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/arch/mips/include/asm/div64.h
++++ b/arch/mips/include/asm/div64.h
+@@ -58,7 +58,6 @@
+
+ #define __div64_32(n, base) ({ \
+ unsigned long __upper, __low, __high, __radix; \
+- unsigned long long __modquot; \
+ unsigned long long __quot; \
+ unsigned long long __div; \
+ unsigned long __mod; \
+@@ -73,11 +72,8 @@
+ __upper = __high; \
+ __high = 0; \
+ } else { \
+- __asm__("divu $0, %z1, %z2" \
+- : "=x" (__modquot) \
+- : "Jr" (__high), "Jr" (__radix)); \
+- __upper = __modquot >> 32; \
+- __high = __modquot; \
++ __upper = __high % __radix; \
++ __high /= __radix; \
+ } \
+ \
+ __mod = do_div64_32(__low, __upper, __low, __radix); \
--- /dev/null
+From c49f71f60754acbff37505e1d16ca796bf8a8140 Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Tue, 20 Apr 2021 04:50:40 +0200
+Subject: MIPS: Reinstate platform `__div64_32' handler
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit c49f71f60754acbff37505e1d16ca796bf8a8140 upstream.
+
+Our current MIPS platform `__div64_32' handler is inactive, because it
+is incorrectly only enabled for 64-bit configurations, for which generic
+`do_div' code does not call it anyway.
+
+The handler is not suitable for being called from there though as it
+only calculates 32 bits of the quotient under the assumption the 64-bit
+dividend has been suitably reduced. Code for such reduction used to be
+there, however it has been incorrectly removed with commit c21004cd5b4c
+("MIPS: Rewrite <asm/div64.h> to work with gcc 4.4.0."), which should
+have only updated an obsoleted constraint for an inline asm involving
+$hi and $lo register outputs, while possibly wiring the original MIPS
+variant of the `do_div' macro as `__div64_32' handler for the generic
+`do_div' implementation.
+
+Correct the handler as follows then:
+
+- Revert most of the commit referred to, however retaining the current
+ formatting, except for the final two instructions of the inline asm
+ sequence, which the original commit missed. Omit the original 64-bit
+ parts though.
+
+- Rename the original `do_div' macro to `__div64_32'. Use the combined
+ `x' constraint referring to the MD accumulator as a whole, replacing
+ the original individual `h' and `l' constraints used for $hi and $lo
+ registers respectively, of which `h' has been obsoleted with GCC 4.4.
+ Update surrounding code accordingly.
+
+ We have since removed support for GCC versions before 4.9, so no need
+ for a special arrangement here; GCC has supported the `x' constraint
+ since forever anyway, or at least going back to 1991.
+
+- Rename the `__base' local variable in `__div64_32' to `__radix' to
+ avoid a conflict with a local variable in `do_div'.
+
+- Actually enable this code for 32-bit rather than 64-bit configurations
+ by qualifying it with BITS_PER_LONG being 32 instead of 64. Include
+ <asm/bitsperlong.h> for this macro rather than <linux/types.h> as we
+ don't need anything else.
+
+- Finally include <asm-generic/div64.h> last rather than first.
+
+This has passed correctness verification with test_div64 and reduced the
+module's average execution time down to 1.0668s and 0.2629s from 2.1529s
+and 0.5647s respectively for an R3400 CPU @40MHz and a 5Kc CPU @160MHz.
+For a reference 64-bit `do_div' code where we have the DDIVU instruction
+available to do the whole calculation right away averages at 0.0660s for
+the latter CPU.
+
+Fixes: c21004cd5b4c ("MIPS: Rewrite <asm/div64.h> to work with gcc 4.4.0.")
+Reported-by: Huacai Chen <chenhuacai@kernel.org>
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Cc: stable@vger.kernel.org # v2.6.30+
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/include/asm/div64.h | 57 ++++++++++++++++++++++++++++++------------
+ 1 file changed, 41 insertions(+), 16 deletions(-)
+
+--- a/arch/mips/include/asm/div64.h
++++ b/arch/mips/include/asm/div64.h
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2000, 2004 Maciej W. Rozycki
++ * Copyright (C) 2000, 2004, 2021 Maciej W. Rozycki
+ * Copyright (C) 2003, 07 Ralf Baechle (ralf@linux-mips.org)
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+@@ -9,25 +9,18 @@
+ #ifndef __ASM_DIV64_H
+ #define __ASM_DIV64_H
+
+-#include <asm-generic/div64.h>
+-
+-#if BITS_PER_LONG == 64
++#include <asm/bitsperlong.h>
+
+-#include <linux/types.h>
++#if BITS_PER_LONG == 32
+
+ /*
+ * No traps on overflows for any of these...
+ */
+
+-#define __div64_32(n, base) \
+-({ \
++#define do_div64_32(res, high, low, base) ({ \
+ unsigned long __cf, __tmp, __tmp2, __i; \
+ unsigned long __quot32, __mod32; \
+- unsigned long __high, __low; \
+- unsigned long long __n; \
+ \
+- __high = *__n >> 32; \
+- __low = __n; \
+ __asm__( \
+ " .set push \n" \
+ " .set noat \n" \
+@@ -51,18 +44,50 @@
+ " subu %0, %0, %z6 \n" \
+ " addiu %2, %2, 1 \n" \
+ "3: \n" \
+- " bnez %4, 0b\n\t" \
+- " srl %5, %1, 0x1f\n\t" \
++ " bnez %4, 0b \n" \
++ " srl %5, %1, 0x1f \n" \
+ " .set pop" \
+ : "=&r" (__mod32), "=&r" (__tmp), \
+ "=&r" (__quot32), "=&r" (__cf), \
+ "=&r" (__i), "=&r" (__tmp2) \
+- : "Jr" (base), "0" (__high), "1" (__low)); \
++ : "Jr" (base), "0" (high), "1" (low)); \
+ \
+- (__n) = __quot32; \
++ (res) = __quot32; \
+ __mod32; \
+ })
+
+-#endif /* BITS_PER_LONG == 64 */
++#define __div64_32(n, base) ({ \
++ unsigned long __upper, __low, __high, __radix; \
++ unsigned long long __modquot; \
++ unsigned long long __quot; \
++ unsigned long long __div; \
++ unsigned long __mod; \
++ \
++ __div = (*n); \
++ __radix = (base); \
++ \
++ __high = __div >> 32; \
++ __low = __div; \
++ __upper = __high; \
++ \
++ if (__high) { \
++ __asm__("divu $0, %z1, %z2" \
++ : "=x" (__modquot) \
++ : "Jr" (__high), "Jr" (__radix)); \
++ __upper = __modquot >> 32; \
++ __high = __modquot; \
++ } \
++ \
++ __mod = do_div64_32(__low, __upper, __low, __radix); \
++ \
++ __quot = __high; \
++ __quot = __quot << 32 | __low; \
++ (*n) = __quot; \
++ __mod; \
++})
++
++#endif /* BITS_PER_LONG == 32 */
++
++#include <asm-generic/div64.h>
+
+ #endif /* __ASM_DIV64_H */
--- /dev/null
+From 9ddb3c14afba8bc5950ed297f02d4ae05ff35cd1 Mon Sep 17 00:00:00 2001
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Date: Fri, 14 May 2021 17:27:24 -0700
+Subject: mm: fix struct page layout on 32-bit systems
+
+From: Matthew Wilcox (Oracle) <willy@infradead.org>
+
+commit 9ddb3c14afba8bc5950ed297f02d4ae05ff35cd1 upstream.
+
+32-bit architectures which expect 8-byte alignment for 8-byte integers and
+need 64-bit DMA addresses (arm, mips, ppc) had their struct page
+inadvertently expanded in 2019. When the dma_addr_t was added, it forced
+the alignment of the union to 8 bytes, which inserted a 4 byte gap between
+'flags' and the union.
+
+Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
+This restores the alignment to that of an unsigned long. We always
+store the low bits in the first word to prevent the PageTail bit from
+being inadvertently set on a big endian platform. If that happened,
+get_user_pages_fast() racing against a page which was freed and
+reallocated to the page_pool could dereference a bogus compound_head(),
+which would be hard to trace back to this cause.
+
+Link: https://lkml.kernel.org/r/20210510153211.1504886-1-willy@infradead.org
+Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
+Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Tested-by: Matteo Croce <mcroce@linux.microsoft.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mm_types.h | 4 ++--
+ include/net/page_pool.h | 12 +++++++++++-
+ net/core/page_pool.c | 12 +++++++-----
+ 3 files changed, 20 insertions(+), 8 deletions(-)
+
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -97,10 +97,10 @@ struct page {
+ };
+ struct { /* page_pool used by netstack */
+ /**
+- * @dma_addr: might require a 64-bit value even on
++ * @dma_addr: might require a 64-bit value on
+ * 32-bit architectures.
+ */
+- dma_addr_t dma_addr;
++ unsigned long dma_addr[2];
+ };
+ struct { /* slab, slob and slub */
+ union {
+--- a/include/net/page_pool.h
++++ b/include/net/page_pool.h
+@@ -198,7 +198,17 @@ static inline void page_pool_recycle_dir
+
+ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
+ {
+- return page->dma_addr;
++ dma_addr_t ret = page->dma_addr[0];
++ if (sizeof(dma_addr_t) > sizeof(unsigned long))
++ ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
++ return ret;
++}
++
++static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
++{
++ page->dma_addr[0] = addr;
++ if (sizeof(dma_addr_t) > sizeof(unsigned long))
++ page->dma_addr[1] = upper_32_bits(addr);
+ }
+
+ static inline bool is_page_pool_compiled_in(void)
+--- a/net/core/page_pool.c
++++ b/net/core/page_pool.c
+@@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_devic
+ struct page *page,
+ unsigned int dma_sync_size)
+ {
++ dma_addr_t dma_addr = page_pool_get_dma_addr(page);
++
+ dma_sync_size = min(dma_sync_size, pool->p.max_len);
+- dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
++ dma_sync_single_range_for_device(pool->p.dev, dma_addr,
+ pool->p.offset, dma_sync_size,
+ pool->p.dma_dir);
+ }
+@@ -226,7 +228,7 @@ static struct page *__page_pool_alloc_pa
+ put_page(page);
+ return NULL;
+ }
+- page->dma_addr = dma;
++ page_pool_set_dma_addr(page, dma);
+
+ if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
+ page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
+@@ -294,13 +296,13 @@ void page_pool_release_page(struct page_
+ */
+ goto skip_dma_unmap;
+
+- dma = page->dma_addr;
++ dma = page_pool_get_dma_addr(page);
+
+- /* When page is unmapped, it cannot be returned our pool */
++ /* When page is unmapped, it cannot be returned to our pool */
+ dma_unmap_page_attrs(pool->p.dev, dma,
+ PAGE_SIZE << pool->p.order, pool->p.dma_dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+- page->dma_addr = 0;
++ page_pool_set_dma_addr(page, 0);
+ skip_dma_unmap:
+ /* This may be the last page returned, releasing the pool, so
+ * it is not safe to reference pool afterwards.
kvm-nvmx-always-make-an-attempt-to-map-evmcs-after-migration.patch
kvm-vmx-do-not-advertise-rdpid-if-enable_rdtscp-control-is-unsupported.patch
kvm-vmx-disable-preemption-when-probing-user-return-msrs.patch
+mm-fix-struct-page-layout-on-32-bit-systems.patch
+mips-reinstate-platform-__div64_32-handler.patch
+mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch
+mips-avoid-handcoded-divu-in-__div64_32-altogether.patch