5.12-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 17 May 2021 10:11:56 +0000 (12:11 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 17 May 2021 10:11:56 +0000 (12:11 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 17 May 2021 10:11:56 +0000 (12:11 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 17 May 2021 10:11:56 +0000 (12:11 +0200)
diff --git a/queue-5.12/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch b/queue-5.12/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch

new file mode 100644 (file)

index 0000000..1305d99
--- /dev/null
+++ b/queue-5.12/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch
@@ -0,0 +1,43 @@
+From c1d337d45ec0a802299688e17d568c4e3a585895 Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Tue, 20 Apr 2021 04:50:48 +0200
+Subject: MIPS: Avoid DIVU in `__div64_32' if result would be zero
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit c1d337d45ec0a802299688e17d568c4e3a585895 upstream.
+
+We already check the high part of the dividend against zero to avoid the
+costly DIVU instruction in that case, needed to reduce the high part of
+the dividend, so we may well check against the divisor instead and set
+the high part of the quotient to zero right away.  We need to treat the
+high part of the dividend in that case though as the remainder that would
+be calculated by the DIVU instruction we avoided.
+
+This has passed correctness verification with test_div64 and reduced the
+module's average execution time down to 1.0445s and 0.2619s from 1.0668s
+and 0.2629s respectively for an R3400 CPU @40MHz and a 5Kc CPU @160MHz.
+
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/include/asm/div64.h |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/mips/include/asm/div64.h
++++ b/arch/mips/include/asm/div64.h
+@@ -68,9 +68,11 @@
+                                                                       \
+       __high = __div >> 32;                                           \
+       __low = __div;                                                  \
+-      __upper = __high;                                               \
+                                                                       \
+-      if (__high) {                                                   \
++      if (__high < __radix) {                                         \
++              __upper = __high;                                       \
++              __high = 0;                                             \
++      } else {                                                        \
+               __asm__("divu   $0, %z1, %z2"                           \
+               : "=x" (__modquot)                                      \
+               : "Jr" (__high), "Jr" (__radix));                       \
diff --git a/queue-5.12/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch b/queue-5.12/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch

new file mode 100644 (file)

index 0000000..6c01ec7
--- /dev/null
+++ b/queue-5.12/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch
@@ -0,0 +1,75 @@
+From 25ab14cbe9d1b66fda44c71a2db7582a31b6f5cd Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Thu, 22 Apr 2021 22:36:12 +0200
+Subject: MIPS: Avoid handcoded DIVU in `__div64_32' altogether
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit 25ab14cbe9d1b66fda44c71a2db7582a31b6f5cd upstream.
+
+Remove the inline asm with a DIVU instruction from `__div64_32' and use
+plain C code for the intended DIVMOD calculation instead.  GCC is smart
+enough to know that both the quotient and the remainder are calculated
+with single DIVU, so with ISAs up to R5 the same instruction is actually
+produced with overall similar code.
+
+For R6 compiled code will work, but separate DIVU and MODU instructions
+will be produced, which are also interlocked, so scalar implementations
+will likely not perform as well as older ISAs with their asynchronous MD
+unit.  Likely still faster than the generic algorithm though.
+
+This removes a compilation error for R6 however where the original DIVU
+instruction is not supported anymore and the MDU accumulator registers
+have been removed and consequently GCC complains as to a constraint it
+cannot find a register for:
+
+In file included from ./include/linux/math.h:5,
+                 from ./include/linux/kernel.h:13,
+                 from mm/page-writeback.c:15:
+./include/linux/math64.h: In function 'div_u64_rem':
+./arch/mips/include/asm/div64.h:76:17: error: inconsistent operand constraints in an 'asm'
+   76 |                 __asm__("divu   $0, %z1, %z2"                           \
+      |                 ^~~~~~~
+./include/asm-generic/div64.h:245:25: note: in expansion of macro '__div64_32'
+  245 |                 __rem = __div64_32(&(n), __base);       \
+      |                         ^~~~~~~~~~
+./include/linux/math64.h:91:22: note: in expansion of macro 'do_div'
+   91 |         *remainder = do_div(dividend, divisor);
+      |                      ^~~~~~
+
+This has passed correctness verification with test_div64 and reduced the
+module's average execution time down to 1.0404s from 1.0445s with R3400
+@40MHz.  The module's MIPS I machine code has also shrunk by 12 bytes or
+3 instructions.
+
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/include/asm/div64.h |    8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/arch/mips/include/asm/div64.h
++++ b/arch/mips/include/asm/div64.h
+@@ -58,7 +58,6 @@
+ 
+ #define __div64_32(n, base) ({                                                \
+       unsigned long __upper, __low, __high, __radix;                  \
+-      unsigned long long __modquot;                                   \
+       unsigned long long __quot;                                      \
+       unsigned long long __div;                                       \
+       unsigned long __mod;                                            \
+@@ -73,11 +72,8 @@
+               __upper = __high;                                       \
+               __high = 0;                                             \
+       } else {                                                        \
+-              __asm__("divu   $0, %z1, %z2"                           \
+-              : "=x" (__modquot)                                      \
+-              : "Jr" (__high), "Jr" (__radix));                       \
+-              __upper = __modquot >> 32;                              \
+-              __high = __modquot;                                     \
++              __upper = __high % __radix;                             \
++              __high /= __radix;                                      \
+       }                                                               \
+                                                                       \
+       __mod = do_div64_32(__low, __upper, __low, __radix);            \
diff --git a/queue-5.12/mips-reinstate-platform-__div64_32-handler.patch b/queue-5.12/mips-reinstate-platform-__div64_32-handler.patch

new file mode 100644 (file)

index 0000000..3a0267d
--- /dev/null
+++ b/queue-5.12/mips-reinstate-platform-__div64_32-handler.patch
@@ -0,0 +1,161 @@
+From c49f71f60754acbff37505e1d16ca796bf8a8140 Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Tue, 20 Apr 2021 04:50:40 +0200
+Subject: MIPS: Reinstate platform `__div64_32' handler
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit c49f71f60754acbff37505e1d16ca796bf8a8140 upstream.
+
+Our current MIPS platform `__div64_32' handler is inactive, because it
+is incorrectly only enabled for 64-bit configurations, for which generic
+`do_div' code does not call it anyway.
+
+The handler is not suitable for being called from there though as it
+only calculates 32 bits of the quotient under the assumption the 64-bit
+dividend has been suitably reduced.  Code for such reduction used to be
+there, however it has been incorrectly removed with commit c21004cd5b4c
+("MIPS: Rewrite <asm/div64.h> to work with gcc 4.4.0."), which should
+have only updated an obsoleted constraint for an inline asm involving
+$hi and $lo register outputs, while possibly wiring the original MIPS
+variant of the `do_div' macro as `__div64_32' handler for the generic
+`do_div' implementation.
+
+Correct the handler as follows then:
+
+- Revert most of the commit referred to, however retaining the current
+  formatting, except for the final two instructions of the inline asm
+  sequence, which the original commit missed.  Omit the original 64-bit
+  parts though.
+
+- Rename the original `do_div' macro to `__div64_32'.  Use the combined
+  `x' constraint referring to the MD accumulator as a whole, replacing
+  the original individual `h' and `l' constraints used for $hi and $lo
+  registers respectively, of which `h' has been obsoleted with GCC 4.4.
+  Update surrounding code accordingly.
+
+  We have since removed support for GCC versions before 4.9, so no need
+  for a special arrangement here; GCC has supported the `x' constraint
+  since forever anyway, or at least going back to 1991.
+
+- Rename the `__base' local variable in `__div64_32' to `__radix' to
+  avoid a conflict with a local variable in `do_div'.
+
+- Actually enable this code for 32-bit rather than 64-bit configurations
+  by qualifying it with BITS_PER_LONG being 32 instead of 64.  Include
+  <asm/bitsperlong.h> for this macro rather than <linux/types.h> as we
+  don't need anything else.
+
+- Finally include <asm-generic/div64.h> last rather than first.
+
+This has passed correctness verification with test_div64 and reduced the
+module's average execution time down to 1.0668s and 0.2629s from 2.1529s
+and 0.5647s respectively for an R3400 CPU @40MHz and a 5Kc CPU @160MHz.
+For a reference 64-bit `do_div' code where we have the DDIVU instruction
+available to do the whole calculation right away averages at 0.0660s for
+the latter CPU.
+
+Fixes: c21004cd5b4c ("MIPS: Rewrite <asm/div64.h> to work with gcc 4.4.0.")
+Reported-by: Huacai Chen <chenhuacai@kernel.org>
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Cc: stable@vger.kernel.org # v2.6.30+
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/include/asm/div64.h |   57 ++++++++++++++++++++++++++++++------------
+ 1 file changed, 41 insertions(+), 16 deletions(-)
+
+--- a/arch/mips/include/asm/div64.h
++++ b/arch/mips/include/asm/div64.h
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2000, 2004  Maciej W. Rozycki
++ * Copyright (C) 2000, 2004, 2021  Maciej W. Rozycki
+  * Copyright (C) 2003, 07 Ralf Baechle (ralf@linux-mips.org)
+  *
+  * This file is subject to the terms and conditions of the GNU General Public
+@@ -9,25 +9,18 @@
+ #ifndef __ASM_DIV64_H
+ #define __ASM_DIV64_H
+ 
+-#include <asm-generic/div64.h>
+-
+-#if BITS_PER_LONG == 64
++#include <asm/bitsperlong.h>
+ 
+-#include <linux/types.h>
++#if BITS_PER_LONG == 32
+ 
+ /*
+  * No traps on overflows for any of these...
+  */
+ 
+-#define __div64_32(n, base)                                           \
+-({                                                                    \
++#define do_div64_32(res, high, low, base) ({                          \
+       unsigned long __cf, __tmp, __tmp2, __i;                         \
+       unsigned long __quot32, __mod32;                                \
+-      unsigned long __high, __low;                                    \
+-      unsigned long long __n;                                         \
+                                                                       \
+-      __high = *__n >> 32;                                            \
+-      __low = __n;                                                    \
+       __asm__(                                                        \
+       "       .set    push                                    \n"     \
+       "       .set    noat                                    \n"     \
+@@ -51,18 +44,50 @@
+       "       subu    %0, %0, %z6                             \n"     \
+       "       addiu   %2, %2, 1                               \n"     \
+       "3:                                                     \n"     \
+-      "       bnez    %4, 0b\n\t"                                     \
+-      "        srl    %5, %1, 0x1f\n\t"                               \
++      "       bnez    %4, 0b                                  \n"     \
++      "        srl    %5, %1, 0x1f                            \n"     \
+       "       .set    pop"                                            \
+       : "=&r" (__mod32), "=&r" (__tmp),                               \
+         "=&r" (__quot32), "=&r" (__cf),                               \
+         "=&r" (__i), "=&r" (__tmp2)                                   \
+-      : "Jr" (base), "0" (__high), "1" (__low));                      \
++      : "Jr" (base), "0" (high), "1" (low));                          \
+                                                                       \
+-      (__n) = __quot32;                                               \
++      (res) = __quot32;                                               \
+       __mod32;                                                        \
+ })
+ 
+-#endif /* BITS_PER_LONG == 64 */
++#define __div64_32(n, base) ({                                                \
++      unsigned long __upper, __low, __high, __radix;                  \
++      unsigned long long __modquot;                                   \
++      unsigned long long __quot;                                      \
++      unsigned long long __div;                                       \
++      unsigned long __mod;                                            \
++                                                                      \
++      __div = (*n);                                                   \
++      __radix = (base);                                               \
++                                                                      \
++      __high = __div >> 32;                                           \
++      __low = __div;                                                  \
++      __upper = __high;                                               \
++                                                                      \
++      if (__high) {                                                   \
++              __asm__("divu   $0, %z1, %z2"                           \
++              : "=x" (__modquot)                                      \
++              : "Jr" (__high), "Jr" (__radix));                       \
++              __upper = __modquot >> 32;                              \
++              __high = __modquot;                                     \
++      }                                                               \
++                                                                      \
++      __mod = do_div64_32(__low, __upper, __low, __radix);            \
++                                                                      \
++      __quot = __high;                                                \
++      __quot = __quot << 32 | __low;                                  \
++      (*n) = __quot;                                                  \
++      __mod;                                                          \
++})
++
++#endif /* BITS_PER_LONG == 32 */
++
++#include <asm-generic/div64.h>
+ 
+ #endif /* __ASM_DIV64_H */
diff --git a/queue-5.12/mm-fix-struct-page-layout-on-32-bit-systems.patch b/queue-5.12/mm-fix-struct-page-layout-on-32-bit-systems.patch

new file mode 100644 (file)

index 0000000..dcb629d
--- /dev/null
+++ b/queue-5.12/mm-fix-struct-page-layout-on-32-bit-systems.patch
@@ -0,0 +1,116 @@
+From 9ddb3c14afba8bc5950ed297f02d4ae05ff35cd1 Mon Sep 17 00:00:00 2001
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Date: Fri, 14 May 2021 17:27:24 -0700
+Subject: mm: fix struct page layout on 32-bit systems
+
+From: Matthew Wilcox (Oracle) <willy@infradead.org>
+
+commit 9ddb3c14afba8bc5950ed297f02d4ae05ff35cd1 upstream.
+
+32-bit architectures which expect 8-byte alignment for 8-byte integers and
+need 64-bit DMA addresses (arm, mips, ppc) had their struct page
+inadvertently expanded in 2019.  When the dma_addr_t was added, it forced
+the alignment of the union to 8 bytes, which inserted a 4 byte gap between
+'flags' and the union.
+
+Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
+This restores the alignment to that of an unsigned long.  We always
+store the low bits in the first word to prevent the PageTail bit from
+being inadvertently set on a big endian platform.  If that happened,
+get_user_pages_fast() racing against a page which was freed and
+reallocated to the page_pool could dereference a bogus compound_head(),
+which would be hard to trace back to this cause.
+
+Link: https://lkml.kernel.org/r/20210510153211.1504886-1-willy@infradead.org
+Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
+Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Tested-by: Matteo Croce <mcroce@linux.microsoft.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mm_types.h |    4 ++--
+ include/net/page_pool.h  |   12 +++++++++++-
+ net/core/page_pool.c     |   12 +++++++-----
+ 3 files changed, 20 insertions(+), 8 deletions(-)
+
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -97,10 +97,10 @@ struct page {
+               };
+               struct {        /* page_pool used by netstack */
+                       /**
+-                       * @dma_addr: might require a 64-bit value even on
++                       * @dma_addr: might require a 64-bit value on
+                        * 32-bit architectures.
+                        */
+-                      dma_addr_t dma_addr;
++                      unsigned long dma_addr[2];
+               };
+               struct {        /* slab, slob and slub */
+                       union {
+--- a/include/net/page_pool.h
++++ b/include/net/page_pool.h
+@@ -198,7 +198,17 @@ static inline void page_pool_recycle_dir
+ 
+ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
+ {
+-      return page->dma_addr;
++      dma_addr_t ret = page->dma_addr[0];
++      if (sizeof(dma_addr_t) > sizeof(unsigned long))
++              ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
++      return ret;
++}
++
++static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
++{
++      page->dma_addr[0] = addr;
++      if (sizeof(dma_addr_t) > sizeof(unsigned long))
++              page->dma_addr[1] = upper_32_bits(addr);
+ }
+ 
+ static inline bool is_page_pool_compiled_in(void)
+--- a/net/core/page_pool.c
++++ b/net/core/page_pool.c
+@@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_devic
+                                         struct page *page,
+                                         unsigned int dma_sync_size)
+ {
++      dma_addr_t dma_addr = page_pool_get_dma_addr(page);
++
+       dma_sync_size = min(dma_sync_size, pool->p.max_len);
+-      dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
++      dma_sync_single_range_for_device(pool->p.dev, dma_addr,
+                                        pool->p.offset, dma_sync_size,
+                                        pool->p.dma_dir);
+ }
+@@ -226,7 +228,7 @@ static struct page *__page_pool_alloc_pa
+               put_page(page);
+               return NULL;
+       }
+-      page->dma_addr = dma;
++      page_pool_set_dma_addr(page, dma);
+ 
+       if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
+               page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
+@@ -294,13 +296,13 @@ void page_pool_release_page(struct page_
+                */
+               goto skip_dma_unmap;
+ 
+-      dma = page->dma_addr;
++      dma = page_pool_get_dma_addr(page);
+ 
+-      /* When page is unmapped, it cannot be returned our pool */
++      /* When page is unmapped, it cannot be returned to our pool */
+       dma_unmap_page_attrs(pool->p.dev, dma,
+                            PAGE_SIZE << pool->p.order, pool->p.dma_dir,
+                            DMA_ATTR_SKIP_CPU_SYNC);
+-      page->dma_addr = 0;
++      page_pool_set_dma_addr(page, 0);
+ skip_dma_unmap:
+       /* This may be the last page returned, releasing the pool, so
+        * it is not safe to reference pool afterwards.
diff --git a/queue-5.12/series b/queue-5.12/series

index 00a2d412427a368df8f26720d8fd63bc608abe28..c96d61ffd1f3064fc2a6558e1060cfb125c51cdc 100644 (file)
--- a/queue-5.12/series
+++ b/queue-5.12/series
@@ -332,3 +332,7 @@ kvm-x86-add-support-for-rdpid-without-rdtscp.patch
  kvm-nvmx-always-make-an-attempt-to-map-evmcs-after-migration.patch
  kvm-vmx-do-not-advertise-rdpid-if-enable_rdtscp-control-is-unsupported.patch
  kvm-vmx-disable-preemption-when-probing-user-return-msrs.patch
+mm-fix-struct-page-layout-on-32-bit-systems.patch
+mips-reinstate-platform-__div64_32-handler.patch
+mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch
+mips-avoid-handcoded-divu-in-__div64_32-altogether.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 17 May 2021 10:11:56 +0000 (12:11 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 17 May 2021 10:11:56 +0000 (12:11 +0200)
queue-5.12/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch	[new file with mode: 0644]	patch \| blob
queue-5.12/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch	[new file with mode: 0644]	patch \| blob
queue-5.12/mips-reinstate-platform-__div64_32-handler.patch	[new file with mode: 0644]	patch \| blob
queue-5.12/mm-fix-struct-page-layout-on-32-bit-systems.patch	[new file with mode: 0644]	patch \| blob
queue-5.12/series		patch \| blob \| blame \| history