4.19-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 17 May 2021 10:10:12 +0000 (12:10 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 17 May 2021 10:10:12 +0000 (12:10 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 17 May 2021 10:10:12 +0000 (12:10 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 17 May 2021 10:10:12 +0000 (12:10 +0200)
diff --git a/queue-4.19/fddi-defxx-make-mmio-the-configuration-default-except-for-eisa.patch b/queue-4.19/fddi-defxx-make-mmio-the-configuration-default-except-for-eisa.patch

new file mode 100644 (file)

index 0000000..783c18e
--- /dev/null
+++ b/queue-4.19/fddi-defxx-make-mmio-the-configuration-default-except-for-eisa.patch
@@ -0,0 +1,76 @@
+From 193ced4a79599352d63cb8c9e2f0c6043106eb6a Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Wed, 10 Mar 2021 13:03:14 +0100
+Subject: FDDI: defxx: Make MMIO the configuration default except for EISA
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit 193ced4a79599352d63cb8c9e2f0c6043106eb6a upstream.
+
+Recent versions of the PCI Express specification have deprecated support
+for I/O transactions and actually some PCIe host bridges, such as Power
+Systems Host Bridge 4 (PHB4), do not implement them.
+
+The default kernel configuration choice for the defxx driver is the use
+of I/O ports rather than MMIO for PCI and EISA systems.  It may have
+made sense as a conservative backwards compatible choice back when MMIO
+operation support was added to the driver as a part of TURBOchannel bus
+support.  However nowadays this configuration choice makes the driver
+unusable with systems that do not implement I/O transactions for PCIe.
+
+Make DEFXX_MMIO the configuration default then, except where configured
+for EISA.  This exception is because an EISA adapter can have its MMIO
+decoding disabled with ECU (EISA Configuration Utility) and therefore
+not available with the resource allocation infrastructure we implement,
+while port I/O is always readily available as it uses slot-specific
+addressing, directly mapped to the slot an option card has been placed
+in and handled with our EISA bus support core.  Conversely a kernel that
+supports modern systems which may not have I/O transactions implemented
+for PCIe will usually not be expected to handle legacy EISA systems.
+
+The change of the default will make it easier for people, including but
+not limited to distribution packagers, to make a working choice for the
+driver.
+
+Update the option description accordingly and while at it replace the
+potentially ambiguous PIO acronym with IOP for "port I/O" vs "I/O ports"
+according to our nomenclature used elsewhere.
+
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Fixes: e89a2cfb7d7b ("[TC] defxx: TURBOchannel support")
+Cc: stable@vger.kernel.org # v2.6.21+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/fddi/Kconfig |   15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/fddi/Kconfig
++++ b/drivers/net/fddi/Kconfig
+@@ -28,17 +28,20 @@ config DEFXX
+ 
+ config DEFXX_MMIO
+       bool
+-      prompt "Use MMIO instead of PIO" if PCI || EISA
++      prompt "Use MMIO instead of IOP" if PCI || EISA
+       depends on DEFXX
+-      default n if PCI || EISA
++      default n if EISA
+       default y
+       ---help---
+         This instructs the driver to use EISA or PCI memory-mapped I/O
+-        (MMIO) as appropriate instead of programmed I/O ports (PIO).
++        (MMIO) as appropriate instead of programmed I/O ports (IOP).
+         Enabling this gives an improvement in processing time in parts
+-        of the driver, but it may cause problems with EISA (DEFEA)
+-        adapters.  TURBOchannel does not have the concept of I/O ports,
+-        so MMIO is always used for these (DEFTA) adapters.
++        of the driver, but it requires a memory window to be configured
++        for EISA (DEFEA) adapters that may not always be available.
++        Conversely some PCIe host bridges do not support IOP, so MMIO
++        may be required to access PCI (DEFPA) adapters on downstream PCI
++        buses with some systems.  TURBOchannel does not have the concept
++        of I/O ports, so MMIO is always used for these (DEFTA) adapters.
+ 
+         If unsure, say N.
+ 
diff --git a/queue-4.19/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch b/queue-4.19/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch

new file mode 100644 (file)

index 0000000..1305d99
--- /dev/null
+++ b/queue-4.19/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch
@@ -0,0 +1,43 @@
+From c1d337d45ec0a802299688e17d568c4e3a585895 Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Tue, 20 Apr 2021 04:50:48 +0200
+Subject: MIPS: Avoid DIVU in `__div64_32' is result would be zero
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit c1d337d45ec0a802299688e17d568c4e3a585895 upstream.
+
+We already check the high part of the dividend against zero to avoid the
+costly DIVU instruction in that case, needed to reduce the high part of
+the dividend, so we may well check against the divisor instead and set
+the high part of the quotient to zero right away.  We need to treat the
+high part of the dividend in that case though as the remainder that would
+be calculated by the DIVU instruction we avoided.
+
+This has passed correctness verification with test_div64 and reduced the
+module's average execution time down to 1.0445s and 0.2619s from 1.0668s
+and 0.2629s respectively for an R3400 CPU @40MHz and a 5Kc CPU @160MHz.
+
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/include/asm/div64.h |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/mips/include/asm/div64.h
++++ b/arch/mips/include/asm/div64.h
+@@ -68,9 +68,11 @@
+                                                                       \
+       __high = __div >> 32;                                           \
+       __low = __div;                                                  \
+-      __upper = __high;                                               \
+                                                                       \
+-      if (__high) {                                                   \
++      if (__high < __radix) {                                         \
++              __upper = __high;                                       \
++              __high = 0;                                             \
++      } else {                                                        \
+               __asm__("divu   $0, %z1, %z2"                           \
+               : "=x" (__modquot)                                      \
+               : "Jr" (__high), "Jr" (__radix));                       \
diff --git a/queue-4.19/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch b/queue-4.19/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch

new file mode 100644 (file)

index 0000000..6c01ec7
--- /dev/null
+++ b/queue-4.19/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch
@@ -0,0 +1,75 @@
+From 25ab14cbe9d1b66fda44c71a2db7582a31b6f5cd Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Thu, 22 Apr 2021 22:36:12 +0200
+Subject: MIPS: Avoid handcoded DIVU in `__div64_32' altogether
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit 25ab14cbe9d1b66fda44c71a2db7582a31b6f5cd upstream.
+
+Remove the inline asm with a DIVU instruction from `__div64_32' and use
+plain C code for the intended DIVMOD calculation instead.  GCC is smart
+enough to know that both the quotient and the remainder are calculated
+with single DIVU, so with ISAs up to R5 the same instruction is actually
+produced with overall similar code.
+
+For R6 compiled code will work, but separate DIVU and MODU instructions
+will be produced, which are also interlocked, so scalar implementations
+will likely not perform as well as older ISAs with their asynchronous MD
+unit.  Likely still faster than the generic algorithm though.
+
+This removes a compilation error for R6 however where the original DIVU
+instruction is not supported anymore and the MDU accumulator registers
+have been removed and consequently GCC complains as to a constraint it
+cannot find a register for:
+
+In file included from ./include/linux/math.h:5,
+                 from ./include/linux/kernel.h:13,
+                 from mm/page-writeback.c:15:
+./include/linux/math64.h: In function 'div_u64_rem':
+./arch/mips/include/asm/div64.h:76:17: error: inconsistent operand constraints in an 'asm'
+   76 |                 __asm__("divu   $0, %z1, %z2"                           \
+      |                 ^~~~~~~
+./include/asm-generic/div64.h:245:25: note: in expansion of macro '__div64_32'
+  245 |                 __rem = __div64_32(&(n), __base);       \
+      |                         ^~~~~~~~~~
+./include/linux/math64.h:91:22: note: in expansion of macro 'do_div'
+   91 |         *remainder = do_div(dividend, divisor);
+      |                      ^~~~~~
+
+This has passed correctness verification with test_div64 and reduced the
+module's average execution time down to 1.0404s from 1.0445s with R3400
+@40MHz.  The module's MIPS I machine code has also shrunk by 12 bytes or
+3 instructions.
+
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/include/asm/div64.h |    8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/arch/mips/include/asm/div64.h
++++ b/arch/mips/include/asm/div64.h
+@@ -58,7 +58,6 @@
+ 
+ #define __div64_32(n, base) ({                                                \
+       unsigned long __upper, __low, __high, __radix;                  \
+-      unsigned long long __modquot;                                   \
+       unsigned long long __quot;                                      \
+       unsigned long long __div;                                       \
+       unsigned long __mod;                                            \
+@@ -73,11 +72,8 @@
+               __upper = __high;                                       \
+               __high = 0;                                             \
+       } else {                                                        \
+-              __asm__("divu   $0, %z1, %z2"                           \
+-              : "=x" (__modquot)                                      \
+-              : "Jr" (__high), "Jr" (__radix));                       \
+-              __upper = __modquot >> 32;                              \
+-              __high = __modquot;                                     \
++              __upper = __high % __radix;                             \
++              __high /= __radix;                                      \
+       }                                                               \
+                                                                       \
+       __mod = do_div64_32(__low, __upper, __low, __radix);            \
diff --git a/queue-4.19/mips-reinstate-platform-__div64_32-handler.patch b/queue-4.19/mips-reinstate-platform-__div64_32-handler.patch

new file mode 100644 (file)

index 0000000..3a0267d
--- /dev/null
+++ b/queue-4.19/mips-reinstate-platform-__div64_32-handler.patch
@@ -0,0 +1,161 @@
+From c49f71f60754acbff37505e1d16ca796bf8a8140 Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Tue, 20 Apr 2021 04:50:40 +0200
+Subject: MIPS: Reinstate platform `__div64_32' handler
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit c49f71f60754acbff37505e1d16ca796bf8a8140 upstream.
+
+Our current MIPS platform `__div64_32' handler is inactive, because it
+is incorrectly only enabled for 64-bit configurations, for which generic
+`do_div' code does not call it anyway.
+
+The handler is not suitable for being called from there though as it
+only calculates 32 bits of the quotient under the assumption the 64-bit
+dividend has been suitably reduced.  Code for such reduction used to be
+there, however it has been incorrectly removed with commit c21004cd5b4c
+("MIPS: Rewrite <asm/div64.h> to work with gcc 4.4.0."), which should
+have only updated an obsoleted constraint for an inline asm involving
+$hi and $lo register outputs, while possibly wiring the original MIPS
+variant of the `do_div' macro as `__div64_32' handler for the generic
+`do_div' implementation.
+
+Correct the handler as follows then:
+
+- Revert most of the commit referred, however retaining the current
+  formatting, except for the final two instructions of the inline asm
+  sequence, which the original commit missed.  Omit the original 64-bit
+  parts though.
+
+- Rename the original `do_div' macro to `__div64_32'.  Use the combined
+  `x' constraint referring to the MD accumulator as a whole, replacing
+  the original individual `h' and `l' constraints used for $hi and $lo
+  registers respectively, of which `h' has been obsoleted with GCC 4.4.
+  Update surrounding code accordingly.
+
+  We have since removed support for GCC versions before 4.9, so no need
+  for a special arrangement here; GCC has supported the `x' constraint
+  since forever anyway, or at least going back to 1991.
+
+- Rename the `__base' local variable in `__div64_32' to `__radix' to
+  avoid a conflict with a local variable in `do_div'.
+
+- Actually enable this code for 32-bit rather than 64-bit configurations
+  by qualifying it with BITS_PER_LONG being 32 instead of 64.  Include
+  <asm/bitsperlong.h> for this macro rather than <linux/types.h> as we
+  don't need anything else.
+
+- Finally include <asm-generic/div64.h> last rather than first.
+
+This has passed correctness verification with test_div64 and reduced the
+module's average execution time down to 1.0668s and 0.2629s from 2.1529s
+and 0.5647s respectively for an R3400 CPU @40MHz and a 5Kc CPU @160MHz.
+For a reference 64-bit `do_div' code where we have the DDIVU instruction
+available to do the whole calculation right away averages at 0.0660s for
+the latter CPU.
+
+Fixes: c21004cd5b4c ("MIPS: Rewrite <asm/div64.h> to work with gcc 4.4.0.")
+Reported-by: Huacai Chen <chenhuacai@kernel.org>
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Cc: stable@vger.kernel.org # v2.6.30+
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/include/asm/div64.h |   57 ++++++++++++++++++++++++++++++------------
+ 1 file changed, 41 insertions(+), 16 deletions(-)
+
+--- a/arch/mips/include/asm/div64.h
++++ b/arch/mips/include/asm/div64.h
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2000, 2004  Maciej W. Rozycki
++ * Copyright (C) 2000, 2004, 2021  Maciej W. Rozycki
+  * Copyright (C) 2003, 07 Ralf Baechle (ralf@linux-mips.org)
+  *
+  * This file is subject to the terms and conditions of the GNU General Public
+@@ -9,25 +9,18 @@
+ #ifndef __ASM_DIV64_H
+ #define __ASM_DIV64_H
+ 
+-#include <asm-generic/div64.h>
+-
+-#if BITS_PER_LONG == 64
++#include <asm/bitsperlong.h>
+ 
+-#include <linux/types.h>
++#if BITS_PER_LONG == 32
+ 
+ /*
+  * No traps on overflows for any of these...
+  */
+ 
+-#define __div64_32(n, base)                                           \
+-({                                                                    \
++#define do_div64_32(res, high, low, base) ({                          \
+       unsigned long __cf, __tmp, __tmp2, __i;                         \
+       unsigned long __quot32, __mod32;                                \
+-      unsigned long __high, __low;                                    \
+-      unsigned long long __n;                                         \
+                                                                       \
+-      __high = *__n >> 32;                                            \
+-      __low = __n;                                                    \
+       __asm__(                                                        \
+       "       .set    push                                    \n"     \
+       "       .set    noat                                    \n"     \
+@@ -51,18 +44,50 @@
+       "       subu    %0, %0, %z6                             \n"     \
+       "       addiu   %2, %2, 1                               \n"     \
+       "3:                                                     \n"     \
+-      "       bnez    %4, 0b\n\t"                                     \
+-      "        srl    %5, %1, 0x1f\n\t"                               \
++      "       bnez    %4, 0b                                  \n"     \
++      "        srl    %5, %1, 0x1f                            \n"     \
+       "       .set    pop"                                            \
+       : "=&r" (__mod32), "=&r" (__tmp),                               \
+         "=&r" (__quot32), "=&r" (__cf),                               \
+         "=&r" (__i), "=&r" (__tmp2)                                   \
+-      : "Jr" (base), "0" (__high), "1" (__low));                      \
++      : "Jr" (base), "0" (high), "1" (low));                          \
+                                                                       \
+-      (__n) = __quot32;                                               \
++      (res) = __quot32;                                               \
+       __mod32;                                                        \
+ })
+ 
+-#endif /* BITS_PER_LONG == 64 */
++#define __div64_32(n, base) ({                                                \
++      unsigned long __upper, __low, __high, __radix;                  \
++      unsigned long long __modquot;                                   \
++      unsigned long long __quot;                                      \
++      unsigned long long __div;                                       \
++      unsigned long __mod;                                            \
++                                                                      \
++      __div = (*n);                                                   \
++      __radix = (base);                                               \
++                                                                      \
++      __high = __div >> 32;                                           \
++      __low = __div;                                                  \
++      __upper = __high;                                               \
++                                                                      \
++      if (__high) {                                                   \
++              __asm__("divu   $0, %z1, %z2"                           \
++              : "=x" (__modquot)                                      \
++              : "Jr" (__high), "Jr" (__radix));                       \
++              __upper = __modquot >> 32;                              \
++              __high = __modquot;                                     \
++      }                                                               \
++                                                                      \
++      __mod = do_div64_32(__low, __upper, __low, __radix);            \
++                                                                      \
++      __quot = __high;                                                \
++      __quot = __quot << 32 | __low;                                  \
++      (*n) = __quot;                                                  \
++      __mod;                                                          \
++})
++
++#endif /* BITS_PER_LONG == 32 */
++
++#include <asm-generic/div64.h>
+ 
+ #endif /* __ASM_DIV64_H */
diff --git a/queue-4.19/series b/queue-4.19/series

index 433c5c54c2a93bb49ed66ebfef01d681b0d5a7e8..cc03d2573c4a80855c79a2928703783d310d51d0 100644 (file)
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -383,3 +383,7 @@ iio-gyro-mpu3050-fix-reported-temperature-value.patch
  iio-tsl2583-fix-division-by-a-zero-lux_val.patch
  cdc-wdm-untangle-a-circular-dependency-between-callback-and-softint.patch
  kvm-x86-cancel-pvclock_gtod_work-on-module-removal.patch
+fddi-defxx-make-mmio-the-configuration-default-except-for-eisa.patch
+mips-reinstate-platform-__div64_32-handler.patch
+mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch
+mips-avoid-handcoded-divu-in-__div64_32-altogether.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 17 May 2021 10:10:12 +0000 (12:10 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 17 May 2021 10:10:12 +0000 (12:10 +0200)
queue-4.19/fddi-defxx-make-mmio-the-configuration-default-except-for-eisa.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/mips-reinstate-platform-__div64_32-handler.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/series		patch \| blob \| blame \| history