+++ /dev/null
-From c1d337d45ec0a802299688e17d568c4e3a585895 Mon Sep 17 00:00:00 2001
-From: "Maciej W. Rozycki" <macro@orcam.me.uk>
-Date: Tue, 20 Apr 2021 04:50:48 +0200
-Subject: MIPS: Avoid DIVU in `__div64_32' is result would be zero
-
-From: Maciej W. Rozycki <macro@orcam.me.uk>
-
-commit c1d337d45ec0a802299688e17d568c4e3a585895 upstream.
-
-We already check the high part of the dividend against zero to avoid the
-costly DIVU instruction in that case, needed to reduce the high part of
-the dividend, so we may well check against the divisor instead and set
-the high part of the quotient to zero right away. We need to treat the
-high part of the dividend in that case though as the remainder that would
-be calculated by the DIVU instruction we avoided.
-
-This has passed correctness verification with test_div64 and reduced the
-module's average execution time down to 1.0445s and 0.2619s from 1.0668s
-and 0.2629s respectively for an R3400 CPU @40MHz and a 5Kc CPU @160MHz.
-
-Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
-Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/mips/include/asm/div64.h | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
-
---- a/arch/mips/include/asm/div64.h
-+++ b/arch/mips/include/asm/div64.h
-@@ -68,9 +68,11 @@
- \
- __high = __div >> 32; \
- __low = __div; \
-- __upper = __high; \
- \
-- if (__high) { \
-+ if (__high < __radix) { \
-+ __upper = __high; \
-+ __high = 0; \
-+ } else { \
- __asm__("divu $0, %z1, %z2" \
- : "=x" (__modquot) \
- : "Jr" (__high), "Jr" (__radix)); \
+++ /dev/null
-From 25ab14cbe9d1b66fda44c71a2db7582a31b6f5cd Mon Sep 17 00:00:00 2001
-From: "Maciej W. Rozycki" <macro@orcam.me.uk>
-Date: Thu, 22 Apr 2021 22:36:12 +0200
-Subject: MIPS: Avoid handcoded DIVU in `__div64_32' altogether
-
-From: Maciej W. Rozycki <macro@orcam.me.uk>
-
-commit 25ab14cbe9d1b66fda44c71a2db7582a31b6f5cd upstream.
-
-Remove the inline asm with a DIVU instruction from `__div64_32' and use
-plain C code for the intended DIVMOD calculation instead. GCC is smart
-enough to know that both the quotient and the remainder are calculated
-with single DIVU, so with ISAs up to R5 the same instruction is actually
-produced with overall similar code.
-
-For R6 compiled code will work, but separate DIVU and MODU instructions
-will be produced, which are also interlocked, so scalar implementations
-will likely not perform as well as older ISAs with their asynchronous MD
-unit. Likely still faster than the generic algorithm though.
-
-This removes a compilation error for R6 however where the original DIVU
-instruction is not supported anymore and the MDU accumulator registers
-have been removed and consequently GCC complains as to a constraint it
-cannot find a register for:
-
-In file included from ./include/linux/math.h:5,
- from ./include/linux/kernel.h:13,
- from mm/page-writeback.c:15:
-./include/linux/math64.h: In function 'div_u64_rem':
-./arch/mips/include/asm/div64.h:76:17: error: inconsistent operand constraints in an 'asm'
- 76 | __asm__("divu $0, %z1, %z2" \
- | ^~~~~~~
-./include/asm-generic/div64.h:245:25: note: in expansion of macro '__div64_32'
- 245 | __rem = __div64_32(&(n), __base); \
- | ^~~~~~~~~~
-./include/linux/math64.h:91:22: note: in expansion of macro 'do_div'
- 91 | *remainder = do_div(dividend, divisor);
- | ^~~~~~
-
-This has passed correctness verification with test_div64 and reduced the
-module's average execution time down to 1.0404s from 1.0445s with R3400
-@40MHz. The module's MIPS I machine code has also shrunk by 12 bytes or
-3 instructions.
-
-Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
-Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/mips/include/asm/div64.h | 8 ++------
- 1 file changed, 2 insertions(+), 6 deletions(-)
-
---- a/arch/mips/include/asm/div64.h
-+++ b/arch/mips/include/asm/div64.h
-@@ -58,7 +58,6 @@
-
- #define __div64_32(n, base) ({ \
- unsigned long __upper, __low, __high, __radix; \
-- unsigned long long __modquot; \
- unsigned long long __quot; \
- unsigned long long __div; \
- unsigned long __mod; \
-@@ -73,11 +72,8 @@
- __upper = __high; \
- __high = 0; \
- } else { \
-- __asm__("divu $0, %z1, %z2" \
-- : "=x" (__modquot) \
-- : "Jr" (__high), "Jr" (__radix)); \
-- __upper = __modquot >> 32; \
-- __high = __modquot; \
-+ __upper = __high % __radix; \
-+ __high /= __radix; \
- } \
- \
- __mod = do_div64_32(__low, __upper, __low, __radix); \
+++ /dev/null
-From c49f71f60754acbff37505e1d16ca796bf8a8140 Mon Sep 17 00:00:00 2001
-From: "Maciej W. Rozycki" <macro@orcam.me.uk>
-Date: Tue, 20 Apr 2021 04:50:40 +0200
-Subject: MIPS: Reinstate platform `__div64_32' handler
-
-From: Maciej W. Rozycki <macro@orcam.me.uk>
-
-commit c49f71f60754acbff37505e1d16ca796bf8a8140 upstream.
-
-Our current MIPS platform `__div64_32' handler is inactive, because it
-is incorrectly only enabled for 64-bit configurations, for which generic
-`do_div' code does not call it anyway.
-
-The handler is not suitable for being called from there though as it
-only calculates 32 bits of the quotient under the assumption the 64-bit
-dividend has been suitably reduced. Code for such reduction used to be
-there, however it has been incorrectly removed with commit c21004cd5b4c
-("MIPS: Rewrite <asm/div64.h> to work with gcc 4.4.0."), which should
-have only updated an obsoleted constraint for an inline asm involving
-$hi and $lo register outputs, while possibly wiring the original MIPS
-variant of the `do_div' macro as `__div64_32' handler for the generic
-`do_div' implementation.
-
-Correct the handler as follows then:
-
-- Revert most of the commit referred to, however retaining the current
- formatting, except for the final two instructions of the inline asm
- sequence, which the original commit missed. Omit the original 64-bit
- parts though.
-
-- Rename the original `do_div' macro to `__div64_32'. Use the combined
- `x' constraint referring to the MD accumulator as a whole, replacing
- the original individual `h' and `l' constraints used for $hi and $lo
- registers respectively, of which `h' has been obsoleted with GCC 4.4.
- Update surrounding code accordingly.
-
- We have since removed support for GCC versions before 4.9, so no need
- for a special arrangement here; GCC has supported the `x' constraint
- since forever anyway, or at least going back to 1991.
-
-- Rename the `__base' local variable in `__div64_32' to `__radix' to
- avoid a conflict with a local variable in `do_div'.
-
-- Actually enable this code for 32-bit rather than 64-bit configurations
- by qualifying it with BITS_PER_LONG being 32 instead of 64. Include
- <asm/bitsperlong.h> for this macro rather than <linux/types.h> as we
- don't need anything else.
-
-- Finally include <asm-generic/div64.h> last rather than first.
-
-This has passed correctness verification with test_div64 and reduced the
-module's average execution time down to 1.0668s and 0.2629s from 2.1529s
-and 0.5647s respectively for an R3400 CPU @40MHz and a 5Kc CPU @160MHz.
-For a reference 64-bit `do_div' code where we have the DDIVU instruction
-available to do the whole calculation right away averages at 0.0660s for
-the latter CPU.
-
-Fixes: c21004cd5b4c ("MIPS: Rewrite <asm/div64.h> to work with gcc 4.4.0.")
-Reported-by: Huacai Chen <chenhuacai@kernel.org>
-Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
-Cc: stable@vger.kernel.org # v2.6.30+
-Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/mips/include/asm/div64.h | 57 ++++++++++++++++++++++++++++++------------
- 1 file changed, 41 insertions(+), 16 deletions(-)
-
---- a/arch/mips/include/asm/div64.h
-+++ b/arch/mips/include/asm/div64.h
-@@ -1,5 +1,5 @@
- /*
-- * Copyright (C) 2000, 2004 Maciej W. Rozycki
-+ * Copyright (C) 2000, 2004, 2021 Maciej W. Rozycki
- * Copyright (C) 2003, 07 Ralf Baechle (ralf@linux-mips.org)
- *
- * This file is subject to the terms and conditions of the GNU General Public
-@@ -9,25 +9,18 @@
- #ifndef __ASM_DIV64_H
- #define __ASM_DIV64_H
-
--#include <asm-generic/div64.h>
--
--#if BITS_PER_LONG == 64
-+#include <asm/bitsperlong.h>
-
--#include <linux/types.h>
-+#if BITS_PER_LONG == 32
-
- /*
- * No traps on overflows for any of these...
- */
-
--#define __div64_32(n, base) \
--({ \
-+#define do_div64_32(res, high, low, base) ({ \
- unsigned long __cf, __tmp, __tmp2, __i; \
- unsigned long __quot32, __mod32; \
-- unsigned long __high, __low; \
-- unsigned long long __n; \
- \
-- __high = *__n >> 32; \
-- __low = __n; \
- __asm__( \
- " .set push \n" \
- " .set noat \n" \
-@@ -51,18 +44,50 @@
- " subu %0, %0, %z6 \n" \
- " addiu %2, %2, 1 \n" \
- "3: \n" \
-- " bnez %4, 0b\n\t" \
-- " srl %5, %1, 0x1f\n\t" \
-+ " bnez %4, 0b \n" \
-+ " srl %5, %1, 0x1f \n" \
- " .set pop" \
- : "=&r" (__mod32), "=&r" (__tmp), \
- "=&r" (__quot32), "=&r" (__cf), \
- "=&r" (__i), "=&r" (__tmp2) \
-- : "Jr" (base), "0" (__high), "1" (__low)); \
-+ : "Jr" (base), "0" (high), "1" (low)); \
- \
-- (__n) = __quot32; \
-+ (res) = __quot32; \
- __mod32; \
- })
-
--#endif /* BITS_PER_LONG == 64 */
-+#define __div64_32(n, base) ({ \
-+ unsigned long __upper, __low, __high, __radix; \
-+ unsigned long long __modquot; \
-+ unsigned long long __quot; \
-+ unsigned long long __div; \
-+ unsigned long __mod; \
-+ \
-+ __div = (*n); \
-+ __radix = (base); \
-+ \
-+ __high = __div >> 32; \
-+ __low = __div; \
-+ __upper = __high; \
-+ \
-+ if (__high) { \
-+ __asm__("divu $0, %z1, %z2" \
-+ : "=x" (__modquot) \
-+ : "Jr" (__high), "Jr" (__radix)); \
-+ __upper = __modquot >> 32; \
-+ __high = __modquot; \
-+ } \
-+ \
-+ __mod = do_div64_32(__low, __upper, __low, __radix); \
-+ \
-+ __quot = __high; \
-+ __quot = __quot << 32 | __low; \
-+ (*n) = __quot; \
-+ __mod; \
-+})
-+
-+#endif /* BITS_PER_LONG == 32 */
-+
-+#include <asm-generic/div64.h>
-
- #endif /* __ASM_DIV64_H */