From 14ac676d8741ada0b499bbc98330f18d86e0f549 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 17 May 2021 12:11:04 +0200 Subject: [PATCH] 5.10-stable patches added patches: mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch mips-avoid-handcoded-divu-in-__div64_32-altogether.patch mips-reinstate-platform-__div64_32-handler.patch mm-fix-struct-page-layout-on-32-bit-systems.patch --- ...n-__div64_32-is-result-would-be-zero.patch | 43 +++++ ...dcoded-divu-in-__div64_32-altogether.patch | 75 ++++++++ ...einstate-platform-__div64_32-handler.patch | 161 ++++++++++++++++++ ...struct-page-layout-on-32-bit-systems.patch | 116 +++++++++++++ queue-5.10/series | 4 + 5 files changed, 399 insertions(+) create mode 100644 queue-5.10/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch create mode 100644 queue-5.10/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch create mode 100644 queue-5.10/mips-reinstate-platform-__div64_32-handler.patch create mode 100644 queue-5.10/mm-fix-struct-page-layout-on-32-bit-systems.patch diff --git a/queue-5.10/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch b/queue-5.10/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch new file mode 100644 index 00000000000..1305d9937bd --- /dev/null +++ b/queue-5.10/mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch @@ -0,0 +1,43 @@ +From c1d337d45ec0a802299688e17d568c4e3a585895 Mon Sep 17 00:00:00 2001 +From: "Maciej W. Rozycki" +Date: Tue, 20 Apr 2021 04:50:48 +0200 +Subject: MIPS: Avoid DIVU in `__div64_32' is result would be zero + +From: Maciej W. Rozycki + +commit c1d337d45ec0a802299688e17d568c4e3a585895 upstream. + +We already check the high part of the dividend against zero to avoid the +costly DIVU instruction in that case, needed to reduce the high part of +the dividend, so we may well check against the divisor instead and set +the high part of the quotient to zero right away.
We need to treat the +high part of the dividend in that case though as the remainder that would +be calculated by the DIVU instruction we avoided. + +This has passed correctness verification with test_div64 and reduced the +module's average execution time down to 1.0445s and 0.2619s from 1.0668s +and 0.2629s respectively for an R3400 CPU @40MHz and a 5Kc CPU @160MHz. + +Signed-off-by: Maciej W. Rozycki +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Greg Kroah-Hartman +--- + arch/mips/include/asm/div64.h | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/arch/mips/include/asm/div64.h ++++ b/arch/mips/include/asm/div64.h +@@ -68,9 +68,11 @@ + \ + __high = __div >> 32; \ + __low = __div; \ +- __upper = __high; \ + \ +- if (__high) { \ ++ if (__high < __radix) { \ ++ __upper = __high; \ ++ __high = 0; \ ++ } else { \ + __asm__("divu $0, %z1, %z2" \ + : "=x" (__modquot) \ + : "Jr" (__high), "Jr" (__radix)); \ diff --git a/queue-5.10/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch b/queue-5.10/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch new file mode 100644 index 00000000000..6c01ec7853e --- /dev/null +++ b/queue-5.10/mips-avoid-handcoded-divu-in-__div64_32-altogether.patch @@ -0,0 +1,75 @@ +From 25ab14cbe9d1b66fda44c71a2db7582a31b6f5cd Mon Sep 17 00:00:00 2001 +From: "Maciej W. Rozycki" +Date: Thu, 22 Apr 2021 22:36:12 +0200 +Subject: MIPS: Avoid handcoded DIVU in `__div64_32' altogether + +From: Maciej W. Rozycki + +commit 25ab14cbe9d1b66fda44c71a2db7582a31b6f5cd upstream. + +Remove the inline asm with a DIVU instruction from `__div64_32' and use +plain C code for the intended DIVMOD calculation instead. GCC is smart +enough to know that both the quotient and the remainder are calculated +with a single DIVU, so with ISAs up to R5 the same instruction is actually +produced with overall similar code.
+ +For R6 compiled code will work, but separate DIVU and MODU instructions +will be produced, which are also interlocked, so scalar implementations +will likely not perform as well as older ISAs with their asynchronous MD +unit. Likely still faster than the generic algorithm though. + +This removes a compilation error for R6 however where the original DIVU +instruction is not supported anymore and the MDU accumulator registers +have been removed and consequently GCC complains as to a constraint it +cannot find a register for: + +In file included from ./include/linux/math.h:5, + from ./include/linux/kernel.h:13, + from mm/page-writeback.c:15: +./include/linux/math64.h: In function 'div_u64_rem': +./arch/mips/include/asm/div64.h:76:17: error: inconsistent operand constraints in an 'asm' + 76 | __asm__("divu $0, %z1, %z2" \ + | ^~~~~~~ +./include/asm-generic/div64.h:245:25: note: in expansion of macro '__div64_32' + 245 | __rem = __div64_32(&(n), __base); \ + | ^~~~~~~~~~ +./include/linux/math64.h:91:22: note: in expansion of macro 'do_div' + 91 | *remainder = do_div(dividend, divisor); + | ^~~~~~ + +This has passed correctness verification with test_div64 and reduced the +module's average execution time down to 1.0404s from 1.0445s with R3400 +@40MHz. The module's MIPS I machine code has also shrunk by 12 bytes or +3 instructions. + +Signed-off-by: Maciej W.
Rozycki +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Greg Kroah-Hartman +--- + arch/mips/include/asm/div64.h | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/arch/mips/include/asm/div64.h ++++ b/arch/mips/include/asm/div64.h +@@ -58,7 +58,6 @@ + + #define __div64_32(n, base) ({ \ + unsigned long __upper, __low, __high, __radix; \ +- unsigned long long __modquot; \ + unsigned long long __quot; \ + unsigned long long __div; \ + unsigned long __mod; \ +@@ -73,11 +72,8 @@ + __upper = __high; \ + __high = 0; \ + } else { \ +- __asm__("divu $0, %z1, %z2" \ +- : "=x" (__modquot) \ +- : "Jr" (__high), "Jr" (__radix)); \ +- __upper = __modquot >> 32; \ +- __high = __modquot; \ ++ __upper = __high % __radix; \ ++ __high /= __radix; \ + } \ + \ + __mod = do_div64_32(__low, __upper, __low, __radix); \ diff --git a/queue-5.10/mips-reinstate-platform-__div64_32-handler.patch b/queue-5.10/mips-reinstate-platform-__div64_32-handler.patch new file mode 100644 index 00000000000..3a0267d6f28 --- /dev/null +++ b/queue-5.10/mips-reinstate-platform-__div64_32-handler.patch @@ -0,0 +1,161 @@ +From c49f71f60754acbff37505e1d16ca796bf8a8140 Mon Sep 17 00:00:00 2001 +From: "Maciej W. Rozycki" +Date: Tue, 20 Apr 2021 04:50:40 +0200 +Subject: MIPS: Reinstate platform `__div64_32' handler + +From: Maciej W. Rozycki + +commit c49f71f60754acbff37505e1d16ca796bf8a8140 upstream. + +Our current MIPS platform `__div64_32' handler is inactive, because it +is incorrectly only enabled for 64-bit configurations, for which generic +`do_div' code does not call it anyway. + +The handler is not suitable for being called from there though as it +only calculates 32 bits of the quotient under the assumption the 64-bit +dividend has been suitably reduced.
Code for such reduction used to be +there, however it has been incorrectly removed with commit c21004cd5b4c +("MIPS: Rewrite <asm/div64.h> to work with gcc 4.4.0."), which should +have only updated an obsoleted constraint for an inline asm involving +$hi and $lo register outputs, while possibly wiring the original MIPS +variant of the `do_div' macro as `__div64_32' handler for the generic +`do_div' implementation. + +Correct the handler as follows then: + +- Revert most of the commit referred to, however retaining the current + formatting, except for the final two instructions of the inline asm + sequence, which the original commit missed. Omit the original 64-bit + parts though. + +- Rename the original `do_div' macro to `__div64_32'. Use the combined + `x' constraint referring to the MD accumulator as a whole, replacing + the original individual `h' and `l' constraints used for $hi and $lo + registers respectively, of which `h' has been obsoleted with GCC 4.4. + Update surrounding code accordingly. + + We have since removed support for GCC versions before 4.9, so no need + for a special arrangement here; GCC has supported the `x' constraint + since forever anyway, or at least going back to 1991. + +- Rename the `__base' local variable in `__div64_32' to `__radix' to + avoid a conflict with a local variable in `do_div'. + +- Actually enable this code for 32-bit rather than 64-bit configurations + by qualifying it with BITS_PER_LONG being 32 instead of 64. Include + <asm/bitsperlong.h> for this macro rather than <linux/types.h> as we + don't need anything else. + +- Finally include <asm-generic/div64.h> last rather than first. + +This has passed correctness verification with test_div64 and reduced the +module's average execution time down to 1.0668s and 0.2629s from 2.1529s +and 0.5647s respectively for an R3400 CPU @40MHz and a 5Kc CPU @160MHz. +For a reference 64-bit `do_div' code where we have the DDIVU instruction +available to do the whole calculation right away averages at 0.0660s for +the latter CPU.
+ +Fixes: c21004cd5b4c ("MIPS: Rewrite <asm/div64.h> to work with gcc 4.4.0.") +Reported-by: Huacai Chen +Signed-off-by: Maciej W. Rozycki +Cc: stable@vger.kernel.org # v2.6.30+ +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Greg Kroah-Hartman +--- + arch/mips/include/asm/div64.h | 57 ++++++++++++++++++++++++++++++------------ + 1 file changed, 41 insertions(+), 16 deletions(-) + +--- a/arch/mips/include/asm/div64.h ++++ b/arch/mips/include/asm/div64.h +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2000, 2004 Maciej W. Rozycki ++ * Copyright (C) 2000, 2004, 2021 Maciej W. Rozycki + * Copyright (C) 2003, 07 Ralf Baechle (ralf@linux-mips.org) + * + * This file is subject to the terms and conditions of the GNU General Public +@@ -9,25 +9,18 @@ + #ifndef __ASM_DIV64_H + #define __ASM_DIV64_H + +-#include +- +-#if BITS_PER_LONG == 64 ++#include + +-#include ++#if BITS_PER_LONG == 32 + + /* + * No traps on overflows for any of these... + */ + +-#define __div64_32(n, base) \ +-({ \ ++#define do_div64_32(res, high, low, base) ({ \ + unsigned long __cf, __tmp, __tmp2, __i; \ + unsigned long __quot32, __mod32; \ +- unsigned long __high, __low; \ +- unsigned long long __n; \ + \ +- __high = *__n >> 32; \ +- __low = __n; \ + __asm__( \ + " .set push \n" \ + " .set noat \n" \ +@@ -51,18 +44,50 @@ + " subu %0, %0, %z6 \n" \ + " addiu %2, %2, 1 \n" \ + "3: \n" \ +- " bnez %4, 0b\n\t" \ +- " srl %5, %1, 0x1f\n\t" \ ++ " bnez %4, 0b \n" \ ++ " srl %5, %1, 0x1f \n" \ + " .set pop" \ + : "=&r" (__mod32), "=&r" (__tmp), \ + "=&r" (__quot32), "=&r" (__cf), \ + "=&r" (__i), "=&r" (__tmp2) \ +- : "Jr" (base), "0" (__high), "1" (__low)); \ ++ : "Jr" (base), "0" (high), "1" (low)); \ + \ +- (__n) = __quot32; \ ++ (res) = __quot32; \ + __mod32; \ + }) + +-#endif /* BITS_PER_LONG == 64 */ ++#define __div64_32(n, base) ({ \ ++ unsigned long __upper, __low, __high, __radix; \ ++ unsigned long long __modquot; \ ++ unsigned long long __quot; \ ++ unsigned long long __div; \ ++ unsigned long __mod; \ ++ \ ++
__div = (*n); \ ++ __radix = (base); \ ++ \ ++ __high = __div >> 32; \ ++ __low = __div; \ ++ __upper = __high; \ ++ \ ++ if (__high) { \ ++ __asm__("divu $0, %z1, %z2" \ ++ : "=x" (__modquot) \ ++ : "Jr" (__high), "Jr" (__radix)); \ ++ __upper = __modquot >> 32; \ ++ __high = __modquot; \ ++ } \ ++ \ ++ __mod = do_div64_32(__low, __upper, __low, __radix); \ ++ \ ++ __quot = __high; \ ++ __quot = __quot << 32 | __low; \ ++ (*n) = __quot; \ ++ __mod; \ ++}) ++ ++#endif /* BITS_PER_LONG == 32 */ ++ ++#include + + #endif /* __ASM_DIV64_H */ diff --git a/queue-5.10/mm-fix-struct-page-layout-on-32-bit-systems.patch b/queue-5.10/mm-fix-struct-page-layout-on-32-bit-systems.patch new file mode 100644 index 00000000000..d4229d559c5 --- /dev/null +++ b/queue-5.10/mm-fix-struct-page-layout-on-32-bit-systems.patch @@ -0,0 +1,116 @@ +From 9ddb3c14afba8bc5950ed297f02d4ae05ff35cd1 Mon Sep 17 00:00:00 2001 +From: "Matthew Wilcox (Oracle)" +Date: Fri, 14 May 2021 17:27:24 -0700 +Subject: mm: fix struct page layout on 32-bit systems + +From: Matthew Wilcox (Oracle) + +commit 9ddb3c14afba8bc5950ed297f02d4ae05ff35cd1 upstream. + +32-bit architectures which expect 8-byte alignment for 8-byte integers and +need 64-bit DMA addresses (arm, mips, ppc) had their struct page +inadvertently expanded in 2019. When the dma_addr_t was added, it forced +the alignment of the union to 8 bytes, which inserted a 4 byte gap between +'flags' and the union. + +Fix this by storing the dma_addr_t in one or two adjacent unsigned longs. +This restores the alignment to that of an unsigned long. We always +store the low bits in the first word to prevent the PageTail bit from +being inadvertently set on a big endian platform. If that happened, +get_user_pages_fast() racing against a page which was freed and +reallocated to the page_pool could dereference a bogus compound_head(), +which would be hard to trace back to this cause. 
+ +Link: https://lkml.kernel.org/r/20210510153211.1504886-1-willy@infradead.org +Fixes: c25fff7171be ("mm: add dma_addr_t to struct page") +Signed-off-by: Matthew Wilcox (Oracle) +Acked-by: Ilias Apalodimas +Acked-by: Jesper Dangaard Brouer +Acked-by: Vlastimil Babka +Tested-by: Matteo Croce +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm_types.h | 4 ++-- + include/net/page_pool.h | 12 +++++++++++- + net/core/page_pool.c | 12 +++++++----- + 3 files changed, 20 insertions(+), 8 deletions(-) + +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -97,10 +97,10 @@ struct page { + }; + struct { /* page_pool used by netstack */ + /** +- * @dma_addr: might require a 64-bit value even on ++ * @dma_addr: might require a 64-bit value on + * 32-bit architectures. + */ +- dma_addr_t dma_addr; ++ unsigned long dma_addr[2]; + }; + struct { /* slab, slob and slub */ + union { +--- a/include/net/page_pool.h ++++ b/include/net/page_pool.h +@@ -191,7 +191,17 @@ static inline void page_pool_recycle_dir + + static inline dma_addr_t page_pool_get_dma_addr(struct page *page) + { +- return page->dma_addr; ++ dma_addr_t ret = page->dma_addr[0]; ++ if (sizeof(dma_addr_t) > sizeof(unsigned long)) ++ ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16; ++ return ret; ++} ++ ++static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) ++{ ++ page->dma_addr[0] = addr; ++ if (sizeof(dma_addr_t) > sizeof(unsigned long)) ++ page->dma_addr[1] = upper_32_bits(addr); + } + + static inline bool is_page_pool_compiled_in(void) +--- a/net/core/page_pool.c ++++ b/net/core/page_pool.c +@@ -172,8 +172,10 @@ static void page_pool_dma_sync_for_devic + struct page *page, + unsigned int dma_sync_size) + { ++ dma_addr_t dma_addr = page_pool_get_dma_addr(page); ++ + dma_sync_size = min(dma_sync_size, pool->p.max_len); +- dma_sync_single_range_for_device(pool->p.dev, page->dma_addr, ++ 
dma_sync_single_range_for_device(pool->p.dev, dma_addr, + pool->p.offset, dma_sync_size, + pool->p.dma_dir); + } +@@ -224,7 +226,7 @@ static struct page *__page_pool_alloc_pa + put_page(page); + return NULL; + } +- page->dma_addr = dma; ++ page_pool_set_dma_addr(page, dma); + + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) + page_pool_dma_sync_for_device(pool, page, pool->p.max_len); +@@ -292,13 +294,13 @@ void page_pool_release_page(struct page_ + */ + goto skip_dma_unmap; + +- dma = page->dma_addr; ++ dma = page_pool_get_dma_addr(page); + +- /* When page is unmapped, it cannot be returned our pool */ ++ /* When page is unmapped, it cannot be returned to our pool */ + dma_unmap_page_attrs(pool->p.dev, dma, + PAGE_SIZE << pool->p.order, pool->p.dma_dir, + DMA_ATTR_SKIP_CPU_SYNC); +- page->dma_addr = 0; ++ page_pool_set_dma_addr(page, 0); + skip_dma_unmap: + /* This may be the last page returned, releasing the pool, so + * it is not safe to reference pool afterwards. diff --git a/queue-5.10/series b/queue-5.10/series index ad63dac676a..b644e897b47 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -262,3 +262,7 @@ revert-iommu-vt-d-remove-wo-permissions-on-second-level.patch revert-iommu-vt-d-preset-access-dirty-bits-for-iova.patch iommu-vt-d-preset-access-dirty-bits-for-iova-over-fl.patch iommu-vt-d-remove-wo-permissions-on-second-level-paging-entries.patch +mm-fix-struct-page-layout-on-32-bit-systems.patch +mips-reinstate-platform-__div64_32-handler.patch +mips-avoid-divu-in-__div64_32-is-result-would-be-zero.patch +mips-avoid-handcoded-divu-in-__div64_32-altogether.patch -- 2.47.3