From: Greg Kroah-Hartman
Date: Mon, 15 May 2023 12:49:30 +0000 (+0200)
Subject: 6.2-stable patches
X-Git-Tag: v4.14.315~23
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=75c9323401a93717ee39f62fdce14fe5c9bb9b9d;p=thirdparty%2Fkernel%2Fstable-queue.git

6.2-stable patches

added patches:
      spi-fsl-cpm-use-16-bit-mode-for-large-transfers-with-even-size.patch
      spi-fsl-spi-re-organise-transfer-bits_per_word-adaptation.patch
      x86-fix-clear_user_rep_good-exception-handling-annotation.patch
---
diff --git a/queue-6.2/series b/queue-6.2/series
index 5e131d6427d..85f3d446cc4 100644
--- a/queue-6.2/series
+++ b/queue-6.2/series
@@ -236,3 +236,6 @@ ext4-remove-a-bug_on-in-ext4_mb_release_group_pa.patch
 ext4-fix-invalid-free-tracking-in-ext4_xattr_move_to_block.patch
 drm-dsc-fix-dp_dsc_max_bpp_delta_-macro-values.patch
 x86-amd_nb-add-pci-id-for-family-19h-model-78h.patch
+x86-fix-clear_user_rep_good-exception-handling-annotation.patch
+spi-fsl-spi-re-organise-transfer-bits_per_word-adaptation.patch
+spi-fsl-cpm-use-16-bit-mode-for-large-transfers-with-even-size.patch
diff --git a/queue-6.2/spi-fsl-cpm-use-16-bit-mode-for-large-transfers-with-even-size.patch b/queue-6.2/spi-fsl-cpm-use-16-bit-mode-for-large-transfers-with-even-size.patch
new file mode 100644
index 00000000000..ce873f13b89
--- /dev/null
+++ b/queue-6.2/spi-fsl-cpm-use-16-bit-mode-for-large-transfers-with-even-size.patch
@@ -0,0 +1,94 @@
+From fc96ec826bced75cc6b9c07a4ac44bbf651337ab Mon Sep 17 00:00:00 2001
+From: Christophe Leroy
+Date: Sat, 1 Apr 2023 19:59:48 +0200
+Subject: spi: fsl-cpm: Use 16 bit mode for large transfers with even size
+
+From: Christophe Leroy
+
+commit fc96ec826bced75cc6b9c07a4ac44bbf651337ab upstream.
+
+On CPM, the RISC core is a lot more efficient when doing transfers
+in 16-bit chunks than in 8-bit chunks, but unfortunately the
+words need to be byte swapped, as seen in a previous commit.
+
+So, for large transfers with an even size, allocate a temporary tx
+buffer and byte-swap the data before and after the transfer.
+
+This change allows setting a higher speed for the transfer. For instance,
+on an MPC 8xx (CPM1 comms RISC processor), the documentation states
+that a transfer in byte mode at 1 kbit/s uses 0.200% of CPM load
+at 25 MHz, while a word transfer at the same speed uses 0.032%
+of CPM load. This means the speed can be 6 times higher in
+word mode for the same CPM load.
+
+For the time being, only do it on CPM1, as there must be a
+trade-off between the CPM load reduction and the CPU load required
+to byte swap the data.
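[ Editor's illustration, not part of the patch: a minimal sketch of the byte-swap idea described above, assuming an even transfer length. The helper name and standalone form are invented for the example; the real change is the fsl_spi_cpm_bufs() hunk in spi-fsl-cpm.c below. ]

#include <linux/types.h>
#include <linux/slab.h>
#include <asm/byteorder.h>

/*
 * Sketch: present an even-sized 8-bit buffer to the CPM as 16-bit
 * words while keeping the on-wire byte order.  cpu_to_le16p() swaps
 * on big-endian CPUs such as the MPC8xx and is a no-op on
 * little-endian ones.  The caller kfree()s the returned buffer once
 * the transfer completes.
 */
static u16 *sketch_swab_tx(const void *tx_buf, size_t len)
{
        const u16 *src = tx_buf;
        u16 *dst;
        size_t i;

        if (len & 1)                    /* 16-bit mode needs an even length */
                return NULL;

        dst = kmalloc(len, GFP_KERNEL);
        if (!dst)
                return NULL;

        for (i = 0; i < len / 2; i++)
                dst[i] = cpu_to_le16p(src + i);

        return dst;
}

[ The rx side is handled symmetrically after completion, as the fsl_spi_cpm_bufs_complete() hunk below shows. ]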
+ +Signed-off-by: Christophe Leroy +Link: https://lore.kernel.org/r/f2e981f20f92dd28983c3949702a09248c23845c.1680371809.git.christophe.leroy@csgroup.eu +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + drivers/spi/spi-fsl-cpm.c | 23 +++++++++++++++++++++++ + drivers/spi/spi-fsl-spi.c | 3 +++ + 2 files changed, 26 insertions(+) + +--- a/drivers/spi/spi-fsl-cpm.c ++++ b/drivers/spi/spi-fsl-cpm.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + #include "spi-fsl-cpm.h" + #include "spi-fsl-lib.h" +@@ -120,6 +121,21 @@ int fsl_spi_cpm_bufs(struct mpc8xxx_spi + mspi->rx_dma = mspi->dma_dummy_rx; + mspi->map_rx_dma = 0; + } ++ if (t->bits_per_word == 16 && t->tx_buf) { ++ const u16 *src = t->tx_buf; ++ u16 *dst; ++ int i; ++ ++ dst = kmalloc(t->len, GFP_KERNEL); ++ if (!dst) ++ return -ENOMEM; ++ ++ for (i = 0; i < t->len >> 1; i++) ++ dst[i] = cpu_to_le16p(src + i); ++ ++ mspi->tx = dst; ++ mspi->map_tx_dma = 1; ++ } + + if (mspi->map_tx_dma) { + void *nonconst_tx = (void *)mspi->tx; /* shut up gcc */ +@@ -173,6 +189,13 @@ void fsl_spi_cpm_bufs_complete(struct mp + if (mspi->map_rx_dma) + dma_unmap_single(dev, mspi->rx_dma, t->len, DMA_FROM_DEVICE); + mspi->xfer_in_progress = NULL; ++ ++ if (t->bits_per_word == 16 && t->rx_buf) { ++ int i; ++ ++ for (i = 0; i < t->len; i += 2) ++ le16_to_cpus(t->rx_buf + i); ++ } + } + EXPORT_SYMBOL_GPL(fsl_spi_cpm_bufs_complete); + +--- a/drivers/spi/spi-fsl-spi.c ++++ b/drivers/spi/spi-fsl-spi.c +@@ -351,6 +351,9 @@ static int fsl_spi_prepare_message(struc + return -EINVAL; + if (t->bits_per_word == 16 || t->bits_per_word == 32) + t->bits_per_word = 8; /* pretend its 8 bits */ ++ if (t->bits_per_word == 8 && t->len >= 256 && ++ (mpc8xxx_spi->flags & SPI_CPM1)) ++ t->bits_per_word = 16; + } + } + return fsl_spi_setup_transfer(m->spi, first); diff --git a/queue-6.2/spi-fsl-spi-re-organise-transfer-bits_per_word-adaptation.patch b/queue-6.2/spi-fsl-spi-re-organise-transfer-bits_per_word-adaptation.patch new file mode 100644 index 00000000000..d2544eb64b9 --- /dev/null +++ b/queue-6.2/spi-fsl-spi-re-organise-transfer-bits_per_word-adaptation.patch @@ -0,0 +1,105 @@ +From 8a5299a1278eadf1e08a598a5345c376206f171e Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Sat, 1 Apr 2023 19:59:47 +0200 +Subject: spi: fsl-spi: Re-organise transfer bits_per_word adaptation + +From: Christophe Leroy + +commit 8a5299a1278eadf1e08a598a5345c376206f171e upstream. + +For different reasons, fsl-spi driver performs bits_per_word +modifications for different reasons: +- On CPU mode, to minimise amount of interrupts +- On CPM/QE mode to work around controller byte order + +For CPU mode that's done in fsl_spi_prepare_message() while +for CPM mode that's done in fsl_spi_setup_transfer(). + +Reunify all of it in fsl_spi_prepare_message(), and catch +impossible cases early through master's bits_per_word_mask +instead of returning EINVAL later. 
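[ Editor's illustration, not part of the patch: a minimal sketch of the bits_per_word_mask mechanism mentioned above. The function name is invented; the patch itself sets the mask in fsl_spi_probe() as shown further down. ]

#include <linux/spi/spi.h>

/*
 * Sketch: advertise the supported word sizes so the SPI core rejects
 * unsupported transfers before they ever reach the driver, instead of
 * the driver returning -EINVAL from its own transfer path.
 */
static void sketch_advertise_word_sizes(struct spi_master *master)
{
        master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 8) |
                                     SPI_BPW_MASK(16) |
                                     SPI_BPW_MASK(32);
}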
+ +Signed-off-by: Christophe Leroy +Link: https://lore.kernel.org/r/0ce96fe96e8b07cba0613e4097cfd94d09b8919a.1680371809.git.christophe.leroy@csgroup.eu +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + drivers/spi/spi-fsl-spi.c | 46 +++++++++++++++++++++------------------------- + 1 file changed, 21 insertions(+), 25 deletions(-) + +--- a/drivers/spi/spi-fsl-spi.c ++++ b/drivers/spi/spi-fsl-spi.c +@@ -177,26 +177,6 @@ static int mspi_apply_cpu_mode_quirks(st + return bits_per_word; + } + +-static int mspi_apply_qe_mode_quirks(struct spi_mpc8xxx_cs *cs, +- struct spi_device *spi, +- int bits_per_word) +-{ +- /* CPM/QE uses Little Endian for words > 8 +- * so transform 16 and 32 bits words into 8 bits +- * Unfortnatly that doesn't work for LSB so +- * reject these for now */ +- /* Note: 32 bits word, LSB works iff +- * tfcr/rfcr is set to CPMFCR_GBL */ +- if (spi->mode & SPI_LSB_FIRST && +- bits_per_word > 8) +- return -EINVAL; +- if (bits_per_word <= 8) +- return bits_per_word; +- if (bits_per_word == 16 || bits_per_word == 32) +- return 8; /* pretend its 8 bits */ +- return -EINVAL; +-} +- + static int fsl_spi_setup_transfer(struct spi_device *spi, + struct spi_transfer *t) + { +@@ -224,9 +204,6 @@ static int fsl_spi_setup_transfer(struct + bits_per_word = mspi_apply_cpu_mode_quirks(cs, spi, + mpc8xxx_spi, + bits_per_word); +- else +- bits_per_word = mspi_apply_qe_mode_quirks(cs, spi, +- bits_per_word); + + if (bits_per_word < 0) + return bits_per_word; +@@ -361,6 +338,19 @@ static int fsl_spi_prepare_message(struc + t->bits_per_word = 32; + else if ((t->len & 1) == 0) + t->bits_per_word = 16; ++ } else { ++ /* ++ * CPM/QE uses Little Endian for words > 8 ++ * so transform 16 and 32 bits words into 8 bits ++ * Unfortnatly that doesn't work for LSB so ++ * reject these for now ++ * Note: 32 bits word, LSB works iff ++ * tfcr/rfcr is set to CPMFCR_GBL ++ */ ++ if (m->spi->mode & SPI_LSB_FIRST && t->bits_per_word > 8) ++ return -EINVAL; ++ if (t->bits_per_word == 16 || t->bits_per_word == 32) ++ t->bits_per_word = 8; /* pretend its 8 bits */ + } + } + return fsl_spi_setup_transfer(m->spi, first); +@@ -594,8 +584,14 @@ static struct spi_master *fsl_spi_probe( + if (mpc8xxx_spi->type == TYPE_GRLIB) + fsl_spi_grlib_probe(dev); + +- master->bits_per_word_mask = +- (SPI_BPW_RANGE_MASK(4, 16) | SPI_BPW_MASK(32)) & ++ if (mpc8xxx_spi->flags & SPI_CPM_MODE) ++ master->bits_per_word_mask = ++ (SPI_BPW_RANGE_MASK(4, 8) | SPI_BPW_MASK(16) | SPI_BPW_MASK(32)); ++ else ++ master->bits_per_word_mask = ++ (SPI_BPW_RANGE_MASK(4, 16) | SPI_BPW_MASK(32)); ++ ++ master->bits_per_word_mask &= + SPI_BPW_RANGE_MASK(1, mpc8xxx_spi->max_bits_per_word); + + if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE) diff --git a/queue-6.2/x86-fix-clear_user_rep_good-exception-handling-annotation.patch b/queue-6.2/x86-fix-clear_user_rep_good-exception-handling-annotation.patch new file mode 100644 index 00000000000..957bad252d1 --- /dev/null +++ b/queue-6.2/x86-fix-clear_user_rep_good-exception-handling-annotation.patch @@ -0,0 +1,97 @@ +From 6904bfe4df72f9cb8e5fb465e1075b39402c5c6c Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Sun, 14 May 2023 15:46:19 -0700 +Subject: x86: fix clear_user_rep_good() exception handling annotation + +From: Linus Torvalds + +This code no longer exists in mainline, because it was removed in +commit d2c95f9d6802 ("x86: don't use REP_GOOD or ERMS for user memory +clearing") upstream. 
+
+However, rather than backport the full range of x86 memory clearing and
+copying cleanups, fix the exception table annotation placement for the
+final 'rep stosb' in clear_user_rep_good(): rather than pointing at the
+actual instruction that did the user space access, it pointed to the
+register move just before it.
+
+That made sense from a code flow standpoint, but not from an actual
+usage standpoint: it means that if the user access takes an exception, the
+exception handler won't actually find the instruction in the exception
+tables.
+
+As a result, rather than fixing it up and returning -EFAULT, it would
+then turn it into a kernel oops report instead, something like:
+
+  BUG: unable to handle page fault for address: 0000000020081000
+  #PF: supervisor write access in kernel mode
+  #PF: error_code(0x0002) - not-present page
+  ...
+  RIP: 0010:clear_user_rep_good+0x1c/0x30 arch/x86/lib/clear_page_64.S:147
+  ...
+  Call Trace:
+   __clear_user arch/x86/include/asm/uaccess_64.h:103 [inline]
+   clear_user arch/x86/include/asm/uaccess_64.h:124 [inline]
+   iov_iter_zero+0x709/0x1290 lib/iov_iter.c:800
+   iomap_dio_hole_iter fs/iomap/direct-io.c:389 [inline]
+   iomap_dio_iter fs/iomap/direct-io.c:440 [inline]
+   __iomap_dio_rw+0xe3d/0x1cd0 fs/iomap/direct-io.c:601
+   iomap_dio_rw+0x40/0xa0 fs/iomap/direct-io.c:689
+   ext4_dio_read_iter fs/ext4/file.c:94 [inline]
+   ext4_file_read_iter+0x4be/0x690 fs/ext4/file.c:145
+   call_read_iter include/linux/fs.h:2183 [inline]
+   do_iter_readv_writev+0x2e0/0x3b0 fs/read_write.c:733
+   do_iter_read+0x2f2/0x750 fs/read_write.c:796
+   vfs_readv+0xe5/0x150 fs/read_write.c:916
+   do_preadv+0x1b6/0x270 fs/read_write.c:1008
+   __do_sys_preadv2 fs/read_write.c:1070 [inline]
+   __se_sys_preadv2 fs/read_write.c:1061 [inline]
+   __x64_sys_preadv2+0xef/0x150 fs/read_write.c:1061
+   do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+   do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80
+   entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+which then looks like a filesystem bug rather than the incorrect
+exception annotation that it is.
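[ Editor's illustration, not part of the patch: a rough sketch, for x86-64, of why the annotation placement matters — the fixup is found by the exact faulting instruction pointer. The function name is invented; the real lookup happens in the kernel's fixup_exception() path. ]

#include <linux/extable.h>
#include <linux/ptrace.h>

/*
 * Sketch: the page-fault path can only fix up a user-access fault if
 * the *faulting* instruction pointer has an exception-table entry.
 * With the annotation on the preceding register move, this lookup
 * misses for the 'rep stosb' and the fault escalates to an oops.
 */
static bool sketch_fault_has_fixup(struct pt_regs *regs)
{
        return search_exception_tables(regs->ip) != NULL;
}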
+ +[ The alternative to this one-liner fix is to take the upstream series + that cleans this all up: + + 68674f94ffc9 ("x86: don't use REP_GOOD or ERMS for small memory copies") + 20f3337d350c ("x86: don't use REP_GOOD or ERMS for small memory clearing") + adfcf4231b8c ("x86: don't use REP_GOOD or ERMS for user memory copies") + * d2c95f9d6802 ("x86: don't use REP_GOOD or ERMS for user memory clearing") + 3639a535587d ("x86: move stac/clac from user copy routines into callers") + 577e6a7fd50d ("x86: inline the 'rep movs' in user copies for the FSRM case") + 8c9b6a88b7e2 ("x86: improve on the non-rep 'clear_user' function") + 427fda2c8a49 ("x86: improve on the non-rep 'copy_user' function") + * e046fe5a36a9 ("x86: set FSRS automatically on AMD CPUs that have FSRM") + e1f2750edc4a ("x86: remove 'zerorest' argument from __copy_user_nocache()") + 034ff37d3407 ("x86: rewrite '__copy_user_nocache' function") + + with either the whole series or at a minimum the two marked commits + being needed to fix this issue ] + +Reported-by: syzbot +Link: https://syzkaller.appspot.com/bug?extid=401145a9a237779feb26 +Fixes: 0db7058e8e23 ("x86/clear_user: Make it faster") +Cc: Borislav Petkov +Cc: stable@kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/lib/clear_page_64.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/lib/clear_page_64.S ++++ b/arch/x86/lib/clear_page_64.S +@@ -142,8 +142,8 @@ SYM_FUNC_START(clear_user_rep_good) + and $7, %edx + jz .Lrep_good_exit + +-.Lrep_good_bytes: + mov %edx, %ecx ++.Lrep_good_bytes: + rep stosb + + .Lrep_good_exit: