From: Sasha Levin Date: Sat, 21 Oct 2023 00:51:08 +0000 (-0400) Subject: Fixes for 6.1 X-Git-Tag: v4.14.328~73 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ed23856c99dabe0dd00a7b52c55775cc9eb0fe0d;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/arm-dts-ti-omap-fix-noisy-serial-with-overrun-thrott.patch b/queue-6.1/arm-dts-ti-omap-fix-noisy-serial-with-overrun-thrott.patch new file mode 100644 index 00000000000..a2d3ef2a996 --- /dev/null +++ b/queue-6.1/arm-dts-ti-omap-fix-noisy-serial-with-overrun-thrott.patch @@ -0,0 +1,44 @@ +From c4474412884b2a90a28fca2cd771a4bbb7031e40 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Sep 2023 07:07:38 +0300 +Subject: ARM: dts: ti: omap: Fix noisy serial with overrun-throttle-ms for + mapphone + +From: Tony Lindgren + +[ Upstream commit 5ad37b5e30433afa7a5513e3eb61f69fa0976785 ] + +On mapphone devices we may get lots of noise on the micro-USB port in debug +uart mode until the phy-cpcap-usb driver probes. Let's limit the noise by +using overrun-throttle-ms. + +Note that there is also a related separate issue where the charger cable +connected may cause random sysrq requests until phy-cpcap-usb probes that +still remains. + +Cc: Ivaylo Dimitrov +Cc: Carl Philipp Klemm +Cc: Merlijn Wajer +Cc: Pavel Machek +Reviewed-by: Sebastian Reichel +Signed-off-by: Tony Lindgren +Signed-off-by: Sasha Levin +--- + arch/arm/boot/dts/motorola-mapphone-common.dtsi | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/arm/boot/dts/motorola-mapphone-common.dtsi b/arch/arm/boot/dts/motorola-mapphone-common.dtsi +index d69f0f4b4990d..d2d516d113baa 100644 +--- a/arch/arm/boot/dts/motorola-mapphone-common.dtsi ++++ b/arch/arm/boot/dts/motorola-mapphone-common.dtsi +@@ -640,6 +640,7 @@ &uart1 { + &uart3 { + interrupts-extended = <&wakeupgen GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH + &omap4_pmx_core 0x17c>; ++ overrun-throttle-ms = <500>; + }; + + &uart4 { +-- +2.40.1 + diff --git a/queue-6.1/ata-libata-core-fix-compilation-warning-in-ata_dev_c.patch b/queue-6.1/ata-libata-core-fix-compilation-warning-in-ata_dev_c.patch new file mode 100644 index 00000000000..dc63d06efbe --- /dev/null +++ b/queue-6.1/ata-libata-core-fix-compilation-warning-in-ata_dev_c.patch @@ -0,0 +1,60 @@ +From d2db047955304f59501d007f8c92d57d382f554e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Sep 2023 08:46:22 +0900 +Subject: ata: libata-core: Fix compilation warning in ata_dev_config_ncq() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Damien Le Moal + +[ Upstream commit ed518d9ba980dc0d27c7d1dea1e627ba001d1977 ] + +The 24 bytes length allocated to the ncq_desc string in +ata_dev_config_lba() for ata_dev_config_ncq() to use is too short, +causing the following gcc compilation warnings when compiling with W=1: + +drivers/ata/libata-core.c: In function ‘ata_dev_configure’: +drivers/ata/libata-core.c:2378:56: warning: ‘%d’ directive output may be truncated writing between 1 and 2 bytes into a region of size between 1 and 11 [-Wformat-truncation=] + 2378 | snprintf(desc, desc_sz, "NCQ (depth %d/%d)%s", hdepth, + | ^~ +In function ‘ata_dev_config_ncq’, + inlined from ‘ata_dev_config_lba’ at drivers/ata/libata-core.c:2649:8, + inlined from ‘ata_dev_configure’ at drivers/ata/libata-core.c:2952:9: +drivers/ata/libata-core.c:2378:41: note: directive argument in the range [1, 32] + 2378 | snprintf(desc, desc_sz, "NCQ (depth %d/%d)%s", hdepth, + | 
^~~~~~~~~~~~~~~~~~~~~
+drivers/ata/libata-core.c:2378:17: note: ‘snprintf’ output between 16 and 31 bytes into a destination of size 24
+ 2378 | snprintf(desc, desc_sz, "NCQ (depth %d/%d)%s", hdepth,
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ 2379 | ddepth, aa_desc);
+ | ~~~~~~~~~~~~~~~~
+
+Avoid these warnings and the potential truncation by changing the size
+of the ncq_desc string to 32 characters.
+
+Signed-off-by: Damien Le Moal
+Reviewed-by: Hannes Reinecke
+Tested-by: Geert Uytterhoeven
+Reviewed-by: Martin K. Petersen
+Signed-off-by: Sasha Levin
+---
+ drivers/ata/libata-core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
+index fbc231a3f7951..fa2fc1953fc26 100644
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -2456,7 +2456,7 @@ static int ata_dev_config_lba(struct ata_device *dev)
+ {
+ const u16 *id = dev->id;
+ const char *lba_desc;
+- char ncq_desc[24];
++ char ncq_desc[32];
+ int ret;
+
+ dev->flags |= ATA_DFLAG_LBA;
+--
+2.40.1
+
diff --git a/queue-6.1/ata-libata-eh-fix-compilation-warning-in-ata_eh_link.patch b/queue-6.1/ata-libata-eh-fix-compilation-warning-in-ata_eh_link.patch
new file mode 100644
index 00000000000..4cdfe5dbca9
--- /dev/null
+++ b/queue-6.1/ata-libata-eh-fix-compilation-warning-in-ata_eh_link.patch
@@ -0,0 +1,55 @@
+From d61d50c0572fca4d03e37735c22eea57d8dda8ec Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Tue, 12 Sep 2023 09:08:40 +0900
+Subject: ata: libata-eh: Fix compilation warning in ata_eh_link_report()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Damien Le Moal
+
+[ Upstream commit 49728bdc702391902a473b9393f1620eea32acb0 ]
+
+The 6 bytes length of the tries_buf string in ata_eh_link_report() is
+too short and results in a gcc compilation warning with W=1:
+
+drivers/ata/libata-eh.c: In function ‘ata_eh_link_report’:
+drivers/ata/libata-eh.c:2371:59: warning: ‘%d’ directive output may be truncated writing between 1 and 11 bytes into a region of size 4 [-Wformat-truncation=]
+ 2371 | snprintf(tries_buf, sizeof(tries_buf), " t%d",
+ | ^~
+drivers/ata/libata-eh.c:2371:56: note: directive argument in the range [-2147483648, 4]
+ 2371 | snprintf(tries_buf, sizeof(tries_buf), " t%d",
+ | ^~~~~~
+drivers/ata/libata-eh.c:2371:17: note: ‘snprintf’ output between 4 and 14 bytes into a destination of size 6
+ 2371 | snprintf(tries_buf, sizeof(tries_buf), " t%d",
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ 2372 | ap->eh_tries);
+ | ~~~~~~~~~~~~~
+
+Avoid this warning by increasing the string size to 16B.
+
+Signed-off-by: Damien Le Moal
+Reviewed-by: Hannes Reinecke
+Tested-by: Geert Uytterhoeven
+Reviewed-by: Martin K.
Petersen +Signed-off-by: Sasha Levin +--- + drivers/ata/libata-eh.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c +index 2a04dd36a4948..1eaaf01418ea7 100644 +--- a/drivers/ata/libata-eh.c ++++ b/drivers/ata/libata-eh.c +@@ -2247,7 +2247,7 @@ static void ata_eh_link_report(struct ata_link *link) + struct ata_eh_context *ehc = &link->eh_context; + struct ata_queued_cmd *qc; + const char *frozen, *desc; +- char tries_buf[6] = ""; ++ char tries_buf[16] = ""; + int tag, nr_failed = 0; + + if (ehc->i.flags & ATA_EHI_QUIET) +-- +2.40.1 + diff --git a/queue-6.1/atomics-provide-atomic_add_negative-variants.patch b/queue-6.1/atomics-provide-atomic_add_negative-variants.patch new file mode 100644 index 00000000000..b8689ca2f17 --- /dev/null +++ b/queue-6.1/atomics-provide-atomic_add_negative-variants.patch @@ -0,0 +1,481 @@ +From 2194a9643e933a16a92f83d3859f3916f95a5e42 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Mar 2023 21:55:30 +0100 +Subject: atomics: Provide atomic_add_negative() variants + +From: Thomas Gleixner + +[ Upstream commit e5ab9eff46b04c5a04778e40d7092fed3fda52ca ] + +atomic_add_negative() does not provide the relaxed/acquire/release +variants. + +Provide them in preparation for a new scalable reference count algorithm. + +Signed-off-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Mark Rutland +Link: https://lore.kernel.org/r/20230323102800.101763813@linutronix.de +Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem") +Signed-off-by: Sasha Levin +--- + include/linux/atomic/atomic-arch-fallback.h | 208 +++++++++++++++++++- + include/linux/atomic/atomic-instrumented.h | 68 ++++++- + include/linux/atomic/atomic-long.h | 38 +++- + scripts/atomic/atomics.tbl | 2 +- + scripts/atomic/fallbacks/add_negative | 11 +- + 5 files changed, 309 insertions(+), 18 deletions(-) + +diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h +index 77bc5522e61c6..4226379a232d5 100644 +--- a/include/linux/atomic/atomic-arch-fallback.h ++++ b/include/linux/atomic/atomic-arch-fallback.h +@@ -1208,15 +1208,21 @@ arch_atomic_inc_and_test(atomic_t *v) + #define arch_atomic_inc_and_test arch_atomic_inc_and_test + #endif + ++#ifndef arch_atomic_add_negative_relaxed ++#ifdef arch_atomic_add_negative ++#define arch_atomic_add_negative_acquire arch_atomic_add_negative ++#define arch_atomic_add_negative_release arch_atomic_add_negative ++#define arch_atomic_add_negative_relaxed arch_atomic_add_negative ++#endif /* arch_atomic_add_negative */ ++ + #ifndef arch_atomic_add_negative + /** +- * arch_atomic_add_negative - add and test if negative ++ * arch_atomic_add_negative - Add and test if negative + * @i: integer value to add + * @v: pointer of type atomic_t + * +- * Atomically adds @i to @v and returns true +- * if the result is negative, or false when +- * result is greater than or equal to zero. ++ * Atomically adds @i to @v and returns true if the result is negative, ++ * or false when the result is greater than or equal to zero. 
+ */ + static __always_inline bool + arch_atomic_add_negative(int i, atomic_t *v) +@@ -1226,6 +1232,95 @@ arch_atomic_add_negative(int i, atomic_t *v) + #define arch_atomic_add_negative arch_atomic_add_negative + #endif + ++#ifndef arch_atomic_add_negative_acquire ++/** ++ * arch_atomic_add_negative_acquire - Add and test if negative ++ * @i: integer value to add ++ * @v: pointer of type atomic_t ++ * ++ * Atomically adds @i to @v and returns true if the result is negative, ++ * or false when the result is greater than or equal to zero. ++ */ ++static __always_inline bool ++arch_atomic_add_negative_acquire(int i, atomic_t *v) ++{ ++ return arch_atomic_add_return_acquire(i, v) < 0; ++} ++#define arch_atomic_add_negative_acquire arch_atomic_add_negative_acquire ++#endif ++ ++#ifndef arch_atomic_add_negative_release ++/** ++ * arch_atomic_add_negative_release - Add and test if negative ++ * @i: integer value to add ++ * @v: pointer of type atomic_t ++ * ++ * Atomically adds @i to @v and returns true if the result is negative, ++ * or false when the result is greater than or equal to zero. ++ */ ++static __always_inline bool ++arch_atomic_add_negative_release(int i, atomic_t *v) ++{ ++ return arch_atomic_add_return_release(i, v) < 0; ++} ++#define arch_atomic_add_negative_release arch_atomic_add_negative_release ++#endif ++ ++#ifndef arch_atomic_add_negative_relaxed ++/** ++ * arch_atomic_add_negative_relaxed - Add and test if negative ++ * @i: integer value to add ++ * @v: pointer of type atomic_t ++ * ++ * Atomically adds @i to @v and returns true if the result is negative, ++ * or false when the result is greater than or equal to zero. ++ */ ++static __always_inline bool ++arch_atomic_add_negative_relaxed(int i, atomic_t *v) ++{ ++ return arch_atomic_add_return_relaxed(i, v) < 0; ++} ++#define arch_atomic_add_negative_relaxed arch_atomic_add_negative_relaxed ++#endif ++ ++#else /* arch_atomic_add_negative_relaxed */ ++ ++#ifndef arch_atomic_add_negative_acquire ++static __always_inline bool ++arch_atomic_add_negative_acquire(int i, atomic_t *v) ++{ ++ bool ret = arch_atomic_add_negative_relaxed(i, v); ++ __atomic_acquire_fence(); ++ return ret; ++} ++#define arch_atomic_add_negative_acquire arch_atomic_add_negative_acquire ++#endif ++ ++#ifndef arch_atomic_add_negative_release ++static __always_inline bool ++arch_atomic_add_negative_release(int i, atomic_t *v) ++{ ++ __atomic_release_fence(); ++ return arch_atomic_add_negative_relaxed(i, v); ++} ++#define arch_atomic_add_negative_release arch_atomic_add_negative_release ++#endif ++ ++#ifndef arch_atomic_add_negative ++static __always_inline bool ++arch_atomic_add_negative(int i, atomic_t *v) ++{ ++ bool ret; ++ __atomic_pre_full_fence(); ++ ret = arch_atomic_add_negative_relaxed(i, v); ++ __atomic_post_full_fence(); ++ return ret; ++} ++#define arch_atomic_add_negative arch_atomic_add_negative ++#endif ++ ++#endif /* arch_atomic_add_negative_relaxed */ ++ + #ifndef arch_atomic_fetch_add_unless + /** + * arch_atomic_fetch_add_unless - add unless the number is already a given value +@@ -2329,15 +2424,21 @@ arch_atomic64_inc_and_test(atomic64_t *v) + #define arch_atomic64_inc_and_test arch_atomic64_inc_and_test + #endif + ++#ifndef arch_atomic64_add_negative_relaxed ++#ifdef arch_atomic64_add_negative ++#define arch_atomic64_add_negative_acquire arch_atomic64_add_negative ++#define arch_atomic64_add_negative_release arch_atomic64_add_negative ++#define arch_atomic64_add_negative_relaxed arch_atomic64_add_negative ++#endif /* 
arch_atomic64_add_negative */ ++ + #ifndef arch_atomic64_add_negative + /** +- * arch_atomic64_add_negative - add and test if negative ++ * arch_atomic64_add_negative - Add and test if negative + * @i: integer value to add + * @v: pointer of type atomic64_t + * +- * Atomically adds @i to @v and returns true +- * if the result is negative, or false when +- * result is greater than or equal to zero. ++ * Atomically adds @i to @v and returns true if the result is negative, ++ * or false when the result is greater than or equal to zero. + */ + static __always_inline bool + arch_atomic64_add_negative(s64 i, atomic64_t *v) +@@ -2347,6 +2448,95 @@ arch_atomic64_add_negative(s64 i, atomic64_t *v) + #define arch_atomic64_add_negative arch_atomic64_add_negative + #endif + ++#ifndef arch_atomic64_add_negative_acquire ++/** ++ * arch_atomic64_add_negative_acquire - Add and test if negative ++ * @i: integer value to add ++ * @v: pointer of type atomic64_t ++ * ++ * Atomically adds @i to @v and returns true if the result is negative, ++ * or false when the result is greater than or equal to zero. ++ */ ++static __always_inline bool ++arch_atomic64_add_negative_acquire(s64 i, atomic64_t *v) ++{ ++ return arch_atomic64_add_return_acquire(i, v) < 0; ++} ++#define arch_atomic64_add_negative_acquire arch_atomic64_add_negative_acquire ++#endif ++ ++#ifndef arch_atomic64_add_negative_release ++/** ++ * arch_atomic64_add_negative_release - Add and test if negative ++ * @i: integer value to add ++ * @v: pointer of type atomic64_t ++ * ++ * Atomically adds @i to @v and returns true if the result is negative, ++ * or false when the result is greater than or equal to zero. ++ */ ++static __always_inline bool ++arch_atomic64_add_negative_release(s64 i, atomic64_t *v) ++{ ++ return arch_atomic64_add_return_release(i, v) < 0; ++} ++#define arch_atomic64_add_negative_release arch_atomic64_add_negative_release ++#endif ++ ++#ifndef arch_atomic64_add_negative_relaxed ++/** ++ * arch_atomic64_add_negative_relaxed - Add and test if negative ++ * @i: integer value to add ++ * @v: pointer of type atomic64_t ++ * ++ * Atomically adds @i to @v and returns true if the result is negative, ++ * or false when the result is greater than or equal to zero. 
++ */ ++static __always_inline bool ++arch_atomic64_add_negative_relaxed(s64 i, atomic64_t *v) ++{ ++ return arch_atomic64_add_return_relaxed(i, v) < 0; ++} ++#define arch_atomic64_add_negative_relaxed arch_atomic64_add_negative_relaxed ++#endif ++ ++#else /* arch_atomic64_add_negative_relaxed */ ++ ++#ifndef arch_atomic64_add_negative_acquire ++static __always_inline bool ++arch_atomic64_add_negative_acquire(s64 i, atomic64_t *v) ++{ ++ bool ret = arch_atomic64_add_negative_relaxed(i, v); ++ __atomic_acquire_fence(); ++ return ret; ++} ++#define arch_atomic64_add_negative_acquire arch_atomic64_add_negative_acquire ++#endif ++ ++#ifndef arch_atomic64_add_negative_release ++static __always_inline bool ++arch_atomic64_add_negative_release(s64 i, atomic64_t *v) ++{ ++ __atomic_release_fence(); ++ return arch_atomic64_add_negative_relaxed(i, v); ++} ++#define arch_atomic64_add_negative_release arch_atomic64_add_negative_release ++#endif ++ ++#ifndef arch_atomic64_add_negative ++static __always_inline bool ++arch_atomic64_add_negative(s64 i, atomic64_t *v) ++{ ++ bool ret; ++ __atomic_pre_full_fence(); ++ ret = arch_atomic64_add_negative_relaxed(i, v); ++ __atomic_post_full_fence(); ++ return ret; ++} ++#define arch_atomic64_add_negative arch_atomic64_add_negative ++#endif ++ ++#endif /* arch_atomic64_add_negative_relaxed */ ++ + #ifndef arch_atomic64_fetch_add_unless + /** + * arch_atomic64_fetch_add_unless - add unless the number is already a given value +@@ -2456,4 +2646,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v) + #endif + + #endif /* _LINUX_ATOMIC_FALLBACK_H */ +-// b5e87bdd5ede61470c29f7a7e4de781af3770f09 ++// 00071fffa021cec66f6290d706d69c91df87bade +diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h +index 7a139ec030b0c..0496816738ca9 100644 +--- a/include/linux/atomic/atomic-instrumented.h ++++ b/include/linux/atomic/atomic-instrumented.h +@@ -592,6 +592,28 @@ atomic_add_negative(int i, atomic_t *v) + return arch_atomic_add_negative(i, v); + } + ++static __always_inline bool ++atomic_add_negative_acquire(int i, atomic_t *v) ++{ ++ instrument_atomic_read_write(v, sizeof(*v)); ++ return arch_atomic_add_negative_acquire(i, v); ++} ++ ++static __always_inline bool ++atomic_add_negative_release(int i, atomic_t *v) ++{ ++ kcsan_release(); ++ instrument_atomic_read_write(v, sizeof(*v)); ++ return arch_atomic_add_negative_release(i, v); ++} ++ ++static __always_inline bool ++atomic_add_negative_relaxed(int i, atomic_t *v) ++{ ++ instrument_atomic_read_write(v, sizeof(*v)); ++ return arch_atomic_add_negative_relaxed(i, v); ++} ++ + static __always_inline int + atomic_fetch_add_unless(atomic_t *v, int a, int u) + { +@@ -1211,6 +1233,28 @@ atomic64_add_negative(s64 i, atomic64_t *v) + return arch_atomic64_add_negative(i, v); + } + ++static __always_inline bool ++atomic64_add_negative_acquire(s64 i, atomic64_t *v) ++{ ++ instrument_atomic_read_write(v, sizeof(*v)); ++ return arch_atomic64_add_negative_acquire(i, v); ++} ++ ++static __always_inline bool ++atomic64_add_negative_release(s64 i, atomic64_t *v) ++{ ++ kcsan_release(); ++ instrument_atomic_read_write(v, sizeof(*v)); ++ return arch_atomic64_add_negative_release(i, v); ++} ++ ++static __always_inline bool ++atomic64_add_negative_relaxed(s64 i, atomic64_t *v) ++{ ++ instrument_atomic_read_write(v, sizeof(*v)); ++ return arch_atomic64_add_negative_relaxed(i, v); ++} ++ + static __always_inline s64 + atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) + { +@@ -1830,6 +1874,28 @@ 
atomic_long_add_negative(long i, atomic_long_t *v) + return arch_atomic_long_add_negative(i, v); + } + ++static __always_inline bool ++atomic_long_add_negative_acquire(long i, atomic_long_t *v) ++{ ++ instrument_atomic_read_write(v, sizeof(*v)); ++ return arch_atomic_long_add_negative_acquire(i, v); ++} ++ ++static __always_inline bool ++atomic_long_add_negative_release(long i, atomic_long_t *v) ++{ ++ kcsan_release(); ++ instrument_atomic_read_write(v, sizeof(*v)); ++ return arch_atomic_long_add_negative_release(i, v); ++} ++ ++static __always_inline bool ++atomic_long_add_negative_relaxed(long i, atomic_long_t *v) ++{ ++ instrument_atomic_read_write(v, sizeof(*v)); ++ return arch_atomic_long_add_negative_relaxed(i, v); ++} ++ + static __always_inline long + atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) + { +@@ -2083,4 +2149,4 @@ atomic_long_dec_if_positive(atomic_long_t *v) + }) + + #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ +-// 764f741eb77a7ad565dc8d99ce2837d5542e8aee ++// 1b485de9cbaa4900de59e14ee2084357eaeb1c3a +diff --git a/include/linux/atomic/atomic-long.h b/include/linux/atomic/atomic-long.h +index 800b8c35992d1..2fc51ba66bebd 100644 +--- a/include/linux/atomic/atomic-long.h ++++ b/include/linux/atomic/atomic-long.h +@@ -479,6 +479,24 @@ arch_atomic_long_add_negative(long i, atomic_long_t *v) + return arch_atomic64_add_negative(i, v); + } + ++static __always_inline bool ++arch_atomic_long_add_negative_acquire(long i, atomic_long_t *v) ++{ ++ return arch_atomic64_add_negative_acquire(i, v); ++} ++ ++static __always_inline bool ++arch_atomic_long_add_negative_release(long i, atomic_long_t *v) ++{ ++ return arch_atomic64_add_negative_release(i, v); ++} ++ ++static __always_inline bool ++arch_atomic_long_add_negative_relaxed(long i, atomic_long_t *v) ++{ ++ return arch_atomic64_add_negative_relaxed(i, v); ++} ++ + static __always_inline long + arch_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) + { +@@ -973,6 +991,24 @@ arch_atomic_long_add_negative(long i, atomic_long_t *v) + return arch_atomic_add_negative(i, v); + } + ++static __always_inline bool ++arch_atomic_long_add_negative_acquire(long i, atomic_long_t *v) ++{ ++ return arch_atomic_add_negative_acquire(i, v); ++} ++ ++static __always_inline bool ++arch_atomic_long_add_negative_release(long i, atomic_long_t *v) ++{ ++ return arch_atomic_add_negative_release(i, v); ++} ++ ++static __always_inline bool ++arch_atomic_long_add_negative_relaxed(long i, atomic_long_t *v) ++{ ++ return arch_atomic_add_negative_relaxed(i, v); ++} ++ + static __always_inline long + arch_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) + { +@@ -1011,4 +1047,4 @@ arch_atomic_long_dec_if_positive(atomic_long_t *v) + + #endif /* CONFIG_64BIT */ + #endif /* _LINUX_ATOMIC_LONG_H */ +-// e8f0e08ff072b74d180eabe2ad001282b38c2c88 ++// a194c07d7d2f4b0e178d3c118c919775d5d65f50 +diff --git a/scripts/atomic/atomics.tbl b/scripts/atomic/atomics.tbl +index fbee2f6190d9e..85ca8d9b5c279 100644 +--- a/scripts/atomic/atomics.tbl ++++ b/scripts/atomic/atomics.tbl +@@ -33,7 +33,7 @@ try_cmpxchg B v p:old i:new + sub_and_test b i v + dec_and_test b v + inc_and_test b v +-add_negative b i v ++add_negative B i v + add_unless fb v i:a i:u + inc_not_zero b v + inc_unless_negative b v +diff --git a/scripts/atomic/fallbacks/add_negative b/scripts/atomic/fallbacks/add_negative +index 15caa2eb23712..e5980abf5904e 100755 +--- a/scripts/atomic/fallbacks/add_negative ++++ b/scripts/atomic/fallbacks/add_negative +@@ -1,16 +1,15 @@ + cat < 
+Date: Thu, 23 Mar 2023 21:55:31 +0100 +Subject: atomics: Provide rcuref - scalable reference counting + +From: Thomas Gleixner + +[ Upstream commit ee1ee6db07795d9637bc5e8993a8ddcf886541ef ] + +atomic_t based reference counting, including refcount_t, uses +atomic_inc_not_zero() for acquiring a reference. atomic_inc_not_zero() is +implemented with a atomic_try_cmpxchg() loop. High contention of the +reference count leads to retry loops and scales badly. There is nothing to +improve on this implementation as the semantics have to be preserved. + +Provide rcuref as a scalable alternative solution which is suitable for RCU +managed objects. Similar to refcount_t it comes with overflow and underflow +detection and mitigation. + +rcuref treats the underlying atomic_t as an unsigned integer and partitions +this space into zones: + + 0x00000000 - 0x7FFFFFFF valid zone (1 .. (INT_MAX + 1) references) + 0x80000000 - 0xBFFFFFFF saturation zone + 0xC0000000 - 0xFFFFFFFE dead zone + 0xFFFFFFFF no reference + +rcuref_get() unconditionally increments the reference count with +atomic_add_negative_relaxed(). rcuref_put() unconditionally decrements the +reference count with atomic_add_negative_release(). + +This unconditional increment avoids the inc_not_zero() problem, but +requires a more complex implementation on the put() side when the count +drops from 0 to -1. + +When this transition is detected then it is attempted to mark the reference +count dead, by setting it to the midpoint of the dead zone with a single +atomic_cmpxchg_release() operation. This operation can fail due to a +concurrent rcuref_get() elevating the reference count from -1 to 0 again. + +If the unconditional increment in rcuref_get() hits a reference count which +is marked dead (or saturated) it will detect it after the fact and bring +back the reference count to the midpoint of the respective zone. The zones +provide enough tolerance which makes it practically impossible to escape +from a zone. + +The racy implementation of rcuref_put() requires to protect rcuref_put() +against a grace period ending in order to prevent a subtle use after +free. As RCU is the only mechanism which allows to protect against that, it +is not possible to fully replace the atomic_inc_not_zero() based +implementation of refcount_t with this scheme. + +The final drop is slightly more expensive than the atomic_dec_return() +counterpart, but that's not the case which this is optimized for. The +optimization is on the high frequeunt get()/put() pairs and their +scalability. + +The performance of an uncontended rcuref_get()/put() pair where the put() +is not dropping the last reference is still on par with the plain atomic +operations, while at the same time providing overflow and underflow +detection and mitigation. + +The performance of rcuref compared to plain atomic_inc_not_zero() and +atomic_dec_return() based reference counting under contention: + + - Micro benchmark: All CPUs running a increment/decrement loop on an + elevated reference count, which means the 0 to -1 transition never + happens. + + The performance gain depends on microarchitecture and the number of + CPUs and has been observed in the range of 1.3X to 4.7X + + - Conversion of dst_entry::__refcnt to rcuref and testing with the + localhost memtier/memcached benchmark. That benchmark shows the + reference count contention prominently. 
+ + The performance gain depends on microarchitecture and the number of + CPUs and has been observed in the range of 1.1X to 2.6X over the + previous fix for the false sharing issue vs. struct + dst_entry::__refcnt. + + When memtier is run over a real 1Gb network connection, there is a + small gain on top of the false sharing fix. The two changes combined + result in a 2%-5% total gain for that networked test. + +Reported-by: Wangyang Guo +Reported-by: Arjan Van De Ven +Signed-off-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20230323102800.158429195@linutronix.de +Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem") +Signed-off-by: Sasha Levin +--- + include/linux/rcuref.h | 155 +++++++++++++++++++++++ + include/linux/types.h | 6 + + lib/Makefile | 2 +- + lib/rcuref.c | 281 +++++++++++++++++++++++++++++++++++++++++ + 4 files changed, 443 insertions(+), 1 deletion(-) + create mode 100644 include/linux/rcuref.h + create mode 100644 lib/rcuref.c + +diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h +new file mode 100644 +index 0000000000000..2c8bfd0f1b6b3 +--- /dev/null ++++ b/include/linux/rcuref.h +@@ -0,0 +1,155 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++#ifndef _LINUX_RCUREF_H ++#define _LINUX_RCUREF_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RCUREF_ONEREF 0x00000000U ++#define RCUREF_MAXREF 0x7FFFFFFFU ++#define RCUREF_SATURATED 0xA0000000U ++#define RCUREF_RELEASED 0xC0000000U ++#define RCUREF_DEAD 0xE0000000U ++#define RCUREF_NOREF 0xFFFFFFFFU ++ ++/** ++ * rcuref_init - Initialize a rcuref reference count with the given reference count ++ * @ref: Pointer to the reference count ++ * @cnt: The initial reference count typically '1' ++ */ ++static inline void rcuref_init(rcuref_t *ref, unsigned int cnt) ++{ ++ atomic_set(&ref->refcnt, cnt - 1); ++} ++ ++/** ++ * rcuref_read - Read the number of held reference counts of a rcuref ++ * @ref: Pointer to the reference count ++ * ++ * Return: The number of held references (0 ... N) ++ */ ++static inline unsigned int rcuref_read(rcuref_t *ref) ++{ ++ unsigned int c = atomic_read(&ref->refcnt); ++ ++ /* Return 0 if within the DEAD zone. */ ++ return c >= RCUREF_RELEASED ? 0 : c + 1; ++} ++ ++extern __must_check bool rcuref_get_slowpath(rcuref_t *ref); ++ ++/** ++ * rcuref_get - Acquire one reference on a rcuref reference count ++ * @ref: Pointer to the reference count ++ * ++ * Similar to atomic_inc_not_zero() but saturates at RCUREF_MAXREF. ++ * ++ * Provides no memory ordering, it is assumed the caller has guaranteed the ++ * object memory to be stable (RCU, etc.). It does provide a control dependency ++ * and thereby orders future stores. See documentation in lib/rcuref.c ++ * ++ * Return: ++ * False if the attempt to acquire a reference failed. This happens ++ * when the last reference has been put already ++ * ++ * True if a reference was successfully acquired ++ */ ++static inline __must_check bool rcuref_get(rcuref_t *ref) ++{ ++ /* ++ * Unconditionally increase the reference count. The saturation and ++ * dead zones provide enough tolerance for this. ++ */ ++ if (likely(!atomic_add_negative_relaxed(1, &ref->refcnt))) ++ return true; ++ ++ /* Handle the cases inside the saturation and dead zones */ ++ return rcuref_get_slowpath(ref); ++} ++ ++extern __must_check bool rcuref_put_slowpath(rcuref_t *ref); ++ ++/* ++ * Internal helper. Do not invoke directly. 
++ */ ++static __always_inline __must_check bool __rcuref_put(rcuref_t *ref) ++{ ++ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && preemptible(), ++ "suspicious rcuref_put_rcusafe() usage"); ++ /* ++ * Unconditionally decrease the reference count. The saturation and ++ * dead zones provide enough tolerance for this. ++ */ ++ if (likely(!atomic_add_negative_release(-1, &ref->refcnt))) ++ return false; ++ ++ /* ++ * Handle the last reference drop and cases inside the saturation ++ * and dead zones. ++ */ ++ return rcuref_put_slowpath(ref); ++} ++ ++/** ++ * rcuref_put_rcusafe -- Release one reference for a rcuref reference count RCU safe ++ * @ref: Pointer to the reference count ++ * ++ * Provides release memory ordering, such that prior loads and stores are done ++ * before, and provides an acquire ordering on success such that free() ++ * must come after. ++ * ++ * Can be invoked from contexts, which guarantee that no grace period can ++ * happen which would free the object concurrently if the decrement drops ++ * the last reference and the slowpath races against a concurrent get() and ++ * put() pair. rcu_read_lock()'ed and atomic contexts qualify. ++ * ++ * Return: ++ * True if this was the last reference with no future references ++ * possible. This signals the caller that it can safely release the ++ * object which is protected by the reference counter. ++ * ++ * False if there are still active references or the put() raced ++ * with a concurrent get()/put() pair. Caller is not allowed to ++ * release the protected object. ++ */ ++static inline __must_check bool rcuref_put_rcusafe(rcuref_t *ref) ++{ ++ return __rcuref_put(ref); ++} ++ ++/** ++ * rcuref_put -- Release one reference for a rcuref reference count ++ * @ref: Pointer to the reference count ++ * ++ * Can be invoked from any context. ++ * ++ * Provides release memory ordering, such that prior loads and stores are done ++ * before, and provides an acquire ordering on success such that free() ++ * must come after. ++ * ++ * Return: ++ * ++ * True if this was the last reference with no future references ++ * possible. This signals the caller that it can safely schedule the ++ * object, which is protected by the reference counter, for ++ * deconstruction. ++ * ++ * False if there are still active references or the put() raced ++ * with a concurrent get()/put() pair. Caller is not allowed to ++ * deconstruct the protected object. 
++ */ ++static inline __must_check bool rcuref_put(rcuref_t *ref) ++{ ++ bool released; ++ ++ preempt_disable(); ++ released = __rcuref_put(ref); ++ preempt_enable(); ++ return released; ++} ++ ++#endif +diff --git a/include/linux/types.h b/include/linux/types.h +index ea8cf60a8a795..688fb943556a1 100644 +--- a/include/linux/types.h ++++ b/include/linux/types.h +@@ -175,6 +175,12 @@ typedef struct { + } atomic64_t; + #endif + ++typedef struct { ++ atomic_t refcnt; ++} rcuref_t; ++ ++#define RCUREF_INIT(i) { .refcnt = ATOMIC_INIT(i - 1) } ++ + struct list_head { + struct list_head *next, *prev; + }; +diff --git a/lib/Makefile b/lib/Makefile +index 5ffe72ec99797..afd78c497ec76 100644 +--- a/lib/Makefile ++++ b/lib/Makefile +@@ -47,7 +47,7 @@ obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \ + list_sort.o uuid.o iov_iter.o clz_ctz.o \ + bsearch.o find_bit.o llist.o memweight.o kfifo.o \ + percpu-refcount.o rhashtable.o base64.o \ +- once.o refcount.o usercopy.o errseq.o bucket_locks.o \ ++ once.o refcount.o rcuref.o usercopy.o errseq.o bucket_locks.o \ + generic-radix-tree.o + obj-$(CONFIG_STRING_SELFTEST) += test_string.o + obj-y += string_helpers.o +diff --git a/lib/rcuref.c b/lib/rcuref.c +new file mode 100644 +index 0000000000000..5ec00a4a64d11 +--- /dev/null ++++ b/lib/rcuref.c +@@ -0,0 +1,281 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++ ++/* ++ * rcuref - A scalable reference count implementation for RCU managed objects ++ * ++ * rcuref is provided to replace open coded reference count implementations ++ * based on atomic_t. It protects explicitely RCU managed objects which can ++ * be visible even after the last reference has been dropped and the object ++ * is heading towards destruction. ++ * ++ * A common usage pattern is: ++ * ++ * get() ++ * rcu_read_lock(); ++ * p = get_ptr(); ++ * if (p && !atomic_inc_not_zero(&p->refcnt)) ++ * p = NULL; ++ * rcu_read_unlock(); ++ * return p; ++ * ++ * put() ++ * if (!atomic_dec_return(&->refcnt)) { ++ * remove_ptr(p); ++ * kfree_rcu((p, rcu); ++ * } ++ * ++ * atomic_inc_not_zero() is implemented with a try_cmpxchg() loop which has ++ * O(N^2) behaviour under contention with N concurrent operations. ++ * ++ * rcuref uses atomic_add_negative_relaxed() for the fast path, which scales ++ * better under contention. ++ * ++ * Why not refcount? ++ * ================= ++ * ++ * In principle it should be possible to make refcount use the rcuref ++ * scheme, but the destruction race described below cannot be prevented ++ * unless the protected object is RCU managed. ++ * ++ * Theory of operation ++ * =================== ++ * ++ * rcuref uses an unsigned integer reference counter. As long as the ++ * counter value is greater than or equal to RCUREF_ONEREF and not larger ++ * than RCUREF_MAXREF the reference is alive: ++ * ++ * ONEREF MAXREF SATURATED RELEASED DEAD NOREF ++ * 0 0x7FFFFFFF 0x8000000 0xA0000000 0xBFFFFFFF 0xC0000000 0xE0000000 0xFFFFFFFF ++ * <---valid --------> <-------saturation zone-------> <-----dead zone-----> ++ * ++ * The get() and put() operations do unconditional increments and ++ * decrements. The result is checked after the operation. This optimizes ++ * for the fast path. ++ * ++ * If the reference count is saturated or dead, then the increments and ++ * decrements are not harmful as the reference count still stays in the ++ * respective zones and is always set back to STATURATED resp. DEAD. 
The ++ * zones have room for 2^28 racing operations in each direction, which ++ * makes it practically impossible to escape the zones. ++ * ++ * Once the last reference is dropped the reference count becomes ++ * RCUREF_NOREF which forces rcuref_put() into the slowpath operation. The ++ * slowpath then tries to set the reference count from RCUREF_NOREF to ++ * RCUREF_DEAD via a cmpxchg(). This opens a small window where a ++ * concurrent rcuref_get() can acquire the reference count and bring it ++ * back to RCUREF_ONEREF or even drop the reference again and mark it DEAD. ++ * ++ * If the cmpxchg() succeeds then a concurrent rcuref_get() will result in ++ * DEAD + 1, which is inside the dead zone. If that happens the reference ++ * count is put back to DEAD. ++ * ++ * The actual race is possible due to the unconditional increment and ++ * decrements in rcuref_get() and rcuref_put(): ++ * ++ * T1 T2 ++ * get() put() ++ * if (atomic_add_negative(-1, &ref->refcnt)) ++ * succeeds-> atomic_cmpxchg(&ref->refcnt, NOREF, DEAD); ++ * ++ * atomic_add_negative(1, &ref->refcnt); <- Elevates refcount to DEAD + 1 ++ * ++ * As the result of T1's add is negative, the get() goes into the slow path ++ * and observes refcnt being in the dead zone which makes the operation fail. ++ * ++ * Possible critical states: ++ * ++ * Context Counter References Operation ++ * T1 0 1 init() ++ * T2 1 2 get() ++ * T1 0 1 put() ++ * T2 -1 0 put() tries to mark dead ++ * T1 0 1 get() ++ * T2 0 1 put() mark dead fails ++ * T1 -1 0 put() tries to mark dead ++ * T1 DEAD 0 put() mark dead succeeds ++ * T2 DEAD+1 0 get() fails and puts it back to DEAD ++ * ++ * Of course there are more complex scenarios, but the above illustrates ++ * the working principle. The rest is left to the imagination of the ++ * reader. ++ * ++ * Deconstruction race ++ * =================== ++ * ++ * The release operation must be protected by prohibiting a grace period in ++ * order to prevent a possible use after free: ++ * ++ * T1 T2 ++ * put() get() ++ * // ref->refcnt = ONEREF ++ * if (!atomic_add_negative(-1, &ref->refcnt)) ++ * return false; <- Not taken ++ * ++ * // ref->refcnt == NOREF ++ * --> preemption ++ * // Elevates ref->refcnt to ONEREF ++ * if (!atomic_add_negative(1, &ref->refcnt)) ++ * return true; <- taken ++ * ++ * if (put(&p->ref)) { <-- Succeeds ++ * remove_pointer(p); ++ * kfree_rcu(p, rcu); ++ * } ++ * ++ * RCU grace period ends, object is freed ++ * ++ * atomic_cmpxchg(&ref->refcnt, NOREF, DEAD); <- UAF ++ * ++ * This is prevented by disabling preemption around the put() operation as ++ * that's in most kernel configurations cheaper than a rcu_read_lock() / ++ * rcu_read_unlock() pair and in many cases even a NOOP. In any case it ++ * prevents the grace period which keeps the object alive until all put() ++ * operations complete. ++ * ++ * Saturation protection ++ * ===================== ++ * ++ * The reference count has a saturation limit RCUREF_MAXREF (INT_MAX). ++ * Once this is exceedded the reference count becomes stale by setting it ++ * to RCUREF_SATURATED, which will cause a memory leak, but it prevents ++ * wrap arounds which obviously cause worse problems than a memory ++ * leak. When saturation is reached a warning is emitted. ++ * ++ * Race conditions ++ * =============== ++ * ++ * All reference count increment/decrement operations are unconditional and ++ * only verified after the fact. This optimizes for the good case and takes ++ * the occasional race vs. a dead or already saturated refcount into ++ * account. 
The saturation and dead zones are large enough to accomodate ++ * for that. ++ * ++ * Memory ordering ++ * =============== ++ * ++ * Memory ordering rules are slightly relaxed wrt regular atomic_t functions ++ * and provide only what is strictly required for refcounts. ++ * ++ * The increments are fully relaxed; these will not provide ordering. The ++ * rationale is that whatever is used to obtain the object to increase the ++ * reference count on will provide the ordering. For locked data ++ * structures, its the lock acquire, for RCU/lockless data structures its ++ * the dependent load. ++ * ++ * rcuref_get() provides a control dependency ordering future stores which ++ * ensures that the object is not modified when acquiring a reference ++ * fails. ++ * ++ * rcuref_put() provides release order, i.e. all prior loads and stores ++ * will be issued before. It also provides a control dependency ordering ++ * against the subsequent destruction of the object. ++ * ++ * If rcuref_put() successfully dropped the last reference and marked the ++ * object DEAD it also provides acquire ordering. ++ */ ++ ++#include ++#include ++ ++/** ++ * rcuref_get_slowpath - Slowpath of rcuref_get() ++ * @ref: Pointer to the reference count ++ * ++ * Invoked when the reference count is outside of the valid zone. ++ * ++ * Return: ++ * False if the reference count was already marked dead ++ * ++ * True if the reference count is saturated, which prevents the ++ * object from being deconstructed ever. ++ */ ++bool rcuref_get_slowpath(rcuref_t *ref) ++{ ++ unsigned int cnt = atomic_read(&ref->refcnt); ++ ++ /* ++ * If the reference count was already marked dead, undo the ++ * increment so it stays in the middle of the dead zone and return ++ * fail. ++ */ ++ if (cnt >= RCUREF_RELEASED) { ++ atomic_set(&ref->refcnt, RCUREF_DEAD); ++ return false; ++ } ++ ++ /* ++ * If it was saturated, warn and mark it so. In case the increment ++ * was already on a saturated value restore the saturation ++ * marker. This keeps it in the middle of the saturation zone and ++ * prevents the reference count from overflowing. This leaks the ++ * object memory, but prevents the obvious reference count overflow ++ * damage. ++ */ ++ if (WARN_ONCE(cnt > RCUREF_MAXREF, "rcuref saturated - leaking memory")) ++ atomic_set(&ref->refcnt, RCUREF_SATURATED); ++ return true; ++} ++EXPORT_SYMBOL_GPL(rcuref_get_slowpath); ++ ++/** ++ * rcuref_put_slowpath - Slowpath of __rcuref_put() ++ * @ref: Pointer to the reference count ++ * ++ * Invoked when the reference count is outside of the valid zone. ++ * ++ * Return: ++ * True if this was the last reference with no future references ++ * possible. This signals the caller that it can safely schedule the ++ * object, which is protected by the reference counter, for ++ * deconstruction. ++ * ++ * False if there are still active references or the put() raced ++ * with a concurrent get()/put() pair. Caller is not allowed to ++ * deconstruct the protected object. ++ */ ++bool rcuref_put_slowpath(rcuref_t *ref) ++{ ++ unsigned int cnt = atomic_read(&ref->refcnt); ++ ++ /* Did this drop the last reference? */ ++ if (likely(cnt == RCUREF_NOREF)) { ++ /* ++ * Carefully try to set the reference count to RCUREF_DEAD. ++ * ++ * This can fail if a concurrent get() operation has ++ * elevated it again or the corresponding put() even marked ++ * it dead already. Both are valid situations and do not ++ * require a retry. If this fails the caller is not ++ * allowed to deconstruct the object. 
++ */ ++ if (atomic_cmpxchg_release(&ref->refcnt, RCUREF_NOREF, RCUREF_DEAD) != RCUREF_NOREF) ++ return false; ++ ++ /* ++ * The caller can safely schedule the object for ++ * deconstruction. Provide acquire ordering. ++ */ ++ smp_acquire__after_ctrl_dep(); ++ return true; ++ } ++ ++ /* ++ * If the reference count was already in the dead zone, then this ++ * put() operation is imbalanced. Warn, put the reference count back to ++ * DEAD and tell the caller to not deconstruct the object. ++ */ ++ if (WARN_ONCE(cnt >= RCUREF_RELEASED, "rcuref - imbalanced put()")) { ++ atomic_set(&ref->refcnt, RCUREF_DEAD); ++ return false; ++ } ++ ++ /* ++ * This is a put() operation on a saturated refcount. Restore the ++ * mean saturation value and tell the caller to not deconstruct the ++ * object. ++ */ ++ if (cnt > RCUREF_MAXREF) ++ atomic_set(&ref->refcnt, RCUREF_SATURATED); ++ return false; ++} ++EXPORT_SYMBOL_GPL(rcuref_put_slowpath); +-- +2.40.1 + diff --git a/queue-6.1/bluetooth-avoid-redundant-authentication.patch b/queue-6.1/bluetooth-avoid-redundant-authentication.patch new file mode 100644 index 00000000000..eddbbd6e371 --- /dev/null +++ b/queue-6.1/bluetooth-avoid-redundant-authentication.patch @@ -0,0 +1,106 @@ +From e6772c1d05f151bc529045893f8179f84b6aa583 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Sep 2023 04:39:34 +0000 +Subject: Bluetooth: Avoid redundant authentication + +From: Ying Hsu + +[ Upstream commit 1d8e801422d66e4b8c7b187c52196bef94eed887 ] + +While executing the Android 13 CTS Verifier Secure Server test on a +ChromeOS device, it was observed that the Bluetooth host initiates +authentication for an RFCOMM connection after SSP completes. +When this happens, some Intel Bluetooth controllers, like AC9560, would +disconnect with "Connection Rejected due to Security Reasons (0x0e)". + +Historically, BlueZ did not mandate this authentication while an +authenticated combination key was already in use for the connection. +This behavior was changed since commit 7b5a9241b780 +("Bluetooth: Introduce requirements for security level 4"). +So, this patch addresses the aforementioned disconnection issue by +restoring the previous behavior. + +Signed-off-by: Ying Hsu +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/hci_conn.c | 63 ++++++++++++++++++++++------------------ + 1 file changed, 35 insertions(+), 28 deletions(-) + +diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c +index f8ba3f5aa877b..728be9307f526 100644 +--- a/net/bluetooth/hci_conn.c ++++ b/net/bluetooth/hci_conn.c +@@ -2364,34 +2364,41 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, + if (!test_bit(HCI_CONN_AUTH, &conn->flags)) + goto auth; + +- /* An authenticated FIPS approved combination key has sufficient +- * security for security level 4. */ +- if (conn->key_type == HCI_LK_AUTH_COMBINATION_P256 && +- sec_level == BT_SECURITY_FIPS) +- goto encrypt; +- +- /* An authenticated combination key has sufficient security for +- security level 3. */ +- if ((conn->key_type == HCI_LK_AUTH_COMBINATION_P192 || +- conn->key_type == HCI_LK_AUTH_COMBINATION_P256) && +- sec_level == BT_SECURITY_HIGH) +- goto encrypt; +- +- /* An unauthenticated combination key has sufficient security for +- security level 1 and 2. 
*/ +- if ((conn->key_type == HCI_LK_UNAUTH_COMBINATION_P192 || +- conn->key_type == HCI_LK_UNAUTH_COMBINATION_P256) && +- (sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW)) +- goto encrypt; +- +- /* A combination key has always sufficient security for the security +- levels 1 or 2. High security level requires the combination key +- is generated using maximum PIN code length (16). +- For pre 2.1 units. */ +- if (conn->key_type == HCI_LK_COMBINATION && +- (sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW || +- conn->pin_length == 16)) +- goto encrypt; ++ switch (conn->key_type) { ++ case HCI_LK_AUTH_COMBINATION_P256: ++ /* An authenticated FIPS approved combination key has ++ * sufficient security for security level 4 or lower. ++ */ ++ if (sec_level <= BT_SECURITY_FIPS) ++ goto encrypt; ++ break; ++ case HCI_LK_AUTH_COMBINATION_P192: ++ /* An authenticated combination key has sufficient security for ++ * security level 3 or lower. ++ */ ++ if (sec_level <= BT_SECURITY_HIGH) ++ goto encrypt; ++ break; ++ case HCI_LK_UNAUTH_COMBINATION_P192: ++ case HCI_LK_UNAUTH_COMBINATION_P256: ++ /* An unauthenticated combination key has sufficient security ++ * for security level 2 or lower. ++ */ ++ if (sec_level <= BT_SECURITY_MEDIUM) ++ goto encrypt; ++ break; ++ case HCI_LK_COMBINATION: ++ /* A combination key has always sufficient security for the ++ * security levels 2 or lower. High security level requires the ++ * combination key is generated using maximum PIN code length ++ * (16). For pre 2.1 units. ++ */ ++ if (sec_level <= BT_SECURITY_MEDIUM || conn->pin_length == 16) ++ goto encrypt; ++ break; ++ default: ++ break; ++ } + + auth: + if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) +-- +2.40.1 + diff --git a/queue-6.1/bluetooth-btusb-add-shutdown-function-for-qca6174.patch b/queue-6.1/bluetooth-btusb-add-shutdown-function-for-qca6174.patch new file mode 100644 index 00000000000..c5585fd916d --- /dev/null +++ b/queue-6.1/bluetooth-btusb-add-shutdown-function-for-qca6174.patch @@ -0,0 +1,34 @@ +From e404dd5f31ecd1523a230d68a5dd1c79d74ce73d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 7 Aug 2023 14:46:26 +0800 +Subject: Bluetooth: btusb: add shutdown function for QCA6174 + +From: Rocky Liao + +[ Upstream commit 187f8b648cc16f07c66ab1d89d961bdcff779bf7 ] + +We should send hci reset command before bt turn off, which can reset bt +firmware status. 
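The handler this patch wires up, btusb_shutdown_qca(), is not shown in the hunk below. As a minimal sketch, assuming the hook only needs to issue a synchronous HCI reset before the controller loses power (the exact error handling is an assumption), it would look roughly like:

  static int btusb_shutdown_qca(struct hci_dev *hdev)
  {
  	struct sk_buff *skb;

  	/* Send HCI_OP_RESET synchronously so the firmware is back in a
  	 * known state before power-off (assumed flow).
  	 */
  	skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
  	if (IS_ERR(skb)) {
  		bt_dev_err(hdev, "HCI reset during shutdown failed");
  		return PTR_ERR(skb);
  	}
  	kfree_skb(skb);

  	return 0;
  }

The core calls hdev->shutdown when the device is brought down, so registering the hook in btusb_probe(), as the one-line hunk below does, is all the transport driver needs.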
+ +Signed-off-by: Rocky Liao +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + drivers/bluetooth/btusb.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c +index f2062c2a28da8..96d4f48e36011 100644 +--- a/drivers/bluetooth/btusb.c ++++ b/drivers/bluetooth/btusb.c +@@ -3984,6 +3984,7 @@ static int btusb_probe(struct usb_interface *intf, + + if (id->driver_info & BTUSB_QCA_ROME) { + data->setup_on_usb = btusb_setup_qca; ++ hdev->shutdown = btusb_shutdown_qca; + hdev->set_bdaddr = btusb_set_bdaddr_ath3012; + hdev->cmd_timeout = btusb_qca_cmd_timeout; + set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); +-- +2.40.1 + diff --git a/queue-6.1/bluetooth-hci_core-fix-build-warnings.patch b/queue-6.1/bluetooth-hci_core-fix-build-warnings.patch new file mode 100644 index 00000000000..df6d991ac26 --- /dev/null +++ b/queue-6.1/bluetooth-hci_core-fix-build-warnings.patch @@ -0,0 +1,78 @@ +From c371ab6d2f08ae126ea2826d8f104fa100db26ae Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Sep 2023 14:42:27 -0700 +Subject: Bluetooth: hci_core: Fix build warnings +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Luiz Augusto von Dentz + +[ Upstream commit dcda165706b9fbfd685898d46a6749d7d397e0c0 ] + +This fixes the following warnings: + +net/bluetooth/hci_core.c: In function ‘hci_register_dev’: +net/bluetooth/hci_core.c:2620:54: warning: ‘%d’ directive output may +be truncated writing between 1 and 10 bytes into a region of size 5 +[-Wformat-truncation=] + 2620 | snprintf(hdev->name, sizeof(hdev->name), "hci%d", id); + | ^~ +net/bluetooth/hci_core.c:2620:50: note: directive argument in the range +[0, 2147483647] + 2620 | snprintf(hdev->name, sizeof(hdev->name), "hci%d", id); + | ^~~~~~~ +net/bluetooth/hci_core.c:2620:9: note: ‘snprintf’ output between 5 and +14 bytes into a destination of size 8 + 2620 | snprintf(hdev->name, sizeof(hdev->name), "hci%d", id); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + include/net/bluetooth/hci_core.h | 2 +- + net/bluetooth/hci_core.c | 8 +++++--- + 2 files changed, 6 insertions(+), 4 deletions(-) + +diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h +index ddbcbf9ccb2ce..583aebd8c1e01 100644 +--- a/include/net/bluetooth/hci_core.h ++++ b/include/net/bluetooth/hci_core.h +@@ -348,7 +348,7 @@ struct hci_dev { + struct list_head list; + struct mutex lock; + +- char name[8]; ++ const char *name; + unsigned long flags; + __u16 id; + __u8 bus; +diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c +index d13b498f148cc..6a1db678d032f 100644 +--- a/net/bluetooth/hci_core.c ++++ b/net/bluetooth/hci_core.c +@@ -2616,7 +2616,11 @@ int hci_register_dev(struct hci_dev *hdev) + if (id < 0) + return id; + +- snprintf(hdev->name, sizeof(hdev->name), "hci%d", id); ++ error = dev_set_name(&hdev->dev, "hci%u", id); ++ if (error) ++ return error; ++ ++ hdev->name = dev_name(&hdev->dev); + hdev->id = id; + + BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); +@@ -2638,8 +2642,6 @@ int hci_register_dev(struct hci_dev *hdev) + if (!IS_ERR_OR_NULL(bt_debugfs)) + hdev->debugfs = debugfs_create_dir(hdev->name, bt_debugfs); + +- dev_set_name(&hdev->dev, "%s", hdev->name); +- + error = device_add(&hdev->dev); + if (error < 0) + goto err_wqueue; +-- +2.40.1 + diff --git 
a/queue-6.1/bluetooth-hci_event-fix-using-memcmp-when-comparing-.patch b/queue-6.1/bluetooth-hci_event-fix-using-memcmp-when-comparing-.patch new file mode 100644 index 00000000000..dff9321721a --- /dev/null +++ b/queue-6.1/bluetooth-hci_event-fix-using-memcmp-when-comparing-.patch @@ -0,0 +1,74 @@ +From b460e3650c64f4f9a965951fe29b8cf476de6ace Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Oct 2023 13:59:59 -0700 +Subject: Bluetooth: hci_event: Fix using memcmp when comparing keys + +From: Luiz Augusto von Dentz + +[ Upstream commit b541260615f601ae1b5d6d0cc54e790de706303b ] + +memcmp is not consider safe to use with cryptographic secrets: + + 'Do not use memcmp() to compare security critical data, such as + cryptographic secrets, because the required CPU time depends on the + number of equal bytes.' + +While usage of memcmp for ZERO_KEY may not be considered a security +critical data, it can lead to more usage of memcmp with pairing keys +which could introduce more security problems. + +Fixes: 455c2ff0a558 ("Bluetooth: Fix BR/EDR out-of-band pairing with only initiator data") +Fixes: 33155c4aae52 ("Bluetooth: hci_event: Ignore NULL link key") +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/hci_event.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c +index 152da3ded3faf..c86a45344fe28 100644 +--- a/net/bluetooth/hci_event.c ++++ b/net/bluetooth/hci_event.c +@@ -25,6 +25,8 @@ + /* Bluetooth HCI event handling. */ + + #include ++#include ++#include + + #include + #include +@@ -4697,7 +4699,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, void *data, + goto unlock; + + /* Ignore NULL link key against CVE-2020-26555 */ +- if (!memcmp(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) { ++ if (!crypto_memneq(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) { + bt_dev_dbg(hdev, "Ignore NULL link key (ZERO KEY) for %pMR", + &ev->bdaddr); + hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); +@@ -5240,8 +5242,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) + * available, then do not declare that OOB data is + * present. + */ +- if (!memcmp(data->rand256, ZERO_KEY, 16) || +- !memcmp(data->hash256, ZERO_KEY, 16)) ++ if (!crypto_memneq(data->rand256, ZERO_KEY, 16) || ++ !crypto_memneq(data->hash256, ZERO_KEY, 16)) + return 0x00; + + return 0x02; +@@ -5251,8 +5253,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) + * not supported by the hardware, then check that if + * P-192 data values are present. 
+ */ +- if (!memcmp(data->rand192, ZERO_KEY, 16) || +- !memcmp(data->hash192, ZERO_KEY, 16)) ++ if (!crypto_memneq(data->rand192, ZERO_KEY, 16) || ++ !crypto_memneq(data->hash192, ZERO_KEY, 16)) + return 0x00; + + return 0x01; +-- +2.40.1 + diff --git a/queue-6.1/btrfs-error-out-when-cowing-block-using-a-stale-tran.patch b/queue-6.1/btrfs-error-out-when-cowing-block-using-a-stale-tran.patch new file mode 100644 index 00000000000..012ddeab60a --- /dev/null +++ b/queue-6.1/btrfs-error-out-when-cowing-block-using-a-stale-tran.patch @@ -0,0 +1,83 @@ +From 6affa55da4b3bf33bf880ee3f1dcc75db268b581 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 27 Sep 2023 12:09:21 +0100 +Subject: btrfs: error out when COWing block using a stale transaction + +From: Filipe Manana + +[ Upstream commit 48774f3bf8b4dd3b1a0e155825c9ce48483db14c ] + +At btrfs_cow_block() we have these checks to verify we are not using a +stale transaction (a past transaction with an unblocked state or higher), +and the only thing we do is to trigger a WARN with a message and a stack +trace. This however is a critical problem, highly unexpected and if it +happens it's most likely due to a bug, so we should error out and turn the +fs into error state so that such issue is much more easily noticed if it's +triggered. + +The problem is critical because using such stale transaction will lead to +not persisting the extent buffer used for the COW operation, as allocating +a tree block adds the range of the respective extent buffer to the +->dirty_pages iotree of the transaction, and a stale transaction, in the +unlocked state or higher, will not flush dirty extent buffers anymore, +therefore resulting in not persisting the tree block and resource leaks +(not cleaning the dirty_pages iotree for example). + +So do the following changes: + +1) Return -EUCLEAN if we find a stale transaction; + +2) Turn the fs into error state, with error -EUCLEAN, so that no + transaction can be committed, and generate a stack trace; + +3) Combine both conditions into a single if statement, as both are related + and have the same error message; + +4) Mark the check as unlikely, since this is not expected to ever happen. + +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/ctree.c | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) + +diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c +index 1a327eb3580b4..98e3e0761a4e5 100644 +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -567,14 +567,22 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, + btrfs_err(fs_info, + "COW'ing blocks on a fs root that's being dropped"); + +- if (trans->transaction != fs_info->running_transaction) +- WARN(1, KERN_CRIT "trans %llu running %llu\n", +- trans->transid, +- fs_info->running_transaction->transid); +- +- if (trans->transid != fs_info->generation) +- WARN(1, KERN_CRIT "trans %llu running %llu\n", +- trans->transid, fs_info->generation); ++ /* ++ * COWing must happen through a running transaction, which always ++ * matches the current fs generation (it's a transaction with a state ++ * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs ++ * into error state to prevent the commit of any transaction. 
++ */ ++ if (unlikely(trans->transaction != fs_info->running_transaction || ++ trans->transid != fs_info->generation)) { ++ btrfs_abort_transaction(trans, -EUCLEAN); ++ btrfs_crit(fs_info, ++"unexpected transaction when attempting to COW block %llu on root %llu, transaction %llu running transaction %llu fs generation %llu", ++ buf->start, btrfs_root_id(root), trans->transid, ++ fs_info->running_transaction->transid, ++ fs_info->generation); ++ return -EUCLEAN; ++ } + + if (!should_cow_block(trans, root, buf)) { + *cow_ret = buf; +-- +2.40.1 + diff --git a/queue-6.1/btrfs-error-out-when-reallocating-block-for-defrag-u.patch b/queue-6.1/btrfs-error-out-when-reallocating-block-for-defrag-u.patch new file mode 100644 index 00000000000..7b78a8ff4dc --- /dev/null +++ b/queue-6.1/btrfs-error-out-when-reallocating-block-for-defrag-u.patch @@ -0,0 +1,78 @@ +From 53306197a65c328a7f50c8a1be33bd4574f840dd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 27 Sep 2023 12:09:23 +0100 +Subject: btrfs: error out when reallocating block for defrag using a stale + transaction + +From: Filipe Manana + +[ Upstream commit e36f94914021e58ee88a8856c7fdf35adf9c7ee1 ] + +At btrfs_realloc_node() we have these checks to verify we are not using a +stale transaction (a past transaction with an unblocked state or higher), +and the only thing we do is to trigger two WARN_ON(). This however is a +critical problem, highly unexpected and if it happens it's most likely due +to a bug, so we should error out and turn the fs into error state so that +such issue is much more easily noticed if it's triggered. + +The problem is critical because in btrfs_realloc_node() we COW tree blocks, +and using such stale transaction will lead to not persisting the extent +buffers used for the COW operations, as allocating tree block adds the +range of the respective extent buffers to the ->dirty_pages iotree of the +transaction, and a stale transaction, in the unlocked state or higher, +will not flush dirty extent buffers anymore, therefore resulting in not +persisting the tree block and resource leaks (not cleaning the dirty_pages +iotree for example). + +So do the following changes: + +1) Return -EUCLEAN if we find a stale transaction; + +2) Turn the fs into error state, with error -EUCLEAN, so that no + transaction can be committed, and generate a stack trace; + +3) Combine both conditions into a single if statement, as both are related + and have the same error message; + +4) Mark the check as unlikely, since this is not expected to ever happen. + +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/ctree.c | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c +index 98f68bd1383a3..e08688844f1e1 100644 +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -698,8 +698,22 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, + int progress_passed = 0; + struct btrfs_disk_key disk_key; + +- WARN_ON(trans->transaction != fs_info->running_transaction); +- WARN_ON(trans->transid != fs_info->generation); ++ /* ++ * COWing must happen through a running transaction, which always ++ * matches the current fs generation (it's a transaction with a state ++ * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs ++ * into error state to prevent the commit of any transaction. 
++ */ ++ if (unlikely(trans->transaction != fs_info->running_transaction || ++ trans->transid != fs_info->generation)) { ++ btrfs_abort_transaction(trans, -EUCLEAN); ++ btrfs_crit(fs_info, ++"unexpected transaction when attempting to reallocate parent %llu for root %llu, transaction %llu running transaction %llu fs generation %llu", ++ parent->start, btrfs_root_id(root), trans->transid, ++ fs_info->running_transaction->transid, ++ fs_info->generation); ++ return -EUCLEAN; ++ } + + parent_nritems = btrfs_header_nritems(parent); + blocksize = fs_info->nodesize; +-- +2.40.1 + diff --git a/queue-6.1/btrfs-error-when-cowing-block-from-a-root-that-is-be.patch b/queue-6.1/btrfs-error-when-cowing-block-from-a-root-that-is-be.patch new file mode 100644 index 00000000000..503fed10e2a --- /dev/null +++ b/queue-6.1/btrfs-error-when-cowing-block-from-a-root-that-is-be.patch @@ -0,0 +1,59 @@ +From 3203996611533692e96cdb5e6e237006eb19797e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 27 Sep 2023 12:09:22 +0100 +Subject: btrfs: error when COWing block from a root that is being deleted + +From: Filipe Manana + +[ Upstream commit a2caab29884397e583d09be6546259a83ebfbdb1 ] + +At btrfs_cow_block() we check if the block being COWed belongs to a root +that is being deleted and if so we log an error message. However this is +an unexpected case and it indicates a bug somewhere, so we should return +an error and abort the transaction. So change this in the following ways: + +1) Abort the transaction with -EUCLEAN, so that if the issue ever happens + it can easily be noticed; + +2) Change the logged message level from error to critical, and change the + message itself to print the block's logical address and the ID of the + root; + +3) Return -EUCLEAN to the caller; + +4) As this is an unexpected scenario, that should never happen, mark the + check as unlikely, allowing the compiler to potentially generate better + code. 
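+
+As a brief aside on point 4: unlikely() is purely a static
+branch-prediction hint and changes no semantics. A generic sketch of
+the resulting shape (the condition name here is illustrative, not the
+actual btrfs check):
+
+	if (unlikely(invariant_broken)) {
+		/* cold path, laid out out of line by the compiler */
+		btrfs_abort_transaction(trans, -EUCLEAN);
+		return -EUCLEAN;
+	}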
+ +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/ctree.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c +index 98e3e0761a4e5..98f68bd1383a3 100644 +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -563,9 +563,13 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, + u64 search_start; + int ret; + +- if (test_bit(BTRFS_ROOT_DELETING, &root->state)) +- btrfs_err(fs_info, +- "COW'ing blocks on a fs root that's being dropped"); ++ if (unlikely(test_bit(BTRFS_ROOT_DELETING, &root->state))) { ++ btrfs_abort_transaction(trans, -EUCLEAN); ++ btrfs_crit(fs_info, ++ "attempt to COW block %llu on root %llu that is being deleted", ++ buf->start, btrfs_root_id(root)); ++ return -EUCLEAN; ++ } + + /* + * COWing must happen through a running transaction, which always +-- +2.40.1 + diff --git a/queue-6.1/btrfs-fix-some-wmaybe-uninitialized-warnings-in-ioct.patch b/queue-6.1/btrfs-fix-some-wmaybe-uninitialized-warnings-in-ioct.patch new file mode 100644 index 00000000000..dd8c77f2652 --- /dev/null +++ b/queue-6.1/btrfs-fix-some-wmaybe-uninitialized-warnings-in-ioct.patch @@ -0,0 +1,106 @@ +From 87dbedf5bcd43760ba7f6c73a0e748036bd26a97 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Sep 2023 15:47:27 -0400 +Subject: btrfs: fix some -Wmaybe-uninitialized warnings in ioctl.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Josef Bacik + +[ Upstream commit 9147b9ded499d9853bdf0e9804b7eaa99c4429ed ] + +Jens reported the following warnings from -Wmaybe-uninitialized recent +Linus' branch. + + In file included from ./include/asm-generic/rwonce.h:26, + from ./arch/arm64/include/asm/rwonce.h:71, + from ./include/linux/compiler.h:246, + from ./include/linux/export.h:5, + from ./include/linux/linkage.h:7, + from ./include/linux/kernel.h:17, + from fs/btrfs/ioctl.c:6: + In function ‘instrument_copy_from_user_before’, + inlined from ‘_copy_from_user’ at ./include/linux/uaccess.h:148:3, + inlined from ‘copy_from_user’ at ./include/linux/uaccess.h:183:7, + inlined from ‘btrfs_ioctl_space_info’ at fs/btrfs/ioctl.c:2999:6, + inlined from ‘btrfs_ioctl’ at fs/btrfs/ioctl.c:4616:10: + ./include/linux/kasan-checks.h:38:27: warning: ‘space_args’ may be used + uninitialized [-Wmaybe-uninitialized] + 38 | #define kasan_check_write __kasan_check_write + ./include/linux/instrumented.h:129:9: note: in expansion of macro + ‘kasan_check_write’ + 129 | kasan_check_write(to, n); + | ^~~~~~~~~~~~~~~~~ + ./include/linux/kasan-checks.h: In function ‘btrfs_ioctl’: + ./include/linux/kasan-checks.h:20:6: note: by argument 1 of type ‘const + volatile void *’ to ‘__kasan_check_write’ declared here + 20 | bool __kasan_check_write(const volatile void *p, unsigned int + size); + | ^~~~~~~~~~~~~~~~~~~ + fs/btrfs/ioctl.c:2981:39: note: ‘space_args’ declared here + 2981 | struct btrfs_ioctl_space_args space_args; + | ^~~~~~~~~~ + In function ‘instrument_copy_from_user_before’, + inlined from ‘_copy_from_user’ at ./include/linux/uaccess.h:148:3, + inlined from ‘copy_from_user’ at ./include/linux/uaccess.h:183:7, + inlined from ‘_btrfs_ioctl_send’ at fs/btrfs/ioctl.c:4343:9, + inlined from ‘btrfs_ioctl’ at fs/btrfs/ioctl.c:4658:10: + ./include/linux/kasan-checks.h:38:27: warning: ‘args32’ may be used + uninitialized [-Wmaybe-uninitialized] + 38 | #define kasan_check_write __kasan_check_write + 
./include/linux/instrumented.h:129:9: note: in expansion of macro + ‘kasan_check_write’ + 129 | kasan_check_write(to, n); + | ^~~~~~~~~~~~~~~~~ + ./include/linux/kasan-checks.h: In function ‘btrfs_ioctl’: + ./include/linux/kasan-checks.h:20:6: note: by argument 1 of type ‘const + volatile void *’ to ‘__kasan_check_write’ declared here + 20 | bool __kasan_check_write(const volatile void *p, unsigned int + size); + | ^~~~~~~~~~~~~~~~~~~ + fs/btrfs/ioctl.c:4341:49: note: ‘args32’ declared here + 4341 | struct btrfs_ioctl_send_args_32 args32; + | ^~~~~~ + +This was due to his config options and having KASAN turned on, +which adds some extra checks around copy_from_user(), which then +triggered the -Wmaybe-uninitialized checker for these cases. + +Fix the warnings by initializing the different structs we're copying +into. + +Reported-by: Jens Axboe +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/ioctl.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index 9e323420c96d3..9474265ee7ea3 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -3869,7 +3869,7 @@ static void get_block_group_info(struct list_head *groups_list, + static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info, + void __user *arg) + { +- struct btrfs_ioctl_space_args space_args; ++ struct btrfs_ioctl_space_args space_args = { 0 }; + struct btrfs_ioctl_space_info space; + struct btrfs_ioctl_space_info *dest; + struct btrfs_ioctl_space_info *dest_orig; +@@ -5223,7 +5223,7 @@ static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat + + if (compat) { + #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) +- struct btrfs_ioctl_send_args_32 args32; ++ struct btrfs_ioctl_send_args_32 args32 = { 0 }; + + ret = copy_from_user(&args32, argp, sizeof(args32)); + if (ret) +-- +2.40.1 + diff --git a/queue-6.1/btrfs-initialize-start_slot-in-btrfs_log_prealloc_ex.patch b/queue-6.1/btrfs-initialize-start_slot-in-btrfs_log_prealloc_ex.patch new file mode 100644 index 00000000000..1948b139529 --- /dev/null +++ b/queue-6.1/btrfs-initialize-start_slot-in-btrfs_log_prealloc_ex.patch @@ -0,0 +1,57 @@ +From a4cea077b433d623efa49f0fa5c2164800f1cdd3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 5 Sep 2023 12:15:24 -0400 +Subject: btrfs: initialize start_slot in btrfs_log_prealloc_extents +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Josef Bacik + +[ Upstream commit b4c639f699349880b7918b861e1bd360442ec450 ] + +Jens reported a compiler warning when using +CONFIG_CC_OPTIMIZE_FOR_SIZE=y that looks like this + + fs/btrfs/tree-log.c: In function ‘btrfs_log_prealloc_extents’: + fs/btrfs/tree-log.c:4828:23: warning: ‘start_slot’ may be used + uninitialized [-Wmaybe-uninitialized] + 4828 | ret = copy_items(trans, inode, dst_path, path, + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + 4829 | start_slot, ins_nr, 1, 0); + | ~~~~~~~~~~~~~~~~~~~~~~~~~ + fs/btrfs/tree-log.c:4725:13: note: ‘start_slot’ was declared here + 4725 | int start_slot; + | ^~~~~~~~~~ + +The compiler is incorrect, as we only use this code when ins_len > 0, +and when ins_len > 0 we have start_slot properly initialized. However +we generally find the -Wmaybe-uninitialized warnings valuable, so +initialize start_slot to get rid of the warning. 
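+
+For reference, the warning comes from a guarded-initialization pattern
+that the compiler cannot always prove safe. A reduced sketch, where
+copy_items() and nritems stand in for the real tree-log code:
+
+	int start_slot;
+	int ins_nr = 0, slot;
+
+	for (slot = 0; slot < nritems; slot++) {
+		if (ins_nr == 0)
+			start_slot = slot;	/* first hit initializes it */
+		ins_nr++;
+	}
+	if (ins_nr > 0)				/* guard implies start_slot is set */
+		copy_items(start_slot, ins_nr);	/* yet gcc may still warn here */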
+ +Reported-by: Jens Axboe +Tested-by: Jens Axboe +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/tree-log.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c +index c03ff6a5a7f6b..7c33b28c02aeb 100644 +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -4767,7 +4767,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, + struct extent_buffer *leaf; + int slot; + int ins_nr = 0; +- int start_slot; ++ int start_slot = 0; + int ret; + + if (!(inode->flags & BTRFS_INODE_PREALLOC)) +-- +2.40.1 + diff --git a/queue-6.1/btrfs-prevent-transaction-block-reserve-underflow-wh.patch b/queue-6.1/btrfs-prevent-transaction-block-reserve-underflow-wh.patch new file mode 100644 index 00000000000..285156c917b --- /dev/null +++ b/queue-6.1/btrfs-prevent-transaction-block-reserve-underflow-wh.patch @@ -0,0 +1,112 @@ +From f1a7f8da19c27e1293bbc0854411609d2d3cfc04 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Sep 2023 18:20:19 +0100 +Subject: btrfs: prevent transaction block reserve underflow when starting + transaction + +From: Filipe Manana + +[ Upstream commit a7ddeeb079505961355cf0106154da0110f1fdff ] + +When starting a transaction, with a non-zero number of items, we reserve +metadata space for that number of items and for delayed refs by doing a +call to btrfs_block_rsv_add(), with the transaction block reserve passed +as the block reserve argument. This reserves metadata space and adds it +to the transaction block reserve. Later we migrate the space we reserved +for delayed references from the transaction block reserve into the delayed +refs block reserve, by calling btrfs_migrate_to_delayed_refs_rsv(). + +btrfs_migrate_to_delayed_refs_rsv() decrements the number of bytes to +migrate from the source block reserve, and this however may result in an +underflow in case the space added to the transaction block reserve ended +up being used by another task that has not reserved enough space for its +own use - examples are tasks doing reflinks or hole punching because they +end up calling btrfs_replace_file_extents() -> btrfs_drop_extents() and +may need to modify/COW a variable number of leaves/paths, so they keep +trying to use space from the transaction block reserve when they need to +COW an extent buffer, and may end up trying to use more space then they +have reserved (1 unit/path only for removing file extent items). + +This can be avoided by simply reserving space first without adding it to +the transaction block reserve, then add the space for delayed refs to the +delayed refs block reserve and finally add the remaining reserved space +to the transaction block reserve. This also makes the code a bit shorter +and simpler. So just do that. 
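+
+The underflow window, reduced to a sketch with illustrative numbers
+(this is the pre-patch call sequence):
+
+	/* reserve 10 units, 4 of them meant for delayed refs */
+	btrfs_block_rsv_add(fs_info, rsv, 10, flush);	/* rsv->reserved += 10 */
+
+	/* meanwhile another task COWing paths consumes 8 units from rsv */
+
+	btrfs_migrate_to_delayed_refs_rsv(fs_info, rsv, 4);
+	/* rsv->reserved is now 2, so subtracting 4 underflows it */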
+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/delayed-ref.c | 9 +-------- + fs/btrfs/delayed-ref.h | 1 - + fs/btrfs/transaction.c | 6 +++--- + 3 files changed, 4 insertions(+), 12 deletions(-) + +diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c +index 36a3debe94930..e08e3852c4788 100644 +--- a/fs/btrfs/delayed-ref.c ++++ b/fs/btrfs/delayed-ref.c +@@ -141,24 +141,17 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans) + * Transfer bytes to our delayed refs rsv + * + * @fs_info: the filesystem +- * @src: source block rsv to transfer from + * @num_bytes: number of bytes to transfer + * +- * This transfers up to the num_bytes amount from the src rsv to the ++ * This transfers up to the num_bytes amount, previously reserved, to the + * delayed_refs_rsv. Any extra bytes are returned to the space info. + */ + void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, +- struct btrfs_block_rsv *src, + u64 num_bytes) + { + struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; + u64 to_free = 0; + +- spin_lock(&src->lock); +- src->reserved -= num_bytes; +- src->size -= num_bytes; +- spin_unlock(&src->lock); +- + spin_lock(&delayed_refs_rsv->lock); + if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) { + u64 delta = delayed_refs_rsv->size - +diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h +index d6304b690ec4a..712a6315e956b 100644 +--- a/fs/btrfs/delayed-ref.h ++++ b/fs/btrfs/delayed-ref.h +@@ -383,7 +383,6 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans); + int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, + enum btrfs_reserve_flush_enum flush); + void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, +- struct btrfs_block_rsv *src, + u64 num_bytes); + int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans); + bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); +diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c +index 1193214ba8c10..60db4c3b82fa1 100644 +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -614,14 +614,14 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, + reloc_reserved = true; + } + +- ret = btrfs_block_rsv_add(fs_info, rsv, num_bytes, flush); ++ ret = btrfs_reserve_metadata_bytes(fs_info, rsv, num_bytes, flush); + if (ret) + goto reserve_fail; + if (delayed_refs_bytes) { +- btrfs_migrate_to_delayed_refs_rsv(fs_info, rsv, +- delayed_refs_bytes); ++ btrfs_migrate_to_delayed_refs_rsv(fs_info, delayed_refs_bytes); + num_bytes -= delayed_refs_bytes; + } ++ btrfs_block_rsv_add_bytes(rsv, num_bytes, true); + + if (rsv->space_info->force_alloc) + do_chunk_alloc = true; +-- +2.40.1 + diff --git a/queue-6.1/btrfs-return-euclean-for-delayed-tree-ref-with-a-ref.patch b/queue-6.1/btrfs-return-euclean-for-delayed-tree-ref-with-a-ref.patch new file mode 100644 index 00000000000..0042ec8588d --- /dev/null +++ b/queue-6.1/btrfs-return-euclean-for-delayed-tree-ref-with-a-ref.patch @@ -0,0 +1,51 @@ +From 3baeabc4520a630fed394828d2a4ff52d9b1ee9c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Sep 2023 18:20:23 +0100 +Subject: btrfs: return -EUCLEAN for delayed tree ref with a ref count not + equals to 1 + +From: Filipe Manana + +[ Upstream commit 1bf76df3fee56d6637718e267f7c34ed70d0c7dc ] + +When running a delayed tree reference, if we find a ref count different +from 1, we return -EIO. 
This isn't an IO error, as it indicates either a +bug in the delayed refs code or a memory corruption, so change the error +code from -EIO to -EUCLEAN. Also tag the branch as 'unlikely' as this is +not expected to ever happen, and change the error message to print the +tree block's bytenr without the parenthesis (and there was a missing space +between the 'block' word and the opening parenthesis), for consistency as +that's the style we used everywhere else. + +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/extent-tree.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c +index 08ff10a81cb90..2a7c9088fe1f8 100644 +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -1663,12 +1663,12 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, + parent = ref->parent; + ref_root = ref->root; + +- if (node->ref_mod != 1) { ++ if (unlikely(node->ref_mod != 1)) { + btrfs_err(trans->fs_info, +- "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu", ++ "btree block %llu has %d references rather than 1: action %d ref_root %llu parent %llu", + node->bytenr, node->ref_mod, node->action, ref_root, + parent); +- return -EIO; ++ return -EUCLEAN; + } + if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { + BUG_ON(!extent_op || !extent_op->update_flags); +-- +2.40.1 + diff --git a/queue-6.1/cpufreq-schedutil-update-next_freq-when-cpufreq_limi.patch b/queue-6.1/cpufreq-schedutil-update-next_freq-when-cpufreq_limi.patch new file mode 100644 index 00000000000..9c226ff3afd --- /dev/null +++ b/queue-6.1/cpufreq-schedutil-update-next_freq-when-cpufreq_limi.patch @@ -0,0 +1,69 @@ +From d526150520942c1952eb6898189e1d3c5d5df5d8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:05:27 +0800 +Subject: cpufreq: schedutil: Update next_freq when cpufreq_limits change + +From: Xuewen Yan + +[ Upstream commit 9e0bc36ab07c550d791bf17feeb479f1dfc42d89 ] + +When cpufreq's policy is 'single', there is a scenario that will +cause sg_policy's next_freq to be unable to update. + +When the CPU's util is always max, the cpufreq will be max, +and then if we change the policy's scaling_max_freq to be a +lower freq, indeed, the sg_policy's next_freq need change to +be the lower freq, however, because the cpu_is_busy, the next_freq +would keep the max_freq. + +For example: + +The cpu7 is a single CPU: + + unisoc:/sys/devices/system/cpu/cpufreq/policy7 # while true;do done& [1] 4737 + unisoc:/sys/devices/system/cpu/cpufreq/policy7 # taskset -p 80 4737 + pid 4737's current affinity mask: ff + pid 4737's new affinity mask: 80 + unisoc:/sys/devices/system/cpu/cpufreq/policy7 # cat scaling_max_freq + 2301000 + unisoc:/sys/devices/system/cpu/cpufreq/policy7 # cat scaling_cur_freq + 2301000 + unisoc:/sys/devices/system/cpu/cpufreq/policy7 # echo 2171000 > scaling_max_freq + unisoc:/sys/devices/system/cpu/cpufreq/policy7 # cat scaling_max_freq + 2171000 + +At this time, the sg_policy's next_freq would stay at 2301000, which +is wrong. + +To fix this, add a check for the ->need_freq_update flag. + +[ mingo: Clarified the changelog. ] + +Co-developed-by: Guohua Yan +Signed-off-by: Xuewen Yan +Signed-off-by: Guohua Yan +Signed-off-by: Ingo Molnar +Acked-by: "Rafael J. 
Wysocki" +Link: https://lore.kernel.org/r/20230719130527.8074-1-xuewen.yan@unisoc.com +Signed-off-by: Sasha Levin +--- + kernel/sched/cpufreq_schedutil.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c +index 1207c78f85c11..853a07618a3cf 100644 +--- a/kernel/sched/cpufreq_schedutil.c ++++ b/kernel/sched/cpufreq_schedutil.c +@@ -345,7 +345,8 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time, + * Except when the rq is capped by uclamp_max. + */ + if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) && +- sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) { ++ sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq && ++ !sg_policy->need_freq_update) { + next_f = sg_policy->next_freq; + + /* Restore cached freq as next_freq has changed */ +-- +2.40.1 + diff --git a/queue-6.1/drm-amd-pm-add-unique_id-for-gc-11.0.3.patch b/queue-6.1/drm-amd-pm-add-unique_id-for-gc-11.0.3.patch new file mode 100644 index 00000000000..7e267124b21 --- /dev/null +++ b/queue-6.1/drm-amd-pm-add-unique_id-for-gc-11.0.3.patch @@ -0,0 +1,34 @@ +From f019522eb7213549eb7850757c35f5f83bedfd50 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 11 Aug 2023 12:25:26 +0800 +Subject: drm/amd/pm: add unique_id for gc 11.0.3 + +From: Kenneth Feng + +[ Upstream commit 4953856f280b2b606089a72a93a1e9212a3adaca ] + +add unique_id for gc 11.0.3 + +Signed-off-by: Kenneth Feng +Reviewed-by: Feifei Xu +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/pm/amdgpu_pm.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c +index 8472013ff38a2..0e78437c8389d 100644 +--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c ++++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c +@@ -1991,6 +1991,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 2): ++ case IP_VERSION(11, 0, 3): + *states = ATTR_STATE_SUPPORTED; + break; + default: +-- +2.40.1 + diff --git a/queue-6.1/drm-panel-orientation-quirks-add-quirk-for-one-mix-2.patch b/queue-6.1/drm-panel-orientation-quirks-add-quirk-for-one-mix-2.patch new file mode 100644 index 00000000000..1d850ad973a --- /dev/null +++ b/queue-6.1/drm-panel-orientation-quirks-add-quirk-for-one-mix-2.patch @@ -0,0 +1,59 @@ +From ec3a51f02b2d6b88b37074670e4a1acfd0f4ca90 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 1 Oct 2023 13:47:10 +0200 +Subject: drm: panel-orientation-quirks: Add quirk for One Mix 2S + +From: Kai Uwe Broulik + +[ Upstream commit cbb7eb2dbd9472816e42a1b0fdb51af49abbf812 ] + +The One Mix 2S is a mini laptop with a 1200x1920 portrait screen +mounted in a landscape oriented clamshell case. Because of the too +generic DMI strings this entry is also doing bios-date matching. 
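+
+For context, drivers consume this table by passing the panel's native
+mode size. A minimal sketch of the lookup (the connector and the
+surrounding driver code are omitted):
+
+	int orientation = drm_get_panel_orientation_quirk(1200, 1920);
+
+	/* anything but UNKNOWN means a quirk matched this machine */
+	if (orientation != DRM_MODE_PANEL_ORIENTATION_UNKNOWN)
+		drm_connector_set_panel_orientation(connector, orientation);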
+ +Signed-off-by: Kai Uwe Broulik +Reviewed-by: Hans de Goede +Signed-off-by: Liviu Dudau +Link: https://patchwork.freedesktop.org/patch/msgid/20231001114710.336172-1-foss-linux@broulik.de +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/drm_panel_orientation_quirks.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c +index 0cb646cb04ee1..d5c15292ae937 100644 +--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c ++++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c +@@ -38,6 +38,14 @@ static const struct drm_dmi_panel_orientation_data gpd_micropc = { + .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, + }; + ++static const struct drm_dmi_panel_orientation_data gpd_onemix2s = { ++ .width = 1200, ++ .height = 1920, ++ .bios_dates = (const char * const []){ "05/21/2018", "10/26/2018", ++ "03/04/2019", NULL }, ++ .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, ++}; ++ + static const struct drm_dmi_panel_orientation_data gpd_pocket = { + .width = 1200, + .height = 1920, +@@ -401,6 +409,14 @@ static const struct dmi_system_id orientation_data[] = { + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "LTH17"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, ++ }, { /* One Mix 2S (generic strings, also match on bios date) */ ++ .matches = { ++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Default string"), ++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Default string"), ++ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Default string"), ++ DMI_EXACT_MATCH(DMI_BOARD_NAME, "Default string"), ++ }, ++ .driver_data = (void *)&gpd_onemix2s, + }, + {} + }; +-- +2.40.1 + diff --git a/queue-6.1/fprobe-add-nr_maxactive-to-specify-rethook_node-pool.patch b/queue-6.1/fprobe-add-nr_maxactive-to-specify-rethook_node-pool.patch new file mode 100644 index 00000000000..27a7f92033b --- /dev/null +++ b/queue-6.1/fprobe-add-nr_maxactive-to-specify-rethook_node-pool.patch @@ -0,0 +1,67 @@ +From 3ebd760fab1840ff832f63608a1d2909713f3b6c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Feb 2023 00:56:19 +0900 +Subject: fprobe: Add nr_maxactive to specify rethook_node pool size + +From: Masami Hiramatsu (Google) + +[ Upstream commit 59a7a298565aa0ce44ce8e4fbcbb89a19730013a ] + +Add nr_maxactive to specify rethook_node pool size. This means +the maximum number of actively running target functions concurrently +for probing by exit_handler. Note that if the running function is +preempted or sleep, it is still counted as 'active'. + +Link: https://lkml.kernel.org/r/167526697917.433354.17779774988245113106.stgit@mhiramat.roam.corp.google.com + +Cc: Florent Revest +Cc: Mark Rutland +Cc: Will Deacon +Signed-off-by: Masami Hiramatsu (Google) +Signed-off-by: Steven Rostedt (Google) +Stable-dep-of: 700b2b439766 ("fprobe: Fix to ensure the number of active retprobes is not zero") +Signed-off-by: Sasha Levin +--- + include/linux/fprobe.h | 2 ++ + kernel/trace/fprobe.c | 5 ++++- + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h +index e0d4e61362491..678f741a7b330 100644 +--- a/include/linux/fprobe.h ++++ b/include/linux/fprobe.h +@@ -14,6 +14,7 @@ + * @flags: The status flag. + * @rethook: The rethook data structure. (internal data) + * @entry_data_size: The private data storage size. ++ * @nr_maxactive: The max number of active functions. + * @entry_handler: The callback function for function entry. + * @exit_handler: The callback function for function exit. 
+ */ +@@ -31,6 +32,7 @@ struct fprobe { + unsigned int flags; + struct rethook *rethook; + size_t entry_data_size; ++ int nr_maxactive; + + void (*entry_handler)(struct fprobe *fp, unsigned long entry_ip, + struct pt_regs *regs, void *entry_data); +diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c +index be28d1bc84e80..441a373079213 100644 +--- a/kernel/trace/fprobe.c ++++ b/kernel/trace/fprobe.c +@@ -143,7 +143,10 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) + } + + /* Initialize rethook if needed */ +- size = num * num_possible_cpus() * 2; ++ if (fp->nr_maxactive) ++ size = fp->nr_maxactive; ++ else ++ size = num * num_possible_cpus() * 2; + if (size < 0) + return -E2BIG; + +-- +2.40.1 + diff --git a/queue-6.1/fprobe-fix-to-ensure-the-number-of-active-retprobes-.patch b/queue-6.1/fprobe-fix-to-ensure-the-number-of-active-retprobes-.patch new file mode 100644 index 00000000000..d91c9e69722 --- /dev/null +++ b/queue-6.1/fprobe-fix-to-ensure-the-number-of-active-retprobes-.patch @@ -0,0 +1,50 @@ +From 4851c3c83da75010c499e225a628a9c6d680c205 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 17 Oct 2023 08:49:45 +0900 +Subject: fprobe: Fix to ensure the number of active retprobes is not zero + +From: Masami Hiramatsu (Google) + +[ Upstream commit 700b2b439766e8aab8a7174991198497345bd411 ] + +The number of active retprobes can be zero but it is not acceptable, +so return EINVAL error if detected. + +Link: https://lore.kernel.org/all/169750018550.186853.11198884812017796410.stgit@devnote2/ + +Reported-by: wuqiang.matt +Closes: https://lore.kernel.org/all/20231016222103.cb9f426edc60220eabd8aa6a@kernel.org/ +Fixes: 5b0ab78998e3 ("fprobe: Add exit_handler support") +Signed-off-by: Masami Hiramatsu (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/fprobe.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c +index 441a373079213..f386d6bd8e0e3 100644 +--- a/kernel/trace/fprobe.c ++++ b/kernel/trace/fprobe.c +@@ -134,7 +134,7 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) + { + int i, size; + +- if (num < 0) ++ if (num <= 0) + return -EINVAL; + + if (!fp->exit_handler) { +@@ -147,8 +147,8 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) + size = fp->nr_maxactive; + else + size = num * num_possible_cpus() * 2; +- if (size < 0) +- return -E2BIG; ++ if (size <= 0) ++ return -EINVAL; + + fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler); + if (!fp->rethook) +-- +2.40.1 + diff --git a/queue-6.1/fprobe-pass-entry_data-to-handlers.patch b/queue-6.1/fprobe-pass-entry_data-to-handlers.patch new file mode 100644 index 00000000000..fdee364f544 --- /dev/null +++ b/queue-6.1/fprobe-pass-entry_data-to-handlers.patch @@ -0,0 +1,192 @@ +From 417c303823ee22e402ad91b7da29b412644cdf4b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Feb 2023 00:56:01 +0900 +Subject: fprobe: Pass entry_data to handlers + +From: Masami Hiramatsu (Google) + +[ Upstream commit 76d0de5729c0569c4071e7f21fcab394e502f03a ] + +Pass the private entry_data to the entry and exit handlers so that +they can share the context data, something like saved function +arguments etc. +User must specify the private entry_data size by @entry_data_size +field before registering the fprobe. 
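+
+A minimal usage sketch under the new signatures (the probed symbol and
+the handler bodies are illustrative only):
+
+	static void my_entry(struct fprobe *fp, unsigned long ip,
+			     struct pt_regs *regs, void *data)
+	{
+		/* stash per-invocation context for the exit handler */
+		*(u64 *)data = ktime_get_ns();
+	}
+
+	static void my_exit(struct fprobe *fp, unsigned long ip,
+			    struct pt_regs *regs, void *data)
+	{
+		pr_info("took %llu ns\n", ktime_get_ns() - *(u64 *)data);
+	}
+
+	static struct fprobe fp = {
+		.entry_data_size = sizeof(u64),
+		.entry_handler	 = my_entry,
+		.exit_handler	 = my_exit,
+	};
+
+	static int __init my_probe_init(void)
+	{
+		return register_fprobe(&fp, "vfs_read", NULL);
+	}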
+ +Link: https://lkml.kernel.org/r/167526696173.433354.17408372048319432574.stgit@mhiramat.roam.corp.google.com + +Cc: Florent Revest +Cc: Mark Rutland +Cc: Will Deacon +Signed-off-by: Masami Hiramatsu (Google) +Signed-off-by: Steven Rostedt (Google) +Stable-dep-of: 700b2b439766 ("fprobe: Fix to ensure the number of active retprobes is not zero") +Signed-off-by: Sasha Levin +--- + include/linux/fprobe.h | 8 ++++++-- + kernel/trace/bpf_trace.c | 2 +- + kernel/trace/fprobe.c | 21 ++++++++++++++------- + lib/test_fprobe.c | 6 ++++-- + samples/fprobe/fprobe_example.c | 6 ++++-- + 5 files changed, 29 insertions(+), 14 deletions(-) + +diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h +index 1c2bde0ead736..e0d4e61362491 100644 +--- a/include/linux/fprobe.h ++++ b/include/linux/fprobe.h +@@ -13,6 +13,7 @@ + * @nmissed: The counter for missing events. + * @flags: The status flag. + * @rethook: The rethook data structure. (internal data) ++ * @entry_data_size: The private data storage size. + * @entry_handler: The callback function for function entry. + * @exit_handler: The callback function for function exit. + */ +@@ -29,9 +30,12 @@ struct fprobe { + unsigned long nmissed; + unsigned int flags; + struct rethook *rethook; ++ size_t entry_data_size; + +- void (*entry_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs); +- void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs); ++ void (*entry_handler)(struct fprobe *fp, unsigned long entry_ip, ++ struct pt_regs *regs, void *entry_data); ++ void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip, ++ struct pt_regs *regs, void *entry_data); + }; + + /* This fprobe is soft-disabled. */ +diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c +index 8c77c54e6348b..f4a494a457c52 100644 +--- a/kernel/trace/bpf_trace.c ++++ b/kernel/trace/bpf_trace.c +@@ -2646,7 +2646,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, + + static void + kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip, +- struct pt_regs *regs) ++ struct pt_regs *regs, void *data) + { + struct bpf_kprobe_multi_link *link; + +diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c +index 1322247ce6488..be28d1bc84e80 100644 +--- a/kernel/trace/fprobe.c ++++ b/kernel/trace/fprobe.c +@@ -17,14 +17,16 @@ + struct fprobe_rethook_node { + struct rethook_node node; + unsigned long entry_ip; ++ char data[]; + }; + + static void fprobe_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct ftrace_regs *fregs) + { + struct fprobe_rethook_node *fpr; +- struct rethook_node *rh; ++ struct rethook_node *rh = NULL; + struct fprobe *fp; ++ void *entry_data = NULL; + int bit; + + fp = container_of(ops, struct fprobe, ops); +@@ -37,9 +39,6 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip, + return; + } + +- if (fp->entry_handler) +- fp->entry_handler(fp, ip, ftrace_get_regs(fregs)); +- + if (fp->exit_handler) { + rh = rethook_try_get(fp->rethook); + if (!rh) { +@@ -48,9 +47,16 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip, + } + fpr = container_of(rh, struct fprobe_rethook_node, node); + fpr->entry_ip = ip; +- rethook_hook(rh, ftrace_get_regs(fregs), true); ++ if (fp->entry_data_size) ++ entry_data = fpr->data; + } + ++ if (fp->entry_handler) ++ fp->entry_handler(fp, ip, ftrace_get_regs(fregs), entry_data); ++ ++ if (rh) ++ rethook_hook(rh, ftrace_get_regs(fregs), true); ++ + out: + 
ftrace_test_recursion_unlock(bit); + } +@@ -81,7 +87,8 @@ static void fprobe_exit_handler(struct rethook_node *rh, void *data, + + fpr = container_of(rh, struct fprobe_rethook_node, node); + +- fp->exit_handler(fp, fpr->entry_ip, regs); ++ fp->exit_handler(fp, fpr->entry_ip, regs, ++ fp->entry_data_size ? (void *)fpr->data : NULL); + } + NOKPROBE_SYMBOL(fprobe_exit_handler); + +@@ -146,7 +153,7 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) + for (i = 0; i < size; i++) { + struct fprobe_rethook_node *node; + +- node = kzalloc(sizeof(*node), GFP_KERNEL); ++ node = kzalloc(sizeof(*node) + fp->entry_data_size, GFP_KERNEL); + if (!node) { + rethook_free(fp->rethook); + fp->rethook = NULL; +diff --git a/lib/test_fprobe.c b/lib/test_fprobe.c +index e0381b3ec410c..34fa5a5bbda1f 100644 +--- a/lib/test_fprobe.c ++++ b/lib/test_fprobe.c +@@ -30,7 +30,8 @@ static noinline u32 fprobe_selftest_target2(u32 value) + return (value / div_factor) + 1; + } + +-static notrace void fp_entry_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs) ++static notrace void fp_entry_handler(struct fprobe *fp, unsigned long ip, ++ struct pt_regs *regs, void *data) + { + KUNIT_EXPECT_FALSE(current_test, preemptible()); + /* This can be called on the fprobe_selftest_target and the fprobe_selftest_target2 */ +@@ -39,7 +40,8 @@ static notrace void fp_entry_handler(struct fprobe *fp, unsigned long ip, struct + entry_val = (rand1 / div_factor); + } + +-static notrace void fp_exit_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs) ++static notrace void fp_exit_handler(struct fprobe *fp, unsigned long ip, ++ struct pt_regs *regs, void *data) + { + unsigned long ret = regs_return_value(regs); + +diff --git a/samples/fprobe/fprobe_example.c b/samples/fprobe/fprobe_example.c +index e22da8573116e..dd794990ad7ec 100644 +--- a/samples/fprobe/fprobe_example.c ++++ b/samples/fprobe/fprobe_example.c +@@ -48,7 +48,8 @@ static void show_backtrace(void) + stack_trace_print(stacks, len, 24); + } + +-static void sample_entry_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs) ++static void sample_entry_handler(struct fprobe *fp, unsigned long ip, ++ struct pt_regs *regs, void *data) + { + if (use_trace) + /* +@@ -63,7 +64,8 @@ static void sample_entry_handler(struct fprobe *fp, unsigned long ip, struct pt_ + show_backtrace(); + } + +-static void sample_exit_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs) ++static void sample_exit_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs, ++ void *data) + { + unsigned long rip = instruction_pointer(regs); + +-- +2.40.1 + diff --git a/queue-6.1/fs-writeback-do-not-requeue-a-clean-inode-having-ski.patch b/queue-6.1/fs-writeback-do-not-requeue-a-clean-inode-having-ski.patch new file mode 100644 index 00000000000..5dd1059e481 --- /dev/null +++ b/queue-6.1/fs-writeback-do-not-requeue-a-clean-inode-having-ski.patch @@ -0,0 +1,86 @@ +From 41c787522af21180151340e77c0cd1bd15bc6d7f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Sep 2023 22:51:31 -0600 +Subject: fs-writeback: do not requeue a clean inode having skipped pages + +From: Chunhai Guo + +[ Upstream commit be049c3a088d512187407b7fd036cecfab46d565 ] + +When writing back an inode and performing an fsync on it concurrently, a +deadlock issue may arise as shown below. In each writeback iteration, a +clean inode is requeued to the wb->b_dirty queue due to non-zero +pages_skipped, without anything actually being written. 
This causes an +infinite loop and prevents the plug from being flushed, resulting in a +deadlock. We now avoid requeuing the clean inode to prevent this issue. + + wb_writeback fsync (inode-Y) +blk_start_plug(&plug) +for (;;) { + iter i-1: some reqs with page-X added into plug->mq_list // f2fs node page-X with PG_writeback + filemap_fdatawrite + __filemap_fdatawrite_range // write inode-Y with sync_mode WB_SYNC_ALL + do_writepages + f2fs_write_data_pages + __f2fs_write_data_pages // wb_sync_req[DATA]++ for WB_SYNC_ALL + f2fs_write_cache_pages + f2fs_write_single_data_page + f2fs_do_write_data_page + f2fs_outplace_write_data + f2fs_update_data_blkaddr + f2fs_wait_on_page_writeback + wait_on_page_writeback // wait for f2fs node page-X + iter i: + progress = __writeback_inodes_wb(wb, work) + . writeback_sb_inodes + . __writeback_single_inode // write inode-Y with sync_mode WB_SYNC_NONE + . . do_writepages + . . f2fs_write_data_pages + . . . __f2fs_write_data_pages // skip writepages due to (wb_sync_req[DATA]>0) + . . . wbc->pages_skipped += get_dirty_pages(inode) // wbc->pages_skipped = 1 + . if (!(inode->i_state & I_DIRTY_ALL)) // i_state = I_SYNC | I_SYNC_QUEUED + . total_wrote++; // total_wrote = 1 + . requeue_inode // requeue inode-Y to wb->b_dirty queue due to non-zero pages_skipped + if (progress) // progress = 1 + continue; + iter i+1: + queue_io + // similar process with iter i, infinite for-loop ! +} +blk_finish_plug(&plug) // flush plug won't be called + +Signed-off-by: Chunhai Guo +Reviewed-by: Jan Kara +Message-Id: <20230916045131.957929-1-guochunhai@vivo.com> +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/fs-writeback.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index d387708977a50..a5c31a479aacc 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -1522,10 +1522,15 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, + + if (wbc->pages_skipped) { + /* +- * writeback is not making progress due to locked +- * buffers. Skip this inode for now. ++ * Writeback is not making progress due to locked buffers. ++ * Skip this inode for now. Although having skipped pages ++ * is odd for clean inodes, it can happen for some ++ * filesystems so handle that gracefully. + */ +- redirty_tail_locked(inode, wb); ++ if (inode->i_state & I_DIRTY_ALL) ++ redirty_tail_locked(inode, wb); ++ else ++ inode_cgwb_move_to_attached(inode, wb); + return; + } + +-- +2.40.1 + diff --git a/queue-6.1/gpio-timberdale-fix-potential-deadlock-on-tgpio-lock.patch b/queue-6.1/gpio-timberdale-fix-potential-deadlock-on-tgpio-lock.patch new file mode 100644 index 00000000000..f2155256006 --- /dev/null +++ b/queue-6.1/gpio-timberdale-fix-potential-deadlock-on-tgpio-lock.patch @@ -0,0 +1,64 @@ +From 940105086d532361e6f60fdbc845dfafbe92c3bb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Sep 2023 10:29:14 +0000 +Subject: gpio: timberdale: Fix potential deadlock on &tgpio->lock + +From: Chengfeng Ye + +[ Upstream commit 9e8bc2dda5a7a8e2babc9975f4b11c9a6196e490 ] + +As timbgpio_irq_enable()/timbgpio_irq_disable() callback could be +executed under irq context, it could introduce double locks on +&tgpio->lock if it preempts other execution units requiring +the same locks. 
+ +timbgpio_gpio_set() +--> timbgpio_update_bit() +--> spin_lock(&tgpio->lock) + + --> timbgpio_irq_disable() + --> spin_lock_irqsave(&tgpio->lock) + +This flaw was found by an experimental static analysis tool I am +developing for irq-related deadlock. + +To prevent the potential deadlock, the patch uses spin_lock_irqsave() +on &tgpio->lock inside timbgpio_gpio_set() to prevent the possible +deadlock scenario. + +Signed-off-by: Chengfeng Ye +Reviewed-by: Andy Shevchenko +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Sasha Levin +--- + drivers/gpio/gpio-timberdale.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpio/gpio-timberdale.c b/drivers/gpio/gpio-timberdale.c +index de14949a3fe5a..92c1f2baa4bff 100644 +--- a/drivers/gpio/gpio-timberdale.c ++++ b/drivers/gpio/gpio-timberdale.c +@@ -43,9 +43,10 @@ static int timbgpio_update_bit(struct gpio_chip *gpio, unsigned index, + unsigned offset, bool enabled) + { + struct timbgpio *tgpio = gpiochip_get_data(gpio); ++ unsigned long flags; + u32 reg; + +- spin_lock(&tgpio->lock); ++ spin_lock_irqsave(&tgpio->lock, flags); + reg = ioread32(tgpio->membase + offset); + + if (enabled) +@@ -54,7 +55,7 @@ static int timbgpio_update_bit(struct gpio_chip *gpio, unsigned index, + reg &= ~(1 << index); + + iowrite32(reg, tgpio->membase + offset); +- spin_unlock(&tgpio->lock); ++ spin_unlock_irqrestore(&tgpio->lock, flags); + + return 0; + } +-- +2.40.1 + diff --git a/queue-6.1/hid-holtek-fix-slab-out-of-bounds-write-in-holtek_kb.patch b/queue-6.1/hid-holtek-fix-slab-out-of-bounds-write-in-holtek_kb.patch new file mode 100644 index 00000000000..816913dcf66 --- /dev/null +++ b/queue-6.1/hid-holtek-fix-slab-out-of-bounds-write-in-holtek_kb.patch @@ -0,0 +1,40 @@ +From e9a8fa731f0c9db245a6291af7b1d77185194b5a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 18 Sep 2023 10:40:59 +0800 +Subject: HID: holtek: fix slab-out-of-bounds Write in holtek_kbd_input_event + +From: Ma Ke + +[ Upstream commit ffe3b7837a2bb421df84d0177481db9f52c93a71 ] + +There is a slab-out-of-bounds Write bug in hid-holtek-kbd driver. +The problem is the driver assumes the device must have an input +but some malicious devices violate this assumption. + +Fix this by checking hid_device's input is non-empty before its usage. 
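+
+The underlying hazard, as a generic sketch of the pattern the fix
+applies: list_first_entry() on an empty list hands back the list head
+itself reinterpreted as an entry, so any access through it lands out
+of bounds. The guard must therefore come first:
+
+	struct hid_input *input;
+
+	if (list_empty(&boot_hid->inputs))
+		return -ENODEV;	/* nothing to take the first entry from */
+	input = list_first_entry(&boot_hid->inputs, struct hid_input, list);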
+ +Signed-off-by: Ma Ke +Signed-off-by: Jiri Kosina +Signed-off-by: Sasha Levin +--- + drivers/hid/hid-holtek-kbd.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/hid/hid-holtek-kbd.c b/drivers/hid/hid-holtek-kbd.c +index 403506b9697e7..b346d68a06f5a 100644 +--- a/drivers/hid/hid-holtek-kbd.c ++++ b/drivers/hid/hid-holtek-kbd.c +@@ -130,6 +130,10 @@ static int holtek_kbd_input_event(struct input_dev *dev, unsigned int type, + return -ENODEV; + + boot_hid = usb_get_intfdata(boot_interface); ++ if (list_empty(&boot_hid->inputs)) { ++ hid_err(hid, "no inputs found\n"); ++ return -ENODEV; ++ } + boot_hid_input = list_first_entry(&boot_hid->inputs, + struct hid_input, list); + +-- +2.40.1 + diff --git a/queue-6.1/hid-logitech-hidpp-add-bluetooth-id-for-the-logitech.patch b/queue-6.1/hid-logitech-hidpp-add-bluetooth-id-for-the-logitech.patch new file mode 100644 index 00000000000..8131684b961 --- /dev/null +++ b/queue-6.1/hid-logitech-hidpp-add-bluetooth-id-for-the-logitech.patch @@ -0,0 +1,37 @@ +From 1db6b1260f7986da2b5367e069bab780b16e2778 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Aug 2023 00:24:38 +0200 +Subject: HID: logitech-hidpp: Add Bluetooth ID for the Logitech M720 Triathlon + mouse + +From: Hans de Goede + +[ Upstream commit 2d866603e25b1ce7e536839f62d1faae1c03d92f ] + +Using hidpp for the M720 adds battery info reporting and hires +scrolling support. + +Signed-off-by: Hans de Goede +Signed-off-by: Bastien Nocera +Signed-off-by: Jiri Kosina +Signed-off-by: Sasha Levin +--- + drivers/hid/hid-logitech-hidpp.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c +index fb427391c3b86..8d0dad12b2d37 100644 +--- a/drivers/hid/hid-logitech-hidpp.c ++++ b/drivers/hid/hid-logitech-hidpp.c +@@ -4427,6 +4427,8 @@ static const struct hid_device_id hidpp_devices[] = { + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb008) }, + { /* MX Master mouse over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb012) }, ++ { /* M720 Triathlon mouse over Bluetooth */ ++ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb015) }, + { /* MX Ergo trackball over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01d) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01e) }, +-- +2.40.1 + diff --git a/queue-6.1/hid-multitouch-add-required-quirk-for-synaptics-0xcd.patch b/queue-6.1/hid-multitouch-add-required-quirk-for-synaptics-0xcd.patch new file mode 100644 index 00000000000..402090fd3de --- /dev/null +++ b/queue-6.1/hid-multitouch-add-required-quirk-for-synaptics-0xcd.patch @@ -0,0 +1,39 @@ +From 67281af95c5e1d4b56ed145d7e68a000e8afbd78 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 17 Sep 2023 16:18:43 +0000 +Subject: HID: multitouch: Add required quirk for Synaptics 0xcd7e device + +From: Rahul Rameshbabu + +[ Upstream commit 1437e4547edf41689d7135faaca4222ef0081bc1 ] + +Register the Synaptics device as a special multitouch device with certain +quirks that may improve usability of the touchpad device. 
+ +Reported-by: Rain +Closes: https://lore.kernel.org/linux-input/2bbb8e1d-1793-4df1-810f-cb0137341ff4@app.fastmail.com/ +Signed-off-by: Rahul Rameshbabu +Signed-off-by: Jiri Kosina +Signed-off-by: Sasha Levin +--- + drivers/hid/hid-multitouch.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c +index 521b2ffb42449..8db4ae05febc8 100644 +--- a/drivers/hid/hid-multitouch.c ++++ b/drivers/hid/hid-multitouch.c +@@ -2144,6 +2144,10 @@ static const struct hid_device_id mt_devices[] = { + USB_DEVICE_ID_MTP_STM)}, + + /* Synaptics devices */ ++ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, ++ HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, ++ USB_VENDOR_ID_SYNAPTICS, 0xcd7e) }, ++ + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_SYNAPTICS, 0xce08) }, +-- +2.40.1 + diff --git a/queue-6.1/hid-nintendo-reinitialize-usb-pro-controller-after-r.patch b/queue-6.1/hid-nintendo-reinitialize-usb-pro-controller-after-r.patch new file mode 100644 index 00000000000..aafe39a2e75 --- /dev/null +++ b/queue-6.1/hid-nintendo-reinitialize-usb-pro-controller-after-r.patch @@ -0,0 +1,265 @@ +From acc58da9f242d3b2a9a8cbaf5628826bf3a16f14 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 24 Sep 2023 16:06:01 +0200 +Subject: HID: nintendo: reinitialize USB Pro Controller after resuming from + suspend + +From: Martino Fontana + +[ Upstream commit 95ea4d9fd385fe335b989f22d409df079a042b7a ] + +When suspending the computer, a Switch Pro Controller connected via USB will +lose its internal status. However, because the USB connection was technically +never lost, when resuming the computer, the driver will attempt to communicate +with the controller as if nothing happened (and fail). +Because of this, the user was forced to manually disconnect the controller +(or to press the sync button on the controller to power it off), so that it +can be re-initialized. + +With this patch, the controller will be automatically re-initialized after +resuming from suspend. + +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=216233 + +Signed-off-by: Martino Fontana +Reviewed-by: Daniel J. 
Ogorchock +Signed-off-by: Jiri Kosina +Signed-off-by: Sasha Levin +--- + drivers/hid/hid-nintendo.c | 175 ++++++++++++++++++++++--------------- + 1 file changed, 103 insertions(+), 72 deletions(-) + +diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c +index 5bfc0c4504608..8a8a3dd8af0c1 100644 +--- a/drivers/hid/hid-nintendo.c ++++ b/drivers/hid/hid-nintendo.c +@@ -2011,7 +2011,9 @@ static int joycon_read_info(struct joycon_ctlr *ctlr) + struct joycon_input_report *report; + + req.subcmd_id = JC_SUBCMD_REQ_DEV_INFO; ++ mutex_lock(&ctlr->output_mutex); + ret = joycon_send_subcmd(ctlr, &req, 0, HZ); ++ mutex_unlock(&ctlr->output_mutex); + if (ret) { + hid_err(ctlr->hdev, "Failed to get joycon info; ret=%d\n", ret); + return ret; +@@ -2040,6 +2042,85 @@ static int joycon_read_info(struct joycon_ctlr *ctlr) + return 0; + } + ++static int joycon_init(struct hid_device *hdev) ++{ ++ struct joycon_ctlr *ctlr = hid_get_drvdata(hdev); ++ int ret = 0; ++ ++ mutex_lock(&ctlr->output_mutex); ++ /* if handshake command fails, assume ble pro controller */ ++ if ((jc_type_is_procon(ctlr) || jc_type_is_chrggrip(ctlr)) && ++ !joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ)) { ++ hid_dbg(hdev, "detected USB controller\n"); ++ /* set baudrate for improved latency */ ++ ret = joycon_send_usb(ctlr, JC_USB_CMD_BAUDRATE_3M, HZ); ++ if (ret) { ++ hid_err(hdev, "Failed to set baudrate; ret=%d\n", ret); ++ goto out_unlock; ++ } ++ /* handshake */ ++ ret = joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ); ++ if (ret) { ++ hid_err(hdev, "Failed handshake; ret=%d\n", ret); ++ goto out_unlock; ++ } ++ /* ++ * Set no timeout (to keep controller in USB mode). ++ * This doesn't send a response, so ignore the timeout. ++ */ ++ joycon_send_usb(ctlr, JC_USB_CMD_NO_TIMEOUT, HZ/10); ++ } else if (jc_type_is_chrggrip(ctlr)) { ++ hid_err(hdev, "Failed charging grip handshake\n"); ++ ret = -ETIMEDOUT; ++ goto out_unlock; ++ } ++ ++ /* get controller calibration data, and parse it */ ++ ret = joycon_request_calibration(ctlr); ++ if (ret) { ++ /* ++ * We can function with default calibration, but it may be ++ * inaccurate. Provide a warning, and continue on. ++ */ ++ hid_warn(hdev, "Analog stick positions may be inaccurate\n"); ++ } ++ ++ /* get IMU calibration data, and parse it */ ++ ret = joycon_request_imu_calibration(ctlr); ++ if (ret) { ++ /* ++ * We can function with default calibration, but it may be ++ * inaccurate. Provide a warning, and continue on. 
++ */ ++ hid_warn(hdev, "Unable to read IMU calibration data\n"); ++ } ++ ++ /* Set the reporting mode to 0x30, which is the full report mode */ ++ ret = joycon_set_report_mode(ctlr); ++ if (ret) { ++ hid_err(hdev, "Failed to set report mode; ret=%d\n", ret); ++ goto out_unlock; ++ } ++ ++ /* Enable rumble */ ++ ret = joycon_enable_rumble(ctlr); ++ if (ret) { ++ hid_err(hdev, "Failed to enable rumble; ret=%d\n", ret); ++ goto out_unlock; ++ } ++ ++ /* Enable the IMU */ ++ ret = joycon_enable_imu(ctlr); ++ if (ret) { ++ hid_err(hdev, "Failed to enable the IMU; ret=%d\n", ret); ++ goto out_unlock; ++ } ++ ++out_unlock: ++ mutex_unlock(&ctlr->output_mutex); ++ return ret; ++} ++ + /* Common handler for parsing inputs */ + static int joycon_ctlr_read_handler(struct joycon_ctlr *ctlr, u8 *data, + int size) +@@ -2171,85 +2252,19 @@ static int nintendo_hid_probe(struct hid_device *hdev, + + hid_device_io_start(hdev); + +- /* Initialize the controller */ +- mutex_lock(&ctlr->output_mutex); +- /* if handshake command fails, assume ble pro controller */ +- if ((jc_type_is_procon(ctlr) || jc_type_is_chrggrip(ctlr)) && +- !joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ)) { +- hid_dbg(hdev, "detected USB controller\n"); +- /* set baudrate for improved latency */ +- ret = joycon_send_usb(ctlr, JC_USB_CMD_BAUDRATE_3M, HZ); +- if (ret) { +- hid_err(hdev, "Failed to set baudrate; ret=%d\n", ret); +- goto err_mutex; +- } +- /* handshake */ +- ret = joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ); +- if (ret) { +- hid_err(hdev, "Failed handshake; ret=%d\n", ret); +- goto err_mutex; +- } +- /* +- * Set no timeout (to keep controller in USB mode). +- * This doesn't send a response, so ignore the timeout. +- */ +- joycon_send_usb(ctlr, JC_USB_CMD_NO_TIMEOUT, HZ/10); +- } else if (jc_type_is_chrggrip(ctlr)) { +- hid_err(hdev, "Failed charging grip handshake\n"); +- ret = -ETIMEDOUT; +- goto err_mutex; +- } +- +- /* get controller calibration data, and parse it */ +- ret = joycon_request_calibration(ctlr); ++ ret = joycon_init(hdev); + if (ret) { +- /* +- * We can function with default calibration, but it may be +- * inaccurate. Provide a warning, and continue on. +- */ +- hid_warn(hdev, "Analog stick positions may be inaccurate\n"); +- } +- +- /* get IMU calibration data, and parse it */ +- ret = joycon_request_imu_calibration(ctlr); +- if (ret) { +- /* +- * We can function with default calibration, but it may be +- * inaccurate. Provide a warning, and continue on. 
+- */ +- hid_warn(hdev, "Unable to read IMU calibration data\n"); +- } +- +- /* Set the reporting mode to 0x30, which is the full report mode */ +- ret = joycon_set_report_mode(ctlr); +- if (ret) { +- hid_err(hdev, "Failed to set report mode; ret=%d\n", ret); +- goto err_mutex; +- } +- +- /* Enable rumble */ +- ret = joycon_enable_rumble(ctlr); +- if (ret) { +- hid_err(hdev, "Failed to enable rumble; ret=%d\n", ret); +- goto err_mutex; +- } +- +- /* Enable the IMU */ +- ret = joycon_enable_imu(ctlr); +- if (ret) { +- hid_err(hdev, "Failed to enable the IMU; ret=%d\n", ret); +- goto err_mutex; ++ hid_err(hdev, "Failed to initialize controller; ret=%d\n", ret); ++ goto err_close; + } + + ret = joycon_read_info(ctlr); + if (ret) { + hid_err(hdev, "Failed to retrieve controller info; ret=%d\n", + ret); +- goto err_mutex; ++ goto err_close; + } + +- mutex_unlock(&ctlr->output_mutex); +- + /* Initialize the leds */ + ret = joycon_leds_create(ctlr); + if (ret) { +@@ -2275,8 +2290,6 @@ static int nintendo_hid_probe(struct hid_device *hdev, + hid_dbg(hdev, "probe - success\n"); + return 0; + +-err_mutex: +- mutex_unlock(&ctlr->output_mutex); + err_close: + hid_hw_close(hdev); + err_stop: +@@ -2306,6 +2319,20 @@ static void nintendo_hid_remove(struct hid_device *hdev) + hid_hw_stop(hdev); + } + ++#ifdef CONFIG_PM ++ ++static int nintendo_hid_resume(struct hid_device *hdev) ++{ ++ int ret = joycon_init(hdev); ++ ++ if (ret) ++ hid_err(hdev, "Failed to restore controller after resume"); ++ ++ return ret; ++} ++ ++#endif ++ + static const struct hid_device_id nintendo_hid_devices[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_PROCON) }, +@@ -2327,6 +2354,10 @@ static struct hid_driver nintendo_hid_driver = { + .probe = nintendo_hid_probe, + .remove = nintendo_hid_remove, + .raw_event = nintendo_hid_event, ++ ++#ifdef CONFIG_PM ++ .resume = nintendo_hid_resume, ++#endif + }; + module_hid_driver(nintendo_hid_driver); + +-- +2.40.1 + diff --git a/queue-6.1/i2c-mux-avoid-potential-false-error-message-in-i2c_m.patch b/queue-6.1/i2c-mux-avoid-potential-false-error-message-in-i2c_m.patch new file mode 100644 index 00000000000..a6cdb14b139 --- /dev/null +++ b/queue-6.1/i2c-mux-avoid-potential-false-error-message-in-i2c_m.patch @@ -0,0 +1,37 @@ +From 5091c9bc533da515742df5c19c2336c1e6d9ef20 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 23 Sep 2023 23:54:06 +0200 +Subject: i2c: mux: Avoid potential false error message in i2c_mux_add_adapter + +From: Heiner Kallweit + +[ Upstream commit b13e59e74ff71a1004e0508107e91e9a84fd7388 ] + +I2C_CLASS_DEPRECATED is a flag and not an actual class. +There's nothing speaking against both, parent and child, having +I2C_CLASS_DEPRECATED set. Therefore exclude it from the check. 
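+
+Reduced to the bit arithmetic, with illustrative class values, the
+false positive and the fix look like this:
+
+	unsigned int parent = I2C_CLASS_HWMON | I2C_CLASS_DEPRECATED;
+	unsigned int child  = I2C_CLASS_SPD | I2C_CLASS_DEPRECATED;
+
+	bool old_check = parent & child;	/* true: spurious error */
+	bool new_check = parent & child & ~I2C_CLASS_DEPRECATED;
+						/* false: only the flag is shared */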
+ +Signed-off-by: Heiner Kallweit +Acked-by: Peter Rosin +Signed-off-by: Wolfram Sang +Signed-off-by: Sasha Levin +--- + drivers/i2c/i2c-mux.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c +index 313904be5f3bd..57ff09f18c371 100644 +--- a/drivers/i2c/i2c-mux.c ++++ b/drivers/i2c/i2c-mux.c +@@ -341,7 +341,7 @@ int i2c_mux_add_adapter(struct i2c_mux_core *muxc, + priv->adap.lock_ops = &i2c_parent_lock_ops; + + /* Sanity check on class */ +- if (i2c_mux_parent_classes(parent) & class) ++ if (i2c_mux_parent_classes(parent) & class & ~I2C_CLASS_DEPRECATED) + dev_err(&parent->dev, + "Segment %d behind mux can't share classes with ancestors\n", + chan_id); +-- +2.40.1 + diff --git a/queue-6.1/ice-remove-redundant-pci_enable_pcie_error_reporting.patch b/queue-6.1/ice-remove-redundant-pci_enable_pcie_error_reporting.patch new file mode 100644 index 00000000000..1bf8a1e11a4 --- /dev/null +++ b/queue-6.1/ice-remove-redundant-pci_enable_pcie_error_reporting.patch @@ -0,0 +1,65 @@ +From 6fe87666925af7b0dc7c3cde280e08fac6bee892 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 18 Jan 2023 17:46:09 -0600 +Subject: ice: Remove redundant pci_enable_pcie_error_reporting() + +From: Bjorn Helgaas + +[ Upstream commit ba153552c18d7eb839ec0bad7d7484e29ba4719c ] + +pci_enable_pcie_error_reporting() enables the device to send ERR_* +Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is +native"), the PCI core does this for all devices during enumeration. + +Remove the redundant pci_enable_pcie_error_reporting() call from the +driver. Also remove the corresponding pci_disable_pcie_error_reporting() +from the driver .remove() path. + +Note that this doesn't control interrupt generation by the Root Port; that +is controlled by the AER Root Error Command register, which is managed by +the AER service driver. 
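+
+As a hedged sketch (not part of the patch), the probe/remove skeleton
+that results; the example_* names are placeholders:
+
+  #include <linux/pci.h>
+
+  static int example_probe(struct pci_dev *pdev,
+                           const struct pci_device_id *ent)
+  {
+          int err = pci_enable_device(pdev);
+
+          if (err)
+                  return err;
+          /*
+           * No pci_enable_pcie_error_reporting(pdev) here: the PCI
+           * core already enables ERR_* reporting during enumeration
+           * when AER is native.
+           */
+          pci_set_master(pdev);
+          return 0;
+  }
+
+  static void example_remove(struct pci_dev *pdev)
+  {
+          /* likewise, no pci_disable_pcie_error_reporting(pdev) */
+          pci_disable_device(pdev);
+  }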
+ +Signed-off-by: Bjorn Helgaas +Cc: Jesse Brandeburg +Cc: Tony Nguyen +Cc: intel-wired-lan@lists.osuosl.org +Cc: netdev@vger.kernel.org +Tested-by: Gurucharan G (A Contingent worker at Intel) +Signed-off-by: Tony Nguyen +Stable-dep-of: 0288c3e709e5 ("ice: reset first in crash dump kernels") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_main.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c +index ae733207d0116..f0f39364819ac 100644 +--- a/drivers/net/ethernet/intel/ice/ice_main.c ++++ b/drivers/net/ethernet/intel/ice/ice_main.c +@@ -4723,7 +4723,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) + return err; + } + +- pci_enable_pcie_error_reporting(pdev); + pci_set_master(pdev); + + pf->pdev = pdev; +@@ -5016,7 +5015,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) + ice_devlink_destroy_regions(pf); + ice_deinit_hw(hw); + err_exit_unroll: +- pci_disable_pcie_error_reporting(pdev); + pci_disable_device(pdev); + return err; + } +@@ -5142,7 +5140,6 @@ static void ice_remove(struct pci_dev *pdev) + ice_reset(&pf->hw, ICE_RESET_PFR); + pci_wait_for_pending_transaction(pdev); + ice_clear_interrupt_scheme(pf); +- pci_disable_pcie_error_reporting(pdev); + pci_disable_device(pdev); + } + +-- +2.40.1 + diff --git a/queue-6.1/iio-adc-ad7192-correct-reference-voltage.patch b/queue-6.1/iio-adc-ad7192-correct-reference-voltage.patch new file mode 100644 index 00000000000..c7e49397cdb --- /dev/null +++ b/queue-6.1/iio-adc-ad7192-correct-reference-voltage.patch @@ -0,0 +1,78 @@ +From 9f3888b3485fe949bf2cb1e0d70fa35a43f27c2a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 24 Sep 2023 18:21:48 +0300 +Subject: iio: adc: ad7192: Correct reference voltage + +From: Alisa-Dariana Roman + +[ Upstream commit 7e7dcab620cd6d34939f615cac63fc0ef7e81c72 ] + +The avdd and the reference voltage are two different sources but the +reference voltage was assigned according to the avdd supply. + +Add vref regulator structure and set the reference voltage according to +the vref supply from the devicetree. + +In case vref supply is missing, reference voltage is set according to +the avdd supply for compatibility with old devicetrees. 
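+
+A condensed sketch of that fallback (devm cleanup of the enabled vref
+elided; example_get_ref_uv() is a made-up helper, the real change is
+in the diff below):
+
+  #include <linux/err.h>
+  #include <linux/regulator/consumer.h>
+
+  static int example_get_ref_uv(struct device *dev,
+                                struct regulator *avdd)
+  {
+          struct regulator *vref;
+          int ret;
+
+          vref = devm_regulator_get_optional(dev, "vref");
+          if (IS_ERR(vref)) {
+                  /* only "supply not described" may fall back */
+                  if (PTR_ERR(vref) != -ENODEV)
+                          return PTR_ERR(vref);
+                  /* old devicetrees: reference follows AVdd */
+                  return regulator_get_voltage(avdd);
+          }
+
+          ret = regulator_enable(vref);
+          if (ret)
+                  return ret;
+
+          return regulator_get_voltage(vref);
+  }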
+ +Fixes: b581f748cce0 ("staging: iio: adc: ad7192: move out of staging") +Signed-off-by: Alisa-Dariana Roman +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20230924152149.41884-1-alisadariana@gmail.com +Signed-off-by: Jonathan Cameron +Signed-off-by: Sasha Levin +--- + drivers/iio/adc/ad7192.c | 29 +++++++++++++++++++++++++---- + 1 file changed, 25 insertions(+), 4 deletions(-) + +diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c +index 18520f7bedccd..faf680140c178 100644 +--- a/drivers/iio/adc/ad7192.c ++++ b/drivers/iio/adc/ad7192.c +@@ -177,6 +177,7 @@ struct ad7192_chip_info { + struct ad7192_state { + const struct ad7192_chip_info *chip_info; + struct regulator *avdd; ++ struct regulator *vref; + struct clk *mclk; + u16 int_vref_mv; + u32 fclk; +@@ -1014,10 +1015,30 @@ static int ad7192_probe(struct spi_device *spi) + if (ret) + return dev_err_probe(&spi->dev, ret, "Failed to enable specified DVdd supply\n"); + +- ret = regulator_get_voltage(st->avdd); +- if (ret < 0) { +- dev_err(&spi->dev, "Device tree error, reference voltage undefined\n"); +- return ret; ++ st->vref = devm_regulator_get_optional(&spi->dev, "vref"); ++ if (IS_ERR(st->vref)) { ++ if (PTR_ERR(st->vref) != -ENODEV) ++ return PTR_ERR(st->vref); ++ ++ ret = regulator_get_voltage(st->avdd); ++ if (ret < 0) ++ return dev_err_probe(&spi->dev, ret, ++ "Device tree error, AVdd voltage undefined\n"); ++ } else { ++ ret = regulator_enable(st->vref); ++ if (ret) { ++ dev_err(&spi->dev, "Failed to enable specified Vref supply\n"); ++ return ret; ++ } ++ ++ ret = devm_add_action_or_reset(&spi->dev, ad7192_reg_disable, st->vref); ++ if (ret) ++ return ret; ++ ++ ret = regulator_get_voltage(st->vref); ++ if (ret < 0) ++ return dev_err_probe(&spi->dev, ret, ++ "Device tree error, Vref voltage undefined\n"); + } + st->int_vref_mv = ret / 1000; + +-- +2.40.1 + diff --git a/queue-6.1/iio-adc-ad7192-simplify-using-devm_regulator_get_ena.patch b/queue-6.1/iio-adc-ad7192-simplify-using-devm_regulator_get_ena.patch new file mode 100644 index 00000000000..719d909af14 --- /dev/null +++ b/queue-6.1/iio-adc-ad7192-simplify-using-devm_regulator_get_ena.patch @@ -0,0 +1,59 @@ +From 981805aa2d0ad20cd7b263fbacb2e5ed43cba2fe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 19 Aug 2022 22:19:01 +0300 +Subject: iio: adc: ad7192: Simplify using devm_regulator_get_enable() + +From: Matti Vaittinen + +[ Upstream commit 1ccef2e6e9205e209ad958d2e591bcca60981007 ] + +Use devm_regulator_get_enable() instead of open coded get, enable, +add-action-to-disable-at-detach - pattern. Also drop the seemingly unused +struct member 'dvdd'. 
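+
+A hedged sketch of the replacement pattern; example_power_dvdd() is
+illustrative only:
+
+  #include <linux/regulator/consumer.h>
+
+  static int example_power_dvdd(struct device *dev)
+  {
+          /*
+           * One call gets the "dvdd" supply, enables it, and registers
+           * a devm action disabling it on detach, so no struct
+           * regulator pointer needs to live in the driver state.
+           */
+          return devm_regulator_get_enable(dev, "dvdd");
+  }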
+ +Signed-off-by: Matti Vaittinen +Link: https://lore.kernel.org/r/9719c445c095d3d308e2fc9f4f93294f5806c41c.1660934107.git.mazziesaccount@gmail.com +Signed-off-by: Jonathan Cameron +Stable-dep-of: 7e7dcab620cd ("iio: adc: ad7192: Correct reference voltage") +Signed-off-by: Sasha Levin +--- + drivers/iio/adc/ad7192.c | 15 ++------------- + 1 file changed, 2 insertions(+), 13 deletions(-) + +diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c +index 80eff7090f14a..18520f7bedccd 100644 +--- a/drivers/iio/adc/ad7192.c ++++ b/drivers/iio/adc/ad7192.c +@@ -177,7 +177,6 @@ struct ad7192_chip_info { + struct ad7192_state { + const struct ad7192_chip_info *chip_info; + struct regulator *avdd; +- struct regulator *dvdd; + struct clk *mclk; + u16 int_vref_mv; + u32 fclk; +@@ -1011,19 +1010,9 @@ static int ad7192_probe(struct spi_device *spi) + if (ret) + return ret; + +- st->dvdd = devm_regulator_get(&spi->dev, "dvdd"); +- if (IS_ERR(st->dvdd)) +- return PTR_ERR(st->dvdd); +- +- ret = regulator_enable(st->dvdd); +- if (ret) { +- dev_err(&spi->dev, "Failed to enable specified DVdd supply\n"); +- return ret; +- } +- +- ret = devm_add_action_or_reset(&spi->dev, ad7192_reg_disable, st->dvdd); ++ ret = devm_regulator_get_enable(&spi->dev, "dvdd"); + if (ret) +- return ret; ++ return dev_err_probe(&spi->dev, ret, "Failed to enable specified DVdd supply\n"); + + ret = regulator_get_voltage(st->avdd); + if (ret < 0) { +-- +2.40.1 + diff --git a/queue-6.1/iio-core-introduce-iio_device_-claim-release-_buffer.patch b/queue-6.1/iio-core-introduce-iio_device_-claim-release-_buffer.patch new file mode 100644 index 00000000000..f73805cdbfd --- /dev/null +++ b/queue-6.1/iio-core-introduce-iio_device_-claim-release-_buffer.patch @@ -0,0 +1,94 @@ +From 40a405b9b7ca7e7ff4e1d82a39565997cb8ccb31 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Oct 2022 17:16:17 +0200 +Subject: iio: core: introduce iio_device_{claim|release}_buffer_mode() APIs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Nuno Sá + +[ Upstream commit 0a8565425afd8ba0e1a0ea73e21da119ee6dacea ] + +These APIs are analogous to iio_device_claim_direct_mode() and +iio_device_release_direct_mode() but, as the name suggests, with the +logic flipped. While this looks odd enough, it will have at least two +users (in following changes) and it will be important to move the IIO +mlock to the private struct. + +Signed-off-by: Nuno Sá +Reviewed-by: Andy Shevchenko +Link: https://lore.kernel.org/r/20221012151620.1725215-2-nuno.sa@analog.com +Signed-off-by: Jonathan Cameron +Stable-dep-of: 7771c8c80d62 ("iio: cros_ec: fix an use-after-free in cros_ec_sensors_push_data()") +Signed-off-by: Sasha Levin +--- + drivers/iio/industrialio-core.c | 38 +++++++++++++++++++++++++++++++++ + include/linux/iio/iio.h | 2 ++ + 2 files changed, 40 insertions(+) + +diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c +index f3f8392623a46..c9614982cb671 100644 +--- a/drivers/iio/industrialio-core.c ++++ b/drivers/iio/industrialio-core.c +@@ -2084,6 +2084,44 @@ void iio_device_release_direct_mode(struct iio_dev *indio_dev) + } + EXPORT_SYMBOL_GPL(iio_device_release_direct_mode); + ++/** ++ * iio_device_claim_buffer_mode - Keep device in buffer mode ++ * @indio_dev: the iio_dev associated with the device ++ * ++ * If the device is in buffer mode it is guaranteed to stay ++ * that way until iio_device_release_buffer_mode() is called. ++ * ++ * Use with iio_device_release_buffer_mode(). 
++ * ++ * Returns: 0 on success, -EBUSY on failure. ++ */ ++int iio_device_claim_buffer_mode(struct iio_dev *indio_dev) ++{ ++ mutex_lock(&indio_dev->mlock); ++ ++ if (iio_buffer_enabled(indio_dev)) ++ return 0; ++ ++ mutex_unlock(&indio_dev->mlock); ++ return -EBUSY; ++} ++EXPORT_SYMBOL_GPL(iio_device_claim_buffer_mode); ++ ++/** ++ * iio_device_release_buffer_mode - releases claim on buffer mode ++ * @indio_dev: the iio_dev associated with the device ++ * ++ * Release the claim. Device is no longer guaranteed to stay ++ * in buffer mode. ++ * ++ * Use with iio_device_claim_buffer_mode(). ++ */ ++void iio_device_release_buffer_mode(struct iio_dev *indio_dev) ++{ ++ mutex_unlock(&indio_dev->mlock); ++} ++EXPORT_SYMBOL_GPL(iio_device_release_buffer_mode); ++ + /** + * iio_device_get_current_mode() - helper function providing read-only access to + * the opaque @currentmode variable +diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h +index f0ec8a5e5a7a9..9d3bd6379eb87 100644 +--- a/include/linux/iio/iio.h ++++ b/include/linux/iio/iio.h +@@ -629,6 +629,8 @@ int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev, + int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp); + int iio_device_claim_direct_mode(struct iio_dev *indio_dev); + void iio_device_release_direct_mode(struct iio_dev *indio_dev); ++int iio_device_claim_buffer_mode(struct iio_dev *indio_dev); ++void iio_device_release_buffer_mode(struct iio_dev *indio_dev); + + extern struct bus_type iio_bus_type; + +-- +2.40.1 + diff --git a/queue-6.1/iio-cros_ec-fix-an-use-after-free-in-cros_ec_sensors.patch b/queue-6.1/iio-cros_ec-fix-an-use-after-free-in-cros_ec_sensors.patch new file mode 100644 index 00000000000..f76085241bf --- /dev/null +++ b/queue-6.1/iio-cros_ec-fix-an-use-after-free-in-cros_ec_sensors.patch @@ -0,0 +1,73 @@ +From 0de7a380026dc67c494104cd8e64a70ae58efa9e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 29 Aug 2023 11:06:22 +0800 +Subject: iio: cros_ec: fix an use-after-free in cros_ec_sensors_push_data() + +From: Tzung-Bi Shih + +[ Upstream commit 7771c8c80d62ad065637ef74ed2962983f6c5f6d ] + +cros_ec_sensors_push_data() reads `indio_dev->active_scan_mask` and +calls iio_push_to_buffers_with_timestamp() without making sure the +`indio_dev` stays in buffer mode. There is a race if `indio_dev` exits +buffer mode right before cros_ec_sensors_push_data() accesses them. + +An use-after-free on `indio_dev->active_scan_mask` was observed. The +call trace: +[...] + _find_next_bit + cros_ec_sensors_push_data + cros_ec_sensorhub_event + blocking_notifier_call_chain + cros_ec_irq_thread + +It was caused by a race condition: one thread just freed +`active_scan_mask` at [1]; while another thread tried to access the +memory at [2]. + +Fix it by calling iio_device_claim_buffer_mode() to ensure the +`indio_dev` can't exit buffer mode during cros_ec_sensors_push_data(). 
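+
+A minimal sketch of the resulting calling pattern (the actual fix is
+in the diff below; sample and ts stand in for driver data):
+
+  #include <linux/iio/iio.h>
+  #include <linux/iio/buffer.h>
+
+  static int example_push(struct iio_dev *indio_dev, void *sample,
+                          s64 ts)
+  {
+          /* returns -EBUSY when the buffer is not enabled */
+          if (iio_device_claim_buffer_mode(indio_dev) < 0)
+                  return 0; /* nothing to push, not an error */
+
+          /* buffer mode cannot be left while the claim is held */
+          iio_push_to_buffers_with_timestamp(indio_dev, sample, ts);
+
+          iio_device_release_buffer_mode(indio_dev);
+          return 0;
+  }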
+ +[1]: https://elixir.bootlin.com/linux/v6.5/source/drivers/iio/industrialio-buffer.c#L1189 +[2]: https://elixir.bootlin.com/linux/v6.5/source/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c#L198 + +Cc: stable@vger.kernel.org +Fixes: aa984f1ba4a4 ("iio: cros_ec: Register to cros_ec_sensorhub when EC supports FIFO") +Signed-off-by: Tzung-Bi Shih +Reviewed-by: Guenter Roeck +Reviewed-by: Stephen Boyd +Link: https://lore.kernel.org/r/20230829030622.1571852-1-tzungbi@kernel.org +Signed-off-by: Jonathan Cameron +Signed-off-by: Sasha Levin +--- + drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c +index d98f7e4d202c1..1ddce991fb3f4 100644 +--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c ++++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c +@@ -190,8 +190,11 @@ int cros_ec_sensors_push_data(struct iio_dev *indio_dev, + /* + * Ignore samples if the buffer is not set: it is needed if the ODR is + * set but the buffer is not enabled yet. ++ * ++ * Note: iio_device_claim_buffer_mode() returns -EBUSY if the buffer ++ * is not enabled. + */ +- if (!iio_buffer_enabled(indio_dev)) ++ if (iio_device_claim_buffer_mode(indio_dev) < 0) + return 0; + + out = (s16 *)st->samples; +@@ -210,6 +213,7 @@ int cros_ec_sensors_push_data(struct iio_dev *indio_dev, + iio_push_to_buffers_with_timestamp(indio_dev, st->samples, + timestamp + delta); + ++ iio_device_release_buffer_mode(indio_dev); + return 0; + } + EXPORT_SYMBOL_GPL(cros_ec_sensors_push_data); +-- +2.40.1 + diff --git a/queue-6.1/ipv4-fib-send-notify-when-delete-source-address-rout.patch b/queue-6.1/ipv4-fib-send-notify-when-delete-source-address-rout.patch new file mode 100644 index 00000000000..792e111b117 --- /dev/null +++ b/queue-6.1/ipv4-fib-send-notify-when-delete-source-address-rout.patch @@ -0,0 +1,112 @@ +From d93415c3d75f0be620adc7f1c8a295a640378ca8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Sep 2023 15:55:08 +0800 +Subject: ipv4/fib: send notify when delete source address routes + +From: Hangbin Liu + +[ Upstream commit 4b2b606075e50cdae62ab2356b0a1e206947c354 ] + +After deleting an interface address in fib_del_ifaddr(), the function +scans the fib_info list for stray entries and calls fib_flush() and +fib_table_flush(). Then the stray entries will be deleted silently and no +RTM_DELROUTE notification will be sent. + +This lack of notification can make routing daemons, or monitor like +`ip monitor route` miss the routing changes. e.g. + ++ ip link add dummy1 type dummy ++ ip link add dummy2 type dummy ++ ip link set dummy1 up ++ ip link set dummy2 up ++ ip addr add 192.168.5.5/24 dev dummy1 ++ ip route add 7.7.7.0/24 dev dummy2 src 192.168.5.5 ++ ip -4 route +7.7.7.0/24 dev dummy2 scope link src 192.168.5.5 +192.168.5.0/24 dev dummy1 proto kernel scope link src 192.168.5.5 ++ ip monitor route ++ ip addr del 192.168.5.5/24 dev dummy1 +Deleted 192.168.5.0/24 dev dummy1 proto kernel scope link src 192.168.5.5 +Deleted broadcast 192.168.5.255 dev dummy1 table local proto kernel scope link src 192.168.5.5 +Deleted local 192.168.5.5 dev dummy1 table local proto kernel scope host src 192.168.5.5 + +As Ido reminded, fib_table_flush() isn't only called when an address is +deleted, but also when an interface is deleted or put down. The lack of +notification in these cases is deliberate. 
And commit 7c6bb7d2faaf +("net/ipv6: Add knob to skip DELROUTE message on device down") introduced +a sysctl to make IPv6 behave like IPv4 in this regard. So we can't send +the route delete notify blindly in fib_table_flush(). + +To fix this issue, let's add a new flag in "struct fib_info" to track the +deleted prefer source address routes, and only send notify for them. + +After update: ++ ip monitor route ++ ip addr del 192.168.5.5/24 dev dummy1 +Deleted 192.168.5.0/24 dev dummy1 proto kernel scope link src 192.168.5.5 +Deleted broadcast 192.168.5.255 dev dummy1 table local proto kernel scope link src 192.168.5.5 +Deleted local 192.168.5.5 dev dummy1 table local proto kernel scope host src 192.168.5.5 +Deleted 7.7.7.0/24 dev dummy2 scope link src 192.168.5.5 + +Suggested-by: Thomas Haller +Signed-off-by: Hangbin Liu +Acked-by: Nicolas Dichtel +Reviewed-by: David Ahern +Link: https://lore.kernel.org/r/20230922075508.848925-1-liuhangbin@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + include/net/ip_fib.h | 1 + + net/ipv4/fib_semantics.c | 1 + + net/ipv4/fib_trie.c | 4 ++++ + 3 files changed, 6 insertions(+) + +diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h +index f0c13864180e2..15de07d365405 100644 +--- a/include/net/ip_fib.h ++++ b/include/net/ip_fib.h +@@ -154,6 +154,7 @@ struct fib_info { + int fib_nhs; + bool fib_nh_is_v6; + bool nh_updated; ++ bool pfsrc_removed; + struct nexthop *nh; + struct rcu_head rcu; + struct fib_nh fib_nh[]; +diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c +index 894d8ac6b9d0e..5eb1b8d302bbd 100644 +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -1891,6 +1891,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local) + continue; + if (fi->fib_prefsrc == local) { + fi->fib_flags |= RTNH_F_DEAD; ++ fi->pfsrc_removed = true; + ret++; + } + } +diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c +index d13fb9e76b971..9bdfdab906fe0 100644 +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -2027,6 +2027,7 @@ void fib_table_flush_external(struct fib_table *tb) + int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) + { + struct trie *t = (struct trie *)tb->tb_data; ++ struct nl_info info = { .nl_net = net }; + struct key_vector *pn = t->kv; + unsigned long cindex = 1; + struct hlist_node *tmp; +@@ -2089,6 +2090,9 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) + + fib_notify_alias_delete(net, n->key, &n->leaf, fa, + NULL); ++ if (fi->pfsrc_removed) ++ rtmsg_fib(RTM_DELROUTE, htonl(n->key), fa, ++ KEYLENGTH - fa->fa_slen, tb->tb_id, &info, 0); + hlist_del_rcu(&fa->fa_list); + fib_release_info(fa->fa_info); + alias_free_mem_rcu(fa); +-- +2.40.1 + diff --git a/queue-6.1/net-devlink-convert-devlink-port-type-specific-point.patch b/queue-6.1/net-devlink-convert-devlink-port-type-specific-point.patch new file mode 100644 index 00000000000..83462473872 --- /dev/null +++ b/queue-6.1/net-devlink-convert-devlink-port-type-specific-point.patch @@ -0,0 +1,100 @@ +From fd9da306627bdf1e29c96bd84e2ad5ed5772797b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Nov 2022 17:01:59 +0100 +Subject: net: devlink: convert devlink port type-specific pointers to union + +From: Jiri Pirko + +[ Upstream commit 3830c5719af66fac9849cf5fb04b03d4e4bb46ff ] + +Instead of storing type_dev as a void pointer, convert it to union and +use it to store either struct net_device or struct ib_device pointer. 
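+
+A hedged sketch of the typed access this makes possible;
+example_port_netdev() is illustrative, not an API added here:
+
+  #include <net/devlink.h>
+
+  static struct net_device *
+  example_port_netdev(struct devlink_port *port)
+  {
+          /* valid only under port->type_lock */
+          if (port->type == DEVLINK_PORT_TYPE_ETH)
+                  return port->type_eth.netdev;
+          return NULL;
+  }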
+ +Signed-off-by: Jiri Pirko +Signed-off-by: Jakub Kicinski +Stable-dep-of: 8e15aee62161 ("net: move altnames together with the netdevice") +Signed-off-by: Sasha Levin +--- + include/net/devlink.h | 13 ++++++++++--- + net/devlink/leftover.c | 17 +++++++++++++---- + 2 files changed, 23 insertions(+), 7 deletions(-) + +diff --git a/include/net/devlink.h b/include/net/devlink.h +index ba6b8b0949432..6c55aabaedf19 100644 +--- a/include/net/devlink.h ++++ b/include/net/devlink.h +@@ -121,12 +121,19 @@ struct devlink_port { + struct list_head region_list; + struct devlink *devlink; + unsigned int index; +- spinlock_t type_lock; /* Protects type and type_dev +- * pointer consistency. ++ spinlock_t type_lock; /* Protects type and type_eth/ib ++ * structures consistency. + */ + enum devlink_port_type type; + enum devlink_port_type desired_type; +- void *type_dev; ++ union { ++ struct { ++ struct net_device *netdev; ++ } type_eth; ++ struct { ++ struct ib_device *ibdev; ++ } type_ib; ++ }; + struct devlink_port_attrs attrs; + u8 attrs_set:1, + switch_port:1, +diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c +index 032c7af065cd9..6fee4ce6724b7 100644 +--- a/net/devlink/leftover.c ++++ b/net/devlink/leftover.c +@@ -1303,7 +1303,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, + goto nla_put_failure_type_locked; + if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) { + struct net *net = devlink_net(devlink_port->devlink); +- struct net_device *netdev = devlink_port->type_dev; ++ struct net_device *netdev = devlink_port->type_eth.netdev; + + if (netdev && net_eq(net, dev_net(netdev)) && + (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX, +@@ -1313,7 +1313,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, + goto nla_put_failure_type_locked; + } + if (devlink_port->type == DEVLINK_PORT_TYPE_IB) { +- struct ib_device *ibdev = devlink_port->type_dev; ++ struct ib_device *ibdev = devlink_port->type_ib.ibdev; + + if (ibdev && + nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME, +@@ -10012,7 +10012,16 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, + devlink_port_type_warn_cancel(devlink_port); + spin_lock_bh(&devlink_port->type_lock); + devlink_port->type = type; +- devlink_port->type_dev = type_dev; ++ switch (type) { ++ case DEVLINK_PORT_TYPE_ETH: ++ devlink_port->type_eth.netdev = type_dev; ++ break; ++ case DEVLINK_PORT_TYPE_IB: ++ devlink_port->type_ib.ibdev = type_dev; ++ break; ++ default: ++ break; ++ } + spin_unlock_bh(&devlink_port->type_lock); + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); + } +@@ -12027,7 +12036,7 @@ devlink_trap_report_metadata_set(struct devlink_trap_metadata *metadata, + + spin_lock(&in_devlink_port->type_lock); + if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH) +- metadata->input_dev = in_devlink_port->type_dev; ++ metadata->input_dev = in_devlink_port->type_eth.netdev; + spin_unlock(&in_devlink_port->type_lock); + } + +-- +2.40.1 + diff --git a/queue-6.1/net-devlink-move-port_type_netdev_checks-call-to-__d.patch b/queue-6.1/net-devlink-move-port_type_netdev_checks-call-to-__d.patch new file mode 100644 index 00000000000..5a6125ce83f --- /dev/null +++ b/queue-6.1/net-devlink-move-port_type_netdev_checks-call-to-__d.patch @@ -0,0 +1,113 @@ +From 879f3378cd4d13d9048f7fc13aa5ec49504819f6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Nov 2022 17:02:01 +0100 +Subject: net: devlink: move port_type_netdev_checks() call to + __devlink_port_type_set() + +From: Jiri Pirko + +[ Upstream commit 
45791e0d00c445936bb19535fe847083b1edd26d ] + +As __devlink_port_type_set() is going to be called directly from netdevice +notifier event handle in one of the follow-up patches, move the +port_type_netdev_checks() call there. + +Signed-off-by: Jiri Pirko +Signed-off-by: Jakub Kicinski +Stable-dep-of: 8e15aee62161 ("net: move altnames together with the netdevice") +Signed-off-by: Sasha Levin +--- + net/devlink/leftover.c | 63 ++++++++++++++++++++++-------------------- + 1 file changed, 33 insertions(+), 30 deletions(-) + +diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c +index 53dde50c5d6e2..e06fe0fad5d7d 100644 +--- a/net/devlink/leftover.c ++++ b/net/devlink/leftover.c +@@ -10003,33 +10003,6 @@ void devlink_port_unregister(struct devlink_port *devlink_port) + } + EXPORT_SYMBOL_GPL(devlink_port_unregister); + +-static void __devlink_port_type_set(struct devlink_port *devlink_port, +- enum devlink_port_type type, +- void *type_dev) +-{ +- ASSERT_DEVLINK_PORT_REGISTERED(devlink_port); +- +- if (type == DEVLINK_PORT_TYPE_NOTSET) +- devlink_port_type_warn_schedule(devlink_port); +- else +- devlink_port_type_warn_cancel(devlink_port); +- +- spin_lock_bh(&devlink_port->type_lock); +- devlink_port->type = type; +- switch (type) { +- case DEVLINK_PORT_TYPE_ETH: +- devlink_port->type_eth.netdev = type_dev; +- break; +- case DEVLINK_PORT_TYPE_IB: +- devlink_port->type_ib.ibdev = type_dev; +- break; +- default: +- break; +- } +- spin_unlock_bh(&devlink_port->type_lock); +- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); +-} +- + static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port, + struct net_device *netdev) + { +@@ -10067,6 +10040,38 @@ static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port, + } + } + ++static void __devlink_port_type_set(struct devlink_port *devlink_port, ++ enum devlink_port_type type, ++ void *type_dev) ++{ ++ struct net_device *netdev = type_dev; ++ ++ ASSERT_DEVLINK_PORT_REGISTERED(devlink_port); ++ ++ if (type == DEVLINK_PORT_TYPE_NOTSET) { ++ devlink_port_type_warn_schedule(devlink_port); ++ } else { ++ devlink_port_type_warn_cancel(devlink_port); ++ if (type == DEVLINK_PORT_TYPE_ETH && netdev) ++ devlink_port_type_netdev_checks(devlink_port, netdev); ++ } ++ ++ spin_lock_bh(&devlink_port->type_lock); ++ devlink_port->type = type; ++ switch (type) { ++ case DEVLINK_PORT_TYPE_ETH: ++ devlink_port->type_eth.netdev = netdev; ++ break; ++ case DEVLINK_PORT_TYPE_IB: ++ devlink_port->type_ib.ibdev = type_dev; ++ break; ++ default: ++ break; ++ } ++ spin_unlock_bh(&devlink_port->type_lock); ++ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); ++} ++ + /** + * devlink_port_type_eth_set - Set port type to Ethernet + * +@@ -10076,9 +10081,7 @@ static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port, + void devlink_port_type_eth_set(struct devlink_port *devlink_port, + struct net_device *netdev) + { +- if (netdev) +- devlink_port_type_netdev_checks(devlink_port, netdev); +- else ++ if (!netdev) + dev_warn(devlink_port->devlink->dev, + "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n", + devlink_port->index); +-- +2.40.1 + diff --git a/queue-6.1/net-devlink-move-port_type_warn_schedule-call-to-__d.patch b/queue-6.1/net-devlink-move-port_type_warn_schedule-call-to-__d.patch new file mode 100644 index 00000000000..947c86483c4 --- /dev/null +++ b/queue-6.1/net-devlink-move-port_type_warn_schedule-call-to-__d.patch 
@@ -0,0 +1,50 @@ +From fcd7c13e3f519e3effc607cab19cf65613c7f7bf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Nov 2022 17:02:00 +0100 +Subject: net: devlink: move port_type_warn_schedule() call to + __devlink_port_type_set() + +From: Jiri Pirko + +[ Upstream commit 8573a04404ddacb2d966eef09bf38b2ad6dbe86f ] + +As __devlink_port_type_set() is going to be called directly from netdevice +notifier event handle in one of the follow-up patches, move the +port_type_warn_schedule() call there. + +Signed-off-by: Jiri Pirko +Signed-off-by: Jakub Kicinski +Stable-dep-of: 8e15aee62161 ("net: move altnames together with the netdevice") +Signed-off-by: Sasha Levin +--- + net/devlink/leftover.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c +index 6fee4ce6724b7..53dde50c5d6e2 100644 +--- a/net/devlink/leftover.c ++++ b/net/devlink/leftover.c +@@ -10009,7 +10009,11 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, + { + ASSERT_DEVLINK_PORT_REGISTERED(devlink_port); + +- devlink_port_type_warn_cancel(devlink_port); ++ if (type == DEVLINK_PORT_TYPE_NOTSET) ++ devlink_port_type_warn_schedule(devlink_port); ++ else ++ devlink_port_type_warn_cancel(devlink_port); ++ + spin_lock_bh(&devlink_port->type_lock); + devlink_port->type = type; + switch (type) { +@@ -10104,7 +10108,6 @@ EXPORT_SYMBOL_GPL(devlink_port_type_ib_set); + void devlink_port_type_clear(struct devlink_port *devlink_port) + { + __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL); +- devlink_port_type_warn_schedule(devlink_port); + } + EXPORT_SYMBOL_GPL(devlink_port_type_clear); + +-- +2.40.1 + diff --git a/queue-6.1/net-devlink-take-rtnl-in-port_fill-function-only-if-.patch b/queue-6.1/net-devlink-take-rtnl-in-port_fill-function-only-if-.patch new file mode 100644 index 00000000000..23a7bec3e2c --- /dev/null +++ b/queue-6.1/net-devlink-take-rtnl-in-port_fill-function-only-if-.patch @@ -0,0 +1,181 @@ +From 4dce0f9fdd80044edf6ad75b9906aab98d573553 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Nov 2022 17:02:02 +0100 +Subject: net: devlink: take RTNL in port_fill() function only if it is not + held + +From: Jiri Pirko + +[ Upstream commit d41c9dbd12745cfc1cb2946cd99016d83c2c5364 ] + +Follow-up patch is going to introduce a netdevice notifier event +processing which is called with RTNL mutex held. Processing of this will +eventually lead to call to port_notity() and port_fill() which currently +takes RTNL mutex internally. So as a temporary solution, propagate a +bool indicating if the mutex is already held. This will go away in one +of the follow-up patches. 
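+
+The shape of that temporary pattern, as a hedged sketch:
+
+  #include <linux/rtnetlink.h>
+
+  static void example_fill(bool rtnl_held)
+  {
+          if (!rtnl_held)
+                  rtnl_lock();
+
+          /* ... read netdev attributes under RTNL ... */
+
+          if (!rtnl_held)
+                  rtnl_unlock();
+  }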
+ +Signed-off-by: Jiri Pirko +Signed-off-by: Jakub Kicinski +Stable-dep-of: 8e15aee62161 ("net: move altnames together with the netdevice") +Signed-off-by: Sasha Levin +--- + net/devlink/leftover.c | 46 ++++++++++++++++++++++++++++-------------- + 1 file changed, 31 insertions(+), 15 deletions(-) + +diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c +index e06fe0fad5d7d..b077acc255890 100644 +--- a/net/devlink/leftover.c ++++ b/net/devlink/leftover.c +@@ -1278,7 +1278,8 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por + static int devlink_nl_port_fill(struct sk_buff *msg, + struct devlink_port *devlink_port, + enum devlink_command cmd, u32 portid, u32 seq, +- int flags, struct netlink_ext_ack *extack) ++ int flags, struct netlink_ext_ack *extack, ++ bool rtnl_held) + { + struct devlink *devlink = devlink_port->devlink; + void *hdr; +@@ -1293,7 +1294,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg, + goto nla_put_failure; + + /* Hold rtnl lock while accessing port's netdev attributes. */ +- rtnl_lock(); ++ if (!rtnl_held) ++ rtnl_lock(); + spin_lock_bh(&devlink_port->type_lock); + if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type)) + goto nla_put_failure_type_locked; +@@ -1321,7 +1323,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg, + goto nla_put_failure_type_locked; + } + spin_unlock_bh(&devlink_port->type_lock); +- rtnl_unlock(); ++ if (!rtnl_held) ++ rtnl_unlock(); + if (devlink_nl_port_attrs_put(msg, devlink_port)) + goto nla_put_failure; + if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack)) +@@ -1336,14 +1339,15 @@ static int devlink_nl_port_fill(struct sk_buff *msg, + + nla_put_failure_type_locked: + spin_unlock_bh(&devlink_port->type_lock); +- rtnl_unlock(); ++ if (!rtnl_held) ++ rtnl_unlock(); + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; + } + +-static void devlink_port_notify(struct devlink_port *devlink_port, +- enum devlink_command cmd) ++static void __devlink_port_notify(struct devlink_port *devlink_port, ++ enum devlink_command cmd, bool rtnl_held) + { + struct devlink *devlink = devlink_port->devlink; + struct sk_buff *msg; +@@ -1358,7 +1362,8 @@ static void devlink_port_notify(struct devlink_port *devlink_port, + if (!msg) + return; + +- err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL); ++ err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL, ++ rtnl_held); + if (err) { + nlmsg_free(msg); + return; +@@ -1368,6 +1373,12 @@ static void devlink_port_notify(struct devlink_port *devlink_port, + 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + } + ++static void devlink_port_notify(struct devlink_port *devlink_port, ++ enum devlink_command cmd) ++{ ++ __devlink_port_notify(devlink_port, cmd, false); ++} ++ + static void devlink_rate_notify(struct devlink_rate *devlink_rate, + enum devlink_command cmd) + { +@@ -1534,7 +1545,7 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb, + + err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_PORT_NEW, + info->snd_portid, info->snd_seq, 0, +- info->extack); ++ info->extack, false); + if (err) { + nlmsg_free(msg); + return err; +@@ -1564,7 +1575,8 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, + DEVLINK_CMD_NEW, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, +- NLM_F_MULTI, cb->extack); ++ NLM_F_MULTI, cb->extack, ++ false); + if (err) { + devl_unlock(devlink); + devlink_put(devlink); +@@ -1776,7 +1788,8 @@ static int devlink_port_new_notify(struct devlink *devlink, + } + 
+ err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW, +- info->snd_portid, info->snd_seq, 0, NULL); ++ info->snd_portid, info->snd_seq, 0, NULL, ++ false); + if (err) + goto out; + +@@ -10042,7 +10055,7 @@ static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port, + + static void __devlink_port_type_set(struct devlink_port *devlink_port, + enum devlink_port_type type, +- void *type_dev) ++ void *type_dev, bool rtnl_held) + { + struct net_device *netdev = type_dev; + +@@ -10069,7 +10082,7 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, + break; + } + spin_unlock_bh(&devlink_port->type_lock); +- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); ++ __devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW, rtnl_held); + } + + /** +@@ -10086,7 +10099,8 @@ void devlink_port_type_eth_set(struct devlink_port *devlink_port, + "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n", + devlink_port->index); + +- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, netdev); ++ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, netdev, ++ false); + } + EXPORT_SYMBOL_GPL(devlink_port_type_eth_set); + +@@ -10099,7 +10113,8 @@ EXPORT_SYMBOL_GPL(devlink_port_type_eth_set); + void devlink_port_type_ib_set(struct devlink_port *devlink_port, + struct ib_device *ibdev) + { +- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_IB, ibdev); ++ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_IB, ibdev, ++ false); + } + EXPORT_SYMBOL_GPL(devlink_port_type_ib_set); + +@@ -10110,7 +10125,8 @@ EXPORT_SYMBOL_GPL(devlink_port_type_ib_set); + */ + void devlink_port_type_clear(struct devlink_port *devlink_port) + { +- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL); ++ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL, ++ false); + } + EXPORT_SYMBOL_GPL(devlink_port_type_clear); + +-- +2.40.1 + diff --git a/queue-6.1/net-devlink-track-netdev-with-devlink_port-assigned.patch b/queue-6.1/net-devlink-track-netdev-with-devlink_port-assigned.patch new file mode 100644 index 00000000000..324065c2b4d --- /dev/null +++ b/queue-6.1/net-devlink-track-netdev-with-devlink_port-assigned.patch @@ -0,0 +1,249 @@ +From d9a5b96d376231439213984dddf5b0b0ccccfc75 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Nov 2022 17:02:03 +0100 +Subject: net: devlink: track netdev with devlink_port assigned + +From: Jiri Pirko + +[ Upstream commit 02a68a47eadedf95748facfca6ced31fb0181d52 ] + +Currently, ethernet drivers are using devlink_port_type_eth_set() and +devlink_port_type_clear() to set devlink port type and link to related +netdev. + +Instead of calling them directly, let the driver use +SET_NETDEV_DEVLINK_PORT macro to assign devlink_port pointer and let +devlink to track it. Note the devlink port pointer is static during +the time netdevice is registered. + +In devlink code, use per-namespace netdev notifier to track +the netdevices with devlink_port assigned and change the internal +devlink_port type and related type pointer accordingly. 
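+
+A hedged sketch of the notifier wiring (simplified; the real handler
+is devlink_netdevice_event() in the diff below):
+
+  #include <linux/netdevice.h>
+
+  static int example_event(struct notifier_block *nb,
+                           unsigned long event, void *ptr)
+  {
+          struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+          if (!netdev->devlink_port)
+                  return NOTIFY_OK;
+
+          switch (event) {
+          case NETDEV_REGISTER:
+                  /* attach the netdev to the port's ETH type */
+                  break;
+          case NETDEV_UNREGISTER:
+                  /* clear the netdev, keep the type */
+                  break;
+          }
+          return NOTIFY_OK;
+  }
+
+  static struct notifier_block example_nb = {
+          .notifier_call = example_event,
+  };
+
+  /* per-namespace: register_netdevice_notifier_net(net, &example_nb)
+   * at alloc time, unregister_netdevice_notifier_net() on free.
+   */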
+ +Signed-off-by: Jiri Pirko +Signed-off-by: Jakub Kicinski +Stable-dep-of: 8e15aee62161 ("net: move altnames together with the netdevice") +Signed-off-by: Sasha Levin +--- + include/linux/netdevice.h | 19 ++++++++++ + net/core/dev.c | 14 +++++--- + net/devlink/leftover.c | 75 ++++++++++++++++++++++++++++++++++++--- + 3 files changed, 99 insertions(+), 9 deletions(-) + +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index 5a04fbf724768..2b8646c39dcdd 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -2011,6 +2011,11 @@ enum netdev_ml_priv_type { + * registered + * @offload_xstats_l3: L3 HW stats for this netdevice. + * ++ * @devlink_port: Pointer to related devlink port structure. ++ * Assigned by a driver before netdev registration using ++ * SET_NETDEV_DEVLINK_PORT macro. This pointer is static ++ * during the time netdevice is registered. ++ * + * FIXME: cleanup struct net_device such that network protocol info + * moves out. + */ +@@ -2361,9 +2366,22 @@ struct net_device { + netdevice_tracker watchdog_dev_tracker; + netdevice_tracker dev_registered_tracker; + struct rtnl_hw_stats64 *offload_xstats_l3; ++ ++ struct devlink_port *devlink_port; + }; + #define to_net_dev(d) container_of(d, struct net_device, dev) + ++/* ++ * Driver should use this to assign devlink port instance to a netdevice ++ * before it registers the netdevice. Therefore devlink_port is static ++ * during the netdev lifetime after it is registered. ++ */ ++#define SET_NETDEV_DEVLINK_PORT(dev, port) \ ++({ \ ++ WARN_ON((dev)->reg_state != NETREG_UNINITIALIZED); \ ++ ((dev)->devlink_port = (port)); \ ++}) ++ + static inline bool netif_elide_gro(const struct net_device *dev) + { + if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog) +@@ -2798,6 +2816,7 @@ enum netdev_cmd { + NETDEV_PRE_TYPE_CHANGE, + NETDEV_POST_TYPE_CHANGE, + NETDEV_POST_INIT, ++ NETDEV_PRE_UNINIT, + NETDEV_RELEASE, + NETDEV_NOTIFY_PEERS, + NETDEV_JOIN, +diff --git a/net/core/dev.c b/net/core/dev.c +index 9cacd17feeaae..9bf10c9c4735a 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1637,10 +1637,10 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd) + N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER) + N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE) + N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE) +- N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER) +- N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO) +- N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO) +- N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN) ++ N(POST_INIT) N(PRE_UNINIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) ++ N(CHANGEUPPER) N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) ++ N(BONDING_INFO) N(PRECHANGEUPPER) N(CHANGELOWERSTATE) ++ N(UDP_TUNNEL_PUSH_INFO) N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN) + N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO) + N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO) + N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE) +@@ -10086,7 +10086,7 @@ int register_netdevice(struct net_device *dev) + dev->reg_state = ret ? 
NETREG_UNREGISTERED : NETREG_REGISTERED; + write_unlock(&dev_base_lock); + if (ret) +- goto err_uninit; ++ goto err_uninit_notify; + + __netdev_update_features(dev); + +@@ -10133,6 +10133,8 @@ int register_netdevice(struct net_device *dev) + out: + return ret; + ++err_uninit_notify: ++ call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); + err_uninit: + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); +@@ -10883,6 +10885,8 @@ void unregister_netdevice_many(struct list_head *head) + netdev_name_node_alt_flush(dev); + netdev_name_node_free(dev->name_node); + ++ call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); ++ + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); + +diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c +index b077acc255890..7ccfe69afd4b6 100644 +--- a/net/devlink/leftover.c ++++ b/net/devlink/leftover.c +@@ -71,6 +71,7 @@ struct devlink { + refcount_t refcount; + struct completion comp; + struct rcu_head rcu; ++ struct notifier_block netdevice_nb; + char priv[] __aligned(NETDEV_ALIGN); + }; + +@@ -9618,6 +9619,9 @@ void devlink_set_features(struct devlink *devlink, u64 features) + } + EXPORT_SYMBOL_GPL(devlink_set_features); + ++static int devlink_netdevice_event(struct notifier_block *nb, ++ unsigned long event, void *ptr); ++ + /** + * devlink_alloc_ns - Allocate new devlink instance resources + * in specific namespace +@@ -9648,10 +9652,13 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, + + ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b, + &last_id, GFP_KERNEL); +- if (ret < 0) { +- kfree(devlink); +- return NULL; +- } ++ if (ret < 0) ++ goto err_xa_alloc; ++ ++ devlink->netdevice_nb.notifier_call = devlink_netdevice_event; ++ ret = register_netdevice_notifier_net(net, &devlink->netdevice_nb); ++ if (ret) ++ goto err_register_netdevice_notifier; + + devlink->dev = dev; + devlink->ops = ops; +@@ -9678,6 +9685,12 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, + init_completion(&devlink->comp); + + return devlink; ++ ++err_register_netdevice_notifier: ++ xa_erase(&devlinks, devlink->index); ++err_xa_alloc: ++ kfree(devlink); ++ return NULL; + } + EXPORT_SYMBOL_GPL(devlink_alloc_ns); + +@@ -9834,6 +9847,10 @@ void devlink_free(struct devlink *devlink) + WARN_ON(!list_empty(&devlink->port_list)); + + xa_destroy(&devlink->snapshot_ids); ++ ++ unregister_netdevice_notifier_net(devlink_net(devlink), ++ &devlink->netdevice_nb); ++ + xa_erase(&devlinks, devlink->index); + + kfree(devlink); +@@ -10130,6 +10147,56 @@ void devlink_port_type_clear(struct devlink_port *devlink_port) + } + EXPORT_SYMBOL_GPL(devlink_port_type_clear); + ++static int devlink_netdevice_event(struct notifier_block *nb, ++ unsigned long event, void *ptr) ++{ ++ struct net_device *netdev = netdev_notifier_info_to_dev(ptr); ++ struct devlink_port *devlink_port = netdev->devlink_port; ++ struct devlink *devlink; ++ ++ devlink = container_of(nb, struct devlink, netdevice_nb); ++ ++ if (!devlink_port || devlink_port->devlink != devlink) ++ return NOTIFY_OK; ++ ++ switch (event) { ++ case NETDEV_POST_INIT: ++ /* Set the type but not netdev pointer. It is going to be set ++ * later on by NETDEV_REGISTER event. Happens once during ++ * netdevice register ++ */ ++ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, ++ NULL, true); ++ break; ++ case NETDEV_REGISTER: ++ /* Set the netdev on top of previously set type. 
Note this ++ * event happens also during net namespace change so here ++ * we take into account netdev pointer appearing in this ++ * namespace. ++ */ ++ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, ++ netdev, true); ++ break; ++ case NETDEV_UNREGISTER: ++ /* Clear netdev pointer, but not the type. This event happens ++ * also during net namespace change so we need to clear ++ * pointer to netdev that is going to another net namespace. ++ */ ++ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, ++ NULL, true); ++ break; ++ case NETDEV_PRE_UNINIT: ++ /* Clear the type and the netdev pointer. Happens one during ++ * netdevice unregister. ++ */ ++ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, ++ NULL, true); ++ break; ++ } ++ ++ return NOTIFY_OK; ++} ++ + static int __devlink_port_attrs_set(struct devlink_port *devlink_port, + enum devlink_port_flavour flavour) + { +-- +2.40.1 + diff --git a/queue-6.1/net-dst-fix-missing-initialization-of-rt_uncached.patch b/queue-6.1/net-dst-fix-missing-initialization-of-rt_uncached.patch new file mode 100644 index 00000000000..3244b264911 --- /dev/null +++ b/queue-6.1/net-dst-fix-missing-initialization-of-rt_uncached.patch @@ -0,0 +1,177 @@ +From f4fdfd10202488104e6e484bd76fd1b5cd7c10c6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Apr 2023 20:25:08 +0200 +Subject: net: dst: fix missing initialization of rt_uncached + +From: Maxime Bizon + +[ Upstream commit 418a73074da9182f571e467eaded03ea501f3281 ] + +xfrm_alloc_dst() followed by xfrm4_dst_destroy(), without a +xfrm4_fill_dst() call in between, causes the following BUG: + + BUG: spinlock bad magic on CPU#0, fbxhostapd/732 + lock: 0x890b7668, .magic: 890b7668, .owner: /-1, .owner_cpu: 0 + CPU: 0 PID: 732 Comm: fbxhostapd Not tainted 6.3.0-rc6-next-20230414-00613-ge8de66369925-dirty #9 + Hardware name: Marvell Kirkwood (Flattened Device Tree) + unwind_backtrace from show_stack+0x10/0x14 + show_stack from dump_stack_lvl+0x28/0x30 + dump_stack_lvl from do_raw_spin_lock+0x20/0x80 + do_raw_spin_lock from rt_del_uncached_list+0x30/0x64 + rt_del_uncached_list from xfrm4_dst_destroy+0x3c/0xbc + xfrm4_dst_destroy from dst_destroy+0x5c/0xb0 + dst_destroy from rcu_process_callbacks+0xc4/0xec + rcu_process_callbacks from __do_softirq+0xb4/0x22c + __do_softirq from call_with_stack+0x1c/0x24 + call_with_stack from do_softirq+0x60/0x6c + do_softirq from __local_bh_enable_ip+0xa0/0xcc + +Patch "net: dst: Prevent false sharing vs. dst_entry:: __refcnt" moved +rt_uncached and rt_uncached_list fields from rtable struct to dst +struct, so they are more zeroed by memset_after(xdst, 0, u.dst) in +xfrm_alloc_dst(). + +Note that rt_uncached (list_head) was never properly initialized at +alloc time, but xfrm[46]_dst_destroy() is written in such a way that +it was not an issue thanks to the memset: + + if (xdst->u.rt.dst.rt_uncached_list) + rt_del_uncached_list(&xdst->u.rt); + +The route code does it the other way around: rt_uncached_list is +assumed to be valid IIF rt_uncached list_head is not empty: + +void rt_del_uncached_list(struct rtable *rt) +{ + if (!list_empty(&rt->dst.rt_uncached)) { + struct uncached_list *ul = rt->dst.rt_uncached_list; + + spin_lock_bh(&ul->lock); + list_del_init(&rt->dst.rt_uncached); + spin_unlock_bh(&ul->lock); + } +} + +This patch adds mandatory rt_uncached list_head initialization in +generic dst_init(), and adapt xfrm[46]_dst_destroy logic to match the +rest of the code. + +Fixes: d288a162dd1c ("net: dst: Prevent false sharing vs. 
dst_entry:: __refcnt") +Reported-by: kernel test robot +Link: https://lore.kernel.org/oe-lkp/202304162125.18b7bcdd-oliver.sang@intel.com +Reviewed-by: David Ahern +Reviewed-by: Eric Dumazet +CC: Leon Romanovsky +Signed-off-by: Maxime Bizon +Link: https://lore.kernel.org/r/20230420182508.2417582-1-mbizon@freebox.fr +Signed-off-by: Jakub Kicinski +Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem") +Signed-off-by: Sasha Levin +--- + net/core/dst.c | 1 + + net/ipv4/route.c | 4 ---- + net/ipv4/xfrm4_policy.c | 4 +--- + net/ipv6/route.c | 1 - + net/ipv6/xfrm6_policy.c | 4 +--- + 5 files changed, 3 insertions(+), 11 deletions(-) + +diff --git a/net/core/dst.c b/net/core/dst.c +index 2b7b1619b5e29..1666a6f5e858e 100644 +--- a/net/core/dst.c ++++ b/net/core/dst.c +@@ -67,6 +67,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, + #endif + dst->lwtstate = NULL; + rcuref_init(&dst->__rcuref, initial_ref); ++ INIT_LIST_HEAD(&dst->rt_uncached); + dst->__use = 0; + dst->lastuse = jiffies; + dst->flags = flags; +diff --git a/net/ipv4/route.c b/net/ipv4/route.c +index 7ccf6503d67aa..a44d20644fbc2 100644 +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1646,7 +1646,6 @@ struct rtable *rt_dst_alloc(struct net_device *dev, + rt->rt_uses_gateway = 0; + rt->rt_gw_family = 0; + rt->rt_gw4 = 0; +- INIT_LIST_HEAD(&rt->dst.rt_uncached); + + rt->dst.output = ip_output; + if (flags & RTCF_LOCAL) +@@ -1677,7 +1676,6 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) + new_rt->rt_gw4 = rt->rt_gw4; + else if (rt->rt_gw_family == AF_INET6) + new_rt->rt_gw6 = rt->rt_gw6; +- INIT_LIST_HEAD(&new_rt->dst.rt_uncached); + + new_rt->dst.input = rt->dst.input; + new_rt->dst.output = rt->dst.output; +@@ -2861,8 +2859,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or + rt->rt_gw4 = ort->rt_gw4; + else if (rt->rt_gw_family == AF_INET6) + rt->rt_gw6 = ort->rt_gw6; +- +- INIT_LIST_HEAD(&rt->dst.rt_uncached); + } + + dst_release(dst_orig); +diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c +index 47861c8b7340e..9403bbaf1b616 100644 +--- a/net/ipv4/xfrm4_policy.c ++++ b/net/ipv4/xfrm4_policy.c +@@ -91,7 +91,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + xdst->u.rt.rt_gw6 = rt->rt_gw6; + xdst->u.rt.rt_pmtu = rt->rt_pmtu; + xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked; +- INIT_LIST_HEAD(&xdst->u.rt.dst.rt_uncached); + rt_add_uncached_list(&xdst->u.rt); + + return 0; +@@ -121,8 +120,7 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + + dst_destroy_metrics_generic(dst); +- if (xdst->u.rt.dst.rt_uncached_list) +- rt_del_uncached_list(&xdst->u.rt); ++ rt_del_uncached_list(&xdst->u.rt); + xfrm_dst_destroy(xdst); + } + +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index 9db0b2318e918..d4d06a9d985e8 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -334,7 +334,6 @@ static const struct rt6_info ip6_blk_hole_entry_template = { + static void rt6_info_init(struct rt6_info *rt) + { + memset_after(rt, 0, dst); +- INIT_LIST_HEAD(&rt->dst.rt_uncached); + } + + /* allocate dst with ip6_dst_ops */ +diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c +index 2b493f8d00918..eecc5e59da17c 100644 +--- a/net/ipv6/xfrm6_policy.c ++++ b/net/ipv6/xfrm6_policy.c +@@ -89,7 +89,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; + xdst->u.rt6.rt6i_dst = 
rt->rt6i_dst; + xdst->u.rt6.rt6i_src = rt->rt6i_src; +- INIT_LIST_HEAD(&xdst->u.rt6.dst.rt_uncached); + rt6_uncached_list_add(&xdst->u.rt6); + + return 0; +@@ -121,8 +120,7 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) + if (likely(xdst->u.rt6.rt6i_idev)) + in6_dev_put(xdst->u.rt6.rt6i_idev); + dst_destroy_metrics_generic(dst); +- if (xdst->u.rt6.dst.rt_uncached_list) +- rt6_uncached_list_del(&xdst->u.rt6); ++ rt6_uncached_list_del(&xdst->u.rt6); + xfrm_dst_destroy(xdst); + } + +-- +2.40.1 + diff --git a/queue-6.1/net-dst-prevent-false-sharing-vs.-dst_entry-__refcnt.patch b/queue-6.1/net-dst-prevent-false-sharing-vs.-dst_entry-__refcnt.patch new file mode 100644 index 00000000000..ae7f971dd8c --- /dev/null +++ b/queue-6.1/net-dst-prevent-false-sharing-vs.-dst_entry-__refcnt.patch @@ -0,0 +1,372 @@ +From e7f0083dd5326ec3a897b9d9c144fdaf4f630c4a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Mar 2023 21:55:29 +0100 +Subject: net: dst: Prevent false sharing vs. dst_entry:: __refcnt + +From: Wangyang Guo + +[ Upstream commit d288a162dd1c73507da582966f17dd226e34a0c0 ] + +dst_entry::__refcnt is highly contended in scenarios where many connections +happen from and to the same IP. The reference count is an atomic_t, so the +reference count operations have to take the cache-line exclusive. + +Aside of the unavoidable reference count contention there is another +significant problem which is caused by that: False sharing. + +perf top identified two affected read accesses. dst_entry::lwtstate and +rtable::rt_genid. + +dst_entry:__refcnt is located at offset 64 of dst_entry, which puts it into +a seperate cacheline vs. the read mostly members located at the beginning +of the struct. + +That prevents false sharing vs. the struct members in the first 64 +bytes of the structure, but there is also + + dst_entry::lwtstate + +which is located after the reference count and in the same cache line. This +member is read after a reference count has been acquired. + +struct rtable embeds a struct dst_entry at offset 0. struct dst_entry has a +size of 112 bytes, which means that the struct members of rtable which +follow the dst member share the same cache line as dst_entry::__refcnt. +Especially + + rtable::rt_genid + +is also read by the contexts which have a reference count acquired +already. + +When dst_entry:__refcnt is incremented or decremented via an atomic +operation these read accesses stall. This was found when analysing the +memtier benchmark in 1:100 mode, which amplifies the problem extremly. + +Move the rt[6i]_uncached[_list] members out of struct rtable and struct +rt6_info into struct dst_entry to provide padding and move the lwtstate +member after that so it ends up in the same cache line. + +The resulting improvement depends on the micro-architecture and the number +of CPUs. It ranges from +20% to +120% with a localhost memtier/memcached +benchmark. 
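+
+A schematic sketch of the layout idea (field names abridged; not the
+literal struct dst_entry definition):
+
+  #include <linux/list.h>
+  #include <linux/types.h>
+
+  struct net_device;
+  struct uncached_list;
+  struct lwtunnel_state;
+
+  struct example_dst {
+          /* read-mostly members fill the first cache line */
+          struct net_device *dev;
+
+          atomic_t __refcnt;      /* hot: bounces on every hold/put */
+
+          /*
+           * rt_uncached pads out the refcount's cache line, so the
+           * lwtstate read done right after taking a reference no
+           * longer false-shares with it.
+           */
+          struct list_head rt_uncached;
+          struct uncached_list *rt_uncached_list;
+          struct lwtunnel_state *lwtstate;
+  };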
+ +[ tglx: Rearrange struct ] + +Signed-off-by: Wangyang Guo +Signed-off-by: Arjan van de Ven +Signed-off-by: Thomas Gleixner +Reviewed-by: Eric Dumazet +Reviewed-by: David Ahern +Link: https://lore.kernel.org/r/20230323102800.042297517@linutronix.de +Signed-off-by: Jakub Kicinski +Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem") +Signed-off-by: Sasha Levin +--- + include/net/dst.h | 15 ++++++++++++++- + include/net/ip6_fib.h | 3 --- + include/net/ip6_route.h | 2 +- + include/net/route.h | 3 --- + net/ipv4/route.c | 20 ++++++++++---------- + net/ipv4/xfrm4_policy.c | 4 ++-- + net/ipv6/route.c | 26 +++++++++++++------------- + net/ipv6/xfrm6_policy.c | 4 ++-- + 8 files changed, 42 insertions(+), 35 deletions(-) + +diff --git a/include/net/dst.h b/include/net/dst.h +index d67fda89cd0fa..81f2279ea911a 100644 +--- a/include/net/dst.h ++++ b/include/net/dst.h +@@ -69,15 +69,28 @@ struct dst_entry { + #endif + int __use; + unsigned long lastuse; +- struct lwtunnel_state *lwtstate; + struct rcu_head rcu_head; + short error; + short __pad; + __u32 tclassid; + #ifndef CONFIG_64BIT ++ struct lwtunnel_state *lwtstate; + atomic_t __refcnt; /* 32-bit offset 64 */ + #endif + netdevice_tracker dev_tracker; ++ ++ /* ++ * Used by rtable and rt6_info. Moves lwtstate into the next cache ++ * line on 64bit so that lwtstate does not cause false sharing with ++ * __refcnt under contention of __refcnt. This also puts the ++ * frequently accessed members of rtable and rt6_info out of the ++ * __refcnt cache line. ++ */ ++ struct list_head rt_uncached; ++ struct uncached_list *rt_uncached_list; ++#ifdef CONFIG_64BIT ++ struct lwtunnel_state *lwtstate; ++#endif + }; + + struct dst_metrics { +diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h +index fa4e6af382e2a..9ba6413fd2e3e 100644 +--- a/include/net/ip6_fib.h ++++ b/include/net/ip6_fib.h +@@ -217,9 +217,6 @@ struct rt6_info { + struct inet6_dev *rt6i_idev; + u32 rt6i_flags; + +- struct list_head rt6i_uncached; +- struct uncached_list *rt6i_uncached_list; +- + /* more non-fragment space at head required */ + unsigned short rt6i_nfheader_len; + }; +diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h +index 035d61d50a989..6c6b673d92554 100644 +--- a/include/net/ip6_route.h ++++ b/include/net/ip6_route.h +@@ -104,7 +104,7 @@ static inline struct dst_entry *ip6_route_output(struct net *net, + static inline void ip6_rt_put_flags(struct rt6_info *rt, int flags) + { + if (!(flags & RT6_LOOKUP_F_DST_NOREF) || +- !list_empty(&rt->rt6i_uncached)) ++ !list_empty(&rt->dst.rt_uncached)) + ip6_rt_put(rt); + } + +diff --git a/include/net/route.h b/include/net/route.h +index af8431b25f800..9ca0f72868b76 100644 +--- a/include/net/route.h ++++ b/include/net/route.h +@@ -78,9 +78,6 @@ struct rtable { + /* Miscellaneous cached information */ + u32 rt_mtu_locked:1, + rt_pmtu:31; +- +- struct list_head rt_uncached; +- struct uncached_list *rt_uncached_list; + }; + + static inline bool rt_is_input_route(const struct rtable *rt) +diff --git a/net/ipv4/route.c b/net/ipv4/route.c +index 9cbaae4f5ee71..7ccf6503d67aa 100644 +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1510,20 +1510,20 @@ void rt_add_uncached_list(struct rtable *rt) + { + struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); + +- rt->rt_uncached_list = ul; ++ rt->dst.rt_uncached_list = ul; + + spin_lock_bh(&ul->lock); +- list_add_tail(&rt->rt_uncached, &ul->head); ++ list_add_tail(&rt->dst.rt_uncached, &ul->head); + spin_unlock_bh(&ul->lock); + } + + void 
rt_del_uncached_list(struct rtable *rt) + { +- if (!list_empty(&rt->rt_uncached)) { +- struct uncached_list *ul = rt->rt_uncached_list; ++ if (!list_empty(&rt->dst.rt_uncached)) { ++ struct uncached_list *ul = rt->dst.rt_uncached_list; + + spin_lock_bh(&ul->lock); +- list_del_init(&rt->rt_uncached); ++ list_del_init(&rt->dst.rt_uncached); + spin_unlock_bh(&ul->lock); + } + } +@@ -1548,13 +1548,13 @@ void rt_flush_dev(struct net_device *dev) + continue; + + spin_lock_bh(&ul->lock); +- list_for_each_entry_safe(rt, safe, &ul->head, rt_uncached) { ++ list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) { + if (rt->dst.dev != dev) + continue; + rt->dst.dev = blackhole_netdev; + netdev_ref_replace(dev, blackhole_netdev, + &rt->dst.dev_tracker, GFP_ATOMIC); +- list_move(&rt->rt_uncached, &ul->quarantine); ++ list_move(&rt->dst.rt_uncached, &ul->quarantine); + } + spin_unlock_bh(&ul->lock); + } +@@ -1646,7 +1646,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev, + rt->rt_uses_gateway = 0; + rt->rt_gw_family = 0; + rt->rt_gw4 = 0; +- INIT_LIST_HEAD(&rt->rt_uncached); ++ INIT_LIST_HEAD(&rt->dst.rt_uncached); + + rt->dst.output = ip_output; + if (flags & RTCF_LOCAL) +@@ -1677,7 +1677,7 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) + new_rt->rt_gw4 = rt->rt_gw4; + else if (rt->rt_gw_family == AF_INET6) + new_rt->rt_gw6 = rt->rt_gw6; +- INIT_LIST_HEAD(&new_rt->rt_uncached); ++ INIT_LIST_HEAD(&new_rt->dst.rt_uncached); + + new_rt->dst.input = rt->dst.input; + new_rt->dst.output = rt->dst.output; +@@ -2862,7 +2862,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or + else if (rt->rt_gw_family == AF_INET6) + rt->rt_gw6 = ort->rt_gw6; + +- INIT_LIST_HEAD(&rt->rt_uncached); ++ INIT_LIST_HEAD(&rt->dst.rt_uncached); + } + + dst_release(dst_orig); +diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c +index 3d0dfa6cf9f96..47861c8b7340e 100644 +--- a/net/ipv4/xfrm4_policy.c ++++ b/net/ipv4/xfrm4_policy.c +@@ -91,7 +91,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + xdst->u.rt.rt_gw6 = rt->rt_gw6; + xdst->u.rt.rt_pmtu = rt->rt_pmtu; + xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked; +- INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); ++ INIT_LIST_HEAD(&xdst->u.rt.dst.rt_uncached); + rt_add_uncached_list(&xdst->u.rt); + + return 0; +@@ -121,7 +121,7 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + + dst_destroy_metrics_generic(dst); +- if (xdst->u.rt.rt_uncached_list) ++ if (xdst->u.rt.dst.rt_uncached_list) + rt_del_uncached_list(&xdst->u.rt); + xfrm_dst_destroy(xdst); + } +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index 0bcdb675ba2c1..7205adee46c21 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -139,20 +139,20 @@ void rt6_uncached_list_add(struct rt6_info *rt) + { + struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list); + +- rt->rt6i_uncached_list = ul; ++ rt->dst.rt_uncached_list = ul; + + spin_lock_bh(&ul->lock); +- list_add_tail(&rt->rt6i_uncached, &ul->head); ++ list_add_tail(&rt->dst.rt_uncached, &ul->head); + spin_unlock_bh(&ul->lock); + } + + void rt6_uncached_list_del(struct rt6_info *rt) + { +- if (!list_empty(&rt->rt6i_uncached)) { +- struct uncached_list *ul = rt->rt6i_uncached_list; ++ if (!list_empty(&rt->dst.rt_uncached)) { ++ struct uncached_list *ul = rt->dst.rt_uncached_list; + + spin_lock_bh(&ul->lock); +- list_del_init(&rt->rt6i_uncached); ++ list_del_init(&rt->dst.rt_uncached); + 
spin_unlock_bh(&ul->lock); + } + } +@@ -169,7 +169,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev) + continue; + + spin_lock_bh(&ul->lock); +- list_for_each_entry_safe(rt, safe, &ul->head, rt6i_uncached) { ++ list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) { + struct inet6_dev *rt_idev = rt->rt6i_idev; + struct net_device *rt_dev = rt->dst.dev; + bool handled = false; +@@ -188,7 +188,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev) + handled = true; + } + if (handled) +- list_move(&rt->rt6i_uncached, ++ list_move(&rt->dst.rt_uncached, + &ul->quarantine); + } + spin_unlock_bh(&ul->lock); +@@ -334,7 +334,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = { + static void rt6_info_init(struct rt6_info *rt) + { + memset_after(rt, 0, dst); +- INIT_LIST_HEAD(&rt->rt6i_uncached); ++ INIT_LIST_HEAD(&rt->dst.rt_uncached); + } + + /* allocate dst with ip6_dst_ops */ +@@ -2641,7 +2641,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, + dst = ip6_route_output_flags_noref(net, sk, fl6, flags); + rt6 = (struct rt6_info *)dst; + /* For dst cached in uncached_list, refcnt is already taken. */ +- if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) { ++ if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) { + dst = &net->ipv6.ip6_null_entry->dst; + dst_hold(dst); + } +@@ -2751,7 +2751,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, + from = rcu_dereference(rt->from); + + if (from && (rt->rt6i_flags & RTF_PCPU || +- unlikely(!list_empty(&rt->rt6i_uncached)))) ++ unlikely(!list_empty(&rt->dst.rt_uncached)))) + dst_ret = rt6_dst_from_check(rt, from, cookie); + else + dst_ret = rt6_check(rt, from, cookie); +@@ -6488,7 +6488,7 @@ static int __net_init ip6_route_net_init(struct net *net) + net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; + dst_init_metrics(&net->ipv6.ip6_null_entry->dst, + ip6_template_metrics, true); +- INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->rt6i_uncached); ++ INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->dst.rt_uncached); + + #ifdef CONFIG_IPV6_MULTIPLE_TABLES + net->ipv6.fib6_has_custom_rules = false; +@@ -6500,7 +6500,7 @@ static int __net_init ip6_route_net_init(struct net *net) + net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; + dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, + ip6_template_metrics, true); +- INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->rt6i_uncached); ++ INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->dst.rt_uncached); + + net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, + sizeof(*net->ipv6.ip6_blk_hole_entry), +@@ -6510,7 +6510,7 @@ static int __net_init ip6_route_net_init(struct net *net) + net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; + dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, + ip6_template_metrics, true); +- INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached); ++ INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->dst.rt_uncached); + #ifdef CONFIG_IPV6_SUBTREES + net->ipv6.fib6_routes_require_src = 0; + #endif +diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c +index ea435eba30534..2b493f8d00918 100644 +--- a/net/ipv6/xfrm6_policy.c ++++ b/net/ipv6/xfrm6_policy.c +@@ -89,7 +89,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; + xdst->u.rt6.rt6i_dst = rt->rt6i_dst; + xdst->u.rt6.rt6i_src = rt->rt6i_src; +- INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached); ++ 
INIT_LIST_HEAD(&xdst->u.rt6.dst.rt_uncached); + rt6_uncached_list_add(&xdst->u.rt6); + + return 0; +@@ -121,7 +121,7 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) + if (likely(xdst->u.rt6.rt6i_idev)) + in6_dev_put(xdst->u.rt6.rt6i_idev); + dst_destroy_metrics_generic(dst); +- if (xdst->u.rt6.rt6i_uncached_list) ++ if (xdst->u.rt6.dst.rt_uncached_list) + rt6_uncached_list_del(&xdst->u.rt6); + xfrm_dst_destroy(xdst); + } +-- +2.40.1 + diff --git a/queue-6.1/net-dst-switch-to-rcuref_t-reference-counting.patch b/queue-6.1/net-dst-switch-to-rcuref_t-reference-counting.patch new file mode 100644 index 00000000000..8ed6867cc71 --- /dev/null +++ b/queue-6.1/net-dst-switch-to-rcuref_t-reference-counting.patch @@ -0,0 +1,259 @@ +From 180ab46081f3404a77e4cef550c4f0b28701a1b3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Mar 2023 21:55:32 +0100 +Subject: net: dst: Switch to rcuref_t reference counting + +From: Thomas Gleixner + +[ Upstream commit bc9d3a9f2afca189a6ae40225b6985e3c775375e ] + +Under high contention dst_entry::__refcnt becomes a significant bottleneck. + +atomic_inc_not_zero() is implemented with a cmpxchg() loop, which goes into +high retry rates on contention. + +Switch the reference count to rcuref_t which results in a significant +performance gain. Rename the reference count member to __rcuref to reflect +the change. + +The gain depends on the micro-architecture and the number of concurrent +operations and has been measured in the range of +25% to +130% with a +localhost memtier/memcached benchmark which amplifies the problem +massively. + +Running the memtier/memcached benchmark over a real (1Gb) network +connection the conversion on top of the false sharing fix for struct +dst_entry::__refcnt results in a total gain in the 2%-5% range over the +upstream baseline. + +Reported-by: Wangyang Guo +Reported-by: Arjan Van De Ven +Signed-off-by: Thomas Gleixner +Link: https://lore.kernel.org/r/20230307125538.989175656@linutronix.de +Link: https://lore.kernel.org/r/20230323102800.215027837@linutronix.de +Signed-off-by: Jakub Kicinski +Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem") +Signed-off-by: Sasha Levin +--- + include/net/dst.h | 19 ++++++++++--------- + include/net/sock.h | 2 +- + net/bridge/br_nf_core.c | 2 +- + net/core/dst.c | 26 +++++--------------------- + net/core/rtnetlink.c | 2 +- + net/ipv6/route.c | 6 +++--- + net/netfilter/ipvs/ip_vs_xmit.c | 4 ++-- + 7 files changed, 23 insertions(+), 38 deletions(-) + +diff --git a/include/net/dst.h b/include/net/dst.h +index 81f2279ea911a..78884429deed8 100644 +--- a/include/net/dst.h ++++ b/include/net/dst.h +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -61,11 +62,11 @@ struct dst_entry { + unsigned short trailer_len; /* space to reserve at tail */ + + /* +- * __refcnt wants to be on a different cache line from ++ * __rcuref wants to be on a different cache line from + * input/output/ops or performance tanks badly + */ + #ifdef CONFIG_64BIT +- atomic_t __refcnt; /* 64-bit offset 64 */ ++ rcuref_t __rcuref; /* 64-bit offset 64 */ + #endif + int __use; + unsigned long lastuse; +@@ -75,16 +76,16 @@ struct dst_entry { + __u32 tclassid; + #ifndef CONFIG_64BIT + struct lwtunnel_state *lwtstate; +- atomic_t __refcnt; /* 32-bit offset 64 */ ++ rcuref_t __rcuref; /* 32-bit offset 64 */ + #endif + netdevice_tracker dev_tracker; + + /* + * Used by rtable and rt6_info. 
Moves lwtstate into the next cache + * line on 64bit so that lwtstate does not cause false sharing with +- * __refcnt under contention of __refcnt. This also puts the ++ * __rcuref under contention of __rcuref. This also puts the + * frequently accessed members of rtable and rt6_info out of the +- * __refcnt cache line. ++ * __rcuref cache line. + */ + struct list_head rt_uncached; + struct uncached_list *rt_uncached_list; +@@ -238,10 +239,10 @@ static inline void dst_hold(struct dst_entry *dst) + { + /* + * If your kernel compilation stops here, please check +- * the placement of __refcnt in struct dst_entry ++ * the placement of __rcuref in struct dst_entry + */ +- BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); +- WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0); ++ BUILD_BUG_ON(offsetof(struct dst_entry, __rcuref) & 63); ++ WARN_ON(!rcuref_get(&dst->__rcuref)); + } + + static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) +@@ -305,7 +306,7 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb + */ + static inline bool dst_hold_safe(struct dst_entry *dst) + { +- return atomic_inc_not_zero(&dst->__refcnt); ++ return rcuref_get(&dst->__rcuref); + } + + /** +diff --git a/include/net/sock.h b/include/net/sock.h +index fe695e8bfe289..4c988b981d6e1 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -2181,7 +2181,7 @@ sk_dst_get(struct sock *sk) + + rcu_read_lock(); + dst = rcu_dereference(sk->sk_dst_cache); +- if (dst && !atomic_inc_not_zero(&dst->__refcnt)) ++ if (dst && !rcuref_get(&dst->__rcuref)) + dst = NULL; + rcu_read_unlock(); + return dst; +diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c +index 8c69f0c95a8ed..98aea5485aaef 100644 +--- a/net/bridge/br_nf_core.c ++++ b/net/bridge/br_nf_core.c +@@ -73,7 +73,7 @@ void br_netfilter_rtable_init(struct net_bridge *br) + { + struct rtable *rt = &br->fake_rtable; + +- atomic_set(&rt->dst.__refcnt, 1); ++ rcuref_init(&rt->dst.__rcuref, 1); + rt->dst.dev = br->dev; + dst_init_metrics(&rt->dst, br_dst_default_metrics, true); + rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE; +diff --git a/net/core/dst.c b/net/core/dst.c +index a4e738d321ba2..2b7b1619b5e29 100644 +--- a/net/core/dst.c ++++ b/net/core/dst.c +@@ -66,7 +66,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, + dst->tclassid = 0; + #endif + dst->lwtstate = NULL; +- atomic_set(&dst->__refcnt, initial_ref); ++ rcuref_init(&dst->__rcuref, initial_ref); + dst->__use = 0; + dst->lastuse = jiffies; + dst->flags = flags; +@@ -166,31 +166,15 @@ EXPORT_SYMBOL(dst_dev_put); + + void dst_release(struct dst_entry *dst) + { +- if (dst) { +- int newrefcnt; +- +- newrefcnt = atomic_dec_return(&dst->__refcnt); +- if (WARN_ONCE(newrefcnt < 0, "dst_release underflow")) +- net_warn_ratelimited("%s: dst:%p refcnt:%d\n", +- __func__, dst, newrefcnt); +- if (!newrefcnt) +- call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu); +- } ++ if (dst && rcuref_put(&dst->__rcuref)) ++ call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu); + } + EXPORT_SYMBOL(dst_release); + + void dst_release_immediate(struct dst_entry *dst) + { +- if (dst) { +- int newrefcnt; +- +- newrefcnt = atomic_dec_return(&dst->__refcnt); +- if (WARN_ONCE(newrefcnt < 0, "dst_release_immediate underflow")) +- net_warn_ratelimited("%s: dst:%p refcnt:%d\n", +- __func__, dst, newrefcnt); +- if (!newrefcnt) +- dst_destroy(dst); +- } ++ if (dst && rcuref_put(&dst->__rcuref)) ++ dst_destroy(dst); + } + EXPORT_SYMBOL(dst_release_immediate); + +diff --git 
a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index 854b3fd66b1be..90810408cc5df 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -839,7 +839,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
+ if (dst) {
+ ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse);
+ ci.rta_used = dst->__use;
+- ci.rta_clntref = atomic_read(&dst->__refcnt);
++ ci.rta_clntref = rcuref_read(&dst->__rcuref);
+ }
+ if (expires) {
+ unsigned long clock;
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 7205adee46c21..9db0b2318e918 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -293,7 +293,7 @@ static const struct fib6_info fib6_null_entry_template = {
+
+ static const struct rt6_info ip6_null_entry_template = {
+ .dst = {
+- .__refcnt = ATOMIC_INIT(1),
++ .__rcuref = RCUREF_INIT(1),
+ .__use = 1,
+ .obsolete = DST_OBSOLETE_FORCE_CHK,
+ .error = -ENETUNREACH,
+@@ -307,7 +307,7 @@ static const struct rt6_info ip6_null_entry_template = {
+
+ static const struct rt6_info ip6_prohibit_entry_template = {
+ .dst = {
+- .__refcnt = ATOMIC_INIT(1),
++ .__rcuref = RCUREF_INIT(1),
+ .__use = 1,
+ .obsolete = DST_OBSOLETE_FORCE_CHK,
+ .error = -EACCES,
+@@ -319,7 +319,7 @@ static const struct rt6_info ip6_prohibit_entry_template = {
+
+ static const struct rt6_info ip6_blk_hole_entry_template = {
+ .dst = {
+- .__refcnt = ATOMIC_INIT(1),
++ .__rcuref = RCUREF_INIT(1),
+ .__use = 1,
+ .obsolete = DST_OBSOLETE_FORCE_CHK,
+ .error = -EINVAL,
+diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
+index 7243079ef3546..70ef036909fb0 100644
+--- a/net/netfilter/ipvs/ip_vs_xmit.c
++++ b/net/netfilter/ipvs/ip_vs_xmit.c
+@@ -339,7 +339,7 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+ spin_unlock_bh(&dest->dst_lock);
+ IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
+ &dest->addr.ip, &dest_dst->dst_saddr.ip,
+- atomic_read(&rt->dst.__refcnt));
++ rcuref_read(&rt->dst.__rcuref));
+ }
+ if (ret_saddr)
+ *ret_saddr = dest_dst->dst_saddr.ip;
+@@ -507,7 +507,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+ spin_unlock_bh(&dest->dst_lock);
+ IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
+ &dest->addr.in6, &dest_dst->dst_saddr.in6,
+- atomic_read(&rt->dst.__refcnt));
++ rcuref_read(&rt->dst.__rcuref));
+ }
+ if (ret_saddr)
+ *ret_saddr = dest_dst->dst_saddr.in6;
+--
+2.40.1
+
diff --git a/queue-6.1/net-mlx5-e-switch-register-event-handler-before-armi.patch b/queue-6.1/net-mlx5-e-switch-register-event-handler-before-armi.patch
new file mode 100644
index 00000000000..7c1f9769819
--- /dev/null
+++ b/queue-6.1/net-mlx5-e-switch-register-event-handler-before-armi.patch
@@ -0,0 +1,91 @@
+From 0cc97277dc5b56296843165ddf9b8c10dd28b988 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Sun, 27 Aug 2023 13:31:53 +0300
+Subject: net/mlx5: E-switch, register event handler before arming the event
+
+From: Shay Drory
+
+[ Upstream commit 7624e58a8b3a251e3e5108b32f2183b34453db32 ]
+
+Currently, mlx5 is registering the event handler for the vport context
+change event some time after arming the event. This can lead to missing
+an event, which will result in wrong rules in the FDB.
+Hence, register the event handler before arming the event.
+
+This solution is valid since FW is sending the vport context change event
+only on vports which SW armed, and SW arms the vport when enabling
+it, which is done after the FDB has been created.
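+
+To make the ordering hazard concrete, here is a toy model (plain C with
+hypothetical names, not driver code): an event that fires between "arm"
+and "register handler" is silently dropped, which is exactly the window
+this fix closes by swapping the two steps:
+
+    #include <stdio.h>
+
+    static void (*handler)(int vport);
+    static int missed;
+
+    static void fw_event(int vport)   /* FW delivers an armed event */
+    {
+        if (handler)
+            handler(vport);
+        else
+            missed++;                 /* event lost: FDB goes stale */
+    }
+
+    static void vport_change(int vport)
+    {
+        printf("vport %d changed\n", vport);
+    }
+
+    int main(void)
+    {
+        /* buggy order: armed first, event fires, handler too late */
+        fw_event(1);
+        handler = vport_change;
+
+        /* fixed order: handler registered before arming */
+        fw_event(2);
+        printf("missed=%d\n", missed);   /* prints missed=1 */
+        return 0;
+    }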
+ +Fixes: 6933a9379559 ("net/mlx5: E-Switch, Use async events chain") +Signed-off-by: Shay Drory +Reviewed-by: Mark Bloch +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + .../net/ethernet/mellanox/mlx5/core/eswitch.c | 17 ++++++++--------- + 1 file changed, 8 insertions(+), 9 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +index 4b9d567c8f473..48939c72b5925 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +@@ -969,11 +969,8 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) + return ERR_PTR(err); + } + +-static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw) ++static void mlx5_eswitch_event_handler_register(struct mlx5_eswitch *esw) + { +- MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); +- mlx5_eq_notifier_register(esw->dev, &esw->nb); +- + if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) { + MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler, + ESW_FUNCTIONS_CHANGED); +@@ -981,13 +978,11 @@ static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw) + } + } + +-static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw) ++static void mlx5_eswitch_event_handler_unregister(struct mlx5_eswitch *esw) + { + if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) + mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); + +- mlx5_eq_notifier_unregister(esw->dev, &esw->nb); +- + flush_workqueue(esw->work_queue); + } + +@@ -1273,6 +1268,9 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) + + mlx5_eswitch_update_num_of_vfs(esw, num_vfs); + ++ MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); ++ mlx5_eq_notifier_register(esw->dev, &esw->nb); ++ + if (esw->mode == MLX5_ESWITCH_LEGACY) { + err = esw_legacy_enable(esw); + } else { +@@ -1285,7 +1283,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) + + esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED; + +- mlx5_eswitch_event_handlers_register(esw); ++ mlx5_eswitch_event_handler_register(esw); + + esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n", + esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", +@@ -1394,7 +1392,8 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) + */ + mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_LEGACY); + +- mlx5_eswitch_event_handlers_unregister(esw); ++ mlx5_eq_notifier_unregister(esw->dev, &esw->nb); ++ mlx5_eswitch_event_handler_unregister(esw); + + esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", + esw->mode == MLX5_ESWITCH_LEGACY ? 
"LEGACY" : "OFFLOADS", +-- +2.40.1 + diff --git a/queue-6.1/net-mlx5-handle-fw-tracer-change-ownership-event-bas.patch b/queue-6.1/net-mlx5-handle-fw-tracer-change-ownership-event-bas.patch new file mode 100644 index 00000000000..a3723c348e6 --- /dev/null +++ b/queue-6.1/net-mlx5-handle-fw-tracer-change-ownership-event-bas.patch @@ -0,0 +1,50 @@ +From 647422347b0b80222c1e258860fae97a04185ce8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 6 Sep 2023 21:48:30 +0300 +Subject: net/mlx5: Handle fw tracer change ownership event based on MTRC + +From: Maher Sanalla + +[ Upstream commit 92fd39634541eb0a11bf1bafbc8ba92d6ddb8dba ] + +Currently, whenever fw issues a change ownership event, the PF that owns +the fw tracer drops its ownership directly and the other PFs try to pick +up the ownership via what MTRC register suggests. + +In some cases, driver releases the ownership of the tracer and reacquires +it later on. Whenever the driver releases ownership of the tracer, fw +issues a change ownership event. This event can be delayed and come after +driver has reacquired ownership of the tracer. Thus the late event will +trigger the tracer owner PF to release the ownership again and lead to a +scenario where no PF is owning the tracer. + +To prevent the scenario described above, when handling a change +ownership event, do not drop ownership of the tracer directly, instead +read the fw MTRC register to retrieve the up-to-date owner of the tracer +and set it accordingly in driver level. + +Fixes: f53aaa31cce7 ("net/mlx5: FW tracer, implement tracer logic") +Signed-off-by: Maher Sanalla +Reviewed-by: Shay Drory +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +index c4e40834e3ff9..374c0011a127b 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +@@ -821,7 +821,7 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work) + + mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner); + if (tracer->owner) { +- tracer->owner = false; ++ mlx5_fw_tracer_ownership_acquire(tracer); + return; + } + +-- +2.40.1 + diff --git a/queue-6.1/net-mlx5e-don-t-offload-internal-port-if-filter-devi.patch b/queue-6.1/net-mlx5e-don-t-offload-internal-port-if-filter-devi.patch new file mode 100644 index 00000000000..aaf9f650d88 --- /dev/null +++ b/queue-6.1/net-mlx5e-don-t-offload-internal-port-if-filter-devi.patch @@ -0,0 +1,46 @@ +From 34a4405cae46541c39782e3348d53af100de1ba8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Sep 2023 02:28:47 +0000 +Subject: net/mlx5e: Don't offload internal port if filter device is out device + +From: Jianbo Liu + +[ Upstream commit 06b4eac9c4beda520b8a4dbbb8e33dba9d1c8fba ] + +In the cited commit, if the routing device is ovs internal port, the +out device is set to uplink, and packets go out after encapsulation. + +If filter device is uplink, it can trigger the following syndrome: +mlx5_core 0000:08:00.0: mlx5_cmd_out_err:803:(pid 3966): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0xcdb051), err(-22) + +Fix this issue by not offloading internal port if filter device is out +device. 
In this case, packets are not forwarded to the root table to
+be processed; the termination table is used instead to forward them
+from uplink to uplink.
+
+Fixes: 100ad4e2d758 ("net/mlx5e: Offload internal port as encap route device")
+Signed-off-by: Jianbo Liu
+Reviewed-by: Ariel Levkovich
+Signed-off-by: Saeed Mahameed
+Signed-off-by: Sasha Levin
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+index cd15d36b1507e..907ad6ffe7275 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+@@ -23,7 +23,8 @@ static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
+
+ route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
+
+- if (!route_dev || !netif_is_ovs_master(route_dev))
++ if (!route_dev || !netif_is_ovs_master(route_dev) ||
++ attr->parse_attr->filter_dev == e->out_dev)
+ goto out;
+
+ err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
+--
+2.40.1
+
diff --git a/queue-6.1/net-move-altnames-together-with-the-netdevice.patch b/queue-6.1/net-move-altnames-together-with-the-netdevice.patch
new file mode 100644
index 00000000000..8ec7b8eed54
--- /dev/null
+++ b/queue-6.1/net-move-altnames-together-with-the-netdevice.patch
@@ -0,0 +1,101 @@
+From 8f55e12b5b35ebb31ca91026b62ab165c32080ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Tue, 17 Oct 2023 18:38:16 -0700
+Subject: net: move altnames together with the netdevice
+
+From: Jakub Kicinski
+
+[ Upstream commit 8e15aee621618a3ee3abecaf1fd8c1428098b7ef ]
+
+The altname nodes are currently not moved to the new netns
+when the netdevice itself moves:
+
+ [ ~]# ip netns add test
+ [ ~]# ip -netns test link add name eth0 type dummy
+ [ ~]# ip -netns test link property add dev eth0 altname some-name
+ [ ~]# ip -netns test link show dev some-name
+ 2: eth0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
+ link/ether 1e:67:ed:19:3d:24 brd ff:ff:ff:ff:ff:ff
+ altname some-name
+ [ ~]# ip -netns test link set dev eth0 netns 1
+ [ ~]# ip link
+ ...
+ 3: eth0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
+ link/ether 02:40:88:62:ec:b8 brd ff:ff:ff:ff:ff:ff
+ altname some-name
+ [ ~]# ip li show dev some-name
+ Device "some-name" does not exist.
+
+Remove them from the hash table when the device is unlisted
+and add them back when it is listed again.
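+
+The invariant the fix establishes can be sketched in a few lines of
+kernel-style pseudocode (using only helpers visible in the hunks below,
+with the unrelated bookkeeping elided): every name a device can be
+looked up by, including its altnames, is registered when the device is
+listed in a namespace and dropped when it is unlisted, so the names
+follow the device across namespaces:
+
+    /* sketch, not the complete functions */
+    static void list_netdevice(struct net_device *dev)
+    {
+        struct netdev_name_node *name_node;
+        struct net *net = dev_net(dev);
+
+        /* hash dev->name and dev->ifindex as before, then: */
+        netdev_for_each_altname(dev, name_node)
+            netdev_name_node_add(net, name_node);   /* was missing */
+    }
+
+    static void unlist_netdevice(struct net_device *dev)
+    {
+        struct netdev_name_node *name_node;
+
+        netdev_for_each_altname(dev, name_node)
+            netdev_name_node_del(name_node);        /* was missing */
+        /* ... then unlink from the device chain as before */
+    }
+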
Fixes: 36fbf1e52bd3 ("net: rtnetlink: add linkprop commands to add and delete alternative ifnames")
+Reviewed-by: Jiri Pirko
+Signed-off-by: Jakub Kicinski
+Signed-off-by: Paolo Abeni
+Signed-off-by: Sasha Levin
+---
+ net/core/dev.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 14066030cb1dc..ed2484f5e54e4 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -381,6 +381,7 @@ static void netdev_name_node_alt_flush(struct net_device *dev)
+ /* Device list insertion */
+ static void list_netdevice(struct net_device *dev)
+ {
++ struct netdev_name_node *name_node;
+ struct net *net = dev_net(dev);
+
+ ASSERT_RTNL();
+@@ -391,6 +392,10 @@ static void list_netdevice(struct net_device *dev)
+ hlist_add_head_rcu(&dev->index_hlist,
+ dev_index_hash(net, dev->ifindex));
+ write_unlock(&dev_base_lock);
++
++ netdev_for_each_altname(dev, name_node)
++ netdev_name_node_add(net, name_node);
++
+ /* We reserved the ifindex, this can't fail */
+ WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));
+
+@@ -402,12 +407,16 @@ static void list_netdevice(struct net_device *dev)
+ */
+ static void unlist_netdevice(struct net_device *dev, bool lock)
+ {
++ struct netdev_name_node *name_node;
+ struct net *net = dev_net(dev);
+
+ ASSERT_RTNL();
+
+ xa_erase(&net->dev_by_index, dev->ifindex);
+
++ netdev_for_each_altname(dev, name_node)
++ netdev_name_node_del(name_node);
++
+ /* Unlink dev from the device chain */
+ if (lock)
+ write_lock(&dev_base_lock);
+@@ -10872,7 +10881,6 @@ void unregister_netdevice_many(struct list_head *head)
+ synchronize_net();
+
+ list_for_each_entry(dev, head, unreg_list) {
+- struct netdev_name_node *name_node;
+ struct sk_buff *skb = NULL;
+
+ /* Shutdown queueing discipline. */
+@@ -10898,9 +10906,6 @@ void unregister_netdevice_many(struct list_head *head)
+ dev_uc_flush(dev);
+ dev_mc_flush(dev);
+
+- netdev_for_each_altname(dev, name_node)
+- netdev_name_node_del(name_node);
+- synchronize_rcu();
+ netdev_name_node_alt_flush(dev);
+ netdev_name_node_free(dev->name_node);
+
+--
+2.40.1
+
diff --git a/queue-6.1/net-smc-fix-smc-clc-failed-issue-when-netdevice-not-.patch b/queue-6.1/net-smc-fix-smc-clc-failed-issue-when-netdevice-not-.patch
new file mode 100644
index 00000000000..68fec06099d
--- /dev/null
+++ b/queue-6.1/net-smc-fix-smc-clc-failed-issue-when-netdevice-not-.patch
@@ -0,0 +1,105 @@
+From 5d2e7dc7d7580d57cb1069921d0fb2359b96b909 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Wed, 11 Oct 2023 15:48:51 +0800
+Subject: net/smc: fix smc clc failed issue when netdevice not in init_net
+
+From: Albert Huang
+
+[ Upstream commit c68681ae46eaaa1640b52fe366d21a93b2185df5 ]
+
+If the netdevice is within a container and communicates externally
+through network technologies such as VxLAN, we won't be able to find
+routing information in the init_net namespace. To address this issue,
+we need to add a struct net parameter to the smc_ib_find_route function.
+This allows us to locate the routing information within the corresponding
+net namespace, ensuring the correct completion of the SMC CLC interaction.
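+
+The core of the change condenses into one kernel-style sketch (the
+wrapper name is hypothetical; the calls are the ones used in the hunks
+below): resolve the route in the namespace the socket actually lives in
+rather than in init_net:
+
+    /* hypothetical helper showing the namespace-aware lookup */
+    static struct rtable *smc_route_lookup(struct sock *sk,
+                                           struct flowi4 *fl4)
+    {
+        struct net *net = sock_net(sk);   /* the container's netns */
+
+        /* was ip_route_output_flow(&init_net, fl4, NULL), which
+         * cannot see devices that exist only inside the container
+         */
+        return ip_route_output_flow(net, fl4, NULL);
+    }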
+ +Fixes: e5c4744cfb59 ("net/smc: add SMC-Rv2 connection establishment") +Signed-off-by: Albert Huang +Reviewed-by: Dust Li +Reviewed-by: Wenjia Zhang +Link: https://lore.kernel.org/r/20231011074851.95280-1-huangjie.albert@bytedance.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/smc/af_smc.c | 3 ++- + net/smc/smc_ib.c | 7 ++++--- + net/smc/smc_ib.h | 2 +- + 3 files changed, 7 insertions(+), 5 deletions(-) + +diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c +index 9fe62b5b02974..4ea41d6e36969 100644 +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -1187,6 +1187,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, + struct smc_clc_first_contact_ext *fce = + (struct smc_clc_first_contact_ext *) + (((u8 *)clc_v2) + sizeof(*clc_v2)); ++ struct net *net = sock_net(&smc->sk); + + if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1) + return 0; +@@ -1195,7 +1196,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, + memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN); + ini->smcrv2.uses_gateway = false; + } else { +- if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr, ++ if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr, + smc_ib_gid_to_ipv4(aclc->r0.lcl.gid), + ini->smcrv2.nexthop_mac, + &ini->smcrv2.uses_gateway)) +diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c +index 854772dd52fd1..ace8611735321 100644 +--- a/net/smc/smc_ib.c ++++ b/net/smc/smc_ib.c +@@ -193,7 +193,7 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) + return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; + } + +-int smc_ib_find_route(__be32 saddr, __be32 daddr, ++int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr, + u8 nexthop_mac[], u8 *uses_gateway) + { + struct neighbour *neigh = NULL; +@@ -205,7 +205,7 @@ int smc_ib_find_route(__be32 saddr, __be32 daddr, + + if (daddr == cpu_to_be32(INADDR_NONE)) + goto out; +- rt = ip_route_output_flow(&init_net, &fl4, NULL); ++ rt = ip_route_output_flow(net, &fl4, NULL); + if (IS_ERR(rt)) + goto out; + if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET) +@@ -235,6 +235,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev, + if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP && + smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) { + struct in_device *in_dev = __in_dev_get_rcu(ndev); ++ struct net *net = dev_net(ndev); + const struct in_ifaddr *ifa; + bool subnet_match = false; + +@@ -248,7 +249,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev, + } + if (!subnet_match) + goto out; +- if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr, ++ if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr, + smcrv2->daddr, + smcrv2->nexthop_mac, + &smcrv2->uses_gateway)) +diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h +index 034295676e881..ebcb05ede7f55 100644 +--- a/net/smc/smc_ib.h ++++ b/net/smc/smc_ib.h +@@ -113,7 +113,7 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk, + int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, + unsigned short vlan_id, u8 gid[], u8 *sgid_index, + struct smc_init_info_smcrv2 *smcrv2); +-int smc_ib_find_route(__be32 saddr, __be32 daddr, ++int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr, + u8 nexthop_mac[], u8 *uses_gateway); + bool smc_ib_is_valid_local_systemid(void); + int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb); +-- +2.40.1 + diff --git a/queue-6.1/net-store-netdevs-in-an-xarray.patch 
b/queue-6.1/net-store-netdevs-in-an-xarray.patch new file mode 100644 index 00000000000..08eab4392f1 --- /dev/null +++ b/queue-6.1/net-store-netdevs-in-an-xarray.patch @@ -0,0 +1,273 @@ +From eb90504709ba5fd1ccd141d303e4e61e940ac3fd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 26 Jul 2023 11:55:29 -0700 +Subject: net: store netdevs in an xarray + +From: Jakub Kicinski + +[ Upstream commit 759ab1edb56c88906830fd6b2e7b12514dd32758 ] + +Iterating over the netdev hash table for netlink dumps is hard. +Dumps are done in "chunks" so we need to save the position +after each chunk, so we know where to restart from. Because +netdevs are stored in a hash table we remember which bucket +we were in and how many devices we dumped. + +Since we don't hold any locks across the "chunks" - devices may +come and go while we're dumping. If that happens we may miss +a device (if device is deleted from the bucket we were in). +We indicate to user space that this may have happened by setting +NLM_F_DUMP_INTR. User space is supposed to dump again (I think) +if it sees that. Somehow I doubt most user space gets this right.. + +To illustrate let's look at an example: + + System state: + start: # [A, B, C] + del: B # [A, C] + +with the hash table we may dump [A, B], missing C completely even +tho it existed both before and after the "del B". + +Add an xarray and use it to allocate ifindexes. This way we +can iterate ifindexes in order, without the worry that we'll +skip one. We may still generate a dump of a state which "never +existed", for example for a set of values and sequence of ops: + + System state: + start: # [A, B] + add: C # [A, C, B] + del: B # [A, C] + +we may generate a dump of [A], if C got an index between A and B. +System has never been in such state. But I'm 90% sure that's perfectly +fine, important part is that we can't _miss_ devices which exist before +and after. User space which wants to mirror kernel's state subscribes +to notifications and does periodic dumps so it will know that C exists +from the notification about its creation or from the next dump +(next dump is _guaranteed_ to include C, if it doesn't get removed). + +To avoid any perf regressions keep the hash table for now. Most +net namespaces have very few devices and microbenchmarking 1M lookups +on Skylake I get the following results (not counting loopback +to number of devs): + + #devs | hash | xa | delta + 2 | 18.3 | 20.1 | + 9.8% + 16 | 18.3 | 20.1 | + 9.5% + 64 | 18.3 | 26.3 | +43.8% + 128 | 20.4 | 26.3 | +28.6% + 256 | 20.0 | 26.4 | +32.1% + 1024 | 26.6 | 26.7 | + 0.2% + 8192 |541.3 | 33.5 | -93.8% + +No surprises since the hash table has 256 entries. +The microbenchmark scans indexes in order, if the pattern is more +random xa starts to win at 512 devices already. But that's a lot +of devices, in practice. 
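+
+For reference, a minimal kernel-style sketch of the allocation
+primitive this patch builds on (standalone, hypothetical names; the
+real logic is in dev_index_reserve() below): cyclic allocation hands
+out increasing indexes and only wraps at the limit, which is what makes
+in-order iteration miss-free:
+
+    static DEFINE_XARRAY_ALLOC(dev_ids);
+    static u32 next_id;
+
+    static int reserve_id(void)
+    {
+        u32 id;
+        int err;
+
+        /* a NULL entry just reserves the slot; xa_limit_31b keeps
+         * the index positive, matching ifindex semantics
+         */
+        err = xa_alloc_cyclic(&dev_ids, &id, NULL, xa_limit_31b,
+                              &next_id, GFP_KERNEL);
+        return err < 0 ? err : id;
+    }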
+ +Reviewed-by: Leon Romanovsky +Link: https://lore.kernel.org/r/20230726185530.2247698-2-kuba@kernel.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: 8e15aee62161 ("net: move altnames together with the netdevice") +Signed-off-by: Sasha Levin +--- + include/net/net_namespace.h | 4 +- + net/core/dev.c | 82 ++++++++++++++++++++++++------------- + 2 files changed, 57 insertions(+), 29 deletions(-) + +diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h +index 8c3587d5c308f..3f66f32b88edd 100644 +--- a/include/net/net_namespace.h ++++ b/include/net/net_namespace.h +@@ -42,6 +42,7 @@ + #include + #include + #include ++#include + + struct user_namespace; + struct proc_dir_entry; +@@ -69,7 +70,7 @@ struct net { + atomic_t dev_unreg_count; + + unsigned int dev_base_seq; /* protected by rtnl_mutex */ +- int ifindex; ++ u32 ifindex; + + spinlock_t nsid_lock; + atomic_t fnhe_genid; +@@ -108,6 +109,7 @@ struct net { + + struct hlist_head *dev_name_head; + struct hlist_head *dev_index_head; ++ struct xarray dev_by_index; + struct raw_notifier_head netdev_chain; + + /* Note that @hash_mix can be read millions times per second, +diff --git a/net/core/dev.c b/net/core/dev.c +index 9bf10c9c4735a..14066030cb1dc 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -391,6 +391,8 @@ static void list_netdevice(struct net_device *dev) + hlist_add_head_rcu(&dev->index_hlist, + dev_index_hash(net, dev->ifindex)); + write_unlock(&dev_base_lock); ++ /* We reserved the ifindex, this can't fail */ ++ WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL)); + + dev_base_seq_inc(net); + } +@@ -400,8 +402,12 @@ static void list_netdevice(struct net_device *dev) + */ + static void unlist_netdevice(struct net_device *dev, bool lock) + { ++ struct net *net = dev_net(dev); ++ + ASSERT_RTNL(); + ++ xa_erase(&net->dev_by_index, dev->ifindex); ++ + /* Unlink dev from the device chain */ + if (lock) + write_lock(&dev_base_lock); +@@ -9542,23 +9548,35 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, + } + + /** +- * dev_new_index - allocate an ifindex +- * @net: the applicable net namespace ++ * dev_index_reserve() - allocate an ifindex in a namespace ++ * @net: the applicable net namespace ++ * @ifindex: requested ifindex, pass %0 to get one allocated ++ * ++ * Allocate a ifindex for a new device. Caller must either use the ifindex ++ * to store the device (via list_netdevice()) or call dev_index_release() ++ * to give the index up. + * +- * Returns a suitable unique value for a new device interface +- * number. The caller must hold the rtnl semaphore or the +- * dev_base_lock to be sure it remains unique. ++ * Return: a suitable unique value for a new device interface number or -errno. 
+ */ +-static int dev_new_index(struct net *net) ++static int dev_index_reserve(struct net *net, u32 ifindex) + { +- int ifindex = net->ifindex; ++ int err; + +- for (;;) { +- if (++ifindex <= 0) +- ifindex = 1; +- if (!__dev_get_by_index(net, ifindex)) +- return net->ifindex = ifindex; +- } ++ if (!ifindex) ++ err = xa_alloc_cyclic(&net->dev_by_index, &ifindex, NULL, ++ xa_limit_31b, &net->ifindex, GFP_KERNEL); ++ else ++ err = xa_insert(&net->dev_by_index, ifindex, NULL, GFP_KERNEL); ++ if (err < 0) ++ return err; ++ ++ return ifindex; ++} ++ ++static void dev_index_release(struct net *net, int ifindex) ++{ ++ /* Expect only unused indexes, unlist_netdevice() removes the used */ ++ WARN_ON(xa_erase(&net->dev_by_index, ifindex)); + } + + /* Delayed registration/unregisteration */ +@@ -10028,11 +10046,10 @@ int register_netdevice(struct net_device *dev) + goto err_uninit; + } + +- ret = -EBUSY; +- if (!dev->ifindex) +- dev->ifindex = dev_new_index(net); +- else if (__dev_get_by_index(net, dev->ifindex)) ++ ret = dev_index_reserve(net, dev->ifindex); ++ if (ret < 0) + goto err_uninit; ++ dev->ifindex = ret; + + /* Transfer changeable features to wanted_features and enable + * software offloads (GSO and GRO). +@@ -10079,7 +10096,7 @@ int register_netdevice(struct net_device *dev) + ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); + ret = notifier_to_errno(ret); + if (ret) +- goto err_uninit; ++ goto err_ifindex_release; + + ret = netdev_register_kobject(dev); + write_lock(&dev_base_lock); +@@ -10135,6 +10152,8 @@ int register_netdevice(struct net_device *dev) + + err_uninit_notify: + call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); ++err_ifindex_release: ++ dev_index_release(net, dev->ifindex); + err_uninit: + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); +@@ -10994,9 +11013,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, + goto out; + + /* Check that new_ifindex isn't used yet. */ +- err = -EBUSY; +- if (new_ifindex && __dev_get_by_index(net, new_ifindex)) +- goto out; ++ if (new_ifindex) { ++ err = dev_index_reserve(net, new_ifindex); ++ if (err < 0) ++ goto out; ++ } else { ++ /* If there is an ifindex conflict assign a new one */ ++ err = dev_index_reserve(net, dev->ifindex); ++ if (err == -EBUSY) ++ err = dev_index_reserve(net, 0); ++ if (err < 0) ++ goto out; ++ new_ifindex = err; ++ } + + /* + * And now a mini version of register_netdevice unregister_netdevice. 
+@@ -11024,13 +11053,6 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, + rcu_barrier(); + + new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL); +- /* If there is an ifindex conflict assign a new one */ +- if (!new_ifindex) { +- if (__dev_get_by_index(net, dev->ifindex)) +- new_ifindex = dev_new_index(net); +- else +- new_ifindex = dev->ifindex; +- } + + rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid, + new_ifindex); +@@ -11211,6 +11233,9 @@ static int __net_init netdev_init(struct net *net) + if (net->dev_index_head == NULL) + goto err_idx; + ++ net->ifindex = 1; ++ xa_init_flags(&net->dev_by_index, XA_FLAGS_ALLOC); ++ + RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain); + + return 0; +@@ -11308,6 +11333,7 @@ static void __net_exit netdev_exit(struct net *net) + { + kfree(net->dev_name_head); + kfree(net->dev_index_head); ++ xa_destroy(&net->dev_by_index); + if (net != &init_net) + WARN_ON_ONCE(!list_empty(&net->dev_base_head)); + } +-- +2.40.1 + diff --git a/queue-6.1/net-tls-split-tls_rx_reader_lock.patch b/queue-6.1/net-tls-split-tls_rx_reader_lock.patch new file mode 100644 index 00000000000..78fe5b13559 --- /dev/null +++ b/queue-6.1/net-tls-split-tls_rx_reader_lock.patch @@ -0,0 +1,102 @@ +From 6ce3f0053906b8490a89c6cf6d77df853020de76 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 26 Jul 2023 21:15:55 +0200 +Subject: net/tls: split tls_rx_reader_lock + +From: Hannes Reinecke + +[ Upstream commit f9ae3204fb45d0749befc1cdff50f691c7461e5a ] + +Split tls_rx_reader_{lock,unlock} into an 'acquire/release' and +the actual locking part. +With that we can use the tls_rx_reader_lock in situations where +the socket is already locked. + +Suggested-by: Sagi Grimberg +Signed-off-by: Hannes Reinecke +Reviewed-by: Jakub Kicinski +Link: https://lore.kernel.org/r/20230726191556.41714-6-hare@suse.de +Signed-off-by: Jakub Kicinski +Stable-dep-of: 419ce133ab92 ("tcp: allow again tcp_disconnect() when threads are waiting") +Signed-off-by: Sasha Levin +--- + net/tls/tls_sw.c | 38 ++++++++++++++++++++++---------------- + 1 file changed, 22 insertions(+), 16 deletions(-) + +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index 9be00ebbb2341..c5c8fdadc05e8 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -1851,13 +1851,10 @@ tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot, + return sk_flush_backlog(sk); + } + +-static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx, +- bool nonblock) ++static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx, ++ bool nonblock) + { + long timeo; +- int err; +- +- lock_sock(sk); + + timeo = sock_rcvtimeo(sk, nonblock); + +@@ -1871,26 +1868,30 @@ static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx, + !READ_ONCE(ctx->reader_present), &wait); + remove_wait_queue(&ctx->wq, &wait); + +- if (timeo <= 0) { +- err = -EAGAIN; +- goto err_unlock; +- } +- if (signal_pending(current)) { +- err = sock_intr_errno(timeo); +- goto err_unlock; +- } ++ if (timeo <= 0) ++ return -EAGAIN; ++ if (signal_pending(current)) ++ return sock_intr_errno(timeo); + } + + WRITE_ONCE(ctx->reader_present, 1); + + return 0; ++} + +-err_unlock: +- release_sock(sk); ++static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx, ++ bool nonblock) ++{ ++ int err; ++ ++ lock_sock(sk); ++ err = tls_rx_reader_acquire(sk, ctx, nonblock); ++ if (err) ++ release_sock(sk); + return err; + } + +-static void tls_rx_reader_unlock(struct sock *sk, struct 
tls_sw_context_rx *ctx)
++static void tls_rx_reader_release(struct sock *sk, struct tls_sw_context_rx *ctx)
+ {
+ if (unlikely(ctx->reader_contended)) {
+ if (wq_has_sleeper(&ctx->wq))
+@@ -1902,6 +1903,11 @@ static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
+ }
+
+ WRITE_ONCE(ctx->reader_present, 0);
++}
++
++static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
++{
++ tls_rx_reader_release(sk, ctx);
+ release_sock(sk);
+ }
+
+--
+2.40.1
+
diff --git a/queue-6.1/net-use-call_rcu_hurry-for-dst_release.patch b/queue-6.1/net-use-call_rcu_hurry-for-dst_release.patch
new file mode 100644
index 00000000000..2677027a7af
--- /dev/null
+++ b/queue-6.1/net-use-call_rcu_hurry-for-dst_release.patch
@@ -0,0 +1,92 @@
+From 51290b74abe5ae7c0313a41f7e182e0d23a0ad56 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 18 Nov 2022 19:19:08 +0000
+Subject: net: Use call_rcu_hurry() for dst_release()
+
+From: Joel Fernandes (Google)
+
+[ Upstream commit 483c26ff63f42e8898ed43aca0b9953bc91f0cd4 ]
+
+In a networking test on ChromeOS, kernels built with the new
+CONFIG_RCU_LAZY=y Kconfig option fail a networking test in the teardown
+phase.
+
+This failure may be reproduced as follows: ip netns del
+
+The CONFIG_RCU_LAZY=y Kconfig option was introduced by earlier commits
+in this series for the benefit of certain battery-powered systems.
+This Kconfig option causes call_rcu() to delay its callbacks in order
+to batch them. This means that a given RCU grace period covers more
+callbacks, thus reducing the number of grace periods, in turn reducing
+the amount of energy consumed, which increases battery lifetime, which
+can be a very good thing. This is not a subtle effect: In some important
+use cases, the battery lifetime is increased by more than 10%.
+
+This CONFIG_RCU_LAZY=y option is available only for CPUs that offload
+callbacks, for example, CPUs mentioned in the rcu_nocbs kernel boot
+parameter passed to kernels built with CONFIG_RCU_NOCB_CPU=y.
+
+Delaying callbacks is normally not a problem because most callbacks do
+nothing but free memory. If the system is short on memory, a shrinker
+will kick all currently queued lazy callbacks out of their laziness,
+thus freeing their memory in short order. Similarly, the rcu_barrier()
+function, which blocks until all currently queued callbacks are invoked,
+will also kick lazy callbacks, thus enabling rcu_barrier() to complete
+in a timely manner.
+
+However, there are some cases where laziness is not a good option.
+For example, synchronize_rcu() invokes call_rcu(), and blocks until
+the newly queued callback is invoked. It would not be good for
+synchronize_rcu() to block for ten seconds, even on an idle system.
+Therefore, synchronize_rcu() invokes call_rcu_hurry() instead of
+call_rcu(). The arrival of a non-lazy call_rcu_hurry() callback on a
+given CPU kicks any lazy callbacks that might be already queued on that
+CPU. After all, if there is going to be a grace period, all callbacks
+might as well get full benefit from it.
+
+Yes, this could be done the other way around by creating a
+call_rcu_lazy(), but earlier experience with this approach and
+feedback at the 2022 Linux Plumbers Conference shifted the approach
+to call_rcu() being lazy with call_rcu_hurry() for the few places
+where laziness is inappropriate.
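+
+The distinction can be sketched in a few lines (kernel-style, with a
+hypothetical object; both RCU calls are the real APIs discussed above):
+
+    struct obj {
+        struct rcu_head rcu;
+        /* ... payload ... */
+    };
+
+    static void obj_free_rcu(struct rcu_head *head)
+    {
+        kfree(container_of(head, struct obj, rcu));
+    }
+
+    static void obj_release(struct obj *obj, bool waited_on)
+    {
+        if (waited_on)
+            /* timely grace period, kicks lazy callbacks */
+            call_rcu_hurry(&obj->rcu, obj_free_rcu);
+        else
+            /* may be batched for seconds under RCU_LAZY */
+            call_rcu(&obj->rcu, obj_free_rcu);
+    }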
+
+Returning to the test failure, use of ftrace showed that this failure
+was caused by the added delays due to this new lazy behavior of
+call_rcu() in kernels built with CONFIG_RCU_LAZY=y.
+
+Therefore, make dst_release() use call_rcu_hurry() in order to revert
+to the old test-failure-free behavior.
+
+[ paulmck: Apply s/call_rcu_flush/call_rcu_hurry/ feedback from Tejun Heo. ]
+
+Signed-off-by: Joel Fernandes (Google)
+Cc: David Ahern
+Cc: "David S. Miller"
+Cc: Hideaki YOSHIFUJI
+Cc: Jakub Kicinski
+Cc: Paolo Abeni
+Cc:
+Reviewed-by: Eric Dumazet
+Signed-off-by: Paul E. McKenney
+Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem")
+Signed-off-by: Sasha Levin
+---
+ net/core/dst.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/dst.c b/net/core/dst.c
+index bc9c9be4e0801..a4e738d321ba2 100644
+--- a/net/core/dst.c
++++ b/net/core/dst.c
+@@ -174,7 +174,7 @@ void dst_release(struct dst_entry *dst)
+ net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
+ __func__, dst, newrefcnt);
+ if (!newrefcnt)
+- call_rcu(&dst->rcu_head, dst_destroy_rcu);
++ call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu);
+ }
+ }
+ EXPORT_SYMBOL(dst_release);
+--
+2.40.1
+
diff --git a/queue-6.1/net-xfrm-skip-policies-marked-as-dead-while-reinsert.patch b/queue-6.1/net-xfrm-skip-policies-marked-as-dead-while-reinsert.patch
new file mode 100644
index 00000000000..10b79a77bee
--- /dev/null
+++ b/queue-6.1/net-xfrm-skip-policies-marked-as-dead-while-reinsert.patch
@@ -0,0 +1,116 @@
+From aceaae0532ca72b5bec91314f57c627992743869 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Tue, 15 Aug 2023 22:18:34 +0800
+Subject: net: xfrm: skip policies marked as dead while reinserting policies
+
+From: Dong Chenchen
+
+[ Upstream commit 6d41d4fe28724db16ca1016df0713a07e0cc7448 ]
+
+BUG: KASAN: slab-use-after-free in xfrm_policy_inexact_list_reinsert+0xb6/0x430
+Read of size 1 at addr ffff8881051f3bf8 by task ip/668
+
+CPU: 2 PID: 668 Comm: ip Not tainted 6.5.0-rc5-00182-g25aa0bebba72-dirty #64
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13 04/01/2014
+Call Trace:
+
+ dump_stack_lvl+0x72/0xa0
+ print_report+0xd0/0x620
+ kasan_report+0xb6/0xf0
+ xfrm_policy_inexact_list_reinsert+0xb6/0x430
+ xfrm_policy_inexact_insert_node.constprop.0+0x537/0x800
+ xfrm_policy_inexact_alloc_chain+0x23f/0x320
+ xfrm_policy_inexact_insert+0x6b/0x590
+ xfrm_policy_insert+0x3b1/0x480
+ xfrm_add_policy+0x23c/0x3c0
+ xfrm_user_rcv_msg+0x2d0/0x510
+ netlink_rcv_skb+0x10d/0x2d0
+ xfrm_netlink_rcv+0x49/0x60
+ netlink_unicast+0x3fe/0x540
+ netlink_sendmsg+0x528/0x970
+ sock_sendmsg+0x14a/0x160
+ ____sys_sendmsg+0x4fc/0x580
+ ___sys_sendmsg+0xef/0x160
+ __sys_sendmsg+0xf7/0x1b0
+ do_syscall_64+0x3f/0x90
+ entry_SYSCALL_64_after_hwframe+0x73/0xdd
+
+The root cause is:
+
+cpu 0 cpu1
+xfrm_dump_policy
+xfrm_policy_walk
+list_move_tail
+ xfrm_add_policy
+ ... ...
+ xfrm_policy_inexact_list_reinsert
+ list_for_each_entry_reverse
+ if (!policy->bydst_reinsert)
+ //read non-existent policy
+xfrm_dump_policy_done
+xfrm_policy_walk_done
+list_del(&walk->walk.all);
+
+If dump_one_policy() returns err (triggered by netlink socket),
+xfrm_policy_walk() will move the walk initialized by the socket to the
+list net->xfrm.policy_all, so this socket becomes visible in the global
+policy list. The head *walk can be traversed when users add policies
+with different prefixlen and trigger xfrm_policy node merge.
+
+The issue can also be triggered by policy list traversal while rehashing
+and flushing policies.
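+
+To make the hazard concrete (kernel-style sketch using only names from
+the report above): dump walkers are linked into the same policy_all
+list as real policies and are distinguishable only by walk.dead, so
+every traversal that treats entries as policies has to filter them out
+first:
+
+    list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+        if (pol->walk.dead)   /* dumper's walker (or a deleted
+                               * policy): not a real entry
+                               */
+            continue;
+        /* only here is pol safe to use as a live policy */
+    }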
+
+It can be fixed by skipping such "policies" with walk.dead set to 1.
+
+Fixes: 9cf545ebd591 ("xfrm: policy: store inexact policies in a tree ordered by destination address")
+Fixes: 12a169e7d8f4 ("ipsec: Put dumpers on the dump list")
+Signed-off-by: Dong Chenchen
+Signed-off-by: Steffen Klassert
+Signed-off-by: Sasha Levin
+---
+ net/xfrm/xfrm_policy.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index e4d320e036fed..e47c670c7e2cd 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -850,7 +850,7 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net,
+ struct hlist_node *newpos = NULL;
+ bool matches_s, matches_d;
+
+- if (!policy->bydst_reinsert)
++ if (policy->walk.dead || !policy->bydst_reinsert)
+ continue;
+
+ WARN_ON_ONCE(policy->family != family);
+@@ -1255,8 +1255,11 @@ static void xfrm_hash_rebuild(struct work_struct *work)
+ struct xfrm_pol_inexact_bin *bin;
+ u8 dbits, sbits;
+
++ if (policy->walk.dead)
++ continue;
++
+ dir = xfrm_policy_id2dir(policy->index);
+- if (policy->walk.dead || dir >= XFRM_POLICY_MAX)
++ if (dir >= XFRM_POLICY_MAX)
+ continue;
+
+ if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
+@@ -1788,9 +1791,11 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
+
+ again:
+ list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
++ if (pol->walk.dead)
++ continue;
++
+ dir = xfrm_policy_id2dir(pol->index);
+- if (pol->walk.dead ||
+- dir >= XFRM_POLICY_MAX ||
++ if (dir >= XFRM_POLICY_MAX ||
+ pol->type != type)
+ continue;
+
+--
+2.40.1
+
diff --git a/queue-6.1/netfilter-nf_tables-do-not-remove-elements-if-set-ba.patch b/queue-6.1/netfilter-nf_tables-do-not-remove-elements-if-set-ba.patch
new file mode 100644
index 00000000000..001fba7ab28
--- /dev/null
+++ b/queue-6.1/netfilter-nf_tables-do-not-remove-elements-if-set-ba.patch
@@ -0,0 +1,42 @@
+From ba39eabcdc1e1f894d45daaa82a41111a377f7dc Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Wed, 4 Oct 2023 13:12:58 +0200
+Subject: netfilter: nf_tables: do not remove elements if set backend
+ implements .abort
+
+From: Pablo Neira Ayuso
+
+[ Upstream commit ebd032fa881882fef2acb9da1bbde48d8233241d ]
+
+The pipapo set backend maintains two copies of the data structure.
+Removing the elements from the copy that is going to be discarded slows
+down the abort path significantly; it drops from several minutes to a
+few seconds after this patch.
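+
+Conceptually (hypothetical names, not the pipapo code): a backend that
+keeps a separate working copy can honor abort by dropping that copy
+wholesale, which is what makes per-element removal on the abort path
+redundant work:
+
+    struct dbuf_set {
+        struct elem *live;      /* committed copy */
+        struct elem *scratch;   /* copy with uncommitted changes */
+    };
+
+    static void dbuf_set_abort(struct dbuf_set *s)
+    {
+        free_elems(s->scratch); /* one cheap drop (hypothetical
+                                 * helper) instead of N removals
+                                 */
+        s->scratch = NULL;
+    }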
+ +Fixes: 212ed75dc5fb ("netfilter: nf_tables: integrate pipapo into commit protocol") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Florian Westphal +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 5e3dbe2652dbd..8ab545802dd15 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -9931,7 +9931,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + break; + } + te = (struct nft_trans_elem *)trans->data; +- nft_setelem_remove(net, te->set, &te->elem); ++ if (!te->set->ops->abort || ++ nft_setelem_is_catchall(te->set, &te->elem)) ++ nft_setelem_remove(net, te->set, &te->elem); ++ + if (!nft_setelem_is_catchall(te->set, &te->elem)) + atomic_dec(&te->set->nelems); + +-- +2.40.1 + diff --git a/queue-6.1/netfilter-nf_tables-revert-do-not-remove-elements-if.patch b/queue-6.1/netfilter-nf_tables-revert-do-not-remove-elements-if.patch new file mode 100644 index 00000000000..419cbdc7b62 --- /dev/null +++ b/queue-6.1/netfilter-nf_tables-revert-do-not-remove-elements-if.patch @@ -0,0 +1,41 @@ +From 8f06b0849f28567f375bb02ef66207b3656145bc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 18 Oct 2023 13:18:39 +0200 +Subject: netfilter: nf_tables: revert do not remove elements if set backend + implements .abort + +From: Pablo Neira Ayuso + +[ Upstream commit f86fb94011aeb3b26337fc22204ca726aeb8bc24 ] + +nf_tables_abort_release() path calls nft_set_elem_destroy() for +NFT_MSG_NEWSETELEM which releases the element, however, a reference to +the element still remains in the working copy. + +Fixes: ebd032fa8818 ("netfilter: nf_tables: do not remove elements if set backend implements .abort") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Florian Westphal +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 8ab545802dd15..5e3dbe2652dbd 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -9931,10 +9931,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + break; + } + te = (struct nft_trans_elem *)trans->data; +- if (!te->set->ops->abort || +- nft_setelem_is_catchall(te->set, &te->elem)) +- nft_setelem_remove(net, te->set, &te->elem); +- ++ nft_setelem_remove(net, te->set, &te->elem); + if (!nft_setelem_is_catchall(te->set, &te->elem)) + atomic_dec(&te->set->nelems); + +-- +2.40.1 + diff --git a/queue-6.1/overlayfs-set-ctime-when-setting-mtime-and-atime.patch b/queue-6.1/overlayfs-set-ctime-when-setting-mtime-and-atime.patch new file mode 100644 index 00000000000..3b6fe270d58 --- /dev/null +++ b/queue-6.1/overlayfs-set-ctime-when-setting-mtime-and-atime.patch @@ -0,0 +1,47 @@ +From c8b90985a6ff0526b292308662bf750bbf8c8e09 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 13 Sep 2023 09:33:12 -0400 +Subject: overlayfs: set ctime when setting mtime and atime + +From: Jeff Layton + +[ Upstream commit 03dbab3bba5f009d053635c729d1244f2c8bad38 ] + +Nathan reported that he was seeing the new warning in +setattr_copy_mgtime pop when starting podman containers. Overlayfs is +trying to set the atime and mtime via notify_change without also +setting the ctime. 
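+
+For concreteness, the problematic request looked like this before the
+fix (a sketch using the fields from the hunk below): explicit atime and
+mtime values with no ATTR_CTIME, i.e. a timestamp change that asks the
+VFS to leave ctime alone, which the new warning flags:
+
+    struct iattr attr = {
+        .ia_valid = ATTR_ATIME | ATTR_MTIME |
+                    ATTR_ATIME_SET | ATTR_MTIME_SET, /* no ATTR_CTIME */
+        .ia_atime = stat->atime,
+        .ia_mtime = stat->mtime,
+    };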
+
+POSIX states that when the atime and mtime are updated via utimes(),
+we must also update the ctime to the current time. The situation with
+overlayfs copy-up is analogous, so add ATTR_CTIME to the bitmask.
+notify_change will fill in the value.
+
+Reported-by: Nathan Chancellor
+Signed-off-by: Jeff Layton
+Tested-by: Nathan Chancellor
+Acked-by: Christian Brauner
+Acked-by: Amir Goldstein
+Message-Id: <20230913-ctime-v1-1-c6bc509cbc27@kernel.org>
+Signed-off-by: Christian Brauner
+Signed-off-by: Sasha Levin
+---
+ fs/overlayfs/copy_up.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
+index e6d711f42607b..86d4b6975dbcb 100644
+--- a/fs/overlayfs/copy_up.c
++++ b/fs/overlayfs/copy_up.c
+@@ -300,7 +300,7 @@ static int ovl_set_timestamps(struct ovl_fs *ofs, struct dentry *upperdentry,
+ {
+ 	struct iattr attr = {
+ 		.ia_valid =
+-		     ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
++		     ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_CTIME,
+ 		.ia_atime = stat->atime,
+ 		.ia_mtime = stat->mtime,
+ 	};
+--
+2.40.1
+
diff --git a/queue-6.1/platform-x86-touchscreen_dmi-add-info-for-the-positi.patch b/queue-6.1/platform-x86-touchscreen_dmi-add-info-for-the-positi.patch
new file mode 100644
index 00000000000..9b05e2ea965
--- /dev/null
+++ b/queue-6.1/platform-x86-touchscreen_dmi-add-info-for-the-positi.patch
@@ -0,0 +1,65 @@
+From cc371d392bce2bad950e7b6d8130a1df94253ed2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Wed, 4 Oct 2023 19:59:00 -0400
+Subject: platform/x86: touchscreen_dmi: Add info for the Positivo C4128B
+
+From: Renan Guilherme Lebre Ramos
+
+[ Upstream commit aa7dcba3bae6869122828b144a3cfd231718089d ]
+
+Add information for the Positivo C4128B, a notebook/tablet convertible.
+ +Link: https://github.com/onitake/gsl-firmware/pull/217 +Signed-off-by: Renan Guilherme Lebre Ramos +Link: https://lore.kernel.org/r/20231004235900.426240-1-japareaggae@gmail.com +Reviewed-by: Hans de Goede +Signed-off-by: Hans de Goede +Signed-off-by: Sasha Levin +--- + drivers/platform/x86/touchscreen_dmi.c | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c +index 68e66b60445c3..9a92d515abb9b 100644 +--- a/drivers/platform/x86/touchscreen_dmi.c ++++ b/drivers/platform/x86/touchscreen_dmi.c +@@ -740,6 +740,21 @@ static const struct ts_dmi_data pipo_w11_data = { + .properties = pipo_w11_props, + }; + ++static const struct property_entry positivo_c4128b_props[] = { ++ PROPERTY_ENTRY_U32("touchscreen-min-x", 4), ++ PROPERTY_ENTRY_U32("touchscreen-min-y", 13), ++ PROPERTY_ENTRY_U32("touchscreen-size-x", 1915), ++ PROPERTY_ENTRY_U32("touchscreen-size-y", 1269), ++ PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-positivo-c4128b.fw"), ++ PROPERTY_ENTRY_U32("silead,max-fingers", 10), ++ { } ++}; ++ ++static const struct ts_dmi_data positivo_c4128b_data = { ++ .acpi_name = "MSSL1680:00", ++ .properties = positivo_c4128b_props, ++}; ++ + static const struct property_entry pov_mobii_wintab_p800w_v20_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 32), + PROPERTY_ENTRY_U32("touchscreen-min-y", 16), +@@ -1457,6 +1472,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { + DMI_MATCH(DMI_BIOS_VERSION, "MOMO.G.WI71C.MABMRBA02"), + }, + }, ++ { ++ /* Positivo C4128B */ ++ .driver_data = (void *)&positivo_c4128b_data, ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "C4128B-1"), ++ }, ++ }, + { + /* Point of View mobii wintab p800w (v2.0) */ + .driver_data = (void *)&pov_mobii_wintab_p800w_v20_data, +-- +2.40.1 + diff --git a/queue-6.1/pwr-mlxbf-extend-kconfig-to-include-gpio-mlxbf3-depe.patch b/queue-6.1/pwr-mlxbf-extend-kconfig-to-include-gpio-mlxbf3-depe.patch new file mode 100644 index 00000000000..675ea4a9df0 --- /dev/null +++ b/queue-6.1/pwr-mlxbf-extend-kconfig-to-include-gpio-mlxbf3-depe.patch @@ -0,0 +1,41 @@ +From 9d9a717eeeef7a5ad593c1e5044cd5890f4e7e0b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Aug 2023 09:37:43 -0400 +Subject: pwr-mlxbf: extend Kconfig to include gpio-mlxbf3 dependency + +From: David Thompson + +[ Upstream commit 82f07f1acf417b81e793145c167dd5e156024de4 ] + +The BlueField power handling driver (pwr-mlxbf.c) provides +functionality for both BlueField-2 and BlueField-3 based +platforms. This driver also depends on the SoC-specific +BlueField GPIO driver, whether gpio-mlxbf2 or gpio-mlxbf3. +This patch extends the Kconfig definition to include the +dependency on the gpio-mlxbf3 driver, if applicable. 
+
+Signed-off-by: David Thompson
+Reviewed-by: Asmaa Mnebhi
+Link: https://lore.kernel.org/r/20230823133743.31275-1-davthompson@nvidia.com
+Signed-off-by: Sebastian Reichel
+Signed-off-by: Sasha Levin
+---
+ drivers/power/reset/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig
+index a8c46ba5878fe..54201f0374104 100644
+--- a/drivers/power/reset/Kconfig
++++ b/drivers/power/reset/Kconfig
+@@ -299,7 +299,7 @@ config NVMEM_REBOOT_MODE
+ 
+ config POWER_MLXBF
+ 	tristate "Mellanox BlueField power handling driver"
+-	depends on (GPIO_MLXBF2 && ACPI)
++	depends on (GPIO_MLXBF2 || GPIO_MLXBF3) && ACPI
+ 	help
+ 	  This driver supports reset or low power mode handling for Mellanox BlueField.
+ 
+--
+2.40.1
+
diff --git a/queue-6.1/rcu-fix-late-wakeup-when-flush-of-bypass-cblist-happ.patch b/queue-6.1/rcu-fix-late-wakeup-when-flush-of-bypass-cblist-happ.patch
new file mode 100644
index 00000000000..e38cc5f00b5
--- /dev/null
+++ b/queue-6.1/rcu-fix-late-wakeup-when-flush-of-bypass-cblist-happ.patch
@@ -0,0 +1,68 @@
+From 3f132e8e674299042d9e5313dfbfcb3de55af912 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Sat, 17 Sep 2022 16:41:59 +0000
+Subject: rcu: Fix late wakeup when flush of bypass cblist happens
+
+From: Joel Fernandes (Google)
+
+[ Upstream commit b50606f35f4b73c8e4c6b9c64fe7ba72ea919134 ]
+
+When the bypass cblist gets too big or its timeout has occurred, it is
+flushed into the main cblist. However, the bypass timer is still running
+and the behavior is that it would eventually expire and wake the GP
+thread.
+
+Since we are going to use the bypass cblist for lazy CBs, do the wakeup
+as soon as the flush for a "too big or too long" bypass list happens.
+Otherwise, long delays can happen for callbacks which get promoted from
+lazy to non-lazy.
+
+This is a good thing to do anyway (regardless of future lazy patches),
+since it makes the behavior consistent with the behavior of other code
+paths where flushing into the ->cblist quickly brings the GP kthread
+into a non-sleeping state.
+
+[ Frederic Weisbecker: Changes to avoid unnecessary GP-thread wakeups plus
+  comment changes. ]
+
+Reviewed-by: Frederic Weisbecker
+Signed-off-by: Joel Fernandes (Google)
+Signed-off-by: Paul E. McKenney
+Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem")
+Signed-off-by: Sasha Levin
+---
+ kernel/rcu/tree_nocb.h | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
+index 0a5f0ef414845..04c87f250e01a 100644
+--- a/kernel/rcu/tree_nocb.h
++++ b/kernel/rcu/tree_nocb.h
+@@ -433,8 +433,9 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ 	if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) ||
+ 	    ncbs >= qhimark) {
+ 		rcu_nocb_lock(rdp);
++		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
++
+ 		if (!rcu_nocb_flush_bypass(rdp, rhp, j)) {
+-			*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
+ 			if (*was_alldone)
+ 				trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+ 						    TPS("FirstQ"));
+@@ -447,7 +448,12 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ 			rcu_advance_cbs_nowake(rdp->mynode, rdp);
+ 			rdp->nocb_gp_adv_time = j;
+ 		}
+-		rcu_nocb_unlock_irqrestore(rdp, flags);
++
++		// The flush succeeded and we moved CBs into the regular list.
++		// Don't wait for the wake up timer as it may be too far ahead.
++		// Wake up the GP thread now instead, if the cblist was empty.
++ __call_rcu_nocb_wake(rdp, *was_alldone, flags); ++ + return true; // Callback already enqueued. + } + +-- +2.40.1 + diff --git a/queue-6.1/rcu-fix-missing-nocb-gp-wake-on-rcu_barrier.patch b/queue-6.1/rcu-fix-missing-nocb-gp-wake-on-rcu_barrier.patch new file mode 100644 index 00000000000..d1ccd6ce3de --- /dev/null +++ b/queue-6.1/rcu-fix-missing-nocb-gp-wake-on-rcu_barrier.patch @@ -0,0 +1,94 @@ +From 6e201fbbe533ee08318f49c360c83145a1231ac2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 16 Oct 2022 16:22:53 +0000 +Subject: rcu: Fix missing nocb gp wake on rcu_barrier() + +From: Frederic Weisbecker + +[ Upstream commit b8f7aca3f0e0e6223094ba2662bac90353674b04 ] + +In preparation for RCU lazy changes, wake up the RCU nocb gp thread if +needed after an entrain. This change prevents the RCU barrier callback +from waiting in the queue for several seconds before the lazy callbacks +in front of it are serviced. + +Reported-by: Joel Fernandes (Google) +Signed-off-by: Frederic Weisbecker +Signed-off-by: Joel Fernandes (Google) +Signed-off-by: Paul E. McKenney +Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem") +Signed-off-by: Sasha Levin +--- + kernel/rcu/tree.c | 11 +++++++++++ + kernel/rcu/tree.h | 1 + + kernel/rcu/tree_nocb.h | 5 +++++ + 3 files changed, 17 insertions(+) + +diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c +index 917a1e43f7839..6ea59aa53db78 100644 +--- a/kernel/rcu/tree.c ++++ b/kernel/rcu/tree.c +@@ -3908,6 +3908,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp) + { + unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence); + unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap); ++ bool wake_nocb = false; ++ bool was_alldone = false; + + lockdep_assert_held(&rcu_state.barrier_lock); + if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq)) +@@ -3916,7 +3918,14 @@ static void rcu_barrier_entrain(struct rcu_data *rdp) + rdp->barrier_head.func = rcu_barrier_callback; + debug_rcu_head_queue(&rdp->barrier_head); + rcu_nocb_lock(rdp); ++ /* ++ * Flush bypass and wakeup rcuog if we add callbacks to an empty regular ++ * queue. This way we don't wait for bypass timer that can reach seconds ++ * if it's fully lazy. 
++ */ ++ was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist); + WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); ++ wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist); + if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) { + atomic_inc(&rcu_state.barrier_cpu_count); + } else { +@@ -3924,6 +3933,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp) + rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence); + } + rcu_nocb_unlock(rdp); ++ if (wake_nocb) ++ wake_nocb_gp(rdp, false); + smp_store_release(&rdp->barrier_seq_snap, gseq); + } + +diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h +index d4a97e40ea9c3..925dd98f8b23b 100644 +--- a/kernel/rcu/tree.h ++++ b/kernel/rcu/tree.h +@@ -439,6 +439,7 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp); + static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp); + static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq); + static void rcu_init_one_nocb(struct rcu_node *rnp); ++static bool wake_nocb_gp(struct rcu_data *rdp, bool force); + static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, + unsigned long j); + static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, +diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h +index 04c87f250e01a..74d4983d68f82 100644 +--- a/kernel/rcu/tree_nocb.h ++++ b/kernel/rcu/tree_nocb.h +@@ -1570,6 +1570,11 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) + { + } + ++static bool wake_nocb_gp(struct rcu_data *rdp, bool force) ++{ ++ return false; ++} ++ + static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, + unsigned long j) + { +-- +2.40.1 + diff --git a/queue-6.1/rcu-make-call_rcu-lazy-to-save-power.patch b/queue-6.1/rcu-make-call_rcu-lazy-to-save-power.patch new file mode 100644 index 00000000000..f81ebaa1b05 --- /dev/null +++ b/queue-6.1/rcu-make-call_rcu-lazy-to-save-power.patch @@ -0,0 +1,689 @@ +From 7b253194c188b40a04df52ea0aeacae23989ef0d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 16 Oct 2022 16:22:54 +0000 +Subject: rcu: Make call_rcu() lazy to save power + +From: Joel Fernandes (Google) + +[ Upstream commit 3cb278e73be58bfb780ecd55129296d2f74c1fb7 ] + +Implement timer-based RCU callback batching (also known as lazy +callbacks). With this we save about 5-10% of power consumed due +to RCU requests that happen when system is lightly loaded or idle. + +By default, all async callbacks (queued via call_rcu) are marked +lazy. An alternate API call_rcu_hurry() is provided for the few users, +for example synchronize_rcu(), that need the old behavior. + +The batch is flushed whenever a certain amount of time has passed, or +the batch on a particular CPU grows too big. Also memory pressure will +flush it in a future patch. + +To handle several corner cases automagically (such as rcu_barrier() and +hotplug), we re-use bypass lists which were originally introduced to +address lock contention, to handle lazy CBs as well. The bypass list +length has the lazy CB length included in it. A separate lazy CB length +counter is also introduced to keep track of the number of lazy CBs. + +[ paulmck: Fix formatting of inline call_rcu_lazy() definition. ] +[ paulmck: Apply Zqiang feedback. ] +[ paulmck: Apply s/call_rcu_flush/call_rcu_hurry/ feedback from Tejun Heo. ] + +Suggested-by: Paul McKenney +Acked-by: Frederic Weisbecker +Signed-off-by: Joel Fernandes (Google) +Signed-off-by: Paul E. 
McKenney +Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem") +Signed-off-by: Sasha Levin +--- + include/linux/rcupdate.h | 9 +++ + kernel/rcu/Kconfig | 8 ++ + kernel/rcu/rcu.h | 8 ++ + kernel/rcu/tiny.c | 2 +- + kernel/rcu/tree.c | 129 ++++++++++++++++++++----------- + kernel/rcu/tree.h | 11 ++- + kernel/rcu/tree_exp.h | 2 +- + kernel/rcu/tree_nocb.h | 159 +++++++++++++++++++++++++++++++-------- + 8 files changed, 246 insertions(+), 82 deletions(-) + +diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h +index e9e61cd27ef63..46f05dc8b31aa 100644 +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -108,6 +108,15 @@ static inline int rcu_preempt_depth(void) + + #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + ++#ifdef CONFIG_RCU_LAZY ++void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func); ++#else ++static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) ++{ ++ call_rcu(head, func); ++} ++#endif ++ + /* Internal to kernel */ + void rcu_init(void); + extern int rcu_scheduler_active; +diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig +index d471d22a5e21b..d78f6181c8aad 100644 +--- a/kernel/rcu/Kconfig ++++ b/kernel/rcu/Kconfig +@@ -311,4 +311,12 @@ config TASKS_TRACE_RCU_READ_MB + Say N here if you hate read-side memory barriers. + Take the default if you are unsure. + ++config RCU_LAZY ++ bool "RCU callback lazy invocation functionality" ++ depends on RCU_NOCB_CPU ++ default n ++ help ++ To save power, batch RCU callbacks and flush after delay, memory ++ pressure, or callback list growing too big. ++ + endmenu # "RCU Subsystem" +diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h +index 48d8f754b730e..6b86c5912beaf 100644 +--- a/kernel/rcu/rcu.h ++++ b/kernel/rcu/rcu.h +@@ -474,6 +474,14 @@ enum rcutorture_type { + INVALID_RCU_FLAVOR + }; + ++#if defined(CONFIG_RCU_LAZY) ++unsigned long rcu_lazy_get_jiffies_till_flush(void); ++void rcu_lazy_set_jiffies_till_flush(unsigned long j); ++#else ++static inline unsigned long rcu_lazy_get_jiffies_till_flush(void) { return 0; } ++static inline void rcu_lazy_set_jiffies_till_flush(unsigned long j) { } ++#endif ++ + #if defined(CONFIG_TREE_RCU) + void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, + unsigned long *gp_seq); +diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c +index a33a8d4942c37..72913ce21258b 100644 +--- a/kernel/rcu/tiny.c ++++ b/kernel/rcu/tiny.c +@@ -44,7 +44,7 @@ static struct rcu_ctrlblk rcu_ctrlblk = { + + void rcu_barrier(void) + { +- wait_rcu_gp(call_rcu); ++ wait_rcu_gp(call_rcu_hurry); + } + EXPORT_SYMBOL(rcu_barrier); + +diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c +index 6ea59aa53db78..855c035ec9630 100644 +--- a/kernel/rcu/tree.c ++++ b/kernel/rcu/tree.c +@@ -2731,47 +2731,8 @@ static void check_cb_ovld(struct rcu_data *rdp) + raw_spin_unlock_rcu_node(rnp); + } + +-/** +- * call_rcu() - Queue an RCU callback for invocation after a grace period. +- * @head: structure to be used for queueing the RCU updates. +- * @func: actual callback function to be invoked after the grace period +- * +- * The callback function will be invoked some time after a full grace +- * period elapses, in other words after all pre-existing RCU read-side +- * critical sections have completed. However, the callback function +- * might well execute concurrently with RCU read-side critical sections +- * that started after call_rcu() was invoked. 
+- * +- * RCU read-side critical sections are delimited by rcu_read_lock() +- * and rcu_read_unlock(), and may be nested. In addition, but only in +- * v5.0 and later, regions of code across which interrupts, preemption, +- * or softirqs have been disabled also serve as RCU read-side critical +- * sections. This includes hardware interrupt handlers, softirq handlers, +- * and NMI handlers. +- * +- * Note that all CPUs must agree that the grace period extended beyond +- * all pre-existing RCU read-side critical section. On systems with more +- * than one CPU, this means that when "func()" is invoked, each CPU is +- * guaranteed to have executed a full memory barrier since the end of its +- * last RCU read-side critical section whose beginning preceded the call +- * to call_rcu(). It also means that each CPU executing an RCU read-side +- * critical section that continues beyond the start of "func()" must have +- * executed a memory barrier after the call_rcu() but before the beginning +- * of that RCU read-side critical section. Note that these guarantees +- * include CPUs that are offline, idle, or executing in user mode, as +- * well as CPUs that are executing in the kernel. +- * +- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the +- * resulting RCU callback function "func()", then both CPU A and CPU B are +- * guaranteed to execute a full memory barrier during the time interval +- * between the call to call_rcu() and the invocation of "func()" -- even +- * if CPU A and CPU B are the same CPU (but again only if the system has +- * more than one CPU). +- * +- * Implementation of these memory-ordering guarantees is described here: +- * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst. +- */ +-void call_rcu(struct rcu_head *head, rcu_callback_t func) ++static void ++__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy) + { + static atomic_t doublefrees; + unsigned long flags; +@@ -2812,7 +2773,7 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func) + } + + check_cb_ovld(rdp); +- if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags)) ++ if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) + return; // Enqueued onto ->nocb_bypass, so just leave. + // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock. + rcu_segcblist_enqueue(&rdp->cblist, head); +@@ -2834,8 +2795,84 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func) + local_irq_restore(flags); + } + } +-EXPORT_SYMBOL_GPL(call_rcu); + ++#ifdef CONFIG_RCU_LAZY ++/** ++ * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and ++ * flush all lazy callbacks (including the new one) to the main ->cblist while ++ * doing so. ++ * ++ * @head: structure to be used for queueing the RCU updates. ++ * @func: actual callback function to be invoked after the grace period ++ * ++ * The callback function will be invoked some time after a full grace ++ * period elapses, in other words after all pre-existing RCU read-side ++ * critical sections have completed. ++ * ++ * Use this API instead of call_rcu() if you don't want the callback to be ++ * invoked after very long periods of time, which can happen on systems without ++ * memory pressure and on systems which are lightly loaded or mostly idle. ++ * This function will cause callbacks to be invoked sooner than later at the ++ * expense of extra power. Other than that, this function is identical to, and ++ * reuses call_rcu()'s logic. 
Refer to call_rcu() for more details about memory ++ * ordering and other functionality. ++ */ ++void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) ++{ ++ return __call_rcu_common(head, func, false); ++} ++EXPORT_SYMBOL_GPL(call_rcu_hurry); ++#endif ++ ++/** ++ * call_rcu() - Queue an RCU callback for invocation after a grace period. ++ * By default the callbacks are 'lazy' and are kept hidden from the main ++ * ->cblist to prevent starting of grace periods too soon. ++ * If you desire grace periods to start very soon, use call_rcu_hurry(). ++ * ++ * @head: structure to be used for queueing the RCU updates. ++ * @func: actual callback function to be invoked after the grace period ++ * ++ * The callback function will be invoked some time after a full grace ++ * period elapses, in other words after all pre-existing RCU read-side ++ * critical sections have completed. However, the callback function ++ * might well execute concurrently with RCU read-side critical sections ++ * that started after call_rcu() was invoked. ++ * ++ * RCU read-side critical sections are delimited by rcu_read_lock() ++ * and rcu_read_unlock(), and may be nested. In addition, but only in ++ * v5.0 and later, regions of code across which interrupts, preemption, ++ * or softirqs have been disabled also serve as RCU read-side critical ++ * sections. This includes hardware interrupt handlers, softirq handlers, ++ * and NMI handlers. ++ * ++ * Note that all CPUs must agree that the grace period extended beyond ++ * all pre-existing RCU read-side critical section. On systems with more ++ * than one CPU, this means that when "func()" is invoked, each CPU is ++ * guaranteed to have executed a full memory barrier since the end of its ++ * last RCU read-side critical section whose beginning preceded the call ++ * to call_rcu(). It also means that each CPU executing an RCU read-side ++ * critical section that continues beyond the start of "func()" must have ++ * executed a memory barrier after the call_rcu() but before the beginning ++ * of that RCU read-side critical section. Note that these guarantees ++ * include CPUs that are offline, idle, or executing in user mode, as ++ * well as CPUs that are executing in the kernel. ++ * ++ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the ++ * resulting RCU callback function "func()", then both CPU A and CPU B are ++ * guaranteed to execute a full memory barrier during the time interval ++ * between the call to call_rcu() and the invocation of "func()" -- even ++ * if CPU A and CPU B are the same CPU (but again only if the system has ++ * more than one CPU). ++ * ++ * Implementation of these memory-ordering guarantees is described here: ++ * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst. ++ */ ++void call_rcu(struct rcu_head *head, rcu_callback_t func) ++{ ++ return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY)); ++} ++EXPORT_SYMBOL_GPL(call_rcu); + + /* Maximum number of jiffies to wait before draining a batch. */ + #define KFREE_DRAIN_JIFFIES (5 * HZ) +@@ -3521,7 +3558,7 @@ void synchronize_rcu(void) + if (rcu_gp_is_expedited()) + synchronize_rcu_expedited(); + else +- wait_rcu_gp(call_rcu); ++ wait_rcu_gp(call_rcu_hurry); + return; + } + +@@ -3924,7 +3961,7 @@ static void rcu_barrier_entrain(struct rcu_data *rdp) + * if it's fully lazy. 
+ */ + was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist); +- WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); ++ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false)); + wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist); + if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) { + atomic_inc(&rcu_state.barrier_cpu_count); +@@ -4359,7 +4396,7 @@ void rcutree_migrate_callbacks(int cpu) + my_rdp = this_cpu_ptr(&rcu_data); + my_rnp = my_rdp->mynode; + rcu_nocb_lock(my_rdp); /* irqs already disabled. */ +- WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies)); ++ WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies, false)); + raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */ + /* Leverage recent GPs and set GP for new callbacks. */ + needwake = rcu_advance_cbs(my_rnp, rdp) || +diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h +index 925dd98f8b23b..fcb5d696eb170 100644 +--- a/kernel/rcu/tree.h ++++ b/kernel/rcu/tree.h +@@ -263,14 +263,16 @@ struct rcu_data { + unsigned long last_fqs_resched; /* Time of last rcu_resched(). */ + unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */ + ++ long lazy_len; /* Length of buffered lazy callbacks. */ + int cpu; + }; + + /* Values for nocb_defer_wakeup field in struct rcu_data. */ + #define RCU_NOCB_WAKE_NOT 0 + #define RCU_NOCB_WAKE_BYPASS 1 +-#define RCU_NOCB_WAKE 2 +-#define RCU_NOCB_WAKE_FORCE 3 ++#define RCU_NOCB_WAKE_LAZY 2 ++#define RCU_NOCB_WAKE 3 ++#define RCU_NOCB_WAKE_FORCE 4 + + #define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500)) + /* For jiffies_till_first_fqs and */ +@@ -441,9 +443,10 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq); + static void rcu_init_one_nocb(struct rcu_node *rnp); + static bool wake_nocb_gp(struct rcu_data *rdp, bool force); + static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, +- unsigned long j); ++ unsigned long j, bool lazy); + static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, +- bool *was_alldone, unsigned long flags); ++ bool *was_alldone, unsigned long flags, ++ bool lazy); + static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, + unsigned long flags); + static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level); +diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h +index aa3ec3c3b9f75..b9637df7cda70 100644 +--- a/kernel/rcu/tree_exp.h ++++ b/kernel/rcu/tree_exp.h +@@ -941,7 +941,7 @@ void synchronize_rcu_expedited(void) + + /* If expedited grace periods are prohibited, fall back to normal. */ + if (rcu_gp_is_normal()) { +- wait_rcu_gp(call_rcu); ++ wait_rcu_gp(call_rcu_hurry); + return; + } + +diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h +index 74d4983d68f82..c3ec5f389d27f 100644 +--- a/kernel/rcu/tree_nocb.h ++++ b/kernel/rcu/tree_nocb.h +@@ -256,6 +256,31 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force) + return __wake_nocb_gp(rdp_gp, rdp, force, flags); + } + ++/* ++ * LAZY_FLUSH_JIFFIES decides the maximum amount of time that ++ * can elapse before lazy callbacks are flushed. Lazy callbacks ++ * could be flushed much earlier for a number of other reasons ++ * however, LAZY_FLUSH_JIFFIES will ensure no lazy callbacks are ++ * left unsubmitted to RCU after those many jiffies. ++ */ ++#define LAZY_FLUSH_JIFFIES (10 * HZ) ++static unsigned long jiffies_till_flush = LAZY_FLUSH_JIFFIES; ++ ++#ifdef CONFIG_RCU_LAZY ++// To be called only from test code. 
++void rcu_lazy_set_jiffies_till_flush(unsigned long jif)
++{
++	jiffies_till_flush = jif;
++}
++EXPORT_SYMBOL(rcu_lazy_set_jiffies_till_flush);
++
++unsigned long rcu_lazy_get_jiffies_till_flush(void)
++{
++	return jiffies_till_flush;
++}
++EXPORT_SYMBOL(rcu_lazy_get_jiffies_till_flush);
++#endif
++
+ /*
+  * Arrange to wake the GP kthread for this NOCB group at some future
+  * time when it is safe to do so.
+@@ -269,10 +294,14 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
+ 	raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
+ 
+ 	/*
+-	 * Bypass wakeup overrides previous deferments. In case
+-	 * of callback storm, no need to wake up too early.
++	 * Bypass wakeup overrides previous deferments. In case of
++	 * callback storms, no need to wake up too early.
+ 	 */
+-	if (waketype == RCU_NOCB_WAKE_BYPASS) {
++	if (waketype == RCU_NOCB_WAKE_LAZY &&
++	    rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) {
++		mod_timer(&rdp_gp->nocb_timer, jiffies + jiffies_till_flush);
++		WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
++	} else if (waketype == RCU_NOCB_WAKE_BYPASS) {
+ 		mod_timer(&rdp_gp->nocb_timer, jiffies + 2);
+ 		WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
+ 	} else {
+@@ -293,10 +322,13 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
+  * proves to be initially empty, just return false because the no-CB GP
+  * kthread may need to be awakened in this case.
+  *
++ * Return true if there was something to be flushed and it succeeded, otherwise
++ * false.
++ *
+  * Note that this function always returns true if rhp is NULL.
+  */
+ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+-				     unsigned long j)
++				     unsigned long j, bool lazy)
+ {
+ 	struct rcu_cblist rcl;
+ 
+@@ -310,7 +342,20 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ 	/* Note: ->cblist.len already accounts for ->nocb_bypass contents. */
+ 	if (rhp)
+ 		rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
+-	rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
++
++	/*
++	 * If the new CB requested was a lazy one, queue it onto the main
++	 * ->cblist so we can take advantage of a sooner grace period.
++	 */
++	if (lazy && rhp) {
++		rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, NULL);
++		rcu_cblist_enqueue(&rcl, rhp);
++		WRITE_ONCE(rdp->lazy_len, 0);
++	} else {
++		rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
++		WRITE_ONCE(rdp->lazy_len, 0);
++	}
++
+ 	rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl);
+ 	WRITE_ONCE(rdp->nocb_bypass_first, j);
+ 	rcu_nocb_bypass_unlock(rdp);
+@@ -326,13 +371,13 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+  * Note that this function always returns true if rhp is NULL.
+ */ + static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, +- unsigned long j) ++ unsigned long j, bool lazy) + { + if (!rcu_rdp_is_offloaded(rdp)) + return true; + rcu_lockdep_assert_cblist_protected(rdp); + rcu_nocb_bypass_lock(rdp); +- return rcu_nocb_do_flush_bypass(rdp, rhp, j); ++ return rcu_nocb_do_flush_bypass(rdp, rhp, j, lazy); + } + + /* +@@ -345,7 +390,7 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) + if (!rcu_rdp_is_offloaded(rdp) || + !rcu_nocb_bypass_trylock(rdp)) + return; +- WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j)); ++ WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false)); + } + + /* +@@ -367,12 +412,14 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) + * there is only one CPU in operation. + */ + static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, +- bool *was_alldone, unsigned long flags) ++ bool *was_alldone, unsigned long flags, ++ bool lazy) + { + unsigned long c; + unsigned long cur_gp_seq; + unsigned long j = jiffies; + long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); ++ bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len)); + + lockdep_assert_irqs_disabled(); + +@@ -417,25 +464,29 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, + // If there hasn't yet been all that many ->cblist enqueues + // this jiffy, tell the caller to enqueue onto ->cblist. But flush + // ->nocb_bypass first. +- if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) { ++ // Lazy CBs throttle this back and do immediate bypass queuing. ++ if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy && !lazy) { + rcu_nocb_lock(rdp); + *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); + if (*was_alldone) + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, + TPS("FirstQ")); +- WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j)); ++ ++ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j, false)); + WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass)); + return false; // Caller must enqueue the callback. + } + + // If ->nocb_bypass has been used too long or is too full, + // flush ->nocb_bypass to ->cblist. +- if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) || ++ if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) || ++ (ncbs && bypass_is_lazy && ++ (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush))) || + ncbs >= qhimark) { + rcu_nocb_lock(rdp); + *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); + +- if (!rcu_nocb_flush_bypass(rdp, rhp, j)) { ++ if (!rcu_nocb_flush_bypass(rdp, rhp, j, lazy)) { + if (*was_alldone) + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, + TPS("FirstQ")); +@@ -463,13 +514,24 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, + ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); + rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */ + rcu_cblist_enqueue(&rdp->nocb_bypass, rhp); ++ ++ if (lazy) ++ WRITE_ONCE(rdp->lazy_len, rdp->lazy_len + 1); ++ + if (!ncbs) { + WRITE_ONCE(rdp->nocb_bypass_first, j); + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ")); + } + rcu_nocb_bypass_unlock(rdp); + smp_mb(); /* Order enqueue before wake. */ +- if (ncbs) { ++ // A wake up of the grace period kthread or timer adjustment ++ // needs to be done only if: ++ // 1. Bypass list was fully empty before (this is the first ++ // bypass list entry), or: ++ // 2. Both of these conditions are met: ++ // a. 
The bypass list previously had only lazy CBs, and: ++ // b. The new CB is non-lazy. ++ if (ncbs && (!bypass_is_lazy || lazy)) { + local_irq_restore(flags); + } else { + // No-CBs GP kthread might be indefinitely asleep, if so, wake. +@@ -497,8 +559,10 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, + unsigned long flags) + __releases(rdp->nocb_lock) + { ++ long bypass_len; + unsigned long cur_gp_seq; + unsigned long j; ++ long lazy_len; + long len; + struct task_struct *t; + +@@ -512,9 +576,16 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, + } + // Need to actually to a wakeup. + len = rcu_segcblist_n_cbs(&rdp->cblist); ++ bypass_len = rcu_cblist_n_cbs(&rdp->nocb_bypass); ++ lazy_len = READ_ONCE(rdp->lazy_len); + if (was_alldone) { + rdp->qlen_last_fqs_check = len; +- if (!irqs_disabled_flags(flags)) { ++ // Only lazy CBs in bypass list ++ if (lazy_len && bypass_len == lazy_len) { ++ rcu_nocb_unlock_irqrestore(rdp, flags); ++ wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY, ++ TPS("WakeLazy")); ++ } else if (!irqs_disabled_flags(flags)) { + /* ... if queue was empty ... */ + rcu_nocb_unlock_irqrestore(rdp, flags); + wake_nocb_gp(rdp, false); +@@ -605,12 +676,12 @@ static void nocb_gp_sleep(struct rcu_data *my_rdp, int cpu) + static void nocb_gp_wait(struct rcu_data *my_rdp) + { + bool bypass = false; +- long bypass_ncbs; + int __maybe_unused cpu = my_rdp->cpu; + unsigned long cur_gp_seq; + unsigned long flags; + bool gotcbs = false; + unsigned long j = jiffies; ++ bool lazy = false; + bool needwait_gp = false; // This prevents actual uninitialized use. + bool needwake; + bool needwake_gp; +@@ -640,24 +711,43 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) + * won't be ignored for long. + */ + list_for_each_entry(rdp, &my_rdp->nocb_head_rdp, nocb_entry_rdp) { ++ long bypass_ncbs; ++ bool flush_bypass = false; ++ long lazy_ncbs; ++ + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check")); + rcu_nocb_lock_irqsave(rdp, flags); + lockdep_assert_held(&rdp->nocb_lock); + bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); +- if (bypass_ncbs && ++ lazy_ncbs = READ_ONCE(rdp->lazy_len); ++ ++ if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) && ++ (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush) || ++ bypass_ncbs > 2 * qhimark)) { ++ flush_bypass = true; ++ } else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) && + (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) || + bypass_ncbs > 2 * qhimark)) { +- // Bypass full or old, so flush it. +- (void)rcu_nocb_try_flush_bypass(rdp, j); +- bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); ++ flush_bypass = true; + } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) { + rcu_nocb_unlock_irqrestore(rdp, flags); + continue; /* No callbacks here, try next. */ + } ++ ++ if (flush_bypass) { ++ // Bypass full or old, so flush it. ++ (void)rcu_nocb_try_flush_bypass(rdp, j); ++ bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); ++ lazy_ncbs = READ_ONCE(rdp->lazy_len); ++ } ++ + if (bypass_ncbs) { + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, +- TPS("Bypass")); +- bypass = true; ++ bypass_ncbs == lazy_ncbs ? TPS("Lazy") : TPS("Bypass")); ++ if (bypass_ncbs == lazy_ncbs) ++ lazy = true; ++ else ++ bypass = true; + } + rnp = rdp->mynode; + +@@ -705,12 +795,20 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) + my_rdp->nocb_gp_gp = needwait_gp; + my_rdp->nocb_gp_seq = needwait_gp ? 
wait_gp_seq : 0; + +- if (bypass && !rcu_nocb_poll) { +- // At least one child with non-empty ->nocb_bypass, so set +- // timer in order to avoid stranding its callbacks. +- wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS, +- TPS("WakeBypassIsDeferred")); ++ // At least one child with non-empty ->nocb_bypass, so set ++ // timer in order to avoid stranding its callbacks. ++ if (!rcu_nocb_poll) { ++ // If bypass list only has lazy CBs. Add a deferred lazy wake up. ++ if (lazy && !bypass) { ++ wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_LAZY, ++ TPS("WakeLazyIsDeferred")); ++ // Otherwise add a deferred bypass wake up. ++ } else if (bypass) { ++ wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS, ++ TPS("WakeBypassIsDeferred")); ++ } + } ++ + if (rcu_nocb_poll) { + /* Polling, so trace if first poll in the series. */ + if (gotcbs) +@@ -1036,7 +1134,7 @@ static long rcu_nocb_rdp_deoffload(void *arg) + * return false, which means that future calls to rcu_nocb_try_bypass() + * will refuse to put anything into the bypass. + */ +- WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); ++ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false)); + /* + * Start with invoking rcu_core() early. This way if the current thread + * happens to preempt an ongoing call to rcu_core() in the middle, +@@ -1290,6 +1388,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) + raw_spin_lock_init(&rdp->nocb_gp_lock); + timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0); + rcu_cblist_init(&rdp->nocb_bypass); ++ WRITE_ONCE(rdp->lazy_len, 0); + mutex_init(&rdp->nocb_gp_kthread_mutex); + } + +@@ -1576,13 +1675,13 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force) + } + + static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, +- unsigned long j) ++ unsigned long j, bool lazy) + { + return true; + } + + static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, +- bool *was_alldone, unsigned long flags) ++ bool *was_alldone, unsigned long flags, bool lazy) + { + return false; + } +-- +2.40.1 + diff --git a/queue-6.1/regulator-core-revert-fix-kobject-release-warning-an.patch b/queue-6.1/regulator-core-revert-fix-kobject-release-warning-an.patch new file mode 100644 index 00000000000..55ec0f8dacc --- /dev/null +++ b/queue-6.1/regulator-core-revert-fix-kobject-release-warning-an.patch @@ -0,0 +1,50 @@ +From 89afe5be70707a1f88207b7c143645df178e25ef Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Sep 2023 00:50:27 +0200 +Subject: regulator/core: Revert "fix kobject release warning and memory leak + in regulator_register()" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Michał Mirosław + +[ Upstream commit 6e800968f6a715c0661716d2ec5e1f56ed9f9c08 ] + +This reverts commit 5f4b204b6b8153923d5be8002c5f7082985d153f. + +Since rdev->dev now has a release() callback, the proper way of freeing +the initialized device can be restored. 
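+
+For background, the struct device lifetime rule this relies on, as a
+minimal sketch (the my_ctx context is hypothetical, not the regulator
+code): once device_initialize() has run and a release() callback is
+set, the object is freed by dropping the reference, never by a bare
+kfree():
+
+	#include <linux/device.h>
+	#include <linux/slab.h>
+
+	struct my_ctx {
+		struct device dev;
+	};
+
+	static void my_ctx_release(struct device *dev)
+	{
+		/* Runs exactly once, when the last reference drops. */
+		kfree(container_of(dev, struct my_ctx, dev));
+	}
+
+	static struct my_ctx *my_ctx_create(void)
+	{
+		struct my_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+
+		if (!ctx)
+			return NULL;
+		device_initialize(&ctx->dev);
+		ctx->dev.release = my_ctx_release;
+		return ctx;
+	}
+
+	/* Error and teardown paths both end here: put_device() leads
+	 * to my_ctx_release() instead of leaking or double-freeing. */
+	static void my_ctx_destroy(struct my_ctx *ctx)
+	{
+		put_device(&ctx->dev);
+	}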
+
+Signed-off-by: Michał Mirosław
+Link: https://lore.kernel.org/r/d7f469f3f7b1f0e1d52f9a7ede3f3c5703382090.1695077303.git.mirq-linux@rere.qmqm.pl
+Signed-off-by: Mark Brown
+Signed-off-by: Sasha Levin
+---
+ drivers/regulator/core.c | 6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
+index f6a95f72af18d..34d3d82819064 100644
+--- a/drivers/regulator/core.c
++++ b/drivers/regulator/core.c
+@@ -5725,15 +5725,11 @@ regulator_register(struct device *dev,
+ 	mutex_lock(&regulator_list_mutex);
+ 	regulator_ena_gpio_free(rdev);
+ 	mutex_unlock(&regulator_list_mutex);
+-	put_device(&rdev->dev);
+-	rdev = NULL;
+ clean:
+ 	if (dangling_of_gpiod)
+ 		gpiod_put(config->ena_gpiod);
+-	if (rdev && rdev->dev.of_node)
+-		of_node_put(rdev->dev.of_node);
+-	kfree(rdev);
+ 	kfree(config);
++	put_device(&rdev->dev);
+ rinse:
+ 	if (dangling_cfg_gpiod)
+ 		gpiod_put(cfg->ena_gpiod);
+--
+2.40.1
+
diff --git a/queue-6.1/selftests-mm-fix-awk-usage-in-charge_reserved_hugetl.patch b/queue-6.1/selftests-mm-fix-awk-usage-in-charge_reserved_hugetl.patch
new file mode 100644
index 00000000000..3b40ab80680
--- /dev/null
+++ b/queue-6.1/selftests-mm-fix-awk-usage-in-charge_reserved_hugetl.patch
@@ -0,0 +1,77 @@
+From 390d48cf469ba707ede4f7925e13d99b2cc6d5f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Wed, 27 Sep 2023 02:19:44 +0800
+Subject: selftests/mm: fix awk usage in charge_reserved_hugetlb.sh and
+ hugetlb_reparenting_test.sh that may cause error
+
+From: Juntong Deng
+
+[ Upstream commit bbe246f875d064ecfb872fe4f66152e743dfd22d ]
+
+According to the awk manual, the -e option does not need to be specified
+in front of 'program' (unless you need to mix program-file).
+
+The redundant -e option can cause errors when users use awk tools other
+than gawk (for example, mawk does not support the -e option).
+ +Error Example: +awk: not an option: -e + +Link: https://lkml.kernel.org/r/VI1P193MB075228810591AF2FDD7D42C599C3A@VI1P193MB0752.EURP193.PROD.OUTLOOK.COM +Signed-off-by: Juntong Deng +Cc: Shuah Khan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/vm/charge_reserved_hugetlb.sh | 4 ++-- + tools/testing/selftests/vm/hugetlb_reparenting_test.sh | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh +index a5cb4b09a46c4..0899019a7fcb4 100644 +--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh ++++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh +@@ -25,7 +25,7 @@ if [[ "$1" == "-cgroup-v2" ]]; then + fi + + if [[ $cgroup2 ]]; then +- cgroup_path=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') ++ cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}') + if [[ -z "$cgroup_path" ]]; then + cgroup_path=/dev/cgroup/memory + mount -t cgroup2 none $cgroup_path +@@ -33,7 +33,7 @@ if [[ $cgroup2 ]]; then + fi + echo "+hugetlb" >$cgroup_path/cgroup.subtree_control + else +- cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}') ++ cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') + if [[ -z "$cgroup_path" ]]; then + cgroup_path=/dev/cgroup/memory + mount -t cgroup memory,hugetlb $cgroup_path +diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh +index bf2d2a684edfd..14d26075c8635 100644 +--- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh ++++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh +@@ -20,7 +20,7 @@ fi + + + if [[ $cgroup2 ]]; then +- CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') ++ CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}') + if [[ -z "$CGROUP_ROOT" ]]; then + CGROUP_ROOT=/dev/cgroup/memory + mount -t cgroup2 none $CGROUP_ROOT +@@ -28,7 +28,7 @@ if [[ $cgroup2 ]]; then + fi + echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control + else +- CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}') ++ CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') + if [[ -z "$CGROUP_ROOT" ]]; then + CGROUP_ROOT=/dev/cgroup/memory + mount -t cgroup memory,hugetlb $CGROUP_ROOT +-- +2.40.1 + diff --git a/queue-6.1/selftests-openvswitch-add-version-check-for-pyroute2.patch b/queue-6.1/selftests-openvswitch-add-version-check-for-pyroute2.patch new file mode 100644 index 00000000000..5317d5d0fda --- /dev/null +++ b/queue-6.1/selftests-openvswitch-add-version-check-for-pyroute2.patch @@ -0,0 +1,78 @@ +From e526abc9ed61fec0728aeaad545e7d832d6054a9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Oct 2023 15:49:36 -0400 +Subject: selftests: openvswitch: Add version check for pyroute2 + +From: Aaron Conole + +[ Upstream commit 92e37f20f20a23fec4626ae72eda50f127acb130 ] + +Paolo Abeni reports that on some systems the pyroute2 version isn't +new enough to run the test suite. Ensure that we support a minimum +version of 0.6 for all cases (which does include the existing ones). +The 0.6.1 version was released in May of 2021, so should be +propagated to most installations at this point. + +The alternative that Paolo proposed was to only skip when the +add-flow is being run. This would be okay for most cases, except +if a future test case is added that needs to do flow dump without +an associated add (just guessing). 
In that case, it could also be +broken and we would need additional skip logic anyway. Just draw +a line in the sand now. + +Fixes: 25f16c873fb1 ("selftests: add openvswitch selftest suite") +Reported-by: Paolo Abeni +Closes: https://lore.kernel.org/lkml/8470c431e0930d2ea204a9363a60937289b7fdbe.camel@redhat.com/ +Signed-off-by: Aaron Conole +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/openvswitch/openvswitch.sh | 2 +- + tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 10 +++++++++- + 2 files changed, 10 insertions(+), 2 deletions(-) + +diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh +index 5e6686398a313..52054a09d575c 100755 +--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh ++++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh +@@ -117,7 +117,7 @@ run_test() { + fi + + if python3 ovs-dpctl.py -h 2>&1 | \ +- grep "Need to install the python" >/dev/null 2>&1; then ++ grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then + stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}" + return $ksft_skip + fi +diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +index 5d467d1993cb1..e787a1f967b0d 100644 +--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py ++++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +@@ -17,8 +17,10 @@ try: + from pyroute2.netlink import nla + from pyroute2.netlink.exceptions import NetlinkError + from pyroute2.netlink.generic import GenericNetlinkSocket ++ import pyroute2 ++ + except ModuleNotFoundError: +- print("Need to install the python pyroute2 package.") ++ print("Need to install the python pyroute2 package >= 0.6.") + sys.exit(0) + + +@@ -280,6 +282,12 @@ def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB()): + + + def main(argv): ++ # version check for pyroute2 ++ prverscheck = pyroute2.__version__.split(".") ++ if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6: ++ print("Need to upgrade the python pyroute2 package to >= 0.6.") ++ sys.exit(0) ++ + parser = argparse.ArgumentParser() + parser.add_argument( + "-v", +-- +2.40.1 + diff --git a/queue-6.1/serial-8250-omap-fix-imprecise-external-abort-for-om.patch b/queue-6.1/serial-8250-omap-fix-imprecise-external-abort-for-om.patch new file mode 100644 index 00000000000..b6ee1cf4e82 --- /dev/null +++ b/queue-6.1/serial-8250-omap-fix-imprecise-external-abort-for-om.patch @@ -0,0 +1,260 @@ +From a29748f3dd0bfac926e8b34dca4dc8332dd7d1ea Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 May 2023 11:20:12 +0300 +Subject: serial: 8250: omap: Fix imprecise external abort for omap_8250_pm() + +From: Tony Lindgren + +[ Upstream commit 398cecc24846e867b9f90a0bd22730e3df6b05be ] + +We must idle the uart only after serial8250_unregister_port(). Otherwise +unbinding the uart via sysfs while doing cat on the port produces an +imprecise external abort: + +mem_serial_in from omap_8250_pm+0x44/0xf4 +omap_8250_pm from uart_hangup+0xe0/0x194 +uart_hangup from __tty_hangup.part.0+0x37c/0x3a8 +__tty_hangup.part.0 from uart_remove_one_port+0x9c/0x22c +uart_remove_one_port from serial8250_unregister_port+0x60/0xe8 +serial8250_unregister_port from omap8250_remove+0x6c/0xd0 +omap8250_remove from platform_remove+0x28/0x54 + +Turns out the driver needs to have runtime PM functional before the +driver probe calls serial8250_register_8250_port(). 
And it needs +runtime PM after driver remove calls serial8250_unregister_port(). + +On probe, we need to read registers before registering the port in +omap_serial_fill_features_erratas(). We do that with custom uart_read() +already. + +On remove, after serial8250_unregister_port(), we need to write to the +uart registers to idle the device. Let's add a custom uart_write() for +that. + +Currently the uart register access depends on port->membase to be +initialized, which won't work after serial8250_unregister_port(). +Let's use priv->membase instead, and use it for runtime PM related +functions to remove the dependency to port->membase for early and +late register access. + +Note that during use, we need to check for a valid port in the runtime PM +related functions. This is needed for the optional wakeup configuration. +We now need to set the drvdata a bit earlier so it's available for the +runtime PM functions. + +With the port checks in runtime PM functions, the old checks for priv in +omap8250_runtime_suspend() and omap8250_runtime_resume() functions are no +longer needed and are removed. + +Signed-off-by: Tony Lindgren +Link: https://lore.kernel.org/r/20230508082014.23083-3-tony@atomide.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 560706eff7c8 ("serial: 8250_omap: Fix errors with no_console_suspend") +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/8250/8250_omap.c | 70 ++++++++++++++++------------- + 1 file changed, 38 insertions(+), 32 deletions(-) + +diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c +index adc85e250822c..0aed614110090 100644 +--- a/drivers/tty/serial/8250/8250_omap.c ++++ b/drivers/tty/serial/8250/8250_omap.c +@@ -32,6 +32,7 @@ + #include "8250.h" + + #define DEFAULT_CLK_SPEED 48000000 ++#define OMAP_UART_REGSHIFT 2 + + #define UART_ERRATA_i202_MDR1_ACCESS (1 << 0) + #define OMAP_UART_WER_HAS_TX_WAKEUP (1 << 1) +@@ -109,6 +110,7 @@ + #define UART_OMAP_RX_LVL 0x19 + + struct omap8250_priv { ++ void __iomem *membase; + int line; + u8 habit; + u8 mdr1; +@@ -152,9 +154,14 @@ static void omap_8250_rx_dma_flush(struct uart_8250_port *p); + static inline void omap_8250_rx_dma_flush(struct uart_8250_port *p) { } + #endif + +-static u32 uart_read(struct uart_8250_port *up, u32 reg) ++static u32 uart_read(struct omap8250_priv *priv, u32 reg) + { +- return readl(up->port.membase + (reg << up->port.regshift)); ++ return readl(priv->membase + (reg << OMAP_UART_REGSHIFT)); ++} ++ ++static void uart_write(struct omap8250_priv *priv, u32 reg, u32 val) ++{ ++ writel(val, priv->membase + (reg << OMAP_UART_REGSHIFT)); + } + + /* +@@ -538,7 +545,7 @@ static void omap_serial_fill_features_erratas(struct uart_8250_port *up, + u32 mvr, scheme; + u16 revision, major, minor; + +- mvr = uart_read(up, UART_OMAP_MVER); ++ mvr = uart_read(priv, UART_OMAP_MVER); + + /* Check revision register scheme */ + scheme = mvr >> OMAP_UART_MVR_SCHEME_SHIFT; +@@ -1319,7 +1326,7 @@ static int omap8250_probe(struct platform_device *pdev) + UPF_HARD_FLOW; + up.port.private_data = priv; + +- up.port.regshift = 2; ++ up.port.regshift = OMAP_UART_REGSHIFT; + up.port.fifosize = 64; + up.tx_loadsz = 64; + up.capabilities = UART_CAP_FIFO; +@@ -1381,6 +1388,8 @@ static int omap8250_probe(struct platform_device *pdev) + DEFAULT_CLK_SPEED); + } + ++ priv->membase = membase; ++ priv->line = -ENODEV; + priv->latency = PM_QOS_CPU_LATENCY_DEFAULT_VALUE; + priv->calc_latency = PM_QOS_CPU_LATENCY_DEFAULT_VALUE; + cpu_latency_qos_add_request(&priv->pm_qos_request, 
priv->latency); +@@ -1388,6 +1397,8 @@ static int omap8250_probe(struct platform_device *pdev) + + spin_lock_init(&priv->rx_dma_lock); + ++ platform_set_drvdata(pdev, priv); ++ + device_init_wakeup(&pdev->dev, true); + pm_runtime_enable(&pdev->dev); + pm_runtime_use_autosuspend(&pdev->dev); +@@ -1449,7 +1460,6 @@ static int omap8250_probe(struct platform_device *pdev) + goto err; + } + priv->line = ret; +- platform_set_drvdata(pdev, priv); + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); + return 0; +@@ -1471,11 +1481,12 @@ static int omap8250_remove(struct platform_device *pdev) + if (err) + return err; + ++ serial8250_unregister_port(priv->line); ++ priv->line = -ENODEV; + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); + flush_work(&priv->qos_work); + pm_runtime_disable(&pdev->dev); +- serial8250_unregister_port(priv->line); + cpu_latency_qos_remove_request(&priv->pm_qos_request); + device_init_wakeup(&pdev->dev, false); + return 0; +@@ -1561,7 +1572,6 @@ static int omap8250_lost_context(struct uart_8250_port *up) + static int omap8250_soft_reset(struct device *dev) + { + struct omap8250_priv *priv = dev_get_drvdata(dev); +- struct uart_8250_port *up = serial8250_get_port(priv->line); + int timeout = 100; + int sysc; + int syss; +@@ -1575,20 +1585,20 @@ static int omap8250_soft_reset(struct device *dev) + * needing omap8250_soft_reset() quirk. Do it in two writes as + * recommended in the comment for omap8250_update_scr(). + */ +- serial_out(up, UART_OMAP_SCR, OMAP_UART_SCR_DMAMODE_1); +- serial_out(up, UART_OMAP_SCR, ++ uart_write(priv, UART_OMAP_SCR, OMAP_UART_SCR_DMAMODE_1); ++ uart_write(priv, UART_OMAP_SCR, + OMAP_UART_SCR_DMAMODE_1 | OMAP_UART_SCR_DMAMODE_CTL); + +- sysc = serial_in(up, UART_OMAP_SYSC); ++ sysc = uart_read(priv, UART_OMAP_SYSC); + + /* softreset the UART */ + sysc |= OMAP_UART_SYSC_SOFTRESET; +- serial_out(up, UART_OMAP_SYSC, sysc); ++ uart_write(priv, UART_OMAP_SYSC, sysc); + + /* By experiments, 1us enough for reset complete on AM335x */ + do { + udelay(1); +- syss = serial_in(up, UART_OMAP_SYSS); ++ syss = uart_read(priv, UART_OMAP_SYSS); + } while (--timeout && !(syss & OMAP_UART_SYSS_RESETDONE)); + + if (!timeout) { +@@ -1602,13 +1612,10 @@ static int omap8250_soft_reset(struct device *dev) + static int omap8250_runtime_suspend(struct device *dev) + { + struct omap8250_priv *priv = dev_get_drvdata(dev); +- struct uart_8250_port *up; +- +- /* In case runtime-pm tries this before we are setup */ +- if (!priv) +- return 0; ++ struct uart_8250_port *up = NULL; + +- up = serial8250_get_port(priv->line); ++ if (priv->line >= 0) ++ up = serial8250_get_port(priv->line); + /* + * When using 'no_console_suspend', the console UART must not be + * suspended. Since driver suspend is managed by runtime suspend, +@@ -1616,7 +1623,7 @@ static int omap8250_runtime_suspend(struct device *dev) + * active during suspend. 
+ */ + if (priv->is_suspending && !console_suspend_enabled) { +- if (uart_console(&up->port)) ++ if (up && uart_console(&up->port)) + return -EBUSY; + } + +@@ -1627,13 +1634,15 @@ static int omap8250_runtime_suspend(struct device *dev) + if (ret) + return ret; + +- /* Restore to UART mode after reset (for wakeup) */ +- omap8250_update_mdr1(up, priv); +- /* Restore wakeup enable register */ +- serial_out(up, UART_OMAP_WER, priv->wer); ++ if (up) { ++ /* Restore to UART mode after reset (for wakeup) */ ++ omap8250_update_mdr1(up, priv); ++ /* Restore wakeup enable register */ ++ serial_out(up, UART_OMAP_WER, priv->wer); ++ } + } + +- if (up->dma && up->dma->rxchan) ++ if (up && up->dma && up->dma->rxchan) + omap_8250_rx_dma_flush(up); + + priv->latency = PM_QOS_CPU_LATENCY_DEFAULT_VALUE; +@@ -1645,18 +1654,15 @@ static int omap8250_runtime_suspend(struct device *dev) + static int omap8250_runtime_resume(struct device *dev) + { + struct omap8250_priv *priv = dev_get_drvdata(dev); +- struct uart_8250_port *up; +- +- /* In case runtime-pm tries this before we are setup */ +- if (!priv) +- return 0; ++ struct uart_8250_port *up = NULL; + +- up = serial8250_get_port(priv->line); ++ if (priv->line >= 0) ++ up = serial8250_get_port(priv->line); + +- if (omap8250_lost_context(up)) ++ if (up && omap8250_lost_context(up)) + omap8250_restore_regs(up); + +- if (up->dma && up->dma->rxchan && !(priv->habit & UART_HAS_EFR2)) ++ if (up && up->dma && up->dma->rxchan && !(priv->habit & UART_HAS_EFR2)) + omap_8250_rx_dma(up); + + priv->latency = priv->calc_latency; +-- +2.40.1 + diff --git a/queue-6.1/serial-8250_omap-fix-errors-with-no_console_suspend.patch b/queue-6.1/serial-8250_omap-fix-errors-with-no_console_suspend.patch new file mode 100644 index 00000000000..ff4688dc9b4 --- /dev/null +++ b/queue-6.1/serial-8250_omap-fix-errors-with-no_console_suspend.patch @@ -0,0 +1,97 @@ +From 63dc8b9a912e3dac0a76f42ca128843bd3b8931d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Sep 2023 09:13:17 +0300 +Subject: serial: 8250_omap: Fix errors with no_console_suspend + +From: Tony Lindgren + +[ Upstream commit 560706eff7c8e5621b0d63afe0866e0e1906e87e ] + +We now get errors on system suspend if no_console_suspend is set as +reported by Thomas. The errors started with commit 20a41a62618d ("serial: +8250_omap: Use force_suspend and resume for system suspend"). + +Let's fix the issue by checking for console_suspend_enabled in the system +suspend and resume path. + +Note that with this fix the checks for console_suspend_enabled in +omap8250_runtime_suspend() become useless. We now keep runtime PM usage +count for an attached kernel console starting with commit bedb404e91bb +("serial: 8250_port: Don't use power management for kernel console"). 
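+
+The shape of the fix, reduced to a minimal sketch (my_uart_suspend is a
+hypothetical callback, not the 8250_omap code verbatim): the
+system-suspend path only force-suspends a console port when console
+suspend is enabled, and the resume path mirrors the same condition:
+
+	#include <linux/console.h>
+	#include <linux/pm_runtime.h>
+	#include <linux/serial_core.h>
+
+	static int my_uart_suspend(struct device *dev, struct uart_port *port)
+	{
+		int err = 0;
+
+		/* With no_console_suspend, leave the console port
+		 * running instead of forcing runtime suspend. */
+		if (uart_console(port) && console_suspend_enabled)
+			err = pm_runtime_force_suspend(dev);
+		return err;
+	}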
+ +Fixes: 20a41a62618d ("serial: 8250_omap: Use force_suspend and resume for system suspend") +Cc: stable +Cc: Udit Kumar +Reported-by: Thomas Richard +Signed-off-by: Tony Lindgren +Tested-by: Thomas Richard +Reviewed-by: Dhruva Gole +Link: https://lore.kernel.org/r/20230926061319.15140-1-tony@atomide.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/8250/8250_omap.c | 25 ++++++++++--------------- + 1 file changed, 10 insertions(+), 15 deletions(-) + +diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c +index 0aed614110090..05f8675925ed6 100644 +--- a/drivers/tty/serial/8250/8250_omap.c ++++ b/drivers/tty/serial/8250/8250_omap.c +@@ -1516,7 +1516,7 @@ static int omap8250_suspend(struct device *dev) + { + struct omap8250_priv *priv = dev_get_drvdata(dev); + struct uart_8250_port *up = serial8250_get_port(priv->line); +- int err; ++ int err = 0; + + serial8250_suspend_port(priv->line); + +@@ -1526,7 +1526,8 @@ static int omap8250_suspend(struct device *dev) + if (!device_may_wakeup(dev)) + priv->wer = 0; + serial_out(up, UART_OMAP_WER, priv->wer); +- err = pm_runtime_force_suspend(dev); ++ if (uart_console(&up->port) && console_suspend_enabled) ++ err = pm_runtime_force_suspend(dev); + flush_work(&priv->qos_work); + + return err; +@@ -1535,11 +1536,15 @@ static int omap8250_suspend(struct device *dev) + static int omap8250_resume(struct device *dev) + { + struct omap8250_priv *priv = dev_get_drvdata(dev); ++ struct uart_8250_port *up = serial8250_get_port(priv->line); + int err; + +- err = pm_runtime_force_resume(dev); +- if (err) +- return err; ++ if (uart_console(&up->port) && console_suspend_enabled) { ++ err = pm_runtime_force_resume(dev); ++ if (err) ++ return err; ++ } ++ + serial8250_resume_port(priv->line); + /* Paired with pm_runtime_resume_and_get() in omap8250_suspend() */ + pm_runtime_mark_last_busy(dev); +@@ -1616,16 +1621,6 @@ static int omap8250_runtime_suspend(struct device *dev) + + if (priv->line >= 0) + up = serial8250_get_port(priv->line); +- /* +- * When using 'no_console_suspend', the console UART must not be +- * suspended. Since driver suspend is managed by runtime suspend, +- * preventing runtime suspend (by returning error) will keep device +- * active during suspend. +- */ +- if (priv->is_suspending && !console_suspend_enabled) { +- if (up && uart_console(&up->port)) +- return -EBUSY; +- } + + if (priv->habit & UART_ERRATA_CLOCK_DISABLE) { + int ret; +-- +2.40.1 + diff --git a/queue-6.1/serial-move-uart_change_speed-earlier.patch b/queue-6.1/serial-move-uart_change_speed-earlier.patch new file mode 100644 index 00000000000..64b90a66f5a --- /dev/null +++ b/queue-6.1/serial-move-uart_change_speed-earlier.patch @@ -0,0 +1,145 @@ +From 5c7f6fa9941aacb73ee2707226cfb043f6c0795f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 9 Mar 2023 10:09:19 +0200 +Subject: serial: Move uart_change_speed() earlier +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ilpo Järvinen + +[ Upstream commit 8e90cf29aef77b59ed6a6f6466add2af79621f26 ] + +Move uart_change_speed() earlier to get rid of its forward declaration. 
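+
+The reordering is the usual C idiom: a static function defined before
+its first caller needs no forward declaration. In miniature (a
+schematic sketch, not the serial core code itself):
+
+    /* before: the caller precedes the callee, so a declaration is needed */
+    static void change_speed(unsigned int baud);
+
+    static void open_port(void) { change_speed(9600); }
+    static void change_speed(unsigned int baud) { (void)baud; /* ... */ }
+
+    /* after: defining change_speed() above open_port() makes the
+     * forward declaration redundant, so it can simply be deleted */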
+ +Signed-off-by: Ilpo Järvinen +Link: https://lore.kernel.org/r/20230309080923.11778-5-ilpo.jarvinen@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 8679328eb859 ("serial: Reduce spinlocked portion of uart_rs485_config()") +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/serial_core.c | 94 ++++++++++++++++---------------- + 1 file changed, 46 insertions(+), 48 deletions(-) + +diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c +index 2cc5c68c8689f..07e694c4f4827 100644 +--- a/drivers/tty/serial/serial_core.c ++++ b/drivers/tty/serial/serial_core.c +@@ -48,8 +48,6 @@ static struct lock_class_key port_lock_key; + */ + #define RS485_MAX_RTS_DELAY 100 /* msecs */ + +-static void uart_change_speed(struct tty_struct *tty, struct uart_state *state, +- const struct ktermios *old_termios); + static void uart_wait_until_sent(struct tty_struct *tty, int timeout); + static void uart_change_pm(struct uart_state *state, + enum uart_pm_state pm_state); +@@ -177,6 +175,52 @@ static void uart_port_dtr_rts(struct uart_port *uport, int raise) + uart_clear_mctrl(uport, TIOCM_DTR | TIOCM_RTS); + } + ++/* Caller holds port mutex */ ++static void uart_change_speed(struct tty_struct *tty, struct uart_state *state, ++ const struct ktermios *old_termios) ++{ ++ struct uart_port *uport = uart_port_check(state); ++ struct ktermios *termios; ++ int hw_stopped; ++ ++ /* ++ * If we have no tty, termios, or the port does not exist, ++ * then we can't set the parameters for this port. ++ */ ++ if (!tty || uport->type == PORT_UNKNOWN) ++ return; ++ ++ termios = &tty->termios; ++ uport->ops->set_termios(uport, termios, old_termios); ++ ++ /* ++ * Set modem status enables based on termios cflag ++ */ ++ spin_lock_irq(&uport->lock); ++ if (termios->c_cflag & CRTSCTS) ++ uport->status |= UPSTAT_CTS_ENABLE; ++ else ++ uport->status &= ~UPSTAT_CTS_ENABLE; ++ ++ if (termios->c_cflag & CLOCAL) ++ uport->status &= ~UPSTAT_DCD_ENABLE; ++ else ++ uport->status |= UPSTAT_DCD_ENABLE; ++ ++ /* reset sw-assisted CTS flow control based on (possibly) new mode */ ++ hw_stopped = uport->hw_stopped; ++ uport->hw_stopped = uart_softcts_mode(uport) && ++ !(uport->ops->get_mctrl(uport) & TIOCM_CTS); ++ if (uport->hw_stopped) { ++ if (!hw_stopped) ++ uport->ops->stop_tx(uport); ++ } else { ++ if (hw_stopped) ++ __uart_start(tty); ++ } ++ spin_unlock_irq(&uport->lock); ++} ++ + /* + * Startup the port. This will be called once per open. All calls + * will be serialised by the per-port mutex. +@@ -485,52 +529,6 @@ uart_get_divisor(struct uart_port *port, unsigned int baud) + } + EXPORT_SYMBOL(uart_get_divisor); + +-/* Caller holds port mutex */ +-static void uart_change_speed(struct tty_struct *tty, struct uart_state *state, +- const struct ktermios *old_termios) +-{ +- struct uart_port *uport = uart_port_check(state); +- struct ktermios *termios; +- int hw_stopped; +- +- /* +- * If we have no tty, termios, or the port does not exist, +- * then we can't set the parameters for this port. 
+- */ +- if (!tty || uport->type == PORT_UNKNOWN) +- return; +- +- termios = &tty->termios; +- uport->ops->set_termios(uport, termios, old_termios); +- +- /* +- * Set modem status enables based on termios cflag +- */ +- spin_lock_irq(&uport->lock); +- if (termios->c_cflag & CRTSCTS) +- uport->status |= UPSTAT_CTS_ENABLE; +- else +- uport->status &= ~UPSTAT_CTS_ENABLE; +- +- if (termios->c_cflag & CLOCAL) +- uport->status &= ~UPSTAT_DCD_ENABLE; +- else +- uport->status |= UPSTAT_DCD_ENABLE; +- +- /* reset sw-assisted CTS flow control based on (possibly) new mode */ +- hw_stopped = uport->hw_stopped; +- uport->hw_stopped = uart_softcts_mode(uport) && +- !(uport->ops->get_mctrl(uport) & TIOCM_CTS); +- if (uport->hw_stopped) { +- if (!hw_stopped) +- uport->ops->stop_tx(uport); +- } else { +- if (hw_stopped) +- __uart_start(tty); +- } +- spin_unlock_irq(&uport->lock); +-} +- + static int uart_put_char(struct tty_struct *tty, unsigned char c) + { + struct uart_state *state = tty->driver_data; +-- +2.40.1 + diff --git a/queue-6.1/serial-reduce-spinlocked-portion-of-uart_rs485_confi.patch b/queue-6.1/serial-reduce-spinlocked-portion-of-uart_rs485_confi.patch new file mode 100644 index 00000000000..6fa1af1912c --- /dev/null +++ b/queue-6.1/serial-reduce-spinlocked-portion-of-uart_rs485_confi.patch @@ -0,0 +1,120 @@ +From cf3669789ceb2996e567248d0bfbd00deb543d11 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 21 Sep 2023 16:52:33 +0200 +Subject: serial: Reduce spinlocked portion of uart_rs485_config() + +From: Lukas Wunner + +[ Upstream commit 8679328eb859d06a1984ab48d90ac35d11bbcaf1 ] + +Commit 44b27aec9d96 ("serial: core, 8250: set RS485 termination GPIO in +serial core") enabled support for RS485 termination GPIOs behind i2c +expanders by setting the GPIO outside of the critical section protected +by the port spinlock. Access to the i2c expander may sleep, which +caused a splat with the port spinlock held. + +Commit 7c7f9bc986e6 ("serial: Deassert Transmit Enable on probe in +driver-specific way") erroneously regressed that by spinlocking the +GPIO manipulation again. + +Fix by moving uart_rs485_config() (the function manipulating the GPIO) +outside of the spinlocked section and acquiring the spinlock inside of +uart_rs485_config() for the invocation of ->rs485_config() only. + +This gets us one step closer to pushing the spinlock down into the +->rs485_config() callbacks which actually need it. (Some callbacks +do not want to be spinlocked because they perform sleepable register +accesses, see e.g. sc16is7xx_config_rs485().) + +Stack trace for posterity: + + Voluntary context switch within RCU read-side critical section! 
+ WARNING: CPU: 0 PID: 56 at kernel/rcu/tree_plugin.h:318 rcu_note_context_switch + Call trace: + rcu_note_context_switch + __schedule + schedule + schedule_timeout + wait_for_completion_timeout + bcm2835_i2c_xfer + __i2c_transfer + i2c_transfer + i2c_transfer_buffer_flags + regmap_i2c_write + _regmap_raw_write_impl + _regmap_bus_raw_write + _regmap_write + _regmap_update_bits + regmap_update_bits_base + pca953x_gpio_set_value + gpiod_set_raw_value_commit + gpiod_set_value_nocheck + gpiod_set_value_cansleep + uart_rs485_config + uart_add_one_port + pl011_register_port + pl011_probe + +Fixes: 7c7f9bc986e6 ("serial: Deassert Transmit Enable on probe in driver-specific way") +Suggested-by: Lino Sanfilippo +Signed-off-by: Lukas Wunner +Cc: stable@vger.kernel.org # v6.1+ +Link: https://lore.kernel.org/r/f3a35967c28b32f3c6432d0aa5936e6a9908282d.1695307688.git.lukas@wunner.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/serial_core.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c +index 25972767129a3..d4e57f9017db9 100644 +--- a/drivers/tty/serial/serial_core.c ++++ b/drivers/tty/serial/serial_core.c +@@ -1387,12 +1387,18 @@ static void uart_set_rs485_termination(struct uart_port *port, + static int uart_rs485_config(struct uart_port *port) + { + struct serial_rs485 *rs485 = &port->rs485; ++ unsigned long flags; + int ret; + ++ if (!(rs485->flags & SER_RS485_ENABLED)) ++ return 0; ++ + uart_sanitize_serial_rs485(port, rs485); + uart_set_rs485_termination(port, rs485); + ++ spin_lock_irqsave(&port->lock, flags); + ret = port->rs485_config(port, NULL, rs485); ++ spin_unlock_irqrestore(&port->lock, flags); + if (ret) + memset(rs485, 0, sizeof(*rs485)); + +@@ -2455,11 +2461,10 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) + if (ret == 0) { + if (tty) + uart_change_line_settings(tty, state, NULL); ++ uart_rs485_config(uport); + spin_lock_irq(&uport->lock); + if (!(uport->rs485.flags & SER_RS485_ENABLED)) + ops->set_mctrl(uport, uport->mctrl); +- else +- uart_rs485_config(uport); + ops->start_tx(uport); + spin_unlock_irq(&uport->lock); + tty_port_set_initialized(port, 1); +@@ -2568,10 +2573,10 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, + port->mctrl &= TIOCM_DTR; + if (!(port->rs485.flags & SER_RS485_ENABLED)) + port->ops->set_mctrl(port, port->mctrl); +- else +- uart_rs485_config(port); + spin_unlock_irqrestore(&port->lock, flags); + ++ uart_rs485_config(port); ++ + /* + * If this driver supports console, and it hasn't been + * successfully registered yet, try to re-register it. +-- +2.40.1 + diff --git a/queue-6.1/serial-rename-uart_change_speed-to-uart_change_line_.patch b/queue-6.1/serial-rename-uart_change_speed-to-uart_change_line_.patch new file mode 100644 index 00000000000..a7d505de9fa --- /dev/null +++ b/queue-6.1/serial-rename-uart_change_speed-to-uart_change_line_.patch @@ -0,0 +1,78 @@ +From 8b904fe504291e39319b4a291c881ecf33111185 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 9 Mar 2023 10:09:20 +0200 +Subject: serial: Rename uart_change_speed() to uart_change_line_settings() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ilpo Järvinen + +[ Upstream commit 826736a6c7c8c3185bfb10e03c10d03d53d6cf94 ] + +uart_change_speed() changes more than just speed so rename it to more +generic uart_change_line_settings(). 
+ +Signed-off-by: Ilpo Järvinen +Link: https://lore.kernel.org/r/20230309080923.11778-6-ilpo.jarvinen@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 8679328eb859 ("serial: Reduce spinlocked portion of uart_rs485_config()") +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/serial_core.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c +index 07e694c4f4827..25972767129a3 100644 +--- a/drivers/tty/serial/serial_core.c ++++ b/drivers/tty/serial/serial_core.c +@@ -176,8 +176,8 @@ static void uart_port_dtr_rts(struct uart_port *uport, int raise) + } + + /* Caller holds port mutex */ +-static void uart_change_speed(struct tty_struct *tty, struct uart_state *state, +- const struct ktermios *old_termios) ++static void uart_change_line_settings(struct tty_struct *tty, struct uart_state *state, ++ const struct ktermios *old_termios) + { + struct uart_port *uport = uart_port_check(state); + struct ktermios *termios; +@@ -276,7 +276,7 @@ static int uart_port_startup(struct tty_struct *tty, struct uart_state *state, + /* + * Initialise the hardware port settings. + */ +- uart_change_speed(tty, state, NULL); ++ uart_change_line_settings(tty, state, NULL); + + /* + * Setup the RTS and DTR signals once the +@@ -992,7 +992,7 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, + current->comm, + tty_name(port->tty)); + } +- uart_change_speed(tty, state, NULL); ++ uart_change_line_settings(tty, state, NULL); + } + } else { + retval = uart_startup(tty, state, 1); +@@ -1654,7 +1654,7 @@ static void uart_set_termios(struct tty_struct *tty, + goto out; + } + +- uart_change_speed(tty, state, old_termios); ++ uart_change_line_settings(tty, state, old_termios); + /* reload cflag from termios; port driver may have overridden flags */ + cflag = tty->termios.c_cflag; + +@@ -2454,7 +2454,7 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) + ret = ops->startup(uport); + if (ret == 0) { + if (tty) +- uart_change_speed(tty, state, NULL); ++ uart_change_line_settings(tty, state, NULL); + spin_lock_irq(&uport->lock); + if (!(uport->rs485.flags & SER_RS485_ENABLED)) + ops->set_mctrl(uport, uport->mctrl); +-- +2.40.1 + diff --git a/queue-6.1/series b/queue-6.1/series index 5097116deca..8fe31df2744 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -74,3 +74,86 @@ net-pktgen-fix-interface-flags-printing.patch net-avoid-uaf-on-deleted-altname.patch net-fix-ifname-in-netlink-ntf-during-netns-move.patch net-check-for-altname-conflicts-when-changing-netdev-s-netns.patch +selftests-mm-fix-awk-usage-in-charge_reserved_hugetl.patch +usb-misc-onboard_usb_hub-add-genesys-logic-gl850g-hu.patch +usb-misc-onboard_usb_hub-add-genesys-logic-gl852g-hu.patch +usb-misc-onboard_usb_hub-add-genesys-logic-gl3523-hu.patch +usb-misc-onboard_hub-add-support-for-microchip-usb24.patch +serial-move-uart_change_speed-earlier.patch +serial-rename-uart_change_speed-to-uart_change_line_.patch +serial-reduce-spinlocked-portion-of-uart_rs485_confi.patch +serial-8250-omap-fix-imprecise-external-abort-for-om.patch +serial-8250_omap-fix-errors-with-no_console_suspend.patch +iio-core-introduce-iio_device_-claim-release-_buffer.patch +iio-cros_ec-fix-an-use-after-free-in-cros_ec_sensors.patch +iio-adc-ad7192-simplify-using-devm_regulator_get_ena.patch +iio-adc-ad7192-correct-reference-voltage.patch +pwr-mlxbf-extend-kconfig-to-include-gpio-mlxbf3-depe.patch 
+arm-dts-ti-omap-fix-noisy-serial-with-overrun-thrott.patch +fs-writeback-do-not-requeue-a-clean-inode-having-ski.patch +btrfs-prevent-transaction-block-reserve-underflow-wh.patch +btrfs-return-euclean-for-delayed-tree-ref-with-a-ref.patch +btrfs-initialize-start_slot-in-btrfs_log_prealloc_ex.patch +i2c-mux-avoid-potential-false-error-message-in-i2c_m.patch +overlayfs-set-ctime-when-setting-mtime-and-atime.patch +gpio-timberdale-fix-potential-deadlock-on-tgpio-lock.patch +ata-libata-core-fix-compilation-warning-in-ata_dev_c.patch +ata-libata-eh-fix-compilation-warning-in-ata_eh_link.patch +tracing-relax-trace_event_eval_update-execution-with.patch +wifi-mwifiex-sanity-check-tlv_len-and-tlv_bitmap_len.patch +wifi-iwlwifi-ensure-ack-flag-is-properly-cleared.patch +hid-logitech-hidpp-add-bluetooth-id-for-the-logitech.patch +hid-holtek-fix-slab-out-of-bounds-write-in-holtek_kb.patch +bluetooth-btusb-add-shutdown-function-for-qca6174.patch +bluetooth-avoid-redundant-authentication.patch +bluetooth-hci_core-fix-build-warnings.patch +wifi-cfg80211-fix-6ghz-scan-configuration.patch +wifi-mac80211-work-around-cisco-ap-9115-vht-mpdu-len.patch +wifi-mac80211-allow-transmitting-eapol-frames-with-t.patch +wifi-cfg80211-avoid-leaking-stack-data-into-trace.patch +regulator-core-revert-fix-kobject-release-warning-an.patch +sky2-make-sure-there-is-at-least-one-frag_addr-avail.patch +ipv4-fib-send-notify-when-delete-source-address-rout.patch +drm-panel-orientation-quirks-add-quirk-for-one-mix-2.patch +btrfs-fix-some-wmaybe-uninitialized-warnings-in-ioct.patch +btrfs-error-out-when-cowing-block-using-a-stale-tran.patch +btrfs-error-when-cowing-block-from-a-root-that-is-be.patch +btrfs-error-out-when-reallocating-block-for-defrag-u.patch +drm-amd-pm-add-unique_id-for-gc-11.0.3.patch +hid-multitouch-add-required-quirk-for-synaptics-0xcd.patch +hid-nintendo-reinitialize-usb-pro-controller-after-r.patch +platform-x86-touchscreen_dmi-add-info-for-the-positi.patch +cpufreq-schedutil-update-next_freq-when-cpufreq_limi.patch +fprobe-pass-entry_data-to-handlers.patch +fprobe-add-nr_maxactive-to-specify-rethook_node-pool.patch +fprobe-fix-to-ensure-the-number-of-active-retprobes-.patch +net-xfrm-skip-policies-marked-as-dead-while-reinsert.patch +rcu-fix-late-wakeup-when-flush-of-bypass-cblist-happ.patch +rcu-fix-missing-nocb-gp-wake-on-rcu_barrier.patch +rcu-make-call_rcu-lazy-to-save-power.patch +net-use-call_rcu_hurry-for-dst_release.patch +atomics-provide-atomic_add_negative-variants.patch +atomics-provide-rcuref-scalable-reference-counting.patch +net-dst-prevent-false-sharing-vs.-dst_entry-__refcnt.patch +net-dst-switch-to-rcuref_t-reference-counting.patch +net-dst-fix-missing-initialization-of-rt_uncached.patch +xfrm6-fix-inet6_dev-refcount-underflow-problem.patch +netfilter-nf_tables-do-not-remove-elements-if-set-ba.patch +net-mlx5-e-switch-register-event-handler-before-armi.patch +net-mlx5-handle-fw-tracer-change-ownership-event-bas.patch +net-mlx5e-don-t-offload-internal-port-if-filter-devi.patch +net-tls-split-tls_rx_reader_lock.patch +tcp-allow-again-tcp_disconnect-when-threads-are-wait.patch +ice-remove-redundant-pci_enable_pcie_error_reporting.patch +bluetooth-hci_event-fix-using-memcmp-when-comparing-.patch +selftests-openvswitch-add-version-check-for-pyroute2.patch +netfilter-nf_tables-revert-do-not-remove-elements-if.patch +tcp_bpf-properly-release-resources-on-error-paths.patch +net-devlink-convert-devlink-port-type-specific-point.patch +net-devlink-move-port_type_warn_schedule-call-to-__d.patch 
+net-devlink-move-port_type_netdev_checks-call-to-__d.patch +net-devlink-take-rtnl-in-port_fill-function-only-if-.patch +net-devlink-track-netdev-with-devlink_port-assigned.patch +net-store-netdevs-in-an-xarray.patch +net-move-altnames-together-with-the-netdevice.patch +net-smc-fix-smc-clc-failed-issue-when-netdevice-not-.patch diff --git a/queue-6.1/sky2-make-sure-there-is-at-least-one-frag_addr-avail.patch b/queue-6.1/sky2-make-sure-there-is-at-least-one-frag_addr-avail.patch new file mode 100644 index 00000000000..a4568997730 --- /dev/null +++ b/queue-6.1/sky2-make-sure-there-is-at-least-one-frag_addr-avail.patch @@ -0,0 +1,73 @@ +From 4b967f19a7c6c09001d87b8374d003080d70e0c0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Sep 2023 09:50:39 -0700 +Subject: sky2: Make sure there is at least one frag_addr available + +From: Kees Cook + +[ Upstream commit 6a70e5cbedaf8ad10528ac9ac114f3ec20f422df ] + +In the pathological case of building sky2 with 16k PAGE_SIZE, the +frag_addr[] array would never be used, so the original code was correct +that size should be 0. But the compiler now gets upset with 0 size arrays +in places where it hasn't eliminated the code that might access such an +array (it can't figure out that in this case an rx skb with fragments +would never be created). To keep the compiler happy, make sure there is +at least 1 frag_addr in struct rx_ring_info: + + In file included from include/linux/skbuff.h:28, + from include/net/net_namespace.h:43, + from include/linux/netdevice.h:38, + from drivers/net/ethernet/marvell/sky2.c:18: + drivers/net/ethernet/marvell/sky2.c: In function 'sky2_rx_unmap_skb': + include/linux/dma-mapping.h:416:36: warning: array subscript i is outside array bounds of 'dma_addr_t[0]' {aka 'long long unsigned int[]'} [-Warray-bounds=] + 416 | #define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0) + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + drivers/net/ethernet/marvell/sky2.c:1257:17: note: in expansion of macro 'dma_unmap_page' + 1257 | dma_unmap_page(&pdev->dev, re->frag_addr[i], + | ^~~~~~~~~~~~~~ + In file included from drivers/net/ethernet/marvell/sky2.c:41: + drivers/net/ethernet/marvell/sky2.h:2198:25: note: while referencing 'frag_addr' + 2198 | dma_addr_t frag_addr[ETH_JUMBO_MTU >> PAGE_SHIFT]; + | ^~~~~~~~~ + +With CONFIG_PAGE_SIZE_16KB=y, PAGE_SHIFT == 14, so: + + #define ETH_JUMBO_MTU 9000 + +causes "ETH_JUMBO_MTU >> PAGE_SHIFT" to be 0. Use "?: 1" to solve this build warning. + +Cc: Mirko Lindner +Cc: Stephen Hemminger +Cc: "David S. Miller" +Cc: Eric Dumazet +Cc: Jakub Kicinski +Cc: Paolo Abeni +Cc: netdev@vger.kernel.org +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202309191958.UBw1cjXk-lkp@intel.com/ +Reviewed-by: Alexander Lobakin +Signed-off-by: Kees Cook +Reviewed-by: Gustavo A. R. Silva +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/marvell/sky2.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/marvell/sky2.h b/drivers/net/ethernet/marvell/sky2.h +index ddec1627f1a7b..8d0bacf4e49cc 100644 +--- a/drivers/net/ethernet/marvell/sky2.h ++++ b/drivers/net/ethernet/marvell/sky2.h +@@ -2195,7 +2195,7 @@ struct rx_ring_info { + struct sk_buff *skb; + dma_addr_t data_addr; + DEFINE_DMA_UNMAP_LEN(data_size); +- dma_addr_t frag_addr[ETH_JUMBO_MTU >> PAGE_SHIFT]; ++ dma_addr_t frag_addr[ETH_JUMBO_MTU >> PAGE_SHIFT ?: 1]; + }; + + enum flow_control { +-- +2.40.1 + diff --git a/queue-6.1/tcp-allow-again-tcp_disconnect-when-threads-are-wait.patch b/queue-6.1/tcp-allow-again-tcp_disconnect-when-threads-are-wait.patch new file mode 100644 index 00000000000..322e0dcc4f7 --- /dev/null +++ b/queue-6.1/tcp-allow-again-tcp_disconnect-when-threads-are-wait.patch @@ -0,0 +1,484 @@ +From e162a18ec7faf8e91b6fabd6c3fbfc8b70827d7a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Oct 2023 09:20:55 +0200 +Subject: tcp: allow again tcp_disconnect() when threads are waiting + +From: Paolo Abeni + +[ Upstream commit 419ce133ab928ab5efd7b50b2ef36ddfd4eadbd2 ] + +As reported by Tom, .NET and applications build on top of it rely +on connect(AF_UNSPEC) to async cancel pending I/O operations on TCP +socket. + +The blamed commit below caused a regression, as such cancellation +can now fail. + +As suggested by Eric, this change addresses the problem explicitly +causing blocking I/O operation to terminate immediately (with an error) +when a concurrent disconnect() is executed. + +Instead of tracking the number of threads blocked on a given socket, +track the number of disconnect() issued on such socket. If such counter +changes after a blocking operation releasing and re-acquiring the socket +lock, error out the current operation. 
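+
+For reference, the userspace idiom this restores is a plain connect()
+with AF_UNSPEC; a minimal sketch (hypothetical helper name, error
+handling trimmed):
+
+    #include <string.h>
+    #include <sys/socket.h>
+
+    /* Abort a connected TCP socket, erroring out any threads blocked
+     * on it; this maps to tcp_disconnect() in the kernel. */
+    static int tcp_abort_io(int fd)
+    {
+            struct sockaddr sa;
+
+            memset(&sa, 0, sizeof(sa));
+            sa.sa_family = AF_UNSPEC;
+            return connect(fd, &sa, sizeof(sa));
+    }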
+ +Fixes: 4faeee0cf8a5 ("tcp: deny tcp_disconnect() when threads are waiting") +Reported-by: Tom Deseyn +Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1886305 +Suggested-by: Eric Dumazet +Signed-off-by: Paolo Abeni +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/f3b95e47e3dbed840960548aebaa8d954372db41.1697008693.git.pabeni@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../chelsio/inline_crypto/chtls/chtls_io.c | 36 +++++++++++++++---- + include/net/sock.h | 10 +++--- + net/core/stream.c | 12 ++++--- + net/ipv4/af_inet.c | 10 ++++-- + net/ipv4/inet_connection_sock.c | 1 - + net/ipv4/tcp.c | 16 ++++----- + net/ipv4/tcp_bpf.c | 4 +++ + net/mptcp/protocol.c | 7 ---- + net/tls/tls_main.c | 10 ++++-- + net/tls/tls_sw.c | 19 ++++++---- + 10 files changed, 80 insertions(+), 45 deletions(-) + +diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +index a4256087ac828..5e45bef4fd34f 100644 +--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c ++++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +@@ -911,7 +911,7 @@ static int csk_wait_memory(struct chtls_dev *cdev, + struct sock *sk, long *timeo_p) + { + DEFINE_WAIT_FUNC(wait, woken_wake_function); +- int err = 0; ++ int ret, err = 0; + long current_timeo; + long vm_wait = 0; + bool noblock; +@@ -942,10 +942,13 @@ static int csk_wait_memory(struct chtls_dev *cdev, + + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + sk->sk_write_pending++; +- sk_wait_event(sk, ¤t_timeo, sk->sk_err || +- (sk->sk_shutdown & SEND_SHUTDOWN) || +- (csk_mem_free(cdev, sk) && !vm_wait), &wait); ++ ret = sk_wait_event(sk, ¤t_timeo, sk->sk_err || ++ (sk->sk_shutdown & SEND_SHUTDOWN) || ++ (csk_mem_free(cdev, sk) && !vm_wait), ++ &wait); + sk->sk_write_pending--; ++ if (ret < 0) ++ goto do_error; + + if (vm_wait) { + vm_wait -= current_timeo; +@@ -1438,6 +1441,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int copied = 0; + int target; + long timeo; ++ int ret; + + buffers_freed = 0; + +@@ -1513,7 +1517,11 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + if (copied >= target) + break; + chtls_cleanup_rbuf(sk, copied); +- sk_wait_data(sk, &timeo, NULL); ++ ret = sk_wait_data(sk, &timeo, NULL); ++ if (ret < 0) { ++ copied = copied ? 
: ret; ++ goto unlock; ++ } + continue; + found_ok_skb: + if (!skb->len) { +@@ -1608,6 +1616,8 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + + if (buffers_freed) + chtls_cleanup_rbuf(sk, copied); ++ ++unlock: + release_sock(sk); + return copied; + } +@@ -1624,6 +1634,7 @@ static int peekmsg(struct sock *sk, struct msghdr *msg, + int copied = 0; + size_t avail; /* amount of available data in current skb */ + long timeo; ++ int ret; + + lock_sock(sk); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); +@@ -1675,7 +1686,12 @@ static int peekmsg(struct sock *sk, struct msghdr *msg, + release_sock(sk); + lock_sock(sk); + } else { +- sk_wait_data(sk, &timeo, NULL); ++ ret = sk_wait_data(sk, &timeo, NULL); ++ if (ret < 0) { ++ /* here 'copied' is 0 due to previous checks */ ++ copied = ret; ++ break; ++ } + } + + if (unlikely(peek_seq != tp->copied_seq)) { +@@ -1746,6 +1762,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int copied = 0; + long timeo; + int target; /* Read at least this many bytes */ ++ int ret; + + buffers_freed = 0; + +@@ -1837,7 +1854,11 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + if (copied >= target) + break; + chtls_cleanup_rbuf(sk, copied); +- sk_wait_data(sk, &timeo, NULL); ++ ret = sk_wait_data(sk, &timeo, NULL); ++ if (ret < 0) { ++ copied = copied ? : ret; ++ goto unlock; ++ } + continue; + + found_ok_skb: +@@ -1906,6 +1927,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + if (buffers_freed) + chtls_cleanup_rbuf(sk, copied); + ++unlock: + release_sock(sk); + return copied; + } +diff --git a/include/net/sock.h b/include/net/sock.h +index 4c988b981d6e1..579c89eb7c5ca 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -333,7 +333,7 @@ struct sk_filter; + * @sk_cgrp_data: cgroup data for this cgroup + * @sk_memcg: this socket's memory cgroup association + * @sk_write_pending: a write to stream socket waits to start +- * @sk_wait_pending: number of threads blocked on this socket ++ * @sk_disconnects: number of disconnect operations performed on this sock + * @sk_state_change: callback to indicate change in the state of the sock + * @sk_data_ready: callback to indicate there is data to be processed + * @sk_write_space: callback to indicate there is bf sending space available +@@ -426,7 +426,7 @@ struct sock { + unsigned int sk_napi_id; + #endif + int sk_rcvbuf; +- int sk_wait_pending; ++ int sk_disconnects; + + struct sk_filter __rcu *sk_filter; + union { +@@ -1185,8 +1185,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) + } + + #define sk_wait_event(__sk, __timeo, __condition, __wait) \ +- ({ int __rc; \ +- __sk->sk_wait_pending++; \ ++ ({ int __rc, __dis = __sk->sk_disconnects; \ + release_sock(__sk); \ + __rc = __condition; \ + if (!__rc) { \ +@@ -1196,8 +1195,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) + } \ + sched_annotate_sleep(); \ + lock_sock(__sk); \ +- __sk->sk_wait_pending--; \ +- __rc = __condition; \ ++ __rc = __dis == __sk->sk_disconnects ? 
__condition : -EPIPE; \ + __rc; \ + }) + +diff --git a/net/core/stream.c b/net/core/stream.c +index 5b05b889d31af..051aa71a8ad0f 100644 +--- a/net/core/stream.c ++++ b/net/core/stream.c +@@ -117,7 +117,7 @@ EXPORT_SYMBOL(sk_stream_wait_close); + */ + int sk_stream_wait_memory(struct sock *sk, long *timeo_p) + { +- int err = 0; ++ int ret, err = 0; + long vm_wait = 0; + long current_timeo = *timeo_p; + DEFINE_WAIT_FUNC(wait, woken_wake_function); +@@ -142,11 +142,13 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) + + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + sk->sk_write_pending++; +- sk_wait_event(sk, ¤t_timeo, READ_ONCE(sk->sk_err) || +- (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || +- (sk_stream_memory_free(sk) && +- !vm_wait), &wait); ++ ret = sk_wait_event(sk, ¤t_timeo, READ_ONCE(sk->sk_err) || ++ (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || ++ (sk_stream_memory_free(sk) && !vm_wait), ++ &wait); + sk->sk_write_pending--; ++ if (ret < 0) ++ goto do_error; + + if (vm_wait) { + vm_wait -= current_timeo; +diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c +index 04853c83c85c4..5d379df90c826 100644 +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -589,7 +589,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) + + add_wait_queue(sk_sleep(sk), &wait); + sk->sk_write_pending += writebias; +- sk->sk_wait_pending++; + + /* Basic assumption: if someone sets sk->sk_err, he _must_ + * change state of the socket from TCP_SYN_*. +@@ -605,7 +604,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) + } + remove_wait_queue(sk_sleep(sk), &wait); + sk->sk_write_pending -= writebias; +- sk->sk_wait_pending--; + return timeo; + } + +@@ -634,6 +632,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, + return -EINVAL; + + if (uaddr->sa_family == AF_UNSPEC) { ++ sk->sk_disconnects++; + err = sk->sk_prot->disconnect(sk, flags); + sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; + goto out; +@@ -688,6 +687,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, + int writebias = (sk->sk_protocol == IPPROTO_TCP) && + tcp_sk(sk)->fastopen_req && + tcp_sk(sk)->fastopen_req->data ? 1 : 0; ++ int dis = sk->sk_disconnects; + + /* Error code is set above */ + if (!timeo || !inet_wait_for_connect(sk, timeo, writebias)) +@@ -696,6 +696,11 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, + err = sock_intr_errno(timeo); + if (signal_pending(current)) + goto out; ++ ++ if (dis != sk->sk_disconnects) { ++ err = -EPIPE; ++ goto out; ++ } + } + + /* Connection was closed by RST, timeout, ICMP error +@@ -717,6 +722,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, + sock_error: + err = sock_error(sk) ? 
: -ECONNABORTED; + sock->state = SS_UNCONNECTED; ++ sk->sk_disconnects++; + if (sk->sk_prot->disconnect(sk, flags)) + sock->state = SS_DISCONNECTING; + goto out; +diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c +index 62a3b103f258a..80ce0112e24b4 100644 +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -1143,7 +1143,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, + if (newsk) { + struct inet_connection_sock *newicsk = inet_csk(newsk); + +- newsk->sk_wait_pending = 0; + inet_sk_set_state(newsk, TCP_SYN_RECV); + newicsk->icsk_bind_hash = NULL; + newicsk->icsk_bind2_hash = NULL; +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 96fdde6e42b1b..288678f17ccaf 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -827,7 +827,9 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, + */ + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; +- sk_wait_data(sk, &timeo, NULL); ++ ret = sk_wait_data(sk, &timeo, NULL); ++ if (ret < 0) ++ break; + if (signal_pending(current)) { + ret = sock_intr_errno(timeo); + break; +@@ -2549,7 +2551,11 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, + __sk_flush_backlog(sk); + } else { + tcp_cleanup_rbuf(sk, copied); +- sk_wait_data(sk, &timeo, last); ++ err = sk_wait_data(sk, &timeo, last); ++ if (err < 0) { ++ err = copied ? : err; ++ goto out; ++ } + } + + if ((flags & MSG_PEEK) && +@@ -3073,12 +3079,6 @@ int tcp_disconnect(struct sock *sk, int flags) + int old_state = sk->sk_state; + u32 seq; + +- /* Deny disconnect if other threads are blocked in sk_wait_event() +- * or inet_wait_for_connect(). +- */ +- if (sk->sk_wait_pending) +- return -EBUSY; +- + if (old_state != TCP_CLOSE) + tcp_set_state(sk, TCP_CLOSE); + +diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c +index f53380fd89bcf..cb4549db8bcfc 100644 +--- a/net/ipv4/tcp_bpf.c ++++ b/net/ipv4/tcp_bpf.c +@@ -302,6 +302,8 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, + } + + data = tcp_msg_wait_data(sk, psock, timeo); ++ if (data < 0) ++ return data; + if (data && !sk_psock_queue_empty(psock)) + goto msg_bytes_ready; + copied = -EAGAIN; +@@ -346,6 +348,8 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + data = tcp_msg_wait_data(sk, psock, timeo); ++ if (data < 0) ++ return data; + if (data) { + if (!sk_psock_queue_empty(psock)) + goto msg_bytes_ready; +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index 9d67f2e4d4a6e..e061091edb394 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -3101,12 +3101,6 @@ static int mptcp_disconnect(struct sock *sk, int flags) + { + struct mptcp_sock *msk = mptcp_sk(sk); + +- /* Deny disconnect if other threads are blocked in sk_wait_event() +- * or inet_wait_for_connect(). +- */ +- if (sk->sk_wait_pending) +- return -EBUSY; +- + /* We are on the fastopen error path. We can't call straight into the + * subflows cleanup code due to lock nesting (we are already under + * msk->firstsocket lock). 
+@@ -3174,7 +3168,6 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, + inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk); + #endif + +- nsk->sk_wait_pending = 0; + __mptcp_init_sock(nsk); + + msk = mptcp_sk(nsk); +diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c +index f2e7302a4d96b..338a443fa47b2 100644 +--- a/net/tls/tls_main.c ++++ b/net/tls/tls_main.c +@@ -96,8 +96,8 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx) + + int wait_on_pending_writer(struct sock *sk, long *timeo) + { +- int rc = 0; + DEFINE_WAIT_FUNC(wait, woken_wake_function); ++ int ret, rc = 0; + + add_wait_queue(sk_sleep(sk), &wait); + while (1) { +@@ -111,9 +111,13 @@ int wait_on_pending_writer(struct sock *sk, long *timeo) + break; + } + +- if (sk_wait_event(sk, timeo, +- !READ_ONCE(sk->sk_write_pending), &wait)) ++ ret = sk_wait_event(sk, timeo, ++ !READ_ONCE(sk->sk_write_pending), &wait); ++ if (ret) { ++ if (ret < 0) ++ rc = ret; + break; ++ } + } + remove_wait_queue(sk_sleep(sk), &wait); + return rc; +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index c5c8fdadc05e8..2af72d349192e 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -1296,6 +1296,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + DEFINE_WAIT_FUNC(wait, woken_wake_function); ++ int ret = 0; + long timeo; + + timeo = sock_rcvtimeo(sk, nonblock); +@@ -1307,6 +1308,9 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, + if (sk->sk_err) + return sock_error(sk); + ++ if (ret < 0) ++ return ret; ++ + if (!skb_queue_empty(&sk->sk_receive_queue)) { + tls_strp_check_rcv(&ctx->strp); + if (tls_strp_msg_ready(ctx)) +@@ -1325,10 +1329,10 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, + released = true; + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); +- sk_wait_event(sk, &timeo, +- tls_strp_msg_ready(ctx) || +- !sk_psock_queue_empty(psock), +- &wait); ++ ret = sk_wait_event(sk, &timeo, ++ tls_strp_msg_ready(ctx) || ++ !sk_psock_queue_empty(psock), ++ &wait); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + +@@ -1855,6 +1859,7 @@ static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx, + bool nonblock) + { + long timeo; ++ int ret; + + timeo = sock_rcvtimeo(sk, nonblock); + +@@ -1864,14 +1869,16 @@ static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx, + ctx->reader_contended = 1; + + add_wait_queue(&ctx->wq, &wait); +- sk_wait_event(sk, &timeo, +- !READ_ONCE(ctx->reader_present), &wait); ++ ret = sk_wait_event(sk, &timeo, ++ !READ_ONCE(ctx->reader_present), &wait); + remove_wait_queue(&ctx->wq, &wait); + + if (timeo <= 0) + return -EAGAIN; + if (signal_pending(current)) + return sock_intr_errno(timeo); ++ if (ret < 0) ++ return ret; + } + + WRITE_ONCE(ctx->reader_present, 1); +-- +2.40.1 + diff --git a/queue-6.1/tcp_bpf-properly-release-resources-on-error-paths.patch b/queue-6.1/tcp_bpf-properly-release-resources-on-error-paths.patch new file mode 100644 index 00000000000..a3bb7b32b5a --- /dev/null +++ b/queue-6.1/tcp_bpf-properly-release-resources-on-error-paths.patch @@ -0,0 +1,79 @@ +From 73eefd1c290aa4f78f8b75581a19211994dfb9f5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 17 Oct 2023 17:49:51 +0200 +Subject: tcp_bpf: properly release resources on error paths + +From: Paolo Abeni + +[ Upstream commit 
68b54aeff804acceb02f228ea2e28419272c1fb9 ] + +In the blamed commit below, I completely forgot to release the acquired +resources before erroring out in the TCP BPF code, as reported by Dan. + +Address the issues by replacing the bogus return with a jump to the +relevant cleanup code. + +Fixes: 419ce133ab92 ("tcp: allow again tcp_disconnect() when threads are waiting") +Reported-by: Dan Carpenter +Signed-off-by: Paolo Abeni +Acked-by: Jakub Sitnicki +Reviewed-by: Eric Dumazet +Reviewed-by: John Fastabend +Link: https://lore.kernel.org/r/8f99194c698bcef12666f0a9a999c58f8b1cb52c.1697557782.git.pabeni@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_bpf.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c +index cb4549db8bcfc..f8037d142bb75 100644 +--- a/net/ipv4/tcp_bpf.c ++++ b/net/ipv4/tcp_bpf.c +@@ -302,8 +302,10 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, + } + + data = tcp_msg_wait_data(sk, psock, timeo); +- if (data < 0) +- return data; ++ if (data < 0) { ++ copied = data; ++ goto unlock; ++ } + if (data && !sk_psock_queue_empty(psock)) + goto msg_bytes_ready; + copied = -EAGAIN; +@@ -314,6 +316,8 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, + tcp_rcv_space_adjust(sk); + if (copied > 0) + __tcp_cleanup_rbuf(sk, copied); ++ ++unlock: + release_sock(sk); + sk_psock_put(sk, psock); + return copied; +@@ -348,8 +352,10 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + data = tcp_msg_wait_data(sk, psock, timeo); +- if (data < 0) +- return data; ++ if (data < 0) { ++ ret = data; ++ goto unlock; ++ } + if (data) { + if (!sk_psock_queue_empty(psock)) + goto msg_bytes_ready; +@@ -360,6 +366,8 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + copied = -EAGAIN; + } + ret = copied; ++ ++unlock: + release_sock(sk); + sk_psock_put(sk, psock); + return ret; +-- +2.40.1 + diff --git a/queue-6.1/tracing-relax-trace_event_eval_update-execution-with.patch b/queue-6.1/tracing-relax-trace_event_eval_update-execution-with.patch new file mode 100644 index 00000000000..690842a9c46 --- /dev/null +++ b/queue-6.1/tracing-relax-trace_event_eval_update-execution-with.patch @@ -0,0 +1,55 @@ +From 446432f170db52eb4d8c51d9932e8516fb316382 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Sep 2023 21:16:37 +0200 +Subject: tracing: relax trace_event_eval_update() execution with + cond_resched() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Clément Léger + +[ Upstream commit 23cce5f25491968b23fb9c399bbfb25f13870cd9 ] + +When kernel is compiled without preemption, the eval_map_work_func() +(which calls trace_event_eval_update()) will not be preempted up to its +complete execution. This can actually cause a problem since if another +CPU call stop_machine(), the call will have to wait for the +eval_map_work_func() function to finish executing in the workqueue +before being able to be scheduled. This problem was observe on a SMP +system at boot time, when the CPU calling the initcalls executed +clocksource_done_booting() which in the end calls stop_machine(). We +observed a 1 second delay because one CPU was executing +eval_map_work_func() and was not preempted by the stop_machine() task. 
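+
+The shape of the fix is the standard one for long loops that may run
+non-preemptibly in a workqueue (a schematic sketch with a hypothetical
+per-entry helper, not the trace_events.c loop itself):
+
+    for (i = 0; i < len; i++) {
+            update_one_map_entry(map[i]);   /* potentially slow work */
+            cond_resched();                 /* lets stop_machine() proceed */
+    }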
+ +Adding a call to cond_resched() in trace_event_eval_update() allows +other tasks to be executed and thus continue working asynchronously +like before without blocking any pending task at boot time. + +Link: https://lore.kernel.org/linux-trace-kernel/20230929191637.416931-1-cleger@rivosinc.com + +Cc: Masami Hiramatsu +Signed-off-by: Clément Léger +Tested-by: Atish Patra +Reviewed-by: Atish Patra +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace_events.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c +index 9da418442a063..2e3dce5e2575e 100644 +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -2777,6 +2777,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) + update_event_fields(call, map[i]); + } + } ++ cond_resched(); + } + up_write(&trace_event_sem); + } +-- +2.40.1 + diff --git a/queue-6.1/usb-misc-onboard_hub-add-support-for-microchip-usb24.patch b/queue-6.1/usb-misc-onboard_hub-add-support-for-microchip-usb24.patch new file mode 100644 index 00000000000..df030606a60 --- /dev/null +++ b/queue-6.1/usb-misc-onboard_hub-add-support-for-microchip-usb24.patch @@ -0,0 +1,54 @@ +From 7a70703054234c3243452ac9220b9d1de1e46eb5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Sep 2023 10:22:38 +0200 +Subject: usb: misc: onboard_hub: add support for Microchip USB2412 USB 2.0 hub + +From: Javier Carrasco + +[ Upstream commit e59e38158c61162f2e8beb4620df21a1585117df ] + +The USB2412 is a 2-Port USB 2.0 hub controller that provides a reset pin +and a single 3v3 powre source, which makes it suitable to be controlled +by the onboard_hub driver. + +This hub has the same reset timings as USB2514/2517 and the same +onboard hub specific-data can be reused for USB2412. 
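+
+In outline, the reuse amounts to new table entries pointing at the
+existing Microchip per-chip data (compilable sketch with stand-in
+types; the reset_us value here is assumed, not quoted from the driver):
+
+    struct pdata { unsigned int reset_us; };
+
+    static const struct pdata usb424_data = { .reset_us = 1 };
+
+    static const struct { const char *compatible; const struct pdata *data; }
+    of_match[] = {
+            { "usb424,2412", &usb424_data },        /* new: USB2412 */
+            { "usb424,2514", &usb424_data },
+            { "usb424,2517", &usb424_data },
+    };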
+ +Signed-off-by: Javier Carrasco +Cc: stable +Acked-by: Matthias Kaehlcke +Link: https://lore.kernel.org/r/20230911-topic-2412_onboard_hub-v1-1-7704181ddfff@wolfvision.net +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/usb/misc/onboard_usb_hub.c | 1 + + drivers/usb/misc/onboard_usb_hub.h | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c +index 8d5c83c9ff877..8edd0375e0a8a 100644 +--- a/drivers/usb/misc/onboard_usb_hub.c ++++ b/drivers/usb/misc/onboard_usb_hub.c +@@ -409,6 +409,7 @@ static const struct usb_device_id onboard_hub_id_table[] = { + { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 */ + { USB_DEVICE(VENDOR_ID_GENESYS, 0x0610) }, /* Genesys Logic GL852G USB 2.0 */ + { USB_DEVICE(VENDOR_ID_GENESYS, 0x0620) }, /* Genesys Logic GL3523 USB 3.1 */ ++ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2412) }, /* USB2412 USB 2.0 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */ + { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */ +diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h +index 61fee18f9dfc9..d023fb90b4118 100644 +--- a/drivers/usb/misc/onboard_usb_hub.h ++++ b/drivers/usb/misc/onboard_usb_hub.h +@@ -31,6 +31,7 @@ static const struct onboard_hub_pdata genesys_gl852g_data = { + }; + + static const struct of_device_id onboard_hub_match[] = { ++ { .compatible = "usb424,2412", .data = µchip_usb424_data, }, + { .compatible = "usb424,2514", .data = µchip_usb424_data, }, + { .compatible = "usb424,2517", .data = µchip_usb424_data, }, + { .compatible = "usb451,8140", .data = &ti_tusb8041_data, }, +-- +2.40.1 + diff --git a/queue-6.1/usb-misc-onboard_usb_hub-add-genesys-logic-gl3523-hu.patch b/queue-6.1/usb-misc-onboard_usb_hub-add-genesys-logic-gl3523-hu.patch new file mode 100644 index 00000000000..6de9c5747ad --- /dev/null +++ b/queue-6.1/usb-misc-onboard_usb_hub-add-genesys-logic-gl3523-hu.patch @@ -0,0 +1,55 @@ +From 96a8a99bf6bf1b6662aad7118baea93baf0a3dc8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 Jun 2023 16:22:28 +0200 +Subject: usb: misc: onboard_usb_hub: add Genesys Logic GL3523 hub support + +From: Anand Moon + +[ Upstream commit d97b4b35adcecd4b747d3e1c262e10e4a093cefa ] + +Genesys Logic GL3523 is a 4-port USB 3.1 hub that has a reset pin to +toggle and a 5.0V core supply exported though an integrated LDO is +available for powering it. + +Add the support for this hub, for controlling the reset pin and the core +power supply. 
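+
+Controlling the reset pin and the core supply means the driver's
+generic power-up sequence applies; roughly (a simplified sketch under
+assumed field names, not the literal onboard_usb_hub.c code):
+
+    regulator_enable(hub->vdd);                     /* core supply on */
+    fsleep(hub->pdata->reset_us);                   /* per-chip delay */
+    gpiod_set_value_cansleep(hub->reset_gpio, 0);   /* release reset */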
+ +Signed-off-by: Anand Moon +[m.felsch@pengutronix.de: include review feedback & port to 6.4] +Signed-off-by: Marco Felsch +Link: https://lore.kernel.org/r/20230623142228.4069084-2-m.felsch@pengutronix.de +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: e59e38158c61 ("usb: misc: onboard_hub: add support for Microchip USB2412 USB 2.0 hub") +Signed-off-by: Sasha Levin +--- + drivers/usb/misc/onboard_usb_hub.c | 1 + + drivers/usb/misc/onboard_usb_hub.h | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c +index 7a1030ddf9956..8d5c83c9ff877 100644 +--- a/drivers/usb/misc/onboard_usb_hub.c ++++ b/drivers/usb/misc/onboard_usb_hub.c +@@ -408,6 +408,7 @@ static void onboard_hub_usbdev_disconnect(struct usb_device *udev) + static const struct usb_device_id onboard_hub_id_table[] = { + { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 */ + { USB_DEVICE(VENDOR_ID_GENESYS, 0x0610) }, /* Genesys Logic GL852G USB 2.0 */ ++ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0620) }, /* Genesys Logic GL3523 USB 3.1 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */ + { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */ +diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h +index 0c2ab5755a7ea..61fee18f9dfc9 100644 +--- a/drivers/usb/misc/onboard_usb_hub.h ++++ b/drivers/usb/misc/onboard_usb_hub.h +@@ -37,6 +37,7 @@ static const struct of_device_id onboard_hub_match[] = { + { .compatible = "usb451,8142", .data = &ti_tusb8041_data, }, + { .compatible = "usb5e3,608", .data = &genesys_gl850g_data, }, + { .compatible = "usb5e3,610", .data = &genesys_gl852g_data, }, ++ { .compatible = "usb5e3,620", .data = &genesys_gl852g_data, }, + { .compatible = "usbbda,411", .data = &realtek_rts5411_data, }, + { .compatible = "usbbda,5411", .data = &realtek_rts5411_data, }, + { .compatible = "usbbda,414", .data = &realtek_rts5411_data, }, +-- +2.40.1 + diff --git a/queue-6.1/usb-misc-onboard_usb_hub-add-genesys-logic-gl850g-hu.patch b/queue-6.1/usb-misc-onboard_usb_hub-add-genesys-logic-gl850g-hu.patch new file mode 100644 index 00000000000..21d287f94e2 --- /dev/null +++ b/queue-6.1/usb-misc-onboard_usb_hub-add-genesys-logic-gl850g-hu.patch @@ -0,0 +1,71 @@ +From e1ab429a75e2435a94dc42287687169e782b0924 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Dec 2022 13:52:25 +0800 +Subject: usb: misc: onboard_usb_hub: add Genesys Logic GL850G hub support + +From: Icenowy Zheng + +[ Upstream commit 9bae996ffa28ac03b6d95382a2a082eb219e745a ] + +Genesys Logic GL850G is a 4-port USB 2.0 STT hub that has a reset pin to +toggle and a 3.3V core supply exported (although an integrated LDO is +available for powering it with 5V). + +Add the support for this hub, for controlling the reset pin and the core +power supply. 
+ +Signed-off-by: Icenowy Zheng +Acked-by: Matthias Kaehlcke +Link: https://lore.kernel.org/r/20221206055228.306074-4-uwu@icenowy.me +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: e59e38158c61 ("usb: misc: onboard_hub: add support for Microchip USB2412 USB 2.0 hub") +Signed-off-by: Sasha Levin +--- + drivers/usb/misc/onboard_usb_hub.c | 2 ++ + drivers/usb/misc/onboard_usb_hub.h | 5 +++++ + 2 files changed, 7 insertions(+) + +diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c +index 832d3ba9368ff..87df27425ec5f 100644 +--- a/drivers/usb/misc/onboard_usb_hub.c ++++ b/drivers/usb/misc/onboard_usb_hub.c +@@ -329,6 +329,7 @@ static struct platform_driver onboard_hub_driver = { + + /************************** USB driver **************************/ + ++#define VENDOR_ID_GENESYS 0x05e3 + #define VENDOR_ID_MICROCHIP 0x0424 + #define VENDOR_ID_REALTEK 0x0bda + #define VENDOR_ID_TI 0x0451 +@@ -405,6 +406,7 @@ static void onboard_hub_usbdev_disconnect(struct usb_device *udev) + } + + static const struct usb_device_id onboard_hub_id_table[] = { ++ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */ + { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */ +diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h +index 2cde54b69eede..a97b0594773fa 100644 +--- a/drivers/usb/misc/onboard_usb_hub.h ++++ b/drivers/usb/misc/onboard_usb_hub.h +@@ -22,11 +22,16 @@ static const struct onboard_hub_pdata ti_tusb8041_data = { + .reset_us = 3000, + }; + ++static const struct onboard_hub_pdata genesys_gl850g_data = { ++ .reset_us = 3, ++}; ++ + static const struct of_device_id onboard_hub_match[] = { + { .compatible = "usb424,2514", .data = µchip_usb424_data, }, + { .compatible = "usb424,2517", .data = µchip_usb424_data, }, + { .compatible = "usb451,8140", .data = &ti_tusb8041_data, }, + { .compatible = "usb451,8142", .data = &ti_tusb8041_data, }, ++ { .compatible = "usb5e3,608", .data = &genesys_gl850g_data, }, + { .compatible = "usbbda,411", .data = &realtek_rts5411_data, }, + { .compatible = "usbbda,5411", .data = &realtek_rts5411_data, }, + { .compatible = "usbbda,414", .data = &realtek_rts5411_data, }, +-- +2.40.1 + diff --git a/queue-6.1/usb-misc-onboard_usb_hub-add-genesys-logic-gl852g-hu.patch b/queue-6.1/usb-misc-onboard_usb_hub-add-genesys-logic-gl852g-hu.patch new file mode 100644 index 00000000000..e20aeb88d09 --- /dev/null +++ b/queue-6.1/usb-misc-onboard_usb_hub-add-genesys-logic-gl852g-hu.patch @@ -0,0 +1,64 @@ +From e9a249382212ab43ab4a222d2403066b8c25b77b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 18 Jan 2023 04:44:12 +0000 +Subject: usb: misc: onboard_usb_hub: add Genesys Logic GL852G hub support + +From: Anand Moon + +[ Upstream commit db7cab26c3d1382ec85d8cadf642f57250edea58 ] + +Genesys Logic GL852G is a 4-port USB 2.0 STT hub that has a reset pin to +toggle and a 5.0V core supply exported though an integrated LDO is +available for powering it. + +Add the support for this hub, for controlling the reset pin and the core +power supply. 
+ +Signed-off-by: Anand Moon +Acked-by: Matthias Kaehlcke +Link: https://lore.kernel.org/r/20230118044418.875-5-linux.amoon@gmail.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: e59e38158c61 ("usb: misc: onboard_hub: add support for Microchip USB2412 USB 2.0 hub") +Signed-off-by: Sasha Levin +--- + drivers/usb/misc/onboard_usb_hub.c | 1 + + drivers/usb/misc/onboard_usb_hub.h | 5 +++++ + 2 files changed, 6 insertions(+) + +diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c +index 87df27425ec5f..7a1030ddf9956 100644 +--- a/drivers/usb/misc/onboard_usb_hub.c ++++ b/drivers/usb/misc/onboard_usb_hub.c +@@ -407,6 +407,7 @@ static void onboard_hub_usbdev_disconnect(struct usb_device *udev) + + static const struct usb_device_id onboard_hub_id_table[] = { + { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 */ ++ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0610) }, /* Genesys Logic GL852G USB 2.0 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */ + { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */ +diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h +index a97b0594773fa..0c2ab5755a7ea 100644 +--- a/drivers/usb/misc/onboard_usb_hub.h ++++ b/drivers/usb/misc/onboard_usb_hub.h +@@ -26,12 +26,17 @@ static const struct onboard_hub_pdata genesys_gl850g_data = { + .reset_us = 3, + }; + ++static const struct onboard_hub_pdata genesys_gl852g_data = { ++ .reset_us = 50, ++}; ++ + static const struct of_device_id onboard_hub_match[] = { + { .compatible = "usb424,2514", .data = µchip_usb424_data, }, + { .compatible = "usb424,2517", .data = µchip_usb424_data, }, + { .compatible = "usb451,8140", .data = &ti_tusb8041_data, }, + { .compatible = "usb451,8142", .data = &ti_tusb8041_data, }, + { .compatible = "usb5e3,608", .data = &genesys_gl850g_data, }, ++ { .compatible = "usb5e3,610", .data = &genesys_gl852g_data, }, + { .compatible = "usbbda,411", .data = &realtek_rts5411_data, }, + { .compatible = "usbbda,5411", .data = &realtek_rts5411_data, }, + { .compatible = "usbbda,414", .data = &realtek_rts5411_data, }, +-- +2.40.1 + diff --git a/queue-6.1/wifi-cfg80211-avoid-leaking-stack-data-into-trace.patch b/queue-6.1/wifi-cfg80211-avoid-leaking-stack-data-into-trace.patch new file mode 100644 index 00000000000..a53f4d47a0a --- /dev/null +++ b/queue-6.1/wifi-cfg80211-avoid-leaking-stack-data-into-trace.patch @@ -0,0 +1,38 @@ +From b56d1b5567b3b2edc0895e6c297a61c65e161723 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 Sep 2023 17:18:56 +0200 +Subject: wifi: cfg80211: avoid leaking stack data into trace + +From: Benjamin Berg + +[ Upstream commit 334bf33eec5701a1e4e967bcb7cc8611a998334b ] + +If the structure is not initialized then boolean types might be copied +into the tracing data without being initialised. This causes data from +the stack to leak into the trace and also triggers a UBSAN failure which +can easily be avoided here. 
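+
+The hazard in miniature (a self-contained sketch, not the nl80211
+code):
+
+    struct cfg {
+            int mask;       /* always written before use */
+            _Bool flag;     /* written only on some paths */
+    };
+
+    void fill(struct cfg *out, int want_flag)
+    {
+            struct cfg c = {};      /* zero-init: nothing indeterminate */
+
+            c.mask = 1;
+            if (want_flag)
+                    c.flag = 1;
+            *out = c;       /* without "= {}", c.flag could carry stack
+                             * garbage into the tracepoint */
+    }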
+ +Signed-off-by: Benjamin Berg +Link: https://lore.kernel.org/r/20230925171855.a9271ef53b05.I8180bae663984c91a3e036b87f36a640ba409817@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/wireless/nl80211.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c +index 1d993a490ac4b..b19b5acfaf3a9 100644 +--- a/net/wireless/nl80211.c ++++ b/net/wireless/nl80211.c +@@ -8289,7 +8289,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb, + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; +- struct mesh_config cfg; ++ struct mesh_config cfg = {}; + u32 mask; + int err; + +-- +2.40.1 + diff --git a/queue-6.1/wifi-cfg80211-fix-6ghz-scan-configuration.patch b/queue-6.1/wifi-cfg80211-fix-6ghz-scan-configuration.patch new file mode 100644 index 00000000000..87a7dc0ceef --- /dev/null +++ b/queue-6.1/wifi-cfg80211-fix-6ghz-scan-configuration.patch @@ -0,0 +1,40 @@ +From f8112351b0505cd35b1a0a1fed1ad25dbc234ca1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 18 Sep 2023 14:10:54 +0300 +Subject: wifi: cfg80211: Fix 6GHz scan configuration + +From: Ilan Peer + +[ Upstream commit 0914468adf92296c4cba8a2134e06e3dea150f2e ] + +When the scan request includes a non broadcast BSSID, when adding the +scan parameters for 6GHz collocated scanning, do not include entries +that do not match the given BSSID. + +Signed-off-by: Ilan Peer +Signed-off-by: Gregory Greenman +Link: https://lore.kernel.org/r/20230918140607.6d31d2a96baf.I6c4e3e3075d1d1878ee41f45190fdc6b86f18708@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/wireless/scan.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/net/wireless/scan.c b/net/wireless/scan.c +index e5c1510c098fd..b7e1631b3d80d 100644 +--- a/net/wireless/scan.c ++++ b/net/wireless/scan.c +@@ -876,6 +876,10 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) + !cfg80211_find_ssid_match(ap, request)) + continue; + ++ if (!is_broadcast_ether_addr(request->bssid) && ++ !ether_addr_equal(request->bssid, ap->bssid)) ++ continue; ++ + if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid) + continue; + +-- +2.40.1 + diff --git a/queue-6.1/wifi-iwlwifi-ensure-ack-flag-is-properly-cleared.patch b/queue-6.1/wifi-iwlwifi-ensure-ack-flag-is-properly-cleared.patch new file mode 100644 index 00000000000..9ab48b14e67 --- /dev/null +++ b/queue-6.1/wifi-iwlwifi-ensure-ack-flag-is-properly-cleared.patch @@ -0,0 +1,46 @@ +From 9865b6f9bb8d98305584ef8b63fb94b436b84e73 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 8 Aug 2023 13:56:05 -0700 +Subject: wifi: iwlwifi: Ensure ack flag is properly cleared. + +From: Ben Greear + +[ Upstream commit e8fbe99e87877f0412655f40d7c45bf8471470ac ] + +Debugging indicates that nothing else is clearing the info->flags, +so some frames were flagged as ACKed when they should not be. +Explicitly clear the ack flag to ensure this does not happen. 
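+
+Reduced to its core, the rule is that recycled status words need their
+stale bits masked off before the new status is filled in (illustrative
+flag names, not mac80211's):
+
+    #define STAT_ACK         0x1u
+    #define STAT_TX_FILTERED 0x2u
+
+    unsigned int update_status(unsigned int flags, int acked)
+    {
+            flags &= ~(STAT_ACK | STAT_TX_FILTERED);    /* clear stale bits */
+            if (acked)
+                    flags |= STAT_ACK;
+            return flags;
+    }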
+
+Signed-off-by: Ben Greear
+Acked-by: Gregory Greenman
+Link: https://lore.kernel.org/r/20230808205605.4105670-1-greearb@candelatech.com
+Signed-off-by: Johannes Berg
+Signed-off-by: Sasha Levin
+---
+ drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+index 542cfcad6e0e6..2d01f6226b7c6 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+@@ -1585,6 +1585,7 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
+ 		iwl_trans_free_tx_cmd(mvm->trans, info->driver_data[1]);
+
+ 	memset(&info->status, 0, sizeof(info->status));
++	info->flags &= ~(IEEE80211_TX_STAT_ACK | IEEE80211_TX_STAT_TX_FILTERED);
+
+ 	/* inform mac80211 about what happened with the frame */
+ 	switch (status & TX_STATUS_MSK) {
+@@ -1936,6 +1937,8 @@ static void iwl_mvm_tx_reclaim(struct iwl_mvm *mvm, int sta_id, int tid,
+ 		 */
+ 		if (!is_flush)
+ 			info->flags |= IEEE80211_TX_STAT_ACK;
++		else
++			info->flags &= ~IEEE80211_TX_STAT_ACK;
+ 	}
+
+ /*
+--
+2.40.1
+
diff --git a/queue-6.1/wifi-mac80211-allow-transmitting-eapol-frames-with-t.patch b/queue-6.1/wifi-mac80211-allow-transmitting-eapol-frames-with-t.patch
new file mode 100644
index 00000000000..4e30cda2c8b
--- /dev/null
+++ b/queue-6.1/wifi-mac80211-allow-transmitting-eapol-frames-with-t.patch
@@ -0,0 +1,110 @@
+From 42fd080e0a875b6f3773309b72f9e281e5aa003a Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Tue, 1 Aug 2023 02:47:51 -0400
+Subject: wifi: mac80211: allow transmitting EAPOL frames with tainted key
+
+From: Wen Gong
+
+[ Upstream commit 61304336c67358d49a989e5e0060d8c99bad6ca8 ]
+
+The lower layer device driver stops/wakes TX by calling
+ieee80211_stop_queue()/ieee80211_wake_queue() during hw scan. Sometimes
+hw scan and PTK rekey run in parallel: when the M4 sent from
+wpa_supplicant arrives while the TX queue is stopped, the M4 is left
+pending, and then the new key is installed from wpa_supplicant. After
+the TX queue is woken up by the lower layer device driver, the M4 is
+dropped by the call stack below.
+
+When the key install starts, KEY_FLAG_TAINTED is set on the current key
+in ieee80211_pairwise_rekey(), and then mac80211 waits for the lower
+layer device driver to complete the key install. Meanwhile
+ieee80211_tx_h_select_key() returns TX_DROP for the M4 in step 12
+below, and ieee80211_free_txskb() is called by ieee80211_tx_dequeue(),
+so the M4 is freed without being sent, and the rekey process fails
+because the AP never receives the M4. Please see the detailed steps
+below.
+
+There is an interval between KEY_FLAG_TAINTED being set on the current
+key and the key install completing in the lower layer device driver;
+while KEY_FLAG_TAINTED is set, all packets including the M4 are
+dropped. This interval covers steps 8~13 below.
+
+issue steps:
+    TX thread                              install key thread
+1.  stop_queue                             -idle-
+2.  sending M4                             -idle-
+3.  M4 pending                             -idle-
+4.  -idle-                                 starting install key from wpa_supplicant
+5.  -idle-                                 =>ieee80211_key_replace()
+6.  -idle-                                 =>ieee80211_pairwise_rekey() and set
+                                           current key->flags |= KEY_FLAG_TAINTED
+7.  -idle-                                 =>ieee80211_key_enable_hw_accel()
+8.  -idle-                                 =>drv_set_key() and waiting key install
+                                           complete from lower layer device driver
+9.  wake_queue                             -waiting state-
+10. re-sending M4                          -waiting state-
+11. =>ieee80211_tx_h_select_key()          -waiting state-
+12. drop M4 by KEY_FLAG_TAINTED            -waiting state-
+13.
 -idle-                                install key complete with success/fail
+                                           success: clear flag KEY_FLAG_TAINTED
+                                           fail: start disconnect
+
+Hence add a check in step 11 above to allow the EAPOL frame to be sent
+out during the interval. If the lower layer device driver uses the old
+key/cipher to encrypt the M4, the AP receives/decrypts the M4
+correctly; after the M4 is sent out, the lower layer device driver
+installs the new key/cipher to hardware and returns success.
+
+If the lower layer device driver uses the new key/cipher to send the
+M4, the AP will/should drop the M4; that gives the same result as this
+issue, and the AP will/should kick out the station just as with this
+issue.
+
+issue log:
+kworker/u16:4-5238 [000] 6456.108926: stop_queue: phy1 queue:0, reason:0
+wpa_supplicant-961 [003] 6456.119737: rdev_tx_control_port: wiphy_name=phy1 name=wlan0 ifindex=6 dest=ARRAY[9e, 05, 31, 20, 9b, d0] proto=36488 unencrypted=0
+wpa_supplicant-961 [003] 6456.119839: rdev_return_int_cookie: phy1, returned 0, cookie: 504
+wpa_supplicant-961 [003] 6456.120287: rdev_add_key: phy1, netdev:wlan0(6), key_index: 0, mode: 0, pairwise: true, mac addr: 9e:05:31:20:9b:d0
+wpa_supplicant-961 [003] 6456.120453: drv_set_key: phy1 vif:wlan0(2) sta:9e:05:31:20:9b:d0 cipher:0xfac04, flags=0x9, keyidx=0, hw_key_idx=0
+kworker/u16:9-3829 [001] 6456.168240: wake_queue: phy1 queue:0, reason:0
+kworker/u16:9-3829 [001] 6456.168255: drv_wake_tx_queue: phy1 vif:wlan0(2) sta:9e:05:31:20:9b:d0 ac:0 tid:7
+kworker/u16:9-3829 [001] 6456.168305: cfg80211_control_port_tx_status: wdev(1), cookie: 504, ack: false
+wpa_supplicant-961 [003] 6459.167982: drv_return_int: phy1 - -110
+
+issue call stack:
+nl80211_frame_tx_status+0x230/0x340 [cfg80211]
+cfg80211_control_port_tx_status+0x1c/0x28 [cfg80211]
+ieee80211_report_used_skb+0x374/0x3e8 [mac80211]
+ieee80211_free_txskb+0x24/0x40 [mac80211]
+ieee80211_tx_dequeue+0x644/0x954 [mac80211]
+ath10k_mac_tx_push_txq+0xac/0x238 [ath10k_core]
+ath10k_mac_op_wake_tx_queue+0xac/0xe0 [ath10k_core]
+drv_wake_tx_queue+0x80/0x168 [mac80211]
+__ieee80211_wake_txqs+0xe8/0x1c8 [mac80211]
+_ieee80211_wake_txqs+0xb4/0x120 [mac80211]
+ieee80211_wake_txqs+0x48/0x80 [mac80211]
+tasklet_action_common+0xa8/0x254
+tasklet_action+0x2c/0x38
+__do_softirq+0xdc/0x384
+
+Signed-off-by: Wen Gong
+Link: https://lore.kernel.org/r/20230801064751.25803-1-quic_wgong@quicinc.com
+Signed-off-by: Johannes Berg
+Signed-off-by: Sasha Levin
+---
+ net/mac80211/tx.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
+index 2f9e1abdf375d..2db103a56a28f 100644
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -680,7 +680,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
+ 	}
+
+ 	if (unlikely(tx->key && tx->key->flags & KEY_FLAG_TAINTED &&
+-		     !ieee80211_is_deauth(hdr->frame_control)))
++		     !ieee80211_is_deauth(hdr->frame_control)) &&
++	    tx->skb->protocol != tx->sdata->control_port_protocol)
+ 		return TX_DROP;
+
+ 	if (!skip_hw && tx->key &&
+--
+2.40.1
+
diff --git a/queue-6.1/wifi-mac80211-work-around-cisco-ap-9115-vht-mpdu-len.patch b/queue-6.1/wifi-mac80211-work-around-cisco-ap-9115-vht-mpdu-len.patch
new file mode 100644
index 00000000000..fdebdb57462
--- /dev/null
+++ b/queue-6.1/wifi-mac80211-work-around-cisco-ap-9115-vht-mpdu-len.patch
@@ -0,0 +1,177 @@
+From ec2ef90b8890dba12da3cda729f00969c61df036 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Mon, 18 Sep 2023 14:10:55 +0300
+Subject: wifi: mac80211: work around Cisco AP 9115 VHT MPDU length
+
+From: Johannes Berg
+
+[ Upstream commit 
084cf2aeca97566db4fa15d55653c1cba2db83ed ] + +Cisco AP module 9115 with FW 17.3 has a bug and sends a too +large maximum MPDU length in the association response +(indicating 12k) that it cannot actually process. + +Work around that by taking the minimum between what's in the +association response and the BSS elements (from beacon or +probe response). + +Signed-off-by: Johannes Berg +Signed-off-by: Gregory Greenman +Link: https://lore.kernel.org/r/20230918140607.d1966a9a532e.I090225babb7cd4d1081ee9acd40e7de7e41c15ae@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/mac80211/cfg.c | 3 ++- + net/mac80211/ibss.c | 2 +- + net/mac80211/ieee80211_i.h | 1 + + net/mac80211/mesh_plink.c | 2 +- + net/mac80211/mlme.c | 27 +++++++++++++++++++++++++-- + net/mac80211/vht.c | 16 ++++++++++++++-- + 6 files changed, 44 insertions(+), 7 deletions(-) + +diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c +index 0167413d56972..ee9f455bb2d18 100644 +--- a/net/mac80211/cfg.c ++++ b/net/mac80211/cfg.c +@@ -1748,7 +1748,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, + /* VHT can override some HT caps such as the A-MSDU max length */ + if (params->vht_capa) + ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, +- params->vht_capa, link_sta); ++ params->vht_capa, NULL, ++ link_sta); + + if (params->he_capa) + ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, +diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c +index 9dffc30795887..79d2c55052897 100644 +--- a/net/mac80211/ibss.c ++++ b/net/mac80211/ibss.c +@@ -1068,7 +1068,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, + &chandef); + memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie)); + ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, +- &cap_ie, ++ &cap_ie, NULL, + &sta->deflink); + if (memcmp(&cap, &sta->sta.deflink.vht_cap, sizeof(cap))) + rates_updated |= true; +diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h +index 27479bbb093ac..99a976ea17498 100644 +--- a/net/mac80211/ieee80211_i.h ++++ b/net/mac80211/ieee80211_i.h +@@ -2062,6 +2062,7 @@ void + ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct ieee80211_vht_cap *vht_cap_ie, ++ const struct ieee80211_vht_cap *vht_cap_ie2, + struct link_sta_info *link_sta); + enum ieee80211_sta_rx_bandwidth + ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta); +diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c +index ddfe5102b9a43..bd0b7c189adfa 100644 +--- a/net/mac80211/mesh_plink.c ++++ b/net/mac80211/mesh_plink.c +@@ -443,7 +443,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, + changed |= IEEE80211_RC_BW_CHANGED; + + ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, +- elems->vht_cap_elem, ++ elems->vht_cap_elem, NULL, + &sta->deflink); + + ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap, +diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c +index dc9e7eb7dd857..c07645c999f9a 100644 +--- a/net/mac80211/mlme.c ++++ b/net/mac80211/mlme.c +@@ -4083,10 +4083,33 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, + elems->ht_cap_elem, + link_sta); + +- if (elems->vht_cap_elem && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) ++ if (elems->vht_cap_elem && ++ !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) { ++ const struct ieee80211_vht_cap *bss_vht_cap = NULL; ++ const struct cfg80211_bss_ies *ies; ++ ++ /* ++ * Cisco AP module 9115 with 
FW 17.3 has a bug and sends a
++	 * too large maximum MPDU length in the association response
++	 * (indicating 12k) that it cannot actually process ...
++	 * Work around that.
++	 */
++	rcu_read_lock();
++	ies = rcu_dereference(cbss->ies);
++	if (ies) {
++		const struct element *elem;
++
++		elem = cfg80211_find_elem(WLAN_EID_VHT_CAPABILITY,
++					  ies->data, ies->len);
++		if (elem && elem->datalen >= sizeof(*bss_vht_cap))
++			bss_vht_cap = (const void *)elem->data;
++	}
++
+ 		ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
+ 						    elems->vht_cap_elem,
+-						    link_sta);
++						    bss_vht_cap, link_sta);
++		rcu_read_unlock();
++	}
+
+ 	if (elems->he_operation && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
+ 	    elems->he_cap) {
+diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
+index 803de58814852..f7526be8a1c7e 100644
+--- a/net/mac80211/vht.c
++++ b/net/mac80211/vht.c
+@@ -4,7 +4,7 @@
+  *
+  * Portions of this file
+  * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
+- * Copyright (C) 2018 - 2022 Intel Corporation
++ * Copyright (C) 2018 - 2023 Intel Corporation
+  */
+
+ #include <linux/ieee80211.h>
+@@ -116,12 +116,14 @@ void
+ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
+ 				    struct ieee80211_supported_band *sband,
+ 				    const struct ieee80211_vht_cap *vht_cap_ie,
++				    const struct ieee80211_vht_cap *vht_cap_ie2,
+ 				    struct link_sta_info *link_sta)
+ {
+ 	struct ieee80211_sta_vht_cap *vht_cap = &link_sta->pub->vht_cap;
+ 	struct ieee80211_sta_vht_cap own_cap;
+ 	u32 cap_info, i;
+ 	bool have_80mhz;
++	u32 mpdu_len;
+
+ 	memset(vht_cap, 0, sizeof(*vht_cap));
+
+@@ -317,11 +319,21 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
+
+ 	link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta);
+
++	/*
++	 * Work around the Cisco 9115 FW 17.3 bug by taking the min of
++	 * both reported MPDU lengths.
++	 */
++	mpdu_len = vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK;
++	if (vht_cap_ie2)
++		mpdu_len = min_t(u32, mpdu_len,
++				 le32_get_bits(vht_cap_ie2->vht_cap_info,
++					       IEEE80211_VHT_CAP_MAX_MPDU_MASK));
++
+ 	/*
+ 	 * FIXME - should the amsdu len be per link? store per link
+ 	 * and maintain a minimum?
+ 	 */
+-	switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) {
++	switch (mpdu_len) {
+ 	case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
+ 		link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454;
+ 		break;
+--
+2.40.1
+
diff --git a/queue-6.1/wifi-mwifiex-sanity-check-tlv_len-and-tlv_bitmap_len.patch b/queue-6.1/wifi-mwifiex-sanity-check-tlv_len-and-tlv_bitmap_len.patch
new file mode 100644
index 00000000000..b5d51cf702e
--- /dev/null
+++ b/queue-6.1/wifi-mwifiex-sanity-check-tlv_len-and-tlv_bitmap_len.patch
@@ -0,0 +1,62 @@
+From c7fa6306400f2018c68e1f5af3fed775b99522be Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Thu, 24 Aug 2023 21:10:45 -0600
+Subject: wifi: mwifiex: Sanity check tlv_len and tlv_bitmap_len
+
+From: Gustavo A. R. Silva
+
+[ Upstream commit d5a93b7d2877aae4ba7590ad6cb65f8d33079489 ]
+
+Add sanity checks for both `tlv_len` and `tlv_bitmap_len` before
+decoding data from `event_buf`.
+
+This prevents any malicious or buggy firmware from overflowing
+`event_buf` through large values for `tlv_len` and `tlv_bitmap_len`.
+
+Suggested-by: Dan Williams
+Signed-off-by: Gustavo A. R. 
Silva +Reviewed-by: Kees Cook +Signed-off-by: Kalle Valo +Link: https://lore.kernel.org/r/d4f8780527d551552ee96f17a0229e02e1c200d1.1692931954.git.gustavoars@kernel.org +Signed-off-by: Sasha Levin +--- + .../net/wireless/marvell/mwifiex/11n_rxreorder.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +index 7351acac6932d..54ab8b54369ba 100644 +--- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c ++++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +@@ -921,6 +921,14 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, + while (tlv_buf_left >= sizeof(*tlv_rxba)) { + tlv_type = le16_to_cpu(tlv_rxba->header.type); + tlv_len = le16_to_cpu(tlv_rxba->header.len); ++ if (size_add(sizeof(tlv_rxba->header), tlv_len) > tlv_buf_left) { ++ mwifiex_dbg(priv->adapter, WARN, ++ "TLV size (%zu) overflows event_buf buf_left=%d\n", ++ size_add(sizeof(tlv_rxba->header), tlv_len), ++ tlv_buf_left); ++ return; ++ } ++ + if (tlv_type != TLV_TYPE_RXBA_SYNC) { + mwifiex_dbg(priv->adapter, ERROR, + "Wrong TLV id=0x%x\n", tlv_type); +@@ -929,6 +937,14 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, + + tlv_seq_num = le16_to_cpu(tlv_rxba->seq_num); + tlv_bitmap_len = le16_to_cpu(tlv_rxba->bitmap_len); ++ if (size_add(sizeof(*tlv_rxba), tlv_bitmap_len) > tlv_buf_left) { ++ mwifiex_dbg(priv->adapter, WARN, ++ "TLV size (%zu) overflows event_buf buf_left=%d\n", ++ size_add(sizeof(*tlv_rxba), tlv_bitmap_len), ++ tlv_buf_left); ++ return; ++ } ++ + mwifiex_dbg(priv->adapter, INFO, + "%pM tid=%d seq_num=%d bitmap_len=%d\n", + tlv_rxba->mac, tlv_rxba->tid, tlv_seq_num, +-- +2.40.1 + diff --git a/queue-6.1/xfrm6-fix-inet6_dev-refcount-underflow-problem.patch b/queue-6.1/xfrm6-fix-inet6_dev-refcount-underflow-problem.patch new file mode 100644 index 00000000000..c0b5edf66cf --- /dev/null +++ b/queue-6.1/xfrm6-fix-inet6_dev-refcount-underflow-problem.patch @@ -0,0 +1,59 @@ +From e18f0e6509ebb2ed91524ab5b591218445998b92 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Sep 2023 19:20:41 +0800 +Subject: xfrm6: fix inet6_dev refcount underflow problem + +From: Zhang Changzhong + +[ Upstream commit cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b ] + +There are race conditions that may lead to inet6_dev refcount underflow +in xfrm6_dst_destroy() and rt6_uncached_list_flush_dev(). + +One of the refcount underflow bugs is shown below: + (cpu 1) | (cpu 2) +xfrm6_dst_destroy() | + ... | + in6_dev_put() | + | rt6_uncached_list_flush_dev() + ... | ... + | in6_dev_put() + rt6_uncached_list_del() | ... + ... | + +xfrm6_dst_destroy() calls rt6_uncached_list_del() after in6_dev_put(), +so rt6_uncached_list_flush_dev() has a chance to call in6_dev_put() +again for the same inet6_dev. + +Fix it by moving in6_dev_put() after rt6_uncached_list_del() in +xfrm6_dst_destroy(). 
+ +Fixes: 510c321b5571 ("xfrm: reuse uncached_list to track xdsts") +Signed-off-by: Zhang Changzhong +Reviewed-by: Xin Long +Signed-off-by: Steffen Klassert +Signed-off-by: Sasha Levin +--- + net/ipv6/xfrm6_policy.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c +index eecc5e59da17c..50c278f1c1063 100644 +--- a/net/ipv6/xfrm6_policy.c ++++ b/net/ipv6/xfrm6_policy.c +@@ -117,10 +117,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) + { + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + +- if (likely(xdst->u.rt6.rt6i_idev)) +- in6_dev_put(xdst->u.rt6.rt6i_idev); + dst_destroy_metrics_generic(dst); + rt6_uncached_list_del(&xdst->u.rt6); ++ if (likely(xdst->u.rt6.rt6i_idev)) ++ in6_dev_put(xdst->u.rt6.rt6i_idev); + xfrm_dst_destroy(xdst); + } + +-- +2.40.1 +