--- /dev/null
+From c4474412884b2a90a28fca2cd771a4bbb7031e40 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Sep 2023 07:07:38 +0300
+Subject: ARM: dts: ti: omap: Fix noisy serial with overrun-throttle-ms for
+ mapphone
+
+From: Tony Lindgren <tony@atomide.com>
+
+[ Upstream commit 5ad37b5e30433afa7a5513e3eb61f69fa0976785 ]
+
+On mapphone devices we may get lots of noise on the micro-USB port in debug
+uart mode until the phy-cpcap-usb driver probes. Let's limit the noise by
+using overrun-throttle-ms.
+
+Note that a related but separate issue still remains: a connected
+charger cable may cause random sysrq requests until phy-cpcap-usb
+probes.
+
+Cc: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
+Cc: Carl Philipp Klemm <philipp@uvos.xyz>
+Cc: Merlijn Wajer <merlijn@wizzup.org>
+Cc: Pavel Machek <pavel@ucw.cz>
+Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/motorola-mapphone-common.dtsi | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/arm/boot/dts/motorola-mapphone-common.dtsi b/arch/arm/boot/dts/motorola-mapphone-common.dtsi
+index d69f0f4b4990d..d2d516d113baa 100644
+--- a/arch/arm/boot/dts/motorola-mapphone-common.dtsi
++++ b/arch/arm/boot/dts/motorola-mapphone-common.dtsi
+@@ -640,6 +640,7 @@ &uart1 {
+ &uart3 {
+ interrupts-extended = <&wakeupgen GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH
+ &omap4_pmx_core 0x17c>;
++ overrun-throttle-ms = <500>;
+ };
+
+ &uart4 {
+--
+2.40.1
+
--- /dev/null
+From d2db047955304f59501d007f8c92d57d382f554e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Sep 2023 08:46:22 +0900
+Subject: ata: libata-core: Fix compilation warning in ata_dev_config_ncq()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+[ Upstream commit ed518d9ba980dc0d27c7d1dea1e627ba001d1977 ]
+
+The 24 byte length allocated to the ncq_desc string in
+ata_dev_config_lba() for ata_dev_config_ncq() to use is too short,
+causing the following gcc compilation warnings when compiling with W=1:
+
+drivers/ata/libata-core.c: In function ‘ata_dev_configure’:
+drivers/ata/libata-core.c:2378:56: warning: ‘%d’ directive output may be truncated writing between 1 and 2 bytes into a region of size between 1 and 11 [-Wformat-truncation=]
+ 2378 | snprintf(desc, desc_sz, "NCQ (depth %d/%d)%s", hdepth,
+ | ^~
+In function ‘ata_dev_config_ncq’,
+ inlined from ‘ata_dev_config_lba’ at drivers/ata/libata-core.c:2649:8,
+ inlined from ‘ata_dev_configure’ at drivers/ata/libata-core.c:2952:9:
+drivers/ata/libata-core.c:2378:41: note: directive argument in the range [1, 32]
+ 2378 | snprintf(desc, desc_sz, "NCQ (depth %d/%d)%s", hdepth,
+ | ^~~~~~~~~~~~~~~~~~~~~
+drivers/ata/libata-core.c:2378:17: note: ‘snprintf’ output between 16 and 31 bytes into a destination of size 24
+ 2378 | snprintf(desc, desc_sz, "NCQ (depth %d/%d)%s", hdepth,
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ 2379 | ddepth, aa_desc);
+ | ~~~~~~~~~~~~~~~~
+
+Avoid these warnings and the potential truncation by changing the size
+of the ncq_desc string to 32 characters.
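+
+As an aside, the warning is easy to reproduce outside the kernel. A
+minimal stand-alone sketch of the same pattern (hypothetical function,
+not the kernel code itself) produces a similar warning when compiled
+with "gcc -O2 -Wformat-truncation=2 -c" until the buffer is enlarged:
+
+	#include <stdio.h>
+
+	void report_ncq(int hdepth, int ddepth, const char *aa_desc)
+	{
+		char ncq_desc[24];	/* too small for the worst case */
+
+		snprintf(ncq_desc, sizeof(ncq_desc), "NCQ (depth %d/%d)%s",
+			 hdepth, ddepth, aa_desc);
+	}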
+
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ata/libata-core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
+index fbc231a3f7951..fa2fc1953fc26 100644
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -2456,7 +2456,7 @@ static int ata_dev_config_lba(struct ata_device *dev)
+ {
+ const u16 *id = dev->id;
+ const char *lba_desc;
+- char ncq_desc[24];
++ char ncq_desc[32];
+ int ret;
+
+ dev->flags |= ATA_DFLAG_LBA;
+--
+2.40.1
+
--- /dev/null
+From d61d50c0572fca4d03e37735c22eea57d8dda8ec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Sep 2023 09:08:40 +0900
+Subject: ata: libata-eh: Fix compilation warning in ata_eh_link_report()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+[ Upstream commit 49728bdc702391902a473b9393f1620eea32acb0 ]
+
+The 6 byte length of the tries_buf string in ata_eh_link_report() is
+too short and results in a gcc compilation warning when compiling with W=1:
+
+drivers/ata/libata-eh.c: In function ‘ata_eh_link_report’:
+drivers/ata/libata-eh.c:2371:59: warning: ‘%d’ directive output may be truncated writing between 1 and 11 bytes into a region of size 4 [-Wformat-truncation=]
+ 2371 | snprintf(tries_buf, sizeof(tries_buf), " t%d",
+ | ^~
+drivers/ata/libata-eh.c:2371:56: note: directive argument in the range [-2147483648, 4]
+ 2371 | snprintf(tries_buf, sizeof(tries_buf), " t%d",
+ | ^~~~~~
+drivers/ata/libata-eh.c:2371:17: note: ‘snprintf’ output between 4 and 14 bytes into a destination of size 6
+ 2371 | snprintf(tries_buf, sizeof(tries_buf), " t%d",
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ 2372 | ap->eh_tries);
+ | ~~~~~~~~~~~~~
+
+Avoid this warning by increasing the string size to 16 bytes.
+
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ata/libata-eh.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
+index 2a04dd36a4948..1eaaf01418ea7 100644
+--- a/drivers/ata/libata-eh.c
++++ b/drivers/ata/libata-eh.c
+@@ -2247,7 +2247,7 @@ static void ata_eh_link_report(struct ata_link *link)
+ struct ata_eh_context *ehc = &link->eh_context;
+ struct ata_queued_cmd *qc;
+ const char *frozen, *desc;
+- char tries_buf[6] = "";
++ char tries_buf[16] = "";
+ int tag, nr_failed = 0;
+
+ if (ehc->i.flags & ATA_EHI_QUIET)
+--
+2.40.1
+
--- /dev/null
+From 2194a9643e933a16a92f83d3859f3916f95a5e42 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Mar 2023 21:55:30 +0100
+Subject: atomics: Provide atomic_add_negative() variants
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit e5ab9eff46b04c5a04778e40d7092fed3fda52ca ]
+
+atomic_add_negative() does not provide the relaxed/acquire/release
+variants.
+
+Provide them in preparation for a new scalable reference count algorithm.
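+
+As an illustration (a sketch using the new API, not part of this
+patch), a reference count drop that only needs release ordering can
+then avoid the stronger fully-ordered variant:
+
+	static inline bool put_ref(atomic_t *refcnt)
+	{
+		/* Release-ordered decrement; returns true when the
+		 * result is negative.
+		 */
+		return atomic_add_negative_release(-1, refcnt);
+	}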
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Mark Rutland <mark.rutland@arm.com>
+Link: https://lore.kernel.org/r/20230323102800.101763813@linutronix.de
+Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/atomic/atomic-arch-fallback.h | 208 +++++++++++++++++++-
+ include/linux/atomic/atomic-instrumented.h | 68 ++++++-
+ include/linux/atomic/atomic-long.h | 38 +++-
+ scripts/atomic/atomics.tbl | 2 +-
+ scripts/atomic/fallbacks/add_negative | 11 +-
+ 5 files changed, 309 insertions(+), 18 deletions(-)
+
+diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h
+index 77bc5522e61c6..4226379a232d5 100644
+--- a/include/linux/atomic/atomic-arch-fallback.h
++++ b/include/linux/atomic/atomic-arch-fallback.h
+@@ -1208,15 +1208,21 @@ arch_atomic_inc_and_test(atomic_t *v)
+ #define arch_atomic_inc_and_test arch_atomic_inc_and_test
+ #endif
+
++#ifndef arch_atomic_add_negative_relaxed
++#ifdef arch_atomic_add_negative
++#define arch_atomic_add_negative_acquire arch_atomic_add_negative
++#define arch_atomic_add_negative_release arch_atomic_add_negative
++#define arch_atomic_add_negative_relaxed arch_atomic_add_negative
++#endif /* arch_atomic_add_negative */
++
+ #ifndef arch_atomic_add_negative
+ /**
+- * arch_atomic_add_negative - add and test if negative
++ * arch_atomic_add_negative - Add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+- * Atomically adds @i to @v and returns true
+- * if the result is negative, or false when
+- * result is greater than or equal to zero.
++ * Atomically adds @i to @v and returns true if the result is negative,
++ * or false when the result is greater than or equal to zero.
+ */
+ static __always_inline bool
+ arch_atomic_add_negative(int i, atomic_t *v)
+@@ -1226,6 +1232,95 @@ arch_atomic_add_negative(int i, atomic_t *v)
+ #define arch_atomic_add_negative arch_atomic_add_negative
+ #endif
+
++#ifndef arch_atomic_add_negative_acquire
++/**
++ * arch_atomic_add_negative_acquire - Add and test if negative
++ * @i: integer value to add
++ * @v: pointer of type atomic_t
++ *
++ * Atomically adds @i to @v and returns true if the result is negative,
++ * or false when the result is greater than or equal to zero.
++ */
++static __always_inline bool
++arch_atomic_add_negative_acquire(int i, atomic_t *v)
++{
++ return arch_atomic_add_return_acquire(i, v) < 0;
++}
++#define arch_atomic_add_negative_acquire arch_atomic_add_negative_acquire
++#endif
++
++#ifndef arch_atomic_add_negative_release
++/**
++ * arch_atomic_add_negative_release - Add and test if negative
++ * @i: integer value to add
++ * @v: pointer of type atomic_t
++ *
++ * Atomically adds @i to @v and returns true if the result is negative,
++ * or false when the result is greater than or equal to zero.
++ */
++static __always_inline bool
++arch_atomic_add_negative_release(int i, atomic_t *v)
++{
++ return arch_atomic_add_return_release(i, v) < 0;
++}
++#define arch_atomic_add_negative_release arch_atomic_add_negative_release
++#endif
++
++#ifndef arch_atomic_add_negative_relaxed
++/**
++ * arch_atomic_add_negative_relaxed - Add and test if negative
++ * @i: integer value to add
++ * @v: pointer of type atomic_t
++ *
++ * Atomically adds @i to @v and returns true if the result is negative,
++ * or false when the result is greater than or equal to zero.
++ */
++static __always_inline bool
++arch_atomic_add_negative_relaxed(int i, atomic_t *v)
++{
++ return arch_atomic_add_return_relaxed(i, v) < 0;
++}
++#define arch_atomic_add_negative_relaxed arch_atomic_add_negative_relaxed
++#endif
++
++#else /* arch_atomic_add_negative_relaxed */
++
++#ifndef arch_atomic_add_negative_acquire
++static __always_inline bool
++arch_atomic_add_negative_acquire(int i, atomic_t *v)
++{
++ bool ret = arch_atomic_add_negative_relaxed(i, v);
++ __atomic_acquire_fence();
++ return ret;
++}
++#define arch_atomic_add_negative_acquire arch_atomic_add_negative_acquire
++#endif
++
++#ifndef arch_atomic_add_negative_release
++static __always_inline bool
++arch_atomic_add_negative_release(int i, atomic_t *v)
++{
++ __atomic_release_fence();
++ return arch_atomic_add_negative_relaxed(i, v);
++}
++#define arch_atomic_add_negative_release arch_atomic_add_negative_release
++#endif
++
++#ifndef arch_atomic_add_negative
++static __always_inline bool
++arch_atomic_add_negative(int i, atomic_t *v)
++{
++ bool ret;
++ __atomic_pre_full_fence();
++ ret = arch_atomic_add_negative_relaxed(i, v);
++ __atomic_post_full_fence();
++ return ret;
++}
++#define arch_atomic_add_negative arch_atomic_add_negative
++#endif
++
++#endif /* arch_atomic_add_negative_relaxed */
++
+ #ifndef arch_atomic_fetch_add_unless
+ /**
+ * arch_atomic_fetch_add_unless - add unless the number is already a given value
+@@ -2329,15 +2424,21 @@ arch_atomic64_inc_and_test(atomic64_t *v)
+ #define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
+ #endif
+
++#ifndef arch_atomic64_add_negative_relaxed
++#ifdef arch_atomic64_add_negative
++#define arch_atomic64_add_negative_acquire arch_atomic64_add_negative
++#define arch_atomic64_add_negative_release arch_atomic64_add_negative
++#define arch_atomic64_add_negative_relaxed arch_atomic64_add_negative
++#endif /* arch_atomic64_add_negative */
++
+ #ifndef arch_atomic64_add_negative
+ /**
+- * arch_atomic64_add_negative - add and test if negative
++ * arch_atomic64_add_negative - Add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic64_t
+ *
+- * Atomically adds @i to @v and returns true
+- * if the result is negative, or false when
+- * result is greater than or equal to zero.
++ * Atomically adds @i to @v and returns true if the result is negative,
++ * or false when the result is greater than or equal to zero.
+ */
+ static __always_inline bool
+ arch_atomic64_add_negative(s64 i, atomic64_t *v)
+@@ -2347,6 +2448,95 @@ arch_atomic64_add_negative(s64 i, atomic64_t *v)
+ #define arch_atomic64_add_negative arch_atomic64_add_negative
+ #endif
+
++#ifndef arch_atomic64_add_negative_acquire
++/**
++ * arch_atomic64_add_negative_acquire - Add and test if negative
++ * @i: integer value to add
++ * @v: pointer of type atomic64_t
++ *
++ * Atomically adds @i to @v and returns true if the result is negative,
++ * or false when the result is greater than or equal to zero.
++ */
++static __always_inline bool
++arch_atomic64_add_negative_acquire(s64 i, atomic64_t *v)
++{
++ return arch_atomic64_add_return_acquire(i, v) < 0;
++}
++#define arch_atomic64_add_negative_acquire arch_atomic64_add_negative_acquire
++#endif
++
++#ifndef arch_atomic64_add_negative_release
++/**
++ * arch_atomic64_add_negative_release - Add and test if negative
++ * @i: integer value to add
++ * @v: pointer of type atomic64_t
++ *
++ * Atomically adds @i to @v and returns true if the result is negative,
++ * or false when the result is greater than or equal to zero.
++ */
++static __always_inline bool
++arch_atomic64_add_negative_release(s64 i, atomic64_t *v)
++{
++ return arch_atomic64_add_return_release(i, v) < 0;
++}
++#define arch_atomic64_add_negative_release arch_atomic64_add_negative_release
++#endif
++
++#ifndef arch_atomic64_add_negative_relaxed
++/**
++ * arch_atomic64_add_negative_relaxed - Add and test if negative
++ * @i: integer value to add
++ * @v: pointer of type atomic64_t
++ *
++ * Atomically adds @i to @v and returns true if the result is negative,
++ * or false when the result is greater than or equal to zero.
++ */
++static __always_inline bool
++arch_atomic64_add_negative_relaxed(s64 i, atomic64_t *v)
++{
++ return arch_atomic64_add_return_relaxed(i, v) < 0;
++}
++#define arch_atomic64_add_negative_relaxed arch_atomic64_add_negative_relaxed
++#endif
++
++#else /* arch_atomic64_add_negative_relaxed */
++
++#ifndef arch_atomic64_add_negative_acquire
++static __always_inline bool
++arch_atomic64_add_negative_acquire(s64 i, atomic64_t *v)
++{
++ bool ret = arch_atomic64_add_negative_relaxed(i, v);
++ __atomic_acquire_fence();
++ return ret;
++}
++#define arch_atomic64_add_negative_acquire arch_atomic64_add_negative_acquire
++#endif
++
++#ifndef arch_atomic64_add_negative_release
++static __always_inline bool
++arch_atomic64_add_negative_release(s64 i, atomic64_t *v)
++{
++ __atomic_release_fence();
++ return arch_atomic64_add_negative_relaxed(i, v);
++}
++#define arch_atomic64_add_negative_release arch_atomic64_add_negative_release
++#endif
++
++#ifndef arch_atomic64_add_negative
++static __always_inline bool
++arch_atomic64_add_negative(s64 i, atomic64_t *v)
++{
++ bool ret;
++ __atomic_pre_full_fence();
++ ret = arch_atomic64_add_negative_relaxed(i, v);
++ __atomic_post_full_fence();
++ return ret;
++}
++#define arch_atomic64_add_negative arch_atomic64_add_negative
++#endif
++
++#endif /* arch_atomic64_add_negative_relaxed */
++
+ #ifndef arch_atomic64_fetch_add_unless
+ /**
+ * arch_atomic64_fetch_add_unless - add unless the number is already a given value
+@@ -2456,4 +2646,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v)
+ #endif
+
+ #endif /* _LINUX_ATOMIC_FALLBACK_H */
+-// b5e87bdd5ede61470c29f7a7e4de781af3770f09
++// 00071fffa021cec66f6290d706d69c91df87bade
+diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h
+index 7a139ec030b0c..0496816738ca9 100644
+--- a/include/linux/atomic/atomic-instrumented.h
++++ b/include/linux/atomic/atomic-instrumented.h
+@@ -592,6 +592,28 @@ atomic_add_negative(int i, atomic_t *v)
+ return arch_atomic_add_negative(i, v);
+ }
+
++static __always_inline bool
++atomic_add_negative_acquire(int i, atomic_t *v)
++{
++ instrument_atomic_read_write(v, sizeof(*v));
++ return arch_atomic_add_negative_acquire(i, v);
++}
++
++static __always_inline bool
++atomic_add_negative_release(int i, atomic_t *v)
++{
++ kcsan_release();
++ instrument_atomic_read_write(v, sizeof(*v));
++ return arch_atomic_add_negative_release(i, v);
++}
++
++static __always_inline bool
++atomic_add_negative_relaxed(int i, atomic_t *v)
++{
++ instrument_atomic_read_write(v, sizeof(*v));
++ return arch_atomic_add_negative_relaxed(i, v);
++}
++
+ static __always_inline int
+ atomic_fetch_add_unless(atomic_t *v, int a, int u)
+ {
+@@ -1211,6 +1233,28 @@ atomic64_add_negative(s64 i, atomic64_t *v)
+ return arch_atomic64_add_negative(i, v);
+ }
+
++static __always_inline bool
++atomic64_add_negative_acquire(s64 i, atomic64_t *v)
++{
++ instrument_atomic_read_write(v, sizeof(*v));
++ return arch_atomic64_add_negative_acquire(i, v);
++}
++
++static __always_inline bool
++atomic64_add_negative_release(s64 i, atomic64_t *v)
++{
++ kcsan_release();
++ instrument_atomic_read_write(v, sizeof(*v));
++ return arch_atomic64_add_negative_release(i, v);
++}
++
++static __always_inline bool
++atomic64_add_negative_relaxed(s64 i, atomic64_t *v)
++{
++ instrument_atomic_read_write(v, sizeof(*v));
++ return arch_atomic64_add_negative_relaxed(i, v);
++}
++
+ static __always_inline s64
+ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+ {
+@@ -1830,6 +1874,28 @@ atomic_long_add_negative(long i, atomic_long_t *v)
+ return arch_atomic_long_add_negative(i, v);
+ }
+
++static __always_inline bool
++atomic_long_add_negative_acquire(long i, atomic_long_t *v)
++{
++ instrument_atomic_read_write(v, sizeof(*v));
++ return arch_atomic_long_add_negative_acquire(i, v);
++}
++
++static __always_inline bool
++atomic_long_add_negative_release(long i, atomic_long_t *v)
++{
++ kcsan_release();
++ instrument_atomic_read_write(v, sizeof(*v));
++ return arch_atomic_long_add_negative_release(i, v);
++}
++
++static __always_inline bool
++atomic_long_add_negative_relaxed(long i, atomic_long_t *v)
++{
++ instrument_atomic_read_write(v, sizeof(*v));
++ return arch_atomic_long_add_negative_relaxed(i, v);
++}
++
+ static __always_inline long
+ atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
+ {
+@@ -2083,4 +2149,4 @@ atomic_long_dec_if_positive(atomic_long_t *v)
+ })
+
+ #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
+-// 764f741eb77a7ad565dc8d99ce2837d5542e8aee
++// 1b485de9cbaa4900de59e14ee2084357eaeb1c3a
+diff --git a/include/linux/atomic/atomic-long.h b/include/linux/atomic/atomic-long.h
+index 800b8c35992d1..2fc51ba66bebd 100644
+--- a/include/linux/atomic/atomic-long.h
++++ b/include/linux/atomic/atomic-long.h
+@@ -479,6 +479,24 @@ arch_atomic_long_add_negative(long i, atomic_long_t *v)
+ return arch_atomic64_add_negative(i, v);
+ }
+
++static __always_inline bool
++arch_atomic_long_add_negative_acquire(long i, atomic_long_t *v)
++{
++ return arch_atomic64_add_negative_acquire(i, v);
++}
++
++static __always_inline bool
++arch_atomic_long_add_negative_release(long i, atomic_long_t *v)
++{
++ return arch_atomic64_add_negative_release(i, v);
++}
++
++static __always_inline bool
++arch_atomic_long_add_negative_relaxed(long i, atomic_long_t *v)
++{
++ return arch_atomic64_add_negative_relaxed(i, v);
++}
++
+ static __always_inline long
+ arch_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
+ {
+@@ -973,6 +991,24 @@ arch_atomic_long_add_negative(long i, atomic_long_t *v)
+ return arch_atomic_add_negative(i, v);
+ }
+
++static __always_inline bool
++arch_atomic_long_add_negative_acquire(long i, atomic_long_t *v)
++{
++ return arch_atomic_add_negative_acquire(i, v);
++}
++
++static __always_inline bool
++arch_atomic_long_add_negative_release(long i, atomic_long_t *v)
++{
++ return arch_atomic_add_negative_release(i, v);
++}
++
++static __always_inline bool
++arch_atomic_long_add_negative_relaxed(long i, atomic_long_t *v)
++{
++ return arch_atomic_add_negative_relaxed(i, v);
++}
++
+ static __always_inline long
+ arch_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
+ {
+@@ -1011,4 +1047,4 @@ arch_atomic_long_dec_if_positive(atomic_long_t *v)
+
+ #endif /* CONFIG_64BIT */
+ #endif /* _LINUX_ATOMIC_LONG_H */
+-// e8f0e08ff072b74d180eabe2ad001282b38c2c88
++// a194c07d7d2f4b0e178d3c118c919775d5d65f50
+diff --git a/scripts/atomic/atomics.tbl b/scripts/atomic/atomics.tbl
+index fbee2f6190d9e..85ca8d9b5c279 100644
+--- a/scripts/atomic/atomics.tbl
++++ b/scripts/atomic/atomics.tbl
+@@ -33,7 +33,7 @@ try_cmpxchg B v p:old i:new
+ sub_and_test b i v
+ dec_and_test b v
+ inc_and_test b v
+-add_negative b i v
++add_negative B i v
+ add_unless fb v i:a i:u
+ inc_not_zero b v
+ inc_unless_negative b v
+diff --git a/scripts/atomic/fallbacks/add_negative b/scripts/atomic/fallbacks/add_negative
+index 15caa2eb23712..e5980abf5904e 100755
+--- a/scripts/atomic/fallbacks/add_negative
++++ b/scripts/atomic/fallbacks/add_negative
+@@ -1,16 +1,15 @@
+ cat <<EOF
+ /**
+- * arch_${atomic}_add_negative - add and test if negative
++ * arch_${atomic}_add_negative${order} - Add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type ${atomic}_t
+ *
+- * Atomically adds @i to @v and returns true
+- * if the result is negative, or false when
+- * result is greater than or equal to zero.
++ * Atomically adds @i to @v and returns true if the result is negative,
++ * or false when the result is greater than or equal to zero.
+ */
+ static __always_inline bool
+-arch_${atomic}_add_negative(${int} i, ${atomic}_t *v)
++arch_${atomic}_add_negative${order}(${int} i, ${atomic}_t *v)
+ {
+- return arch_${atomic}_add_return(i, v) < 0;
++ return arch_${atomic}_add_return${order}(i, v) < 0;
+ }
+ EOF
+--
+2.40.1
+
--- /dev/null
+From 47ac5394259bfc9dd07646a58feea1be4e624eef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Mar 2023 21:55:31 +0100
+Subject: atomics: Provide rcuref - scalable reference counting
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit ee1ee6db07795d9637bc5e8993a8ddcf886541ef ]
+
+atomic_t based reference counting, including refcount_t, uses
+atomic_inc_not_zero() for acquiring a reference. atomic_inc_not_zero() is
+implemented with an atomic_try_cmpxchg() loop. High contention of the
+reference count leads to retry loops and scales badly. There is nothing to
+improve on this implementation as the semantics have to be preserved.
+
+Provide rcuref as a scalable alternative solution which is suitable for RCU
+managed objects. Similar to refcount_t it comes with overflow and underflow
+detection and mitigation.
+
+rcuref treats the underlying atomic_t as an unsigned integer and partitions
+this space into zones:
+
+ 0x00000000 - 0x7FFFFFFF valid zone (1 .. (INT_MAX + 1) references)
+ 0x80000000 - 0xBFFFFFFF saturation zone
+ 0xC0000000 - 0xFFFFFFFE dead zone
+ 0xFFFFFFFF no reference
+
+rcuref_get() unconditionally increments the reference count with
+atomic_add_negative_relaxed(). rcuref_put() unconditionally decrements the
+reference count with atomic_add_negative_release().
+
+This unconditional increment avoids the inc_not_zero() problem, but
+requires a more complex implementation on the put() side when the count
+drops from 0 to -1.
+
+When this transition is detected, an attempt is made to mark the reference
+count dead, by setting it to the midpoint of the dead zone with a single
+atomic_cmpxchg_release() operation. This operation can fail due to a
+concurrent rcuref_get() elevating the reference count from -1 to 0 again.
+
+If the unconditional increment in rcuref_get() hits a reference count which
+is marked dead (or saturated) it will detect it after the fact and bring
+back the reference count to the midpoint of the respective zone. The zones
+provide enough tolerance which makes it practically impossible to escape
+from a zone.
+
+The racy implementation of rcuref_put() requires protecting rcuref_put()
+against a grace period ending in order to prevent a subtle use after
+free. As RCU is the only mechanism which allows protecting against that,
+it is not possible to fully replace the atomic_inc_not_zero() based
+implementation of refcount_t with this scheme.
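+
+For a rough picture of the intended usage, here is a sketch with a
+hypothetical RCU managed object (the helpers are illustrative; only
+rcuref_get()/rcuref_put() are the API added by this patch):
+
+	struct obj {
+		rcuref_t ref;
+		struct rcu_head rcu;
+	};
+
+	/* Lookup side: fails once the last reference was dropped */
+	static struct obj *obj_get(struct obj *p)
+	{
+		rcu_read_lock();
+		if (p && !rcuref_get(&p->ref))
+			p = NULL;
+		rcu_read_unlock();
+		return p;
+	}
+
+	static void obj_put(struct obj *p)
+	{
+		/* True only for the final, unraced reference drop */
+		if (rcuref_put(&p->ref))
+			kfree_rcu(p, rcu);
+	}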
+
+The final drop is slightly more expensive than the atomic_dec_return()
+counterpart, but that's not the case which this is optimized for. The
+optimization is for the high-frequency get()/put() pairs and their
+scalability.
+
+The performance of an uncontended rcuref_get()/put() pair where the put()
+is not dropping the last reference is still on par with the plain atomic
+operations, while at the same time providing overflow and underflow
+detection and mitigation.
+
+The performance of rcuref compared to plain atomic_inc_not_zero() and
+atomic_dec_return() based reference counting under contention:
+
+ - Micro benchmark: All CPUs running an increment/decrement loop on an
+ elevated reference count, which means the 0 to -1 transition never
+ happens.
+
+ The performance gain depends on microarchitecture and the number of
+ CPUs and has been observed in the range of 1.3X to 4.7X.
+
+ - Conversion of dst_entry::__refcnt to rcuref and testing with the
+ localhost memtier/memcached benchmark. That benchmark shows the
+ reference count contention prominently.
+
+ The performance gain depends on microarchitecture and the number of
+ CPUs and has been observed in the range of 1.1X to 2.6X over the
+ previous fix for the false sharing issue vs. struct
+ dst_entry::__refcnt.
+
+ When memtier is run over a real 1Gb network connection, there is a
+ small gain on top of the false sharing fix. The two changes combined
+ result in a 2%-5% total gain for that networked test.
+
+Reported-by: Wangyang Guo <wangyang.guo@intel.com>
+Reported-by: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20230323102800.158429195@linutronix.de
+Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/rcuref.h | 155 +++++++++++++++++++++++
+ include/linux/types.h | 6 +
+ lib/Makefile | 2 +-
+ lib/rcuref.c | 281 +++++++++++++++++++++++++++++++++++++++++
+ 4 files changed, 443 insertions(+), 1 deletion(-)
+ create mode 100644 include/linux/rcuref.h
+ create mode 100644 lib/rcuref.c
+
+diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h
+new file mode 100644
+index 0000000000000..2c8bfd0f1b6b3
+--- /dev/null
++++ b/include/linux/rcuref.h
+@@ -0,0 +1,155 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++#ifndef _LINUX_RCUREF_H
++#define _LINUX_RCUREF_H
++
++#include <linux/atomic.h>
++#include <linux/bug.h>
++#include <linux/limits.h>
++#include <linux/lockdep.h>
++#include <linux/preempt.h>
++#include <linux/rcupdate.h>
++
++#define RCUREF_ONEREF 0x00000000U
++#define RCUREF_MAXREF 0x7FFFFFFFU
++#define RCUREF_SATURATED 0xA0000000U
++#define RCUREF_RELEASED 0xC0000000U
++#define RCUREF_DEAD 0xE0000000U
++#define RCUREF_NOREF 0xFFFFFFFFU
++
++/**
++ * rcuref_init - Initialize a rcuref reference count with the given reference count
++ * @ref: Pointer to the reference count
++ * @cnt: The initial reference count typically '1'
++ */
++static inline void rcuref_init(rcuref_t *ref, unsigned int cnt)
++{
++ atomic_set(&ref->refcnt, cnt - 1);
++}
++
++/**
++ * rcuref_read - Read the number of held reference counts of a rcuref
++ * @ref: Pointer to the reference count
++ *
++ * Return: The number of held references (0 ... N)
++ */
++static inline unsigned int rcuref_read(rcuref_t *ref)
++{
++ unsigned int c = atomic_read(&ref->refcnt);
++
++ /* Return 0 if within the DEAD zone. */
++ return c >= RCUREF_RELEASED ? 0 : c + 1;
++}
++
++extern __must_check bool rcuref_get_slowpath(rcuref_t *ref);
++
++/**
++ * rcuref_get - Acquire one reference on a rcuref reference count
++ * @ref: Pointer to the reference count
++ *
++ * Similar to atomic_inc_not_zero() but saturates at RCUREF_MAXREF.
++ *
++ * Provides no memory ordering, it is assumed the caller has guaranteed the
++ * object memory to be stable (RCU, etc.). It does provide a control dependency
++ * and thereby orders future stores. See documentation in lib/rcuref.c
++ *
++ * Return:
++ * False if the attempt to acquire a reference failed. This happens
++ * when the last reference has been put already
++ *
++ * True if a reference was successfully acquired
++ */
++static inline __must_check bool rcuref_get(rcuref_t *ref)
++{
++ /*
++ * Unconditionally increase the reference count. The saturation and
++ * dead zones provide enough tolerance for this.
++ */
++ if (likely(!atomic_add_negative_relaxed(1, &ref->refcnt)))
++ return true;
++
++ /* Handle the cases inside the saturation and dead zones */
++ return rcuref_get_slowpath(ref);
++}
++
++extern __must_check bool rcuref_put_slowpath(rcuref_t *ref);
++
++/*
++ * Internal helper. Do not invoke directly.
++ */
++static __always_inline __must_check bool __rcuref_put(rcuref_t *ref)
++{
++ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && preemptible(),
++ "suspicious rcuref_put_rcusafe() usage");
++ /*
++ * Unconditionally decrease the reference count. The saturation and
++ * dead zones provide enough tolerance for this.
++ */
++ if (likely(!atomic_add_negative_release(-1, &ref->refcnt)))
++ return false;
++
++ /*
++ * Handle the last reference drop and cases inside the saturation
++ * and dead zones.
++ */
++ return rcuref_put_slowpath(ref);
++}
++
++/**
++ * rcuref_put_rcusafe -- Release one reference for a rcuref reference count RCU safe
++ * @ref: Pointer to the reference count
++ *
++ * Provides release memory ordering, such that prior loads and stores are done
++ * before, and provides an acquire ordering on success such that free()
++ * must come after.
++ *
++ * Can be invoked from contexts, which guarantee that no grace period can
++ * happen which would free the object concurrently if the decrement drops
++ * the last reference and the slowpath races against a concurrent get() and
++ * put() pair. rcu_read_lock()'ed and atomic contexts qualify.
++ *
++ * Return:
++ * True if this was the last reference with no future references
++ * possible. This signals the caller that it can safely release the
++ * object which is protected by the reference counter.
++ *
++ * False if there are still active references or the put() raced
++ * with a concurrent get()/put() pair. Caller is not allowed to
++ * release the protected object.
++ */
++static inline __must_check bool rcuref_put_rcusafe(rcuref_t *ref)
++{
++ return __rcuref_put(ref);
++}
++
++/**
++ * rcuref_put -- Release one reference for a rcuref reference count
++ * @ref: Pointer to the reference count
++ *
++ * Can be invoked from any context.
++ *
++ * Provides release memory ordering, such that prior loads and stores are done
++ * before, and provides an acquire ordering on success such that free()
++ * must come after.
++ *
++ * Return:
++ *
++ * True if this was the last reference with no future references
++ * possible. This signals the caller that it can safely schedule the
++ * object, which is protected by the reference counter, for
++ * deconstruction.
++ *
++ * False if there are still active references or the put() raced
++ * with a concurrent get()/put() pair. Caller is not allowed to
++ * deconstruct the protected object.
++ */
++static inline __must_check bool rcuref_put(rcuref_t *ref)
++{
++ bool released;
++
++ preempt_disable();
++ released = __rcuref_put(ref);
++ preempt_enable();
++ return released;
++}
++
++#endif
+diff --git a/include/linux/types.h b/include/linux/types.h
+index ea8cf60a8a795..688fb943556a1 100644
+--- a/include/linux/types.h
++++ b/include/linux/types.h
+@@ -175,6 +175,12 @@ typedef struct {
+ } atomic64_t;
+ #endif
+
++typedef struct {
++ atomic_t refcnt;
++} rcuref_t;
++
++#define RCUREF_INIT(i) { .refcnt = ATOMIC_INIT(i - 1) }
++
+ struct list_head {
+ struct list_head *next, *prev;
+ };
+diff --git a/lib/Makefile b/lib/Makefile
+index 5ffe72ec99797..afd78c497ec76 100644
+--- a/lib/Makefile
++++ b/lib/Makefile
+@@ -47,7 +47,7 @@ obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \
+ list_sort.o uuid.o iov_iter.o clz_ctz.o \
+ bsearch.o find_bit.o llist.o memweight.o kfifo.o \
+ percpu-refcount.o rhashtable.o base64.o \
+- once.o refcount.o usercopy.o errseq.o bucket_locks.o \
++ once.o refcount.o rcuref.o usercopy.o errseq.o bucket_locks.o \
+ generic-radix-tree.o
+ obj-$(CONFIG_STRING_SELFTEST) += test_string.o
+ obj-y += string_helpers.o
+diff --git a/lib/rcuref.c b/lib/rcuref.c
+new file mode 100644
+index 0000000000000..5ec00a4a64d11
+--- /dev/null
++++ b/lib/rcuref.c
+@@ -0,0 +1,281 @@
++// SPDX-License-Identifier: GPL-2.0-only
++
++/*
++ * rcuref - A scalable reference count implementation for RCU managed objects
++ *
++ * rcuref is provided to replace open coded reference count implementations
++ * based on atomic_t. It explicitly protects RCU managed objects which can
++ * be visible even after the last reference has been dropped and the object
++ * is heading towards destruction.
++ *
++ * A common usage pattern is:
++ *
++ * get()
++ * rcu_read_lock();
++ * p = get_ptr();
++ * if (p && !atomic_inc_not_zero(&p->refcnt))
++ * p = NULL;
++ * rcu_read_unlock();
++ * return p;
++ *
++ * put()
++ * if (!atomic_dec_return(&p->refcnt)) {
++ * remove_ptr(p);
++ * kfree_rcu(p, rcu);
++ * }
++ *
++ * atomic_inc_not_zero() is implemented with a try_cmpxchg() loop which has
++ * O(N^2) behaviour under contention with N concurrent operations.
++ *
++ * rcuref uses atomic_add_negative_relaxed() for the fast path, which scales
++ * better under contention.
++ *
++ * Why not refcount?
++ * =================
++ *
++ * In principle it should be possible to make refcount use the rcuref
++ * scheme, but the destruction race described below cannot be prevented
++ * unless the protected object is RCU managed.
++ *
++ * Theory of operation
++ * ===================
++ *
++ * rcuref uses an unsigned integer reference counter. As long as the
++ * counter value is greater than or equal to RCUREF_ONEREF and not larger
++ * than RCUREF_MAXREF the reference is alive:
++ *
++ * ONEREF MAXREF SATURATED RELEASED DEAD NOREF
++ * 0 0x7FFFFFFF 0x80000000 0xA0000000 0xBFFFFFFF 0xC0000000 0xE0000000 0xFFFFFFFF
++ * <---valid --------> <-------saturation zone-------> <-----dead zone----->
++ *
++ * The get() and put() operations do unconditional increments and
++ * decrements. The result is checked after the operation. This optimizes
++ * for the fast path.
++ *
++ * If the reference count is saturated or dead, then the increments and
++ * decrements are not harmful as the reference count still stays in the
++ * respective zones and is always set back to SATURATED resp. DEAD. The
++ * zones have room for 2^28 racing operations in each direction, which
++ * makes it practically impossible to escape the zones.
++ *
++ * Once the last reference is dropped the reference count becomes
++ * RCUREF_NOREF which forces rcuref_put() into the slowpath operation. The
++ * slowpath then tries to set the reference count from RCUREF_NOREF to
++ * RCUREF_DEAD via a cmpxchg(). This opens a small window where a
++ * concurrent rcuref_get() can acquire the reference count and bring it
++ * back to RCUREF_ONEREF or even drop the reference again and mark it DEAD.
++ *
++ * If the cmpxchg() succeeds then a concurrent rcuref_get() will result in
++ * DEAD + 1, which is inside the dead zone. If that happens the reference
++ * count is put back to DEAD.
++ *
++ * The actual race is possible due to the unconditional increment and
++ * decrements in rcuref_get() and rcuref_put():
++ *
++ * T1 T2
++ * get() put()
++ * if (atomic_add_negative(-1, &ref->refcnt))
++ * succeeds-> atomic_cmpxchg(&ref->refcnt, NOREF, DEAD);
++ *
++ * atomic_add_negative(1, &ref->refcnt); <- Elevates refcount to DEAD + 1
++ *
++ * As the result of T1's add is negative, the get() goes into the slow path
++ * and observes refcnt being in the dead zone, which makes the operation fail.
++ *
++ * Possible critical states:
++ *
++ * Context Counter References Operation
++ * T1 0 1 init()
++ * T2 1 2 get()
++ * T1 0 1 put()
++ * T2 -1 0 put() tries to mark dead
++ * T1 0 1 get()
++ * T2 0 1 put() mark dead fails
++ * T1 -1 0 put() tries to mark dead
++ * T1 DEAD 0 put() mark dead succeeds
++ * T2 DEAD+1 0 get() fails and puts it back to DEAD
++ *
++ * Of course there are more complex scenarios, but the above illustrates
++ * the working principle. The rest is left to the imagination of the
++ * reader.
++ *
++ * Deconstruction race
++ * ===================
++ *
++ * The release operation must be protected by prohibiting a grace period in
++ * order to prevent a possible use after free:
++ *
++ * T1 T2
++ * put() get()
++ * // ref->refcnt = ONEREF
++ * if (!atomic_add_negative(-1, &ref->refcnt))
++ * return false; <- Not taken
++ *
++ * // ref->refcnt == NOREF
++ * --> preemption
++ * // Elevates ref->refcnt to ONEREF
++ * if (!atomic_add_negative(1, &ref->refcnt))
++ * return true; <- taken
++ *
++ * if (put(&p->ref)) { <-- Succeeds
++ * remove_pointer(p);
++ * kfree_rcu(p, rcu);
++ * }
++ *
++ * RCU grace period ends, object is freed
++ *
++ * atomic_cmpxchg(&ref->refcnt, NOREF, DEAD); <- UAF
++ *
++ * This is prevented by disabling preemption around the put() operation as
++ * that's in most kernel configurations cheaper than a rcu_read_lock() /
++ * rcu_read_unlock() pair and in many cases even a NOOP. In any case it
++ * prevents the grace period which keeps the object alive until all put()
++ * operations complete.
++ *
++ * Saturation protection
++ * =====================
++ *
++ * The reference count has a saturation limit RCUREF_MAXREF (INT_MAX).
++ * Once this is exceeded the reference count becomes stale by setting it
++ * to RCUREF_SATURATED, which will cause a memory leak, but it prevents
++ * wraparounds which obviously cause worse problems than a memory
++ * leak. When saturation is reached a warning is emitted.
++ *
++ * Race conditions
++ * ===============
++ *
++ * All reference count increment/decrement operations are unconditional and
++ * only verified after the fact. This optimizes for the good case and takes
++ * the occasional race vs. a dead or already saturated refcount into
++ * account. The saturation and dead zones are large enough to accommodate
++ * that.
++ *
++ * Memory ordering
++ * ===============
++ *
++ * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
++ * and provide only what is strictly required for refcounts.
++ *
++ * The increments are fully relaxed; these will not provide ordering. The
++ * rationale is that whatever is used to obtain the object to increase the
++ * reference count on will provide the ordering. For locked data
++ * structures, it's the lock acquire; for RCU/lockless data structures it's
++ * the dependent load.
++ *
++ * rcuref_get() provides a control dependency ordering future stores which
++ * ensures that the object is not modified when acquiring a reference
++ * fails.
++ *
++ * rcuref_put() provides release order, i.e. all prior loads and stores
++ * will be issued before. It also provides a control dependency ordering
++ * against the subsequent destruction of the object.
++ *
++ * If rcuref_put() successfully dropped the last reference and marked the
++ * object DEAD it also provides acquire ordering.
++ */
++
++#include <linux/export.h>
++#include <linux/rcuref.h>
++
++/**
++ * rcuref_get_slowpath - Slowpath of rcuref_get()
++ * @ref: Pointer to the reference count
++ *
++ * Invoked when the reference count is outside of the valid zone.
++ *
++ * Return:
++ * False if the reference count was already marked dead
++ *
++ * True if the reference count is saturated, which prevents the
++ * object from being deconstructed ever.
++ */
++bool rcuref_get_slowpath(rcuref_t *ref)
++{
++ unsigned int cnt = atomic_read(&ref->refcnt);
++
++ /*
++ * If the reference count was already marked dead, undo the
++ * increment so it stays in the middle of the dead zone and return
++ * fail.
++ */
++ if (cnt >= RCUREF_RELEASED) {
++ atomic_set(&ref->refcnt, RCUREF_DEAD);
++ return false;
++ }
++
++ /*
++ * If it was saturated, warn and mark it so. In case the increment
++ * was already on a saturated value restore the saturation
++ * marker. This keeps it in the middle of the saturation zone and
++ * prevents the reference count from overflowing. This leaks the
++ * object memory, but prevents the obvious reference count overflow
++ * damage.
++ */
++ if (WARN_ONCE(cnt > RCUREF_MAXREF, "rcuref saturated - leaking memory"))
++ atomic_set(&ref->refcnt, RCUREF_SATURATED);
++ return true;
++}
++EXPORT_SYMBOL_GPL(rcuref_get_slowpath);
++
++/**
++ * rcuref_put_slowpath - Slowpath of __rcuref_put()
++ * @ref: Pointer to the reference count
++ *
++ * Invoked when the reference count is outside of the valid zone.
++ *
++ * Return:
++ * True if this was the last reference with no future references
++ * possible. This signals the caller that it can safely schedule the
++ * object, which is protected by the reference counter, for
++ * deconstruction.
++ *
++ * False if there are still active references or the put() raced
++ * with a concurrent get()/put() pair. Caller is not allowed to
++ * deconstruct the protected object.
++ */
++bool rcuref_put_slowpath(rcuref_t *ref)
++{
++ unsigned int cnt = atomic_read(&ref->refcnt);
++
++ /* Did this drop the last reference? */
++ if (likely(cnt == RCUREF_NOREF)) {
++ /*
++ * Carefully try to set the reference count to RCUREF_DEAD.
++ *
++ * This can fail if a concurrent get() operation has
++ * elevated it again or the corresponding put() even marked
++ * it dead already. Both are valid situations and do not
++ * require a retry. If this fails the caller is not
++ * allowed to deconstruct the object.
++ */
++ if (atomic_cmpxchg_release(&ref->refcnt, RCUREF_NOREF, RCUREF_DEAD) != RCUREF_NOREF)
++ return false;
++
++ /*
++ * The caller can safely schedule the object for
++ * deconstruction. Provide acquire ordering.
++ */
++ smp_acquire__after_ctrl_dep();
++ return true;
++ }
++
++ /*
++ * If the reference count was already in the dead zone, then this
++ * put() operation is imbalanced. Warn, put the reference count back to
++ * DEAD and tell the caller to not deconstruct the object.
++ */
++ if (WARN_ONCE(cnt >= RCUREF_RELEASED, "rcuref - imbalanced put()")) {
++ atomic_set(&ref->refcnt, RCUREF_DEAD);
++ return false;
++ }
++
++ /*
++ * This is a put() operation on a saturated refcount. Restore the
++ * mean saturation value and tell the caller to not deconstruct the
++ * object.
++ */
++ if (cnt > RCUREF_MAXREF)
++ atomic_set(&ref->refcnt, RCUREF_SATURATED);
++ return false;
++}
++EXPORT_SYMBOL_GPL(rcuref_put_slowpath);
+--
+2.40.1
+
--- /dev/null
+From e6772c1d05f151bc529045893f8179f84b6aa583 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Sep 2023 04:39:34 +0000
+Subject: Bluetooth: Avoid redundant authentication
+
+From: Ying Hsu <yinghsu@chromium.org>
+
+[ Upstream commit 1d8e801422d66e4b8c7b187c52196bef94eed887 ]
+
+While executing the Android 13 CTS Verifier Secure Server test on a
+ChromeOS device, it was observed that the Bluetooth host initiates
+authentication for an RFCOMM connection after SSP completes.
+When this happens, some Intel Bluetooth controllers, like AC9560, would
+disconnect with "Connection Rejected due to Security Reasons (0x0e)".
+
+Historically, BlueZ did not mandate this authentication while an
+authenticated combination key was already in use for the connection.
+This behavior was changed since commit 7b5a9241b780
+("Bluetooth: Introduce requirements for security level 4").
+So, this patch addresses the aforementioned disconnection issue by
+restoring the previous behavior.
+
+Signed-off-by: Ying Hsu <yinghsu@chromium.org>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_conn.c | 63 ++++++++++++++++++++++------------------
+ 1 file changed, 35 insertions(+), 28 deletions(-)
+
+diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
+index f8ba3f5aa877b..728be9307f526 100644
+--- a/net/bluetooth/hci_conn.c
++++ b/net/bluetooth/hci_conn.c
+@@ -2364,34 +2364,41 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type,
+ if (!test_bit(HCI_CONN_AUTH, &conn->flags))
+ goto auth;
+
+- /* An authenticated FIPS approved combination key has sufficient
+- * security for security level 4. */
+- if (conn->key_type == HCI_LK_AUTH_COMBINATION_P256 &&
+- sec_level == BT_SECURITY_FIPS)
+- goto encrypt;
+-
+- /* An authenticated combination key has sufficient security for
+- security level 3. */
+- if ((conn->key_type == HCI_LK_AUTH_COMBINATION_P192 ||
+- conn->key_type == HCI_LK_AUTH_COMBINATION_P256) &&
+- sec_level == BT_SECURITY_HIGH)
+- goto encrypt;
+-
+- /* An unauthenticated combination key has sufficient security for
+- security level 1 and 2. */
+- if ((conn->key_type == HCI_LK_UNAUTH_COMBINATION_P192 ||
+- conn->key_type == HCI_LK_UNAUTH_COMBINATION_P256) &&
+- (sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW))
+- goto encrypt;
+-
+- /* A combination key has always sufficient security for the security
+- levels 1 or 2. High security level requires the combination key
+- is generated using maximum PIN code length (16).
+- For pre 2.1 units. */
+- if (conn->key_type == HCI_LK_COMBINATION &&
+- (sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW ||
+- conn->pin_length == 16))
+- goto encrypt;
++ switch (conn->key_type) {
++ case HCI_LK_AUTH_COMBINATION_P256:
++ /* An authenticated FIPS approved combination key has
++ * sufficient security for security level 4 or lower.
++ */
++ if (sec_level <= BT_SECURITY_FIPS)
++ goto encrypt;
++ break;
++ case HCI_LK_AUTH_COMBINATION_P192:
++ /* An authenticated combination key has sufficient security for
++ * security level 3 or lower.
++ */
++ if (sec_level <= BT_SECURITY_HIGH)
++ goto encrypt;
++ break;
++ case HCI_LK_UNAUTH_COMBINATION_P192:
++ case HCI_LK_UNAUTH_COMBINATION_P256:
++ /* An unauthenticated combination key has sufficient security
++ * for security level 2 or lower.
++ */
++ if (sec_level <= BT_SECURITY_MEDIUM)
++ goto encrypt;
++ break;
++ case HCI_LK_COMBINATION:
++ /* A combination key has always sufficient security for the
++ * security levels 2 or lower. High security level requires the
++ * combination key is generated using maximum PIN code length
++ * (16). For pre 2.1 units.
++ */
++ if (sec_level <= BT_SECURITY_MEDIUM || conn->pin_length == 16)
++ goto encrypt;
++ break;
++ default:
++ break;
++ }
+
+ auth:
+ if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags))
+--
+2.40.1
+
--- /dev/null
+From e404dd5f31ecd1523a230d68a5dd1c79d74ce73d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Aug 2023 14:46:26 +0800
+Subject: Bluetooth: btusb: add shutdown function for QCA6174
+
+From: Rocky Liao <quic_rjliao@quicinc.com>
+
+[ Upstream commit 187f8b648cc16f07c66ab1d89d961bdcff779bf7 ]
+
+We should send an HCI reset command before the Bluetooth controller is
+turned off, which resets the Bluetooth firmware status.
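+
+The shutdown hook wired up below is expected to look roughly like the
+following sketch (illustrative; it simply issues a bare HCI reset):
+
+	static int btusb_shutdown_qca(struct hci_dev *hdev)
+	{
+		struct sk_buff *skb;
+
+		/* HCI_OP_RESET with no parameters resets the firmware */
+		skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL,
+				     HCI_INIT_TIMEOUT);
+		if (IS_ERR(skb)) {
+			bt_dev_err(hdev, "HCI reset during shutdown failed");
+			return PTR_ERR(skb);
+		}
+		kfree_skb(skb);
+
+		return 0;
+	}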
+
+Signed-off-by: Rocky Liao <quic_rjliao@quicinc.com>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/bluetooth/btusb.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
+index f2062c2a28da8..96d4f48e36011 100644
+--- a/drivers/bluetooth/btusb.c
++++ b/drivers/bluetooth/btusb.c
+@@ -3984,6 +3984,7 @@ static int btusb_probe(struct usb_interface *intf,
+
+ if (id->driver_info & BTUSB_QCA_ROME) {
+ data->setup_on_usb = btusb_setup_qca;
++ hdev->shutdown = btusb_shutdown_qca;
+ hdev->set_bdaddr = btusb_set_bdaddr_ath3012;
+ hdev->cmd_timeout = btusb_qca_cmd_timeout;
+ set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+--
+2.40.1
+
--- /dev/null
+From c371ab6d2f08ae126ea2826d8f104fa100db26ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Sep 2023 14:42:27 -0700
+Subject: Bluetooth: hci_core: Fix build warnings
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit dcda165706b9fbfd685898d46a6749d7d397e0c0 ]
+
+This fixes the following warnings:
+
+net/bluetooth/hci_core.c: In function ‘hci_register_dev’:
+net/bluetooth/hci_core.c:2620:54: warning: ‘%d’ directive output may
+be truncated writing between 1 and 10 bytes into a region of size 5
+[-Wformat-truncation=]
+ 2620 | snprintf(hdev->name, sizeof(hdev->name), "hci%d", id);
+ | ^~
+net/bluetooth/hci_core.c:2620:50: note: directive argument in the range
+[0, 2147483647]
+ 2620 | snprintf(hdev->name, sizeof(hdev->name), "hci%d", id);
+ | ^~~~~~~
+net/bluetooth/hci_core.c:2620:9: note: ‘snprintf’ output between 5 and
+14 bytes into a destination of size 8
+ 2620 | snprintf(hdev->name, sizeof(hdev->name), "hci%d", id);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/bluetooth/hci_core.h | 2 +-
+ net/bluetooth/hci_core.c | 8 +++++---
+ 2 files changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
+index ddbcbf9ccb2ce..583aebd8c1e01 100644
+--- a/include/net/bluetooth/hci_core.h
++++ b/include/net/bluetooth/hci_core.h
+@@ -348,7 +348,7 @@ struct hci_dev {
+ struct list_head list;
+ struct mutex lock;
+
+- char name[8];
++ const char *name;
+ unsigned long flags;
+ __u16 id;
+ __u8 bus;
+diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
+index d13b498f148cc..6a1db678d032f 100644
+--- a/net/bluetooth/hci_core.c
++++ b/net/bluetooth/hci_core.c
+@@ -2616,7 +2616,11 @@ int hci_register_dev(struct hci_dev *hdev)
+ if (id < 0)
+ return id;
+
+- snprintf(hdev->name, sizeof(hdev->name), "hci%d", id);
++ error = dev_set_name(&hdev->dev, "hci%u", id);
++ if (error)
++ return error;
++
++ hdev->name = dev_name(&hdev->dev);
+ hdev->id = id;
+
+ BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
+@@ -2638,8 +2642,6 @@ int hci_register_dev(struct hci_dev *hdev)
+ if (!IS_ERR_OR_NULL(bt_debugfs))
+ hdev->debugfs = debugfs_create_dir(hdev->name, bt_debugfs);
+
+- dev_set_name(&hdev->dev, "%s", hdev->name);
+-
+ error = device_add(&hdev->dev);
+ if (error < 0)
+ goto err_wqueue;
+--
+2.40.1
+
--- /dev/null
+From b460e3650c64f4f9a965951fe29b8cf476de6ace Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Oct 2023 13:59:59 -0700
+Subject: Bluetooth: hci_event: Fix using memcmp when comparing keys
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit b541260615f601ae1b5d6d0cc54e790de706303b ]
+
+memcmp is not considered safe to use with cryptographic secrets:
+
+ 'Do not use memcmp() to compare security critical data, such as
+ cryptographic secrets, because the required CPU time depends on the
+ number of equal bytes.'
+
+While the ZERO_KEY comparison may not itself involve security critical
+data, it can lead to more usage of memcmp with pairing keys, which
+could introduce more security problems.
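+
+For context, crypto_memneq() returns zero only when all bytes are
+equal and its runtime does not depend on where the first difference
+occurs. A simplified sketch of the idea (not the actual kernel
+implementation):
+
+	static unsigned long memneq_sketch(const void *a, const void *b,
+					   size_t size)
+	{
+		const unsigned char *pa = a, *pb = b;
+		unsigned long neq = 0;
+
+		/* Accumulate differences; never exit early */
+		while (size--)
+			neq |= *pa++ ^ *pb++;
+		return neq;
+	}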
+
+Fixes: 455c2ff0a558 ("Bluetooth: Fix BR/EDR out-of-band pairing with only initiator data")
+Fixes: 33155c4aae52 ("Bluetooth: hci_event: Ignore NULL link key")
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_event.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
+index 152da3ded3faf..c86a45344fe28 100644
+--- a/net/bluetooth/hci_event.c
++++ b/net/bluetooth/hci_event.c
+@@ -25,6 +25,8 @@
+ /* Bluetooth HCI event handling. */
+
+ #include <asm/unaligned.h>
++#include <linux/crypto.h>
++#include <crypto/algapi.h>
+
+ #include <net/bluetooth/bluetooth.h>
+ #include <net/bluetooth/hci_core.h>
+@@ -4697,7 +4699,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, void *data,
+ goto unlock;
+
+ /* Ignore NULL link key against CVE-2020-26555 */
+- if (!memcmp(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) {
++ if (!crypto_memneq(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) {
+ bt_dev_dbg(hdev, "Ignore NULL link key (ZERO KEY) for %pMR",
+ &ev->bdaddr);
+ hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
+@@ -5240,8 +5242,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn)
+ * available, then do not declare that OOB data is
+ * present.
+ */
+- if (!memcmp(data->rand256, ZERO_KEY, 16) ||
+- !memcmp(data->hash256, ZERO_KEY, 16))
++ if (!crypto_memneq(data->rand256, ZERO_KEY, 16) ||
++ !crypto_memneq(data->hash256, ZERO_KEY, 16))
+ return 0x00;
+
+ return 0x02;
+@@ -5251,8 +5253,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn)
+ * not supported by the hardware, then check that if
+ * P-192 data values are present.
+ */
+- if (!memcmp(data->rand192, ZERO_KEY, 16) ||
+- !memcmp(data->hash192, ZERO_KEY, 16))
++ if (!crypto_memneq(data->rand192, ZERO_KEY, 16) ||
++ !crypto_memneq(data->hash192, ZERO_KEY, 16))
+ return 0x00;
+
+ return 0x01;
+--
+2.40.1
+
--- /dev/null
+From 6affa55da4b3bf33bf880ee3f1dcc75db268b581 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Sep 2023 12:09:21 +0100
+Subject: btrfs: error out when COWing block using a stale transaction
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 48774f3bf8b4dd3b1a0e155825c9ce48483db14c ]
+
+At btrfs_cow_block() we have these checks to verify we are not using a
+stale transaction (a past transaction with an unblocked state or higher),
+and the only thing we do is to trigger a WARN with a message and a stack
+trace. This however is a critical problem: it is highly unexpected and,
+if it happens, most likely due to a bug, so we should error out and turn
+the fs into error state so that such an issue is much more easily noticed
+if it's triggered.
+
+The problem is critical because using such a stale transaction will lead to
+not persisting the extent buffer used for the COW operation, as allocating
+a tree block adds the range of the respective extent buffer to the
+->dirty_pages iotree of the transaction, and a stale transaction, in the
+unlocked state or higher, will not flush dirty extent buffers anymore,
+therefore resulting in not persisting the tree block and resource leaks
+(not cleaning the dirty_pages iotree for example).
+
+So do the following changes:
+
+1) Return -EUCLEAN if we find a stale transaction;
+
+2) Turn the fs into error state, with error -EUCLEAN, so that no
+ transaction can be committed, and generate a stack trace;
+
+3) Combine both conditions into a single if statement, as both are related
+ and have the same error message;
+
+4) Mark the check as unlikely, since this is not expected to ever happen.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ctree.c | 24 ++++++++++++++++--------
+ 1 file changed, 16 insertions(+), 8 deletions(-)
+
+diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
+index 1a327eb3580b4..98e3e0761a4e5 100644
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -567,14 +567,22 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
+ btrfs_err(fs_info,
+ "COW'ing blocks on a fs root that's being dropped");
+
+- if (trans->transaction != fs_info->running_transaction)
+- WARN(1, KERN_CRIT "trans %llu running %llu\n",
+- trans->transid,
+- fs_info->running_transaction->transid);
+-
+- if (trans->transid != fs_info->generation)
+- WARN(1, KERN_CRIT "trans %llu running %llu\n",
+- trans->transid, fs_info->generation);
++ /*
++ * COWing must happen through a running transaction, which always
++ * matches the current fs generation (it's a transaction with a state
++ * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs
++ * into error state to prevent the commit of any transaction.
++ */
++ if (unlikely(trans->transaction != fs_info->running_transaction ||
++ trans->transid != fs_info->generation)) {
++ btrfs_abort_transaction(trans, -EUCLEAN);
++ btrfs_crit(fs_info,
++"unexpected transaction when attempting to COW block %llu on root %llu, transaction %llu running transaction %llu fs generation %llu",
++ buf->start, btrfs_root_id(root), trans->transid,
++ fs_info->running_transaction->transid,
++ fs_info->generation);
++ return -EUCLEAN;
++ }
+
+ if (!should_cow_block(trans, root, buf)) {
+ *cow_ret = buf;
+--
+2.40.1
+
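The control flow introduced above can be seen in isolation: two related
invariants are folded into one unlikely() branch that both poisons the
filesystem and propagates -EUCLEAN instead of merely warning. Below is a stub
userspace model of that shape; the types and the errored field are simplified
stand-ins, not the btrfs API.

    #include <errno.h>
    #include <stdio.h>

    #define unlikely(x) __builtin_expect(!!(x), 0)

    struct fs_info {
        unsigned long long generation;
        const void *running_transaction;
        int errored;    /* stand-in for the fs error state */
    };

    struct trans_handle {
        unsigned long long transid;
        const void *transaction;
    };

    static int cow_block(struct trans_handle *trans, struct fs_info *fs_info)
    {
        /*
         * COWing must go through the running transaction; if either check
         * fails, poison the fs and refuse instead of just warning.
         */
        if (unlikely(trans->transaction != fs_info->running_transaction ||
                     trans->transid != fs_info->generation)) {
            fs_info->errored = 1;   /* models btrfs_abort_transaction() */
            fprintf(stderr, "unexpected transaction %llu, generation %llu\n",
                    trans->transid, fs_info->generation);
            return -EUCLEAN;
        }
        return 0;   /* proceed with the COW */
    }

    int main(void)
    {
        struct fs_info fi = { .generation = 7, .running_transaction = &fi };
        struct trans_handle stale = { .transid = 6, .transaction = &fi };

        printf("cow_block() = %d\n", cow_block(&stale, &fi));
        return 0;
    }
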
--- /dev/null
+From 53306197a65c328a7f50c8a1be33bd4574f840dd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Sep 2023 12:09:23 +0100
+Subject: btrfs: error out when reallocating block for defrag using a stale
+ transaction
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit e36f94914021e58ee88a8856c7fdf35adf9c7ee1 ]
+
+At btrfs_realloc_node() we have these checks to verify we are not using a
+stale transaction (a past transaction with an unblocked state or higher),
+and the only thing we do is trigger two WARN_ON() calls. This, however,
+is a critical problem: it is highly unexpected and, if it happens, most
+likely due to a bug, so we should error out and turn the fs into an error
+state so that such an issue is noticed much more easily if it is triggered.
+
+The problem is critical because in btrfs_realloc_node() we COW tree blocks,
+and using such a stale transaction will lead to not persisting the extent
+buffers used for the COW operations, as allocating a tree block adds the
+range of the respective extent buffers to the ->dirty_pages iotree of the
+transaction, and a stale transaction, in the unblocked state or higher,
+will not flush dirty extent buffers anymore, therefore resulting in not
+persisting the tree block and resource leaks (not cleaning the dirty_pages
+iotree for example).
+
+So do the following changes:
+
+1) Return -EUCLEAN if we find a stale transaction;
+
+2) Turn the fs into error state, with error -EUCLEAN, so that no
+ transaction can be committed, and generate a stack trace;
+
+3) Combine both conditions into a single if statement, as both are related
+ and have the same error message;
+
+4) Mark the check as unlikely, since this is not expected to ever happen.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ctree.c | 18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
+index 98f68bd1383a3..e08688844f1e1 100644
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -698,8 +698,22 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
+ int progress_passed = 0;
+ struct btrfs_disk_key disk_key;
+
+- WARN_ON(trans->transaction != fs_info->running_transaction);
+- WARN_ON(trans->transid != fs_info->generation);
++ /*
++ * COWing must happen through a running transaction, which always
++ * matches the current fs generation (it's a transaction with a state
++ * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs
++ * into error state to prevent the commit of any transaction.
++ */
++ if (unlikely(trans->transaction != fs_info->running_transaction ||
++ trans->transid != fs_info->generation)) {
++ btrfs_abort_transaction(trans, -EUCLEAN);
++ btrfs_crit(fs_info,
++"unexpected transaction when attempting to reallocate parent %llu for root %llu, transaction %llu running transaction %llu fs generation %llu",
++ parent->start, btrfs_root_id(root), trans->transid,
++ fs_info->running_transaction->transid,
++ fs_info->generation);
++ return -EUCLEAN;
++ }
+
+ parent_nritems = btrfs_header_nritems(parent);
+ blocksize = fs_info->nodesize;
+--
+2.40.1
+
--- /dev/null
+From 3203996611533692e96cdb5e6e237006eb19797e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Sep 2023 12:09:22 +0100
+Subject: btrfs: error when COWing block from a root that is being deleted
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit a2caab29884397e583d09be6546259a83ebfbdb1 ]
+
+At btrfs_cow_block() we check if the block being COWed belongs to a root
+that is being deleted, and if so we log an error message. However, this is
+an unexpected case that indicates a bug somewhere, so we should return
+an error and abort the transaction. So change this in the following ways:
+
+1) Abort the transaction with -EUCLEAN, so that if the issue ever happens
+ it can easily be noticed;
+
+2) Change the logged message level from error to critical, and change the
+ message itself to print the block's logical address and the ID of the
+ root;
+
+3) Return -EUCLEAN to the caller;
+
+4) As this is an unexpected scenario, that should never happen, mark the
+ check as unlikely, allowing the compiler to potentially generate better
+ code.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ctree.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
+index 98e3e0761a4e5..98f68bd1383a3 100644
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -563,9 +563,13 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
+ u64 search_start;
+ int ret;
+
+- if (test_bit(BTRFS_ROOT_DELETING, &root->state))
+- btrfs_err(fs_info,
+- "COW'ing blocks on a fs root that's being dropped");
++ if (unlikely(test_bit(BTRFS_ROOT_DELETING, &root->state))) {
++ btrfs_abort_transaction(trans, -EUCLEAN);
++ btrfs_crit(fs_info,
++ "attempt to COW block %llu on root %llu that is being deleted",
++ buf->start, btrfs_root_id(root));
++ return -EUCLEAN;
++ }
+
+ /*
+ * COWing must happen through a running transaction, which always
+--
+2.40.1
+
--- /dev/null
+From 87dbedf5bcd43760ba7f6c73a0e748036bd26a97 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Sep 2023 15:47:27 -0400
+Subject: btrfs: fix some -Wmaybe-uninitialized warnings in ioctl.c
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+[ Upstream commit 9147b9ded499d9853bdf0e9804b7eaa99c4429ed ]
+
+Jens reported the following -Wmaybe-uninitialized warnings from a recent
+Linus branch.
+
+ In file included from ./include/asm-generic/rwonce.h:26,
+ from ./arch/arm64/include/asm/rwonce.h:71,
+ from ./include/linux/compiler.h:246,
+ from ./include/linux/export.h:5,
+ from ./include/linux/linkage.h:7,
+ from ./include/linux/kernel.h:17,
+ from fs/btrfs/ioctl.c:6:
+ In function ‘instrument_copy_from_user_before’,
+ inlined from ‘_copy_from_user’ at ./include/linux/uaccess.h:148:3,
+ inlined from ‘copy_from_user’ at ./include/linux/uaccess.h:183:7,
+ inlined from ‘btrfs_ioctl_space_info’ at fs/btrfs/ioctl.c:2999:6,
+ inlined from ‘btrfs_ioctl’ at fs/btrfs/ioctl.c:4616:10:
+ ./include/linux/kasan-checks.h:38:27: warning: ‘space_args’ may be used
+ uninitialized [-Wmaybe-uninitialized]
+ 38 | #define kasan_check_write __kasan_check_write
+ ./include/linux/instrumented.h:129:9: note: in expansion of macro
+ ‘kasan_check_write’
+ 129 | kasan_check_write(to, n);
+ | ^~~~~~~~~~~~~~~~~
+ ./include/linux/kasan-checks.h: In function ‘btrfs_ioctl’:
+ ./include/linux/kasan-checks.h:20:6: note: by argument 1 of type ‘const
+ volatile void *’ to ‘__kasan_check_write’ declared here
+ 20 | bool __kasan_check_write(const volatile void *p, unsigned int
+ size);
+ | ^~~~~~~~~~~~~~~~~~~
+ fs/btrfs/ioctl.c:2981:39: note: ‘space_args’ declared here
+ 2981 | struct btrfs_ioctl_space_args space_args;
+ | ^~~~~~~~~~
+ In function ‘instrument_copy_from_user_before’,
+ inlined from ‘_copy_from_user’ at ./include/linux/uaccess.h:148:3,
+ inlined from ‘copy_from_user’ at ./include/linux/uaccess.h:183:7,
+ inlined from ‘_btrfs_ioctl_send’ at fs/btrfs/ioctl.c:4343:9,
+ inlined from ‘btrfs_ioctl’ at fs/btrfs/ioctl.c:4658:10:
+ ./include/linux/kasan-checks.h:38:27: warning: ‘args32’ may be used
+ uninitialized [-Wmaybe-uninitialized]
+ 38 | #define kasan_check_write __kasan_check_write
+ ./include/linux/instrumented.h:129:9: note: in expansion of macro
+ ‘kasan_check_write’
+ 129 | kasan_check_write(to, n);
+ | ^~~~~~~~~~~~~~~~~
+ ./include/linux/kasan-checks.h: In function ‘btrfs_ioctl’:
+ ./include/linux/kasan-checks.h:20:6: note: by argument 1 of type ‘const
+ volatile void *’ to ‘__kasan_check_write’ declared here
+ 20 | bool __kasan_check_write(const volatile void *p, unsigned int
+ size);
+ | ^~~~~~~~~~~~~~~~~~~
+ fs/btrfs/ioctl.c:4341:49: note: ‘args32’ declared here
+ 4341 | struct btrfs_ioctl_send_args_32 args32;
+ | ^~~~~~
+
+This was due to his config options and having KASAN turned on,
+which adds some extra checks around copy_from_user(), which then
+triggered the -Wmaybe-uninitialized checker for these cases.
+
+Fix the warnings by initializing the different structs we're copying
+into.
+
+Reported-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ioctl.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
+index 9e323420c96d3..9474265ee7ea3 100644
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -3869,7 +3869,7 @@ static void get_block_group_info(struct list_head *groups_list,
+ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
+ void __user *arg)
+ {
+- struct btrfs_ioctl_space_args space_args;
++ struct btrfs_ioctl_space_args space_args = { 0 };
+ struct btrfs_ioctl_space_info space;
+ struct btrfs_ioctl_space_info *dest;
+ struct btrfs_ioctl_space_info *dest_orig;
+@@ -5223,7 +5223,7 @@ static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat
+
+ if (compat) {
+ #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+- struct btrfs_ioctl_send_args_32 args32;
++ struct btrfs_ioctl_send_args_32 args32 = { 0 };
+
+ ret = copy_from_user(&args32, argp, sizeof(args32));
+ if (ret)
+--
+2.40.1
+
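The fix relies on the standard C guarantee that an aggregate with any
initializer is zero-filled in all remaining members, so `= { 0 }` leaves no
indeterminate bytes even if a later copy fails partway. A small standalone
illustration follows; the struct layout and helper are invented for the
example, not the btrfs ioctl definitions.

    #include <stdio.h>
    #include <string.h>

    struct space_args {     /* illustrative stand-in */
        unsigned long long space_slots;
        unsigned long long total_spaces;
    };

    /* Models a copy that may fail before writing anything. */
    static int copy_in(struct space_args *dst, const void *src, int fail)
    {
        if (fail)
            return -1;
        memcpy(dst, src, sizeof(*dst));
        return 0;
    }

    int main(void)
    {
        struct space_args args = { 0 };     /* whole struct zero-filled */
        struct space_args user = { 4, 2 };

        if (copy_in(&args, &user, 1))       /* simulate a faulting copy */
            printf("copy failed, slots still %llu\n", args.space_slots);
        return 0;
    }
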
--- /dev/null
+From a4cea077b433d623efa49f0fa5c2164800f1cdd3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Sep 2023 12:15:24 -0400
+Subject: btrfs: initialize start_slot in btrfs_log_prealloc_extents
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+[ Upstream commit b4c639f699349880b7918b861e1bd360442ec450 ]
+
+Jens reported a compiler warning when using
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y that looks like this
+
+ fs/btrfs/tree-log.c: In function ‘btrfs_log_prealloc_extents’:
+ fs/btrfs/tree-log.c:4828:23: warning: ‘start_slot’ may be used
+ uninitialized [-Wmaybe-uninitialized]
+ 4828 | ret = copy_items(trans, inode, dst_path, path,
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ 4829 | start_slot, ins_nr, 1, 0);
+ | ~~~~~~~~~~~~~~~~~~~~~~~~~
+ fs/btrfs/tree-log.c:4725:13: note: ‘start_slot’ was declared here
+ 4725 | int start_slot;
+ | ^~~~~~~~~~
+
+The compiler is incorrect, as we only use this code when ins_len > 0,
+and when ins_len > 0 we have start_slot properly initialized. However
+we generally find the -Wmaybe-uninitialized warnings valuable, so
+initialize start_slot to get rid of the warning.
+
+Reported-by: Jens Axboe <axboe@kernel.dk>
+Tested-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-log.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index c03ff6a5a7f6b..7c33b28c02aeb 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -4767,7 +4767,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
+ struct extent_buffer *leaf;
+ int slot;
+ int ins_nr = 0;
+- int start_slot;
++ int start_slot = 0;
+ int ret;
+
+ if (!(inode->flags & BTRFS_INODE_PREALLOC))
+--
+2.40.1
+
--- /dev/null
+From f1a7f8da19c27e1293bbc0854411609d2d3cfc04 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Sep 2023 18:20:19 +0100
+Subject: btrfs: prevent transaction block reserve underflow when starting
+ transaction
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit a7ddeeb079505961355cf0106154da0110f1fdff ]
+
+When starting a transaction, with a non-zero number of items, we reserve
+metadata space for that number of items and for delayed refs by doing a
+call to btrfs_block_rsv_add(), with the transaction block reserve passed
+as the block reserve argument. This reserves metadata space and adds it
+to the transaction block reserve. Later we migrate the space we reserved
+for delayed references from the transaction block reserve into the delayed
+refs block reserve, by calling btrfs_migrate_to_delayed_refs_rsv().
+
+btrfs_migrate_to_delayed_refs_rsv() decrements the number of bytes to
+migrate from the source block reserve, and this however may result in an
+underflow in case the space added to the transaction block reserve ended
+up being used by another task that has not reserved enough space for its
+own use - examples are tasks doing reflinks or hole punching because they
+end up calling btrfs_replace_file_extents() -> btrfs_drop_extents() and
+may need to modify/COW a variable number of leaves/paths, so they keep
+trying to use space from the transaction block reserve when they need to
+COW an extent buffer, and may end up trying to use more space than they
+have reserved (1 unit/path only for removing file extent items).
+
+This can be avoided by simply reserving space first without adding it to
+the transaction block reserve, then add the space for delayed refs to the
+delayed refs block reserve and finally add the remaining reserved space
+to the transaction block reserve. This also makes the code a bit shorter
+and simpler. So just do that.
+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/delayed-ref.c | 9 +--------
+ fs/btrfs/delayed-ref.h | 1 -
+ fs/btrfs/transaction.c | 6 +++---
+ 3 files changed, 4 insertions(+), 12 deletions(-)
+
+diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
+index 36a3debe94930..e08e3852c4788 100644
+--- a/fs/btrfs/delayed-ref.c
++++ b/fs/btrfs/delayed-ref.c
+@@ -141,24 +141,17 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
+ * Transfer bytes to our delayed refs rsv
+ *
+ * @fs_info: the filesystem
+- * @src: source block rsv to transfer from
+ * @num_bytes: number of bytes to transfer
+ *
+- * This transfers up to the num_bytes amount from the src rsv to the
++ * This transfers up to the num_bytes amount, previously reserved, to the
+ * delayed_refs_rsv. Any extra bytes are returned to the space info.
+ */
+ void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
+- struct btrfs_block_rsv *src,
+ u64 num_bytes)
+ {
+ struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
+ u64 to_free = 0;
+
+- spin_lock(&src->lock);
+- src->reserved -= num_bytes;
+- src->size -= num_bytes;
+- spin_unlock(&src->lock);
+-
+ spin_lock(&delayed_refs_rsv->lock);
+ if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
+ u64 delta = delayed_refs_rsv->size -
+diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
+index d6304b690ec4a..712a6315e956b 100644
+--- a/fs/btrfs/delayed-ref.h
++++ b/fs/btrfs/delayed-ref.h
+@@ -383,7 +383,6 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans);
+ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
+ enum btrfs_reserve_flush_enum flush);
+ void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
+- struct btrfs_block_rsv *src,
+ u64 num_bytes);
+ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans);
+ bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index 1193214ba8c10..60db4c3b82fa1 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -614,14 +614,14 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
+ reloc_reserved = true;
+ }
+
+- ret = btrfs_block_rsv_add(fs_info, rsv, num_bytes, flush);
++ ret = btrfs_reserve_metadata_bytes(fs_info, rsv, num_bytes, flush);
+ if (ret)
+ goto reserve_fail;
+ if (delayed_refs_bytes) {
+- btrfs_migrate_to_delayed_refs_rsv(fs_info, rsv,
+- delayed_refs_bytes);
++ btrfs_migrate_to_delayed_refs_rsv(fs_info, delayed_refs_bytes);
+ num_bytes -= delayed_refs_bytes;
+ }
++ btrfs_block_rsv_add_bytes(rsv, num_bytes, true);
+
+ if (rsv->space_info->force_alloc)
+ do_chunk_alloc = true;
+--
+2.40.1
+
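The new ordering can be reduced to simple accounting: reserve the full amount
privately, credit the delayed-refs share directly to its own reserve, and only
then expose the remainder in the transaction reserve, so no other task can
consume bytes that are about to be migrated. A toy model of that arithmetic,
with invented types standing in for the btrfs block reserves:

    #include <stdio.h>

    struct block_rsv { unsigned long long reserved, size; };

    /* Old ordering put everything into the shared rsv first, and the
     * later migration blindly subtracted - if another task consumed part
     * of it in between, reserved could underflow. New ordering below. */
    static void start_transaction(struct block_rsv *trans_rsv,
                                  struct block_rsv *delayed_refs_rsv,
                                  unsigned long long num_bytes,
                                  unsigned long long delayed_refs_bytes)
    {
        /* 1) space was reserved without entering any block reserve */
        /* 2) hand the delayed-refs share straight to its own rsv */
        delayed_refs_rsv->reserved += delayed_refs_bytes;
        delayed_refs_rsv->size += delayed_refs_bytes;
        num_bytes -= delayed_refs_bytes;
        /* 3) only the remainder ever enters the transaction rsv */
        trans_rsv->reserved += num_bytes;
        trans_rsv->size += num_bytes;
    }

    int main(void)
    {
        struct block_rsv trans = { 0, 0 }, delayed = { 0, 0 };

        start_transaction(&trans, &delayed, 100, 30);
        printf("trans=%llu delayed=%llu\n", trans.reserved, delayed.reserved);
        return 0;
    }
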
--- /dev/null
+From 3baeabc4520a630fed394828d2a4ff52d9b1ee9c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Sep 2023 18:20:23 +0100
+Subject: btrfs: return -EUCLEAN for delayed tree ref with a ref count not
+ equals to 1
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 1bf76df3fee56d6637718e267f7c34ed70d0c7dc ]
+
+When running a delayed tree reference, if we find a ref count different
+from 1, we return -EIO. This isn't an IO error, as it indicates either a
+bug in the delayed refs code or memory corruption, so change the error
+code from -EIO to -EUCLEAN. Also tag the branch as 'unlikely', as this is
+not expected to ever happen, and change the error message to print the
+tree block's bytenr without the parentheses (there was also a missing
+space between the word 'block' and the opening parenthesis), for
+consistency, as that is the style used everywhere else.
+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent-tree.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index 08ff10a81cb90..2a7c9088fe1f8 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -1663,12 +1663,12 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
+ parent = ref->parent;
+ ref_root = ref->root;
+
+- if (node->ref_mod != 1) {
++ if (unlikely(node->ref_mod != 1)) {
+ btrfs_err(trans->fs_info,
+- "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
++ "btree block %llu has %d references rather than 1: action %d ref_root %llu parent %llu",
+ node->bytenr, node->ref_mod, node->action, ref_root,
+ parent);
+- return -EIO;
++ return -EUCLEAN;
+ }
+ if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
+ BUG_ON(!extent_op || !extent_op->update_flags);
+--
+2.40.1
+
--- /dev/null
+From d526150520942c1952eb6898189e1d3c5d5df5d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:05:27 +0800
+Subject: cpufreq: schedutil: Update next_freq when cpufreq_limits change
+
+From: Xuewen Yan <xuewen.yan@unisoc.com>
+
+[ Upstream commit 9e0bc36ab07c550d791bf17feeb479f1dfc42d89 ]
+
+When the cpufreq policy is 'single', there is a scenario in which
+sg_policy's next_freq cannot be updated.
+
+When the CPU's utilization is always max, the frequency is max as
+well, and if we then change the policy's scaling_max_freq to a lower
+frequency, sg_policy's next_freq should change to that lower
+frequency. However, because the CPU is considered busy, next_freq
+keeps the old max frequency.
+
+For example:
+
+The cpu7 is a single CPU:
+
+ unisoc:/sys/devices/system/cpu/cpufreq/policy7 # while true;do done& [1] 4737
+ unisoc:/sys/devices/system/cpu/cpufreq/policy7 # taskset -p 80 4737
+ pid 4737's current affinity mask: ff
+ pid 4737's new affinity mask: 80
+ unisoc:/sys/devices/system/cpu/cpufreq/policy7 # cat scaling_max_freq
+ 2301000
+ unisoc:/sys/devices/system/cpu/cpufreq/policy7 # cat scaling_cur_freq
+ 2301000
+ unisoc:/sys/devices/system/cpu/cpufreq/policy7 # echo 2171000 > scaling_max_freq
+ unisoc:/sys/devices/system/cpu/cpufreq/policy7 # cat scaling_max_freq
+ 2171000
+
+At this time, the sg_policy's next_freq would stay at 2301000, which
+is wrong.
+
+To fix this, add a check for the ->need_freq_update flag.
+
+[ mingo: Clarified the changelog. ]
+
+Co-developed-by: Guohua Yan <guohua.yan@unisoc.com>
+Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
+Signed-off-by: Guohua Yan <guohua.yan@unisoc.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: "Rafael J. Wysocki" <rafael@kernel.org>
+Link: https://lore.kernel.org/r/20230719130527.8074-1-xuewen.yan@unisoc.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/cpufreq_schedutil.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
+index 1207c78f85c11..853a07618a3cf 100644
+--- a/kernel/sched/cpufreq_schedutil.c
++++ b/kernel/sched/cpufreq_schedutil.c
+@@ -345,7 +345,8 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
+ * Except when the rq is capped by uclamp_max.
+ */
+ if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
+- sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
++ sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq &&
++ !sg_policy->need_freq_update) {
+ next_f = sg_policy->next_freq;
+
+ /* Restore cached freq as next_freq has changed */
+--
+2.40.1
+
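The decision the patch changes boils down to one condition: keep the cached
(higher) frequency for a busy CPU only while no limits update is pending. A
toy model of that logic, using made-up types rather than the scheduler's:

    #include <stdbool.h>
    #include <stdio.h>

    struct sg_policy { unsigned int next_freq; bool need_freq_update; };

    static unsigned int pick_freq(struct sg_policy *sg, unsigned int next_f,
                                  bool cpu_is_busy)
    {
        /* Keep the old (higher) frequency for a busy CPU, but only if
         * no scaling_max_freq/scaling_min_freq change is pending. */
        if (cpu_is_busy && next_f < sg->next_freq && !sg->need_freq_update)
            next_f = sg->next_freq;

        sg->next_freq = next_f;
        return next_f;
    }

    int main(void)
    {
        struct sg_policy sg = { .next_freq = 2301000 };

        sg.need_freq_update = true;     /* user lowered scaling_max_freq */
        printf("%u\n", pick_freq(&sg, 2171000, true));  /* now 2171000 */
        return 0;
    }
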
--- /dev/null
+From f019522eb7213549eb7850757c35f5f83bedfd50 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Aug 2023 12:25:26 +0800
+Subject: drm/amd/pm: add unique_id for gc 11.0.3
+
+From: Kenneth Feng <kenneth.feng@amd.com>
+
+[ Upstream commit 4953856f280b2b606089a72a93a1e9212a3adaca ]
+
+Add the unique_id attribute for GC 11.0.3.
+
+Signed-off-by: Kenneth Feng <kenneth.feng@amd.com>
+Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/pm/amdgpu_pm.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+index 8472013ff38a2..0e78437c8389d 100644
+--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
++++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+@@ -1991,6 +1991,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
++ case IP_VERSION(11, 0, 3):
+ *states = ATTR_STATE_SUPPORTED;
+ break;
+ default:
+--
+2.40.1
+
--- /dev/null
+From ec3a51f02b2d6b88b37074670e4a1acfd0f4ca90 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 1 Oct 2023 13:47:10 +0200
+Subject: drm: panel-orientation-quirks: Add quirk for One Mix 2S
+
+From: Kai Uwe Broulik <foss-linux@broulik.de>
+
+[ Upstream commit cbb7eb2dbd9472816e42a1b0fdb51af49abbf812 ]
+
+The One Mix 2S is a mini laptop with a 1200x1920 portrait screen
+mounted in a landscape-oriented clamshell case. Because its DMI
+strings are too generic, this entry also matches on the BIOS date.
+
+Signed-off-by: Kai Uwe Broulik <foss-linux@broulik.de>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20231001114710.336172-1-foss-linux@broulik.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/drm_panel_orientation_quirks.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
+index 0cb646cb04ee1..d5c15292ae937 100644
+--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
++++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
+@@ -38,6 +38,14 @@ static const struct drm_dmi_panel_orientation_data gpd_micropc = {
+ .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP,
+ };
+
++static const struct drm_dmi_panel_orientation_data gpd_onemix2s = {
++ .width = 1200,
++ .height = 1920,
++ .bios_dates = (const char * const []){ "05/21/2018", "10/26/2018",
++ "03/04/2019", NULL },
++ .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP,
++};
++
+ static const struct drm_dmi_panel_orientation_data gpd_pocket = {
+ .width = 1200,
+ .height = 1920,
+@@ -401,6 +409,14 @@ static const struct dmi_system_id orientation_data[] = {
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "LTH17"),
+ },
+ .driver_data = (void *)&lcd800x1280_rightside_up,
++ }, { /* One Mix 2S (generic strings, also match on bios date) */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Default string"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Default string"),
++ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Default string"),
++ DMI_EXACT_MATCH(DMI_BOARD_NAME, "Default string"),
++ },
++ .driver_data = (void *)&gpd_onemix2s,
+ },
+ {}
+ };
+--
+2.40.1
+
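The matching strategy is worth spelling out: when the DMI strings are generic
placeholders, the quirk only fires if the BIOS date is also on an allow-list.
A simplified userspace sketch of that two-stage match (field names and logic
are approximations, not the drm_panel_orientation_quirks internals):

    #include <stdio.h>
    #include <string.h>

    /* Simplified model: a quirk matches only if every DMI field matches
     * and, when a bios_dates list is given, the BIOS date is in it. */
    struct quirk {
        const char *sys_vendor, *product_name;
        const char *const *bios_dates;  /* NULL-terminated, optional */
    };

    static int quirk_matches(const struct quirk *q, const char *vendor,
                             const char *product, const char *bios_date)
    {
        int i;

        if (strcmp(q->sys_vendor, vendor) || strcmp(q->product_name, product))
            return 0;
        if (!q->bios_dates)
            return 1;
        for (i = 0; q->bios_dates[i]; i++)
            if (!strcmp(q->bios_dates[i], bios_date))
                return 1;
        return 0;
    }

    int main(void)
    {
        static const char *const dates[] = { "05/21/2018", "10/26/2018",
                                             "03/04/2019", NULL };
        struct quirk onemix2s = { "Default string", "Default string", dates };

        printf("%d\n", quirk_matches(&onemix2s, "Default string",
                                     "Default string", "10/26/2018"));
        return 0;
    }
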
--- /dev/null
+From 3ebd760fab1840ff832f63608a1d2909713f3b6c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Feb 2023 00:56:19 +0900
+Subject: fprobe: Add nr_maxactive to specify rethook_node pool size
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit 59a7a298565aa0ce44ce8e4fbcbb89a19730013a ]
+
+Add nr_maxactive to specify the rethook_node pool size. This is the
+maximum number of target functions that can be probed by the
+exit_handler concurrently. Note that if the running function is
+preempted or sleeps, it is still counted as 'active'.
+
+Link: https://lkml.kernel.org/r/167526697917.433354.17779774988245113106.stgit@mhiramat.roam.corp.google.com
+
+Cc: Florent Revest <revest@chromium.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Stable-dep-of: 700b2b439766 ("fprobe: Fix to ensure the number of active retprobes is not zero")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/fprobe.h | 2 ++
+ kernel/trace/fprobe.c | 5 ++++-
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
+index e0d4e61362491..678f741a7b330 100644
+--- a/include/linux/fprobe.h
++++ b/include/linux/fprobe.h
+@@ -14,6 +14,7 @@
+ * @flags: The status flag.
+ * @rethook: The rethook data structure. (internal data)
+ * @entry_data_size: The private data storage size.
++ * @nr_maxactive: The max number of active functions.
+ * @entry_handler: The callback function for function entry.
+ * @exit_handler: The callback function for function exit.
+ */
+@@ -31,6 +32,7 @@ struct fprobe {
+ unsigned int flags;
+ struct rethook *rethook;
+ size_t entry_data_size;
++ int nr_maxactive;
+
+ void (*entry_handler)(struct fprobe *fp, unsigned long entry_ip,
+ struct pt_regs *regs, void *entry_data);
+diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
+index be28d1bc84e80..441a373079213 100644
+--- a/kernel/trace/fprobe.c
++++ b/kernel/trace/fprobe.c
+@@ -143,7 +143,10 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
+ }
+
+ /* Initialize rethook if needed */
+- size = num * num_possible_cpus() * 2;
++ if (fp->nr_maxactive)
++ size = fp->nr_maxactive;
++ else
++ size = num * num_possible_cpus() * 2;
+ if (size < 0)
+ return -E2BIG;
+
+--
+2.40.1
+
--- /dev/null
+From 4851c3c83da75010c499e225a628a9c6d680c205 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Oct 2023 08:49:45 +0900
+Subject: fprobe: Fix to ensure the number of active retprobes is not zero
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit 700b2b439766e8aab8a7174991198497345bd411 ]
+
+The number of active retprobes can end up being zero, which is not
+acceptable, so return an -EINVAL error if that is detected.
+
+Link: https://lore.kernel.org/all/169750018550.186853.11198884812017796410.stgit@devnote2/
+
+Reported-by: wuqiang.matt <wuqiang.matt@bytedance.com>
+Closes: https://lore.kernel.org/all/20231016222103.cb9f426edc60220eabd8aa6a@kernel.org/
+Fixes: 5b0ab78998e3 ("fprobe: Add exit_handler support")
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/fprobe.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
+index 441a373079213..f386d6bd8e0e3 100644
+--- a/kernel/trace/fprobe.c
++++ b/kernel/trace/fprobe.c
+@@ -134,7 +134,7 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
+ {
+ int i, size;
+
+- if (num < 0)
++ if (num <= 0)
+ return -EINVAL;
+
+ if (!fp->exit_handler) {
+@@ -147,8 +147,8 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
+ size = fp->nr_maxactive;
+ else
+ size = num * num_possible_cpus() * 2;
+- if (size < 0)
+- return -E2BIG;
++ if (size <= 0)
++ return -EINVAL;
+
+ fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler);
+ if (!fp->rethook)
+--
+2.40.1
+
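Taken together, the two fprobe patches above leave the pool sizing as: an
explicit nr_maxactive wins, otherwise the size scales with the number of
probed symbols and possible CPUs, and any non-positive result is rejected with
-EINVAL. A compact model of that computation (function and parameter names are
invented):

    #include <errno.h>
    #include <stdio.h>

    /* Models fprobe_init_rethook()'s pool sizing after both patches. */
    static int rethook_pool_size(int num, int nr_maxactive, int ncpus)
    {
        int size;

        if (num <= 0)
            return -EINVAL;

        size = nr_maxactive ? nr_maxactive : num * ncpus * 2;
        if (size <= 0)
            return -EINVAL;
        return size;
    }

    int main(void)
    {
        printf("%d\n", rethook_pool_size(3, 0, 8));     /* 48 */
        printf("%d\n", rethook_pool_size(0, 0, 8));     /* -EINVAL */
        return 0;
    }
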
--- /dev/null
+From 417c303823ee22e402ad91b7da29b412644cdf4b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Feb 2023 00:56:01 +0900
+Subject: fprobe: Pass entry_data to handlers
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit 76d0de5729c0569c4071e7f21fcab394e502f03a ]
+
+Pass the private entry_data to the entry and exit handlers so that
+they can share context data, such as saved function arguments.
+The user must specify the private entry_data size via the
+@entry_data_size field before registering the fprobe.
+
+Link: https://lkml.kernel.org/r/167526696173.433354.17408372048319432574.stgit@mhiramat.roam.corp.google.com
+
+Cc: Florent Revest <revest@chromium.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Stable-dep-of: 700b2b439766 ("fprobe: Fix to ensure the number of active retprobes is not zero")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/fprobe.h | 8 ++++++--
+ kernel/trace/bpf_trace.c | 2 +-
+ kernel/trace/fprobe.c | 21 ++++++++++++++-------
+ lib/test_fprobe.c | 6 ++++--
+ samples/fprobe/fprobe_example.c | 6 ++++--
+ 5 files changed, 29 insertions(+), 14 deletions(-)
+
+diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
+index 1c2bde0ead736..e0d4e61362491 100644
+--- a/include/linux/fprobe.h
++++ b/include/linux/fprobe.h
+@@ -13,6 +13,7 @@
+ * @nmissed: The counter for missing events.
+ * @flags: The status flag.
+ * @rethook: The rethook data structure. (internal data)
++ * @entry_data_size: The private data storage size.
+ * @entry_handler: The callback function for function entry.
+ * @exit_handler: The callback function for function exit.
+ */
+@@ -29,9 +30,12 @@ struct fprobe {
+ unsigned long nmissed;
+ unsigned int flags;
+ struct rethook *rethook;
++ size_t entry_data_size;
+
+- void (*entry_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs);
+- void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs);
++ void (*entry_handler)(struct fprobe *fp, unsigned long entry_ip,
++ struct pt_regs *regs, void *entry_data);
++ void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip,
++ struct pt_regs *regs, void *entry_data);
+ };
+
+ /* This fprobe is soft-disabled. */
+diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
+index 8c77c54e6348b..f4a494a457c52 100644
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -2646,7 +2646,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
+
+ static void
+ kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
+- struct pt_regs *regs)
++ struct pt_regs *regs, void *data)
+ {
+ struct bpf_kprobe_multi_link *link;
+
+diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
+index 1322247ce6488..be28d1bc84e80 100644
+--- a/kernel/trace/fprobe.c
++++ b/kernel/trace/fprobe.c
+@@ -17,14 +17,16 @@
+ struct fprobe_rethook_node {
+ struct rethook_node node;
+ unsigned long entry_ip;
++ char data[];
+ };
+
+ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
+ {
+ struct fprobe_rethook_node *fpr;
+- struct rethook_node *rh;
++ struct rethook_node *rh = NULL;
+ struct fprobe *fp;
++ void *entry_data = NULL;
+ int bit;
+
+ fp = container_of(ops, struct fprobe, ops);
+@@ -37,9 +39,6 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
+ return;
+ }
+
+- if (fp->entry_handler)
+- fp->entry_handler(fp, ip, ftrace_get_regs(fregs));
+-
+ if (fp->exit_handler) {
+ rh = rethook_try_get(fp->rethook);
+ if (!rh) {
+@@ -48,9 +47,16 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
+ }
+ fpr = container_of(rh, struct fprobe_rethook_node, node);
+ fpr->entry_ip = ip;
+- rethook_hook(rh, ftrace_get_regs(fregs), true);
++ if (fp->entry_data_size)
++ entry_data = fpr->data;
+ }
+
++ if (fp->entry_handler)
++ fp->entry_handler(fp, ip, ftrace_get_regs(fregs), entry_data);
++
++ if (rh)
++ rethook_hook(rh, ftrace_get_regs(fregs), true);
++
+ out:
+ ftrace_test_recursion_unlock(bit);
+ }
+@@ -81,7 +87,8 @@ static void fprobe_exit_handler(struct rethook_node *rh, void *data,
+
+ fpr = container_of(rh, struct fprobe_rethook_node, node);
+
+- fp->exit_handler(fp, fpr->entry_ip, regs);
++ fp->exit_handler(fp, fpr->entry_ip, regs,
++ fp->entry_data_size ? (void *)fpr->data : NULL);
+ }
+ NOKPROBE_SYMBOL(fprobe_exit_handler);
+
+@@ -146,7 +153,7 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
+ for (i = 0; i < size; i++) {
+ struct fprobe_rethook_node *node;
+
+- node = kzalloc(sizeof(*node), GFP_KERNEL);
++ node = kzalloc(sizeof(*node) + fp->entry_data_size, GFP_KERNEL);
+ if (!node) {
+ rethook_free(fp->rethook);
+ fp->rethook = NULL;
+diff --git a/lib/test_fprobe.c b/lib/test_fprobe.c
+index e0381b3ec410c..34fa5a5bbda1f 100644
+--- a/lib/test_fprobe.c
++++ b/lib/test_fprobe.c
+@@ -30,7 +30,8 @@ static noinline u32 fprobe_selftest_target2(u32 value)
+ return (value / div_factor) + 1;
+ }
+
+-static notrace void fp_entry_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs)
++static notrace void fp_entry_handler(struct fprobe *fp, unsigned long ip,
++ struct pt_regs *regs, void *data)
+ {
+ KUNIT_EXPECT_FALSE(current_test, preemptible());
+ /* This can be called on the fprobe_selftest_target and the fprobe_selftest_target2 */
+@@ -39,7 +40,8 @@ static notrace void fp_entry_handler(struct fprobe *fp, unsigned long ip, struct
+ entry_val = (rand1 / div_factor);
+ }
+
+-static notrace void fp_exit_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs)
++static notrace void fp_exit_handler(struct fprobe *fp, unsigned long ip,
++ struct pt_regs *regs, void *data)
+ {
+ unsigned long ret = regs_return_value(regs);
+
+diff --git a/samples/fprobe/fprobe_example.c b/samples/fprobe/fprobe_example.c
+index e22da8573116e..dd794990ad7ec 100644
+--- a/samples/fprobe/fprobe_example.c
++++ b/samples/fprobe/fprobe_example.c
+@@ -48,7 +48,8 @@ static void show_backtrace(void)
+ stack_trace_print(stacks, len, 24);
+ }
+
+-static void sample_entry_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs)
++static void sample_entry_handler(struct fprobe *fp, unsigned long ip,
++ struct pt_regs *regs, void *data)
+ {
+ if (use_trace)
+ /*
+@@ -63,7 +64,8 @@ static void sample_entry_handler(struct fprobe *fp, unsigned long ip, struct pt_
+ show_backtrace();
+ }
+
+-static void sample_exit_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs)
++static void sample_exit_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs,
++ void *data)
+ {
+ unsigned long rip = instruction_pointer(regs);
+
+--
+2.40.1
+
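The per-instance storage added above uses a C99 flexible array member: the
scratch area is allocated together with the node in a single allocation sized
at registration time, which is how the entry and exit handlers end up sharing
the same buffer. A minimal standalone demonstration of that layout (the struct
name is a stand-in):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Models fprobe_rethook_node: per-instance scratch space is a
     * flexible array member sized at allocation time. */
    struct rethook_node_model {
        unsigned long entry_ip;
        char data[];        /* entry_data_size bytes follow */
    };

    int main(void)
    {
        size_t entry_data_size = 32;
        struct rethook_node_model *node =
            calloc(1, sizeof(*node) + entry_data_size);

        if (!node)
            return 1;
        strcpy(node->data, "saved-args");   /* entry handler writes... */
        printf("%s\n", node->data);         /* ...exit handler reads */
        free(node);
        return 0;
    }
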
--- /dev/null
+From 41c787522af21180151340e77c0cd1bd15bc6d7f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Sep 2023 22:51:31 -0600
+Subject: fs-writeback: do not requeue a clean inode having skipped pages
+
+From: Chunhai Guo <guochunhai@vivo.com>
+
+[ Upstream commit be049c3a088d512187407b7fd036cecfab46d565 ]
+
+When writing back an inode and performing an fsync on it concurrently, a
+deadlock issue may arise as shown below. In each writeback iteration, a
+clean inode is requeued to the wb->b_dirty queue due to non-zero
+pages_skipped, without anything actually being written. This causes an
+infinite loop and prevents the plug from being flushed, resulting in a
+deadlock. We now avoid requeuing the clean inode to prevent this issue.
+
+ wb_writeback fsync (inode-Y)
+blk_start_plug(&plug)
+for (;;) {
+ iter i-1: some reqs with page-X added into plug->mq_list // f2fs node page-X with PG_writeback
+ filemap_fdatawrite
+ __filemap_fdatawrite_range // write inode-Y with sync_mode WB_SYNC_ALL
+ do_writepages
+ f2fs_write_data_pages
+ __f2fs_write_data_pages // wb_sync_req[DATA]++ for WB_SYNC_ALL
+ f2fs_write_cache_pages
+ f2fs_write_single_data_page
+ f2fs_do_write_data_page
+ f2fs_outplace_write_data
+ f2fs_update_data_blkaddr
+ f2fs_wait_on_page_writeback
+ wait_on_page_writeback // wait for f2fs node page-X
+ iter i:
+ progress = __writeback_inodes_wb(wb, work)
+ . writeback_sb_inodes
+ . __writeback_single_inode // write inode-Y with sync_mode WB_SYNC_NONE
+ . . do_writepages
+ . . f2fs_write_data_pages
+ . . . __f2fs_write_data_pages // skip writepages due to (wb_sync_req[DATA]>0)
+ . . . wbc->pages_skipped += get_dirty_pages(inode) // wbc->pages_skipped = 1
+ . if (!(inode->i_state & I_DIRTY_ALL)) // i_state = I_SYNC | I_SYNC_QUEUED
+ . total_wrote++; // total_wrote = 1
+ . requeue_inode // requeue inode-Y to wb->b_dirty queue due to non-zero pages_skipped
+ if (progress) // progress = 1
+ continue;
+ iter i+1:
+ queue_io
+ // similar process with iter i, infinite for-loop !
+}
+blk_finish_plug(&plug) // flush plug won't be called
+
+Signed-off-by: Chunhai Guo <guochunhai@vivo.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Message-Id: <20230916045131.957929-1-guochunhai@vivo.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index d387708977a50..a5c31a479aacc 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -1522,10 +1522,15 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
+
+ if (wbc->pages_skipped) {
+ /*
+- * writeback is not making progress due to locked
+- * buffers. Skip this inode for now.
++ * Writeback is not making progress due to locked buffers.
++ * Skip this inode for now. Although having skipped pages
++ * is odd for clean inodes, it can happen for some
++ * filesystems so handle that gracefully.
+ */
+- redirty_tail_locked(inode, wb);
++ if (inode->i_state & I_DIRTY_ALL)
++ redirty_tail_locked(inode, wb);
++ else
++ inode_cgwb_move_to_attached(inode, wb);
+ return;
+ }
+
+--
+2.40.1
+
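The fixed branch can be summarized as a small decision table: skipped pages on
a dirty inode mean "retry later", while skipped pages on a clean inode mean
"park it", which breaks the requeue loop. A toy model of that choice (the
state flag is a stand-in for the kernel's dirty-state mask):

    #include <stdio.h>

    #define I_DIRTY_ALL 0x1     /* stand-in for the real dirty-state mask */

    /* Models the fixed requeue_inode() branch: an inode with skipped
     * pages goes back on the dirty list only if it is actually dirty;
     * a clean one parks on the "attached" list so writeback makes
     * progress instead of spinning. */
    static const char *requeue(unsigned int i_state, int pages_skipped)
    {
        if (pages_skipped)
            return (i_state & I_DIRTY_ALL) ? "b_dirty (retry later)"
                                           : "attached (stop requeuing)";
        return "normal path";
    }

    int main(void)
    {
        printf("clean + skipped: %s\n", requeue(0, 1));
        printf("dirty + skipped: %s\n", requeue(I_DIRTY_ALL, 1));
        return 0;
    }
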
--- /dev/null
+From 940105086d532361e6f60fdbc845dfafbe92c3bb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Sep 2023 10:29:14 +0000
+Subject: gpio: timberdale: Fix potential deadlock on &tgpio->lock
+
+From: Chengfeng Ye <dg573847474@gmail.com>
+
+[ Upstream commit 9e8bc2dda5a7a8e2babc9975f4b11c9a6196e490 ]
+
+As the timbgpio_irq_enable()/timbgpio_irq_disable() callbacks can be
+executed in irq context, they could deadlock on &tgpio->lock if they
+preempt another execution path that already holds the same lock.
+
+timbgpio_gpio_set()
+--> timbgpio_update_bit()
+--> spin_lock(&tgpio->lock)
+<interrupt>
+ --> timbgpio_irq_disable()
+ --> spin_lock_irqsave(&tgpio->lock)
+
+This flaw was found by an experimental static analysis tool I am
+developing for detecting irq-related deadlocks.
+
+To prevent the potential deadlock, the patch uses spin_lock_irqsave()
+on &tgpio->lock inside timbgpio_gpio_set() to prevent the possible
+deadlock scenario.
+
+Signed-off-by: Chengfeng Ye <dg573847474@gmail.com>
+Reviewed-by: Andy Shevchenko <andy@kernel.org>
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-timberdale.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpio/gpio-timberdale.c b/drivers/gpio/gpio-timberdale.c
+index de14949a3fe5a..92c1f2baa4bff 100644
+--- a/drivers/gpio/gpio-timberdale.c
++++ b/drivers/gpio/gpio-timberdale.c
+@@ -43,9 +43,10 @@ static int timbgpio_update_bit(struct gpio_chip *gpio, unsigned index,
+ unsigned offset, bool enabled)
+ {
+ struct timbgpio *tgpio = gpiochip_get_data(gpio);
++ unsigned long flags;
+ u32 reg;
+
+- spin_lock(&tgpio->lock);
++ spin_lock_irqsave(&tgpio->lock, flags);
+ reg = ioread32(tgpio->membase + offset);
+
+ if (enabled)
+@@ -54,7 +55,7 @@ static int timbgpio_update_bit(struct gpio_chip *gpio, unsigned index,
+ reg &= ~(1 << index);
+
+ iowrite32(reg, tgpio->membase + offset);
+- spin_unlock(&tgpio->lock);
++ spin_unlock_irqrestore(&tgpio->lock, flags);
+
+ return 0;
+ }
+--
+2.40.1
+
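The general rule behind the fix: any lock that an interrupt handler may take
must be acquired with interrupts disabled everywhere else, or the interrupted
lock holder deadlocks against itself. A loose userspace analogue follows,
using signal blocking in place of local IRQ disabling; this is a conceptual
sketch, not kernel code:

    #include <signal.h>
    #include <stdio.h>

    /* Code that can be interrupted by an asynchronous context (an IRQ
     * in the kernel, a signal here) which takes the same lock must
     * block that context for the duration of the critical section -
     * spin_lock_irqsave() does exactly that in one step. */
    static volatile unsigned int fake_reg;

    static void update_bit(unsigned int index, int enabled)
    {
        sigset_t all, old;

        sigfillset(&all);
        sigprocmask(SIG_BLOCK, &all, &old);     /* "spin_lock_irqsave" */
        if (enabled)                            /* read-modify-write is */
            fake_reg |= 1u << index;            /* now safe from re-entry */
        else
            fake_reg &= ~(1u << index);
        sigprocmask(SIG_SETMASK, &old, NULL);   /* "spin_unlock_irqrestore" */
    }

    int main(void)
    {
        update_bit(3, 1);
        printf("reg = 0x%x\n", fake_reg);
        return 0;
    }
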
--- /dev/null
+From e9a8fa731f0c9db245a6291af7b1d77185194b5a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Sep 2023 10:40:59 +0800
+Subject: HID: holtek: fix slab-out-of-bounds Write in holtek_kbd_input_event
+
+From: Ma Ke <make_ruc2021@163.com>
+
+[ Upstream commit ffe3b7837a2bb421df84d0177481db9f52c93a71 ]
+
+There is a slab-out-of-bounds write bug in the hid-holtek-kbd driver.
+The problem is that the driver assumes the device must have an input,
+but some malicious devices violate this assumption.
+
+Fix this by checking that the hid_device's input list is non-empty
+before using it.
+
+Signed-off-by: Ma Ke <make_ruc2021@163.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hid/hid-holtek-kbd.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/hid/hid-holtek-kbd.c b/drivers/hid/hid-holtek-kbd.c
+index 403506b9697e7..b346d68a06f5a 100644
+--- a/drivers/hid/hid-holtek-kbd.c
++++ b/drivers/hid/hid-holtek-kbd.c
+@@ -130,6 +130,10 @@ static int holtek_kbd_input_event(struct input_dev *dev, unsigned int type,
+ return -ENODEV;
+
+ boot_hid = usb_get_intfdata(boot_interface);
++ if (list_empty(&boot_hid->inputs)) {
++ hid_err(hid, "no inputs found\n");
++ return -ENODEV;
++ }
+ boot_hid_input = list_first_entry(&boot_hid->inputs,
+ struct hid_input, list);
+
+--
+2.40.1
+
--- /dev/null
+From 1db6b1260f7986da2b5367e069bab780b16e2778 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Aug 2023 00:24:38 +0200
+Subject: HID: logitech-hidpp: Add Bluetooth ID for the Logitech M720 Triathlon
+ mouse
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+[ Upstream commit 2d866603e25b1ce7e536839f62d1faae1c03d92f ]
+
+Using hidpp for the M720 adds battery info reporting and hi-res
+scrolling support.
+
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Bastien Nocera <hadess@hadess.net>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hid/hid-logitech-hidpp.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
+index fb427391c3b86..8d0dad12b2d37 100644
+--- a/drivers/hid/hid-logitech-hidpp.c
++++ b/drivers/hid/hid-logitech-hidpp.c
+@@ -4427,6 +4427,8 @@ static const struct hid_device_id hidpp_devices[] = {
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb008) },
+ { /* MX Master mouse over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb012) },
++ { /* M720 Triathlon mouse over Bluetooth */
++ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb015) },
+ { /* MX Ergo trackball over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01d) },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01e) },
+--
+2.40.1
+
--- /dev/null
+From 67281af95c5e1d4b56ed145d7e68a000e8afbd78 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 17 Sep 2023 16:18:43 +0000
+Subject: HID: multitouch: Add required quirk for Synaptics 0xcd7e device
+
+From: Rahul Rameshbabu <sergeantsagara@protonmail.com>
+
+[ Upstream commit 1437e4547edf41689d7135faaca4222ef0081bc1 ]
+
+Register the Synaptics device as a special multitouch device with certain
+quirks that may improve usability of the touchpad device.
+
+Reported-by: Rain <rain@sunshowers.io>
+Closes: https://lore.kernel.org/linux-input/2bbb8e1d-1793-4df1-810f-cb0137341ff4@app.fastmail.com/
+Signed-off-by: Rahul Rameshbabu <sergeantsagara@protonmail.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hid/hid-multitouch.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
+index 521b2ffb42449..8db4ae05febc8 100644
+--- a/drivers/hid/hid-multitouch.c
++++ b/drivers/hid/hid-multitouch.c
+@@ -2144,6 +2144,10 @@ static const struct hid_device_id mt_devices[] = {
+ USB_DEVICE_ID_MTP_STM)},
+
+ /* Synaptics devices */
++ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT,
++ HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
++ USB_VENDOR_ID_SYNAPTICS, 0xcd7e) },
++
+ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT,
+ HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
+ USB_VENDOR_ID_SYNAPTICS, 0xce08) },
+--
+2.40.1
+
--- /dev/null
+From acc58da9f242d3b2a9a8cbaf5628826bf3a16f14 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Sep 2023 16:06:01 +0200
+Subject: HID: nintendo: reinitialize USB Pro Controller after resuming from
+ suspend
+
+From: Martino Fontana <tinozzo123@gmail.com>
+
+[ Upstream commit 95ea4d9fd385fe335b989f22d409df079a042b7a ]
+
+When suspending the computer, a Switch Pro Controller connected via USB will
+lose its internal status. However, because the USB connection was technically
+never lost, when resuming the computer, the driver will attempt to communicate
+with the controller as if nothing happened (and fail).
+Because of this, the user was forced to manually disconnect the controller
+(or to press the sync button on the controller to power it off) so that it
+could be re-initialized.
+
+With this patch, the controller will be automatically re-initialized after
+resuming from suspend.
+
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=216233
+
+Signed-off-by: Martino Fontana <tinozzo123@gmail.com>
+Reviewed-by: Daniel J. Ogorchock <djogorchock@gmail.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hid/hid-nintendo.c | 175 ++++++++++++++++++++++---------------
+ 1 file changed, 103 insertions(+), 72 deletions(-)
+
+diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c
+index 5bfc0c4504608..8a8a3dd8af0c1 100644
+--- a/drivers/hid/hid-nintendo.c
++++ b/drivers/hid/hid-nintendo.c
+@@ -2011,7 +2011,9 @@ static int joycon_read_info(struct joycon_ctlr *ctlr)
+ struct joycon_input_report *report;
+
+ req.subcmd_id = JC_SUBCMD_REQ_DEV_INFO;
++ mutex_lock(&ctlr->output_mutex);
+ ret = joycon_send_subcmd(ctlr, &req, 0, HZ);
++ mutex_unlock(&ctlr->output_mutex);
+ if (ret) {
+ hid_err(ctlr->hdev, "Failed to get joycon info; ret=%d\n", ret);
+ return ret;
+@@ -2040,6 +2042,85 @@ static int joycon_read_info(struct joycon_ctlr *ctlr)
+ return 0;
+ }
+
++static int joycon_init(struct hid_device *hdev)
++{
++ struct joycon_ctlr *ctlr = hid_get_drvdata(hdev);
++ int ret = 0;
++
++ mutex_lock(&ctlr->output_mutex);
++ /* if handshake command fails, assume ble pro controller */
++ if ((jc_type_is_procon(ctlr) || jc_type_is_chrggrip(ctlr)) &&
++ !joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ)) {
++ hid_dbg(hdev, "detected USB controller\n");
++ /* set baudrate for improved latency */
++ ret = joycon_send_usb(ctlr, JC_USB_CMD_BAUDRATE_3M, HZ);
++ if (ret) {
++ hid_err(hdev, "Failed to set baudrate; ret=%d\n", ret);
++ goto out_unlock;
++ }
++ /* handshake */
++ ret = joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ);
++ if (ret) {
++ hid_err(hdev, "Failed handshake; ret=%d\n", ret);
++ goto out_unlock;
++ }
++ /*
++ * Set no timeout (to keep controller in USB mode).
++ * This doesn't send a response, so ignore the timeout.
++ */
++ joycon_send_usb(ctlr, JC_USB_CMD_NO_TIMEOUT, HZ/10);
++ } else if (jc_type_is_chrggrip(ctlr)) {
++ hid_err(hdev, "Failed charging grip handshake\n");
++ ret = -ETIMEDOUT;
++ goto out_unlock;
++ }
++
++ /* get controller calibration data, and parse it */
++ ret = joycon_request_calibration(ctlr);
++ if (ret) {
++ /*
++ * We can function with default calibration, but it may be
++ * inaccurate. Provide a warning, and continue on.
++ */
++ hid_warn(hdev, "Analog stick positions may be inaccurate\n");
++ }
++
++ /* get IMU calibration data, and parse it */
++ ret = joycon_request_imu_calibration(ctlr);
++ if (ret) {
++ /*
++ * We can function with default calibration, but it may be
++ * inaccurate. Provide a warning, and continue on.
++ */
++ hid_warn(hdev, "Unable to read IMU calibration data\n");
++ }
++
++ /* Set the reporting mode to 0x30, which is the full report mode */
++ ret = joycon_set_report_mode(ctlr);
++ if (ret) {
++ hid_err(hdev, "Failed to set report mode; ret=%d\n", ret);
++ goto out_unlock;
++ }
++
++ /* Enable rumble */
++ ret = joycon_enable_rumble(ctlr);
++ if (ret) {
++ hid_err(hdev, "Failed to enable rumble; ret=%d\n", ret);
++ goto out_unlock;
++ }
++
++ /* Enable the IMU */
++ ret = joycon_enable_imu(ctlr);
++ if (ret) {
++ hid_err(hdev, "Failed to enable the IMU; ret=%d\n", ret);
++ goto out_unlock;
++ }
++
++out_unlock:
++ mutex_unlock(&ctlr->output_mutex);
++ return ret;
++}
++
+ /* Common handler for parsing inputs */
+ static int joycon_ctlr_read_handler(struct joycon_ctlr *ctlr, u8 *data,
+ int size)
+@@ -2171,85 +2252,19 @@ static int nintendo_hid_probe(struct hid_device *hdev,
+
+ hid_device_io_start(hdev);
+
+- /* Initialize the controller */
+- mutex_lock(&ctlr->output_mutex);
+- /* if handshake command fails, assume ble pro controller */
+- if ((jc_type_is_procon(ctlr) || jc_type_is_chrggrip(ctlr)) &&
+- !joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ)) {
+- hid_dbg(hdev, "detected USB controller\n");
+- /* set baudrate for improved latency */
+- ret = joycon_send_usb(ctlr, JC_USB_CMD_BAUDRATE_3M, HZ);
+- if (ret) {
+- hid_err(hdev, "Failed to set baudrate; ret=%d\n", ret);
+- goto err_mutex;
+- }
+- /* handshake */
+- ret = joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ);
+- if (ret) {
+- hid_err(hdev, "Failed handshake; ret=%d\n", ret);
+- goto err_mutex;
+- }
+- /*
+- * Set no timeout (to keep controller in USB mode).
+- * This doesn't send a response, so ignore the timeout.
+- */
+- joycon_send_usb(ctlr, JC_USB_CMD_NO_TIMEOUT, HZ/10);
+- } else if (jc_type_is_chrggrip(ctlr)) {
+- hid_err(hdev, "Failed charging grip handshake\n");
+- ret = -ETIMEDOUT;
+- goto err_mutex;
+- }
+-
+- /* get controller calibration data, and parse it */
+- ret = joycon_request_calibration(ctlr);
++ ret = joycon_init(hdev);
+ if (ret) {
+- /*
+- * We can function with default calibration, but it may be
+- * inaccurate. Provide a warning, and continue on.
+- */
+- hid_warn(hdev, "Analog stick positions may be inaccurate\n");
+- }
+-
+- /* get IMU calibration data, and parse it */
+- ret = joycon_request_imu_calibration(ctlr);
+- if (ret) {
+- /*
+- * We can function with default calibration, but it may be
+- * inaccurate. Provide a warning, and continue on.
+- */
+- hid_warn(hdev, "Unable to read IMU calibration data\n");
+- }
+-
+- /* Set the reporting mode to 0x30, which is the full report mode */
+- ret = joycon_set_report_mode(ctlr);
+- if (ret) {
+- hid_err(hdev, "Failed to set report mode; ret=%d\n", ret);
+- goto err_mutex;
+- }
+-
+- /* Enable rumble */
+- ret = joycon_enable_rumble(ctlr);
+- if (ret) {
+- hid_err(hdev, "Failed to enable rumble; ret=%d\n", ret);
+- goto err_mutex;
+- }
+-
+- /* Enable the IMU */
+- ret = joycon_enable_imu(ctlr);
+- if (ret) {
+- hid_err(hdev, "Failed to enable the IMU; ret=%d\n", ret);
+- goto err_mutex;
++ hid_err(hdev, "Failed to initialize controller; ret=%d\n", ret);
++ goto err_close;
+ }
+
+ ret = joycon_read_info(ctlr);
+ if (ret) {
+ hid_err(hdev, "Failed to retrieve controller info; ret=%d\n",
+ ret);
+- goto err_mutex;
++ goto err_close;
+ }
+
+- mutex_unlock(&ctlr->output_mutex);
+-
+ /* Initialize the leds */
+ ret = joycon_leds_create(ctlr);
+ if (ret) {
+@@ -2275,8 +2290,6 @@ static int nintendo_hid_probe(struct hid_device *hdev,
+ hid_dbg(hdev, "probe - success\n");
+ return 0;
+
+-err_mutex:
+- mutex_unlock(&ctlr->output_mutex);
+ err_close:
+ hid_hw_close(hdev);
+ err_stop:
+@@ -2306,6 +2319,20 @@ static void nintendo_hid_remove(struct hid_device *hdev)
+ hid_hw_stop(hdev);
+ }
+
++#ifdef CONFIG_PM
++
++static int nintendo_hid_resume(struct hid_device *hdev)
++{
++ int ret = joycon_init(hdev);
++
++ if (ret)
++ hid_err(hdev, "Failed to restore controller after resume");
++
++ return ret;
++}
++
++#endif
++
+ static const struct hid_device_id nintendo_hid_devices[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_NINTENDO,
+ USB_DEVICE_ID_NINTENDO_PROCON) },
+@@ -2327,6 +2354,10 @@ static struct hid_driver nintendo_hid_driver = {
+ .probe = nintendo_hid_probe,
+ .remove = nintendo_hid_remove,
+ .raw_event = nintendo_hid_event,
++
++#ifdef CONFIG_PM
++ .resume = nintendo_hid_resume,
++#endif
+ };
+ module_hid_driver(nintendo_hid_driver);
+
+--
+2.40.1
+
--- /dev/null
+From 5091c9bc533da515742df5c19c2336c1e6d9ef20 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 23 Sep 2023 23:54:06 +0200
+Subject: i2c: mux: Avoid potential false error message in i2c_mux_add_adapter
+
+From: Heiner Kallweit <hkallweit1@gmail.com>
+
+[ Upstream commit b13e59e74ff71a1004e0508107e91e9a84fd7388 ]
+
+I2C_CLASS_DEPRECATED is a flag and not an actual class.
+There is nothing wrong with both parent and child having
+I2C_CLASS_DEPRECATED set, so exclude it from the check.
+
+Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
+Acked-by: Peter Rosin <peda@axentia.se>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/i2c/i2c-mux.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c
+index 313904be5f3bd..57ff09f18c371 100644
+--- a/drivers/i2c/i2c-mux.c
++++ b/drivers/i2c/i2c-mux.c
+@@ -341,7 +341,7 @@ int i2c_mux_add_adapter(struct i2c_mux_core *muxc,
+ priv->adap.lock_ops = &i2c_parent_lock_ops;
+
+ /* Sanity check on class */
+- if (i2c_mux_parent_classes(parent) & class)
++ if (i2c_mux_parent_classes(parent) & class & ~I2C_CLASS_DEPRECATED)
+ dev_err(&parent->dev,
+ "Segment %d behind mux can't share classes with ancestors\n",
+ chan_id);
+--
+2.40.1
+
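The one-line fix is pure bit masking: clear the flag bit from the intersection
before deciding whether parent and child genuinely share a class. A standalone
illustration (the numeric values mirror the kernel's definitions but are
hard-coded here for the example):

    #include <stdio.h>

    #define I2C_CLASS_HWMON      (1 << 0)
    #define I2C_CLASS_SPD        (1 << 7)
    #define I2C_CLASS_DEPRECATED (1 << 8)  /* a flag, not a real class */

    int main(void)
    {
        unsigned int parent = I2C_CLASS_HWMON | I2C_CLASS_DEPRECATED;
        unsigned int child  = I2C_CLASS_SPD  | I2C_CLASS_DEPRECATED;

        /* Old check: nonzero because both carry the DEPRECATED flag. */
        printf("old: %s\n", (parent & child) ? "false error" : "ok");

        /* New check: the flag bit is masked out before comparing. */
        printf("new: %s\n", (parent & child & ~I2C_CLASS_DEPRECATED) ?
               "shared class" : "ok");
        return 0;
    }
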
--- /dev/null
+From 6fe87666925af7b0dc7c3cde280e08fac6bee892 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Jan 2023 17:46:09 -0600
+Subject: ice: Remove redundant pci_enable_pcie_error_reporting()
+
+From: Bjorn Helgaas <bhelgaas@google.com>
+
+[ Upstream commit ba153552c18d7eb839ec0bad7d7484e29ba4719c ]
+
+pci_enable_pcie_error_reporting() enables the device to send ERR_*
+Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is
+native"), the PCI core does this for all devices during enumeration.
+
+Remove the redundant pci_enable_pcie_error_reporting() call from the
+driver. Also remove the corresponding pci_disable_pcie_error_reporting()
+from the driver .remove() path.
+
+Note that this doesn't control interrupt generation by the Root Port; that
+is controlled by the AER Root Error Command register, which is managed by
+the AER service driver.
+
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
+Cc: Tony Nguyen <anthony.l.nguyen@intel.com>
+Cc: intel-wired-lan@lists.osuosl.org
+Cc: netdev@vger.kernel.org
+Tested-by: Gurucharan G <gurucharanx.g@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Stable-dep-of: 0288c3e709e5 ("ice: reset first in crash dump kernels")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_main.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index ae733207d0116..f0f39364819ac 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -4723,7 +4723,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
+ return err;
+ }
+
+- pci_enable_pcie_error_reporting(pdev);
+ pci_set_master(pdev);
+
+ pf->pdev = pdev;
+@@ -5016,7 +5015,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
+ ice_devlink_destroy_regions(pf);
+ ice_deinit_hw(hw);
+ err_exit_unroll:
+- pci_disable_pcie_error_reporting(pdev);
+ pci_disable_device(pdev);
+ return err;
+ }
+@@ -5142,7 +5140,6 @@ static void ice_remove(struct pci_dev *pdev)
+ ice_reset(&pf->hw, ICE_RESET_PFR);
+ pci_wait_for_pending_transaction(pdev);
+ ice_clear_interrupt_scheme(pf);
+- pci_disable_pcie_error_reporting(pdev);
+ pci_disable_device(pdev);
+ }
+
+--
+2.40.1
+
--- /dev/null
+From 9f3888b3485fe949bf2cb1e0d70fa35a43f27c2a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Sep 2023 18:21:48 +0300
+Subject: iio: adc: ad7192: Correct reference voltage
+
+From: Alisa-Dariana Roman <alisa.roman@analog.com>
+
+[ Upstream commit 7e7dcab620cd6d34939f615cac63fc0ef7e81c72 ]
+
+The avdd supply and the reference voltage are two different sources,
+but the reference voltage was being derived from the avdd supply.
+
+Add a vref regulator structure and set the reference voltage according
+to the vref supply from the devicetree.
+
+If the vref supply is missing, fall back to deriving the reference
+voltage from the avdd supply, for compatibility with old devicetrees.
+
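+The resulting fallback pattern, roughly (error handling and the devm
+disable action trimmed; see the diff below for the full version):
+
+  st->vref = devm_regulator_get_optional(&spi->dev, "vref");
+  if (IS_ERR(st->vref)) {
+          if (PTR_ERR(st->vref) != -ENODEV)
+                  return PTR_ERR(st->vref);      /* real error */
+          ret = regulator_get_voltage(st->avdd); /* legacy fallback */
+  } else {
+          ret = regulator_get_voltage(st->vref); /* dedicated vref */
+  }
+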
+Fixes: b581f748cce0 ("staging: iio: adc: ad7192: move out of staging")
+Signed-off-by: Alisa-Dariana Roman <alisa.roman@analog.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230924152149.41884-1-alisadariana@gmail.com
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iio/adc/ad7192.c | 29 +++++++++++++++++++++++++----
+ 1 file changed, 25 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c
+index 18520f7bedccd..faf680140c178 100644
+--- a/drivers/iio/adc/ad7192.c
++++ b/drivers/iio/adc/ad7192.c
+@@ -177,6 +177,7 @@ struct ad7192_chip_info {
+ struct ad7192_state {
+ const struct ad7192_chip_info *chip_info;
+ struct regulator *avdd;
++ struct regulator *vref;
+ struct clk *mclk;
+ u16 int_vref_mv;
+ u32 fclk;
+@@ -1014,10 +1015,30 @@ static int ad7192_probe(struct spi_device *spi)
+ if (ret)
+ return dev_err_probe(&spi->dev, ret, "Failed to enable specified DVdd supply\n");
+
+- ret = regulator_get_voltage(st->avdd);
+- if (ret < 0) {
+- dev_err(&spi->dev, "Device tree error, reference voltage undefined\n");
+- return ret;
++ st->vref = devm_regulator_get_optional(&spi->dev, "vref");
++ if (IS_ERR(st->vref)) {
++ if (PTR_ERR(st->vref) != -ENODEV)
++ return PTR_ERR(st->vref);
++
++ ret = regulator_get_voltage(st->avdd);
++ if (ret < 0)
++ return dev_err_probe(&spi->dev, ret,
++ "Device tree error, AVdd voltage undefined\n");
++ } else {
++ ret = regulator_enable(st->vref);
++ if (ret) {
++ dev_err(&spi->dev, "Failed to enable specified Vref supply\n");
++ return ret;
++ }
++
++ ret = devm_add_action_or_reset(&spi->dev, ad7192_reg_disable, st->vref);
++ if (ret)
++ return ret;
++
++ ret = regulator_get_voltage(st->vref);
++ if (ret < 0)
++ return dev_err_probe(&spi->dev, ret,
++ "Device tree error, Vref voltage undefined\n");
+ }
+ st->int_vref_mv = ret / 1000;
+
+--
+2.40.1
+
--- /dev/null
+From 981805aa2d0ad20cd7b263fbacb2e5ed43cba2fe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 Aug 2022 22:19:01 +0300
+Subject: iio: adc: ad7192: Simplify using devm_regulator_get_enable()
+
+From: Matti Vaittinen <mazziesaccount@gmail.com>
+
+[ Upstream commit 1ccef2e6e9205e209ad958d2e591bcca60981007 ]
+
+Use devm_regulator_get_enable() instead of the open-coded
+get/enable/add-action-to-disable-at-detach pattern. Also drop the
+seemingly unused struct member 'dvdd'.
+
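+Roughly, the managed helper collapses the open-coded sequence (error
+handling trimmed):
+
+  /* before: get + enable + devm action to disable on driver detach */
+  st->dvdd = devm_regulator_get(&spi->dev, "dvdd");
+  ret = regulator_enable(st->dvdd);
+  ret = devm_add_action_or_reset(&spi->dev, ad7192_reg_disable, st->dvdd);
+
+  /* after: one call, disabled automatically on detach */
+  ret = devm_regulator_get_enable(&spi->dev, "dvdd");
+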
+Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
+Link: https://lore.kernel.org/r/9719c445c095d3d308e2fc9f4f93294f5806c41c.1660934107.git.mazziesaccount@gmail.com
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Stable-dep-of: 7e7dcab620cd ("iio: adc: ad7192: Correct reference voltage")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iio/adc/ad7192.c | 15 ++-------------
+ 1 file changed, 2 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c
+index 80eff7090f14a..18520f7bedccd 100644
+--- a/drivers/iio/adc/ad7192.c
++++ b/drivers/iio/adc/ad7192.c
+@@ -177,7 +177,6 @@ struct ad7192_chip_info {
+ struct ad7192_state {
+ const struct ad7192_chip_info *chip_info;
+ struct regulator *avdd;
+- struct regulator *dvdd;
+ struct clk *mclk;
+ u16 int_vref_mv;
+ u32 fclk;
+@@ -1011,19 +1010,9 @@ static int ad7192_probe(struct spi_device *spi)
+ if (ret)
+ return ret;
+
+- st->dvdd = devm_regulator_get(&spi->dev, "dvdd");
+- if (IS_ERR(st->dvdd))
+- return PTR_ERR(st->dvdd);
+-
+- ret = regulator_enable(st->dvdd);
+- if (ret) {
+- dev_err(&spi->dev, "Failed to enable specified DVdd supply\n");
+- return ret;
+- }
+-
+- ret = devm_add_action_or_reset(&spi->dev, ad7192_reg_disable, st->dvdd);
++ ret = devm_regulator_get_enable(&spi->dev, "dvdd");
+ if (ret)
+- return ret;
++ return dev_err_probe(&spi->dev, ret, "Failed to enable specified DVdd supply\n");
+
+ ret = regulator_get_voltage(st->avdd);
+ if (ret < 0) {
+--
+2.40.1
+
--- /dev/null
+From 40a405b9b7ca7e7ff4e1d82a39565997cb8ccb31 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Oct 2022 17:16:17 +0200
+Subject: iio: core: introduce iio_device_{claim|release}_buffer_mode() APIs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Nuno Sá <nuno.sa@analog.com>
+
+[ Upstream commit 0a8565425afd8ba0e1a0ea73e21da119ee6dacea ]
+
+These APIs are analogous to iio_device_claim_direct_mode() and
+iio_device_release_direct_mode() but, as the name suggests, with the
+logic flipped. While this may look odd, it will have at least two
+users (in following changes) and it matters for moving the IIO
+mlock to the private struct.
+
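+Caller-side usage, sketched (it mirrors the direct-mode claim/release
+pair; a real user follows in the cros_ec fix later in this series):
+
+  if (iio_device_claim_buffer_mode(indio_dev) < 0)
+          return 0;  /* buffer not enabled, nothing to do */
+  /* ... safely access buffer-mode state such as active_scan_mask ... */
+  iio_device_release_buffer_mode(indio_dev);
+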
+Signed-off-by: Nuno Sá <nuno.sa@analog.com>
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Link: https://lore.kernel.org/r/20221012151620.1725215-2-nuno.sa@analog.com
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Stable-dep-of: 7771c8c80d62 ("iio: cros_ec: fix an use-after-free in cros_ec_sensors_push_data()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iio/industrialio-core.c | 38 +++++++++++++++++++++++++++++++++
+ include/linux/iio/iio.h | 2 ++
+ 2 files changed, 40 insertions(+)
+
+diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
+index f3f8392623a46..c9614982cb671 100644
+--- a/drivers/iio/industrialio-core.c
++++ b/drivers/iio/industrialio-core.c
+@@ -2084,6 +2084,44 @@ void iio_device_release_direct_mode(struct iio_dev *indio_dev)
+ }
+ EXPORT_SYMBOL_GPL(iio_device_release_direct_mode);
+
++/**
++ * iio_device_claim_buffer_mode - Keep device in buffer mode
++ * @indio_dev: the iio_dev associated with the device
++ *
++ * If the device is in buffer mode it is guaranteed to stay
++ * that way until iio_device_release_buffer_mode() is called.
++ *
++ * Use with iio_device_release_buffer_mode().
++ *
++ * Returns: 0 on success, -EBUSY on failure.
++ */
++int iio_device_claim_buffer_mode(struct iio_dev *indio_dev)
++{
++ mutex_lock(&indio_dev->mlock);
++
++ if (iio_buffer_enabled(indio_dev))
++ return 0;
++
++ mutex_unlock(&indio_dev->mlock);
++ return -EBUSY;
++}
++EXPORT_SYMBOL_GPL(iio_device_claim_buffer_mode);
++
++/**
++ * iio_device_release_buffer_mode - releases claim on buffer mode
++ * @indio_dev: the iio_dev associated with the device
++ *
++ * Release the claim. Device is no longer guaranteed to stay
++ * in buffer mode.
++ *
++ * Use with iio_device_claim_buffer_mode().
++ */
++void iio_device_release_buffer_mode(struct iio_dev *indio_dev)
++{
++ mutex_unlock(&indio_dev->mlock);
++}
++EXPORT_SYMBOL_GPL(iio_device_release_buffer_mode);
++
+ /**
+ * iio_device_get_current_mode() - helper function providing read-only access to
+ * the opaque @currentmode variable
+diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
+index f0ec8a5e5a7a9..9d3bd6379eb87 100644
+--- a/include/linux/iio/iio.h
++++ b/include/linux/iio/iio.h
+@@ -629,6 +629,8 @@ int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev,
+ int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp);
+ int iio_device_claim_direct_mode(struct iio_dev *indio_dev);
+ void iio_device_release_direct_mode(struct iio_dev *indio_dev);
++int iio_device_claim_buffer_mode(struct iio_dev *indio_dev);
++void iio_device_release_buffer_mode(struct iio_dev *indio_dev);
+
+ extern struct bus_type iio_bus_type;
+
+--
+2.40.1
+
--- /dev/null
+From 0de7a380026dc67c494104cd8e64a70ae58efa9e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 29 Aug 2023 11:06:22 +0800
+Subject: iio: cros_ec: fix an use-after-free in cros_ec_sensors_push_data()
+
+From: Tzung-Bi Shih <tzungbi@kernel.org>
+
+[ Upstream commit 7771c8c80d62ad065637ef74ed2962983f6c5f6d ]
+
+cros_ec_sensors_push_data() reads `indio_dev->active_scan_mask` and
+calls iio_push_to_buffers_with_timestamp() without making sure
+`indio_dev` stays in buffer mode. There is a race if `indio_dev` exits
+buffer mode right before cros_ec_sensors_push_data() accesses these.
+
+A use-after-free on `indio_dev->active_scan_mask` was observed. The
+call trace:
+[...]
+ _find_next_bit
+ cros_ec_sensors_push_data
+ cros_ec_sensorhub_event
+ blocking_notifier_call_chain
+ cros_ec_irq_thread
+
+It was caused by a race condition: one thread had just freed
+`active_scan_mask` at [1] while another thread tried to access the
+memory at [2].
+
+Fix it by calling iio_device_claim_buffer_mode() to ensure the
+`indio_dev` can't exit buffer mode during cros_ec_sensors_push_data().
+
+[1]: https://elixir.bootlin.com/linux/v6.5/source/drivers/iio/industrialio-buffer.c#L1189
+[2]: https://elixir.bootlin.com/linux/v6.5/source/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c#L198
+
+Cc: stable@vger.kernel.org
+Fixes: aa984f1ba4a4 ("iio: cros_ec: Register to cros_ec_sensorhub when EC supports FIFO")
+Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
+Reviewed-by: Guenter Roeck <groeck@chromium.org>
+Reviewed-by: Stephen Boyd <swboyd@chromium.org>
+Link: https://lore.kernel.org/r/20230829030622.1571852-1-tzungbi@kernel.org
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
+index d98f7e4d202c1..1ddce991fb3f4 100644
+--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
++++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
+@@ -190,8 +190,11 @@ int cros_ec_sensors_push_data(struct iio_dev *indio_dev,
+ /*
+ * Ignore samples if the buffer is not set: it is needed if the ODR is
+ * set but the buffer is not enabled yet.
++ *
++ * Note: iio_device_claim_buffer_mode() returns -EBUSY if the buffer
++ * is not enabled.
+ */
+- if (!iio_buffer_enabled(indio_dev))
++ if (iio_device_claim_buffer_mode(indio_dev) < 0)
+ return 0;
+
+ out = (s16 *)st->samples;
+@@ -210,6 +213,7 @@ int cros_ec_sensors_push_data(struct iio_dev *indio_dev,
+ iio_push_to_buffers_with_timestamp(indio_dev, st->samples,
+ timestamp + delta);
+
++ iio_device_release_buffer_mode(indio_dev);
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(cros_ec_sensors_push_data);
+--
+2.40.1
+
--- /dev/null
+From d93415c3d75f0be620adc7f1c8a295a640378ca8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 15:55:08 +0800
+Subject: ipv4/fib: send notify when delete source address routes
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 4b2b606075e50cdae62ab2356b0a1e206947c354 ]
+
+After deleting an interface address in fib_del_ifaddr(), the function
+scans the fib_info list for stray entries and calls fib_flush() and
+fib_table_flush(). Then the stray entries will be deleted silently and no
+RTM_DELROUTE notification will be sent.
+
+This lack of notification can make routing daemons, or monitors like
+`ip monitor route`, miss the routing changes, e.g.:
+
++ ip link add dummy1 type dummy
++ ip link add dummy2 type dummy
++ ip link set dummy1 up
++ ip link set dummy2 up
++ ip addr add 192.168.5.5/24 dev dummy1
++ ip route add 7.7.7.0/24 dev dummy2 src 192.168.5.5
++ ip -4 route
+7.7.7.0/24 dev dummy2 scope link src 192.168.5.5
+192.168.5.0/24 dev dummy1 proto kernel scope link src 192.168.5.5
++ ip monitor route
++ ip addr del 192.168.5.5/24 dev dummy1
+Deleted 192.168.5.0/24 dev dummy1 proto kernel scope link src 192.168.5.5
+Deleted broadcast 192.168.5.255 dev dummy1 table local proto kernel scope link src 192.168.5.5
+Deleted local 192.168.5.5 dev dummy1 table local proto kernel scope host src 192.168.5.5
+
+As Ido reminded, fib_table_flush() isn't only called when an address is
+deleted, but also when an interface is deleted or put down. The lack of
+notification in these cases is deliberate, and commit 7c6bb7d2faaf
+("net/ipv6: Add knob to skip DELROUTE message on device down") introduced
+a sysctl to make IPv6 behave like IPv4 in this regard. So we can't send
+route delete notifications blindly from fib_table_flush().
+
+To fix this issue, let's add a new flag in "struct fib_info" to track the
+deleted preferred source address routes, and only send the notification
+for them.
+
+After update:
++ ip monitor route
++ ip addr del 192.168.5.5/24 dev dummy1
+Deleted 192.168.5.0/24 dev dummy1 proto kernel scope link src 192.168.5.5
+Deleted broadcast 192.168.5.255 dev dummy1 table local proto kernel scope link src 192.168.5.5
+Deleted local 192.168.5.5 dev dummy1 table local proto kernel scope host src 192.168.5.5
+Deleted 7.7.7.0/24 dev dummy2 scope link src 192.168.5.5
+
+Suggested-by: Thomas Haller <thaller@redhat.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20230922075508.848925-1-liuhangbin@gmail.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ip_fib.h | 1 +
+ net/ipv4/fib_semantics.c | 1 +
+ net/ipv4/fib_trie.c | 4 ++++
+ 3 files changed, 6 insertions(+)
+
+diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
+index f0c13864180e2..15de07d365405 100644
+--- a/include/net/ip_fib.h
++++ b/include/net/ip_fib.h
+@@ -154,6 +154,7 @@ struct fib_info {
+ int fib_nhs;
+ bool fib_nh_is_v6;
+ bool nh_updated;
++ bool pfsrc_removed;
+ struct nexthop *nh;
+ struct rcu_head rcu;
+ struct fib_nh fib_nh[];
+diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
+index 894d8ac6b9d0e..5eb1b8d302bbd 100644
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -1891,6 +1891,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local)
+ continue;
+ if (fi->fib_prefsrc == local) {
+ fi->fib_flags |= RTNH_F_DEAD;
++ fi->pfsrc_removed = true;
+ ret++;
+ }
+ }
+diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
+index d13fb9e76b971..9bdfdab906fe0 100644
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -2027,6 +2027,7 @@ void fib_table_flush_external(struct fib_table *tb)
+ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
+ {
+ struct trie *t = (struct trie *)tb->tb_data;
++ struct nl_info info = { .nl_net = net };
+ struct key_vector *pn = t->kv;
+ unsigned long cindex = 1;
+ struct hlist_node *tmp;
+@@ -2089,6 +2090,9 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
+
+ fib_notify_alias_delete(net, n->key, &n->leaf, fa,
+ NULL);
++ if (fi->pfsrc_removed)
++ rtmsg_fib(RTM_DELROUTE, htonl(n->key), fa,
++ KEYLENGTH - fa->fa_slen, tb->tb_id, &info, 0);
+ hlist_del_rcu(&fa->fa_list);
+ fib_release_info(fa->fa_info);
+ alias_free_mem_rcu(fa);
+--
+2.40.1
+
--- /dev/null
+From fd9da306627bdf1e29c96bd84e2ad5ed5772797b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Nov 2022 17:01:59 +0100
+Subject: net: devlink: convert devlink port type-specific pointers to union
+
+From: Jiri Pirko <jiri@nvidia.com>
+
+[ Upstream commit 3830c5719af66fac9849cf5fb04b03d4e4bb46ff ]
+
+Instead of storing type_dev as a void pointer, convert it to a union and
+use it to store either a struct net_device or a struct ib_device pointer.
+
+Signed-off-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 8e15aee62161 ("net: move altnames together with the netdevice")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/devlink.h | 13 ++++++++++---
+ net/devlink/leftover.c | 17 +++++++++++++----
+ 2 files changed, 23 insertions(+), 7 deletions(-)
+
+diff --git a/include/net/devlink.h b/include/net/devlink.h
+index ba6b8b0949432..6c55aabaedf19 100644
+--- a/include/net/devlink.h
++++ b/include/net/devlink.h
+@@ -121,12 +121,19 @@ struct devlink_port {
+ struct list_head region_list;
+ struct devlink *devlink;
+ unsigned int index;
+- spinlock_t type_lock; /* Protects type and type_dev
+- * pointer consistency.
++ spinlock_t type_lock; /* Protects type and type_eth/ib
++ * structures consistency.
+ */
+ enum devlink_port_type type;
+ enum devlink_port_type desired_type;
+- void *type_dev;
++ union {
++ struct {
++ struct net_device *netdev;
++ } type_eth;
++ struct {
++ struct ib_device *ibdev;
++ } type_ib;
++ };
+ struct devlink_port_attrs attrs;
+ u8 attrs_set:1,
+ switch_port:1,
+diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
+index 032c7af065cd9..6fee4ce6724b7 100644
+--- a/net/devlink/leftover.c
++++ b/net/devlink/leftover.c
+@@ -1303,7 +1303,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg,
+ goto nla_put_failure_type_locked;
+ if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
+ struct net *net = devlink_net(devlink_port->devlink);
+- struct net_device *netdev = devlink_port->type_dev;
++ struct net_device *netdev = devlink_port->type_eth.netdev;
+
+ if (netdev && net_eq(net, dev_net(netdev)) &&
+ (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX,
+@@ -1313,7 +1313,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg,
+ goto nla_put_failure_type_locked;
+ }
+ if (devlink_port->type == DEVLINK_PORT_TYPE_IB) {
+- struct ib_device *ibdev = devlink_port->type_dev;
++ struct ib_device *ibdev = devlink_port->type_ib.ibdev;
+
+ if (ibdev &&
+ nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME,
+@@ -10012,7 +10012,16 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port,
+ devlink_port_type_warn_cancel(devlink_port);
+ spin_lock_bh(&devlink_port->type_lock);
+ devlink_port->type = type;
+- devlink_port->type_dev = type_dev;
++ switch (type) {
++ case DEVLINK_PORT_TYPE_ETH:
++ devlink_port->type_eth.netdev = type_dev;
++ break;
++ case DEVLINK_PORT_TYPE_IB:
++ devlink_port->type_ib.ibdev = type_dev;
++ break;
++ default:
++ break;
++ }
+ spin_unlock_bh(&devlink_port->type_lock);
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+ }
+@@ -12027,7 +12036,7 @@ devlink_trap_report_metadata_set(struct devlink_trap_metadata *metadata,
+
+ spin_lock(&in_devlink_port->type_lock);
+ if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH)
+- metadata->input_dev = in_devlink_port->type_dev;
++ metadata->input_dev = in_devlink_port->type_eth.netdev;
+ spin_unlock(&in_devlink_port->type_lock);
+ }
+
+--
+2.40.1
+
--- /dev/null
+From 879f3378cd4d13d9048f7fc13aa5ec49504819f6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Nov 2022 17:02:01 +0100
+Subject: net: devlink: move port_type_netdev_checks() call to
+ __devlink_port_type_set()
+
+From: Jiri Pirko <jiri@nvidia.com>
+
+[ Upstream commit 45791e0d00c445936bb19535fe847083b1edd26d ]
+
+As __devlink_port_type_set() is going to be called directly from the
+netdevice notifier event handler in one of the follow-up patches, move
+the port_type_netdev_checks() call there.
+
+Signed-off-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 8e15aee62161 ("net: move altnames together with the netdevice")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/devlink/leftover.c | 63 ++++++++++++++++++++++--------------------
+ 1 file changed, 33 insertions(+), 30 deletions(-)
+
+diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
+index 53dde50c5d6e2..e06fe0fad5d7d 100644
+--- a/net/devlink/leftover.c
++++ b/net/devlink/leftover.c
+@@ -10003,33 +10003,6 @@ void devlink_port_unregister(struct devlink_port *devlink_port)
+ }
+ EXPORT_SYMBOL_GPL(devlink_port_unregister);
+
+-static void __devlink_port_type_set(struct devlink_port *devlink_port,
+- enum devlink_port_type type,
+- void *type_dev)
+-{
+- ASSERT_DEVLINK_PORT_REGISTERED(devlink_port);
+-
+- if (type == DEVLINK_PORT_TYPE_NOTSET)
+- devlink_port_type_warn_schedule(devlink_port);
+- else
+- devlink_port_type_warn_cancel(devlink_port);
+-
+- spin_lock_bh(&devlink_port->type_lock);
+- devlink_port->type = type;
+- switch (type) {
+- case DEVLINK_PORT_TYPE_ETH:
+- devlink_port->type_eth.netdev = type_dev;
+- break;
+- case DEVLINK_PORT_TYPE_IB:
+- devlink_port->type_ib.ibdev = type_dev;
+- break;
+- default:
+- break;
+- }
+- spin_unlock_bh(&devlink_port->type_lock);
+- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+-}
+-
+ static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port,
+ struct net_device *netdev)
+ {
+@@ -10067,6 +10040,38 @@ static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port,
+ }
+ }
+
++static void __devlink_port_type_set(struct devlink_port *devlink_port,
++ enum devlink_port_type type,
++ void *type_dev)
++{
++ struct net_device *netdev = type_dev;
++
++ ASSERT_DEVLINK_PORT_REGISTERED(devlink_port);
++
++ if (type == DEVLINK_PORT_TYPE_NOTSET) {
++ devlink_port_type_warn_schedule(devlink_port);
++ } else {
++ devlink_port_type_warn_cancel(devlink_port);
++ if (type == DEVLINK_PORT_TYPE_ETH && netdev)
++ devlink_port_type_netdev_checks(devlink_port, netdev);
++ }
++
++ spin_lock_bh(&devlink_port->type_lock);
++ devlink_port->type = type;
++ switch (type) {
++ case DEVLINK_PORT_TYPE_ETH:
++ devlink_port->type_eth.netdev = netdev;
++ break;
++ case DEVLINK_PORT_TYPE_IB:
++ devlink_port->type_ib.ibdev = type_dev;
++ break;
++ default:
++ break;
++ }
++ spin_unlock_bh(&devlink_port->type_lock);
++ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
++}
++
+ /**
+ * devlink_port_type_eth_set - Set port type to Ethernet
+ *
+@@ -10076,9 +10081,7 @@ static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port,
+ void devlink_port_type_eth_set(struct devlink_port *devlink_port,
+ struct net_device *netdev)
+ {
+- if (netdev)
+- devlink_port_type_netdev_checks(devlink_port, netdev);
+- else
++ if (!netdev)
+ dev_warn(devlink_port->devlink->dev,
+ "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n",
+ devlink_port->index);
+--
+2.40.1
+
--- /dev/null
+From fcd7c13e3f519e3effc607cab19cf65613c7f7bf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Nov 2022 17:02:00 +0100
+Subject: net: devlink: move port_type_warn_schedule() call to
+ __devlink_port_type_set()
+
+From: Jiri Pirko <jiri@nvidia.com>
+
+[ Upstream commit 8573a04404ddacb2d966eef09bf38b2ad6dbe86f ]
+
+As __devlink_port_type_set() is going to be called directly from the
+netdevice notifier event handler in one of the follow-up patches, move
+the port_type_warn_schedule() call there.
+
+Signed-off-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 8e15aee62161 ("net: move altnames together with the netdevice")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/devlink/leftover.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
+index 6fee4ce6724b7..53dde50c5d6e2 100644
+--- a/net/devlink/leftover.c
++++ b/net/devlink/leftover.c
+@@ -10009,7 +10009,11 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port,
+ {
+ ASSERT_DEVLINK_PORT_REGISTERED(devlink_port);
+
+- devlink_port_type_warn_cancel(devlink_port);
++ if (type == DEVLINK_PORT_TYPE_NOTSET)
++ devlink_port_type_warn_schedule(devlink_port);
++ else
++ devlink_port_type_warn_cancel(devlink_port);
++
+ spin_lock_bh(&devlink_port->type_lock);
+ devlink_port->type = type;
+ switch (type) {
+@@ -10104,7 +10108,6 @@ EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
+ void devlink_port_type_clear(struct devlink_port *devlink_port)
+ {
+ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL);
+- devlink_port_type_warn_schedule(devlink_port);
+ }
+ EXPORT_SYMBOL_GPL(devlink_port_type_clear);
+
+--
+2.40.1
+
--- /dev/null
+From 4dce0f9fdd80044edf6ad75b9906aab98d573553 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Nov 2022 17:02:02 +0100
+Subject: net: devlink: take RTNL in port_fill() function only if it is not
+ held
+
+From: Jiri Pirko <jiri@nvidia.com>
+
+[ Upstream commit d41c9dbd12745cfc1cb2946cd99016d83c2c5364 ]
+
+A follow-up patch is going to introduce netdevice notifier event
+processing which is called with the RTNL mutex held. Processing of this
+will eventually lead to a call to port_notify() and port_fill(), which
+currently takes the RTNL mutex internally. So as a temporary solution,
+propagate a bool indicating whether the mutex is already held. This
+will go away in one of the follow-up patches.
+
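+The temporary conditional-locking pattern, as used in the fill path
+below:
+
+  if (!rtnl_held)
+          rtnl_lock();
+  /* ... read the port's netdev attributes under RTNL ... */
+  if (!rtnl_held)
+          rtnl_unlock();
+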
+Signed-off-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 8e15aee62161 ("net: move altnames together with the netdevice")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/devlink/leftover.c | 46 ++++++++++++++++++++++++++++--------------
+ 1 file changed, 31 insertions(+), 15 deletions(-)
+
+diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
+index e06fe0fad5d7d..b077acc255890 100644
+--- a/net/devlink/leftover.c
++++ b/net/devlink/leftover.c
+@@ -1278,7 +1278,8 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
+ static int devlink_nl_port_fill(struct sk_buff *msg,
+ struct devlink_port *devlink_port,
+ enum devlink_command cmd, u32 portid, u32 seq,
+- int flags, struct netlink_ext_ack *extack)
++ int flags, struct netlink_ext_ack *extack,
++ bool rtnl_held)
+ {
+ struct devlink *devlink = devlink_port->devlink;
+ void *hdr;
+@@ -1293,7 +1294,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg,
+ goto nla_put_failure;
+
+ /* Hold rtnl lock while accessing port's netdev attributes. */
+- rtnl_lock();
++ if (!rtnl_held)
++ rtnl_lock();
+ spin_lock_bh(&devlink_port->type_lock);
+ if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
+ goto nla_put_failure_type_locked;
+@@ -1321,7 +1323,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg,
+ goto nla_put_failure_type_locked;
+ }
+ spin_unlock_bh(&devlink_port->type_lock);
+- rtnl_unlock();
++ if (!rtnl_held)
++ rtnl_unlock();
+ if (devlink_nl_port_attrs_put(msg, devlink_port))
+ goto nla_put_failure;
+ if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
+@@ -1336,14 +1339,15 @@ static int devlink_nl_port_fill(struct sk_buff *msg,
+
+ nla_put_failure_type_locked:
+ spin_unlock_bh(&devlink_port->type_lock);
+- rtnl_unlock();
++ if (!rtnl_held)
++ rtnl_unlock();
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+
+-static void devlink_port_notify(struct devlink_port *devlink_port,
+- enum devlink_command cmd)
++static void __devlink_port_notify(struct devlink_port *devlink_port,
++ enum devlink_command cmd, bool rtnl_held)
+ {
+ struct devlink *devlink = devlink_port->devlink;
+ struct sk_buff *msg;
+@@ -1358,7 +1362,8 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
+ if (!msg)
+ return;
+
+- err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL);
++ err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL,
++ rtnl_held);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+@@ -1368,6 +1373,12 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ }
+
++static void devlink_port_notify(struct devlink_port *devlink_port,
++ enum devlink_command cmd)
++{
++ __devlink_port_notify(devlink_port, cmd, false);
++}
++
+ static void devlink_rate_notify(struct devlink_rate *devlink_rate,
+ enum devlink_command cmd)
+ {
+@@ -1534,7 +1545,7 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
+
+ err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_PORT_NEW,
+ info->snd_portid, info->snd_seq, 0,
+- info->extack);
++ info->extack, false);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+@@ -1564,7 +1575,8 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
+ DEVLINK_CMD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+- NLM_F_MULTI, cb->extack);
++ NLM_F_MULTI, cb->extack,
++ false);
+ if (err) {
+ devl_unlock(devlink);
+ devlink_put(devlink);
+@@ -1776,7 +1788,8 @@ static int devlink_port_new_notify(struct devlink *devlink,
+ }
+
+ err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW,
+- info->snd_portid, info->snd_seq, 0, NULL);
++ info->snd_portid, info->snd_seq, 0, NULL,
++ false);
+ if (err)
+ goto out;
+
+@@ -10042,7 +10055,7 @@ static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port,
+
+ static void __devlink_port_type_set(struct devlink_port *devlink_port,
+ enum devlink_port_type type,
+- void *type_dev)
++ void *type_dev, bool rtnl_held)
+ {
+ struct net_device *netdev = type_dev;
+
+@@ -10069,7 +10082,7 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port,
+ break;
+ }
+ spin_unlock_bh(&devlink_port->type_lock);
+- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
++ __devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW, rtnl_held);
+ }
+
+ /**
+@@ -10086,7 +10099,8 @@ void devlink_port_type_eth_set(struct devlink_port *devlink_port,
+ "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n",
+ devlink_port->index);
+
+- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, netdev);
++ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, netdev,
++ false);
+ }
+ EXPORT_SYMBOL_GPL(devlink_port_type_eth_set);
+
+@@ -10099,7 +10113,8 @@ EXPORT_SYMBOL_GPL(devlink_port_type_eth_set);
+ void devlink_port_type_ib_set(struct devlink_port *devlink_port,
+ struct ib_device *ibdev)
+ {
+- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_IB, ibdev);
++ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_IB, ibdev,
++ false);
+ }
+ EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
+
+@@ -10110,7 +10125,8 @@ EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
+ */
+ void devlink_port_type_clear(struct devlink_port *devlink_port)
+ {
+- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL);
++ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL,
++ false);
+ }
+ EXPORT_SYMBOL_GPL(devlink_port_type_clear);
+
+--
+2.40.1
+
--- /dev/null
+From d9a5b96d376231439213984dddf5b0b0ccccfc75 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Nov 2022 17:02:03 +0100
+Subject: net: devlink: track netdev with devlink_port assigned
+
+From: Jiri Pirko <jiri@nvidia.com>
+
+[ Upstream commit 02a68a47eadedf95748facfca6ced31fb0181d52 ]
+
+Currently, ethernet drivers are using devlink_port_type_eth_set() and
+devlink_port_type_clear() to set the devlink port type and link it to
+the related netdev.
+
+Instead of calling them directly, let the driver use the
+SET_NETDEV_DEVLINK_PORT macro to assign the devlink_port pointer and
+let devlink track it. Note the devlink port pointer is static during
+the time the netdevice is registered.
+
+In devlink code, use a per-namespace netdev notifier to track
+the netdevices with devlink_port assigned and change the internal
+devlink_port type and related type pointer accordingly.
+
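+Driver-side usage, sketched (a hypothetical ethernet probe path; the
+priv/netdev names are illustrative):
+
+  netdev = alloc_etherdev(sizeof(*priv));
+  SET_NETDEV_DEVLINK_PORT(netdev, &priv->devlink_port);
+  err = register_netdev(netdev); /* devlink tracks the port from here */
+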
+Signed-off-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 8e15aee62161 ("net: move altnames together with the netdevice")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netdevice.h | 19 ++++++++++
+ net/core/dev.c | 14 +++++---
+ net/devlink/leftover.c | 75 ++++++++++++++++++++++++++++++++++++---
+ 3 files changed, 99 insertions(+), 9 deletions(-)
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 5a04fbf724768..2b8646c39dcdd 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -2011,6 +2011,11 @@ enum netdev_ml_priv_type {
+ * registered
+ * @offload_xstats_l3: L3 HW stats for this netdevice.
+ *
++ * @devlink_port: Pointer to related devlink port structure.
++ * Assigned by a driver before netdev registration using
++ * SET_NETDEV_DEVLINK_PORT macro. This pointer is static
++ * during the time netdevice is registered.
++ *
+ * FIXME: cleanup struct net_device such that network protocol info
+ * moves out.
+ */
+@@ -2361,9 +2366,22 @@ struct net_device {
+ netdevice_tracker watchdog_dev_tracker;
+ netdevice_tracker dev_registered_tracker;
+ struct rtnl_hw_stats64 *offload_xstats_l3;
++
++ struct devlink_port *devlink_port;
+ };
+ #define to_net_dev(d) container_of(d, struct net_device, dev)
+
++/*
++ * Driver should use this to assign devlink port instance to a netdevice
++ * before it registers the netdevice. Therefore devlink_port is static
++ * during the netdev lifetime after it is registered.
++ */
++#define SET_NETDEV_DEVLINK_PORT(dev, port) \
++({ \
++ WARN_ON((dev)->reg_state != NETREG_UNINITIALIZED); \
++ ((dev)->devlink_port = (port)); \
++})
++
+ static inline bool netif_elide_gro(const struct net_device *dev)
+ {
+ if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog)
+@@ -2798,6 +2816,7 @@ enum netdev_cmd {
+ NETDEV_PRE_TYPE_CHANGE,
+ NETDEV_POST_TYPE_CHANGE,
+ NETDEV_POST_INIT,
++ NETDEV_PRE_UNINIT,
+ NETDEV_RELEASE,
+ NETDEV_NOTIFY_PEERS,
+ NETDEV_JOIN,
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 9cacd17feeaae..9bf10c9c4735a 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -1637,10 +1637,10 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
+ N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER)
+ N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE)
+ N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE)
+- N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER)
+- N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
+- N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
+- N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
++ N(POST_INIT) N(PRE_UNINIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN)
++ N(CHANGEUPPER) N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA)
++ N(BONDING_INFO) N(PRECHANGEUPPER) N(CHANGELOWERSTATE)
++ N(UDP_TUNNEL_PUSH_INFO) N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
+ N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
+ N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
+ N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE)
+@@ -10086,7 +10086,7 @@ int register_netdevice(struct net_device *dev)
+ dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
+ write_unlock(&dev_base_lock);
+ if (ret)
+- goto err_uninit;
++ goto err_uninit_notify;
+
+ __netdev_update_features(dev);
+
+@@ -10133,6 +10133,8 @@ int register_netdevice(struct net_device *dev)
+ out:
+ return ret;
+
++err_uninit_notify:
++ call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
+ err_uninit:
+ if (dev->netdev_ops->ndo_uninit)
+ dev->netdev_ops->ndo_uninit(dev);
+@@ -10883,6 +10885,8 @@ void unregister_netdevice_many(struct list_head *head)
+ netdev_name_node_alt_flush(dev);
+ netdev_name_node_free(dev->name_node);
+
++ call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
++
+ if (dev->netdev_ops->ndo_uninit)
+ dev->netdev_ops->ndo_uninit(dev);
+
+diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
+index b077acc255890..7ccfe69afd4b6 100644
+--- a/net/devlink/leftover.c
++++ b/net/devlink/leftover.c
+@@ -71,6 +71,7 @@ struct devlink {
+ refcount_t refcount;
+ struct completion comp;
+ struct rcu_head rcu;
++ struct notifier_block netdevice_nb;
+ char priv[] __aligned(NETDEV_ALIGN);
+ };
+
+@@ -9618,6 +9619,9 @@ void devlink_set_features(struct devlink *devlink, u64 features)
+ }
+ EXPORT_SYMBOL_GPL(devlink_set_features);
+
++static int devlink_netdevice_event(struct notifier_block *nb,
++ unsigned long event, void *ptr);
++
+ /**
+ * devlink_alloc_ns - Allocate new devlink instance resources
+ * in specific namespace
+@@ -9648,10 +9652,13 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
+
+ ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b,
+ &last_id, GFP_KERNEL);
+- if (ret < 0) {
+- kfree(devlink);
+- return NULL;
+- }
++ if (ret < 0)
++ goto err_xa_alloc;
++
++ devlink->netdevice_nb.notifier_call = devlink_netdevice_event;
++ ret = register_netdevice_notifier_net(net, &devlink->netdevice_nb);
++ if (ret)
++ goto err_register_netdevice_notifier;
+
+ devlink->dev = dev;
+ devlink->ops = ops;
+@@ -9678,6 +9685,12 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
+ init_completion(&devlink->comp);
+
+ return devlink;
++
++err_register_netdevice_notifier:
++ xa_erase(&devlinks, devlink->index);
++err_xa_alloc:
++ kfree(devlink);
++ return NULL;
+ }
+ EXPORT_SYMBOL_GPL(devlink_alloc_ns);
+
+@@ -9834,6 +9847,10 @@ void devlink_free(struct devlink *devlink)
+ WARN_ON(!list_empty(&devlink->port_list));
+
+ xa_destroy(&devlink->snapshot_ids);
++
++ unregister_netdevice_notifier_net(devlink_net(devlink),
++ &devlink->netdevice_nb);
++
+ xa_erase(&devlinks, devlink->index);
+
+ kfree(devlink);
+@@ -10130,6 +10147,56 @@ void devlink_port_type_clear(struct devlink_port *devlink_port)
+ }
+ EXPORT_SYMBOL_GPL(devlink_port_type_clear);
+
++static int devlink_netdevice_event(struct notifier_block *nb,
++ unsigned long event, void *ptr)
++{
++ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
++ struct devlink_port *devlink_port = netdev->devlink_port;
++ struct devlink *devlink;
++
++ devlink = container_of(nb, struct devlink, netdevice_nb);
++
++ if (!devlink_port || devlink_port->devlink != devlink)
++ return NOTIFY_OK;
++
++ switch (event) {
++ case NETDEV_POST_INIT:
++ /* Set the type but not netdev pointer. It is going to be set
++ * later on by NETDEV_REGISTER event. Happens once during
++ * netdevice register
++ */
++ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH,
++ NULL, true);
++ break;
++ case NETDEV_REGISTER:
++ /* Set the netdev on top of previously set type. Note this
++ * event happens also during net namespace change so here
++ * we take into account netdev pointer appearing in this
++ * namespace.
++ */
++ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH,
++ netdev, true);
++ break;
++ case NETDEV_UNREGISTER:
++ /* Clear netdev pointer, but not the type. This event happens
++ * also during net namespace change so we need to clear
++ * pointer to netdev that is going to another net namespace.
++ */
++ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH,
++ NULL, true);
++ break;
++ case NETDEV_PRE_UNINIT:
++ /* Clear the type and the netdev pointer. Happens once during
++ * netdevice unregister.
++ */
++ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET,
++ NULL, true);
++ break;
++ }
++
++ return NOTIFY_OK;
++}
++
+ static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
+ enum devlink_port_flavour flavour)
+ {
+--
+2.40.1
+
--- /dev/null
+From f4fdfd10202488104e6e484bd76fd1b5cd7c10c6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Apr 2023 20:25:08 +0200
+Subject: net: dst: fix missing initialization of rt_uncached
+
+From: Maxime Bizon <mbizon@freebox.fr>
+
+[ Upstream commit 418a73074da9182f571e467eaded03ea501f3281 ]
+
+xfrm_alloc_dst() followed by xfrm4_dst_destroy(), without a
+xfrm4_fill_dst() call in between, causes the following BUG:
+
+ BUG: spinlock bad magic on CPU#0, fbxhostapd/732
+ lock: 0x890b7668, .magic: 890b7668, .owner: <none>/-1, .owner_cpu: 0
+ CPU: 0 PID: 732 Comm: fbxhostapd Not tainted 6.3.0-rc6-next-20230414-00613-ge8de66369925-dirty #9
+ Hardware name: Marvell Kirkwood (Flattened Device Tree)
+ unwind_backtrace from show_stack+0x10/0x14
+ show_stack from dump_stack_lvl+0x28/0x30
+ dump_stack_lvl from do_raw_spin_lock+0x20/0x80
+ do_raw_spin_lock from rt_del_uncached_list+0x30/0x64
+ rt_del_uncached_list from xfrm4_dst_destroy+0x3c/0xbc
+ xfrm4_dst_destroy from dst_destroy+0x5c/0xb0
+ dst_destroy from rcu_process_callbacks+0xc4/0xec
+ rcu_process_callbacks from __do_softirq+0xb4/0x22c
+ __do_softirq from call_with_stack+0x1c/0x24
+ call_with_stack from do_softirq+0x60/0x6c
+ do_softirq from __local_bh_enable_ip+0xa0/0xcc
+
+Patch "net: dst: Prevent false sharing vs. dst_entry:: __refcnt" moved
+rt_uncached and rt_uncached_list fields from rtable struct to dst
+struct, so they are more zeroed by memset_after(xdst, 0, u.dst) in
+xfrm_alloc_dst().
+
+Note that rt_uncached (list_head) was never properly initialized at
+alloc time, but xfrm[46]_dst_destroy() is written in such a way that
+it was not an issue thanks to the memset:
+
+ if (xdst->u.rt.dst.rt_uncached_list)
+ rt_del_uncached_list(&xdst->u.rt);
+
+The route code does it the other way around: rt_uncached_list is
+assumed to be valid iff the rt_uncached list_head is not empty:
+
+void rt_del_uncached_list(struct rtable *rt)
+{
+ if (!list_empty(&rt->dst.rt_uncached)) {
+ struct uncached_list *ul = rt->dst.rt_uncached_list;
+
+ spin_lock_bh(&ul->lock);
+ list_del_init(&rt->dst.rt_uncached);
+ spin_unlock_bh(&ul->lock);
+ }
+}
+
+This patch adds mandatory rt_uncached list_head initialization in
+generic dst_init(), and adapt xfrm[46]_dst_destroy logic to match the
+rest of the code.
+
+Fixes: d288a162dd1c ("net: dst: Prevent false sharing vs. dst_entry:: __refcnt")
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Link: https://lore.kernel.org/oe-lkp/202304162125.18b7bcdd-oliver.sang@intel.com
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+CC: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Maxime Bizon <mbizon@freebox.fr>
+Link: https://lore.kernel.org/r/20230420182508.2417582-1-mbizon@freebox.fr
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/dst.c | 1 +
+ net/ipv4/route.c | 4 ----
+ net/ipv4/xfrm4_policy.c | 4 +---
+ net/ipv6/route.c | 1 -
+ net/ipv6/xfrm6_policy.c | 4 +---
+ 5 files changed, 3 insertions(+), 11 deletions(-)
+
+diff --git a/net/core/dst.c b/net/core/dst.c
+index 2b7b1619b5e29..1666a6f5e858e 100644
+--- a/net/core/dst.c
++++ b/net/core/dst.c
+@@ -67,6 +67,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
+ #endif
+ dst->lwtstate = NULL;
+ rcuref_init(&dst->__rcuref, initial_ref);
++ INIT_LIST_HEAD(&dst->rt_uncached);
+ dst->__use = 0;
+ dst->lastuse = jiffies;
+ dst->flags = flags;
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 7ccf6503d67aa..a44d20644fbc2 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1646,7 +1646,6 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
+ rt->rt_uses_gateway = 0;
+ rt->rt_gw_family = 0;
+ rt->rt_gw4 = 0;
+- INIT_LIST_HEAD(&rt->dst.rt_uncached);
+
+ rt->dst.output = ip_output;
+ if (flags & RTCF_LOCAL)
+@@ -1677,7 +1676,6 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
+ new_rt->rt_gw4 = rt->rt_gw4;
+ else if (rt->rt_gw_family == AF_INET6)
+ new_rt->rt_gw6 = rt->rt_gw6;
+- INIT_LIST_HEAD(&new_rt->dst.rt_uncached);
+
+ new_rt->dst.input = rt->dst.input;
+ new_rt->dst.output = rt->dst.output;
+@@ -2861,8 +2859,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
+ rt->rt_gw4 = ort->rt_gw4;
+ else if (rt->rt_gw_family == AF_INET6)
+ rt->rt_gw6 = ort->rt_gw6;
+-
+- INIT_LIST_HEAD(&rt->dst.rt_uncached);
+ }
+
+ dst_release(dst_orig);
+diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
+index 47861c8b7340e..9403bbaf1b616 100644
+--- a/net/ipv4/xfrm4_policy.c
++++ b/net/ipv4/xfrm4_policy.c
+@@ -91,7 +91,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+ xdst->u.rt.rt_gw6 = rt->rt_gw6;
+ xdst->u.rt.rt_pmtu = rt->rt_pmtu;
+ xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
+- INIT_LIST_HEAD(&xdst->u.rt.dst.rt_uncached);
+ rt_add_uncached_list(&xdst->u.rt);
+
+ return 0;
+@@ -121,8 +120,7 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+
+ dst_destroy_metrics_generic(dst);
+- if (xdst->u.rt.dst.rt_uncached_list)
+- rt_del_uncached_list(&xdst->u.rt);
++ rt_del_uncached_list(&xdst->u.rt);
+ xfrm_dst_destroy(xdst);
+ }
+
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 9db0b2318e918..d4d06a9d985e8 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -334,7 +334,6 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
+ static void rt6_info_init(struct rt6_info *rt)
+ {
+ memset_after(rt, 0, dst);
+- INIT_LIST_HEAD(&rt->dst.rt_uncached);
+ }
+
+ /* allocate dst with ip6_dst_ops */
+diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
+index 2b493f8d00918..eecc5e59da17c 100644
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -89,7 +89,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+ xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
+ xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
+ xdst->u.rt6.rt6i_src = rt->rt6i_src;
+- INIT_LIST_HEAD(&xdst->u.rt6.dst.rt_uncached);
+ rt6_uncached_list_add(&xdst->u.rt6);
+
+ return 0;
+@@ -121,8 +120,7 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
+ if (likely(xdst->u.rt6.rt6i_idev))
+ in6_dev_put(xdst->u.rt6.rt6i_idev);
+ dst_destroy_metrics_generic(dst);
+- if (xdst->u.rt6.dst.rt_uncached_list)
+- rt6_uncached_list_del(&xdst->u.rt6);
++ rt6_uncached_list_del(&xdst->u.rt6);
+ xfrm_dst_destroy(xdst);
+ }
+
+--
+2.40.1
+
--- /dev/null
+From e7f0083dd5326ec3a897b9d9c144fdaf4f630c4a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Mar 2023 21:55:29 +0100
+Subject: net: dst: Prevent false sharing vs. dst_entry:: __refcnt
+
+From: Wangyang Guo <wangyang.guo@intel.com>
+
+[ Upstream commit d288a162dd1c73507da582966f17dd226e34a0c0 ]
+
+dst_entry::__refcnt is highly contended in scenarios where many connections
+happen from and to the same IP. The reference count is an atomic_t, so the
+reference count operations have to take the cache-line exclusive.
+
+Aside of the unavoidable reference count contention there is another
+significant problem which is caused by that: False sharing.
+
+perf top identified two affected read accesses. dst_entry::lwtstate and
+rtable::rt_genid.
+
+dst_entry::__refcnt is located at offset 64 of dst_entry, which puts it
+into a separate cache line vs. the read-mostly members located at the
+beginning of the struct.
+
+That prevents false sharing vs. the struct members in the first 64
+bytes of the structure, but there is also
+
+ dst_entry::lwtstate
+
+which is located after the reference count and in the same cache line. This
+member is read after a reference count has been acquired.
+
+struct rtable embeds a struct dst_entry at offset 0. struct dst_entry has a
+size of 112 bytes, which means that the struct members of rtable which
+follow the dst member share the same cache line as dst_entry::__refcnt.
+Especially
+
+ rtable::rt_genid
+
+is also read by the contexts which have a reference count acquired
+already.
+
+When dst_entry::__refcnt is incremented or decremented via an atomic
+operation, these read accesses stall. This was found when analysing the
+memtier benchmark in 1:100 mode, which amplifies the problem extremely.
+
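+Worked out with 64-byte cache lines (offsets as described above):
+
+  line 0: offsets  0..63    read-mostly dst_entry members
+  line 1: offsets 64..127   __refcnt (offset 64), lwtstate, and, since
+                            dst_entry is 112 bytes, the leading rtable
+                            members placed after it, e.g. rt_genid
+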
+Move the rt[6i]_uncached[_list] members out of struct rtable and struct
+rt6_info into struct dst_entry to provide padding and move the lwtstate
+member after that so it ends up in the same cache line.
+
+The resulting improvement depends on the micro-architecture and the number
+of CPUs. It ranges from +20% to +120% with a localhost memtier/memcached
+benchmark.
+
+[ tglx: Rearrange struct ]
+
+Signed-off-by: Wangyang Guo <wangyang.guo@intel.com>
+Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20230323102800.042297517@linutronix.de
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/dst.h | 15 ++++++++++++++-
+ include/net/ip6_fib.h | 3 ---
+ include/net/ip6_route.h | 2 +-
+ include/net/route.h | 3 ---
+ net/ipv4/route.c | 20 ++++++++++----------
+ net/ipv4/xfrm4_policy.c | 4 ++--
+ net/ipv6/route.c | 26 +++++++++++++-------------
+ net/ipv6/xfrm6_policy.c | 4 ++--
+ 8 files changed, 42 insertions(+), 35 deletions(-)
+
+diff --git a/include/net/dst.h b/include/net/dst.h
+index d67fda89cd0fa..81f2279ea911a 100644
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -69,15 +69,28 @@ struct dst_entry {
+ #endif
+ int __use;
+ unsigned long lastuse;
+- struct lwtunnel_state *lwtstate;
+ struct rcu_head rcu_head;
+ short error;
+ short __pad;
+ __u32 tclassid;
+ #ifndef CONFIG_64BIT
++ struct lwtunnel_state *lwtstate;
+ atomic_t __refcnt; /* 32-bit offset 64 */
+ #endif
+ netdevice_tracker dev_tracker;
++
++ /*
++ * Used by rtable and rt6_info. Moves lwtstate into the next cache
++ * line on 64bit so that lwtstate does not cause false sharing with
++ * __refcnt under contention of __refcnt. This also puts the
++ * frequently accessed members of rtable and rt6_info out of the
++ * __refcnt cache line.
++ */
++ struct list_head rt_uncached;
++ struct uncached_list *rt_uncached_list;
++#ifdef CONFIG_64BIT
++ struct lwtunnel_state *lwtstate;
++#endif
+ };
+
+ struct dst_metrics {
+diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
+index fa4e6af382e2a..9ba6413fd2e3e 100644
+--- a/include/net/ip6_fib.h
++++ b/include/net/ip6_fib.h
+@@ -217,9 +217,6 @@ struct rt6_info {
+ struct inet6_dev *rt6i_idev;
+ u32 rt6i_flags;
+
+- struct list_head rt6i_uncached;
+- struct uncached_list *rt6i_uncached_list;
+-
+ /* more non-fragment space at head required */
+ unsigned short rt6i_nfheader_len;
+ };
+diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
+index 035d61d50a989..6c6b673d92554 100644
+--- a/include/net/ip6_route.h
++++ b/include/net/ip6_route.h
+@@ -104,7 +104,7 @@ static inline struct dst_entry *ip6_route_output(struct net *net,
+ static inline void ip6_rt_put_flags(struct rt6_info *rt, int flags)
+ {
+ if (!(flags & RT6_LOOKUP_F_DST_NOREF) ||
+- !list_empty(&rt->rt6i_uncached))
++ !list_empty(&rt->dst.rt_uncached))
+ ip6_rt_put(rt);
+ }
+
+diff --git a/include/net/route.h b/include/net/route.h
+index af8431b25f800..9ca0f72868b76 100644
+--- a/include/net/route.h
++++ b/include/net/route.h
+@@ -78,9 +78,6 @@ struct rtable {
+ /* Miscellaneous cached information */
+ u32 rt_mtu_locked:1,
+ rt_pmtu:31;
+-
+- struct list_head rt_uncached;
+- struct uncached_list *rt_uncached_list;
+ };
+
+ static inline bool rt_is_input_route(const struct rtable *rt)
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 9cbaae4f5ee71..7ccf6503d67aa 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1510,20 +1510,20 @@ void rt_add_uncached_list(struct rtable *rt)
+ {
+ struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
+
+- rt->rt_uncached_list = ul;
++ rt->dst.rt_uncached_list = ul;
+
+ spin_lock_bh(&ul->lock);
+- list_add_tail(&rt->rt_uncached, &ul->head);
++ list_add_tail(&rt->dst.rt_uncached, &ul->head);
+ spin_unlock_bh(&ul->lock);
+ }
+
+ void rt_del_uncached_list(struct rtable *rt)
+ {
+- if (!list_empty(&rt->rt_uncached)) {
+- struct uncached_list *ul = rt->rt_uncached_list;
++ if (!list_empty(&rt->dst.rt_uncached)) {
++ struct uncached_list *ul = rt->dst.rt_uncached_list;
+
+ spin_lock_bh(&ul->lock);
+- list_del_init(&rt->rt_uncached);
++ list_del_init(&rt->dst.rt_uncached);
+ spin_unlock_bh(&ul->lock);
+ }
+ }
+@@ -1548,13 +1548,13 @@ void rt_flush_dev(struct net_device *dev)
+ continue;
+
+ spin_lock_bh(&ul->lock);
+- list_for_each_entry_safe(rt, safe, &ul->head, rt_uncached) {
++ list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) {
+ if (rt->dst.dev != dev)
+ continue;
+ rt->dst.dev = blackhole_netdev;
+ netdev_ref_replace(dev, blackhole_netdev,
+ &rt->dst.dev_tracker, GFP_ATOMIC);
+- list_move(&rt->rt_uncached, &ul->quarantine);
++ list_move(&rt->dst.rt_uncached, &ul->quarantine);
+ }
+ spin_unlock_bh(&ul->lock);
+ }
+@@ -1646,7 +1646,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
+ rt->rt_uses_gateway = 0;
+ rt->rt_gw_family = 0;
+ rt->rt_gw4 = 0;
+- INIT_LIST_HEAD(&rt->rt_uncached);
++ INIT_LIST_HEAD(&rt->dst.rt_uncached);
+
+ rt->dst.output = ip_output;
+ if (flags & RTCF_LOCAL)
+@@ -1677,7 +1677,7 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
+ new_rt->rt_gw4 = rt->rt_gw4;
+ else if (rt->rt_gw_family == AF_INET6)
+ new_rt->rt_gw6 = rt->rt_gw6;
+- INIT_LIST_HEAD(&new_rt->rt_uncached);
++ INIT_LIST_HEAD(&new_rt->dst.rt_uncached);
+
+ new_rt->dst.input = rt->dst.input;
+ new_rt->dst.output = rt->dst.output;
+@@ -2862,7 +2862,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
+ else if (rt->rt_gw_family == AF_INET6)
+ rt->rt_gw6 = ort->rt_gw6;
+
+- INIT_LIST_HEAD(&rt->rt_uncached);
++ INIT_LIST_HEAD(&rt->dst.rt_uncached);
+ }
+
+ dst_release(dst_orig);
+diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
+index 3d0dfa6cf9f96..47861c8b7340e 100644
+--- a/net/ipv4/xfrm4_policy.c
++++ b/net/ipv4/xfrm4_policy.c
+@@ -91,7 +91,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+ xdst->u.rt.rt_gw6 = rt->rt_gw6;
+ xdst->u.rt.rt_pmtu = rt->rt_pmtu;
+ xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
+- INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
++ INIT_LIST_HEAD(&xdst->u.rt.dst.rt_uncached);
+ rt_add_uncached_list(&xdst->u.rt);
+
+ return 0;
+@@ -121,7 +121,7 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+
+ dst_destroy_metrics_generic(dst);
+- if (xdst->u.rt.rt_uncached_list)
++ if (xdst->u.rt.dst.rt_uncached_list)
+ rt_del_uncached_list(&xdst->u.rt);
+ xfrm_dst_destroy(xdst);
+ }
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 0bcdb675ba2c1..7205adee46c21 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -139,20 +139,20 @@ void rt6_uncached_list_add(struct rt6_info *rt)
+ {
+ struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
+
+- rt->rt6i_uncached_list = ul;
++ rt->dst.rt_uncached_list = ul;
+
+ spin_lock_bh(&ul->lock);
+- list_add_tail(&rt->rt6i_uncached, &ul->head);
++ list_add_tail(&rt->dst.rt_uncached, &ul->head);
+ spin_unlock_bh(&ul->lock);
+ }
+
+ void rt6_uncached_list_del(struct rt6_info *rt)
+ {
+- if (!list_empty(&rt->rt6i_uncached)) {
+- struct uncached_list *ul = rt->rt6i_uncached_list;
++ if (!list_empty(&rt->dst.rt_uncached)) {
++ struct uncached_list *ul = rt->dst.rt_uncached_list;
+
+ spin_lock_bh(&ul->lock);
+- list_del_init(&rt->rt6i_uncached);
++ list_del_init(&rt->dst.rt_uncached);
+ spin_unlock_bh(&ul->lock);
+ }
+ }
+@@ -169,7 +169,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev)
+ continue;
+
+ spin_lock_bh(&ul->lock);
+- list_for_each_entry_safe(rt, safe, &ul->head, rt6i_uncached) {
++ list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) {
+ struct inet6_dev *rt_idev = rt->rt6i_idev;
+ struct net_device *rt_dev = rt->dst.dev;
+ bool handled = false;
+@@ -188,7 +188,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev)
+ handled = true;
+ }
+ if (handled)
+- list_move(&rt->rt6i_uncached,
++ list_move(&rt->dst.rt_uncached,
+ &ul->quarantine);
+ }
+ spin_unlock_bh(&ul->lock);
+@@ -334,7 +334,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
+ static void rt6_info_init(struct rt6_info *rt)
+ {
+ memset_after(rt, 0, dst);
+- INIT_LIST_HEAD(&rt->rt6i_uncached);
++ INIT_LIST_HEAD(&rt->dst.rt_uncached);
+ }
+
+ /* allocate dst with ip6_dst_ops */
+@@ -2641,7 +2641,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net,
+ dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
+ rt6 = (struct rt6_info *)dst;
+ /* For dst cached in uncached_list, refcnt is already taken. */
+- if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
++ if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) {
+ dst = &net->ipv6.ip6_null_entry->dst;
+ dst_hold(dst);
+ }
+@@ -2751,7 +2751,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
+ from = rcu_dereference(rt->from);
+
+ if (from && (rt->rt6i_flags & RTF_PCPU ||
+- unlikely(!list_empty(&rt->rt6i_uncached))))
++ unlikely(!list_empty(&rt->dst.rt_uncached))))
+ dst_ret = rt6_dst_from_check(rt, from, cookie);
+ else
+ dst_ret = rt6_check(rt, from, cookie);
+@@ -6488,7 +6488,7 @@ static int __net_init ip6_route_net_init(struct net *net)
+ net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+ dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
+ ip6_template_metrics, true);
+- INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->rt6i_uncached);
++ INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->dst.rt_uncached);
+
+ #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ net->ipv6.fib6_has_custom_rules = false;
+@@ -6500,7 +6500,7 @@ static int __net_init ip6_route_net_init(struct net *net)
+ net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+ dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
+ ip6_template_metrics, true);
+- INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->rt6i_uncached);
++ INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->dst.rt_uncached);
+
+ net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
+ sizeof(*net->ipv6.ip6_blk_hole_entry),
+@@ -6510,7 +6510,7 @@ static int __net_init ip6_route_net_init(struct net *net)
+ net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+ dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
+ ip6_template_metrics, true);
+- INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached);
++ INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->dst.rt_uncached);
+ #ifdef CONFIG_IPV6_SUBTREES
+ net->ipv6.fib6_routes_require_src = 0;
+ #endif
+diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
+index ea435eba30534..2b493f8d00918 100644
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -89,7 +89,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+ xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
+ xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
+ xdst->u.rt6.rt6i_src = rt->rt6i_src;
+- INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached);
++ INIT_LIST_HEAD(&xdst->u.rt6.dst.rt_uncached);
+ rt6_uncached_list_add(&xdst->u.rt6);
+
+ return 0;
+@@ -121,7 +121,7 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
+ if (likely(xdst->u.rt6.rt6i_idev))
+ in6_dev_put(xdst->u.rt6.rt6i_idev);
+ dst_destroy_metrics_generic(dst);
+- if (xdst->u.rt6.rt6i_uncached_list)
++ if (xdst->u.rt6.dst.rt_uncached_list)
+ rt6_uncached_list_del(&xdst->u.rt6);
+ xfrm_dst_destroy(xdst);
+ }
+--
+2.40.1
+
--- /dev/null
+From 180ab46081f3404a77e4cef550c4f0b28701a1b3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Mar 2023 21:55:32 +0100
+Subject: net: dst: Switch to rcuref_t reference counting
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit bc9d3a9f2afca189a6ae40225b6985e3c775375e ]
+
+Under high contention dst_entry::__refcnt becomes a significant bottleneck.
+
+atomic_inc_not_zero() is implemented with a cmpxchg() loop, which goes into
+high retry rates on contention.
+
+Switch the reference count to rcuref_t which results in a significant
+performance gain. Rename the reference count member to __rcuref to reflect
+the change.
+
+The gain depends on the micro-architecture and the number of concurrent
+operations and has been measured in the range of +25% to +130% with a
+localhost memtier/memcached benchmark which amplifies the problem
+massively.
+
+Running the memtier/memcached benchmark over a real (1Gb) network
+connection, the conversion on top of the false sharing fix for struct
+dst_entry::__refcnt results in a total gain in the 2%-5% range over the
+upstream baseline.
+
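+As an illustration (not part of this patch), the hold and release paths
+move from open-coded atomics to the rcuref API, using only helpers that
+appear in the diff below:
+
+    /* hold: rcuref_get() scales where the cmpxchg() loop retried */
+    if (dst && !rcuref_get(&dst->__rcuref))
+        dst = NULL;
+
+    /* release: rcuref_put() returns true once the last reference drops */
+    if (dst && rcuref_put(&dst->__rcuref))
+        call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu);
+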
+Reported-by: Wangyang Guo <wangyang.guo@intel.com>
+Reported-by: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/20230307125538.989175656@linutronix.de
+Link: https://lore.kernel.org/r/20230323102800.215027837@linutronix.de
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/dst.h | 19 ++++++++++---------
+ include/net/sock.h | 2 +-
+ net/bridge/br_nf_core.c | 2 +-
+ net/core/dst.c | 26 +++++---------------------
+ net/core/rtnetlink.c | 2 +-
+ net/ipv6/route.c | 6 +++---
+ net/netfilter/ipvs/ip_vs_xmit.c | 4 ++--
+ 7 files changed, 23 insertions(+), 38 deletions(-)
+
+diff --git a/include/net/dst.h b/include/net/dst.h
+index 81f2279ea911a..78884429deed8 100644
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -16,6 +16,7 @@
+ #include <linux/bug.h>
+ #include <linux/jiffies.h>
+ #include <linux/refcount.h>
++#include <linux/rcuref.h>
+ #include <net/neighbour.h>
+ #include <asm/processor.h>
+ #include <linux/indirect_call_wrapper.h>
+@@ -61,11 +62,11 @@ struct dst_entry {
+ unsigned short trailer_len; /* space to reserve at tail */
+
+ /*
+- * __refcnt wants to be on a different cache line from
++ * __rcuref wants to be on a different cache line from
+ * input/output/ops or performance tanks badly
+ */
+ #ifdef CONFIG_64BIT
+- atomic_t __refcnt; /* 64-bit offset 64 */
++ rcuref_t __rcuref; /* 64-bit offset 64 */
+ #endif
+ int __use;
+ unsigned long lastuse;
+@@ -75,16 +76,16 @@ struct dst_entry {
+ __u32 tclassid;
+ #ifndef CONFIG_64BIT
+ struct lwtunnel_state *lwtstate;
+- atomic_t __refcnt; /* 32-bit offset 64 */
++ rcuref_t __rcuref; /* 32-bit offset 64 */
+ #endif
+ netdevice_tracker dev_tracker;
+
+ /*
+ * Used by rtable and rt6_info. Moves lwtstate into the next cache
+ * line on 64bit so that lwtstate does not cause false sharing with
+- * __refcnt under contention of __refcnt. This also puts the
++ * __rcuref under contention of __rcuref. This also puts the
+ * frequently accessed members of rtable and rt6_info out of the
+- * __refcnt cache line.
++ * __rcuref cache line.
+ */
+ struct list_head rt_uncached;
+ struct uncached_list *rt_uncached_list;
+@@ -238,10 +239,10 @@ static inline void dst_hold(struct dst_entry *dst)
+ {
+ /*
+ * If your kernel compilation stops here, please check
+- * the placement of __refcnt in struct dst_entry
++ * the placement of __rcuref in struct dst_entry
+ */
+- BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63);
+- WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0);
++ BUILD_BUG_ON(offsetof(struct dst_entry, __rcuref) & 63);
++ WARN_ON(!rcuref_get(&dst->__rcuref));
+ }
+
+ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
+@@ -305,7 +306,7 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb
+ */
+ static inline bool dst_hold_safe(struct dst_entry *dst)
+ {
+- return atomic_inc_not_zero(&dst->__refcnt);
++ return rcuref_get(&dst->__rcuref);
+ }
+
+ /**
+diff --git a/include/net/sock.h b/include/net/sock.h
+index fe695e8bfe289..4c988b981d6e1 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -2181,7 +2181,7 @@ sk_dst_get(struct sock *sk)
+
+ rcu_read_lock();
+ dst = rcu_dereference(sk->sk_dst_cache);
+- if (dst && !atomic_inc_not_zero(&dst->__refcnt))
++ if (dst && !rcuref_get(&dst->__rcuref))
+ dst = NULL;
+ rcu_read_unlock();
+ return dst;
+diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
+index 8c69f0c95a8ed..98aea5485aaef 100644
+--- a/net/bridge/br_nf_core.c
++++ b/net/bridge/br_nf_core.c
+@@ -73,7 +73,7 @@ void br_netfilter_rtable_init(struct net_bridge *br)
+ {
+ struct rtable *rt = &br->fake_rtable;
+
+- atomic_set(&rt->dst.__refcnt, 1);
++ rcuref_init(&rt->dst.__rcuref, 1);
+ rt->dst.dev = br->dev;
+ dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
+ rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
+diff --git a/net/core/dst.c b/net/core/dst.c
+index a4e738d321ba2..2b7b1619b5e29 100644
+--- a/net/core/dst.c
++++ b/net/core/dst.c
+@@ -66,7 +66,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
+ dst->tclassid = 0;
+ #endif
+ dst->lwtstate = NULL;
+- atomic_set(&dst->__refcnt, initial_ref);
++ rcuref_init(&dst->__rcuref, initial_ref);
+ dst->__use = 0;
+ dst->lastuse = jiffies;
+ dst->flags = flags;
+@@ -166,31 +166,15 @@ EXPORT_SYMBOL(dst_dev_put);
+
+ void dst_release(struct dst_entry *dst)
+ {
+- if (dst) {
+- int newrefcnt;
+-
+- newrefcnt = atomic_dec_return(&dst->__refcnt);
+- if (WARN_ONCE(newrefcnt < 0, "dst_release underflow"))
+- net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
+- __func__, dst, newrefcnt);
+- if (!newrefcnt)
+- call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu);
+- }
++ if (dst && rcuref_put(&dst->__rcuref))
++ call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu);
+ }
+ EXPORT_SYMBOL(dst_release);
+
+ void dst_release_immediate(struct dst_entry *dst)
+ {
+- if (dst) {
+- int newrefcnt;
+-
+- newrefcnt = atomic_dec_return(&dst->__refcnt);
+- if (WARN_ONCE(newrefcnt < 0, "dst_release_immediate underflow"))
+- net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
+- __func__, dst, newrefcnt);
+- if (!newrefcnt)
+- dst_destroy(dst);
+- }
++ if (dst && rcuref_put(&dst->__rcuref))
++ dst_destroy(dst);
+ }
+ EXPORT_SYMBOL(dst_release_immediate);
+
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index 854b3fd66b1be..90810408cc5df 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -839,7 +839,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
+ if (dst) {
+ ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse);
+ ci.rta_used = dst->__use;
+- ci.rta_clntref = atomic_read(&dst->__refcnt);
++ ci.rta_clntref = rcuref_read(&dst->__rcuref);
+ }
+ if (expires) {
+ unsigned long clock;
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 7205adee46c21..9db0b2318e918 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -293,7 +293,7 @@ static const struct fib6_info fib6_null_entry_template = {
+
+ static const struct rt6_info ip6_null_entry_template = {
+ .dst = {
+- .__refcnt = ATOMIC_INIT(1),
++ .__rcuref = RCUREF_INIT(1),
+ .__use = 1,
+ .obsolete = DST_OBSOLETE_FORCE_CHK,
+ .error = -ENETUNREACH,
+@@ -307,7 +307,7 @@ static const struct rt6_info ip6_null_entry_template = {
+
+ static const struct rt6_info ip6_prohibit_entry_template = {
+ .dst = {
+- .__refcnt = ATOMIC_INIT(1),
++ .__rcuref = RCUREF_INIT(1),
+ .__use = 1,
+ .obsolete = DST_OBSOLETE_FORCE_CHK,
+ .error = -EACCES,
+@@ -319,7 +319,7 @@ static const struct rt6_info ip6_prohibit_entry_template = {
+
+ static const struct rt6_info ip6_blk_hole_entry_template = {
+ .dst = {
+- .__refcnt = ATOMIC_INIT(1),
++ .__rcuref = RCUREF_INIT(1),
+ .__use = 1,
+ .obsolete = DST_OBSOLETE_FORCE_CHK,
+ .error = -EINVAL,
+diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
+index 7243079ef3546..70ef036909fb0 100644
+--- a/net/netfilter/ipvs/ip_vs_xmit.c
++++ b/net/netfilter/ipvs/ip_vs_xmit.c
+@@ -339,7 +339,7 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+ spin_unlock_bh(&dest->dst_lock);
+ IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
+ &dest->addr.ip, &dest_dst->dst_saddr.ip,
+- atomic_read(&rt->dst.__refcnt));
++ rcuref_read(&rt->dst.__rcuref));
+ }
+ if (ret_saddr)
+ *ret_saddr = dest_dst->dst_saddr.ip;
+@@ -507,7 +507,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+ spin_unlock_bh(&dest->dst_lock);
+ IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
+ &dest->addr.in6, &dest_dst->dst_saddr.in6,
+- atomic_read(&rt->dst.__refcnt));
++ rcuref_read(&rt->dst.__rcuref));
+ }
+ if (ret_saddr)
+ *ret_saddr = dest_dst->dst_saddr.in6;
+--
+2.40.1
+
--- /dev/null
+From 0cc97277dc5b56296843165ddf9b8c10dd28b988 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 27 Aug 2023 13:31:53 +0300
+Subject: net/mlx5: E-switch, register event handler before arming the event
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit 7624e58a8b3a251e3e5108b32f2183b34453db32 ]
+
+Currently, mlx5 registers the event handler for the vport context change
+event some time after arming the event. This can lead to a missed event,
+which will result in wrong rules in the FDB.
+Hence, register the event handler before arming the event.
+
+This solution is valid since FW sends the vport context change event
+only for vports that SW armed, and SW arms a vport when enabling it,
+which is done after the FDB has been created.
+
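+The resulting order in mlx5_eswitch_enable_locked() is, schematically (a
+simplified sketch of the diff below):
+
+    MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
+    mlx5_eq_notifier_register(esw->dev, &esw->nb);  /* register handler */
+    err = esw_legacy_enable(esw);  /* enabling vports arms the event */
+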
+Fixes: 6933a9379559 ("net/mlx5: E-Switch, Use async events chain")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/eswitch.c | 17 ++++++++---------
+ 1 file changed, 8 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+index 4b9d567c8f473..48939c72b5925 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -969,11 +969,8 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
+ return ERR_PTR(err);
+ }
+
+-static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw)
++static void mlx5_eswitch_event_handler_register(struct mlx5_eswitch *esw)
+ {
+- MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
+- mlx5_eq_notifier_register(esw->dev, &esw->nb);
+-
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) {
+ MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler,
+ ESW_FUNCTIONS_CHANGED);
+@@ -981,13 +978,11 @@ static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw)
+ }
+ }
+
+-static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw)
++static void mlx5_eswitch_event_handler_unregister(struct mlx5_eswitch *esw)
+ {
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev))
+ mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb);
+
+- mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
+-
+ flush_workqueue(esw->work_queue);
+ }
+
+@@ -1273,6 +1268,9 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
+
+ mlx5_eswitch_update_num_of_vfs(esw, num_vfs);
+
++ MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
++ mlx5_eq_notifier_register(esw->dev, &esw->nb);
++
+ if (esw->mode == MLX5_ESWITCH_LEGACY) {
+ err = esw_legacy_enable(esw);
+ } else {
+@@ -1285,7 +1283,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
+
+ esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED;
+
+- mlx5_eswitch_event_handlers_register(esw);
++ mlx5_eswitch_event_handler_register(esw);
+
+ esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n",
+ esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+@@ -1394,7 +1392,8 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw)
+ */
+ mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_LEGACY);
+
+- mlx5_eswitch_event_handlers_unregister(esw);
++ mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
++ mlx5_eswitch_event_handler_unregister(esw);
+
+ esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n",
+ esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+--
+2.40.1
+
--- /dev/null
+From 647422347b0b80222c1e258860fae97a04185ce8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 21:48:30 +0300
+Subject: net/mlx5: Handle fw tracer change ownership event based on MTRC
+
+From: Maher Sanalla <msanalla@nvidia.com>
+
+[ Upstream commit 92fd39634541eb0a11bf1bafbc8ba92d6ddb8dba ]
+
+Currently, whenever fw issues a change ownership event, the PF that owns
+the fw tracer drops its ownership directly and the other PFs try to pick
+up the ownership via what the MTRC register suggests.
+
+In some cases, the driver releases ownership of the tracer and reacquires
+it later on. Whenever the driver releases ownership of the tracer, fw
+issues a change ownership event. This event can be delayed and arrive
+after the driver has reacquired ownership of the tracer. The late event
+will then trigger the tracer-owner PF to release the ownership again,
+leading to a scenario where no PF owns the tracer.
+
+To prevent the scenario described above, when handling a change
+ownership event, do not drop ownership of the tracer directly; instead,
+read the fw MTRC register to retrieve the up-to-date owner of the tracer
+and set it accordingly at the driver level.
+
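+The handler change is then simply (a sketch of the diff below):
+
+    if (tracer->owner) {
+        /* Re-query ownership via MTRC instead of dropping it, so a
+         * stale change-ownership event becomes a no-op.
+         */
+        mlx5_fw_tracer_ownership_acquire(tracer);
+        return;
+    }
+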
+Fixes: f53aaa31cce7 ("net/mlx5: FW tracer, implement tracer logic")
+Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
+Reviewed-by: Shay Drory <shayd@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+index c4e40834e3ff9..374c0011a127b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+@@ -821,7 +821,7 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
+
+ mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner);
+ if (tracer->owner) {
+- tracer->owner = false;
++ mlx5_fw_tracer_ownership_acquire(tracer);
+ return;
+ }
+
+--
+2.40.1
+
--- /dev/null
+From 34a4405cae46541c39782e3348d53af100de1ba8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Sep 2023 02:28:47 +0000
+Subject: net/mlx5e: Don't offload internal port if filter device is out device
+
+From: Jianbo Liu <jianbol@nvidia.com>
+
+[ Upstream commit 06b4eac9c4beda520b8a4dbbb8e33dba9d1c8fba ]
+
+In the cited commit, if the routing device is an ovs internal port, the
+out device is set to the uplink, and packets go out after encapsulation.
+
+If the filter device is the uplink, it can trigger the following syndrome:
+mlx5_core 0000:08:00.0: mlx5_cmd_out_err:803:(pid 3966): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0xcdb051), err(-22)
+
+Fix this issue by not offloading the internal port if the filter device
+is the out device. In this case, packets are not forwarded to the root
+table to be processed; the termination table is used instead to forward
+them from uplink to uplink.
+
+Fixes: 100ad4e2d758 ("net/mlx5e: Offload internal port as encap route device")
+Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
+Reviewed-by: Ariel Levkovich <lariel@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+index cd15d36b1507e..907ad6ffe7275 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+@@ -23,7 +23,8 @@ static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
+
+ route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
+
+- if (!route_dev || !netif_is_ovs_master(route_dev))
++ if (!route_dev || !netif_is_ovs_master(route_dev) ||
++ attr->parse_attr->filter_dev == e->out_dev)
+ goto out;
+
+ err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
+--
+2.40.1
+
--- /dev/null
+From 8f55e12b5b35ebb31ca91026b62ab165c32080ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Oct 2023 18:38:16 -0700
+Subject: net: move altnames together with the netdevice
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 8e15aee621618a3ee3abecaf1fd8c1428098b7ef ]
+
+The altname nodes are currently not moved to the new netns
+when the netdevice itself moves:
+
+ [ ~]# ip netns add test
+ [ ~]# ip -netns test link add name eth0 type dummy
+ [ ~]# ip -netns test link property add dev eth0 altname some-name
+ [ ~]# ip -netns test link show dev some-name
+ 2: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
+ link/ether 1e:67:ed:19:3d:24 brd ff:ff:ff:ff:ff:ff
+ altname some-name
+ [ ~]# ip -netns test link set dev eth0 netns 1
+ [ ~]# ip link
+ ...
+ 3: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
+ link/ether 02:40:88:62:ec:b8 brd ff:ff:ff:ff:ff:ff
+ altname some-name
+ [ ~]# ip li show dev some-name
+ Device "some-name" does not exist.
+
+Remove them from the hash table when the device is unlisted
+and add them back when it is listed again.
+
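+The fix mirrors the handling of the primary name node (a condensed
+sketch of the diff below):
+
+    netdev_for_each_altname(dev, name_node)
+        netdev_name_node_add(net, name_node);  /* in list_netdevice() */
+
+    netdev_for_each_altname(dev, name_node)
+        netdev_name_node_del(name_node);       /* in unlist_netdevice() */
+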
+Fixes: 36fbf1e52bd3 ("net: rtnetlink: add linkprop commands to add and delete alternative ifnames")
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/dev.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 14066030cb1dc..ed2484f5e54e4 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -381,6 +381,7 @@ static void netdev_name_node_alt_flush(struct net_device *dev)
+ /* Device list insertion */
+ static void list_netdevice(struct net_device *dev)
+ {
++ struct netdev_name_node *name_node;
+ struct net *net = dev_net(dev);
+
+ ASSERT_RTNL();
+@@ -391,6 +392,10 @@ static void list_netdevice(struct net_device *dev)
+ hlist_add_head_rcu(&dev->index_hlist,
+ dev_index_hash(net, dev->ifindex));
+ write_unlock(&dev_base_lock);
++
++ netdev_for_each_altname(dev, name_node)
++ netdev_name_node_add(net, name_node);
++
+ /* We reserved the ifindex, this can't fail */
+ WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));
+
+@@ -402,12 +407,16 @@ static void list_netdevice(struct net_device *dev)
+ */
+ static void unlist_netdevice(struct net_device *dev, bool lock)
+ {
++ struct netdev_name_node *name_node;
+ struct net *net = dev_net(dev);
+
+ ASSERT_RTNL();
+
+ xa_erase(&net->dev_by_index, dev->ifindex);
+
++ netdev_for_each_altname(dev, name_node)
++ netdev_name_node_del(name_node);
++
+ /* Unlink dev from the device chain */
+ if (lock)
+ write_lock(&dev_base_lock);
+@@ -10872,7 +10881,6 @@ void unregister_netdevice_many(struct list_head *head)
+ synchronize_net();
+
+ list_for_each_entry(dev, head, unreg_list) {
+- struct netdev_name_node *name_node;
+ struct sk_buff *skb = NULL;
+
+ /* Shutdown queueing discipline. */
+@@ -10898,9 +10906,6 @@ void unregister_netdevice_many(struct list_head *head)
+ dev_uc_flush(dev);
+ dev_mc_flush(dev);
+
+- netdev_for_each_altname(dev, name_node)
+- netdev_name_node_del(name_node);
+- synchronize_rcu();
+ netdev_name_node_alt_flush(dev);
+ netdev_name_node_free(dev->name_node);
+
+--
+2.40.1
+
--- /dev/null
+From 5d2e7dc7d7580d57cb1069921d0fb2359b96b909 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Oct 2023 15:48:51 +0800
+Subject: net/smc: fix smc clc failed issue when netdevice not in init_net
+
+From: Albert Huang <huangjie.albert@bytedance.com>
+
+[ Upstream commit c68681ae46eaaa1640b52fe366d21a93b2185df5 ]
+
+If the netdevice is within a container and communicates externally
+through network technologies such as VxLAN, we won't be able to find
+routing information in the init_net namespace. To address this issue,
+we need to add a struct net parameter to the smc_ib_find_route function.
+This allows us to locate the routing information within the corresponding
+net namespace, ensuring the correct completion of the SMC CLC interaction.
+
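+The crux of the change in smc_ib_find_route() (from the diff below):
+
+    rt = ip_route_output_flow(net, &fl4, NULL);  /* was &init_net */
+
+with callers passing sock_net(&smc->sk) or dev_net(ndev) as appropriate.
+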
+Fixes: e5c4744cfb59 ("net/smc: add SMC-Rv2 connection establishment")
+Signed-off-by: Albert Huang <huangjie.albert@bytedance.com>
+Reviewed-by: Dust Li <dust.li@linux.alibaba.com>
+Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
+Link: https://lore.kernel.org/r/20231011074851.95280-1-huangjie.albert@bytedance.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/smc/af_smc.c | 3 ++-
+ net/smc/smc_ib.c | 7 ++++---
+ net/smc/smc_ib.h | 2 +-
+ 3 files changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
+index 9fe62b5b02974..4ea41d6e36969 100644
+--- a/net/smc/af_smc.c
++++ b/net/smc/af_smc.c
+@@ -1187,6 +1187,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
+ struct smc_clc_first_contact_ext *fce =
+ (struct smc_clc_first_contact_ext *)
+ (((u8 *)clc_v2) + sizeof(*clc_v2));
++ struct net *net = sock_net(&smc->sk);
+
+ if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
+ return 0;
+@@ -1195,7 +1196,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
+ memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
+ ini->smcrv2.uses_gateway = false;
+ } else {
+- if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
++ if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
+ smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
+ ini->smcrv2.nexthop_mac,
+ &ini->smcrv2.uses_gateway))
+diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
+index 854772dd52fd1..ace8611735321 100644
+--- a/net/smc/smc_ib.c
++++ b/net/smc/smc_ib.c
+@@ -193,7 +193,7 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
+ return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
+ }
+
+-int smc_ib_find_route(__be32 saddr, __be32 daddr,
++int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
+ u8 nexthop_mac[], u8 *uses_gateway)
+ {
+ struct neighbour *neigh = NULL;
+@@ -205,7 +205,7 @@ int smc_ib_find_route(__be32 saddr, __be32 daddr,
+
+ if (daddr == cpu_to_be32(INADDR_NONE))
+ goto out;
+- rt = ip_route_output_flow(&init_net, &fl4, NULL);
++ rt = ip_route_output_flow(net, &fl4, NULL);
+ if (IS_ERR(rt))
+ goto out;
+ if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
+@@ -235,6 +235,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
+ if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
+ smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
+ struct in_device *in_dev = __in_dev_get_rcu(ndev);
++ struct net *net = dev_net(ndev);
+ const struct in_ifaddr *ifa;
+ bool subnet_match = false;
+
+@@ -248,7 +249,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
+ }
+ if (!subnet_match)
+ goto out;
+- if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr,
++ if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr,
+ smcrv2->daddr,
+ smcrv2->nexthop_mac,
+ &smcrv2->uses_gateway))
+diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
+index 034295676e881..ebcb05ede7f55 100644
+--- a/net/smc/smc_ib.h
++++ b/net/smc/smc_ib.h
+@@ -113,7 +113,7 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
+ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
+ unsigned short vlan_id, u8 gid[], u8 *sgid_index,
+ struct smc_init_info_smcrv2 *smcrv2);
+-int smc_ib_find_route(__be32 saddr, __be32 daddr,
++int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
+ u8 nexthop_mac[], u8 *uses_gateway);
+ bool smc_ib_is_valid_local_systemid(void);
+ int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
+--
+2.40.1
+
--- /dev/null
+From eb90504709ba5fd1ccd141d303e4e61e940ac3fd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jul 2023 11:55:29 -0700
+Subject: net: store netdevs in an xarray
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 759ab1edb56c88906830fd6b2e7b12514dd32758 ]
+
+Iterating over the netdev hash table for netlink dumps is hard.
+Dumps are done in "chunks" so we need to save the position
+after each chunk, so we know where to restart from. Because
+netdevs are stored in a hash table we remember which bucket
+we were in and how many devices we dumped.
+
+Since we don't hold any locks across the "chunks" - devices may
+come and go while we're dumping. If that happens we may miss
+a device (if a device is deleted from the bucket we were in).
+We indicate to user space that this may have happened by setting
+NLM_F_DUMP_INTR. User space is supposed to dump again (I think)
+if it sees that. Somehow I doubt most user space gets this right...
+
+To illustrate let's look at an example:
+
+ System state:
+ start: # [A, B, C]
+ del: B # [A, C]
+
+with the hash table we may dump [A, B], missing C completely even
+though it existed both before and after the "del B".
+
+Add an xarray and use it to allocate ifindexes. This way we
+can iterate ifindexes in order, without the worry that we'll
+skip one. We may still generate a dump of a state which "never
+existed", for example for a set of values and sequence of ops:
+
+ System state:
+ start: # [A, B]
+ add: C # [A, C, B]
+ del: B # [A, C]
+
+we may generate a dump of [A], if C got an index between A and B.
+System has never been in such state. But I'm 90% sure that's perfectly
+fine, important part is that we can't _miss_ devices which exist before
+and after. User space which wants to mirror kernel's state subscribes
+to notifications and does periodic dumps so it will know that C exists
+from the notification about its creation or from the next dump
+(next dump is _guaranteed_ to include C, if it doesn't get removed).
+
+To avoid any perf regressions, keep the hash table for now. Most
+net namespaces have very few devices, and when microbenchmarking 1M
+lookups on Skylake I get the following results (not counting loopback
+in the number of devs):
+
+ #devs | hash | xa | delta
+ 2 | 18.3 | 20.1 | + 9.8%
+ 16 | 18.3 | 20.1 | + 9.5%
+ 64 | 18.3 | 26.3 | +43.8%
+ 128 | 20.4 | 26.3 | +28.6%
+ 256 | 20.0 | 26.4 | +32.1%
+ 1024 | 26.6 | 26.7 | + 0.2%
+ 8192 |541.3 | 33.5 | -93.8%
+
+No surprises since the hash table has 256 entries.
+The microbenchmark scans indexes in order; if the pattern is more
+random, xa starts to win already at 512 devices. But that's a lot
+of devices in practice.
+
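+The allocator then reduces to the xarray API (condensed from the diff
+below):
+
+    static int dev_index_reserve(struct net *net, u32 ifindex)
+    {
+        int err;
+
+        if (!ifindex)
+            err = xa_alloc_cyclic(&net->dev_by_index, &ifindex, NULL,
+                                  xa_limit_31b, &net->ifindex, GFP_KERNEL);
+        else
+            err = xa_insert(&net->dev_by_index, ifindex, NULL, GFP_KERNEL);
+        return err < 0 ? err : ifindex;
+    }
+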
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20230726185530.2247698-2-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 8e15aee62161 ("net: move altnames together with the netdevice")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/net_namespace.h | 4 +-
+ net/core/dev.c | 82 ++++++++++++++++++++++++-------------
+ 2 files changed, 57 insertions(+), 29 deletions(-)
+
+diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
+index 8c3587d5c308f..3f66f32b88edd 100644
+--- a/include/net/net_namespace.h
++++ b/include/net/net_namespace.h
+@@ -42,6 +42,7 @@
+ #include <linux/idr.h>
+ #include <linux/skbuff.h>
+ #include <linux/notifier.h>
++#include <linux/xarray.h>
+
+ struct user_namespace;
+ struct proc_dir_entry;
+@@ -69,7 +70,7 @@ struct net {
+ atomic_t dev_unreg_count;
+
+ unsigned int dev_base_seq; /* protected by rtnl_mutex */
+- int ifindex;
++ u32 ifindex;
+
+ spinlock_t nsid_lock;
+ atomic_t fnhe_genid;
+@@ -108,6 +109,7 @@ struct net {
+
+ struct hlist_head *dev_name_head;
+ struct hlist_head *dev_index_head;
++ struct xarray dev_by_index;
+ struct raw_notifier_head netdev_chain;
+
+ /* Note that @hash_mix can be read millions times per second,
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 9bf10c9c4735a..14066030cb1dc 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -391,6 +391,8 @@ static void list_netdevice(struct net_device *dev)
+ hlist_add_head_rcu(&dev->index_hlist,
+ dev_index_hash(net, dev->ifindex));
+ write_unlock(&dev_base_lock);
++ /* We reserved the ifindex, this can't fail */
++ WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));
+
+ dev_base_seq_inc(net);
+ }
+@@ -400,8 +402,12 @@ static void list_netdevice(struct net_device *dev)
+ */
+ static void unlist_netdevice(struct net_device *dev, bool lock)
+ {
++ struct net *net = dev_net(dev);
++
+ ASSERT_RTNL();
+
++ xa_erase(&net->dev_by_index, dev->ifindex);
++
+ /* Unlink dev from the device chain */
+ if (lock)
+ write_lock(&dev_base_lock);
+@@ -9542,23 +9548,35 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
+ }
+
+ /**
+- * dev_new_index - allocate an ifindex
+- * @net: the applicable net namespace
++ * dev_index_reserve() - allocate an ifindex in a namespace
++ * @net: the applicable net namespace
++ * @ifindex: requested ifindex, pass %0 to get one allocated
++ *
++ * Allocate a ifindex for a new device. Caller must either use the ifindex
++ * to store the device (via list_netdevice()) or call dev_index_release()
++ * to give the index up.
+ *
+- * Returns a suitable unique value for a new device interface
+- * number. The caller must hold the rtnl semaphore or the
+- * dev_base_lock to be sure it remains unique.
++ * Return: a suitable unique value for a new device interface number or -errno.
+ */
+-static int dev_new_index(struct net *net)
++static int dev_index_reserve(struct net *net, u32 ifindex)
+ {
+- int ifindex = net->ifindex;
++ int err;
+
+- for (;;) {
+- if (++ifindex <= 0)
+- ifindex = 1;
+- if (!__dev_get_by_index(net, ifindex))
+- return net->ifindex = ifindex;
+- }
++ if (!ifindex)
++ err = xa_alloc_cyclic(&net->dev_by_index, &ifindex, NULL,
++ xa_limit_31b, &net->ifindex, GFP_KERNEL);
++ else
++ err = xa_insert(&net->dev_by_index, ifindex, NULL, GFP_KERNEL);
++ if (err < 0)
++ return err;
++
++ return ifindex;
++}
++
++static void dev_index_release(struct net *net, int ifindex)
++{
++ /* Expect only unused indexes, unlist_netdevice() removes the used */
++ WARN_ON(xa_erase(&net->dev_by_index, ifindex));
+ }
+
+ /* Delayed registration/unregisteration */
+@@ -10028,11 +10046,10 @@ int register_netdevice(struct net_device *dev)
+ goto err_uninit;
+ }
+
+- ret = -EBUSY;
+- if (!dev->ifindex)
+- dev->ifindex = dev_new_index(net);
+- else if (__dev_get_by_index(net, dev->ifindex))
++ ret = dev_index_reserve(net, dev->ifindex);
++ if (ret < 0)
+ goto err_uninit;
++ dev->ifindex = ret;
+
+ /* Transfer changeable features to wanted_features and enable
+ * software offloads (GSO and GRO).
+@@ -10079,7 +10096,7 @@ int register_netdevice(struct net_device *dev)
+ ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
+ ret = notifier_to_errno(ret);
+ if (ret)
+- goto err_uninit;
++ goto err_ifindex_release;
+
+ ret = netdev_register_kobject(dev);
+ write_lock(&dev_base_lock);
+@@ -10135,6 +10152,8 @@ int register_netdevice(struct net_device *dev)
+
+ err_uninit_notify:
+ call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
++err_ifindex_release:
++ dev_index_release(net, dev->ifindex);
+ err_uninit:
+ if (dev->netdev_ops->ndo_uninit)
+ dev->netdev_ops->ndo_uninit(dev);
+@@ -10994,9 +11013,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
+ goto out;
+
+ /* Check that new_ifindex isn't used yet. */
+- err = -EBUSY;
+- if (new_ifindex && __dev_get_by_index(net, new_ifindex))
+- goto out;
++ if (new_ifindex) {
++ err = dev_index_reserve(net, new_ifindex);
++ if (err < 0)
++ goto out;
++ } else {
++ /* If there is an ifindex conflict assign a new one */
++ err = dev_index_reserve(net, dev->ifindex);
++ if (err == -EBUSY)
++ err = dev_index_reserve(net, 0);
++ if (err < 0)
++ goto out;
++ new_ifindex = err;
++ }
+
+ /*
+ * And now a mini version of register_netdevice unregister_netdevice.
+@@ -11024,13 +11053,6 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
+ rcu_barrier();
+
+ new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
+- /* If there is an ifindex conflict assign a new one */
+- if (!new_ifindex) {
+- if (__dev_get_by_index(net, dev->ifindex))
+- new_ifindex = dev_new_index(net);
+- else
+- new_ifindex = dev->ifindex;
+- }
+
+ rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
+ new_ifindex);
+@@ -11211,6 +11233,9 @@ static int __net_init netdev_init(struct net *net)
+ if (net->dev_index_head == NULL)
+ goto err_idx;
+
++ net->ifindex = 1;
++ xa_init_flags(&net->dev_by_index, XA_FLAGS_ALLOC);
++
+ RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
+
+ return 0;
+@@ -11308,6 +11333,7 @@ static void __net_exit netdev_exit(struct net *net)
+ {
+ kfree(net->dev_name_head);
+ kfree(net->dev_index_head);
++ xa_destroy(&net->dev_by_index);
+ if (net != &init_net)
+ WARN_ON_ONCE(!list_empty(&net->dev_base_head));
+ }
+--
+2.40.1
+
--- /dev/null
+From 6ce3f0053906b8490a89c6cf6d77df853020de76 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jul 2023 21:15:55 +0200
+Subject: net/tls: split tls_rx_reader_lock
+
+From: Hannes Reinecke <hare@suse.de>
+
+[ Upstream commit f9ae3204fb45d0749befc1cdff50f691c7461e5a ]
+
+Split tls_rx_reader_{lock,unlock} into an 'acquire/release' part and
+the actual socket-locking part.
+With that, the reader lock can also be taken in situations where
+the socket is already locked.
+
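+A caller that already holds the socket lock can then use the inner
+helpers directly (an illustrative sketch, not part of this diff):
+
+    /* lock_sock(sk) already taken by the caller */
+    err = tls_rx_reader_acquire(sk, ctx, nonblock);
+    if (!err) {
+        /* ... read path ... */
+        tls_rx_reader_release(sk, ctx);
+    }
+    /* the caller still owns the socket lock */
+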
+Suggested-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Jakub Kicinski <kuba@kernel.org>
+Link: https://lore.kernel.org/r/20230726191556.41714-6-hare@suse.de
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 419ce133ab92 ("tcp: allow again tcp_disconnect() when threads are waiting")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_sw.c | 38 ++++++++++++++++++++++----------------
+ 1 file changed, 22 insertions(+), 16 deletions(-)
+
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index 9be00ebbb2341..c5c8fdadc05e8 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -1851,13 +1851,10 @@ tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot,
+ return sk_flush_backlog(sk);
+ }
+
+-static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
+- bool nonblock)
++static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx,
++ bool nonblock)
+ {
+ long timeo;
+- int err;
+-
+- lock_sock(sk);
+
+ timeo = sock_rcvtimeo(sk, nonblock);
+
+@@ -1871,26 +1868,30 @@ static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
+ !READ_ONCE(ctx->reader_present), &wait);
+ remove_wait_queue(&ctx->wq, &wait);
+
+- if (timeo <= 0) {
+- err = -EAGAIN;
+- goto err_unlock;
+- }
+- if (signal_pending(current)) {
+- err = sock_intr_errno(timeo);
+- goto err_unlock;
+- }
++ if (timeo <= 0)
++ return -EAGAIN;
++ if (signal_pending(current))
++ return sock_intr_errno(timeo);
+ }
+
+ WRITE_ONCE(ctx->reader_present, 1);
+
+ return 0;
++}
+
+-err_unlock:
+- release_sock(sk);
++static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
++ bool nonblock)
++{
++ int err;
++
++ lock_sock(sk);
++ err = tls_rx_reader_acquire(sk, ctx, nonblock);
++ if (err)
++ release_sock(sk);
+ return err;
+ }
+
+-static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
++static void tls_rx_reader_release(struct sock *sk, struct tls_sw_context_rx *ctx)
+ {
+ if (unlikely(ctx->reader_contended)) {
+ if (wq_has_sleeper(&ctx->wq))
+@@ -1902,6 +1903,11 @@ static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
+ }
+
+ WRITE_ONCE(ctx->reader_present, 0);
++}
++
++static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
++{
++ tls_rx_reader_release(sk, ctx);
+ release_sock(sk);
+ }
+
+--
+2.40.1
+
--- /dev/null
+From 51290b74abe5ae7c0313a41f7e182e0d23a0ad56 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Nov 2022 19:19:08 +0000
+Subject: net: Use call_rcu_hurry() for dst_release()
+
+From: Joel Fernandes (Google) <joel@joelfernandes.org>
+
+[ Upstream commit 483c26ff63f42e8898ed43aca0b9953bc91f0cd4 ]
+
+In a networking test on ChromeOS, kernels built with the new
+CONFIG_RCU_LAZY=y Kconfig option fail a networking test in the teardown
+phase.
+
+This failure may be reproduced as follows: ip netns del <name>
+
+The CONFIG_RCU_LAZY=y Kconfig option was introduced by earlier commits
+in this series for the benefit of certain battery-powered systems.
+This Kconfig option causes call_rcu() to delay its callbacks in order
+to batch them. This means that a given RCU grace period covers more
+callbacks, thus reducing the number of grace periods, in turn reducing
+the amount of energy consumed, which increases battery lifetime, which
+can be a very good thing. This is not a subtle effect: In some important
+use cases, the battery lifetime is increased by more than 10%.
+
+This CONFIG_RCU_LAZY=y option is available only for CPUs that offload
+callbacks, for example, CPUs mentioned in the rcu_nocbs kernel boot
+parameter passed to kernels built with CONFIG_RCU_NOCB_CPU=y.
+
+Delaying callbacks is normally not a problem because most callbacks do
+nothing but free memory. If the system is short on memory, a shrinker
+will kick all currently queued lazy callbacks out of their laziness,
+thus freeing their memory in short order. Similarly, the rcu_barrier()
+function, which blocks until all currently queued callbacks are invoked,
+will also kick lazy callbacks, thus enabling rcu_barrier() to complete
+in a timely manner.
+
+However, there are some cases where laziness is not a good option.
+For example, synchronize_rcu() invokes call_rcu(), and blocks until
+the newly queued callback is invoked. It would not be a good for
+synchronize_rcu() to block for ten seconds, even on an idle system.
+Therefore, synchronize_rcu() invokes call_rcu_hurry() instead of
+call_rcu(). The arrival of a non-lazy call_rcu_hurry() callback on a
+given CPU kicks any lazy callbacks that might be already queued on that
+CPU. After all, if there is going to be a grace period, all callbacks
+might as well get full benefit from it.
+
+Yes, this could be done the other way around by creating a
+call_rcu_lazy(), but earlier experience with this approach and
+feedback at the 2022 Linux Plumbers Conference shifted the approach
+to call_rcu() being lazy with call_rcu_hurry() for the few places
+where laziness is inappropriate.
+
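+The resulting rule of thumb, schematically (obj->rcu and free_obj are
+placeholder names, not from this patch):
+
+    call_rcu(&obj->rcu, free_obj);        /* may be batched for seconds */
+    call_rcu_hurry(&obj->rcu, free_obj);  /* not lazy, GP starts soon */
+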
+Returning to the test failure, use of ftrace showed that this failure
+was caused by the added delays due to this new lazy behavior of
+call_rcu() in kernels built with CONFIG_RCU_LAZY=y.
+
+Therefore, make dst_release() use call_rcu_hurry() in order to revert
+to the old test-failure-free behavior.
+
+[ paulmck: Apply s/call_rcu_flush/call_rcu_hurry/ feedback from Tejun Heo. ]
+
+Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: David Ahern <dsahern@kernel.org>
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: <netdev@vger.kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/dst.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/dst.c b/net/core/dst.c
+index bc9c9be4e0801..a4e738d321ba2 100644
+--- a/net/core/dst.c
++++ b/net/core/dst.c
+@@ -174,7 +174,7 @@ void dst_release(struct dst_entry *dst)
+ net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
+ __func__, dst, newrefcnt);
+ if (!newrefcnt)
+- call_rcu(&dst->rcu_head, dst_destroy_rcu);
++ call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu);
+ }
+ }
+ EXPORT_SYMBOL(dst_release);
+--
+2.40.1
+
--- /dev/null
+From aceaae0532ca72b5bec91314f57c627992743869 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Aug 2023 22:18:34 +0800
+Subject: net: xfrm: skip policies marked as dead while reinserting policies
+
+From: Dong Chenchen <dongchenchen2@huawei.com>
+
+[ Upstream commit 6d41d4fe28724db16ca1016df0713a07e0cc7448 ]
+
+BUG: KASAN: slab-use-after-free in xfrm_policy_inexact_list_reinsert+0xb6/0x430
+Read of size 1 at addr ffff8881051f3bf8 by task ip/668
+
+CPU: 2 PID: 668 Comm: ip Not tainted 6.5.0-rc5-00182-g25aa0bebba72-dirty #64
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13 04/01/2014
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x72/0xa0
+ print_report+0xd0/0x620
+ kasan_report+0xb6/0xf0
+ xfrm_policy_inexact_list_reinsert+0xb6/0x430
+ xfrm_policy_inexact_insert_node.constprop.0+0x537/0x800
+ xfrm_policy_inexact_alloc_chain+0x23f/0x320
+ xfrm_policy_inexact_insert+0x6b/0x590
+ xfrm_policy_insert+0x3b1/0x480
+ xfrm_add_policy+0x23c/0x3c0
+ xfrm_user_rcv_msg+0x2d0/0x510
+ netlink_rcv_skb+0x10d/0x2d0
+ xfrm_netlink_rcv+0x49/0x60
+ netlink_unicast+0x3fe/0x540
+ netlink_sendmsg+0x528/0x970
+ sock_sendmsg+0x14a/0x160
+ ____sys_sendmsg+0x4fc/0x580
+ ___sys_sendmsg+0xef/0x160
+ __sys_sendmsg+0xf7/0x1b0
+ do_syscall_64+0x3f/0x90
+ entry_SYSCALL_64_after_hwframe+0x73/0xdd
+
+The root cause is:
+
+cpu 0 cpu1
+xfrm_dump_policy
+xfrm_policy_walk
+list_move_tail
+ xfrm_add_policy
+ ... ...
+ xfrm_policy_inexact_list_reinsert
+ list_for_each_entry_reverse
+ if (!policy->bydst_reinsert)
+ //read non-existent policy
+xfrm_dump_policy_done
+xfrm_policy_walk_done
+list_del(&walk->walk.all);
+
+If dump_one_policy() returns an error (triggered by the netlink socket),
+xfrm_policy_walk() will move the walk initialized by the socket to the
+list net->xfrm.policy_all, so this socket becomes visible in the global
+policy list. The head *walk can be traversed when users add policies
+with different prefixlen and trigger xfrm_policy node merge.
+
+The issue can also be triggered by policy list traversal while rehashing
+and flushing policies.
+
+It can be fixed by skipping such "policies", i.e. those with walk.dead set to 1.
+
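+The same guard is applied at each traversal of the policy list (a
+sketch of the diff below):
+
+    list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+        if (pol->walk.dead)  /* a dump walker, not a real policy */
+            continue;
+        ...
+    }
+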
+Fixes: 9cf545ebd591 ("xfrm: policy: store inexact policies in a tree ordered by destination address")
+Fixes: 12a169e7d8f4 ("ipsec: Put dumpers on the dump list")
+Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/xfrm/xfrm_policy.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index e4d320e036fed..e47c670c7e2cd 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -850,7 +850,7 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net,
+ struct hlist_node *newpos = NULL;
+ bool matches_s, matches_d;
+
+- if (!policy->bydst_reinsert)
++ if (policy->walk.dead || !policy->bydst_reinsert)
+ continue;
+
+ WARN_ON_ONCE(policy->family != family);
+@@ -1255,8 +1255,11 @@ static void xfrm_hash_rebuild(struct work_struct *work)
+ struct xfrm_pol_inexact_bin *bin;
+ u8 dbits, sbits;
+
++ if (policy->walk.dead)
++ continue;
++
+ dir = xfrm_policy_id2dir(policy->index);
+- if (policy->walk.dead || dir >= XFRM_POLICY_MAX)
++ if (dir >= XFRM_POLICY_MAX)
+ continue;
+
+ if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
+@@ -1788,9 +1791,11 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
+
+ again:
+ list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
++ if (pol->walk.dead)
++ continue;
++
+ dir = xfrm_policy_id2dir(pol->index);
+- if (pol->walk.dead ||
+- dir >= XFRM_POLICY_MAX ||
++ if (dir >= XFRM_POLICY_MAX ||
+ pol->type != type)
+ continue;
+
+--
+2.40.1
+
--- /dev/null
+From ba39eabcdc1e1f894d45daaa82a41111a377f7dc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Oct 2023 13:12:58 +0200
+Subject: netfilter: nf_tables: do not remove elements if set backend
+ implements .abort
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit ebd032fa881882fef2acb9da1bbde48d8233241d ]
+
+The pipapo set backend maintains two copies of the data structure;
+removing the elements from the copy that is going to be discarded slows
+down the abort path significantly, from several minutes down to a few
+seconds after this patch.
+
+Fixes: 212ed75dc5fb ("netfilter: nf_tables: integrate pipapo into commit protocol")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 5e3dbe2652dbd..8ab545802dd15 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -9931,7 +9931,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ break;
+ }
+ te = (struct nft_trans_elem *)trans->data;
+- nft_setelem_remove(net, te->set, &te->elem);
++ if (!te->set->ops->abort ||
++ nft_setelem_is_catchall(te->set, &te->elem))
++ nft_setelem_remove(net, te->set, &te->elem);
++
+ if (!nft_setelem_is_catchall(te->set, &te->elem))
+ atomic_dec(&te->set->nelems);
+
+--
+2.40.1
+
--- /dev/null
+From 8f06b0849f28567f375bb02ef66207b3656145bc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Oct 2023 13:18:39 +0200
+Subject: netfilter: nf_tables: revert do not remove elements if set backend
+ implements .abort
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit f86fb94011aeb3b26337fc22204ca726aeb8bc24 ]
+
+The nf_tables_abort_release() path calls nft_set_elem_destroy() for
+NFT_MSG_NEWSETELEM, which releases the element; however, a reference to
+the element still remains in the working copy.
+
+Fixes: ebd032fa8818 ("netfilter: nf_tables: do not remove elements if set backend implements .abort")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 8ab545802dd15..5e3dbe2652dbd 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -9931,10 +9931,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ break;
+ }
+ te = (struct nft_trans_elem *)trans->data;
+- if (!te->set->ops->abort ||
+- nft_setelem_is_catchall(te->set, &te->elem))
+- nft_setelem_remove(net, te->set, &te->elem);
+-
++ nft_setelem_remove(net, te->set, &te->elem);
+ if (!nft_setelem_is_catchall(te->set, &te->elem))
+ atomic_dec(&te->set->nelems);
+
+--
+2.40.1
+
--- /dev/null
+From c8b90985a6ff0526b292308662bf750bbf8c8e09 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Sep 2023 09:33:12 -0400
+Subject: overlayfs: set ctime when setting mtime and atime
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 03dbab3bba5f009d053635c729d1244f2c8bad38 ]
+
+Nathan reported that he was seeing the new warning in
+setattr_copy_mgtime pop when starting podman containers. Overlayfs is
+trying to set the atime and mtime via notify_change without also
+setting the ctime.
+
+POSIX states that when the atime and mtime are updated via utimes() that
+we must also update the ctime to the current time. The situation with
+overlayfs copy-up is analogies, so add ATTR_CTIME to the bitmask.
+notify_change will fill in the value.
+
+Reported-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Tested-by: Nathan Chancellor <nathan@kernel.org>
+Acked-by: Christian Brauner <brauner@kernel.org>
+Acked-by: Amir Goldstein <amir73il@gmail.com>
+Message-Id: <20230913-ctime-v1-1-c6bc509cbc27@kernel.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/overlayfs/copy_up.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
+index e6d711f42607b..86d4b6975dbcb 100644
+--- a/fs/overlayfs/copy_up.c
++++ b/fs/overlayfs/copy_up.c
+@@ -300,7 +300,7 @@ static int ovl_set_timestamps(struct ovl_fs *ofs, struct dentry *upperdentry,
+ {
+ struct iattr attr = {
+ .ia_valid =
+- ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
++ ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_CTIME,
+ .ia_atime = stat->atime,
+ .ia_mtime = stat->mtime,
+ };
+--
+2.40.1
+
--- /dev/null
+From cc371d392bce2bad950e7b6d8130a1df94253ed2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Oct 2023 19:59:00 -0400
+Subject: platform/x86: touchscreen_dmi: Add info for the Positivo C4128B
+
+From: Renan Guilherme Lebre Ramos <japareaggae@gmail.com>
+
+[ Upstream commit aa7dcba3bae6869122828b144a3cfd231718089d ]
+
+Add information for the Positivo C4128B, a notebook/tablet convertible.
+
+Link: https://github.com/onitake/gsl-firmware/pull/217
+Signed-off-by: Renan Guilherme Lebre Ramos <japareaggae@gmail.com>
+Link: https://lore.kernel.org/r/20231004235900.426240-1-japareaggae@gmail.com
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/platform/x86/touchscreen_dmi.c | 23 +++++++++++++++++++++++
+ 1 file changed, 23 insertions(+)
+
+diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
+index 68e66b60445c3..9a92d515abb9b 100644
+--- a/drivers/platform/x86/touchscreen_dmi.c
++++ b/drivers/platform/x86/touchscreen_dmi.c
+@@ -740,6 +740,21 @@ static const struct ts_dmi_data pipo_w11_data = {
+ .properties = pipo_w11_props,
+ };
+
++static const struct property_entry positivo_c4128b_props[] = {
++ PROPERTY_ENTRY_U32("touchscreen-min-x", 4),
++ PROPERTY_ENTRY_U32("touchscreen-min-y", 13),
++ PROPERTY_ENTRY_U32("touchscreen-size-x", 1915),
++ PROPERTY_ENTRY_U32("touchscreen-size-y", 1269),
++ PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-positivo-c4128b.fw"),
++ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
++ { }
++};
++
++static const struct ts_dmi_data positivo_c4128b_data = {
++ .acpi_name = "MSSL1680:00",
++ .properties = positivo_c4128b_props,
++};
++
+ static const struct property_entry pov_mobii_wintab_p800w_v20_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 32),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 16),
+@@ -1457,6 +1472,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
+ DMI_MATCH(DMI_BIOS_VERSION, "MOMO.G.WI71C.MABMRBA02"),
+ },
+ },
++ {
++ /* Positivo C4128B */
++ .driver_data = (void *)&positivo_c4128b_data,
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "C4128B-1"),
++ },
++ },
+ {
+ /* Point of View mobii wintab p800w (v2.0) */
+ .driver_data = (void *)&pov_mobii_wintab_p800w_v20_data,
+--
+2.40.1
+
--- /dev/null
+From 9d9a717eeeef7a5ad593c1e5044cd5890f4e7e0b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Aug 2023 09:37:43 -0400
+Subject: pwr-mlxbf: extend Kconfig to include gpio-mlxbf3 dependency
+
+From: David Thompson <davthompson@nvidia.com>
+
+[ Upstream commit 82f07f1acf417b81e793145c167dd5e156024de4 ]
+
+The BlueField power handling driver (pwr-mlxbf.c) provides
+functionality for both BlueField-2 and BlueField-3 based
+platforms. This driver also depends on the SoC-specific
+BlueField GPIO driver, whether gpio-mlxbf2 or gpio-mlxbf3.
+This patch extends the Kconfig definition to include the
+dependency on the gpio-mlxbf3 driver, if applicable.
+
+Signed-off-by: David Thompson <davthompson@nvidia.com>
+Reviewed-by: Asmaa Mnebhi <asmaa@nvidia.com>
+Link: https://lore.kernel.org/r/20230823133743.31275-1-davthompson@nvidia.com
+Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/power/reset/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig
+index a8c46ba5878fe..54201f0374104 100644
+--- a/drivers/power/reset/Kconfig
++++ b/drivers/power/reset/Kconfig
+@@ -299,7 +299,7 @@ config NVMEM_REBOOT_MODE
+
+ config POWER_MLXBF
+ tristate "Mellanox BlueField power handling driver"
+- depends on (GPIO_MLXBF2 && ACPI)
++ depends on (GPIO_MLXBF2 || GPIO_MLXBF3) && ACPI
+ help
+ This driver supports reset or low power mode handling for Mellanox BlueField.
+
+--
+2.40.1
+
--- /dev/null
+From 3f132e8e674299042d9e5313dfbfcb3de55af912 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 17 Sep 2022 16:41:59 +0000
+Subject: rcu: Fix late wakeup when flush of bypass cblist happens
+
+From: Joel Fernandes (Google) <joel@joelfernandes.org>
+
+[ Upstream commit b50606f35f4b73c8e4c6b9c64fe7ba72ea919134 ]
+
+When the bypass cblist gets too big or its timeout has occurred, it is
+flushed into the main cblist. However, the bypass timer is still running
+and the behavior is that it would eventually expire and wake the GP
+thread.
+
+Since we are going to use the bypass cblist for lazy CBs, do the wakeup
+as soon as the flush of the "too big or too long" bypass list happens.
+Otherwise, callbacks that get promoted from lazy to non-lazy can see
+long delays.
+
+This is a good thing to do anyway (regardless of the future lazy
+patches), since it makes the behavior consistent with that of other code
+paths where flushing into the ->cblist quickly moves the GP kthread out
+of its sleeping state.
+
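+In outline, the fix samples the pending state before the flush and wakes
+the GP kthread right away once the flush succeeds. A simplified sketch
+of the resulting rcu_nocb_try_bypass() flow (condensed from the diff
+below, not standalone code):
+
+	*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
+	if (!rcu_nocb_flush_bypass(rdp, rhp, j)) {
+		/* flush failed: fall back to the normal enqueue path */
+	}
+	...
+	/* Flush succeeded: don't wait for the bypass timer; wake the
+	 * GP kthread now if the cblist was previously empty.
+	 */
+	__call_rcu_nocb_wake(rdp, *was_alldone, flags);
+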
+[ Frederic Weisbecker: Changes to avoid unnecessary GP-thread wakeups plus
+ comment changes. ]
+
+Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/rcu/tree_nocb.h | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
+index 0a5f0ef414845..04c87f250e01a 100644
+--- a/kernel/rcu/tree_nocb.h
++++ b/kernel/rcu/tree_nocb.h
+@@ -433,8 +433,9 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) ||
+ ncbs >= qhimark) {
+ rcu_nocb_lock(rdp);
++ *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
++
+ if (!rcu_nocb_flush_bypass(rdp, rhp, j)) {
+- *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
+ if (*was_alldone)
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+ TPS("FirstQ"));
+@@ -447,7 +448,12 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ rcu_advance_cbs_nowake(rdp->mynode, rdp);
+ rdp->nocb_gp_adv_time = j;
+ }
+- rcu_nocb_unlock_irqrestore(rdp, flags);
++
++ // The flush succeeded and we moved CBs into the regular list.
++ // Don't wait for the wake up timer as it may be too far ahead.
++ // Wake up the GP thread now instead, if the cblist was empty.
++ __call_rcu_nocb_wake(rdp, *was_alldone, flags);
++
+ return true; // Callback already enqueued.
+ }
+
+--
+2.40.1
+
--- /dev/null
+From 6e201fbbe533ee08318f49c360c83145a1231ac2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 16 Oct 2022 16:22:53 +0000
+Subject: rcu: Fix missing nocb gp wake on rcu_barrier()
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+[ Upstream commit b8f7aca3f0e0e6223094ba2662bac90353674b04 ]
+
+In preparation for RCU lazy changes, wake up the RCU nocb gp thread if
+needed after an entrain. This change prevents the RCU barrier callback
+from waiting in the queue for several seconds before the lazy callbacks
+in front of it are serviced.
+
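+A condensed sketch of the resulting rcu_barrier_entrain() logic (taken
+from the diff below, with unrelated details elided):
+
+	was_alldone = rcu_rdp_is_offloaded(rdp) &&
+		      !rcu_segcblist_pend_cbs(&rdp->cblist);
+	WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
+	wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist);
+	/* ... entrain rdp->barrier_head ... */
+	rcu_nocb_unlock(rdp);
+	if (wake_nocb)
+		wake_nocb_gp(rdp, false);
+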
+Reported-by: Joel Fernandes (Google) <joel@joelfernandes.org>
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/rcu/tree.c | 11 +++++++++++
+ kernel/rcu/tree.h | 1 +
+ kernel/rcu/tree_nocb.h | 5 +++++
+ 3 files changed, 17 insertions(+)
+
+diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
+index 917a1e43f7839..6ea59aa53db78 100644
+--- a/kernel/rcu/tree.c
++++ b/kernel/rcu/tree.c
+@@ -3908,6 +3908,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
+ {
+ unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
+ unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
++ bool wake_nocb = false;
++ bool was_alldone = false;
+
+ lockdep_assert_held(&rcu_state.barrier_lock);
+ if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
+@@ -3916,7 +3918,14 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
+ rdp->barrier_head.func = rcu_barrier_callback;
+ debug_rcu_head_queue(&rdp->barrier_head);
+ rcu_nocb_lock(rdp);
++ /*
++ * Flush bypass and wakeup rcuog if we add callbacks to an empty regular
++ * queue. This way we don't wait for bypass timer that can reach seconds
++ * if it's fully lazy.
++ */
++ was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist);
+ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
++ wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist);
+ if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) {
+ atomic_inc(&rcu_state.barrier_cpu_count);
+ } else {
+@@ -3924,6 +3933,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
+ rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
+ }
+ rcu_nocb_unlock(rdp);
++ if (wake_nocb)
++ wake_nocb_gp(rdp, false);
+ smp_store_release(&rdp->barrier_seq_snap, gseq);
+ }
+
+diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
+index d4a97e40ea9c3..925dd98f8b23b 100644
+--- a/kernel/rcu/tree.h
++++ b/kernel/rcu/tree.h
+@@ -439,6 +439,7 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp);
+ static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
+ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
+ static void rcu_init_one_nocb(struct rcu_node *rnp);
++static bool wake_nocb_gp(struct rcu_data *rdp, bool force);
+ static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ unsigned long j);
+ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
+index 04c87f250e01a..74d4983d68f82 100644
+--- a/kernel/rcu/tree_nocb.h
++++ b/kernel/rcu/tree_nocb.h
+@@ -1570,6 +1570,11 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
+ {
+ }
+
++static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
++{
++ return false;
++}
++
+ static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ unsigned long j)
+ {
+--
+2.40.1
+
--- /dev/null
+From 7b253194c188b40a04df52ea0aeacae23989ef0d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 16 Oct 2022 16:22:54 +0000
+Subject: rcu: Make call_rcu() lazy to save power
+
+From: Joel Fernandes (Google) <joel@joelfernandes.org>
+
+[ Upstream commit 3cb278e73be58bfb780ecd55129296d2f74c1fb7 ]
+
+Implement timer-based RCU callback batching (also known as lazy
+callbacks). With this we save about 5-10% of the power consumed by
+RCU requests that happen when the system is lightly loaded or idle.
+
+By default, all async callbacks (queued via call_rcu) are marked
+lazy. An alternate API, call_rcu_hurry(), is provided for the few
+users, such as synchronize_rcu(), that need the old behavior.
+
+The batch is flushed whenever a certain amount of time has passed, or
+the batch on a particular CPU grows too big. A future patch will also
+flush it under memory pressure.
+
+To handle several corner cases automagically (such as rcu_barrier() and
+hotplug), we re-use the bypass lists, which were originally introduced
+to address lock contention, to handle lazy CBs as well. The bypass list
+length includes the lazy CB length, and a separate lazy CB counter is
+introduced to keep track of the number of lazy CBs.
+
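+As a usage sketch (the struct and callback below are hypothetical; the
+two APIs are the ones this patch provides):
+
+	struct foo {
+		struct rcu_head rcu;
+		/* ... payload ... */
+	};
+
+	static void foo_free_cb(struct rcu_head *head)
+	{
+		kfree(container_of(head, struct foo, rcu));
+	}
+
+	/* Default: may be batched (lazy) to save power. */
+	call_rcu(&f->rcu, foo_free_cb);
+
+	/* For callers that need the grace period to start promptly. */
+	call_rcu_hurry(&f->rcu, foo_free_cb);
+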
+[ paulmck: Fix formatting of inline call_rcu_lazy() definition. ]
+[ paulmck: Apply Zqiang feedback. ]
+[ paulmck: Apply s/call_rcu_flush/call_rcu_hurry/ feedback from Tejun Heo. ]
+
+Suggested-by: Paul McKenney <paulmck@kernel.org>
+Acked-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Stable-dep-of: cc9b364bb1d5 ("xfrm6: fix inet6_dev refcount underflow problem")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/rcupdate.h | 9 +++
+ kernel/rcu/Kconfig | 8 ++
+ kernel/rcu/rcu.h | 8 ++
+ kernel/rcu/tiny.c | 2 +-
+ kernel/rcu/tree.c | 129 ++++++++++++++++++++-----------
+ kernel/rcu/tree.h | 11 ++-
+ kernel/rcu/tree_exp.h | 2 +-
+ kernel/rcu/tree_nocb.h | 159 +++++++++++++++++++++++++++++++--------
+ 8 files changed, 246 insertions(+), 82 deletions(-)
+
+diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
+index e9e61cd27ef63..46f05dc8b31aa 100644
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -108,6 +108,15 @@ static inline int rcu_preempt_depth(void)
+
+ #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+
++#ifdef CONFIG_RCU_LAZY
++void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func);
++#else
++static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
++{
++ call_rcu(head, func);
++}
++#endif
++
+ /* Internal to kernel */
+ void rcu_init(void);
+ extern int rcu_scheduler_active;
+diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
+index d471d22a5e21b..d78f6181c8aad 100644
+--- a/kernel/rcu/Kconfig
++++ b/kernel/rcu/Kconfig
+@@ -311,4 +311,12 @@ config TASKS_TRACE_RCU_READ_MB
+ Say N here if you hate read-side memory barriers.
+ Take the default if you are unsure.
+
++config RCU_LAZY
++ bool "RCU callback lazy invocation functionality"
++ depends on RCU_NOCB_CPU
++ default n
++ help
++ To save power, batch RCU callbacks and flush them after a delay,
++ under memory pressure, or when the callback list grows too big.
++
+ endmenu # "RCU Subsystem"
+diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
+index 48d8f754b730e..6b86c5912beaf 100644
+--- a/kernel/rcu/rcu.h
++++ b/kernel/rcu/rcu.h
+@@ -474,6 +474,14 @@ enum rcutorture_type {
+ INVALID_RCU_FLAVOR
+ };
+
++#if defined(CONFIG_RCU_LAZY)
++unsigned long rcu_lazy_get_jiffies_till_flush(void);
++void rcu_lazy_set_jiffies_till_flush(unsigned long j);
++#else
++static inline unsigned long rcu_lazy_get_jiffies_till_flush(void) { return 0; }
++static inline void rcu_lazy_set_jiffies_till_flush(unsigned long j) { }
++#endif
++
+ #if defined(CONFIG_TREE_RCU)
+ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
+ unsigned long *gp_seq);
+diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
+index a33a8d4942c37..72913ce21258b 100644
+--- a/kernel/rcu/tiny.c
++++ b/kernel/rcu/tiny.c
+@@ -44,7 +44,7 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
+
+ void rcu_barrier(void)
+ {
+- wait_rcu_gp(call_rcu);
++ wait_rcu_gp(call_rcu_hurry);
+ }
+ EXPORT_SYMBOL(rcu_barrier);
+
+diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
+index 6ea59aa53db78..855c035ec9630 100644
+--- a/kernel/rcu/tree.c
++++ b/kernel/rcu/tree.c
+@@ -2731,47 +2731,8 @@ static void check_cb_ovld(struct rcu_data *rdp)
+ raw_spin_unlock_rcu_node(rnp);
+ }
+
+-/**
+- * call_rcu() - Queue an RCU callback for invocation after a grace period.
+- * @head: structure to be used for queueing the RCU updates.
+- * @func: actual callback function to be invoked after the grace period
+- *
+- * The callback function will be invoked some time after a full grace
+- * period elapses, in other words after all pre-existing RCU read-side
+- * critical sections have completed. However, the callback function
+- * might well execute concurrently with RCU read-side critical sections
+- * that started after call_rcu() was invoked.
+- *
+- * RCU read-side critical sections are delimited by rcu_read_lock()
+- * and rcu_read_unlock(), and may be nested. In addition, but only in
+- * v5.0 and later, regions of code across which interrupts, preemption,
+- * or softirqs have been disabled also serve as RCU read-side critical
+- * sections. This includes hardware interrupt handlers, softirq handlers,
+- * and NMI handlers.
+- *
+- * Note that all CPUs must agree that the grace period extended beyond
+- * all pre-existing RCU read-side critical section. On systems with more
+- * than one CPU, this means that when "func()" is invoked, each CPU is
+- * guaranteed to have executed a full memory barrier since the end of its
+- * last RCU read-side critical section whose beginning preceded the call
+- * to call_rcu(). It also means that each CPU executing an RCU read-side
+- * critical section that continues beyond the start of "func()" must have
+- * executed a memory barrier after the call_rcu() but before the beginning
+- * of that RCU read-side critical section. Note that these guarantees
+- * include CPUs that are offline, idle, or executing in user mode, as
+- * well as CPUs that are executing in the kernel.
+- *
+- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+- * resulting RCU callback function "func()", then both CPU A and CPU B are
+- * guaranteed to execute a full memory barrier during the time interval
+- * between the call to call_rcu() and the invocation of "func()" -- even
+- * if CPU A and CPU B are the same CPU (but again only if the system has
+- * more than one CPU).
+- *
+- * Implementation of these memory-ordering guarantees is described here:
+- * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
+- */
+-void call_rcu(struct rcu_head *head, rcu_callback_t func)
++static void
++__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy)
+ {
+ static atomic_t doublefrees;
+ unsigned long flags;
+@@ -2812,7 +2773,7 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
+ }
+
+ check_cb_ovld(rdp);
+- if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags))
++ if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy))
+ return; // Enqueued onto ->nocb_bypass, so just leave.
+ // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock.
+ rcu_segcblist_enqueue(&rdp->cblist, head);
+@@ -2834,8 +2795,84 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
+ local_irq_restore(flags);
+ }
+ }
+-EXPORT_SYMBOL_GPL(call_rcu);
+
++#ifdef CONFIG_RCU_LAZY
++/**
++ * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and
++ * flush all lazy callbacks (including the new one) to the main ->cblist while
++ * doing so.
++ *
++ * @head: structure to be used for queueing the RCU updates.
++ * @func: actual callback function to be invoked after the grace period
++ *
++ * The callback function will be invoked some time after a full grace
++ * period elapses, in other words after all pre-existing RCU read-side
++ * critical sections have completed.
++ *
++ * Use this API instead of call_rcu() if you don't want the callback to be
++ * invoked after very long periods of time, which can happen on systems without
++ * memory pressure and on systems which are lightly loaded or mostly idle.
++ * This function will cause callbacks to be invoked sooner than later at the
++ * expense of extra power. Other than that, this function is identical to, and
++ * reuses call_rcu()'s logic. Refer to call_rcu() for more details about memory
++ * ordering and other functionality.
++ */
++void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
++{
++ return __call_rcu_common(head, func, false);
++}
++EXPORT_SYMBOL_GPL(call_rcu_hurry);
++#endif
++
++/**
++ * call_rcu() - Queue an RCU callback for invocation after a grace period.
++ * By default the callbacks are 'lazy' and are kept hidden from the main
++ * ->cblist to prevent starting of grace periods too soon.
++ * If you desire grace periods to start very soon, use call_rcu_hurry().
++ *
++ * @head: structure to be used for queueing the RCU updates.
++ * @func: actual callback function to be invoked after the grace period
++ *
++ * The callback function will be invoked some time after a full grace
++ * period elapses, in other words after all pre-existing RCU read-side
++ * critical sections have completed. However, the callback function
++ * might well execute concurrently with RCU read-side critical sections
++ * that started after call_rcu() was invoked.
++ *
++ * RCU read-side critical sections are delimited by rcu_read_lock()
++ * and rcu_read_unlock(), and may be nested. In addition, but only in
++ * v5.0 and later, regions of code across which interrupts, preemption,
++ * or softirqs have been disabled also serve as RCU read-side critical
++ * sections. This includes hardware interrupt handlers, softirq handlers,
++ * and NMI handlers.
++ *
++ * Note that all CPUs must agree that the grace period extended beyond
++ * all pre-existing RCU read-side critical section. On systems with more
++ * than one CPU, this means that when "func()" is invoked, each CPU is
++ * guaranteed to have executed a full memory barrier since the end of its
++ * last RCU read-side critical section whose beginning preceded the call
++ * to call_rcu(). It also means that each CPU executing an RCU read-side
++ * critical section that continues beyond the start of "func()" must have
++ * executed a memory barrier after the call_rcu() but before the beginning
++ * of that RCU read-side critical section. Note that these guarantees
++ * include CPUs that are offline, idle, or executing in user mode, as
++ * well as CPUs that are executing in the kernel.
++ *
++ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
++ * resulting RCU callback function "func()", then both CPU A and CPU B are
++ * guaranteed to execute a full memory barrier during the time interval
++ * between the call to call_rcu() and the invocation of "func()" -- even
++ * if CPU A and CPU B are the same CPU (but again only if the system has
++ * more than one CPU).
++ *
++ * Implementation of these memory-ordering guarantees is described here:
++ * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
++ */
++void call_rcu(struct rcu_head *head, rcu_callback_t func)
++{
++ return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY));
++}
++EXPORT_SYMBOL_GPL(call_rcu);
+
+ /* Maximum number of jiffies to wait before draining a batch. */
+ #define KFREE_DRAIN_JIFFIES (5 * HZ)
+@@ -3521,7 +3558,7 @@ void synchronize_rcu(void)
+ if (rcu_gp_is_expedited())
+ synchronize_rcu_expedited();
+ else
+- wait_rcu_gp(call_rcu);
++ wait_rcu_gp(call_rcu_hurry);
+ return;
+ }
+
+@@ -3924,7 +3961,7 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
+ * if it's fully lazy.
+ */
+ was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist);
+- WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
++ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
+ wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist);
+ if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) {
+ atomic_inc(&rcu_state.barrier_cpu_count);
+@@ -4359,7 +4396,7 @@ void rcutree_migrate_callbacks(int cpu)
+ my_rdp = this_cpu_ptr(&rcu_data);
+ my_rnp = my_rdp->mynode;
+ rcu_nocb_lock(my_rdp); /* irqs already disabled. */
+- WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies));
++ WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies, false));
+ raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */
+ /* Leverage recent GPs and set GP for new callbacks. */
+ needwake = rcu_advance_cbs(my_rnp, rdp) ||
+diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
+index 925dd98f8b23b..fcb5d696eb170 100644
+--- a/kernel/rcu/tree.h
++++ b/kernel/rcu/tree.h
+@@ -263,14 +263,16 @@ struct rcu_data {
+ unsigned long last_fqs_resched; /* Time of last rcu_resched(). */
+ unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */
+
++ long lazy_len; /* Length of buffered lazy callbacks. */
+ int cpu;
+ };
+
+ /* Values for nocb_defer_wakeup field in struct rcu_data. */
+ #define RCU_NOCB_WAKE_NOT 0
+ #define RCU_NOCB_WAKE_BYPASS 1
+-#define RCU_NOCB_WAKE 2
+-#define RCU_NOCB_WAKE_FORCE 3
++#define RCU_NOCB_WAKE_LAZY 2
++#define RCU_NOCB_WAKE 3
++#define RCU_NOCB_WAKE_FORCE 4
+
+ #define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
+ /* For jiffies_till_first_fqs and */
+@@ -441,9 +443,10 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
+ static void rcu_init_one_nocb(struct rcu_node *rnp);
+ static bool wake_nocb_gp(struct rcu_data *rdp, bool force);
+ static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+- unsigned long j);
++ unsigned long j, bool lazy);
+ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+- bool *was_alldone, unsigned long flags);
++ bool *was_alldone, unsigned long flags,
++ bool lazy);
+ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
+ unsigned long flags);
+ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level);
+diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
+index aa3ec3c3b9f75..b9637df7cda70 100644
+--- a/kernel/rcu/tree_exp.h
++++ b/kernel/rcu/tree_exp.h
+@@ -941,7 +941,7 @@ void synchronize_rcu_expedited(void)
+
+ /* If expedited grace periods are prohibited, fall back to normal. */
+ if (rcu_gp_is_normal()) {
+- wait_rcu_gp(call_rcu);
++ wait_rcu_gp(call_rcu_hurry);
+ return;
+ }
+
+diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
+index 74d4983d68f82..c3ec5f389d27f 100644
+--- a/kernel/rcu/tree_nocb.h
++++ b/kernel/rcu/tree_nocb.h
+@@ -256,6 +256,31 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
+ return __wake_nocb_gp(rdp_gp, rdp, force, flags);
+ }
+
++/*
++ * LAZY_FLUSH_JIFFIES decides the maximum amount of time that
++ * can elapse before lazy callbacks are flushed. Lazy callbacks
++ * could be flushed much earlier for a number of other reasons;
++ * however, LAZY_FLUSH_JIFFIES ensures that no lazy callbacks are
++ * left unsubmitted to RCU for longer than that many jiffies.
++ */
++#define LAZY_FLUSH_JIFFIES (10 * HZ)
++static unsigned long jiffies_till_flush = LAZY_FLUSH_JIFFIES;
++
++#ifdef CONFIG_RCU_LAZY
++// To be called only from test code.
++void rcu_lazy_set_jiffies_till_flush(unsigned long jif)
++{
++ jiffies_till_flush = jif;
++}
++EXPORT_SYMBOL(rcu_lazy_set_jiffies_till_flush);
++
++unsigned long rcu_lazy_get_jiffies_till_flush(void)
++{
++ return jiffies_till_flush;
++}
++EXPORT_SYMBOL(rcu_lazy_get_jiffies_till_flush);
++#endif
++
+ /*
+ * Arrange to wake the GP kthread for this NOCB group at some future
+ * time when it is safe to do so.
+@@ -269,10 +294,14 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
+ raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
+
+ /*
+- * Bypass wakeup overrides previous deferments. In case
+- * of callback storm, no need to wake up too early.
++ * Bypass wakeup overrides previous deferments. In case of
++ * callback storms, no need to wake up too early.
+ */
+- if (waketype == RCU_NOCB_WAKE_BYPASS) {
++ if (waketype == RCU_NOCB_WAKE_LAZY &&
++ rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) {
++ mod_timer(&rdp_gp->nocb_timer, jiffies + jiffies_till_flush);
++ WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
++ } else if (waketype == RCU_NOCB_WAKE_BYPASS) {
+ mod_timer(&rdp_gp->nocb_timer, jiffies + 2);
+ WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
+ } else {
+@@ -293,10 +322,13 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
+ * proves to be initially empty, just return false because the no-CB GP
+ * kthread may need to be awakened in this case.
+ *
++ * Return true if there was something to be flushed and it succeeded, otherwise
++ * false.
++ *
+ * Note that this function always returns true if rhp is NULL.
+ */
+ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+- unsigned long j)
++ unsigned long j, bool lazy)
+ {
+ struct rcu_cblist rcl;
+
+@@ -310,7 +342,20 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ /* Note: ->cblist.len already accounts for ->nocb_bypass contents. */
+ if (rhp)
+ rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
+- rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
++
++ /*
++ * If the new CB requested was a lazy one, queue it onto the main
++ * ->cblist so we can take advantage of a sooner grace period.
++ */
++ if (lazy && rhp) {
++ rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, NULL);
++ rcu_cblist_enqueue(&rcl, rhp);
++ WRITE_ONCE(rdp->lazy_len, 0);
++ } else {
++ rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
++ WRITE_ONCE(rdp->lazy_len, 0);
++ }
++
+ rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl);
+ WRITE_ONCE(rdp->nocb_bypass_first, j);
+ rcu_nocb_bypass_unlock(rdp);
+@@ -326,13 +371,13 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ * Note that this function always returns true if rhp is NULL.
+ */
+ static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+- unsigned long j)
++ unsigned long j, bool lazy)
+ {
+ if (!rcu_rdp_is_offloaded(rdp))
+ return true;
+ rcu_lockdep_assert_cblist_protected(rdp);
+ rcu_nocb_bypass_lock(rdp);
+- return rcu_nocb_do_flush_bypass(rdp, rhp, j);
++ return rcu_nocb_do_flush_bypass(rdp, rhp, j, lazy);
+ }
+
+ /*
+@@ -345,7 +390,7 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
+ if (!rcu_rdp_is_offloaded(rdp) ||
+ !rcu_nocb_bypass_trylock(rdp))
+ return;
+- WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j));
++ WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false));
+ }
+
+ /*
+@@ -367,12 +412,14 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
+ * there is only one CPU in operation.
+ */
+ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+- bool *was_alldone, unsigned long flags)
++ bool *was_alldone, unsigned long flags,
++ bool lazy)
+ {
+ unsigned long c;
+ unsigned long cur_gp_seq;
+ unsigned long j = jiffies;
+ long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
++ bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len));
+
+ lockdep_assert_irqs_disabled();
+
+@@ -417,25 +464,29 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ // If there hasn't yet been all that many ->cblist enqueues
+ // this jiffy, tell the caller to enqueue onto ->cblist. But flush
+ // ->nocb_bypass first.
+- if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) {
++ // Lazy CBs throttle this back and do immediate bypass queuing.
++ if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy && !lazy) {
+ rcu_nocb_lock(rdp);
+ *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
+ if (*was_alldone)
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+ TPS("FirstQ"));
+- WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j));
++
++ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j, false));
+ WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
+ return false; // Caller must enqueue the callback.
+ }
+
+ // If ->nocb_bypass has been used too long or is too full,
+ // flush ->nocb_bypass to ->cblist.
+- if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) ||
++ if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) ||
++ (ncbs && bypass_is_lazy &&
++ (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush))) ||
+ ncbs >= qhimark) {
+ rcu_nocb_lock(rdp);
+ *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
+
+- if (!rcu_nocb_flush_bypass(rdp, rhp, j)) {
++ if (!rcu_nocb_flush_bypass(rdp, rhp, j, lazy)) {
+ if (*was_alldone)
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+ TPS("FirstQ"));
+@@ -463,13 +514,24 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
+ rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
+ rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
++
++ if (lazy)
++ WRITE_ONCE(rdp->lazy_len, rdp->lazy_len + 1);
++
+ if (!ncbs) {
+ WRITE_ONCE(rdp->nocb_bypass_first, j);
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ"));
+ }
+ rcu_nocb_bypass_unlock(rdp);
+ smp_mb(); /* Order enqueue before wake. */
+- if (ncbs) {
++ // A wake up of the grace period kthread or timer adjustment
++ // needs to be done only if:
++ // 1. Bypass list was fully empty before (this is the first
++ // bypass list entry), or:
++ // 2. Both of these conditions are met:
++ // a. The bypass list previously had only lazy CBs, and:
++ // b. The new CB is non-lazy.
++ if (ncbs && (!bypass_is_lazy || lazy)) {
+ local_irq_restore(flags);
+ } else {
+ // No-CBs GP kthread might be indefinitely asleep, if so, wake.
+@@ -497,8 +559,10 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
+ unsigned long flags)
+ __releases(rdp->nocb_lock)
+ {
++ long bypass_len;
+ unsigned long cur_gp_seq;
+ unsigned long j;
++ long lazy_len;
+ long len;
+ struct task_struct *t;
+
+@@ -512,9 +576,16 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
+ }
+ // Need to actually do a wakeup.
+ len = rcu_segcblist_n_cbs(&rdp->cblist);
++ bypass_len = rcu_cblist_n_cbs(&rdp->nocb_bypass);
++ lazy_len = READ_ONCE(rdp->lazy_len);
+ if (was_alldone) {
+ rdp->qlen_last_fqs_check = len;
+- if (!irqs_disabled_flags(flags)) {
++ // Only lazy CBs in bypass list
++ if (lazy_len && bypass_len == lazy_len) {
++ rcu_nocb_unlock_irqrestore(rdp, flags);
++ wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
++ TPS("WakeLazy"));
++ } else if (!irqs_disabled_flags(flags)) {
+ /* ... if queue was empty ... */
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ wake_nocb_gp(rdp, false);
+@@ -605,12 +676,12 @@ static void nocb_gp_sleep(struct rcu_data *my_rdp, int cpu)
+ static void nocb_gp_wait(struct rcu_data *my_rdp)
+ {
+ bool bypass = false;
+- long bypass_ncbs;
+ int __maybe_unused cpu = my_rdp->cpu;
+ unsigned long cur_gp_seq;
+ unsigned long flags;
+ bool gotcbs = false;
+ unsigned long j = jiffies;
++ bool lazy = false;
+ bool needwait_gp = false; // This prevents actual uninitialized use.
+ bool needwake;
+ bool needwake_gp;
+@@ -640,24 +711,43 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
+ * won't be ignored for long.
+ */
+ list_for_each_entry(rdp, &my_rdp->nocb_head_rdp, nocb_entry_rdp) {
++ long bypass_ncbs;
++ bool flush_bypass = false;
++ long lazy_ncbs;
++
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));
+ rcu_nocb_lock_irqsave(rdp, flags);
+ lockdep_assert_held(&rdp->nocb_lock);
+ bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
+- if (bypass_ncbs &&
++ lazy_ncbs = READ_ONCE(rdp->lazy_len);
++
++ if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) &&
++ (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush) ||
++ bypass_ncbs > 2 * qhimark)) {
++ flush_bypass = true;
++ } else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) &&
+ (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||
+ bypass_ncbs > 2 * qhimark)) {
+- // Bypass full or old, so flush it.
+- (void)rcu_nocb_try_flush_bypass(rdp, j);
+- bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
++ flush_bypass = true;
+ } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ continue; /* No callbacks here, try next. */
+ }
++
++ if (flush_bypass) {
++ // Bypass full or old, so flush it.
++ (void)rcu_nocb_try_flush_bypass(rdp, j);
++ bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
++ lazy_ncbs = READ_ONCE(rdp->lazy_len);
++ }
++
+ if (bypass_ncbs) {
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+- TPS("Bypass"));
+- bypass = true;
++ bypass_ncbs == lazy_ncbs ? TPS("Lazy") : TPS("Bypass"));
++ if (bypass_ncbs == lazy_ncbs)
++ lazy = true;
++ else
++ bypass = true;
+ }
+ rnp = rdp->mynode;
+
+@@ -705,12 +795,20 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
+ my_rdp->nocb_gp_gp = needwait_gp;
+ my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;
+
+- if (bypass && !rcu_nocb_poll) {
+- // At least one child with non-empty ->nocb_bypass, so set
+- // timer in order to avoid stranding its callbacks.
+- wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS,
+- TPS("WakeBypassIsDeferred"));
++ // At least one child with non-empty ->nocb_bypass, so set
++ // timer in order to avoid stranding its callbacks.
++ if (!rcu_nocb_poll) {
++ // If the bypass list only has lazy CBs, add a deferred lazy wake up.
++ if (lazy && !bypass) {
++ wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_LAZY,
++ TPS("WakeLazyIsDeferred"));
++ // Otherwise add a deferred bypass wake up.
++ } else if (bypass) {
++ wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS,
++ TPS("WakeBypassIsDeferred"));
++ }
+ }
++
+ if (rcu_nocb_poll) {
+ /* Polling, so trace if first poll in the series. */
+ if (gotcbs)
+@@ -1036,7 +1134,7 @@ static long rcu_nocb_rdp_deoffload(void *arg)
+ * return false, which means that future calls to rcu_nocb_try_bypass()
+ * will refuse to put anything into the bypass.
+ */
+- WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
++ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
+ /*
+ * Start with invoking rcu_core() early. This way if the current thread
+ * happens to preempt an ongoing call to rcu_core() in the middle,
+@@ -1290,6 +1388,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
+ raw_spin_lock_init(&rdp->nocb_gp_lock);
+ timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
+ rcu_cblist_init(&rdp->nocb_bypass);
++ WRITE_ONCE(rdp->lazy_len, 0);
+ mutex_init(&rdp->nocb_gp_kthread_mutex);
+ }
+
+@@ -1576,13 +1675,13 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
+ }
+
+ static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+- unsigned long j)
++ unsigned long j, bool lazy)
+ {
+ return true;
+ }
+
+ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+- bool *was_alldone, unsigned long flags)
++ bool *was_alldone, unsigned long flags, bool lazy)
+ {
+ return false;
+ }
+--
+2.40.1
+
--- /dev/null
+From 89afe5be70707a1f88207b7c143645df178e25ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Sep 2023 00:50:27 +0200
+Subject: regulator/core: Revert "fix kobject release warning and memory leak
+ in regulator_register()"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
+
+[ Upstream commit 6e800968f6a715c0661716d2ec5e1f56ed9f9c08 ]
+
+This reverts commit 5f4b204b6b8153923d5be8002c5f7082985d153f.
+
+Since rdev->dev now has a release() callback, the proper way of freeing
+the initialized device can be restored.
+
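+For reference, the pattern this revert restores is the usual driver core
+rule: once device_initialize() has run, the memory is owned by the
+device and must be dropped via put_device(), which ends up in the
+release() callback. Simplified sketch (the release callback shown is the
+existing one in the regulator core, trimmed for illustration):
+
+	static void regulator_dev_release(struct device *dev)
+	{
+		struct regulator_dev *rdev = dev_to_rdev(dev);
+
+		kfree(rdev);	/* freed only on the final reference drop */
+	}
+
+	/* error path after device_initialize(): */
+	put_device(&rdev->dev);	/* never kfree(rdev) directly */
+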
+Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
+Link: https://lore.kernel.org/r/d7f469f3f7b1f0e1d52f9a7ede3f3c5703382090.1695077303.git.mirq-linux@rere.qmqm.pl
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/regulator/core.c | 6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
+index f6a95f72af18d..34d3d82819064 100644
+--- a/drivers/regulator/core.c
++++ b/drivers/regulator/core.c
+@@ -5725,15 +5725,11 @@ regulator_register(struct device *dev,
+ mutex_lock(®ulator_list_mutex);
+ regulator_ena_gpio_free(rdev);
+ mutex_unlock(®ulator_list_mutex);
+- put_device(&rdev->dev);
+- rdev = NULL;
+ clean:
+ if (dangling_of_gpiod)
+ gpiod_put(config->ena_gpiod);
+- if (rdev && rdev->dev.of_node)
+- of_node_put(rdev->dev.of_node);
+- kfree(rdev);
+ kfree(config);
++ put_device(&rdev->dev);
+ rinse:
+ if (dangling_cfg_gpiod)
+ gpiod_put(cfg->ena_gpiod);
+--
+2.40.1
+
--- /dev/null
+From 390d48cf469ba707ede4f7925e13d99b2cc6d5f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Sep 2023 02:19:44 +0800
+Subject: selftests/mm: fix awk usage in charge_reserved_hugetlb.sh and
+ hugetlb_reparenting_test.sh that may cause error
+
+From: Juntong Deng <juntong.deng@outlook.com>
+
+[ Upstream commit bbe246f875d064ecfb872fe4f66152e743dfd22d ]
+
+According to the awk manual, the -e option does not need to be specified
+in front of 'program' (unless you need to mix it with programs read from
+files via -f).
+
+The redundant -e option can cause errors when users use awk
+implementations other than gawk (for example, mawk does not support the
+-e option).
+
+Error Example:
+awk: not an option: -e
+
+Link: https://lkml.kernel.org/r/VI1P193MB075228810591AF2FDD7D42C599C3A@VI1P193MB0752.EURP193.PROD.OUTLOOK.COM
+Signed-off-by: Juntong Deng <juntong.deng@outlook.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/vm/charge_reserved_hugetlb.sh | 4 ++--
+ tools/testing/selftests/vm/hugetlb_reparenting_test.sh | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+index a5cb4b09a46c4..0899019a7fcb4 100644
+--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
++++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+@@ -25,7 +25,7 @@ if [[ "$1" == "-cgroup-v2" ]]; then
+ fi
+
+ if [[ $cgroup2 ]]; then
+- cgroup_path=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
++ cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=/dev/cgroup/memory
+ mount -t cgroup2 none $cgroup_path
+@@ -33,7 +33,7 @@ if [[ $cgroup2 ]]; then
+ fi
+ echo "+hugetlb" >$cgroup_path/cgroup.subtree_control
+ else
+- cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
++ cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=/dev/cgroup/memory
+ mount -t cgroup memory,hugetlb $cgroup_path
+diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
+index bf2d2a684edfd..14d26075c8635 100644
+--- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
++++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
+@@ -20,7 +20,7 @@ fi
+
+
+ if [[ $cgroup2 ]]; then
+- CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
++ CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=/dev/cgroup/memory
+ mount -t cgroup2 none $CGROUP_ROOT
+@@ -28,7 +28,7 @@ if [[ $cgroup2 ]]; then
+ fi
+ echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
+ else
+- CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
++ CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=/dev/cgroup/memory
+ mount -t cgroup memory,hugetlb $CGROUP_ROOT
+--
+2.40.1
+
--- /dev/null
+From e526abc9ed61fec0728aeaad545e7d832d6054a9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Oct 2023 15:49:36 -0400
+Subject: selftests: openvswitch: Add version check for pyroute2
+
+From: Aaron Conole <aconole@redhat.com>
+
+[ Upstream commit 92e37f20f20a23fec4626ae72eda50f127acb130 ]
+
+Paolo Abeni reports that on some systems the pyroute2 version isn't
+new enough to run the test suite. Enforce a minimum version of 0.6
+for all cases (which does cover the existing ones). Version 0.6.1 was
+released in May of 2021, so it should have propagated to most
+installations at this point.
+
+The alternative that Paolo proposed was to skip only when add-flow
+is being run. This would be okay for most cases, except if a future
+test case is added that needs to do a flow dump without an associated
+add (just guessing). In that case it would also be broken, and we
+would need additional skip logic anyway. Better to draw a line in the
+sand now.
+
+Fixes: 25f16c873fb1 ("selftests: add openvswitch selftest suite")
+Reported-by: Paolo Abeni <pabeni@redhat.com>
+Closes: https://lore.kernel.org/lkml/8470c431e0930d2ea204a9363a60937289b7fdbe.camel@redhat.com/
+Signed-off-by: Aaron Conole <aconole@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/openvswitch/openvswitch.sh | 2 +-
+ tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 10 +++++++++-
+ 2 files changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
+index 5e6686398a313..52054a09d575c 100755
+--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
++++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
+@@ -117,7 +117,7 @@ run_test() {
+ fi
+
+ if python3 ovs-dpctl.py -h 2>&1 | \
+- grep "Need to install the python" >/dev/null 2>&1; then
++ grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then
+ stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}"
+ return $ksft_skip
+ fi
+diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+index 5d467d1993cb1..e787a1f967b0d 100644
+--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
++++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+@@ -17,8 +17,10 @@ try:
+ from pyroute2.netlink import nla
+ from pyroute2.netlink.exceptions import NetlinkError
+ from pyroute2.netlink.generic import GenericNetlinkSocket
++ import pyroute2
++
+ except ModuleNotFoundError:
+- print("Need to install the python pyroute2 package.")
++ print("Need to install the python pyroute2 package >= 0.6.")
+ sys.exit(0)
+
+
+@@ -280,6 +282,12 @@ def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB()):
+
+
+ def main(argv):
++ # version check for pyroute2
++ prverscheck = pyroute2.__version__.split(".")
++ if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6:
++ print("Need to upgrade the python pyroute2 package to >= 0.6.")
++ sys.exit(0)
++
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "-v",
+--
+2.40.1
+
--- /dev/null
+From a29748f3dd0bfac926e8b34dca4dc8332dd7d1ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 May 2023 11:20:12 +0300
+Subject: serial: 8250: omap: Fix imprecise external abort for omap_8250_pm()
+
+From: Tony Lindgren <tony@atomide.com>
+
+[ Upstream commit 398cecc24846e867b9f90a0bd22730e3df6b05be ]
+
+We must idle the uart only after serial8250_unregister_port().
+Otherwise, unbinding the uart via sysfs while a cat of the port is
+running produces an imprecise external abort:
+
+mem_serial_in from omap_8250_pm+0x44/0xf4
+omap_8250_pm from uart_hangup+0xe0/0x194
+uart_hangup from __tty_hangup.part.0+0x37c/0x3a8
+__tty_hangup.part.0 from uart_remove_one_port+0x9c/0x22c
+uart_remove_one_port from serial8250_unregister_port+0x60/0xe8
+serial8250_unregister_port from omap8250_remove+0x6c/0xd0
+omap8250_remove from platform_remove+0x28/0x54
+
+It turns out the driver needs to have runtime PM functional before
+driver probe calls serial8250_register_8250_port(), and it still needs
+runtime PM after driver remove calls serial8250_unregister_port().
+
+On probe, we need to read registers before registering the port in
+omap_serial_fill_features_erratas(). We do that with custom uart_read()
+already.
+
+On remove, after serial8250_unregister_port(), we need to write to the
+uart registers to idle the device. Let's add a custom uart_write() for
+that.
+
+Currently the uart register access depends on port->membase being
+initialized, which won't work after serial8250_unregister_port().
+Let's use priv->membase instead, and use it in the runtime PM related
+functions to remove the dependency on port->membase for early and
+late register access.
+
+Note that during use, we need to check for a valid port in the runtime PM
+related functions. This is needed for the optional wakeup configuration.
+We now need to set the drvdata a bit earlier so it's available for the
+runtime PM functions.
+
+With the port checks in runtime PM functions, the old checks for priv in
+omap8250_runtime_suspend() and omap8250_runtime_resume() functions are no
+longer needed and are removed.
+
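+The resulting remove path ordering, in outline (condensed from the diff
+below):
+
+	serial8250_unregister_port(priv->line);	/* may write registers */
+	priv->line = -ENODEV;		/* runtime PM callbacks skip port */
+	pm_runtime_put_sync(&pdev->dev);	/* idle via priv->membase */
+	pm_runtime_disable(&pdev->dev);
+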
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Link: https://lore.kernel.org/r/20230508082014.23083-3-tony@atomide.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 560706eff7c8 ("serial: 8250_omap: Fix errors with no_console_suspend")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/8250/8250_omap.c | 70 ++++++++++++++++-------------
+ 1 file changed, 38 insertions(+), 32 deletions(-)
+
+diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
+index adc85e250822c..0aed614110090 100644
+--- a/drivers/tty/serial/8250/8250_omap.c
++++ b/drivers/tty/serial/8250/8250_omap.c
+@@ -32,6 +32,7 @@
+ #include "8250.h"
+
+ #define DEFAULT_CLK_SPEED 48000000
++#define OMAP_UART_REGSHIFT 2
+
+ #define UART_ERRATA_i202_MDR1_ACCESS (1 << 0)
+ #define OMAP_UART_WER_HAS_TX_WAKEUP (1 << 1)
+@@ -109,6 +110,7 @@
+ #define UART_OMAP_RX_LVL 0x19
+
+ struct omap8250_priv {
++ void __iomem *membase;
+ int line;
+ u8 habit;
+ u8 mdr1;
+@@ -152,9 +154,14 @@ static void omap_8250_rx_dma_flush(struct uart_8250_port *p);
+ static inline void omap_8250_rx_dma_flush(struct uart_8250_port *p) { }
+ #endif
+
+-static u32 uart_read(struct uart_8250_port *up, u32 reg)
++static u32 uart_read(struct omap8250_priv *priv, u32 reg)
+ {
+- return readl(up->port.membase + (reg << up->port.regshift));
++ return readl(priv->membase + (reg << OMAP_UART_REGSHIFT));
++}
++
++static void uart_write(struct omap8250_priv *priv, u32 reg, u32 val)
++{
++ writel(val, priv->membase + (reg << OMAP_UART_REGSHIFT));
+ }
+
+ /*
+@@ -538,7 +545,7 @@ static void omap_serial_fill_features_erratas(struct uart_8250_port *up,
+ u32 mvr, scheme;
+ u16 revision, major, minor;
+
+- mvr = uart_read(up, UART_OMAP_MVER);
++ mvr = uart_read(priv, UART_OMAP_MVER);
+
+ /* Check revision register scheme */
+ scheme = mvr >> OMAP_UART_MVR_SCHEME_SHIFT;
+@@ -1319,7 +1326,7 @@ static int omap8250_probe(struct platform_device *pdev)
+ UPF_HARD_FLOW;
+ up.port.private_data = priv;
+
+- up.port.regshift = 2;
++ up.port.regshift = OMAP_UART_REGSHIFT;
+ up.port.fifosize = 64;
+ up.tx_loadsz = 64;
+ up.capabilities = UART_CAP_FIFO;
+@@ -1381,6 +1388,8 @@ static int omap8250_probe(struct platform_device *pdev)
+ DEFAULT_CLK_SPEED);
+ }
+
++ priv->membase = membase;
++ priv->line = -ENODEV;
+ priv->latency = PM_QOS_CPU_LATENCY_DEFAULT_VALUE;
+ priv->calc_latency = PM_QOS_CPU_LATENCY_DEFAULT_VALUE;
+ cpu_latency_qos_add_request(&priv->pm_qos_request, priv->latency);
+@@ -1388,6 +1397,8 @@ static int omap8250_probe(struct platform_device *pdev)
+
+ spin_lock_init(&priv->rx_dma_lock);
+
++ platform_set_drvdata(pdev, priv);
++
+ device_init_wakeup(&pdev->dev, true);
+ pm_runtime_enable(&pdev->dev);
+ pm_runtime_use_autosuspend(&pdev->dev);
+@@ -1449,7 +1460,6 @@ static int omap8250_probe(struct platform_device *pdev)
+ goto err;
+ }
+ priv->line = ret;
+- platform_set_drvdata(pdev, priv);
+ pm_runtime_mark_last_busy(&pdev->dev);
+ pm_runtime_put_autosuspend(&pdev->dev);
+ return 0;
+@@ -1471,11 +1481,12 @@ static int omap8250_remove(struct platform_device *pdev)
+ if (err)
+ return err;
+
++ serial8250_unregister_port(priv->line);
++ priv->line = -ENODEV;
+ pm_runtime_dont_use_autosuspend(&pdev->dev);
+ pm_runtime_put_sync(&pdev->dev);
+ flush_work(&priv->qos_work);
+ pm_runtime_disable(&pdev->dev);
+- serial8250_unregister_port(priv->line);
+ cpu_latency_qos_remove_request(&priv->pm_qos_request);
+ device_init_wakeup(&pdev->dev, false);
+ return 0;
+@@ -1561,7 +1572,6 @@ static int omap8250_lost_context(struct uart_8250_port *up)
+ static int omap8250_soft_reset(struct device *dev)
+ {
+ struct omap8250_priv *priv = dev_get_drvdata(dev);
+- struct uart_8250_port *up = serial8250_get_port(priv->line);
+ int timeout = 100;
+ int sysc;
+ int syss;
+@@ -1575,20 +1585,20 @@ static int omap8250_soft_reset(struct device *dev)
+ * needing omap8250_soft_reset() quirk. Do it in two writes as
+ * recommended in the comment for omap8250_update_scr().
+ */
+- serial_out(up, UART_OMAP_SCR, OMAP_UART_SCR_DMAMODE_1);
+- serial_out(up, UART_OMAP_SCR,
++ uart_write(priv, UART_OMAP_SCR, OMAP_UART_SCR_DMAMODE_1);
++ uart_write(priv, UART_OMAP_SCR,
+ OMAP_UART_SCR_DMAMODE_1 | OMAP_UART_SCR_DMAMODE_CTL);
+
+- sysc = serial_in(up, UART_OMAP_SYSC);
++ sysc = uart_read(priv, UART_OMAP_SYSC);
+
+ /* softreset the UART */
+ sysc |= OMAP_UART_SYSC_SOFTRESET;
+- serial_out(up, UART_OMAP_SYSC, sysc);
++ uart_write(priv, UART_OMAP_SYSC, sysc);
+
+ /* By experiments, 1us enough for reset complete on AM335x */
+ do {
+ udelay(1);
+- syss = serial_in(up, UART_OMAP_SYSS);
++ syss = uart_read(priv, UART_OMAP_SYSS);
+ } while (--timeout && !(syss & OMAP_UART_SYSS_RESETDONE));
+
+ if (!timeout) {
+@@ -1602,13 +1612,10 @@ static int omap8250_soft_reset(struct device *dev)
+ static int omap8250_runtime_suspend(struct device *dev)
+ {
+ struct omap8250_priv *priv = dev_get_drvdata(dev);
+- struct uart_8250_port *up;
+-
+- /* In case runtime-pm tries this before we are setup */
+- if (!priv)
+- return 0;
++ struct uart_8250_port *up = NULL;
+
+- up = serial8250_get_port(priv->line);
++ if (priv->line >= 0)
++ up = serial8250_get_port(priv->line);
+ /*
+ * When using 'no_console_suspend', the console UART must not be
+ * suspended. Since driver suspend is managed by runtime suspend,
+@@ -1616,7 +1623,7 @@ static int omap8250_runtime_suspend(struct device *dev)
+ * active during suspend.
+ */
+ if (priv->is_suspending && !console_suspend_enabled) {
+- if (uart_console(&up->port))
++ if (up && uart_console(&up->port))
+ return -EBUSY;
+ }
+
+@@ -1627,13 +1634,15 @@ static int omap8250_runtime_suspend(struct device *dev)
+ if (ret)
+ return ret;
+
+- /* Restore to UART mode after reset (for wakeup) */
+- omap8250_update_mdr1(up, priv);
+- /* Restore wakeup enable register */
+- serial_out(up, UART_OMAP_WER, priv->wer);
++ if (up) {
++ /* Restore to UART mode after reset (for wakeup) */
++ omap8250_update_mdr1(up, priv);
++ /* Restore wakeup enable register */
++ serial_out(up, UART_OMAP_WER, priv->wer);
++ }
+ }
+
+- if (up->dma && up->dma->rxchan)
++ if (up && up->dma && up->dma->rxchan)
+ omap_8250_rx_dma_flush(up);
+
+ priv->latency = PM_QOS_CPU_LATENCY_DEFAULT_VALUE;
+@@ -1645,18 +1654,15 @@ static int omap8250_runtime_suspend(struct device *dev)
+ static int omap8250_runtime_resume(struct device *dev)
+ {
+ struct omap8250_priv *priv = dev_get_drvdata(dev);
+- struct uart_8250_port *up;
+-
+- /* In case runtime-pm tries this before we are setup */
+- if (!priv)
+- return 0;
++ struct uart_8250_port *up = NULL;
+
+- up = serial8250_get_port(priv->line);
++ if (priv->line >= 0)
++ up = serial8250_get_port(priv->line);
+
+- if (omap8250_lost_context(up))
++ if (up && omap8250_lost_context(up))
+ omap8250_restore_regs(up);
+
+- if (up->dma && up->dma->rxchan && !(priv->habit & UART_HAS_EFR2))
++ if (up && up->dma && up->dma->rxchan && !(priv->habit & UART_HAS_EFR2))
+ omap_8250_rx_dma(up);
+
+ priv->latency = priv->calc_latency;
+--
+2.40.1
+
--- /dev/null
+From 63dc8b9a912e3dac0a76f42ca128843bd3b8931d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Sep 2023 09:13:17 +0300
+Subject: serial: 8250_omap: Fix errors with no_console_suspend
+
+From: Tony Lindgren <tony@atomide.com>
+
+[ Upstream commit 560706eff7c8e5621b0d63afe0866e0e1906e87e ]
+
+We now get errors on system suspend if no_console_suspend is set, as
+reported by Thomas. The errors started with commit 20a41a62618d ("serial:
+8250_omap: Use force_suspend and resume for system suspend").
+
+Let's fix the issue by checking for console_suspend_enabled in the system
+suspend and resume path.
+
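+The guard added on both paths boils down to (a sketch of the change in
+the diff below; the resume path applies the mirror-image check):
+
+	if (uart_console(&up->port) && console_suspend_enabled)
+		err = pm_runtime_force_suspend(dev);
+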
+Note that with this fix the checks for console_suspend_enabled in
+omap8250_runtime_suspend() become useless. We now keep a runtime PM
+usage count for an attached kernel console starting with commit bedb404e91bb
+("serial: 8250_port: Don't use power management for kernel console").
+
+Fixes: 20a41a62618d ("serial: 8250_omap: Use force_suspend and resume for system suspend")
+Cc: stable <stable@kernel.org>
+Cc: Udit Kumar <u-kumar1@ti.com>
+Reported-by: Thomas Richard <thomas.richard@bootlin.com>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Tested-by: Thomas Richard <thomas.richard@bootlin.com>
+Reviewed-by: Dhruva Gole <d-gole@ti.com>
+Link: https://lore.kernel.org/r/20230926061319.15140-1-tony@atomide.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/8250/8250_omap.c | 25 ++++++++++---------------
+ 1 file changed, 10 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
+index 0aed614110090..05f8675925ed6 100644
+--- a/drivers/tty/serial/8250/8250_omap.c
++++ b/drivers/tty/serial/8250/8250_omap.c
+@@ -1516,7 +1516,7 @@ static int omap8250_suspend(struct device *dev)
+ {
+ struct omap8250_priv *priv = dev_get_drvdata(dev);
+ struct uart_8250_port *up = serial8250_get_port(priv->line);
+- int err;
++ int err = 0;
+
+ serial8250_suspend_port(priv->line);
+
+@@ -1526,7 +1526,8 @@ static int omap8250_suspend(struct device *dev)
+ if (!device_may_wakeup(dev))
+ priv->wer = 0;
+ serial_out(up, UART_OMAP_WER, priv->wer);
+- err = pm_runtime_force_suspend(dev);
++ if (uart_console(&up->port) && console_suspend_enabled)
++ err = pm_runtime_force_suspend(dev);
+ flush_work(&priv->qos_work);
+
+ return err;
+@@ -1535,11 +1536,15 @@ static int omap8250_suspend(struct device *dev)
+ static int omap8250_resume(struct device *dev)
+ {
+ struct omap8250_priv *priv = dev_get_drvdata(dev);
++ struct uart_8250_port *up = serial8250_get_port(priv->line);
+ int err;
+
+- err = pm_runtime_force_resume(dev);
+- if (err)
+- return err;
++ if (uart_console(&up->port) && console_suspend_enabled) {
++ err = pm_runtime_force_resume(dev);
++ if (err)
++ return err;
++ }
++
+ serial8250_resume_port(priv->line);
+ /* Paired with pm_runtime_resume_and_get() in omap8250_suspend() */
+ pm_runtime_mark_last_busy(dev);
+@@ -1616,16 +1621,6 @@ static int omap8250_runtime_suspend(struct device *dev)
+
+ if (priv->line >= 0)
+ up = serial8250_get_port(priv->line);
+- /*
+- * When using 'no_console_suspend', the console UART must not be
+- * suspended. Since driver suspend is managed by runtime suspend,
+- * preventing runtime suspend (by returning error) will keep device
+- * active during suspend.
+- */
+- if (priv->is_suspending && !console_suspend_enabled) {
+- if (up && uart_console(&up->port))
+- return -EBUSY;
+- }
+
+ if (priv->habit & UART_ERRATA_CLOCK_DISABLE) {
+ int ret;
+--
+2.40.1
+
--- /dev/null
+From 5c7f6fa9941aacb73ee2707226cfb043f6c0795f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Mar 2023 10:09:19 +0200
+Subject: serial: Move uart_change_speed() earlier
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+
+[ Upstream commit 8e90cf29aef77b59ed6a6f6466add2af79621f26 ]
+
+Move uart_change_speed() earlier to get rid of its forward declaration.
+
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Link: https://lore.kernel.org/r/20230309080923.11778-5-ilpo.jarvinen@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8679328eb859 ("serial: Reduce spinlocked portion of uart_rs485_config()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/serial_core.c | 94 ++++++++++++++++----------------
+ 1 file changed, 46 insertions(+), 48 deletions(-)
+
+diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
+index 2cc5c68c8689f..07e694c4f4827 100644
+--- a/drivers/tty/serial/serial_core.c
++++ b/drivers/tty/serial/serial_core.c
+@@ -48,8 +48,6 @@ static struct lock_class_key port_lock_key;
+ */
+ #define RS485_MAX_RTS_DELAY 100 /* msecs */
+
+-static void uart_change_speed(struct tty_struct *tty, struct uart_state *state,
+- const struct ktermios *old_termios);
+ static void uart_wait_until_sent(struct tty_struct *tty, int timeout);
+ static void uart_change_pm(struct uart_state *state,
+ enum uart_pm_state pm_state);
+@@ -177,6 +175,52 @@ static void uart_port_dtr_rts(struct uart_port *uport, int raise)
+ uart_clear_mctrl(uport, TIOCM_DTR | TIOCM_RTS);
+ }
+
++/* Caller holds port mutex */
++static void uart_change_speed(struct tty_struct *tty, struct uart_state *state,
++ const struct ktermios *old_termios)
++{
++ struct uart_port *uport = uart_port_check(state);
++ struct ktermios *termios;
++ int hw_stopped;
++
++ /*
++ * If we have no tty, termios, or the port does not exist,
++ * then we can't set the parameters for this port.
++ */
++ if (!tty || uport->type == PORT_UNKNOWN)
++ return;
++
++ termios = &tty->termios;
++ uport->ops->set_termios(uport, termios, old_termios);
++
++ /*
++ * Set modem status enables based on termios cflag
++ */
++ spin_lock_irq(&uport->lock);
++ if (termios->c_cflag & CRTSCTS)
++ uport->status |= UPSTAT_CTS_ENABLE;
++ else
++ uport->status &= ~UPSTAT_CTS_ENABLE;
++
++ if (termios->c_cflag & CLOCAL)
++ uport->status &= ~UPSTAT_DCD_ENABLE;
++ else
++ uport->status |= UPSTAT_DCD_ENABLE;
++
++ /* reset sw-assisted CTS flow control based on (possibly) new mode */
++ hw_stopped = uport->hw_stopped;
++ uport->hw_stopped = uart_softcts_mode(uport) &&
++ !(uport->ops->get_mctrl(uport) & TIOCM_CTS);
++ if (uport->hw_stopped) {
++ if (!hw_stopped)
++ uport->ops->stop_tx(uport);
++ } else {
++ if (hw_stopped)
++ __uart_start(tty);
++ }
++ spin_unlock_irq(&uport->lock);
++}
++
+ /*
+ * Startup the port. This will be called once per open. All calls
+ * will be serialised by the per-port mutex.
+@@ -485,52 +529,6 @@ uart_get_divisor(struct uart_port *port, unsigned int baud)
+ }
+ EXPORT_SYMBOL(uart_get_divisor);
+
+-/* Caller holds port mutex */
+-static void uart_change_speed(struct tty_struct *tty, struct uart_state *state,
+- const struct ktermios *old_termios)
+-{
+- struct uart_port *uport = uart_port_check(state);
+- struct ktermios *termios;
+- int hw_stopped;
+-
+- /*
+- * If we have no tty, termios, or the port does not exist,
+- * then we can't set the parameters for this port.
+- */
+- if (!tty || uport->type == PORT_UNKNOWN)
+- return;
+-
+- termios = &tty->termios;
+- uport->ops->set_termios(uport, termios, old_termios);
+-
+- /*
+- * Set modem status enables based on termios cflag
+- */
+- spin_lock_irq(&uport->lock);
+- if (termios->c_cflag & CRTSCTS)
+- uport->status |= UPSTAT_CTS_ENABLE;
+- else
+- uport->status &= ~UPSTAT_CTS_ENABLE;
+-
+- if (termios->c_cflag & CLOCAL)
+- uport->status &= ~UPSTAT_DCD_ENABLE;
+- else
+- uport->status |= UPSTAT_DCD_ENABLE;
+-
+- /* reset sw-assisted CTS flow control based on (possibly) new mode */
+- hw_stopped = uport->hw_stopped;
+- uport->hw_stopped = uart_softcts_mode(uport) &&
+- !(uport->ops->get_mctrl(uport) & TIOCM_CTS);
+- if (uport->hw_stopped) {
+- if (!hw_stopped)
+- uport->ops->stop_tx(uport);
+- } else {
+- if (hw_stopped)
+- __uart_start(tty);
+- }
+- spin_unlock_irq(&uport->lock);
+-}
+-
+ static int uart_put_char(struct tty_struct *tty, unsigned char c)
+ {
+ struct uart_state *state = tty->driver_data;
+--
+2.40.1
+
--- /dev/null
+From cf3669789ceb2996e567248d0bfbd00deb543d11 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Sep 2023 16:52:33 +0200
+Subject: serial: Reduce spinlocked portion of uart_rs485_config()
+
+From: Lukas Wunner <lukas@wunner.de>
+
+[ Upstream commit 8679328eb859d06a1984ab48d90ac35d11bbcaf1 ]
+
+Commit 44b27aec9d96 ("serial: core, 8250: set RS485 termination GPIO in
+serial core") enabled support for RS485 termination GPIOs behind i2c
+expanders by setting the GPIO outside of the critical section protected
+by the port spinlock. Access to the i2c expander may sleep, which
+caused a splat with the port spinlock held.
+
+Commit 7c7f9bc986e6 ("serial: Deassert Transmit Enable on probe in
+driver-specific way") erroneously regressed that by spinlocking the
+GPIO manipulation again.
+
+Fix by moving uart_rs485_config() (the function manipulating the GPIO)
+outside of the spinlocked section and acquiring the spinlock inside of
+uart_rs485_config() for the invocation of ->rs485_config() only.
+
+This gets us one step closer to pushing the spinlock down into the
+->rs485_config() callbacks which actually need it. (Some callbacks
+do not want to be spinlocked because they perform sleepable register
+accesses, see e.g. sc16is7xx_config_rs485().)
+
+Stack trace for posterity:
+
+ Voluntary context switch within RCU read-side critical section!
+ WARNING: CPU: 0 PID: 56 at kernel/rcu/tree_plugin.h:318 rcu_note_context_switch
+ Call trace:
+ rcu_note_context_switch
+ __schedule
+ schedule
+ schedule_timeout
+ wait_for_completion_timeout
+ bcm2835_i2c_xfer
+ __i2c_transfer
+ i2c_transfer
+ i2c_transfer_buffer_flags
+ regmap_i2c_write
+ _regmap_raw_write_impl
+ _regmap_bus_raw_write
+ _regmap_write
+ _regmap_update_bits
+ regmap_update_bits_base
+ pca953x_gpio_set_value
+ gpiod_set_raw_value_commit
+ gpiod_set_value_nocheck
+ gpiod_set_value_cansleep
+ uart_rs485_config
+ uart_add_one_port
+ pl011_register_port
+ pl011_probe
+
+Fixes: 7c7f9bc986e6 ("serial: Deassert Transmit Enable on probe in driver-specific way")
+Suggested-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Cc: stable@vger.kernel.org # v6.1+
+Link: https://lore.kernel.org/r/f3a35967c28b32f3c6432d0aa5936e6a9908282d.1695307688.git.lukas@wunner.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/serial_core.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
+index 25972767129a3..d4e57f9017db9 100644
+--- a/drivers/tty/serial/serial_core.c
++++ b/drivers/tty/serial/serial_core.c
+@@ -1387,12 +1387,18 @@ static void uart_set_rs485_termination(struct uart_port *port,
+ static int uart_rs485_config(struct uart_port *port)
+ {
+ struct serial_rs485 *rs485 = &port->rs485;
++ unsigned long flags;
+ int ret;
+
++ if (!(rs485->flags & SER_RS485_ENABLED))
++ return 0;
++
+ uart_sanitize_serial_rs485(port, rs485);
+ uart_set_rs485_termination(port, rs485);
+
++ spin_lock_irqsave(&port->lock, flags);
+ ret = port->rs485_config(port, NULL, rs485);
++ spin_unlock_irqrestore(&port->lock, flags);
+ if (ret)
+ memset(rs485, 0, sizeof(*rs485));
+
+@@ -2455,11 +2461,10 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport)
+ if (ret == 0) {
+ if (tty)
+ uart_change_line_settings(tty, state, NULL);
++ uart_rs485_config(uport);
+ spin_lock_irq(&uport->lock);
+ if (!(uport->rs485.flags & SER_RS485_ENABLED))
+ ops->set_mctrl(uport, uport->mctrl);
+- else
+- uart_rs485_config(uport);
+ ops->start_tx(uport);
+ spin_unlock_irq(&uport->lock);
+ tty_port_set_initialized(port, 1);
+@@ -2568,10 +2573,10 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
+ port->mctrl &= TIOCM_DTR;
+ if (!(port->rs485.flags & SER_RS485_ENABLED))
+ port->ops->set_mctrl(port, port->mctrl);
+- else
+- uart_rs485_config(port);
+ spin_unlock_irqrestore(&port->lock, flags);
+
++ uart_rs485_config(port);
++
+ /*
+ * If this driver supports console, and it hasn't been
+ * successfully registered yet, try to re-register it.
+--
+2.40.1
+
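The shape of the fix, modeled in user space: the sleepable step (the
termination-GPIO write, which may go through an i2c expander) runs before
the lock is taken, and only the short, non-sleeping configuration callback
runs under it. This is a rough pthread sketch; the names are illustrative
stand-ins for the serial core, not its real API:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t port_lock = PTHREAD_MUTEX_INITIALIZER;

static void set_termination_gpio(void)
{
	usleep(1000);		/* may sleep: i2c transfer to the expander */
	puts("termination GPIO set (outside the lock)");
}

static int rs485_config_cb(void)
{
	puts("registers written (inside the lock)");
	return 0;
}

static int rs485_config(void)
{
	int ret;

	set_termination_gpio();		/* sleepable work first, lock not held */

	pthread_mutex_lock(&port_lock);
	ret = rs485_config_cb();	/* only this short step needs the lock */
	pthread_mutex_unlock(&port_lock);

	return ret;
}

int main(void)
{
	return rs485_config();
}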
--- /dev/null
+From 8b904fe504291e39319b4a291c881ecf33111185 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Mar 2023 10:09:20 +0200
+Subject: serial: Rename uart_change_speed() to uart_change_line_settings()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+
+[ Upstream commit 826736a6c7c8c3185bfb10e03c10d03d53d6cf94 ]
+
+uart_change_speed() changes more than just the speed, so rename it to
+the more generic uart_change_line_settings().
+
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Link: https://lore.kernel.org/r/20230309080923.11778-6-ilpo.jarvinen@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8679328eb859 ("serial: Reduce spinlocked portion of uart_rs485_config()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/serial_core.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
+index 07e694c4f4827..25972767129a3 100644
+--- a/drivers/tty/serial/serial_core.c
++++ b/drivers/tty/serial/serial_core.c
+@@ -176,8 +176,8 @@ static void uart_port_dtr_rts(struct uart_port *uport, int raise)
+ }
+
+ /* Caller holds port mutex */
+-static void uart_change_speed(struct tty_struct *tty, struct uart_state *state,
+- const struct ktermios *old_termios)
++static void uart_change_line_settings(struct tty_struct *tty, struct uart_state *state,
++ const struct ktermios *old_termios)
+ {
+ struct uart_port *uport = uart_port_check(state);
+ struct ktermios *termios;
+@@ -276,7 +276,7 @@ static int uart_port_startup(struct tty_struct *tty, struct uart_state *state,
+ /*
+ * Initialise the hardware port settings.
+ */
+- uart_change_speed(tty, state, NULL);
++ uart_change_line_settings(tty, state, NULL);
+
+ /*
+ * Setup the RTS and DTR signals once the
+@@ -992,7 +992,7 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port,
+ current->comm,
+ tty_name(port->tty));
+ }
+- uart_change_speed(tty, state, NULL);
++ uart_change_line_settings(tty, state, NULL);
+ }
+ } else {
+ retval = uart_startup(tty, state, 1);
+@@ -1654,7 +1654,7 @@ static void uart_set_termios(struct tty_struct *tty,
+ goto out;
+ }
+
+- uart_change_speed(tty, state, old_termios);
++ uart_change_line_settings(tty, state, old_termios);
+ /* reload cflag from termios; port driver may have overridden flags */
+ cflag = tty->termios.c_cflag;
+
+@@ -2454,7 +2454,7 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport)
+ ret = ops->startup(uport);
+ if (ret == 0) {
+ if (tty)
+- uart_change_speed(tty, state, NULL);
++ uart_change_line_settings(tty, state, NULL);
+ spin_lock_irq(&uport->lock);
+ if (!(uport->rs485.flags & SER_RS485_ENABLED))
+ ops->set_mctrl(uport, uport->mctrl);
+--
+2.40.1
+
net-avoid-uaf-on-deleted-altname.patch
net-fix-ifname-in-netlink-ntf-during-netns-move.patch
net-check-for-altname-conflicts-when-changing-netdev-s-netns.patch
+selftests-mm-fix-awk-usage-in-charge_reserved_hugetl.patch
+usb-misc-onboard_usb_hub-add-genesys-logic-gl850g-hu.patch
+usb-misc-onboard_usb_hub-add-genesys-logic-gl852g-hu.patch
+usb-misc-onboard_usb_hub-add-genesys-logic-gl3523-hu.patch
+usb-misc-onboard_hub-add-support-for-microchip-usb24.patch
+serial-move-uart_change_speed-earlier.patch
+serial-rename-uart_change_speed-to-uart_change_line_.patch
+serial-reduce-spinlocked-portion-of-uart_rs485_confi.patch
+serial-8250-omap-fix-imprecise-external-abort-for-om.patch
+serial-8250_omap-fix-errors-with-no_console_suspend.patch
+iio-core-introduce-iio_device_-claim-release-_buffer.patch
+iio-cros_ec-fix-an-use-after-free-in-cros_ec_sensors.patch
+iio-adc-ad7192-simplify-using-devm_regulator_get_ena.patch
+iio-adc-ad7192-correct-reference-voltage.patch
+pwr-mlxbf-extend-kconfig-to-include-gpio-mlxbf3-depe.patch
+arm-dts-ti-omap-fix-noisy-serial-with-overrun-thrott.patch
+fs-writeback-do-not-requeue-a-clean-inode-having-ski.patch
+btrfs-prevent-transaction-block-reserve-underflow-wh.patch
+btrfs-return-euclean-for-delayed-tree-ref-with-a-ref.patch
+btrfs-initialize-start_slot-in-btrfs_log_prealloc_ex.patch
+i2c-mux-avoid-potential-false-error-message-in-i2c_m.patch
+overlayfs-set-ctime-when-setting-mtime-and-atime.patch
+gpio-timberdale-fix-potential-deadlock-on-tgpio-lock.patch
+ata-libata-core-fix-compilation-warning-in-ata_dev_c.patch
+ata-libata-eh-fix-compilation-warning-in-ata_eh_link.patch
+tracing-relax-trace_event_eval_update-execution-with.patch
+wifi-mwifiex-sanity-check-tlv_len-and-tlv_bitmap_len.patch
+wifi-iwlwifi-ensure-ack-flag-is-properly-cleared.patch
+hid-logitech-hidpp-add-bluetooth-id-for-the-logitech.patch
+hid-holtek-fix-slab-out-of-bounds-write-in-holtek_kb.patch
+bluetooth-btusb-add-shutdown-function-for-qca6174.patch
+bluetooth-avoid-redundant-authentication.patch
+bluetooth-hci_core-fix-build-warnings.patch
+wifi-cfg80211-fix-6ghz-scan-configuration.patch
+wifi-mac80211-work-around-cisco-ap-9115-vht-mpdu-len.patch
+wifi-mac80211-allow-transmitting-eapol-frames-with-t.patch
+wifi-cfg80211-avoid-leaking-stack-data-into-trace.patch
+regulator-core-revert-fix-kobject-release-warning-an.patch
+sky2-make-sure-there-is-at-least-one-frag_addr-avail.patch
+ipv4-fib-send-notify-when-delete-source-address-rout.patch
+drm-panel-orientation-quirks-add-quirk-for-one-mix-2.patch
+btrfs-fix-some-wmaybe-uninitialized-warnings-in-ioct.patch
+btrfs-error-out-when-cowing-block-using-a-stale-tran.patch
+btrfs-error-when-cowing-block-from-a-root-that-is-be.patch
+btrfs-error-out-when-reallocating-block-for-defrag-u.patch
+drm-amd-pm-add-unique_id-for-gc-11.0.3.patch
+hid-multitouch-add-required-quirk-for-synaptics-0xcd.patch
+hid-nintendo-reinitialize-usb-pro-controller-after-r.patch
+platform-x86-touchscreen_dmi-add-info-for-the-positi.patch
+cpufreq-schedutil-update-next_freq-when-cpufreq_limi.patch
+fprobe-pass-entry_data-to-handlers.patch
+fprobe-add-nr_maxactive-to-specify-rethook_node-pool.patch
+fprobe-fix-to-ensure-the-number-of-active-retprobes-.patch
+net-xfrm-skip-policies-marked-as-dead-while-reinsert.patch
+rcu-fix-late-wakeup-when-flush-of-bypass-cblist-happ.patch
+rcu-fix-missing-nocb-gp-wake-on-rcu_barrier.patch
+rcu-make-call_rcu-lazy-to-save-power.patch
+net-use-call_rcu_hurry-for-dst_release.patch
+atomics-provide-atomic_add_negative-variants.patch
+atomics-provide-rcuref-scalable-reference-counting.patch
+net-dst-prevent-false-sharing-vs.-dst_entry-__refcnt.patch
+net-dst-switch-to-rcuref_t-reference-counting.patch
+net-dst-fix-missing-initialization-of-rt_uncached.patch
+xfrm6-fix-inet6_dev-refcount-underflow-problem.patch
+netfilter-nf_tables-do-not-remove-elements-if-set-ba.patch
+net-mlx5-e-switch-register-event-handler-before-armi.patch
+net-mlx5-handle-fw-tracer-change-ownership-event-bas.patch
+net-mlx5e-don-t-offload-internal-port-if-filter-devi.patch
+net-tls-split-tls_rx_reader_lock.patch
+tcp-allow-again-tcp_disconnect-when-threads-are-wait.patch
+ice-remove-redundant-pci_enable_pcie_error_reporting.patch
+bluetooth-hci_event-fix-using-memcmp-when-comparing-.patch
+selftests-openvswitch-add-version-check-for-pyroute2.patch
+netfilter-nf_tables-revert-do-not-remove-elements-if.patch
+tcp_bpf-properly-release-resources-on-error-paths.patch
+net-devlink-convert-devlink-port-type-specific-point.patch
+net-devlink-move-port_type_warn_schedule-call-to-__d.patch
+net-devlink-move-port_type_netdev_checks-call-to-__d.patch
+net-devlink-take-rtnl-in-port_fill-function-only-if-.patch
+net-devlink-track-netdev-with-devlink_port-assigned.patch
+net-store-netdevs-in-an-xarray.patch
+net-move-altnames-together-with-the-netdevice.patch
+net-smc-fix-smc-clc-failed-issue-when-netdevice-not-.patch
--- /dev/null
+From 4b967f19a7c6c09001d87b8374d003080d70e0c0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 09:50:39 -0700
+Subject: sky2: Make sure there is at least one frag_addr available
+
+From: Kees Cook <keescook@chromium.org>
+
+[ Upstream commit 6a70e5cbedaf8ad10528ac9ac114f3ec20f422df ]
+
+In the pathological case of building sky2 with a 16k PAGE_SIZE, the
+frag_addr[] array would never be used, so the original code was correct
+that the size should be 0. But the compiler now gets upset with 0-sized
+arrays in places where it has not eliminated the code that might access
+such an array (it cannot figure out that in this case an rx skb with
+fragments would never be created). To keep the compiler happy, make
+sure there is at least 1 frag_addr in struct rx_ring_info:
+
+ In file included from include/linux/skbuff.h:28,
+ from include/net/net_namespace.h:43,
+ from include/linux/netdevice.h:38,
+ from drivers/net/ethernet/marvell/sky2.c:18:
+ drivers/net/ethernet/marvell/sky2.c: In function 'sky2_rx_unmap_skb':
+ include/linux/dma-mapping.h:416:36: warning: array subscript i is outside array bounds of 'dma_addr_t[0]' {aka 'long long unsigned int[]'} [-Warray-bounds=]
+ 416 | #define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ drivers/net/ethernet/marvell/sky2.c:1257:17: note: in expansion of macro 'dma_unmap_page'
+ 1257 | dma_unmap_page(&pdev->dev, re->frag_addr[i],
+ | ^~~~~~~~~~~~~~
+ In file included from drivers/net/ethernet/marvell/sky2.c:41:
+ drivers/net/ethernet/marvell/sky2.h:2198:25: note: while referencing 'frag_addr'
+ 2198 | dma_addr_t frag_addr[ETH_JUMBO_MTU >> PAGE_SHIFT];
+ | ^~~~~~~~~
+
+With CONFIG_PAGE_SIZE_16KB=y, PAGE_SHIFT == 14, so:
+
+ #define ETH_JUMBO_MTU 9000
+
+causes "ETH_JUMBO_MTU >> PAGE_SHIFT" to be 0. Use "?: 1" to solve this build warning.
+
+Cc: Mirko Lindner <mlindner@marvell.com>
+Cc: Stephen Hemminger <stephen@networkplumber.org>
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: netdev@vger.kernel.org
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202309191958.UBw1cjXk-lkp@intel.com/
+Reviewed-by: Alexander Lobakin <aleksander.lobakin@intel.com>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/sky2.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/marvell/sky2.h b/drivers/net/ethernet/marvell/sky2.h
+index ddec1627f1a7b..8d0bacf4e49cc 100644
+--- a/drivers/net/ethernet/marvell/sky2.h
++++ b/drivers/net/ethernet/marvell/sky2.h
+@@ -2195,7 +2195,7 @@ struct rx_ring_info {
+ struct sk_buff *skb;
+ dma_addr_t data_addr;
+ DEFINE_DMA_UNMAP_LEN(data_size);
+- dma_addr_t frag_addr[ETH_JUMBO_MTU >> PAGE_SHIFT];
++ dma_addr_t frag_addr[ETH_JUMBO_MTU >> PAGE_SHIFT ?: 1];
+ };
+
+ enum flow_control {
+--
+2.40.1
+
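The one-liner relies on the GNU C conditional operator with an omitted
middle operand: "x ?: 1" evaluates to x when x is non-zero and to 1
otherwise. A small program (using that GCC/Clang extension) showing how
the array-size expression degrades gracefully across page sizes:

#include <stdio.h>

#define ETH_JUMBO_MTU 9000

int main(void)
{
	int shift_4k = 12;	/* PAGE_SHIFT with 4k pages */
	int shift_16k = 14;	/* PAGE_SHIFT with 16k pages */

	/* 9000 >> 12 == 2, so the "?:" leaves it alone */
	printf("4k pages:  %d slots\n", (ETH_JUMBO_MTU >> shift_4k) ?: 1);
	/* 9000 >> 14 == 0, so the "?:" substitutes 1 */
	printf("16k pages: %d slots\n", (ETH_JUMBO_MTU >> shift_16k) ?: 1);
	return 0;
}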
--- /dev/null
+From e162a18ec7faf8e91b6fabd6c3fbfc8b70827d7a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Oct 2023 09:20:55 +0200
+Subject: tcp: allow again tcp_disconnect() when threads are waiting
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 419ce133ab928ab5efd7b50b2ef36ddfd4eadbd2 ]
+
+As reported by Tom, .NET and applications built on top of it rely
+on connect(AF_UNSPEC) to asynchronously cancel pending I/O operations
+on a TCP socket.
+
+The blamed commit below caused a regression, as such cancellation
+can now fail.
+
+As suggested by Eric, this change addresses the problem by explicitly
+causing blocking I/O operations to terminate immediately (with an
+error) when a concurrent disconnect() is executed.
+
+Instead of tracking the number of threads blocked on a given socket,
+track the number of disconnect() calls issued on that socket. If the
+counter changes after a blocking operation releases and re-acquires the
+socket lock, error out the current operation.
+
+Fixes: 4faeee0cf8a5 ("tcp: deny tcp_disconnect() when threads are waiting")
+Reported-by: Tom Deseyn <tdeseyn@redhat.com>
+Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1886305
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/f3b95e47e3dbed840960548aebaa8d954372db41.1697008693.git.pabeni@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../chelsio/inline_crypto/chtls/chtls_io.c | 36 +++++++++++++++----
+ include/net/sock.h | 10 +++---
+ net/core/stream.c | 12 ++++---
+ net/ipv4/af_inet.c | 10 ++++--
+ net/ipv4/inet_connection_sock.c | 1 -
+ net/ipv4/tcp.c | 16 ++++-----
+ net/ipv4/tcp_bpf.c | 4 +++
+ net/mptcp/protocol.c | 7 ----
+ net/tls/tls_main.c | 10 ++++--
+ net/tls/tls_sw.c | 19 ++++++----
+ 10 files changed, 80 insertions(+), 45 deletions(-)
+
+diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
+index a4256087ac828..5e45bef4fd34f 100644
+--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
++++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
+@@ -911,7 +911,7 @@ static int csk_wait_memory(struct chtls_dev *cdev,
+ struct sock *sk, long *timeo_p)
+ {
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+- int err = 0;
++ int ret, err = 0;
+ long current_timeo;
+ long vm_wait = 0;
+ bool noblock;
+@@ -942,10 +942,13 @@ static int csk_wait_memory(struct chtls_dev *cdev,
+
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ sk->sk_write_pending++;
+- sk_wait_event(sk, ¤t_timeo, sk->sk_err ||
+- (sk->sk_shutdown & SEND_SHUTDOWN) ||
+- (csk_mem_free(cdev, sk) && !vm_wait), &wait);
++ ret = sk_wait_event(sk, ¤t_timeo, sk->sk_err ||
++ (sk->sk_shutdown & SEND_SHUTDOWN) ||
++ (csk_mem_free(cdev, sk) && !vm_wait),
++ &wait);
+ sk->sk_write_pending--;
++ if (ret < 0)
++ goto do_error;
+
+ if (vm_wait) {
+ vm_wait -= current_timeo;
+@@ -1438,6 +1441,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int copied = 0;
+ int target;
+ long timeo;
++ int ret;
+
+ buffers_freed = 0;
+
+@@ -1513,7 +1517,11 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ if (copied >= target)
+ break;
+ chtls_cleanup_rbuf(sk, copied);
+- sk_wait_data(sk, &timeo, NULL);
++ ret = sk_wait_data(sk, &timeo, NULL);
++ if (ret < 0) {
++ copied = copied ? : ret;
++ goto unlock;
++ }
+ continue;
+ found_ok_skb:
+ if (!skb->len) {
+@@ -1608,6 +1616,8 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+
+ if (buffers_freed)
+ chtls_cleanup_rbuf(sk, copied);
++
++unlock:
+ release_sock(sk);
+ return copied;
+ }
+@@ -1624,6 +1634,7 @@ static int peekmsg(struct sock *sk, struct msghdr *msg,
+ int copied = 0;
+ size_t avail; /* amount of available data in current skb */
+ long timeo;
++ int ret;
+
+ lock_sock(sk);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+@@ -1675,7 +1686,12 @@ static int peekmsg(struct sock *sk, struct msghdr *msg,
+ release_sock(sk);
+ lock_sock(sk);
+ } else {
+- sk_wait_data(sk, &timeo, NULL);
++ ret = sk_wait_data(sk, &timeo, NULL);
++ if (ret < 0) {
++ /* here 'copied' is 0 due to previous checks */
++ copied = ret;
++ break;
++ }
+ }
+
+ if (unlikely(peek_seq != tp->copied_seq)) {
+@@ -1746,6 +1762,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int copied = 0;
+ long timeo;
+ int target; /* Read at least this many bytes */
++ int ret;
+
+ buffers_freed = 0;
+
+@@ -1837,7 +1854,11 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ if (copied >= target)
+ break;
+ chtls_cleanup_rbuf(sk, copied);
+- sk_wait_data(sk, &timeo, NULL);
++ ret = sk_wait_data(sk, &timeo, NULL);
++ if (ret < 0) {
++ copied = copied ? : ret;
++ goto unlock;
++ }
+ continue;
+
+ found_ok_skb:
+@@ -1906,6 +1927,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ if (buffers_freed)
+ chtls_cleanup_rbuf(sk, copied);
+
++unlock:
+ release_sock(sk);
+ return copied;
+ }
+diff --git a/include/net/sock.h b/include/net/sock.h
+index 4c988b981d6e1..579c89eb7c5ca 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -333,7 +333,7 @@ struct sk_filter;
+ * @sk_cgrp_data: cgroup data for this cgroup
+ * @sk_memcg: this socket's memory cgroup association
+ * @sk_write_pending: a write to stream socket waits to start
+- * @sk_wait_pending: number of threads blocked on this socket
++ * @sk_disconnects: number of disconnect operations performed on this sock
+ * @sk_state_change: callback to indicate change in the state of the sock
+ * @sk_data_ready: callback to indicate there is data to be processed
+ * @sk_write_space: callback to indicate there is bf sending space available
+@@ -426,7 +426,7 @@ struct sock {
+ unsigned int sk_napi_id;
+ #endif
+ int sk_rcvbuf;
+- int sk_wait_pending;
++ int sk_disconnects;
+
+ struct sk_filter __rcu *sk_filter;
+ union {
+@@ -1185,8 +1185,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
+ }
+
+ #define sk_wait_event(__sk, __timeo, __condition, __wait) \
+- ({ int __rc; \
+- __sk->sk_wait_pending++; \
++ ({ int __rc, __dis = __sk->sk_disconnects; \
+ release_sock(__sk); \
+ __rc = __condition; \
+ if (!__rc) { \
+@@ -1196,8 +1195,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
+ } \
+ sched_annotate_sleep(); \
+ lock_sock(__sk); \
+- __sk->sk_wait_pending--; \
+- __rc = __condition; \
++ __rc = __dis == __sk->sk_disconnects ? __condition : -EPIPE; \
+ __rc; \
+ })
+
+diff --git a/net/core/stream.c b/net/core/stream.c
+index 5b05b889d31af..051aa71a8ad0f 100644
+--- a/net/core/stream.c
++++ b/net/core/stream.c
+@@ -117,7 +117,7 @@ EXPORT_SYMBOL(sk_stream_wait_close);
+ */
+ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
+ {
+- int err = 0;
++ int ret, err = 0;
+ long vm_wait = 0;
+ long current_timeo = *timeo_p;
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+@@ -142,11 +142,13 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
+
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ sk->sk_write_pending++;
+- sk_wait_event(sk, ¤t_timeo, READ_ONCE(sk->sk_err) ||
+- (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) ||
+- (sk_stream_memory_free(sk) &&
+- !vm_wait), &wait);
++ ret = sk_wait_event(sk, ¤t_timeo, READ_ONCE(sk->sk_err) ||
++ (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) ||
++ (sk_stream_memory_free(sk) && !vm_wait),
++ &wait);
+ sk->sk_write_pending--;
++ if (ret < 0)
++ goto do_error;
+
+ if (vm_wait) {
+ vm_wait -= current_timeo;
+diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
+index 04853c83c85c4..5d379df90c826 100644
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -589,7 +589,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk->sk_write_pending += writebias;
+- sk->sk_wait_pending++;
+
+ /* Basic assumption: if someone sets sk->sk_err, he _must_
+ * change state of the socket from TCP_SYN_*.
+@@ -605,7 +604,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
+ }
+ remove_wait_queue(sk_sleep(sk), &wait);
+ sk->sk_write_pending -= writebias;
+- sk->sk_wait_pending--;
+ return timeo;
+ }
+
+@@ -634,6 +632,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+ return -EINVAL;
+
+ if (uaddr->sa_family == AF_UNSPEC) {
++ sk->sk_disconnects++;
+ err = sk->sk_prot->disconnect(sk, flags);
+ sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
+ goto out;
+@@ -688,6 +687,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+ int writebias = (sk->sk_protocol == IPPROTO_TCP) &&
+ tcp_sk(sk)->fastopen_req &&
+ tcp_sk(sk)->fastopen_req->data ? 1 : 0;
++ int dis = sk->sk_disconnects;
+
+ /* Error code is set above */
+ if (!timeo || !inet_wait_for_connect(sk, timeo, writebias))
+@@ -696,6 +696,11 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+ err = sock_intr_errno(timeo);
+ if (signal_pending(current))
+ goto out;
++
++ if (dis != sk->sk_disconnects) {
++ err = -EPIPE;
++ goto out;
++ }
+ }
+
+ /* Connection was closed by RST, timeout, ICMP error
+@@ -717,6 +722,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+ sock_error:
+ err = sock_error(sk) ? : -ECONNABORTED;
+ sock->state = SS_UNCONNECTED;
++ sk->sk_disconnects++;
+ if (sk->sk_prot->disconnect(sk, flags))
+ sock->state = SS_DISCONNECTING;
+ goto out;
+diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
+index 62a3b103f258a..80ce0112e24b4 100644
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -1143,7 +1143,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
+ if (newsk) {
+ struct inet_connection_sock *newicsk = inet_csk(newsk);
+
+- newsk->sk_wait_pending = 0;
+ inet_sk_set_state(newsk, TCP_SYN_RECV);
+ newicsk->icsk_bind_hash = NULL;
+ newicsk->icsk_bind2_hash = NULL;
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 96fdde6e42b1b..288678f17ccaf 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -827,7 +827,9 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
+ */
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ break;
+- sk_wait_data(sk, &timeo, NULL);
++ ret = sk_wait_data(sk, &timeo, NULL);
++ if (ret < 0)
++ break;
+ if (signal_pending(current)) {
+ ret = sock_intr_errno(timeo);
+ break;
+@@ -2549,7 +2551,11 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
+ __sk_flush_backlog(sk);
+ } else {
+ tcp_cleanup_rbuf(sk, copied);
+- sk_wait_data(sk, &timeo, last);
++ err = sk_wait_data(sk, &timeo, last);
++ if (err < 0) {
++ err = copied ? : err;
++ goto out;
++ }
+ }
+
+ if ((flags & MSG_PEEK) &&
+@@ -3073,12 +3079,6 @@ int tcp_disconnect(struct sock *sk, int flags)
+ int old_state = sk->sk_state;
+ u32 seq;
+
+- /* Deny disconnect if other threads are blocked in sk_wait_event()
+- * or inet_wait_for_connect().
+- */
+- if (sk->sk_wait_pending)
+- return -EBUSY;
+-
+ if (old_state != TCP_CLOSE)
+ tcp_set_state(sk, TCP_CLOSE);
+
+diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
+index f53380fd89bcf..cb4549db8bcfc 100644
+--- a/net/ipv4/tcp_bpf.c
++++ b/net/ipv4/tcp_bpf.c
+@@ -302,6 +302,8 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
+ }
+
+ data = tcp_msg_wait_data(sk, psock, timeo);
++ if (data < 0)
++ return data;
+ if (data && !sk_psock_queue_empty(psock))
+ goto msg_bytes_ready;
+ copied = -EAGAIN;
+@@ -346,6 +348,8 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ data = tcp_msg_wait_data(sk, psock, timeo);
++ if (data < 0)
++ return data;
+ if (data) {
+ if (!sk_psock_queue_empty(psock))
+ goto msg_bytes_ready;
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 9d67f2e4d4a6e..e061091edb394 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -3101,12 +3101,6 @@ static int mptcp_disconnect(struct sock *sk, int flags)
+ {
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+- /* Deny disconnect if other threads are blocked in sk_wait_event()
+- * or inet_wait_for_connect().
+- */
+- if (sk->sk_wait_pending)
+- return -EBUSY;
+-
+ /* We are on the fastopen error path. We can't call straight into the
+ * subflows cleanup code due to lock nesting (we are already under
+ * msk->firstsocket lock).
+@@ -3174,7 +3168,6 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
+ inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
+ #endif
+
+- nsk->sk_wait_pending = 0;
+ __mptcp_init_sock(nsk);
+
+ msk = mptcp_sk(nsk);
+diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
+index f2e7302a4d96b..338a443fa47b2 100644
+--- a/net/tls/tls_main.c
++++ b/net/tls/tls_main.c
+@@ -96,8 +96,8 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx)
+
+ int wait_on_pending_writer(struct sock *sk, long *timeo)
+ {
+- int rc = 0;
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
++ int ret, rc = 0;
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ while (1) {
+@@ -111,9 +111,13 @@ int wait_on_pending_writer(struct sock *sk, long *timeo)
+ break;
+ }
+
+- if (sk_wait_event(sk, timeo,
+- !READ_ONCE(sk->sk_write_pending), &wait))
++ ret = sk_wait_event(sk, timeo,
++ !READ_ONCE(sk->sk_write_pending), &wait);
++ if (ret) {
++ if (ret < 0)
++ rc = ret;
+ break;
++ }
+ }
+ remove_wait_queue(sk_sleep(sk), &wait);
+ return rc;
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index c5c8fdadc05e8..2af72d349192e 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -1296,6 +1296,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
++ int ret = 0;
+ long timeo;
+
+ timeo = sock_rcvtimeo(sk, nonblock);
+@@ -1307,6 +1308,9 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
+ if (sk->sk_err)
+ return sock_error(sk);
+
++ if (ret < 0)
++ return ret;
++
+ if (!skb_queue_empty(&sk->sk_receive_queue)) {
+ tls_strp_check_rcv(&ctx->strp);
+ if (tls_strp_msg_ready(ctx))
+@@ -1325,10 +1329,10 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
+ released = true;
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+- sk_wait_event(sk, &timeo,
+- tls_strp_msg_ready(ctx) ||
+- !sk_psock_queue_empty(psock),
+- &wait);
++ ret = sk_wait_event(sk, &timeo,
++ tls_strp_msg_ready(ctx) ||
++ !sk_psock_queue_empty(psock),
++ &wait);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+
+@@ -1855,6 +1859,7 @@ static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx,
+ bool nonblock)
+ {
+ long timeo;
++ int ret;
+
+ timeo = sock_rcvtimeo(sk, nonblock);
+
+@@ -1864,14 +1869,16 @@ static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx,
+ ctx->reader_contended = 1;
+
+ add_wait_queue(&ctx->wq, &wait);
+- sk_wait_event(sk, &timeo,
+- !READ_ONCE(ctx->reader_present), &wait);
++ ret = sk_wait_event(sk, &timeo,
++ !READ_ONCE(ctx->reader_present), &wait);
+ remove_wait_queue(&ctx->wq, &wait);
+
+ if (timeo <= 0)
+ return -EAGAIN;
+ if (signal_pending(current))
+ return sock_intr_errno(timeo);
++ if (ret < 0)
++ return ret;
+ }
+
+ WRITE_ONCE(ctx->reader_present, 1);
+--
+2.40.1
+
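The core of the rework is a generation-counter check: sample
sk_disconnects under the lock, and treat a changed counter after
re-acquiring the lock as -EPIPE. A minimal single-threaded sketch of the
pattern; the struct and helpers are illustrative stand-ins, not the
kernel's sk_wait_event() macro or socket lock:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct sock_model {
	int sk_disconnects;	/* bumped by every disconnect() */
};

static int wait_event_model(struct sock_model *sk, bool (*cond)(void),
			    void (*while_unlocked)(struct sock_model *))
{
	int dis = sk->sk_disconnects;	/* snapshot under the lock */

	/* lock released here; a concurrent disconnect() may run */
	while_unlocked(sk);
	/* lock re-acquired here */

	if (dis != sk->sk_disconnects)
		return -EPIPE;		/* socket was disconnected under us */
	return cond() ? 1 : 0;
}

static bool ready(void) { return true; }
static void concurrent_disconnect(struct sock_model *sk) { sk->sk_disconnects++; }

int main(void)
{
	struct sock_model sk = { .sk_disconnects = 0 };
	int rc = wait_event_model(&sk, ready, concurrent_disconnect);

	printf("rc = %d (%s)\n", rc, rc == -EPIPE ? "-EPIPE" : "ok");
	return 0;
}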
--- /dev/null
+From 73eefd1c290aa4f78f8b75581a19211994dfb9f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Oct 2023 17:49:51 +0200
+Subject: tcp_bpf: properly release resources on error paths
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 68b54aeff804acceb02f228ea2e28419272c1fb9 ]
+
+In the blamed commit below, I completely forgot to release the acquired
+resources before erroring out in the TCP BPF code, as reported by Dan.
+
+Address the issues by replacing the bogus return with a jump to the
+relevant cleanup code.
+
+Fixes: 419ce133ab92 ("tcp: allow again tcp_disconnect() when threads are waiting")
+Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/r/8f99194c698bcef12666f0a9a999c58f8b1cb52c.1697557782.git.pabeni@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_bpf.c | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
+index cb4549db8bcfc..f8037d142bb75 100644
+--- a/net/ipv4/tcp_bpf.c
++++ b/net/ipv4/tcp_bpf.c
+@@ -302,8 +302,10 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
+ }
+
+ data = tcp_msg_wait_data(sk, psock, timeo);
+- if (data < 0)
+- return data;
++ if (data < 0) {
++ copied = data;
++ goto unlock;
++ }
+ if (data && !sk_psock_queue_empty(psock))
+ goto msg_bytes_ready;
+ copied = -EAGAIN;
+@@ -314,6 +316,8 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
+ tcp_rcv_space_adjust(sk);
+ if (copied > 0)
+ __tcp_cleanup_rbuf(sk, copied);
++
++unlock:
+ release_sock(sk);
+ sk_psock_put(sk, psock);
+ return copied;
+@@ -348,8 +352,10 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ data = tcp_msg_wait_data(sk, psock, timeo);
+- if (data < 0)
+- return data;
++ if (data < 0) {
++ ret = data;
++ goto unlock;
++ }
+ if (data) {
+ if (!sk_psock_queue_empty(psock))
+ goto msg_bytes_ready;
+@@ -360,6 +366,8 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ copied = -EAGAIN;
+ }
+ ret = copied;
++
++unlock:
+ release_sock(sk);
+ sk_psock_put(sk, psock);
+ return ret;
+--
+2.40.1
+
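The fix is the classic goto-based cleanup idiom: every error path funnels
through a single label that releases what was acquired, instead of
returning directly. A generic, runnable illustration; the helpers are
made up for the example and only mimic the lock/psock pairing:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int wait_for_data(void) { return -EPIPE; }	/* simulate failure */

static int recv_model(void)
{
	int copied;
	char *buf = malloc(64);		/* resource acquired up front */

	if (!buf)
		return -ENOMEM;
	puts("lock acquired");		/* second resource, conceptually */

	copied = wait_for_data();
	if (copied < 0)
		goto unlock;	/* was a bare "return copied;" -- leaked both */

	puts("data copied");		/* normal path continues here */

unlock:
	puts("lock released");
	free(buf);
	return copied;
}

int main(void)
{
	printf("recv_model() = %d\n", recv_model());
	return 0;
}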
--- /dev/null
+From 446432f170db52eb4d8c51d9932e8516fb316382 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Sep 2023 21:16:37 +0200
+Subject: tracing: relax trace_event_eval_update() execution with
+ cond_resched()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Clément Léger <cleger@rivosinc.com>
+
+[ Upstream commit 23cce5f25491968b23fb9c399bbfb25f13870cd9 ]
+
+When the kernel is compiled without preemption, eval_map_work_func()
+(which calls trace_event_eval_update()) will not be preempted until it
+completes. This can cause a problem: if another CPU calls
+stop_machine(), that call has to wait for the eval_map_work_func()
+function to finish executing in the workqueue before it can be
+scheduled. This problem was observed on an SMP system at boot time,
+when the CPU running the initcalls executed clocksource_done_booting(),
+which in the end calls stop_machine(). We observed a 1 second delay
+because one CPU was executing eval_map_work_func() and was not
+preempted by the stop_machine() task.
+
+Adding a call to cond_resched() in trace_event_eval_update() allows
+other tasks to be executed and thus continue working asynchronously
+like before without blocking any pending task at boot time.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20230929191637.416931-1-cleger@rivosinc.com
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Clément Léger <cleger@rivosinc.com>
+Tested-by: Atish Patra <atishp@rivosinc.com>
+Reviewed-by: Atish Patra <atishp@rivosinc.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_events.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
+index 9da418442a063..2e3dce5e2575e 100644
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -2777,6 +2777,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
+ update_event_fields(call, map[i]);
+ }
+ }
++ cond_resched();
+ }
+ up_write(&trace_event_sem);
+ }
+--
+2.40.1
+
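A loose user-space analogue of the change, assuming sched_yield() as a
stand-in for the kernel's cond_resched(): the long iteration voluntarily
gives other runnable tasks a chance once per outer step instead of
monopolizing the CPU until it finishes.

#include <sched.h>
#include <stdio.h>

int main(void)
{
	for (int i = 0; i < 1000; i++) {
		/* ... update one eval map entry (elided) ... */
		sched_yield();	/* let stop_machine()-like waiters run */
	}
	puts("done");
	return 0;
}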
--- /dev/null
+From 7a70703054234c3243452ac9220b9d1de1e46eb5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Sep 2023 10:22:38 +0200
+Subject: usb: misc: onboard_hub: add support for Microchip USB2412 USB 2.0 hub
+
+From: Javier Carrasco <javier.carrasco@wolfvision.net>
+
+[ Upstream commit e59e38158c61162f2e8beb4620df21a1585117df ]
+
+The USB2412 is a 2-port USB 2.0 hub controller that provides a reset pin
+and a single 3.3V power source, which makes it suitable to be controlled
+by the onboard_hub driver.
+
+This hub has the same reset timings as the USB2514/2517, so the same
+onboard-hub platform data can be reused for the USB2412.
+
+Signed-off-by: Javier Carrasco <javier.carrasco@wolfvision.net>
+Cc: stable <stable@kernel.org>
+Acked-by: Matthias Kaehlcke <mka@chromium.org>
+Link: https://lore.kernel.org/r/20230911-topic-2412_onboard_hub-v1-1-7704181ddfff@wolfvision.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/misc/onboard_usb_hub.c | 1 +
+ drivers/usb/misc/onboard_usb_hub.h | 1 +
+ 2 files changed, 2 insertions(+)
+
+diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c
+index 8d5c83c9ff877..8edd0375e0a8a 100644
+--- a/drivers/usb/misc/onboard_usb_hub.c
++++ b/drivers/usb/misc/onboard_usb_hub.c
+@@ -409,6 +409,7 @@ static const struct usb_device_id onboard_hub_id_table[] = {
+ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 */
+ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0610) }, /* Genesys Logic GL852G USB 2.0 */
+ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0620) }, /* Genesys Logic GL3523 USB 3.1 */
++ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2412) }, /* USB2412 USB 2.0 */
+ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */
+ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */
+ { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */
+diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h
+index 61fee18f9dfc9..d023fb90b4118 100644
+--- a/drivers/usb/misc/onboard_usb_hub.h
++++ b/drivers/usb/misc/onboard_usb_hub.h
+@@ -31,6 +31,7 @@ static const struct onboard_hub_pdata genesys_gl852g_data = {
+ };
+
+ static const struct of_device_id onboard_hub_match[] = {
++ { .compatible = "usb424,2412", .data = µchip_usb424_data, },
+ { .compatible = "usb424,2514", .data = µchip_usb424_data, },
+ { .compatible = "usb424,2517", .data = µchip_usb424_data, },
+ { .compatible = "usb451,8140", .data = &ti_tusb8041_data, },
+--
+2.40.1
+
--- /dev/null
+From 96a8a99bf6bf1b6662aad7118baea93baf0a3dc8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Jun 2023 16:22:28 +0200
+Subject: usb: misc: onboard_usb_hub: add Genesys Logic GL3523 hub support
+
+From: Anand Moon <linux.amoon@gmail.com>
+
+[ Upstream commit d97b4b35adcecd4b747d3e1c262e10e4a093cefa ]
+
+Genesys Logic GL3523 is a 4-port USB 3.1 hub that has a reset pin to
+toggle and an exported 5.0V core supply, though an integrated LDO is
+available for powering it.
+
+Add the support for this hub, for controlling the reset pin and the core
+power supply.
+
+Signed-off-by: Anand Moon <linux.amoon@gmail.com>
+[m.felsch@pengutronix.de: include review feedback & port to 6.4]
+Signed-off-by: Marco Felsch <m.felsch@pengutronix.de>
+Link: https://lore.kernel.org/r/20230623142228.4069084-2-m.felsch@pengutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: e59e38158c61 ("usb: misc: onboard_hub: add support for Microchip USB2412 USB 2.0 hub")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/misc/onboard_usb_hub.c | 1 +
+ drivers/usb/misc/onboard_usb_hub.h | 1 +
+ 2 files changed, 2 insertions(+)
+
+diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c
+index 7a1030ddf9956..8d5c83c9ff877 100644
+--- a/drivers/usb/misc/onboard_usb_hub.c
++++ b/drivers/usb/misc/onboard_usb_hub.c
+@@ -408,6 +408,7 @@ static void onboard_hub_usbdev_disconnect(struct usb_device *udev)
+ static const struct usb_device_id onboard_hub_id_table[] = {
+ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 */
+ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0610) }, /* Genesys Logic GL852G USB 2.0 */
++ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0620) }, /* Genesys Logic GL3523 USB 3.1 */
+ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */
+ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */
+ { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */
+diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h
+index 0c2ab5755a7ea..61fee18f9dfc9 100644
+--- a/drivers/usb/misc/onboard_usb_hub.h
++++ b/drivers/usb/misc/onboard_usb_hub.h
+@@ -37,6 +37,7 @@ static const struct of_device_id onboard_hub_match[] = {
+ { .compatible = "usb451,8142", .data = &ti_tusb8041_data, },
+ { .compatible = "usb5e3,608", .data = &genesys_gl850g_data, },
+ { .compatible = "usb5e3,610", .data = &genesys_gl852g_data, },
++ { .compatible = "usb5e3,620", .data = &genesys_gl852g_data, },
+ { .compatible = "usbbda,411", .data = &realtek_rts5411_data, },
+ { .compatible = "usbbda,5411", .data = &realtek_rts5411_data, },
+ { .compatible = "usbbda,414", .data = &realtek_rts5411_data, },
+--
+2.40.1
+
--- /dev/null
+From e1ab429a75e2435a94dc42287687169e782b0924 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Dec 2022 13:52:25 +0800
+Subject: usb: misc: onboard_usb_hub: add Genesys Logic GL850G hub support
+
+From: Icenowy Zheng <uwu@icenowy.me>
+
+[ Upstream commit 9bae996ffa28ac03b6d95382a2a082eb219e745a ]
+
+Genesys Logic GL850G is a 4-port USB 2.0 STT hub that has a reset pin
+to toggle and an exported 3.3V core supply (although an integrated LDO
+is available for powering it with 5V).
+
+Add the support for this hub, for controlling the reset pin and the core
+power supply.
+
+Signed-off-by: Icenowy Zheng <uwu@icenowy.me>
+Acked-by: Matthias Kaehlcke <mka@chromium.org>
+Link: https://lore.kernel.org/r/20221206055228.306074-4-uwu@icenowy.me
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: e59e38158c61 ("usb: misc: onboard_hub: add support for Microchip USB2412 USB 2.0 hub")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/misc/onboard_usb_hub.c | 2 ++
+ drivers/usb/misc/onboard_usb_hub.h | 5 +++++
+ 2 files changed, 7 insertions(+)
+
+diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c
+index 832d3ba9368ff..87df27425ec5f 100644
+--- a/drivers/usb/misc/onboard_usb_hub.c
++++ b/drivers/usb/misc/onboard_usb_hub.c
+@@ -329,6 +329,7 @@ static struct platform_driver onboard_hub_driver = {
+
+ /************************** USB driver **************************/
+
++#define VENDOR_ID_GENESYS 0x05e3
+ #define VENDOR_ID_MICROCHIP 0x0424
+ #define VENDOR_ID_REALTEK 0x0bda
+ #define VENDOR_ID_TI 0x0451
+@@ -405,6 +406,7 @@ static void onboard_hub_usbdev_disconnect(struct usb_device *udev)
+ }
+
+ static const struct usb_device_id onboard_hub_id_table[] = {
++ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 */
+ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */
+ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */
+ { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */
+diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h
+index 2cde54b69eede..a97b0594773fa 100644
+--- a/drivers/usb/misc/onboard_usb_hub.h
++++ b/drivers/usb/misc/onboard_usb_hub.h
+@@ -22,11 +22,16 @@ static const struct onboard_hub_pdata ti_tusb8041_data = {
+ .reset_us = 3000,
+ };
+
++static const struct onboard_hub_pdata genesys_gl850g_data = {
++ .reset_us = 3,
++};
++
+ static const struct of_device_id onboard_hub_match[] = {
+ { .compatible = "usb424,2514", .data = µchip_usb424_data, },
+ { .compatible = "usb424,2517", .data = µchip_usb424_data, },
+ { .compatible = "usb451,8140", .data = &ti_tusb8041_data, },
+ { .compatible = "usb451,8142", .data = &ti_tusb8041_data, },
++ { .compatible = "usb5e3,608", .data = &genesys_gl850g_data, },
+ { .compatible = "usbbda,411", .data = &realtek_rts5411_data, },
+ { .compatible = "usbbda,5411", .data = &realtek_rts5411_data, },
+ { .compatible = "usbbda,414", .data = &realtek_rts5411_data, },
+--
+2.40.1
+
--- /dev/null
+From e9a249382212ab43ab4a222d2403066b8c25b77b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Jan 2023 04:44:12 +0000
+Subject: usb: misc: onboard_usb_hub: add Genesys Logic GL852G hub support
+
+From: Anand Moon <linux.amoon@gmail.com>
+
+[ Upstream commit db7cab26c3d1382ec85d8cadf642f57250edea58 ]
+
+Genesys Logic GL852G is a 4-port USB 2.0 STT hub that has a reset pin
+to toggle and an exported 5.0V core supply, though an integrated LDO is
+available for powering it.
+
+Add the support for this hub, for controlling the reset pin and the core
+power supply.
+
+Signed-off-by: Anand Moon <linux.amoon@gmail.com>
+Acked-by: Matthias Kaehlcke <mka@chromium.org>
+Link: https://lore.kernel.org/r/20230118044418.875-5-linux.amoon@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: e59e38158c61 ("usb: misc: onboard_hub: add support for Microchip USB2412 USB 2.0 hub")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/misc/onboard_usb_hub.c | 1 +
+ drivers/usb/misc/onboard_usb_hub.h | 5 +++++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c
+index 87df27425ec5f..7a1030ddf9956 100644
+--- a/drivers/usb/misc/onboard_usb_hub.c
++++ b/drivers/usb/misc/onboard_usb_hub.c
+@@ -407,6 +407,7 @@ static void onboard_hub_usbdev_disconnect(struct usb_device *udev)
+
+ static const struct usb_device_id onboard_hub_id_table[] = {
+ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 */
++ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0610) }, /* Genesys Logic GL852G USB 2.0 */
+ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */
+ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */
+ { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */
+diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h
+index a97b0594773fa..0c2ab5755a7ea 100644
+--- a/drivers/usb/misc/onboard_usb_hub.h
++++ b/drivers/usb/misc/onboard_usb_hub.h
+@@ -26,12 +26,17 @@ static const struct onboard_hub_pdata genesys_gl850g_data = {
+ .reset_us = 3,
+ };
+
++static const struct onboard_hub_pdata genesys_gl852g_data = {
++ .reset_us = 50,
++};
++
+ static const struct of_device_id onboard_hub_match[] = {
+ { .compatible = "usb424,2514", .data = µchip_usb424_data, },
+ { .compatible = "usb424,2517", .data = µchip_usb424_data, },
+ { .compatible = "usb451,8140", .data = &ti_tusb8041_data, },
+ { .compatible = "usb451,8142", .data = &ti_tusb8041_data, },
+ { .compatible = "usb5e3,608", .data = &genesys_gl850g_data, },
++ { .compatible = "usb5e3,610", .data = &genesys_gl852g_data, },
+ { .compatible = "usbbda,411", .data = &realtek_rts5411_data, },
+ { .compatible = "usbbda,5411", .data = &realtek_rts5411_data, },
+ { .compatible = "usbbda,414", .data = &realtek_rts5411_data, },
+--
+2.40.1
+
--- /dev/null
+From b56d1b5567b3b2edc0895e6c297a61c65e161723 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Sep 2023 17:18:56 +0200
+Subject: wifi: cfg80211: avoid leaking stack data into trace
+
+From: Benjamin Berg <benjamin.berg@intel.com>
+
+[ Upstream commit 334bf33eec5701a1e4e967bcb7cc8611a998334b ]
+
+If the structure is not initialized, boolean members might be copied
+into the tracing data without being initialized. This causes data from
+the stack to leak into the trace and also triggers a UBSAN failure
+which can easily be avoided here.
+
+Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
+Link: https://lore.kernel.org/r/20230925171855.a9271ef53b05.I8180bae663984c91a3e036b87f36a640ba409817@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/wireless/nl80211.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
+index 1d993a490ac4b..b19b5acfaf3a9 100644
+--- a/net/wireless/nl80211.c
++++ b/net/wireless/nl80211.c
+@@ -8289,7 +8289,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb,
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+- struct mesh_config cfg;
++ struct mesh_config cfg = {};
+ u32 mask;
+ int err;
+
+--
+2.40.1
+
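The fix is a tiny change with a precise meaning: an empty initializer
list zero-initializes every member, so fields the parser never touches
cannot carry stale stack bytes into the trace. A standalone illustration
with a made-up struct (mac80211's mesh_config is much larger):

#include <stdbool.h>
#include <stdio.h>

struct cfg_model {
	bool flag_a;
	bool flag_b;
	int  value;
};

int main(void)
{
	/* "= {}" is the GNU/C23 spelling; "= {0}" is strictly portable.
	 * Either way, all members start out zero/false. */
	struct cfg_model cfg = {};

	cfg.value = 42;		/* only some fields get filled in */
	printf("flag_a=%d flag_b=%d value=%d\n",
	       cfg.flag_a, cfg.flag_b, cfg.value);
	return 0;
}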
--- /dev/null
+From f8112351b0505cd35b1a0a1fed1ad25dbc234ca1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Sep 2023 14:10:54 +0300
+Subject: wifi: cfg80211: Fix 6GHz scan configuration
+
+From: Ilan Peer <ilan.peer@intel.com>
+
+[ Upstream commit 0914468adf92296c4cba8a2134e06e3dea150f2e ]
+
+When the scan request includes a non-broadcast BSSID, do not include
+entries that do not match the given BSSID when adding the scan
+parameters for 6GHz collocated scanning.
+
+Signed-off-by: Ilan Peer <ilan.peer@intel.com>
+Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
+Link: https://lore.kernel.org/r/20230918140607.6d31d2a96baf.I6c4e3e3075d1d1878ee41f45190fdc6b86f18708@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/wireless/scan.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/net/wireless/scan.c b/net/wireless/scan.c
+index e5c1510c098fd..b7e1631b3d80d 100644
+--- a/net/wireless/scan.c
++++ b/net/wireless/scan.c
+@@ -876,6 +876,10 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
+ !cfg80211_find_ssid_match(ap, request))
+ continue;
+
++ if (!is_broadcast_ether_addr(request->bssid) &&
++ !ether_addr_equal(request->bssid, ap->bssid))
++ continue;
++
+ if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid)
+ continue;
+
+--
+2.40.1
+
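The added filter, extracted into a standalone predicate: an entry is
skipped only when the request carries a specific (non-broadcast) BSSID
that differs from the AP's. The two helpers below mirror the semantics
of the kernel's etherdevice.h, re-implemented here so the example is
self-contained:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

static bool is_broadcast_ether_addr(const unsigned char *a)
{
	/* all six bytes must be 0xff */
	return (a[0] & a[1] & a[2] & a[3] & a[4] & a[5]) == 0xff;
}

static bool ether_addr_equal(const unsigned char *a, const unsigned char *b)
{
	return memcmp(a, b, ETH_ALEN) == 0;
}

static bool skip_entry(const unsigned char *req_bssid,
		       const unsigned char *ap_bssid)
{
	return !is_broadcast_ether_addr(req_bssid) &&
	       !ether_addr_equal(req_bssid, ap_bssid);
}

int main(void)
{
	unsigned char bcast[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
	unsigned char req[ETH_ALEN]   = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
	unsigned char ap[ETH_ALEN]    = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 };

	printf("broadcast request skips AP? %d\n", skip_entry(bcast, ap)); /* 0 */
	printf("specific request skips AP?  %d\n", skip_entry(req, ap));   /* 1 */
	return 0;
}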
--- /dev/null
+From 9865b6f9bb8d98305584ef8b63fb94b436b84e73 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Aug 2023 13:56:05 -0700
+Subject: wifi: iwlwifi: Ensure ack flag is properly cleared.
+
+From: Ben Greear <greearb@candelatech.com>
+
+[ Upstream commit e8fbe99e87877f0412655f40d7c45bf8471470ac ]
+
+Debugging indicates that nothing else is clearing info->flags, so some
+frames were flagged as ACKed when they should not be. Explicitly clear
+the ack flag to ensure this does not happen.
+
+Signed-off-by: Ben Greear <greearb@candelatech.com>
+Acked-by: Gregory Greenman <gregory.greenman@intel.com>
+Link: https://lore.kernel.org/r/20230808205605.4105670-1-greearb@candelatech.com
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+index 542cfcad6e0e6..2d01f6226b7c6 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+@@ -1585,6 +1585,7 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
+ iwl_trans_free_tx_cmd(mvm->trans, info->driver_data[1]);
+
+ memset(&info->status, 0, sizeof(info->status));
++ info->flags &= ~(IEEE80211_TX_STAT_ACK | IEEE80211_TX_STAT_TX_FILTERED);
+
+ /* inform mac80211 about what happened with the frame */
+ switch (status & TX_STATUS_MSK) {
+@@ -1936,6 +1937,8 @@ static void iwl_mvm_tx_reclaim(struct iwl_mvm *mvm, int sta_id, int tid,
+ */
+ if (!is_flush)
+ info->flags |= IEEE80211_TX_STAT_ACK;
++ else
++ info->flags &= ~IEEE80211_TX_STAT_ACK;
+ }
+
+ /*
+--
+2.40.1
+
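The mechanics of the fix in isolation: clearing bits with "&= ~mask"
before they are conditionally set again guarantees a reused tx info
block cannot report a stale ACK. The flag values below are illustrative,
not mac80211's actual bit positions:

#include <stdio.h>

#define TX_STAT_ACK		(1u << 0)
#define TX_STAT_TX_FILTERED	(1u << 1)

int main(void)
{
	unsigned int flags = TX_STAT_ACK | TX_STAT_TX_FILTERED; /* stale */

	flags &= ~(TX_STAT_ACK | TX_STAT_TX_FILTERED);	/* explicit clear */
	printf("flags after clear: 0x%x\n", flags);	/* prints 0x0 */
	return 0;
}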
--- /dev/null
+From 42fd080e0a875b6f3773309b72f9e281e5aa003a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Aug 2023 02:47:51 -0400
+Subject: wifi: mac80211: allow transmitting EAPOL frames with tainted key
+
+From: Wen Gong <quic_wgong@quicinc.com>
+
+[ Upstream commit 61304336c67358d49a989e5e0060d8c99bad6ca8 ]
+
+The lower layer device driver stops/wakes TX by calling
+ieee80211_stop_queue()/ieee80211_wake_queue() during hw scan. Sometimes
+hw scan and PTK rekeying run in parallel: when the M4 sent by
+wpa_supplicant arrives while the TX queue is stopped, the M4 is left
+pending, and then the new key is installed from wpa_supplicant. After
+the lower layer device driver wakes the TX queue up, the M4 is dropped
+by the call stack below.
+
+When the key installation starts, the current key gets KEY_FLAG_TAINTED
+set in ieee80211_pairwise_rekey(), and mac80211 then waits for the
+lower layer device driver to complete the key installation. Meanwhile
+ieee80211_tx_h_select_key() returns TX_DROP for the M4 in step 12
+below, and ieee80211_free_txskb() is called by ieee80211_tx_dequeue(),
+so the M4 is never sent and is freed, and the rekey process fails
+because the AP does not receive the M4. See the detailed steps below.
+
+There is an interval between KEY_FLAG_TAINTED being set on the current
+key and the key installation completing in the lower layer device
+driver. During this interval all packets, including the M4, are
+dropped; the interval is steps 8~13 below.
+
+issue steps:
+ TX thread install key thread
+1. stop_queue -idle-
+2. sending M4 -idle-
+3. M4 pending -idle-
+4. -idle- starting install key from wpa_supplicant
+5. -idle- =>ieee80211_key_replace()
+6. -idle- =>ieee80211_pairwise_rekey() and set
+ currently key->flags |= KEY_FLAG_TAINTED
+7. -idle- =>ieee80211_key_enable_hw_accel()
+8. -idle- =>drv_set_key() and waiting key install
+ complete from lower layer device driver
+9. wake_queue -waiting state-
+10. re-sending M4 -waiting state-
+11. =>ieee80211_tx_h_select_key() -waiting state-
+12. drop M4 by KEY_FLAG_TAINTED -waiting state-
+13. -idle- install key complete with success/fail
+ success: clear flag KEY_FLAG_TAINTED
+ fail: start disconnect
+
+Hence add a check in step 11 above to allow the EAPOL frame to be sent
+out during the interval. If the lower layer device driver uses the old
+key/cipher to encrypt the M4, the AP receives/decrypts the M4
+correctly; after the M4 is sent out, the lower layer device driver
+installs the new key/cipher into the hardware and returns success.
+
+If the lower layer device driver uses the new key/cipher to send the
+M4, the AP will/should drop the M4, which is the same outcome as this
+issue: the AP will/should kick the station out, just as with this
+issue.
+
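+A hedged illustration, not the mac80211 implementation itself: the added
+condition simply exempts control-port frames from the tainted-key drop.
+A minimal standalone C model of the decision, using simplified stand-in
+types (struct tx_model and its fields are hypothetical), could look
+like this:
+
+/* Standalone sketch of the tainted-key drop decision; the struct and
+ * constants are simplified stand-ins, not the real mac80211 types. */
+#include <stdbool.h>
+#include <stdio.h>
+
+#define KEY_FLAG_TAINTED 0x1
+#define ETH_P_PAE 0x888e	/* 802.1X control-port (EAPOL) protocol */
+
+struct tx_model {
+	unsigned int key_flags;		/* flags of the selected key */
+	bool is_deauth;			/* frame is a deauth frame */
+	unsigned short protocol;	/* protocol of the skb */
+	unsigned short control_port_protocol; /* usually ETH_P_PAE */
+};
+
+static bool should_drop(const struct tx_model *tx)
+{
+	/* Drop on a tainted key unless the frame is a deauth or, with
+	 * this fix, a control-port (EAPOL) frame such as the M4. */
+	return (tx->key_flags & KEY_FLAG_TAINTED) && !tx->is_deauth &&
+	       tx->protocol != tx->control_port_protocol;
+}
+
+int main(void)
+{
+	struct tx_model m4 = { KEY_FLAG_TAINTED, false,
+			       ETH_P_PAE, ETH_P_PAE };
+
+	/* Prints "no": the M4 now survives the rekey interval. */
+	printf("drop M4 during rekey: %s\n",
+	       should_drop(&m4) ? "yes" : "no");
+	return 0;
+}
+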
+issue log:
+kworker/u16:4-5238 [000] 6456.108926: stop_queue: phy1 queue:0, reason:0
+wpa_supplicant-961 [003] 6456.119737: rdev_tx_control_port: wiphy_name=phy1 name=wlan0 ifindex=6 dest=ARRAY[9e, 05, 31, 20, 9b, d0] proto=36488 unencrypted=0
+wpa_supplicant-961 [003] 6456.119839: rdev_return_int_cookie: phy1, returned 0, cookie: 504
+wpa_supplicant-961 [003] 6456.120287: rdev_add_key: phy1, netdev:wlan0(6), key_index: 0, mode: 0, pairwise: true, mac addr: 9e:05:31:20:9b:d0
+wpa_supplicant-961 [003] 6456.120453: drv_set_key: phy1 vif:wlan0(2) sta:9e:05:31:20:9b:d0 cipher:0xfac04, flags=0x9, keyidx=0, hw_key_idx=0
+kworker/u16:9-3829 [001] 6456.168240: wake_queue: phy1 queue:0, reason:0
+kworker/u16:9-3829 [001] 6456.168255: drv_wake_tx_queue: phy1 vif:wlan0(2) sta:9e:05:31:20:9b:d0 ac:0 tid:7
+kworker/u16:9-3829 [001] 6456.168305: cfg80211_control_port_tx_status: wdev(1), cookie: 504, ack: false
+wpa_supplicant-961 [003] 6459.167982: drv_return_int: phy1 - -110
+
+issue call stack:
+nl80211_frame_tx_status+0x230/0x340 [cfg80211]
+cfg80211_control_port_tx_status+0x1c/0x28 [cfg80211]
+ieee80211_report_used_skb+0x374/0x3e8 [mac80211]
+ieee80211_free_txskb+0x24/0x40 [mac80211]
+ieee80211_tx_dequeue+0x644/0x954 [mac80211]
+ath10k_mac_tx_push_txq+0xac/0x238 [ath10k_core]
+ath10k_mac_op_wake_tx_queue+0xac/0xe0 [ath10k_core]
+drv_wake_tx_queue+0x80/0x168 [mac80211]
+__ieee80211_wake_txqs+0xe8/0x1c8 [mac80211]
+_ieee80211_wake_txqs+0xb4/0x120 [mac80211]
+ieee80211_wake_txqs+0x48/0x80 [mac80211]
+tasklet_action_common+0xa8/0x254
+tasklet_action+0x2c/0x38
+__do_softirq+0xdc/0x384
+
+Signed-off-by: Wen Gong <quic_wgong@quicinc.com>
+Link: https://lore.kernel.org/r/20230801064751.25803-1-quic_wgong@quicinc.com
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/tx.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
+index 2f9e1abdf375d..2db103a56a28f 100644
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -680,7 +680,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
+ }
+
+ if (unlikely(tx->key && tx->key->flags & KEY_FLAG_TAINTED &&
+- !ieee80211_is_deauth(hdr->frame_control)))
++ !ieee80211_is_deauth(hdr->frame_control)) &&
++ tx->skb->protocol != tx->sdata->control_port_protocol)
+ return TX_DROP;
+
+ if (!skip_hw && tx->key &&
+--
+2.40.1
+
--- /dev/null
+From ec2ef90b8890dba12da3cda729f00969c61df036 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Sep 2023 14:10:55 +0300
+Subject: wifi: mac80211: work around Cisco AP 9115 VHT MPDU length
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit 084cf2aeca97566db4fa15d55653c1cba2db83ed ]
+
+Cisco AP module 9115 with FW 17.3 has a bug: it sends a maximum MPDU
+length in the association response (indicating 12k) that is larger
+than it can actually process.
+
+Work around that by taking the minimum between what's in the
+association response and the BSS elements (from beacon or
+probe response).
+
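+A hedged sketch of the workaround's core, not the mac80211 code itself:
+take the smaller of the two advertised maximum MPDU length fields. The
+constant below mirrors the two-bit IEEE80211_VHT_CAP_MAX_MPDU_MASK
+encoding (0 -> 3895, 1 -> 7991, 2 -> 11454 bytes), which is monotonic,
+so the minimum of the field values picks the smaller length:
+
+/* Standalone model of the min-of-both-IEs MPDU workaround. */
+#include <stdint.h>
+#include <stdio.h>
+
+#define VHT_CAP_MAX_MPDU_MASK 0x3	/* bits 0-1 of vht_cap_info */
+
+static uint32_t effective_mpdu_field(uint32_t assoc_cap_info,
+				     const uint32_t *bss_cap_info)
+{
+	uint32_t mpdu = assoc_cap_info & VHT_CAP_MAX_MPDU_MASK;
+
+	/* If the beacon/probe response also carried VHT caps, trust
+	 * the smaller of the two advertised maximum MPDU lengths. */
+	if (bss_cap_info) {
+		uint32_t other = *bss_cap_info & VHT_CAP_MAX_MPDU_MASK;
+
+		if (other < mpdu)
+			mpdu = other;
+	}
+	return mpdu;
+}
+
+int main(void)
+{
+	uint32_t assoc = 0x2;	/* assoc resp claims 11454 (the bug) */
+	uint32_t bss = 0x0;	/* beacon advertises 3895 */
+
+	/* Prints 0: the 3895-byte maximum is used, not 11454. */
+	printf("effective field: %u\n", effective_mpdu_field(assoc, &bss));
+	return 0;
+}
+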
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
+Link: https://lore.kernel.org/r/20230918140607.d1966a9a532e.I090225babb7cd4d1081ee9acd40e7de7e41c15ae@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/cfg.c | 3 ++-
+ net/mac80211/ibss.c | 2 +-
+ net/mac80211/ieee80211_i.h | 1 +
+ net/mac80211/mesh_plink.c | 2 +-
+ net/mac80211/mlme.c | 27 +++++++++++++++++++++++++--
+ net/mac80211/vht.c | 16 ++++++++++++++--
+ 6 files changed, 44 insertions(+), 7 deletions(-)
+
+diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
+index 0167413d56972..ee9f455bb2d18 100644
+--- a/net/mac80211/cfg.c
++++ b/net/mac80211/cfg.c
+@@ -1748,7 +1748,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
+ /* VHT can override some HT caps such as the A-MSDU max length */
+ if (params->vht_capa)
+ ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
+- params->vht_capa, link_sta);
++ params->vht_capa, NULL,
++ link_sta);
+
+ if (params->he_capa)
+ ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
+diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
+index 9dffc30795887..79d2c55052897 100644
+--- a/net/mac80211/ibss.c
++++ b/net/mac80211/ibss.c
+@@ -1068,7 +1068,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
+ &chandef);
+ memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie));
+ ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
+- &cap_ie,
++ &cap_ie, NULL,
+ &sta->deflink);
+ if (memcmp(&cap, &sta->sta.deflink.vht_cap, sizeof(cap)))
+ rates_updated |= true;
+diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
+index 27479bbb093ac..99a976ea17498 100644
+--- a/net/mac80211/ieee80211_i.h
++++ b/net/mac80211/ieee80211_i.h
+@@ -2062,6 +2062,7 @@ void
+ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
+ const struct ieee80211_vht_cap *vht_cap_ie,
++ const struct ieee80211_vht_cap *vht_cap_ie2,
+ struct link_sta_info *link_sta);
+ enum ieee80211_sta_rx_bandwidth
+ ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta);
+diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
+index ddfe5102b9a43..bd0b7c189adfa 100644
+--- a/net/mac80211/mesh_plink.c
++++ b/net/mac80211/mesh_plink.c
+@@ -443,7 +443,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
+ changed |= IEEE80211_RC_BW_CHANGED;
+
+ ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
+- elems->vht_cap_elem,
++ elems->vht_cap_elem, NULL,
+ &sta->deflink);
+
+ ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap,
+diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
+index dc9e7eb7dd857..c07645c999f9a 100644
+--- a/net/mac80211/mlme.c
++++ b/net/mac80211/mlme.c
+@@ -4083,10 +4083,33 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
+ elems->ht_cap_elem,
+ link_sta);
+
+- if (elems->vht_cap_elem && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT))
++ if (elems->vht_cap_elem &&
++ !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) {
++ const struct ieee80211_vht_cap *bss_vht_cap = NULL;
++ const struct cfg80211_bss_ies *ies;
++
++ /*
++ * Cisco AP module 9115 with FW 17.3 has a bug and sends a
++ * too large maximum MPDU length in the association response
++ * (indicating 12k) that it cannot actually process ...
++ * Work around that.
++ */
++ rcu_read_lock();
++ ies = rcu_dereference(cbss->ies);
++ if (ies) {
++ const struct element *elem;
++
++ elem = cfg80211_find_elem(WLAN_EID_VHT_CAPABILITY,
++ ies->data, ies->len);
++ if (elem && elem->datalen >= sizeof(*bss_vht_cap))
++ bss_vht_cap = (const void *)elem->data;
++ }
++
+ ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
+ elems->vht_cap_elem,
+- link_sta);
++ bss_vht_cap, link_sta);
++ rcu_read_unlock();
++ }
+
+ if (elems->he_operation && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
+ elems->he_cap) {
+diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
+index 803de58814852..f7526be8a1c7e 100644
+--- a/net/mac80211/vht.c
++++ b/net/mac80211/vht.c
+@@ -4,7 +4,7 @@
+ *
+ * Portions of this file
+ * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
+- * Copyright (C) 2018 - 2022 Intel Corporation
++ * Copyright (C) 2018 - 2023 Intel Corporation
+ */
+
+ #include <linux/ieee80211.h>
+@@ -116,12 +116,14 @@ void
+ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
+ const struct ieee80211_vht_cap *vht_cap_ie,
++ const struct ieee80211_vht_cap *vht_cap_ie2,
+ struct link_sta_info *link_sta)
+ {
+ struct ieee80211_sta_vht_cap *vht_cap = &link_sta->pub->vht_cap;
+ struct ieee80211_sta_vht_cap own_cap;
+ u32 cap_info, i;
+ bool have_80mhz;
++ u32 mpdu_len;
+
+ memset(vht_cap, 0, sizeof(*vht_cap));
+
+@@ -317,11 +319,21 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
+
+ link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta);
+
++ /*
++ * Work around the Cisco 9115 FW 17.3 bug by taking the min of
++ * both reported MPDU lengths.
++ */
++ mpdu_len = vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK;
++ if (vht_cap_ie2)
++ mpdu_len = min_t(u32, mpdu_len,
++ le32_get_bits(vht_cap_ie2->vht_cap_info,
++ IEEE80211_VHT_CAP_MAX_MPDU_MASK));
++
+ /*
+ * FIXME - should the amsdu len be per link? store per link
+ * and maintain a minimum?
+ */
+- switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) {
++ switch (mpdu_len) {
+ case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
+ link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454;
+ break;
+--
+2.40.1
+
--- /dev/null
+From c7fa6306400f2018c68e1f5af3fed775b99522be Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Aug 2023 21:10:45 -0600
+Subject: wifi: mwifiex: Sanity check tlv_len and tlv_bitmap_len
+
+From: Gustavo A. R. Silva <gustavoars@kernel.org>
+
+[ Upstream commit d5a93b7d2877aae4ba7590ad6cb65f8d33079489 ]
+
+Add sanity checks for both `tlv_len` and `tlv_bitmap_len` before
+decoding data from `event_buf`.
+
+This prevents any malicious or buggy firmware from overflowing
+`event_buf` through large values for `tlv_len` and `tlv_bitmap_len`.
+
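+A hedged sketch of the bounds-check pattern, not the mwifiex code: the
+claimed TLV length is added to the header size with a saturating helper
+so the sum cannot wrap past the remaining buffer. size_add() below is a
+simplified stand-in for the kernel helper of the same name:
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+/* Saturate instead of wrapping, like the kernel's size_add(). */
+static size_t size_add(size_t a, size_t b)
+{
+	return (a + b < a) ? SIZE_MAX : a + b;
+}
+
+/* Returns 1 if a TLV claiming tlv_len payload bytes after a hdr_len
+ * header fits in the remaining buffer, 0 if it would overflow it. */
+static int tlv_fits(size_t hdr_len, size_t tlv_len, size_t buf_left)
+{
+	return size_add(hdr_len, tlv_len) <= buf_left;
+}
+
+int main(void)
+{
+	/* A huge claimed length is rejected before any data is read. */
+	printf("fits: %d\n", tlv_fits(4, SIZE_MAX - 2, 64));	/* 0 */
+	printf("fits: %d\n", tlv_fits(4, 32, 64));		/* 1 */
+	return 0;
+}
+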
+Suggested-by: Dan Williams <dcbw@redhat.com>
+Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Kalle Valo <kvalo@kernel.org>
+Link: https://lore.kernel.org/r/d4f8780527d551552ee96f17a0229e02e1c200d1.1692931954.git.gustavoars@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/wireless/marvell/mwifiex/11n_rxreorder.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
+index 7351acac6932d..54ab8b54369ba 100644
+--- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
++++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
+@@ -921,6 +921,14 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv,
+ while (tlv_buf_left >= sizeof(*tlv_rxba)) {
+ tlv_type = le16_to_cpu(tlv_rxba->header.type);
+ tlv_len = le16_to_cpu(tlv_rxba->header.len);
++ if (size_add(sizeof(tlv_rxba->header), tlv_len) > tlv_buf_left) {
++ mwifiex_dbg(priv->adapter, WARN,
++ "TLV size (%zu) overflows event_buf buf_left=%d\n",
++ size_add(sizeof(tlv_rxba->header), tlv_len),
++ tlv_buf_left);
++ return;
++ }
++
+ if (tlv_type != TLV_TYPE_RXBA_SYNC) {
+ mwifiex_dbg(priv->adapter, ERROR,
+ "Wrong TLV id=0x%x\n", tlv_type);
+@@ -929,6 +937,14 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv,
+
+ tlv_seq_num = le16_to_cpu(tlv_rxba->seq_num);
+ tlv_bitmap_len = le16_to_cpu(tlv_rxba->bitmap_len);
++ if (size_add(sizeof(*tlv_rxba), tlv_bitmap_len) > tlv_buf_left) {
++ mwifiex_dbg(priv->adapter, WARN,
++ "TLV size (%zu) overflows event_buf buf_left=%d\n",
++ size_add(sizeof(*tlv_rxba), tlv_bitmap_len),
++ tlv_buf_left);
++ return;
++ }
++
+ mwifiex_dbg(priv->adapter, INFO,
+ "%pM tid=%d seq_num=%d bitmap_len=%d\n",
+ tlv_rxba->mac, tlv_rxba->tid, tlv_seq_num,
+--
+2.40.1
+
--- /dev/null
+From e18f0e6509ebb2ed91524ab5b591218445998b92 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Sep 2023 19:20:41 +0800
+Subject: xfrm6: fix inet6_dev refcount underflow problem
+
+From: Zhang Changzhong <zhangchangzhong@huawei.com>
+
+[ Upstream commit cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b ]
+
+There are race conditions that may lead to inet6_dev refcount underflow
+in xfrm6_dst_destroy() and rt6_uncached_list_flush_dev().
+
+One of the refcount underflow bugs is shown below:
+ (cpu 1) | (cpu 2)
+xfrm6_dst_destroy() |
+ ... |
+ in6_dev_put() |
+ | rt6_uncached_list_flush_dev()
+ ... | ...
+ | in6_dev_put()
+ rt6_uncached_list_del() | ...
+ ... |
+
+xfrm6_dst_destroy() calls rt6_uncached_list_del() after in6_dev_put(),
+so rt6_uncached_list_flush_dev() has a chance to call in6_dev_put()
+again for the same inet6_dev.
+
+Fix it by moving in6_dev_put() after rt6_uncached_list_del() in
+xfrm6_dst_destroy().
+
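+A conceptual sketch of why the ordering matters, not the kernel code:
+the flush path can only find (and put) the idev while the entry is
+still on the uncached list, so unlinking first leaves exactly one
+holder of the reference. All types and helpers here are hypothetical
+simplifications:
+
+#include <stdio.h>
+
+struct idev { int refcnt; };
+
+static void idev_put(struct idev *d)
+{
+	if (--d->refcnt < 0)
+		printf("BUG: refcount underflow\n");
+}
+
+struct xdst {
+	struct idev *idev;
+	int on_uncached_list;	/* visible to the flush path while set */
+};
+
+static void xdst_destroy_fixed(struct xdst *x)
+{
+	x->on_uncached_list = 0;	/* rt6_uncached_list_del() analogue */
+	if (x->idev)
+		idev_put(x->idev);	/* in6_dev_put() analogue */
+}
+
+int main(void)
+{
+	struct idev d = { .refcnt = 1 };
+	struct xdst x = { .idev = &d, .on_uncached_list = 1 };
+
+	xdst_destroy_fixed(&x);
+	printf("refcnt now %d\n", d.refcnt);	/* 0, no underflow */
+	return 0;
+}
+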
+Fixes: 510c321b5571 ("xfrm: reuse uncached_list to track xdsts")
+Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
+Reviewed-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/xfrm6_policy.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
+index eecc5e59da17c..50c278f1c1063 100644
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -117,10 +117,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
+ {
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+
+- if (likely(xdst->u.rt6.rt6i_idev))
+- in6_dev_put(xdst->u.rt6.rt6i_idev);
+ dst_destroy_metrics_generic(dst);
+ rt6_uncached_list_del(&xdst->u.rt6);
++ if (likely(xdst->u.rt6.rt6i_idev))
++ in6_dev_put(xdst->u.rt6.rt6i_idev);
+ xfrm_dst_destroy(xdst);
+ }
+
+--
+2.40.1
+