--- /dev/null
+From 33908660e814203e996f6e775d033c5c32fcf9a7 Mon Sep 17 00:00:00 2001
+From: Yu Liao <liaoyu15@huawei.com>
+Date: Sat, 26 Aug 2023 15:16:53 +0800
+Subject: ACPI: NFIT: Fix incorrect calculation of idt size
+
+From: Yu Liao <liaoyu15@huawei.com>
+
+commit 33908660e814203e996f6e775d033c5c32fcf9a7 upstream.
+
+acpi_nfit_interleave's field 'line_offset' was switched to a flexible array [1],
+but sizeof_idt() still calculates the size as if it were a 1-element array.
+
+Therefore, fix the incorrect calculation in sizeof_idt().
+
+[1] https://lore.kernel.org/lkml/2652195.BddDVKsqQX@kreacher/
+
+Fixes: 2a5ab99847bd ("ACPICA: struct acpi_nfit_interleave: Replace 1-element array with flexible array")
+Cc: stable@vger.kernel.org # v6.4+
+Signed-off-by: Yu Liao <liaoyu15@huawei.com>
+Reviewed-by: Dave Jiang <dave.jiang@intel.com>
+Reviewed-by: Ira Weiny <ira.weiny@intel.com>
+Link: https://lore.kernel.org/r/20230826071654.564372-1-liaoyu15@huawei.com
+Signed-off-by: Dave Jiang <dave.jiang@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/acpi/nfit/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
+index f0e6738ae3c9..f96bf32cd368 100644
+--- a/drivers/acpi/nfit/core.c
++++ b/drivers/acpi/nfit/core.c
+@@ -855,7 +855,7 @@ static size_t sizeof_idt(struct acpi_nfit_interleave *idt)
+ {
+ if (idt->header.length < sizeof(*idt))
+ return 0;
+- return sizeof(*idt) + sizeof(u32) * (idt->line_count - 1);
++ return sizeof(*idt) + sizeof(u32) * idt->line_count;
+ }
+
+ static bool add_idt(struct acpi_nfit_desc *acpi_desc,
+--
+2.42.0
+
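+For background, the sizing difference between the two layouts can be seen
+with a small standalone sketch; the structs below are simplified stand-ins
+for struct acpi_nfit_interleave, not the real ACPI definition:
+
+ #include <stdio.h>
+ #include <stdint.h>
+
+ /* Pre-C99 style: a 1-element array, so sizeof() already counts one
+  * line_offset entry. */
+ struct idt_old {
+     uint32_t line_count;
+     uint32_t line_offset[1];
+ };
+
+ /* C99 flexible array member: sizeof() counts no entries. */
+ struct idt_new {
+     uint32_t line_count;
+     uint32_t line_offset[];
+ };
+
+ int main(void)
+ {
+     uint32_t line_count = 4;
+
+     /* Old layout: only line_count - 1 extra elements are needed. */
+     size_t sz_old = sizeof(struct idt_old) +
+                     sizeof(uint32_t) * (line_count - 1);
+
+     /* New layout: all line_count elements must be added, which is
+      * exactly what the patched sizeof_idt() does. */
+     size_t sz_new = sizeof(struct idt_new) +
+                     sizeof(uint32_t) * line_count;
+
+     /* Both print 20: the totals must agree across layouts. */
+     printf("old=%zu new=%zu\n", sz_old, sz_new);
+     return 0;
+ }
+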
--- /dev/null
+From 7d3e4e9d3bde9c8bd8914d47ddaa90e0d0ffbcab Mon Sep 17 00:00:00 2001
+From: Mikko Rapeli <mikko.rapeli@linaro.org>
+Date: Thu, 21 Sep 2023 17:57:22 +0300
+Subject: arm64: defconfig: remove CONFIG_COMMON_CLK_NPCM8XX=y
+
+From: Mikko Rapeli <mikko.rapeli@linaro.org>
+
+commit 7d3e4e9d3bde9c8bd8914d47ddaa90e0d0ffbcab upstream.
+
+There is no code for this config option, and enabling it in defconfig
+causes warnings from tools that detect unused and obsolete kernel
+config flags, since the flag will be completely missing from the
+effective build config after "make olddefconfig".
+
+This fixes the following Yocto kernel recipe build-time warning:
+
+WARNING: [kernel config]: This BSP contains fragments with warnings:
+...
+[INFO]: the following symbols were not found in the active
+configuration:
+ - CONFIG_COMMON_CLK_NPCM8XX
+
+The flag was added with commit 45472f1e5348c7b755b4912f2f529ec81cea044b
+(v5.19-rc4-15-g45472f1e5348), so the 6.1 and 6.4 stable kernel trees are
+affected.
+
+Fixes: 45472f1e5348c7b755b4912f2f529ec81cea044b ("arm64: defconfig: Add Nuvoton NPCM family support")
+Cc: stable@kernel.org
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Bjorn Andersson <quic_bjorande@quicinc.com>
+Cc: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Cc: Konrad Dybcio <konrad.dybcio@linaro.org>
+Cc: Neil Armstrong <neil.armstrong@linaro.org>
+Cc: Tomer Maimon <tmaimon77@gmail.com>
+Cc: Bruce Ashfield <bruce.ashfield@gmail.com>
+Cc: Jon Mason <jon.mason@arm.com>
+Cc: Jon Mason <jdmason@kudzu.us>
+Cc: Ross Burton <ross@burtonini.com>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Mikko Rapeli <mikko.rapeli@linaro.org>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/configs/defconfig | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/arch/arm64/configs/defconfig
++++ b/arch/arm64/configs/defconfig
+@@ -1145,7 +1145,6 @@ CONFIG_COMMON_CLK_S2MPS11=y
+ CONFIG_COMMON_CLK_PWM=y
+ CONFIG_COMMON_CLK_RS9_PCIE=y
+ CONFIG_COMMON_CLK_VC5=y
+-CONFIG_COMMON_CLK_NPCM8XX=y
+ CONFIG_COMMON_CLK_BD718XX=m
+ CONFIG_CLK_RASPBERRYPI=m
+ CONFIG_CLK_IMX8MM=y
--- /dev/null
+From 75e2bd5f1ede42a2bc88aa34b431e1ace8e0bea0 Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <dlemoal@kernel.org>
+Date: Fri, 8 Sep 2023 20:04:52 +0900
+Subject: ata: libata-core: Do not register PM operations for SAS ports
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+commit 75e2bd5f1ede42a2bc88aa34b431e1ace8e0bea0 upstream.
+
+libsas does its own domain-based power management of ports. For such
+ports, libata should not use a device type that defines power management
+operations: executing these operations for suspend/resume in addition
+to the libsas calls to ata_sas_port_suspend() and ata_sas_port_resume()
+is unnecessary (and likely dangerous, even though problems have not
+been seen so far).
+
+Introduce the new ata_port_sas_type device_type for ports managed by
+libsas. This new device type is used in ata_tport_add() and is defined
+without power management operations.
+
+Fixes: 2fcbdcb4c802 ("[SCSI] libata: export ata_port suspend/resume infrastructure for sas")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Tested-by: Chia-Lin Kao (AceLan) <acelan.kao@canonical.com>
+Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Reviewed-by: John Garry <john.g.garry@oracle.com>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/libata-core.c | 2 +-
+ drivers/ata/libata-transport.c | 9 ++++++++-
+ drivers/ata/libata.h | 2 ++
+ 3 files changed, 11 insertions(+), 2 deletions(-)
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -5396,7 +5396,7 @@ EXPORT_SYMBOL_GPL(ata_host_resume);
+ #endif
+
+ const struct device_type ata_port_type = {
+- .name = "ata_port",
++ .name = ATA_PORT_TYPE_NAME,
+ #ifdef CONFIG_PM
+ .pm = &ata_port_pm_ops,
+ #endif
+--- a/drivers/ata/libata-transport.c
++++ b/drivers/ata/libata-transport.c
+@@ -266,6 +266,10 @@ void ata_tport_delete(struct ata_port *a
+ put_device(dev);
+ }
+
++static const struct device_type ata_port_sas_type = {
++ .name = ATA_PORT_TYPE_NAME,
++};
++
+ /** ata_tport_add - initialize a transport ATA port structure
+ *
+ * @parent: parent device
+@@ -283,7 +287,10 @@ int ata_tport_add(struct device *parent,
+ struct device *dev = &ap->tdev;
+
+ device_initialize(dev);
+- dev->type = &ata_port_type;
++ if (ap->flags & ATA_FLAG_SAS_HOST)
++ dev->type = &ata_port_sas_type;
++ else
++ dev->type = &ata_port_type;
+
+ dev->parent = parent;
+ ata_host_get(ap->host);
+--- a/drivers/ata/libata.h
++++ b/drivers/ata/libata.h
+@@ -30,6 +30,8 @@ enum {
+ ATA_DNXFER_QUIET = (1 << 31),
+ };
+
++#define ATA_PORT_TYPE_NAME "ata_port"
++
+ extern atomic_t ata_print_id;
+ extern int atapi_passthru16;
+ extern int libata_fua;
--- /dev/null
+From 3b8e0af4a7a331d1510e963b8fd77e2fca0a77f1 Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <dlemoal@kernel.org>
+Date: Mon, 4 Sep 2023 20:38:13 +0900
+Subject: ata: libata-core: Fix ata_port_request_pm() locking
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+commit 3b8e0af4a7a331d1510e963b8fd77e2fca0a77f1 upstream.
+
+The function ata_port_request_pm() checks the port flag
+ATA_PFLAG_PM_PENDING and calls ata_port_wait_eh() if this flag is set to
+ensure that power management operations for a port are not scheduled
+simultaneously. However, this flag check is done without holding the
+port lock.
+
+Fix this by taking the port lock on entry to the function and checking
+the flag under this lock. The lock is released and re-taken if
+ata_port_wait_eh() needs to be called. The two WARN_ON() macros checking
+that the ATA_PFLAG_PM_PENDING flag was cleared are removed, as the first
+check is racy and the second one is done without holding the port lock.
+
+Fixes: 5ef41082912b ("ata: add ata port system PM callbacks")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Tested-by: Chia-Lin Kao (AceLan) <acelan.kao@canonical.com>
+Reviewed-by: Niklas Cassel <niklas.cassel@wdc.com>
+Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/libata-core.c | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -5204,17 +5204,19 @@ static void ata_port_request_pm(struct a
+ struct ata_link *link;
+ unsigned long flags;
+
+- /* Previous resume operation might still be in
+- * progress. Wait for PM_PENDING to clear.
++ spin_lock_irqsave(ap->lock, flags);
++
++ /*
++ * A previous PM operation might still be in progress. Wait for
++ * ATA_PFLAG_PM_PENDING to clear.
+ */
+ if (ap->pflags & ATA_PFLAG_PM_PENDING) {
++ spin_unlock_irqrestore(ap->lock, flags);
+ ata_port_wait_eh(ap);
+- WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING);
++ spin_lock_irqsave(ap->lock, flags);
+ }
+
+- /* request PM ops to EH */
+- spin_lock_irqsave(ap->lock, flags);
+-
++ /* Request PM operation to EH */
+ ap->pm_mesg = mesg;
+ ap->pflags |= ATA_PFLAG_PM_PENDING;
+ ata_for_each_link(link, ap, HOST_FIRST) {
+@@ -5226,10 +5228,8 @@ static void ata_port_request_pm(struct a
+
+ spin_unlock_irqrestore(ap->lock, flags);
+
+- if (!async) {
++ if (!async)
+ ata_port_wait_eh(ap);
+- WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING);
+- }
+ }
+
+ /*
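+
+As an aside, the fixed function follows a common kernel pattern: test a
+flag under a spinlock, and if a sleeping wait is needed, drop the lock,
+wait, then re-take it before acting on the flag. A minimal sketch of
+that pattern (hypothetical types and helper, not the libata code):
+
+ #include <linux/spinlock.h>
+ #include <linux/types.h>
+
+ struct port {
+     spinlock_t lock;
+     bool pending;
+ };
+
+ void wait_for_pending_to_clear(struct port *p); /* may sleep */
+
+ static void request_op(struct port *p)
+ {
+     unsigned long flags;
+
+     spin_lock_irqsave(&p->lock, flags);
+     if (p->pending) {
+         /* Sleeping with a spinlock held is forbidden, so drop the
+          * lock around the wait and re-take it afterwards. */
+         spin_unlock_irqrestore(&p->lock, flags);
+         wait_for_pending_to_clear(p);
+         spin_lock_irqsave(&p->lock, flags);
+     }
+     p->pending = true;
+     spin_unlock_irqrestore(&p->lock, flags);
+ }
+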
--- /dev/null
+From 84d76529c650f887f1e18caee72d6f0589e1baf9 Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <dlemoal@kernel.org>
+Date: Sat, 26 Aug 2023 13:07:36 +0900
+Subject: ata: libata-core: Fix port and device removal
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+commit 84d76529c650f887f1e18caee72d6f0589e1baf9 upstream.
+
+Whenever an ATA adapter driver is removed (e.g. rmmod),
+ata_port_detach() is called repeatedly for all the adapter ports to
+remove (unload) the devices attached to the port and delete the port
+device itself. Removing of devices is done using libata EH with the
+ATA_PFLAG_UNLOADING port flag set. This causes libata EH to execute
+ata_eh_unload() which disables all devices attached to the port.
+
+ata_port_detach() finishes by calling scsi_remove_host() to remove the
+scsi host associated with the port. This function will trigger the
+removal of all scsi devices attached to the host and in the case of
+disks, calls to sd_shutdown() which will flush the device write cache
+and stop the device. However, given that the devices were already
+disabled by ata_eh_unload(), the synchronize write cache command and
+start stop unit commands fail. E.g. running "rmmod ahci" with first
+removing sd_mod results in error messages like:
+
+ata13.00: disable device
+sd 0:0:0:0: [sda] Synchronizing SCSI cache
+sd 0:0:0:0: [sda] Synchronize Cache(10) failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK
+sd 0:0:0:0: [sda] Stopping disk
+sd 0:0:0:0: [sda] Start/Stop Unit failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK
+
+Fix this by removing all scsi devices of the ata devices connected to
+the port before scheduling libata EH to disable the ATA devices.
+
+Fixes: 720ba12620ee ("[PATCH] libata-hp: update unload-unplug")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Niklas Cassel <niklas.cassel@wdc.com>
+Tested-by: Chia-Lin Kao (AceLan) <acelan.kao@canonical.com>
+Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/libata-core.c | 21 ++++++++++++++++++++-
+ 1 file changed, 20 insertions(+), 1 deletion(-)
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -6130,11 +6130,30 @@ static void ata_port_detach(struct ata_p
+ if (!ap->ops->error_handler)
+ goto skip_eh;
+
+- /* tell EH we're leaving & flush EH */
++ /* Wait for any ongoing EH */
++ ata_port_wait_eh(ap);
++
++ mutex_lock(&ap->scsi_scan_mutex);
+ spin_lock_irqsave(ap->lock, flags);
++
++ /* Remove scsi devices */
++ ata_for_each_link(link, ap, HOST_FIRST) {
++ ata_for_each_dev(dev, link, ALL) {
++ if (dev->sdev) {
++ spin_unlock_irqrestore(ap->lock, flags);
++ scsi_remove_device(dev->sdev);
++ spin_lock_irqsave(ap->lock, flags);
++ dev->sdev = NULL;
++ }
++ }
++ }
++
++ /* Tell EH to disable all devices */
+ ap->pflags |= ATA_PFLAG_UNLOADING;
+ ata_port_schedule_eh(ap);
++
+ spin_unlock_irqrestore(ap->lock, flags);
++ mutex_unlock(&ap->scsi_scan_mutex);
+
+ /* wait till EH commits suicide */
+ ata_port_wait_eh(ap);
--- /dev/null
+From 753a4d531bc518633ea88ac0ed02b25a16823d51 Mon Sep 17 00:00:00 2001
+From: Matthias Schiffer <mschiffer@universe-factory.net>
+Date: Fri, 22 Sep 2023 22:55:16 +0200
+Subject: ata: libata-sata: increase PMP SRST timeout to 10s
+
+From: Matthias Schiffer <mschiffer@universe-factory.net>
+
+commit 753a4d531bc518633ea88ac0ed02b25a16823d51 upstream.
+
+On certain SATA controllers, softreset fails after wakeup from S2RAM with
+the message "softreset failed (1st FIS failed)", sometimes resulting in
+drives not being detected again. With the increased timeout, this issue
+is avoided. Instead, "softreset failed (device not ready)" is now
+logged 1-2 times; however, this later failure seems to cause fewer
+problems, and the drives are detected reliably once they've spun up and
+the probe is retried.
+
+The issue was observed with the primary SATA controller of the QNAP
+TS-453B, which is an "Intel Corporation Celeron/Pentium Silver Processor
+SATA Controller [8086:31e3] (rev 06)" integrated in the Celeron J4125 CPU,
+and the following drives:
+
+- Seagate IronWolf ST12000VN0008
+- Seagate IronWolf ST8000NE0004
+
+The SATA controller seems to be more relevant to this issue than the
+drives, as the same drives are always detected reliably on the secondary
+SATA controller on the same board (an ASMedia 106x) without any "softreset
+failed" errors even without the increased timeout.
+
+Fixes: e7d3ef13d52a ("libata: change drive ready wait after hard reset to 5s")
+Cc: stable@vger.kernel.org
+Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/libata.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/libata.h
++++ b/include/linux/libata.h
+@@ -259,7 +259,7 @@ enum {
+ * advised to wait only for the following duration before
+ * doing SRST.
+ */
+- ATA_TMOUT_PMP_SRST_WAIT = 5000,
++ ATA_TMOUT_PMP_SRST_WAIT = 10000,
+
+ /* When the LPM policy is set to ATA_LPM_MAX_POWER, there might
+ * be a spurious PHY event, so ignore the first PHY event that
--- /dev/null
+From 41bc46c12a8053a1b3279a379bd6b5e87b045b85 Mon Sep 17 00:00:00 2001
+From: Jiri Olsa <jolsa@kernel.org>
+Date: Thu, 7 Sep 2023 22:06:51 +0200
+Subject: bpf: Add override check to kprobe multi link attach
+
+From: Jiri Olsa <jolsa@kernel.org>
+
+commit 41bc46c12a8053a1b3279a379bd6b5e87b045b85 upstream.
+
+Currently the multi_kprobe link attach does not check the error
+injection list for programs using the bpf_override_return() helper
+and allows them to attach anywhere. Add the missing check.
+
+Fixes: 0dcac2725406 ("bpf: Add multi kprobe link")
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/bpf/20230907200652.926951-1-jolsa@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/bpf_trace.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -2772,6 +2772,17 @@ static int get_modules_for_addrs(struct
+ return arr.mods_cnt;
+ }
+
++static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt)
++{
++ u32 i;
++
++ for (i = 0; i < cnt; i++) {
++ if (!within_error_injection_list(addrs[i]))
++ return -EINVAL;
++ }
++ return 0;
++}
++
+ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+ {
+ struct bpf_kprobe_multi_link *link = NULL;
+@@ -2849,6 +2860,11 @@ int bpf_kprobe_multi_link_attach(const u
+ goto error;
+ }
+
++ if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) {
++ err = -EINVAL;
++ goto error;
++ }
++
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ if (!link) {
+ err = -ENOMEM;
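+
+For context, a program affected by this check is one that uses the
+bpf_override_return() helper, which must only run on functions marked
+with ALLOW_ERROR_INJECTION(). A hedged sketch of such a multi-kprobe
+program (the attach pattern and return value are illustrative):
+
+ // SPDX-License-Identifier: GPL-2.0
+ #include "vmlinux.h"
+ #include <bpf/bpf_helpers.h>
+ #include <bpf/bpf_tracing.h>
+
+ #define ENOMEM 12 /* avoid pulling in errno.h */
+
+ char LICENSE[] SEC("license") = "GPL";
+
+ /* With this fix, attaching is rejected with -EINVAL unless every
+  * function matched by the pattern is on the error injection list. */
+ SEC("kprobe.multi/should_fail*")
+ int BPF_KPROBE(force_enomem)
+ {
+     /* Make the probed function return -ENOMEM. */
+     bpf_override_return(ctx, -ENOMEM);
+     return 0;
+ }
+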
--- /dev/null
+From c0bb9fb0e52a64601d38b3739b729d9138d4c8a1 Mon Sep 17 00:00:00 2001
+From: Nick Desaulniers <ndesaulniers@google.com>
+Date: Fri, 15 Sep 2023 10:34:28 -0700
+Subject: bpf: Fix BTF_ID symbol generation collision in tools/
+
+From: Nick Desaulniers <ndesaulniers@google.com>
+
+commit c0bb9fb0e52a64601d38b3739b729d9138d4c8a1 upstream.
+
+Marcus and Satya reported an issue where the BTF_ID macro generates the
+same symbol in separate objects and that breaks the final vmlinux link.
+
+ ld.lld: error: ld-temp.o <inline asm>:14577:1: symbol
+ '__BTF_ID__struct__cgroup__624' is already defined
+
+This can be triggered under specific configs when __COUNTER__ happens to
+be the same for the same symbol in two different translation units,
+which is already quite unlikely to happen.
+
+Add a __LINE__ number suffix to make the BTF_ID symbol more unique. This
+is not a complete fix, but it will help for now; meanwhile we can work
+on a better solution, as suggested by Andrii.
+
+Cc: stable@vger.kernel.org
+Reported-by: Satya Durga Srinivasu Prabhala <quic_satyap@quicinc.com>
+Reported-by: Marcus Seyfarth <m.seyfarth@gmail.com>
+Closes: https://github.com/ClangBuiltLinux/linux/issues/1913
+Debugged-by: Nathan Chancellor <nathan@kernel.org>
+Co-developed-by: Jiri Olsa <jolsa@kernel.org>
+Link: https://lore.kernel.org/bpf/CAEf4Bzb5KQ2_LmhN769ifMeSJaWfebccUasQOfQKaOd0nQ51tw@mail.gmail.com/
+Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
+Link: https://lore.kernel.org/r/20230915-bpf_collision-v3-2-263fc519c21f@google.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/include/linux/btf_ids.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/include/linux/btf_ids.h
++++ b/tools/include/linux/btf_ids.h
+@@ -38,7 +38,7 @@ asm( \
+ ____BTF_ID(symbol)
+
+ #define __ID(prefix) \
+- __PASTE(prefix, __COUNTER__)
++ __PASTE(__PASTE(prefix, __COUNTER__), __LINE__)
+
+ /*
+ * The BTF_ID defines unique symbol for each ID pointing
--- /dev/null
+From 8f908db77782630c45ba29dac35c434b5ce0b730 Mon Sep 17 00:00:00 2001
+From: Jiri Olsa <jolsa@kernel.org>
+Date: Fri, 15 Sep 2023 10:34:27 -0700
+Subject: bpf: Fix BTF_ID symbol generation collision
+
+From: Jiri Olsa <jolsa@kernel.org>
+
+commit 8f908db77782630c45ba29dac35c434b5ce0b730 upstream.
+
+Marcus and Satya reported an issue where the BTF_ID macro generates the
+same symbol in separate objects and that breaks the final vmlinux link.
+
+ld.lld: error: ld-temp.o <inline asm>:14577:1: symbol
+'__BTF_ID__struct__cgroup__624' is already defined
+
+This can be triggered under specific configs when __COUNTER__ happens to
+be the same for the same symbol in two different translation units,
+which is already quite unlikely to happen.
+
+Add a __LINE__ number suffix to make the BTF_ID symbol more unique. This
+is not a complete fix, but it will help for now; meanwhile we can work
+on a better solution, as suggested by Andrii.
+
+Cc: stable@vger.kernel.org
+Reported-by: Satya Durga Srinivasu Prabhala <quic_satyap@quicinc.com>
+Reported-by: Marcus Seyfarth <m.seyfarth@gmail.com>
+Closes: https://github.com/ClangBuiltLinux/linux/issues/1913
+Debugged-by: Nathan Chancellor <nathan@kernel.org>
+Link: https://lore.kernel.org/bpf/CAEf4Bzb5KQ2_LmhN769ifMeSJaWfebccUasQOfQKaOd0nQ51tw@mail.gmail.com/
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
+Reviewed-by: Nathan Chancellor <nathan@kernel.org>
+Link: https://lore.kernel.org/r/20230915-bpf_collision-v3-1-263fc519c21f@google.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/btf_ids.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/btf_ids.h
++++ b/include/linux/btf_ids.h
+@@ -49,7 +49,7 @@ word \
+ ____BTF_ID(symbol, word)
+
+ #define __ID(prefix) \
+- __PASTE(prefix, __COUNTER__)
++ __PASTE(__PASTE(prefix, __COUNTER__), __LINE__)
+
+ /*
+ * The BTF_ID defines unique symbol for each ID pointing
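+
+The mechanics of the collision can be reproduced with a standalone
+preprocessor sketch: __COUNTER__ restarts at 0 in every translation
+unit, so two objects that expand the macro the same number of times
+emit identical symbols, while appending __LINE__ additionally requires
+the expansions to sit on the same source line:
+
+ #include <stdio.h>
+
+ #define ___PASTE(a, b) a##b
+ #define __PASTE(a, b) ___PASTE(a, b)
+
+ /* Old scheme: prefix + __COUNTER__ only. */
+ #define ID_OLD(prefix) __PASTE(prefix, __COUNTER__)
+
+ /* New scheme: also append __LINE__. */
+ #define ID_NEW(prefix) __PASTE(__PASTE(prefix, __COUNTER__), __LINE__)
+
+ #define STR2(x) #x
+ #define STR(x) STR2(x)
+
+ int main(void)
+ {
+     /* Prints e.g. __BTF_ID__struct__cgroup__0, the same name any
+      * other translation unit would produce for its first expansion. */
+     puts(STR(ID_OLD(__BTF_ID__struct__cgroup__)));
+     /* Prints the counter pasted with the line number, which is far
+      * less likely to repeat in another object. */
+     puts(STR(ID_NEW(__BTF_ID__struct__cgroup__)));
+     return 0;
+ }
+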
--- /dev/null
+From 9af86694fd5d387992699ec99007ed374966ce9a Mon Sep 17 00:00:00 2001
+From: Bernd Schubert <bschubert@ddn.com>
+Date: Wed, 6 Sep 2023 17:59:03 +0200
+Subject: btrfs: file_remove_privs needs an exclusive lock in direct io write
+
+From: Bernd Schubert <bschubert@ddn.com>
+
+commit 9af86694fd5d387992699ec99007ed374966ce9a upstream.
+
+Miklos noticed that file_remove_privs() might call into
+notify_change(), which requires holding an exclusive lock. The problem
+exists in FUSE and btrfs. We can fix it without any additional helpers
+from the VFS: in case the privileges need to be dropped, change the
+lock type to exclusive and redo the loop.
+
+Fixes: e9adabb9712e ("btrfs: use shared lock for direct writes within EOF")
+CC: Miklos Szeredi <miklos@szeredi.hu>
+CC: stable@vger.kernel.org # 5.15+
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Bernd Schubert <bschubert@ddn.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/file.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -1466,8 +1466,13 @@ static ssize_t btrfs_direct_write(struct
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ ilock_flags |= BTRFS_ILOCK_TRY;
+
+- /* If the write DIO is within EOF, use a shared lock */
+- if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode))
++ /*
++ * If the write DIO is within EOF, use a shared lock and also only if
++ * security bits will likely not be dropped by file_remove_privs() called
++ * from btrfs_write_check(). Either will need to be rechecked after the
++ * lock was acquired.
++ */
++ if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode) && IS_NOSEC(inode))
+ ilock_flags |= BTRFS_ILOCK_SHARED;
+
+ relock:
+@@ -1475,6 +1480,13 @@ relock:
+ if (err < 0)
+ return err;
+
++ /* Shared lock cannot be used with security bits set. */
++ if ((ilock_flags & BTRFS_ILOCK_SHARED) && !IS_NOSEC(inode)) {
++ btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
++ ilock_flags &= ~BTRFS_ILOCK_SHARED;
++ goto relock;
++ }
++
+ err = generic_write_checks(iocb, from);
+ if (err <= 0) {
+ btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
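+
+The fix is an optimistic-locking pattern: take the cheaper shared lock
+when IS_NOSEC() suggests privileges won't need dropping, then re-check
+under the lock and retry with the exclusive lock if the condition no
+longer holds. A simplified userspace analogue of the control flow
+(hypothetical names, not the btrfs code):
+
+ #include <pthread.h>
+ #include <stdatomic.h>
+ #include <stdbool.h>
+
+ struct file_state {
+     pthread_rwlock_t lock;
+     atomic_bool nosec; /* analogue of IS_NOSEC(inode) */
+ };
+
+ static void locked_write(struct file_state *f)
+ {
+     bool shared = atomic_load(&f->nosec);
+
+ relock:
+     if (shared)
+         pthread_rwlock_rdlock(&f->lock);
+     else
+         pthread_rwlock_wrlock(&f->lock);
+
+     /* The flag may have been cleared before we got the lock; the
+      * shared lock is not allowed once security bits are set. */
+     if (shared && !atomic_load(&f->nosec)) {
+         pthread_rwlock_unlock(&f->lock);
+         shared = false;
+         goto relock;
+     }
+
+     /* ... perform the write under the appropriate lock ... */
+     pthread_rwlock_unlock(&f->lock);
+ }
+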
--- /dev/null
+From 8e7f82deb0c0386a03b62e30082574347f8b57d5 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Tue, 12 Sep 2023 11:45:39 +0100
+Subject: btrfs: fix race between reading a directory and adding entries to it
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 8e7f82deb0c0386a03b62e30082574347f8b57d5 upstream.
+
+When opening a directory (opendir(3)) or rewinding it (rewinddir(3)), we
+are not holding the directory's inode locked, and this can result in later
+attempting to add two entries to the directory with the same index number,
+resulting in a transaction abort, with -EEXIST (-17), when inserting the
+second delayed dir index. This results in a trace like the following:
+
+ Sep 11 22:34:59 myhostname kernel: BTRFS error (device dm-3): err add delayed dir index item(name: cockroach-stderr.log) into the insertion tree of the delayed node(root id: 5, inode id: 4539217, errno: -17)
+ Sep 11 22:34:59 myhostname kernel: ------------[ cut here ]------------
+ Sep 11 22:34:59 myhostname kernel: kernel BUG at fs/btrfs/delayed-inode.c:1504!
+ Sep 11 22:34:59 myhostname kernel: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
+ Sep 11 22:34:59 myhostname kernel: CPU: 0 PID: 7159 Comm: cockroach Not tainted 6.4.15-200.fc38.x86_64 #1
+ Sep 11 22:34:59 myhostname kernel: Hardware name: ASUS ESC500 G3/P9D WS, BIOS 2402 06/27/2018
+ Sep 11 22:34:59 myhostname kernel: RIP: 0010:btrfs_insert_delayed_dir_index+0x1da/0x260
+ Sep 11 22:34:59 myhostname kernel: Code: eb dd 48 (...)
+ Sep 11 22:34:59 myhostname kernel: RSP: 0000:ffffa9980e0fbb28 EFLAGS: 00010282
+ Sep 11 22:34:59 myhostname kernel: RAX: 0000000000000000 RBX: ffff8b10b8f4a3c0 RCX: 0000000000000000
+ Sep 11 22:34:59 myhostname kernel: RDX: 0000000000000000 RSI: ffff8b177ec21540 RDI: ffff8b177ec21540
+ Sep 11 22:34:59 myhostname kernel: RBP: ffff8b110cf80888 R08: 0000000000000000 R09: ffffa9980e0fb938
+ Sep 11 22:34:59 myhostname kernel: R10: 0000000000000003 R11: ffffffff86146508 R12: 0000000000000014
+ Sep 11 22:34:59 myhostname kernel: R13: ffff8b1131ae5b40 R14: ffff8b10b8f4a418 R15: 00000000ffffffef
+ Sep 11 22:34:59 myhostname kernel: FS: 00007fb14a7fe6c0(0000) GS:ffff8b177ec00000(0000) knlGS:0000000000000000
+ Sep 11 22:34:59 myhostname kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ Sep 11 22:34:59 myhostname kernel: CR2: 000000c00143d000 CR3: 00000001b3b4e002 CR4: 00000000001706f0
+ Sep 11 22:34:59 myhostname kernel: Call Trace:
+ Sep 11 22:34:59 myhostname kernel: <TASK>
+ Sep 11 22:34:59 myhostname kernel: ? die+0x36/0x90
+ Sep 11 22:34:59 myhostname kernel: ? do_trap+0xda/0x100
+ Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260
+ Sep 11 22:34:59 myhostname kernel: ? do_error_trap+0x6a/0x90
+ Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260
+ Sep 11 22:34:59 myhostname kernel: ? exc_invalid_op+0x50/0x70
+ Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260
+ Sep 11 22:34:59 myhostname kernel: ? asm_exc_invalid_op+0x1a/0x20
+ Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260
+ Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260
+ Sep 11 22:34:59 myhostname kernel: btrfs_insert_dir_item+0x200/0x280
+ Sep 11 22:34:59 myhostname kernel: btrfs_add_link+0xab/0x4f0
+ Sep 11 22:34:59 myhostname kernel: ? ktime_get_real_ts64+0x47/0xe0
+ Sep 11 22:34:59 myhostname kernel: btrfs_create_new_inode+0x7cd/0xa80
+ Sep 11 22:34:59 myhostname kernel: btrfs_symlink+0x190/0x4d0
+ Sep 11 22:34:59 myhostname kernel: ? schedule+0x5e/0xd0
+ Sep 11 22:34:59 myhostname kernel: ? __d_lookup+0x7e/0xc0
+ Sep 11 22:34:59 myhostname kernel: vfs_symlink+0x148/0x1e0
+ Sep 11 22:34:59 myhostname kernel: do_symlinkat+0x130/0x140
+ Sep 11 22:34:59 myhostname kernel: __x64_sys_symlinkat+0x3d/0x50
+ Sep 11 22:34:59 myhostname kernel: do_syscall_64+0x5d/0x90
+ Sep 11 22:34:59 myhostname kernel: ? syscall_exit_to_user_mode+0x2b/0x40
+ Sep 11 22:34:59 myhostname kernel: ? do_syscall_64+0x6c/0x90
+ Sep 11 22:34:59 myhostname kernel: entry_SYSCALL_64_after_hwframe+0x72/0xdc
+
+The race leading to the problem happens like this:
+
+1) Directory inode X is loaded into memory, its ->index_cnt field is
+ initialized to (u64)-1 (at btrfs_alloc_inode());
+
+2) Task A is adding a new file to directory X, holding its vfs inode lock,
+ and calls btrfs_set_inode_index() to get an index number for the entry.
+
+ Because the inode's index_cnt field is set to (u64)-1 it calls
+ btrfs_inode_delayed_dir_index_count() which fails because no dir index
+ entries were added yet to the delayed inode and then it calls
+   btrfs_set_inode_index_count(). This function finds the last dir index
+ key and then sets index_cnt to that index value + 1. It found that the
+ last index key has an offset of 100. However before it assigns a value
+ of 101 to index_cnt...
+
+3) Task B calls opendir(3), ending up at btrfs_opendir(), where the VFS
+ lock for inode X is not taken, so it calls btrfs_get_dir_last_index()
+ and sees index_cnt still with a value of (u64)-1. Because of that it
+ calls btrfs_inode_delayed_dir_index_count() which fails since no dir
+ index entries were added to the delayed inode yet, and then it also
+ calls btrfs_set_inode_index_count(). This also finds that the last
+ index key has an offset of 100, and before it assigns the value 101
+ to the index_cnt field of inode X...
+
+4) Task A assigns a value of 101 to index_cnt. And then the code flow
+ goes to btrfs_set_inode_index() where it increments index_cnt from
+ 101 to 102. Task A then creates a delayed dir index entry with a
+ sequence number of 101 and adds it to the delayed inode;
+
+5) Task B assigns 101 to the index_cnt field of inode X;
+
+6) At some later point when someone tries to add a new entry to the
+ directory, btrfs_set_inode_index() will return 101 again and shortly
+ after an attempt to add another delayed dir index key with index
+ number 101 will fail with -EEXIST resulting in a transaction abort.
+
+Fix this by locking the inode at btrfs_get_dir_last_index(), which is
+only used when opening a directory or attempting to lseek on it.
+
+Reported-by: ken <ken@bllue.org>
+Link: https://lore.kernel.org/linux-btrfs/CAE6xmH+Lp=Q=E61bU+v9eWX8gYfLvu6jLYxjxjFpo3zHVPR0EQ@mail.gmail.com/
+Reported-by: syzbot+d13490c82ad5353c779d@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/
+Fixes: 9b378f6ad48c ("btrfs: fix infinite directory reads")
+CC: stable@vger.kernel.org # 6.5+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -5931,21 +5931,24 @@ out:
+
+ static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index)
+ {
+- if (dir->index_cnt == (u64)-1) {
+- int ret;
++ int ret = 0;
+
++ btrfs_inode_lock(dir, 0);
++ if (dir->index_cnt == (u64)-1) {
+ ret = btrfs_inode_delayed_dir_index_count(dir);
+ if (ret) {
+ ret = btrfs_set_inode_index_count(dir);
+ if (ret)
+- return ret;
++ goto out;
+ }
+ }
+
+ /* index_cnt is the index number of next new entry, so decrement it. */
+ *index = dir->index_cnt - 1;
++out:
++ btrfs_inode_unlock(dir, 0);
+
+- return 0;
++ return ret;
+ }
+
+ /*
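+
+The race described in steps 2) to 5) is, stripped of the btrfs
+specifics, a lazy-initialization race: both tasks see the (u64)-1
+sentinel, both run the initializer, and the loser's late store clobbers
+an increment made in between. The fix serializes the sentinel check and
+the initialization under the inode lock; a userspace analogue of the
+fixed shape (hypothetical names, not the btrfs code):
+
+ #include <pthread.h>
+ #include <stdint.h>
+
+ #define UNSET UINT64_MAX
+
+ struct dir_state {
+     pthread_mutex_t lock;
+     uint64_t index_cnt; /* UNSET until initialized */
+ };
+
+ /* Stand-in for the slow path that scans the last on-disk index. */
+ static uint64_t last_index_on_disk(struct dir_state *d)
+ {
+     (void)d;
+     return 100;
+ }
+
+ static uint64_t get_dir_last_index(struct dir_state *d)
+ {
+     uint64_t index;
+
+     pthread_mutex_lock(&d->lock);
+     /* Check and initialize atomically w.r.t. concurrent callers. */
+     if (d->index_cnt == UNSET)
+         d->index_cnt = last_index_on_disk(d) + 1;
+     /* index_cnt is the index of the next new entry. */
+     index = d->index_cnt - 1;
+     pthread_mutex_unlock(&d->lock);
+
+     return index;
+ }
+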
--- /dev/null
+From 58bfe2ccec5f9f137b41dd38f335290dcc13cd5c Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Mon, 18 Sep 2023 10:34:51 -0400
+Subject: btrfs: properly report 0 avail for very full file systems
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 58bfe2ccec5f9f137b41dd38f335290dcc13cd5c upstream.
+
+A user reported some issues with smaller file systems that get very
+full. While investigating this issue I noticed that df wasn't showing
+100% full, despite having 0 chunk space and having < 1MiB of available
+metadata space.
+
+This turns out to be an overflow issue; we're doing:
+
+ total_available_metadata_space - SZ_4M < global_block_rsv_size
+
+to determine if there's not enough space to make metadata allocations,
+which overflows if total_available_metadata_space is < 4M. Fix this by
+checking to see if our available space is greater than the 4M threshold.
+This makes df properly report 100% usage on the file system.
+
+CC: stable@vger.kernel.org # 4.14+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/super.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -2111,7 +2111,7 @@ static int btrfs_statfs(struct dentry *d
+ * calculated f_bavail.
+ */
+ if (!mixed && block_rsv->space_info->full &&
+- total_free_meta - thresh < block_rsv->size)
++ (total_free_meta < thresh || total_free_meta - thresh < block_rsv->size))
+ buf->f_bavail = 0;
+
+ buf->f_type = BTRFS_SUPER_MAGIC;
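+
+The underlying bug is plain unsigned wraparound: in u64 arithmetic,
+total_free_meta - SZ_4M is a huge value whenever total_free_meta < 4M,
+so the comparison is false exactly when space is scarcest. A standalone
+demonstration:
+
+ #include <stdio.h>
+ #include <stdint.h>
+
+ #define SZ_4M (4ULL * 1024 * 1024)
+
+ int main(void)
+ {
+     uint64_t total_free_meta = SZ_4M / 2; /* very full fs */
+     uint64_t block_rsv_size = SZ_4M;
+
+     /* Buggy check: the subtraction wraps to ~2^64, so this is
+      * false and f_bavail is not forced to 0. */
+     if (total_free_meta - SZ_4M < block_rsv_size)
+         puts("buggy: report 0 available");
+     else
+         puts("buggy: report space available (wrong)");
+
+     /* Fixed check: catch the wrap case first. */
+     if (total_free_meta < SZ_4M ||
+         total_free_meta - SZ_4M < block_rsv_size)
+         puts("fixed: report 0 available");
+
+     return 0;
+ }
+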
--- /dev/null
+From e60aa5da14d01fed8411202dbe4adf6c44bd2a57 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Sat, 9 Sep 2023 13:08:32 +0100
+Subject: btrfs: refresh dir last index during a rewinddir(3) call
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit e60aa5da14d01fed8411202dbe4adf6c44bd2a57 upstream.
+
+When opening a directory we find the index of its last entry and
+then store it in the directory's file handle private data (struct
+btrfs_file_private::last_index), so that in the case new directory entries
+are added to a directory after an opendir(3) call we don't end up in an
+infinite loop (see commit 9b378f6ad48c ("btrfs: fix infinite directory
+reads")) when calling readdir(3).
+
+However once rewinddir(3) is called, POSIX states [1] that any new
+directory entries added after the previous opendir(3) call, must be
+returned by subsequent calls to readdir(3):
+
+ "The rewinddir() function shall reset the position of the directory
+ stream to which dirp refers to the beginning of the directory.
+ It shall also cause the directory stream to refer to the current
+ state of the corresponding directory, as a call to opendir() would
+ have done."
+
+We currently don't refresh the last_index field of the struct
+btrfs_file_private associated with the directory, so after a rewinddir(3)
+we are not returning any new entries added after the opendir(3) call.
+
+Fix this by finding the current last index of the directory when llseek
+is called against the directory.
+
+This can be reproduced by the following C program provided by Ian Johnson:
+
+ #include <dirent.h>
+ #include <stdio.h>
+
+ int main(void) {
+ DIR *dir = opendir("test");
+
+ FILE *file;
+ file = fopen("test/1", "w");
+ fwrite("1", 1, 1, file);
+ fclose(file);
+
+ file = fopen("test/2", "w");
+ fwrite("2", 1, 1, file);
+ fclose(file);
+
+ rewinddir(dir);
+
+ struct dirent *entry;
+ while ((entry = readdir(dir))) {
+ printf("%s\n", entry->d_name);
+ }
+ closedir(dir);
+ return 0;
+ }
+
+Reported-by: Ian Johnson <ian@ianjohnson.dev>
+Link: https://lore.kernel.org/linux-btrfs/YR1P0S.NGASEG570GJ8@ianjohnson.dev/
+Fixes: 9b378f6ad48c ("btrfs: fix infinite directory reads")
+CC: stable@vger.kernel.org # 6.5+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -5979,6 +5979,19 @@ static int btrfs_opendir(struct inode *i
+ return 0;
+ }
+
++static loff_t btrfs_dir_llseek(struct file *file, loff_t offset, int whence)
++{
++ struct btrfs_file_private *private = file->private_data;
++ int ret;
++
++ ret = btrfs_get_dir_last_index(BTRFS_I(file_inode(file)),
++ &private->last_index);
++ if (ret)
++ return ret;
++
++ return generic_file_llseek(file, offset, whence);
++}
++
+ struct dir_entry {
+ u64 ino;
+ u64 offset;
+@@ -11059,7 +11072,7 @@ static const struct inode_operations btr
+ };
+
+ static const struct file_operations btrfs_dir_file_operations = {
+- .llseek = generic_file_llseek,
++ .llseek = btrfs_dir_llseek,
+ .read = generic_read_dir,
+ .iterate_shared = btrfs_real_readdir,
+ .open = btrfs_opendir,
--- /dev/null
+From 357950361cbc6d54fb68ed878265c647384684ae Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Sat, 9 Sep 2023 13:08:31 +0100
+Subject: btrfs: set last dir index to the current last index when opening dir
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 357950361cbc6d54fb68ed878265c647384684ae upstream.
+
+When opening a directory for reading, we set the last index where we
+stop iteration to the value in struct btrfs_inode::index_cnt. That value
+does not match the index of the most recently added directory entry but
+is instead the index number that will be assigned to the next directory
+entry.
+
+This means that if after the call to opendir(3) new directory entries are
+added, a readdir(3) call will return the first new directory entry. This
+is fine because POSIX says the following [1]:
+
+ "If a file is removed from or added to the directory after the most
+ recent call to opendir() or rewinddir(), whether a subsequent call to
+ readdir() returns an entry for that file is unspecified."
+
+For example for the test script from commit 9b378f6ad48c ("btrfs: fix
+infinite directory reads"), where we have 2000 files in a directory, ext4
+doesn't return any new directory entry after opendir(3), while xfs returns
+the first 13 new directory entries added after the opendir(3) call.
+
+If we move to a shorter example with an empty directory when opendir(3) is
+called, and 2 files added to the directory after the opendir(3) call, then
+readdir(3) on btrfs will return the first file, ext4 and xfs return the 2
+files (but in a different order). A test program for this, reported by
+Ian Johnson, is the following:
+
+ #include <dirent.h>
+ #include <stdio.h>
+
+ int main(void) {
+ DIR *dir = opendir("test");
+
+ FILE *file;
+ file = fopen("test/1", "w");
+ fwrite("1", 1, 1, file);
+ fclose(file);
+
+ file = fopen("test/2", "w");
+ fwrite("2", 1, 1, file);
+ fclose(file);
+
+ struct dirent *entry;
+ while ((entry = readdir(dir))) {
+ printf("%s\n", entry->d_name);
+ }
+ closedir(dir);
+ return 0;
+ }
+
+To make this less odd, change the behaviour to never return new entries
+that were added after the opendir(3) call. This is done by setting the
+last_index field of the struct btrfs_file_private attached to the
+directory's file handle with a value matching btrfs_inode::index_cnt
+minus 1, since that value always matches the index of the next new
+directory entry and not the index of the most recently added entry.
+
+[1] https://pubs.opengroup.org/onlinepubs/007904875/functions/readdir_r.html
+
+Link: https://lore.kernel.org/linux-btrfs/YR1P0S.NGASEG570GJ8@ianjohnson.dev/
+CC: stable@vger.kernel.org # 6.5+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -5942,7 +5942,8 @@ static int btrfs_get_dir_last_index(stru
+ }
+ }
+
+- *index = dir->index_cnt;
++ /* index_cnt is the index number of next new entry, so decrement it. */
++ *index = dir->index_cnt - 1;
+
+ return 0;
+ }
--- /dev/null
+From d2f706058826b803f5b9dc3f6d4c213ae0c54eb9 Mon Sep 17 00:00:00 2001
+From: Ira Weiny <ira.weiny@intel.com>
+Date: Sun, 3 Sep 2023 14:42:58 -0700
+Subject: cxl/mbox: Fix CEL logic for poison and security commands
+
+From: Ira Weiny <ira.weiny@intel.com>
+
+commit d2f706058826b803f5b9dc3f6d4c213ae0c54eb9 upstream.
+
+The following debug output was observed while testing CXL
+
+cxl_core:cxl_walk_cel:721: cxl_mock_mem cxl_mem.0: Opcode 0x4300 unsupported by driver
+
+Opcode 0x4300 (Get Poison) is supported by the driver, and the mock
+device supports it. The logic should be checking that the opcode is
+neither a poison command nor a security command.
+
+Fix the logic to allow poison and security commands.
+
+Fixes: ad64f5952ce3 ("cxl/memdev: Only show sanitize sysfs files when supported")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Ira Weiny <ira.weiny@intel.com>
+Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
+Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Link: https://lore.kernel.org/r/20230903-cxl-cel-fix-v1-1-e260c9467be3@intel.com
+[cleanup cxl_walk_cel() to centralized "enabled" checks]
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cxl/core/mbox.c | 23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
+index ca60bb8114f2..4df4f614f490 100644
+--- a/drivers/cxl/core/mbox.c
++++ b/drivers/cxl/core/mbox.c
+@@ -715,24 +715,25 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
+ for (i = 0; i < cel_entries; i++) {
+ u16 opcode = le16_to_cpu(cel_entry[i].opcode);
+ struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
++ int enabled = 0;
+
+- if (!cmd && (!cxl_is_poison_command(opcode) ||
+- !cxl_is_security_command(opcode))) {
+- dev_dbg(dev,
+- "Opcode 0x%04x unsupported by driver\n", opcode);
+- continue;
++ if (cmd) {
++ set_bit(cmd->info.id, mds->enabled_cmds);
++ enabled++;
+ }
+
+- if (cmd)
+- set_bit(cmd->info.id, mds->enabled_cmds);
+-
+- if (cxl_is_poison_command(opcode))
++ if (cxl_is_poison_command(opcode)) {
+ cxl_set_poison_cmd_enabled(&mds->poison, opcode);
++ enabled++;
++ }
+
+- if (cxl_is_security_command(opcode))
++ if (cxl_is_security_command(opcode)) {
+ cxl_set_security_cmd_enabled(&mds->security, opcode);
++ enabled++;
++ }
+
+- dev_dbg(dev, "Opcode 0x%04x enabled\n", opcode);
++ dev_dbg(dev, "Opcode 0x%04x %s\n", opcode,
++ enabled ? "enabled" : "unsupported by driver");
+ }
+ }
+
+--
+2.42.0
+
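+The broken condition is a De Morgan mix-up: no opcode is both a poison
+and a security command, so "(!poison || !security)" is true for every
+opcode, and any opcode without a driver command entry was reported as
+unsupported. A small truth-table sketch (the opcode values are
+illustrative stand-ins):
+
+ #include <stdio.h>
+ #include <stdbool.h>
+
+ static bool is_poison(int op)   { return op == 0x4300; }
+ static bool is_security(int op) { return op == 0x4500; }
+
+ int main(void)
+ {
+     int opcodes[] = { 0x4300, 0x4500, 0x9999 };
+
+     for (int i = 0; i < 3; i++) {
+         int op = opcodes[i];
+         /* Buggy: always true, since no opcode satisfies both
+          * predicates at once. */
+         bool buggy = !is_poison(op) || !is_security(op);
+         /* Fixed: true only for opcodes handled by neither. */
+         bool fixed = !is_poison(op) && !is_security(op);
+         printf("op 0x%04x buggy=%d fixed=%d\n", op, buggy, fixed);
+     }
+     return 0;
+ }
+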
--- /dev/null
+From 0339dc39a521ead3dbcf101acd8c028c61db57dc Mon Sep 17 00:00:00 2001
+From: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
+Date: Wed, 23 Aug 2023 23:43:03 +0000
+Subject: cxl/pci: Fix appropriate checking for _OSC while handling CXL RAS registers
+
+From: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
+
+commit 0339dc39a521ead3dbcf101acd8c028c61db57dc upstream.
+
+cxl_pci fails to unmask CXL protocol errors when CXL memory error reporting
+is not granted native control. Given that CXL memory error reporting uses
+the event interface and protocol errors use AER, unmask protocol errors
+based only on the native AER setting. Without this change end user
+deployments will fail to report protocol errors in the case where native
+memory error handling is not granted to Linux.
+
+Also, return zero instead of an error code so as not to block
+communication with the CXL device when in native memory error reporting
+mode.
+
+Fixes: 248529edc86f ("cxl: add RAS status unmasking for CXL")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
+Reviewed-by: Robert Richter <rrichter@amd.com>
+Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Reviewed-by: Dave Jiang <dave.jiang@intel.com>
+Link: https://lore.kernel.org/r/20230823234305.27333-2-Smita.KoralahalliChannabasappa@amd.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cxl/pci.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
+index 1cb1494c28fe..2323169b6e5f 100644
+--- a/drivers/cxl/pci.c
++++ b/drivers/cxl/pci.c
+@@ -541,9 +541,9 @@ static int cxl_pci_ras_unmask(struct pci_dev *pdev)
+ return 0;
+ }
+
+- /* BIOS has CXL error control */
+- if (!host_bridge->native_cxl_error)
+- return -ENXIO;
++ /* BIOS has PCIe AER error control */
++ if (!host_bridge->native_aer)
++ return 0;
+
+ rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap);
+ if (rc)
+--
+2.42.0
+
--- /dev/null
+From a76b62518eb30ef59158fa777ab2e2a23e1334f9 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Fri, 15 Sep 2023 01:07:30 -0700
+Subject: cxl/port: Fix cxl_test register enumeration regression
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit a76b62518eb30ef59158fa777ab2e2a23e1334f9 upstream.
+
+The cxl_test unit test environment models a CXL topology for
+sysfs/user-ABI regression testing. It uses interface mocking via the
+"--wrap=" linker option to redirect cxl_core routines that parse
+hardware registers with versions that just publish objects, like
+devm_cxl_enumerate_decoders().
+
+Starting with:
+
+Commit 19ab69a60e3b ("cxl/port: Store the port's Component Register mappings in struct cxl_port")
+
+...port register enumeration is moved into devm_cxl_add_port(). This
+conflicts with the "cxl_test avoids emulating registers stance" so
+either the port code needs to be refactored (too violent), or modified
+so that register enumeration is skipped on "fake" cxl_test ports
+(annoying, but straightforward).
+
+This conflict has happened previously and the "check for platform
+device" workaround to avoid instrusive refactoring was deployed in those
+scenarios. In general, refactoring should only benefit production code,
+test code needs to remain minimally instrusive to the greatest extent
+possible.
+
+This was missed previously because it may sometimes just cause warning
+messages to be emitted, but it can also cause test failures. The
+backport to -stable is only nice to have for clean cxl_test runs.
+
+Fixes: 19ab69a60e3b ("cxl/port: Store the port's Component Register mappings in struct cxl_port")
+Cc: stable@vger.kernel.org
+Reported-by: Alison Schofield <alison.schofield@intel.com>
+Reviewed-by: Dave Jiang <dave.jiang@intel.com>
+Tested-by: Dave Jiang <dave.jiang@intel.com>
+Link: https://lore.kernel.org/r/169476525052.1013896.6235102957693675187.stgit@dwillia2-xfh.jf.intel.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cxl/core/port.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
+index 724be8448eb4..7ca01a834e18 100644
+--- a/drivers/cxl/core/port.c
++++ b/drivers/cxl/core/port.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0-only
+ /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
++#include <linux/platform_device.h>
+ #include <linux/memregion.h>
+ #include <linux/workqueue.h>
+ #include <linux/debugfs.h>
+@@ -706,16 +707,20 @@ static int cxl_setup_comp_regs(struct device *dev, struct cxl_register_map *map,
+ return cxl_setup_regs(map);
+ }
+
+-static inline int cxl_port_setup_regs(struct cxl_port *port,
+- resource_size_t component_reg_phys)
++static int cxl_port_setup_regs(struct cxl_port *port,
++ resource_size_t component_reg_phys)
+ {
++ if (dev_is_platform(port->uport_dev))
++ return 0;
+ return cxl_setup_comp_regs(&port->dev, &port->comp_map,
+ component_reg_phys);
+ }
+
+-static inline int cxl_dport_setup_regs(struct cxl_dport *dport,
+- resource_size_t component_reg_phys)
++static int cxl_dport_setup_regs(struct cxl_dport *dport,
++ resource_size_t component_reg_phys)
+ {
++ if (dev_is_platform(dport->dport_dev))
++ return 0;
+ return cxl_setup_comp_regs(dport->dport_dev, &dport->comp_map,
+ component_reg_phys);
+ }
+--
+2.42.0
+
--- /dev/null
+From 2de19022c5d7ff519dd5b9690f7713267bd1abfe Mon Sep 17 00:00:00 2001
+From: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Date: Wed, 13 Sep 2023 14:48:08 -0400
+Subject: drm/amd/display: fix the ability to use lower resolution modes on eDP
+
+From: Hamza Mahfooz <hamza.mahfooz@amd.com>
+
+commit 2de19022c5d7ff519dd5b9690f7713267bd1abfe upstream.
+
+On eDP we can receive invalid modes from dm_update_crtc_state() for
+entirely new streams, for which drm_mode_set_crtcinfo() shouldn't be
+called. So, instead of calling drm_mode_set_crtcinfo() from within
+create_stream_for_sink(), call it from
+amdgpu_dm_connector_mode_valid(). We are guaranteed to only call
+drm_mode_set_crtcinfo() for valid modes from that function (invalid
+modes are rejected by that callback), and that is the only user
+of create_validate_stream_for_sink() for which we need to call
+drm_mode_set_crtcinfo() (before commit cb841d27b876
+("drm/amd/display: Always pass connector_state to stream validation"),
+that was the only place where create_validate_stream_for_sink()'s
+dm_state was NULL).
+
+Cc: stable@vger.kernel.org
+Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2693
+Fixes: cb841d27b876 ("drm/amd/display: Always pass connector_state to stream validation")
+Tested-by: Mark Broadworth <mark.broadworth@amd.com>
+Reviewed-by: Harry Wentland <harry.wentland@amd.com>
+Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -6062,8 +6062,6 @@ create_stream_for_sink(struct amdgpu_dm_
+
+ if (recalculate_timing)
+ drm_mode_set_crtcinfo(&saved_mode, 0);
+- else if (!old_stream)
+- drm_mode_set_crtcinfo(&mode, 0);
+
+ /*
+ * If scaling is enabled and refresh rate didn't change
+@@ -6625,6 +6623,8 @@ enum drm_mode_status amdgpu_dm_connector
+ goto fail;
+ }
+
++ drm_mode_set_crtcinfo(mode, 0);
++
+ stream = create_validate_stream_for_sink(aconnector, mode,
+ to_dm_connector_state(connector->state),
+ NULL);
--- /dev/null
+From cc39f9ccb82426e576734b493e1777ea01b144a8 Mon Sep 17 00:00:00 2001
+From: YuBiao Wang <YuBiao.Wang@amd.com>
+Date: Fri, 15 Sep 2023 10:47:50 +0800
+Subject: drm/amdkfd: Use gpu_offset for user queue's wptr
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: YuBiao Wang <YuBiao.Wang@amd.com>
+
+commit cc39f9ccb82426e576734b493e1777ea01b144a8 upstream.
+
+Directly use tbo's start address will miss the domain start offset. Need
+to use gpu_offset instead.
+
+Signed-off-by: YuBiao Wang <YuBiao.Wang@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -216,7 +216,7 @@ static int add_queue_mes(struct device_q
+
+ if (q->wptr_bo) {
+ wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
+- queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
++ queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->wptr_bo) + wptr_addr_off;
+ }
+
+ queue_input.is_kfd_process = 1;
--- /dev/null
+From b7599d241778d0b10cdf7a5c755aa7db9b83250c Mon Sep 17 00:00:00 2001
+From: Javier Pello <devel@otheo.eu>
+Date: Sat, 2 Sep 2023 17:10:39 +0200
+Subject: drm/i915/gt: Fix reservation address in ggtt_reserve_guc_top
+
+From: Javier Pello <devel@otheo.eu>
+
+commit b7599d241778d0b10cdf7a5c755aa7db9b83250c upstream.
+
+There is an assertion in ggtt_reserve_guc_top that the global GTT
+is of size at least GUC_GGTT_TOP, which is not the case on a 32-bit
+platform; see commit 562d55d991b39ce376c492df2f7890fd6a541ffc
+("drm/i915/bdw: Only use 2g GGTT for 32b platforms"). If GEM_BUG_ON
+is enabled, this triggers a BUG(); if GEM_BUG_ON is disabled, the
+subsequent reservation fails and the driver fails to initialise
+the device:
+
+i915 0000:00:02.0: [drm:i915_init_ggtt [i915]] Failed to reserve top of GGTT for GuC
+i915 0000:00:02.0: Device initialization failed (-28)
+i915 0000:00:02.0: Please file a bug on drm/i915; see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details.
+i915: probe of 0000:00:02.0 failed with error -28
+
+Make the reservation at the top of the available space, whatever
+that is, instead of assuming that the top will be GUC_GGTT_TOP.
+
+Fixes: 911800765ef6 ("drm/i915/uc: Reserve upper range of GGTT")
+Link: https://gitlab.freedesktop.org/drm/intel/-/issues/9080
+Signed-off-by: Javier Pello <devel@otheo.eu>
+Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
+Cc: Fernando Pacheco <fernando.pacheco@intel.com>
+Cc: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Jani Nikula <jani.nikula@linux.intel.com>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
+Cc: intel-gfx@lists.freedesktop.org
+Cc: stable@vger.kernel.org # v5.3+
+Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230902171039.2229126186d697dbcf62d6d8@otheo.eu
+(cherry picked from commit 0f3fa942d91165c2702577e9274d2ee1c7212afc)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/gt/intel_ggtt.c | 23 +++++++++++++++++------
+ 1 file changed, 17 insertions(+), 6 deletions(-)
+
+--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
++++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
+@@ -511,20 +511,31 @@ void intel_ggtt_unbind_vma(struct i915_a
+ vm->clear_range(vm, vma_res->start, vma_res->vma_size);
+ }
+
++/*
++ * Reserve the top of the GuC address space for firmware images. Addresses
++ * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
++ * which makes for a suitable range to hold GuC/HuC firmware images if the
++ * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT
++ * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk
++ * of the same size anyway, which is far more than needed, to keep the logic
++ * in uc_fw_ggtt_offset() simple.
++ */
++#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP)
++
+ static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
+ {
+- u64 size;
++ u64 offset;
+ int ret;
+
+ if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
+ return 0;
+
+- GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
+- size = ggtt->vm.total - GUC_GGTT_TOP;
++ GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE);
++ offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE;
+
+- ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size,
+- GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
+- PIN_NOEVICT);
++ ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw,
++ GUC_TOP_RESERVE_SIZE, offset,
++ I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
+ if (ret)
+ drm_dbg(&ggtt->vm.i915->drm,
+ "Failed to reserve top of GGTT for GuC\n");
--- /dev/null
+From 099f0af9d98231bb74956ce92508e87cbcb896be Mon Sep 17 00:00:00 2001
+From: Jani Nikula <jani.nikula@intel.com>
+Date: Thu, 14 Sep 2023 16:10:15 +0300
+Subject: drm/meson: fix memory leak on ->hpd_notify callback
+
+From: Jani Nikula <jani.nikula@intel.com>
+
+commit 099f0af9d98231bb74956ce92508e87cbcb896be upstream.
+
+The EDID returned by drm_bridge_get_edid() needs to be freed.
+
+Fixes: 0af5e0b41110 ("drm/meson: encoder_hdmi: switch to bridge DRM_BRIDGE_ATTACH_NO_CONNECTOR")
+Cc: Neil Armstrong <narmstrong@baylibre.com>
+Cc: Sam Ravnborg <sam@ravnborg.org>
+Cc: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Cc: Neil Armstrong <neil.armstrong@linaro.org>
+Cc: Kevin Hilman <khilman@baylibre.com>
+Cc: Jerome Brunet <jbrunet@baylibre.com>
+Cc: dri-devel@lists.freedesktop.org
+Cc: linux-amlogic@lists.infradead.org
+Cc: linux-arm-kernel@lists.infradead.org
+Cc: stable@vger.kernel.org # v5.17+
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
+Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230914131015.2472029-1-jani.nikula@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/meson/meson_encoder_hdmi.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c
++++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
+@@ -332,6 +332,8 @@ static void meson_encoder_hdmi_hpd_notif
+ return;
+
+ cec_notifier_set_phys_addr_from_edid(encoder_hdmi->cec_notifier, edid);
++
++ kfree(edid);
+ } else
+ cec_notifier_phys_addr_invalidate(encoder_hdmi->cec_notifier);
+ }
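+
+The ownership rule being enforced: drm_bridge_get_edid() returns a
+freshly allocated copy of the EDID that the caller owns and must
+kfree() when done. A sketch of the pattern after the fix (hypothetical
+function, not the meson code verbatim):
+
+ #include <drm/drm_bridge.h>
+ #include <drm/drm_edid.h>
+ #include <linux/slab.h>
+
+ static void handle_hpd(struct drm_bridge *bridge,
+                        struct drm_connector *connector)
+ {
+     struct edid *edid = drm_bridge_get_edid(bridge, connector);
+
+     if (!edid)
+         return;
+
+     /* ... consume the EDID, e.g. update the CEC physical address ... */
+
+     kfree(edid); /* the getter allocates; the caller frees */
+ }
+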
--- /dev/null
+From 2ba157983974ae1b6aaef7d4953812020d6f1eb5 Mon Sep 17 00:00:00 2001
+From: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
+Date: Mon, 11 Sep 2023 15:03:24 +0200
+Subject: drm/tests: Fix incorrect argument in drm_test_mm_insert_range
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
+
+commit 2ba157983974ae1b6aaef7d4953812020d6f1eb5 upstream.
+
+While the drm_mm test was converted from an igt selftest to KUnit, an
+unexpected value of the "end" argument equal to "start" was introduced
+in one of the calls to a function that executes drm_test_mm_insert_range
+for a specific start/end pair of arguments. As a consequence,
+DRM_MM_BUG_ON(end <= start) is triggered. Fix it by restoring the
+original value.
+
+Fixes: fc8d29e298cf ("drm: selftest: convert drm_mm selftest to KUnit")
+Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
+Cc: "Maíra Canal" <mairacanal@riseup.net>
+Cc: Arthur Grillo <arthurgrillo@riseup.net>
+Cc: Javier Martinez Canillas <javierm@redhat.com>
+Cc: Daniel Latypov <dlatypov@google.com>
+Cc: stable@vger.kernel.org # v6.1+
+Reviewed-by: Maíra Canal <mairacanal@riseup.net>
+Signed-off-by: Maíra Canal <mairacanal@riseup.net>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230911130323.7037-2-janusz.krzysztofik@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/tests/drm_mm_test.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/tests/drm_mm_test.c
++++ b/drivers/gpu/drm/tests/drm_mm_test.c
+@@ -939,7 +939,7 @@ static void drm_test_mm_insert_range(str
+ KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, 0, max - 1));
+ KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, 0, max / 2));
+ KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size,
+- max / 2, max / 2));
++ max / 2, max));
+ KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size,
+ max / 4 + 1, 3 * max / 4 - 1));
+
--- /dev/null
+From 7c3151585730b7095287be8162b846d31e6eee61 Mon Sep 17 00:00:00 2001
+From: Greg Ungerer <gerg@kernel.org>
+Date: Thu, 7 Sep 2023 11:18:08 +1000
+Subject: fs: binfmt_elf_efpic: fix personality for ELF-FDPIC
+
+From: Greg Ungerer <gerg@kernel.org>
+
+commit 7c3151585730b7095287be8162b846d31e6eee61 upstream.
+
+The elf-fdpic loader hard sets the process personality to either
+PER_LINUX_FDPIC for true elf-fdpic binaries or to PER_LINUX for normal
+ELF binaries (in this case constant-displacement binaries, compiled
+with -pie for example). The problem with that is that it loses any
+other bits that may be in the ELF header personality (such as the "bug
+emulation" bits).
+
+On the ARM architecture the ADDR_LIMIT_32BIT flag is used to signify a
+normal 32bit binary - as opposed to a legacy 26bit address binary. This
+matters since start_thread() will set the ARM CPSR register as required
+based on this flag. If the elf-fdpic loader loses this bit the process
+will be mis-configured and crash out pretty quickly.
+
+Modify elf-fdpic loader personality setting so that it preserves the upper
+three bytes by using the SET_PERSONALITY macro to set it. This macro in
+the generic case sets PER_LINUX and preserves the upper bytes.
+Architectures can override this for their specific use case, and ARM does
+exactly this.
+
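+For reference, the generic SET_PERSONALITY() is along these lines (a
+sketch of the common definition, shown only for illustration):
+
+    #define SET_PERSONALITY(ex) \
+        set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
+
+i.e. the PER_* base is replaced while the flag bits above PER_MASK
+(such as ADDR_LIMIT_32BIT) survive, which is exactly what the bare
+set_personality(PER_LINUX) call was throwing away.
+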
+The problem shows up quite easily running under qemu using the ARM
+architecture, but not necessarily on all types of real ARM hardware. If
+the underlying ARM processor does not support the legacy 26-bit addressing
+mode then everything will work as expected.
+
+Link: https://lkml.kernel.org/r/20230907011808.2985083-1-gerg@kernel.org
+Fixes: 1bde925d23547 ("fs/binfmt_elf_fdpic.c: provide NOMMU loader for regular ELF binaries")
+Signed-off-by: Greg Ungerer <gerg@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: Greg Ungerer <gerg@kernel.org>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/binfmt_elf_fdpic.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/fs/binfmt_elf_fdpic.c
++++ b/fs/binfmt_elf_fdpic.c
+@@ -345,10 +345,9 @@ static int load_elf_fdpic_binary(struct
+ /* there's now no turning back... the old userspace image is dead,
+ * defunct, deceased, etc.
+ */
++ SET_PERSONALITY(exec_params.hdr);
+ if (elf_check_fdpic(&exec_params.hdr))
+- set_personality(PER_LINUX_FDPIC);
+- else
+- set_personality(PER_LINUX);
++ current->personality |= PER_LINUX_FDPIC;
+ if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
+ current->personality |= READ_IMPLIES_EXEC;
+
--- /dev/null
+From e6e43b8aa7cd3c3af686caf0c2e11819a886d705 Mon Sep 17 00:00:00 2001
+From: Quang Le <quanglex97@gmail.com>
+Date: Fri, 29 Sep 2023 00:44:13 +0700
+Subject: fs/smb/client: Reset password pointer to NULL
+
+From: Quang Le <quanglex97@gmail.com>
+
+commit e6e43b8aa7cd3c3af686caf0c2e11819a886d705 upstream.
+
+Forgetting to reset ctx->password to NULL will lead to bugs like a
+double free.
+
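+For illustration, a condensed sketch of the failure mode being avoided
+(simplified, not the exact call chain):
+
+    kfree_sensitive(ctx->password);    /* freed on the error path ... */
+    /* ctx->password still holds the stale pointer */
+    kfree_sensitive(ctx->password);    /* ... and freed again on teardown */
+
+Resetting the pointer to NULL after the first free turns the second
+kfree_sensitive() into a harmless no-op, since freeing NULL is allowed.
+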
+Cc: stable@vger.kernel.org
+Cc: Willy Tarreau <w@1wt.eu>
+Reviewed-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Quang Le <quanglex97@gmail.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/client/fs_context.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/smb/client/fs_context.c
++++ b/fs/smb/client/fs_context.c
+@@ -1532,6 +1532,7 @@ static int smb3_fs_context_parse_param(s
+
+ cifs_parse_mount_err:
+ kfree_sensitive(ctx->password);
++ ctx->password = NULL;
+ return -EINVAL;
+ }
+
--- /dev/null
+From 863a8eb3f27098b42772f668e3977ff4cae10b04 Mon Sep 17 00:00:00 2001
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Date: Tue, 19 Sep 2023 20:48:55 +0100
+Subject: i915: Limit the length of an sg list to the requested length
+
+From: Matthew Wilcox (Oracle) <willy@infradead.org>
+
+commit 863a8eb3f27098b42772f668e3977ff4cae10b04 upstream.
+
+The folio conversion changed the behaviour of shmem_sg_alloc_table() to
+put the entire length of the last folio into the sg list, even if the sg
+list should have been shorter. gen8_ggtt_insert_entries() relied on the
+list being the right length and would overrun the end of the page tables.
+Other functions may also have been affected.
+
+Clamp the length of the last entry in the sg list to be the expected
+length.
+
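+Conceptually the clamp looks like this (a simplified sketch of the loop
+in shmem_sg_alloc_table(), not the full patch):
+
+    /* never account more pages than the object actually needs */
+    nr_pages = min_t(unsigned long, folio_nr_pages(folio), page_count - i);
+    sg_set_folio(sg, folio, nr_pages * PAGE_SIZE, 0);
+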
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Fixes: 0b62af28f249 ("i915: convert shmem_sg_free_table() to use a folio_batch")
+Cc: stable@vger.kernel.org # 6.5.x
+Link: https://gitlab.freedesktop.org/drm/intel/-/issues/9256
+Link: https://lore.kernel.org/lkml/6287208.lOV4Wx5bFT@natalenko.name/
+Reported-by: Oleksandr Natalenko <oleksandr@natalenko.name>
+Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
+Reviewed-by: Andrzej Hajda <andrzej.hajda@intel.com>
+Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230919194855.347582-1-willy@infradead.org
+(cherry picked from commit 26a8e32e6d77900819c0c730fbfb393692dbbeea)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+index 8f1633c3fb93..73a4a4eb29e0 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+@@ -100,6 +100,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
+ st->nents = 0;
+ for (i = 0; i < page_count; i++) {
+ struct folio *folio;
++ unsigned long nr_pages;
+ const unsigned int shrink[] = {
+ I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
+ 0,
+@@ -150,6 +151,8 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
+ }
+ } while (1);
+
++ nr_pages = min_t(unsigned long,
++ folio_nr_pages(folio), page_count - i);
+ if (!i ||
+ sg->length >= max_segment ||
+ folio_pfn(folio) != next_pfn) {
+@@ -157,13 +160,13 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
+ sg = sg_next(sg);
+
+ st->nents++;
+- sg_set_folio(sg, folio, folio_size(folio), 0);
++ sg_set_folio(sg, folio, nr_pages * PAGE_SIZE, 0);
+ } else {
+ /* XXX: could overflow? */
+- sg->length += folio_size(folio);
++ sg->length += nr_pages * PAGE_SIZE;
+ }
+- next_pfn = folio_pfn(folio) + folio_nr_pages(folio);
+- i += folio_nr_pages(folio) - 1;
++ next_pfn = folio_pfn(folio) + nr_pages;
++ i += nr_pages - 1;
+
+ /* Check that the i965g/gm workaround works. */
+ GEM_BUG_ON(gfp & __GFP_DMA32 && next_pfn >= 0x00100000UL);
+--
+2.42.0
+
--- /dev/null
+From 41ebaa5e0eebea4c3bac96b72f9f8ae0d77c0bdb Mon Sep 17 00:00:00 2001
+From: Ricardo Ribalda <ribalda@chromium.org>
+Date: Thu, 20 Jul 2023 17:46:54 +0000
+Subject: media: uvcvideo: Fix OOB read
+
+From: Ricardo Ribalda <ribalda@chromium.org>
+
+commit 41ebaa5e0eebea4c3bac96b72f9f8ae0d77c0bdb upstream.
+
+If the index provided by the user is bigger than the mask size, we
+might do an out-of-bounds read.
+
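+The fix is the usual bounds check on a user-supplied index before it is
+used against a fixed-width bitmask (sketch for illustration):
+
+    if (index >= BITS_PER_TYPE(mapping->menu_mask))
+        return -EINVAL;    /* reject before any bit test takes place */
+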
+CC: stable@kernel.org
+Fixes: 40140eda661e ("media: uvcvideo: Implement mask for V4L2_CTRL_TYPE_MENU")
+Reported-by: Zubin Mithra <zsm@chromium.org>
+Signed-off-by: Ricardo Ribalda <ribalda@chromium.org>
+Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
+Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/media/usb/uvc/uvc_ctrl.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/media/usb/uvc/uvc_ctrl.c
++++ b/drivers/media/usb/uvc/uvc_ctrl.c
+@@ -1402,6 +1402,9 @@ int uvc_query_v4l2_menu(struct uvc_video
+ query_menu->id = id;
+ query_menu->index = index;
+
++ if (index >= BITS_PER_TYPE(mapping->menu_mask))
++ return -EINVAL;
++
+ ret = mutex_lock_interruptible(&chain->ctrl_mutex);
+ if (ret < 0)
+ return -ERESTARTSYS;
--- /dev/null
+From 45120b15743fa7c0aa53d5db6dfb4c8f87be4abd Mon Sep 17 00:00:00 2001
+From: Jinjie Ruan <ruanjinjie@huawei.com>
+Date: Mon, 25 Sep 2023 15:20:59 +0800
+Subject: mm/damon/vaddr-test: fix memory leak in damon_do_test_apply_three_regions()
+
+From: Jinjie Ruan <ruanjinjie@huawei.com>
+
+commit 45120b15743fa7c0aa53d5db6dfb4c8f87be4abd upstream.
+
+When CONFIG_DAMON_VADDR_KUNIT_TEST=y, CONFIG_DEBUG_KMEMLEAK=y and
+CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y are set, the memory leak below is
+detected.
+
+Since commit 9f86d624292c ("mm/damon/vaddr-test: remove unnecessary
+variables") removed the damon_destroy_ctx() call, the tests still call
+damon_new_target() and damon_new_region(), but the damon_region
+allocated by kmem_cache_alloc() in damon_new_region() and the
+damon_target allocated by kmalloc() in damon_new_target() are never
+freed. The damon_region allocated in damon_new_region() from
+damon_set_regions() is not freed either.
+
+So use damon_destroy_target() to free all the damon_regions and the
+damon_target.
+
+ unreferenced object 0xffff888107c9a940 (size 64):
+ comm "kunit_try_catch", pid 1069, jiffies 4294670592 (age 732.761s)
+ hex dump (first 32 bytes):
+ 00 00 00 00 00 00 00 00 06 00 00 00 6b 6b 6b 6b ............kkkk
+ 60 c7 9c 07 81 88 ff ff f8 cb 9c 07 81 88 ff ff `...............
+ backtrace:
+ [<ffffffff817e0167>] kmalloc_trace+0x27/0xa0
+ [<ffffffff819c11cf>] damon_new_target+0x3f/0x1b0
+ [<ffffffff819c7d55>] damon_do_test_apply_three_regions.constprop.0+0x95/0x3e0
+ [<ffffffff819c82be>] damon_test_apply_three_regions1+0x21e/0x260
+ [<ffffffff829fce6a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
+ [<ffffffff81237cf6>] kthread+0x2b6/0x380
+ [<ffffffff81097add>] ret_from_fork+0x2d/0x70
+ [<ffffffff81003791>] ret_from_fork_asm+0x11/0x20
+ unreferenced object 0xffff8881079cc740 (size 56):
+ comm "kunit_try_catch", pid 1069, jiffies 4294670592 (age 732.761s)
+ hex dump (first 32 bytes):
+ 05 00 00 00 00 00 00 00 14 00 00 00 00 00 00 00 ................
+ 6b 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 6b 6b 6b 6b kkkkkkkk....kkkk
+ backtrace:
+ [<ffffffff819bc492>] damon_new_region+0x22/0x1c0
+ [<ffffffff819c7d91>] damon_do_test_apply_three_regions.constprop.0+0xd1/0x3e0
+ [<ffffffff819c82be>] damon_test_apply_three_regions1+0x21e/0x260
+ [<ffffffff829fce6a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
+ [<ffffffff81237cf6>] kthread+0x2b6/0x380
+ [<ffffffff81097add>] ret_from_fork+0x2d/0x70
+ [<ffffffff81003791>] ret_from_fork_asm+0x11/0x20
+ unreferenced object 0xffff888107c9ac40 (size 64):
+ comm "kunit_try_catch", pid 1071, jiffies 4294670595 (age 732.843s)
+ hex dump (first 32 bytes):
+ 00 00 00 00 00 00 00 00 06 00 00 00 6b 6b 6b 6b ............kkkk
+ a0 cc 9c 07 81 88 ff ff 78 a1 76 07 81 88 ff ff ........x.v.....
+ backtrace:
+ [<ffffffff817e0167>] kmalloc_trace+0x27/0xa0
+ [<ffffffff819c11cf>] damon_new_target+0x3f/0x1b0
+ [<ffffffff819c7d55>] damon_do_test_apply_three_regions.constprop.0+0x95/0x3e0
+ [<ffffffff819c851e>] damon_test_apply_three_regions2+0x21e/0x260
+ [<ffffffff829fce6a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
+ [<ffffffff81237cf6>] kthread+0x2b6/0x380
+ [<ffffffff81097add>] ret_from_fork+0x2d/0x70
+ [<ffffffff81003791>] ret_from_fork_asm+0x11/0x20
+ unreferenced object 0xffff8881079ccc80 (size 56):
+ comm "kunit_try_catch", pid 1071, jiffies 4294670595 (age 732.843s)
+ hex dump (first 32 bytes):
+ 05 00 00 00 00 00 00 00 14 00 00 00 00 00 00 00 ................
+ 6b 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 6b 6b 6b 6b kkkkkkkk....kkkk
+ backtrace:
+ [<ffffffff819bc492>] damon_new_region+0x22/0x1c0
+ [<ffffffff819c7d91>] damon_do_test_apply_three_regions.constprop.0+0xd1/0x3e0
+ [<ffffffff819c851e>] damon_test_apply_three_regions2+0x21e/0x260
+ [<ffffffff829fce6a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
+ [<ffffffff81237cf6>] kthread+0x2b6/0x380
+ [<ffffffff81097add>] ret_from_fork+0x2d/0x70
+ [<ffffffff81003791>] ret_from_fork_asm+0x11/0x20
+ unreferenced object 0xffff888107c9af40 (size 64):
+ comm "kunit_try_catch", pid 1073, jiffies 4294670597 (age 733.011s)
+ hex dump (first 32 bytes):
+ 00 00 00 00 00 00 00 00 06 00 00 00 6b 6b 6b 6b ............kkkk
+ 20 a2 76 07 81 88 ff ff b8 a6 76 07 81 88 ff ff .v.......v.....
+ backtrace:
+ [<ffffffff817e0167>] kmalloc_trace+0x27/0xa0
+ [<ffffffff819c11cf>] damon_new_target+0x3f/0x1b0
+ [<ffffffff819c7d55>] damon_do_test_apply_three_regions.constprop.0+0x95/0x3e0
+ [<ffffffff819c877e>] damon_test_apply_three_regions3+0x21e/0x260
+ [<ffffffff829fce6a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
+ [<ffffffff81237cf6>] kthread+0x2b6/0x380
+ [<ffffffff81097add>] ret_from_fork+0x2d/0x70
+ [<ffffffff81003791>] ret_from_fork_asm+0x11/0x20
+ unreferenced object 0xffff88810776a200 (size 56):
+ comm "kunit_try_catch", pid 1073, jiffies 4294670597 (age 733.011s)
+ hex dump (first 32 bytes):
+ 05 00 00 00 00 00 00 00 14 00 00 00 00 00 00 00 ................
+ 6b 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 6b 6b 6b 6b kkkkkkkk....kkkk
+ backtrace:
+ [<ffffffff819bc492>] damon_new_region+0x22/0x1c0
+ [<ffffffff819c7d91>] damon_do_test_apply_three_regions.constprop.0+0xd1/0x3e0
+ [<ffffffff819c877e>] damon_test_apply_three_regions3+0x21e/0x260
+ [<ffffffff829fce6a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
+ [<ffffffff81237cf6>] kthread+0x2b6/0x380
+ [<ffffffff81097add>] ret_from_fork+0x2d/0x70
+ [<ffffffff81003791>] ret_from_fork_asm+0x11/0x20
+ unreferenced object 0xffff88810776a740 (size 56):
+ comm "kunit_try_catch", pid 1073, jiffies 4294670597 (age 733.025s)
+ hex dump (first 32 bytes):
+ 3d 00 00 00 00 00 00 00 3f 00 00 00 00 00 00 00 =.......?.......
+ 6b 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 6b 6b 6b 6b kkkkkkkk....kkkk
+ backtrace:
+ [<ffffffff819bc492>] damon_new_region+0x22/0x1c0
+ [<ffffffff819bfcc2>] damon_set_regions+0x4c2/0x8e0
+ [<ffffffff819c7dbb>] damon_do_test_apply_three_regions.constprop.0+0xfb/0x3e0
+ [<ffffffff819c877e>] damon_test_apply_three_regions3+0x21e/0x260
+ [<ffffffff829fce6a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
+ [<ffffffff81237cf6>] kthread+0x2b6/0x380
+ [<ffffffff81097add>] ret_from_fork+0x2d/0x70
+ [<ffffffff81003791>] ret_from_fork_asm+0x11/0x20
+ unreferenced object 0xffff888108038240 (size 64):
+ comm "kunit_try_catch", pid 1075, jiffies 4294670600 (age 733.022s)
+ hex dump (first 32 bytes):
+ 00 00 00 00 00 00 00 00 03 00 00 00 6b 6b 6b 6b ............kkkk
+ 48 ad 76 07 81 88 ff ff 98 ae 76 07 81 88 ff ff H.v.......v.....
+ backtrace:
+ [<ffffffff817e0167>] kmalloc_trace+0x27/0xa0
+ [<ffffffff819c11cf>] damon_new_target+0x3f/0x1b0
+ [<ffffffff819c7d55>] damon_do_test_apply_three_regions.constprop.0+0x95/0x3e0
+ [<ffffffff819c898d>] damon_test_apply_three_regions4+0x1cd/0x210
+ [<ffffffff829fce6a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
+ [<ffffffff81237cf6>] kthread+0x2b6/0x380
+ [<ffffffff81097add>] ret_from_fork+0x2d/0x70
+ [<ffffffff81003791>] ret_from_fork_asm+0x11/0x20
+ unreferenced object 0xffff88810776ad28 (size 56):
+ comm "kunit_try_catch", pid 1075, jiffies 4294670600 (age 733.022s)
+ hex dump (first 32 bytes):
+ 05 00 00 00 00 00 00 00 07 00 00 00 00 00 00 00 ................
+ 6b 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 6b 6b 6b 6b kkkkkkkk....kkkk
+ backtrace:
+ [<ffffffff819bc492>] damon_new_region+0x22/0x1c0
+ [<ffffffff819bfcc2>] damon_set_regions+0x4c2/0x8e0
+ [<ffffffff819c7dbb>] damon_do_test_apply_three_regions.constprop.0+0xfb/0x3e0
+ [<ffffffff819c898d>] damon_test_apply_three_regions4+0x1cd/0x210
+ [<ffffffff829fce6a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
+ [<ffffffff81237cf6>] kthread+0x2b6/0x380
+ [<ffffffff81097add>] ret_from_fork+0x2d/0x70
+ [<ffffffff81003791>] ret_from_fork_asm+0x11/0x20
+
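+A minimal sketch of the intended teardown, for illustration (the real
+test exercises damon_set_regions() and the assertions in between):
+
+    t = damon_new_target();
+    damon_add_region(damon_new_region(start, end), t);
+    /* ... apply and verify the three regions ... */
+    damon_destroy_target(t);    /* frees the target and all its regions */
+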
+Link: https://lkml.kernel.org/r/20230925072100.3725620-1-ruanjinjie@huawei.com
+Fixes: 9f86d624292c ("mm/damon/vaddr-test: remove unnecessary variables")
+Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
+Reviewed-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/vaddr-test.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/mm/damon/vaddr-test.h
++++ b/mm/damon/vaddr-test.h
+@@ -148,6 +148,8 @@ static void damon_do_test_apply_three_re
+ KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]);
+ KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]);
+ }
++
++ damon_destroy_target(t);
+ }
+
+ /*
--- /dev/null
+From 9ea9cb00a82b53ec39630eac718776d37e41b35a Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 14 Sep 2023 11:21:39 -0400
+Subject: mm: memcontrol: fix GFP_NOFS recursion in memory.high enforcement
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 9ea9cb00a82b53ec39630eac718776d37e41b35a upstream.
+
+Breno and Josef report a deadlock scenario from cgroup reclaim
+re-entering the filesystem:
+
+[ 361.546690] ======================================================
+[ 361.559210] WARNING: possible circular locking dependency detected
+[ 361.571703] 6.5.0-0_fbk700_debug_rc0_kbuilder_13159_gbf787a128001 #1 Tainted: G S E
+[ 361.589704] ------------------------------------------------------
+[ 361.602277] find/9315 is trying to acquire lock:
+[ 361.611625] ffff88837ba140c0 (&delayed_node->mutex){+.+.}-{4:4}, at: __btrfs_release_delayed_node+0x68/0x4f0
+[ 361.631437]
+[ 361.631437] but task is already holding lock:
+[ 361.643243] ffff8881765b8678 (btrfs-tree-01){++++}-{4:4}, at: btrfs_tree_read_lock+0x1e/0x40
+
+[ 362.904457] mutex_lock_nested+0x1c/0x30
+[ 362.912414] __btrfs_release_delayed_node+0x68/0x4f0
+[ 362.922460] btrfs_evict_inode+0x301/0x770
+[ 362.982726] evict+0x17c/0x380
+[ 362.988944] prune_icache_sb+0x100/0x1d0
+[ 363.005559] super_cache_scan+0x1f8/0x260
+[ 363.013695] do_shrink_slab+0x2a2/0x540
+[ 363.021489] shrink_slab_memcg+0x237/0x3d0
+[ 363.050606] shrink_slab+0xa7/0x240
+[ 363.083382] shrink_node_memcgs+0x262/0x3b0
+[ 363.091870] shrink_node+0x1a4/0x720
+[ 363.099150] shrink_zones+0x1f6/0x5d0
+[ 363.148798] do_try_to_free_pages+0x19b/0x5e0
+[ 363.157633] try_to_free_mem_cgroup_pages+0x266/0x370
+[ 363.190575] reclaim_high+0x16f/0x1f0
+[ 363.208409] mem_cgroup_handle_over_high+0x10b/0x270
+[ 363.246678] try_charge_memcg+0xaf2/0xc70
+[ 363.304151] charge_memcg+0xf0/0x350
+[ 363.320070] __mem_cgroup_charge+0x28/0x40
+[ 363.328371] __filemap_add_folio+0x870/0xd50
+[ 363.371303] filemap_add_folio+0xdd/0x310
+[ 363.399696] __filemap_get_folio+0x2fc/0x7d0
+[ 363.419086] pagecache_get_page+0xe/0x30
+[ 363.427048] alloc_extent_buffer+0x1cd/0x6a0
+[ 363.435704] read_tree_block+0x43/0xc0
+[ 363.443316] read_block_for_search+0x361/0x510
+[ 363.466690] btrfs_search_slot+0xc8c/0x1520
+
+This is caused by mem_cgroup_handle_over_high() not respecting the
+gfp_mask of the allocation context.  We used to only call this function
+on resume to userspace, where no locks were held.  But c9afe31ec443
+("memcg: synchronously enforce memory.high for large overcharges")
+added a call from the allocation context without considering the gfp
+mask.
+
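+Condensed, the shape of the fix at the two call sites (a sketch, not
+the full patch):
+
+    /* charge path: only reclaim in ways the caller's gfp_mask allows */
+    if (gfpflags_allow_blocking(gfp_mask))
+        mem_cgroup_handle_over_high(gfp_mask);
+
+    /* return-to-userspace path: no locks held, GFP_KERNEL is safe */
+    mem_cgroup_handle_over_high(GFP_KERNEL);
+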
+Link: https://lkml.kernel.org/r/20230914152139.100822-1-hannes@cmpxchg.org
+Fixes: c9afe31ec443 ("memcg: synchronously enforce memory.high for large overcharges")
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reported-by: Breno Leitao <leitao@debian.org>
+Reported-by: Josef Bacik <josef@toxicpanda.com>
+Acked-by: Shakeel Butt <shakeelb@google.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Roman Gushchin <roman.gushchin@linux.dev>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: <stable@vger.kernel.org> [5.17+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/memcontrol.h | 4 ++--
+ include/linux/resume_user_mode.h | 2 +-
+ mm/memcontrol.c | 6 +++---
+ 3 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -919,7 +919,7 @@ unsigned long mem_cgroup_get_zone_lru_si
+ return READ_ONCE(mz->lru_zone_size[zone_idx][lru]);
+ }
+
+-void mem_cgroup_handle_over_high(void);
++void mem_cgroup_handle_over_high(gfp_t gfp_mask);
+
+ unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);
+
+@@ -1460,7 +1460,7 @@ static inline void mem_cgroup_unlock_pag
+ rcu_read_unlock();
+ }
+
+-static inline void mem_cgroup_handle_over_high(void)
++static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask)
+ {
+ }
+
+--- a/include/linux/resume_user_mode.h
++++ b/include/linux/resume_user_mode.h
+@@ -55,7 +55,7 @@ static inline void resume_user_mode_work
+ }
+ #endif
+
+- mem_cgroup_handle_over_high();
++ mem_cgroup_handle_over_high(GFP_KERNEL);
+ blkcg_maybe_throttle_current();
+
+ rseq_handle_notify_resume(NULL, regs);
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2559,7 +2559,7 @@ static unsigned long calculate_high_dela
+ * Scheduled by try_charge() to be executed from the userland return path
+ * and reclaims memory over the high limit.
+ */
+-void mem_cgroup_handle_over_high(void)
++void mem_cgroup_handle_over_high(gfp_t gfp_mask)
+ {
+ unsigned long penalty_jiffies;
+ unsigned long pflags;
+@@ -2587,7 +2587,7 @@ retry_reclaim:
+ */
+ nr_reclaimed = reclaim_high(memcg,
+ in_retry ? SWAP_CLUSTER_MAX : nr_pages,
+- GFP_KERNEL);
++ gfp_mask);
+
+ /*
+ * memory.high is breached and reclaim is unable to keep up. Throttle
+@@ -2823,7 +2823,7 @@ done_restock:
+ if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH &&
+ !(current->flags & PF_MEMALLOC) &&
+ gfpflags_allow_blocking(gfp_mask)) {
+- mem_cgroup_handle_over_high();
++ mem_cgroup_handle_over_high(gfp_mask);
+ }
+ return 0;
+ }
--- /dev/null
+From 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 Mon Sep 17 00:00:00 2001
+From: Yang Shi <yang@os.amperecomputing.com>
+Date: Wed, 20 Sep 2023 15:32:42 -0700
+Subject: mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified
+
+From: Yang Shi <yang@os.amperecomputing.com>
+
+commit 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 upstream.
+
+When calling mbind() with MPOL_MF_{MOVE|MOVEALL} | MPOL_MF_STRICT, the
+kernel should attempt to migrate all existing pages, and return -EIO if
+there is a misplaced or unmovable page.  Then commit 6f4576e3687b
+("mempolicy: apply page table walker on queue_pages_range()") messed up
+the return value and no longer broke the VMA scan early when
+MPOL_MF_STRICT was specified alone.  The return value problem was fixed
+by commit a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when
+MPOL_MF_STRICT is specified"), but that in turn broke out of the VMA
+walk early when an unmovable page was met, which may cause some pages
+not to be migrated as expected.
+
+The code should conceptually do:
+
+ if (MPOL_MF_MOVE|MOVEALL)
+ scan all vmas
+ try to migrate the existing pages
+ return success
+ else if (MPOL_MF_MOVE* | MPOL_MF_STRICT)
+ scan all vmas
+ try to migrate the existing pages
+ return -EIO if unmovable or migration failed
+ else /* MPOL_MF_STRICT alone */
+ break early if meets unmovable and don't call mbind_range() at all
+ else /* none of those flags */
+ check the ranges in test_walk, EFAULT without mbind_range() if discontig.
+
+Fixed the behavior.
+
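+Condensed, the fix replaces the early-exit return value with a flag
+that is checked once the walk has completed (a sketch built around the
+field this patch adds):
+
+    struct queue_pages qp = { /* ... */ .has_unmovable = false };
+
+    err = walk_page_range(mm, start, end, ops, &qp);
+    if (qp.has_unmovable)
+        err = 1;    /* everything was walked, but some pages cannot move */
+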
+Link: https://lkml.kernel.org/r/20230920223242.3425775-1-yang@os.amperecomputing.com
+Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified")
+Signed-off-by: Yang Shi <yang@os.amperecomputing.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Rafael Aquini <aquini@redhat.com>
+Cc: Kirill A. Shutemov <kirill@shutemov.name>
+Cc: David Rientjes <rientjes@google.com>
+Cc: <stable@vger.kernel.org> [4.9+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mempolicy.c | 39 +++++++++++++++++++--------------------
+ 1 file changed, 19 insertions(+), 20 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -426,6 +426,7 @@ struct queue_pages {
+ unsigned long start;
+ unsigned long end;
+ struct vm_area_struct *first;
++ bool has_unmovable;
+ };
+
+ /*
+@@ -446,9 +447,8 @@ static inline bool queue_folio_required(
+ /*
+ * queue_folios_pmd() has three possible return values:
+ * 0 - folios are placed on the right node or queued successfully, or
+- * special page is met, i.e. huge zero page.
+- * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+- * specified.
++ * special page is met, i.e. zero page, or unmovable page is found
++ * but continue walking (indicated by queue_pages.has_unmovable).
+ * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
+ * existing folio was already on a node that does not follow the
+ * policy.
+@@ -479,7 +479,7 @@ static int queue_folios_pmd(pmd_t *pmd,
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+ if (!vma_migratable(walk->vma) ||
+ migrate_folio_add(folio, qp->pagelist, flags)) {
+- ret = 1;
++ qp->has_unmovable = true;
+ goto unlock;
+ }
+ } else
+@@ -495,9 +495,8 @@ unlock:
+ *
+ * queue_folios_pte_range() has three possible return values:
+ * 0 - folios are placed on the right node or queued successfully, or
+- * special page is met, i.e. zero page.
+- * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+- * specified.
++ * special page is met, i.e. zero page, or unmovable page is found
++ * but continue walking (indicated by queue_pages.has_unmovable).
+ * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already
+ * on a node that does not follow the policy.
+ */
+@@ -508,7 +507,6 @@ static int queue_folios_pte_range(pmd_t
+ struct folio *folio;
+ struct queue_pages *qp = walk->private;
+ unsigned long flags = qp->flags;
+- bool has_unmovable = false;
+ pte_t *pte, *mapped_pte;
+ pte_t ptent;
+ spinlock_t *ptl;
+@@ -538,11 +536,12 @@ static int queue_folios_pte_range(pmd_t
+ if (!queue_folio_required(folio, qp))
+ continue;
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+- /* MPOL_MF_STRICT must be specified if we get here */
+- if (!vma_migratable(vma)) {
+- has_unmovable = true;
+- break;
+- }
++ /*
++ * MPOL_MF_STRICT must be specified if we get here.
++ * Continue walking vmas due to MPOL_MF_MOVE* flags.
++ */
++ if (!vma_migratable(vma))
++ qp->has_unmovable = true;
+
+ /*
+ * Do not abort immediately since there may be
+@@ -550,16 +549,13 @@ static int queue_folios_pte_range(pmd_t
+ * need migrate other LRU pages.
+ */
+ if (migrate_folio_add(folio, qp->pagelist, flags))
+- has_unmovable = true;
++ qp->has_unmovable = true;
+ } else
+ break;
+ }
+ pte_unmap_unlock(mapped_pte, ptl);
+ cond_resched();
+
+- if (has_unmovable)
+- return 1;
+-
+ return addr != end ? -EIO : 0;
+ }
+
+@@ -599,7 +595,7 @@ static int queue_folios_hugetlb(pte_t *p
+ * Detecting misplaced folio but allow migrating folios which
+ * have been queued.
+ */
+- ret = 1;
++ qp->has_unmovable = true;
+ goto unlock;
+ }
+
+@@ -620,7 +616,7 @@ static int queue_folios_hugetlb(pte_t *p
+ * Failed to isolate folio but allow migrating pages
+ * which have been queued.
+ */
+- ret = 1;
++ qp->has_unmovable = true;
+ }
+ unlock:
+ spin_unlock(ptl);
+@@ -756,12 +752,15 @@ queue_pages_range(struct mm_struct *mm,
+ .start = start,
+ .end = end,
+ .first = NULL,
++ .has_unmovable = false,
+ };
+ const struct mm_walk_ops *ops = lock_vma ?
+ &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;
+
+ err = walk_page_range(mm, start, end, ops, &qp);
+
++ if (qp.has_unmovable)
++ err = 1;
+ if (!qp.first)
+ /* whole range in hole */
+ err = -EFAULT;
+@@ -1358,7 +1357,7 @@ static long do_mbind(unsigned long start
+ putback_movable_pages(&pagelist);
+ }
+
+- if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
++ if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT))
+ err = -EIO;
+ } else {
+ up_out:
--- /dev/null
+From 7b086755fb8cdbb6b3e45a1bbddc00e7f9b1dc03 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Mon, 11 Sep 2023 14:11:08 -0400
+Subject: mm: page_alloc: fix CMA and HIGHATOMIC landing on the wrong buddy list
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 7b086755fb8cdbb6b3e45a1bbddc00e7f9b1dc03 upstream.
+
+Commit 4b23a68f9536 ("mm/page_alloc: protect PCP lists with a spinlock")
+bypasses the pcplist on lock contention and returns the page directly to
+the buddy list of the page's migratetype.
+
+For pages that don't have their own pcplist, such as CMA and HIGHATOMIC,
+the migratetype is temporarily updated such that the page can hitch a ride
+on the MOVABLE pcplist. Their true type is later reassessed when flushing
+in free_pcppages_bulk(). However, when lock contention is detected after
+the type was already overridden, the bypass will then put the page on the
+wrong buddy list.
+
+Once on the MOVABLE buddy list, the page becomes eligible for fallbacks
+and even stealing. In the case of HIGHATOMIC, otherwise ineligible
+allocations can dip into the highatomic reserves. In the case of CMA, the
+page can be lost from the CMA region permanently.
+
+Use a separate pcpmigratetype variable for the pcplist override. Use the
+original migratetype when going directly to the buddy. This fixes the bug
+and should make the intentions more obvious in the code.
+
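+In outline, the fix keeps the two migratetype values side by side (an
+abbreviated sketch of free_unref_page()):
+
+    migratetype = pcpmigratetype = get_pcppage_migratetype(page);
+    if (unlikely(migratetype >= MIGRATE_PCPTYPES))
+        pcpmigratetype = MIGRATE_MOVABLE;    /* pcplist ride only */
+
+    if (pcp)    /* pcplist path: the override is fine here */
+        free_unref_page_commit(zone, pcp, page, pcpmigratetype, order);
+    else        /* lock-contention bypass: use the true type */
+        free_one_page(zone, page, pfn, order, migratetype, FPI_NONE);
+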
+Originally sent here to address the HIGHATOMIC case:
+https://lore.kernel.org/lkml/20230821183733.106619-4-hannes@cmpxchg.org/
+
+Changelog updated in response to the CMA-specific bug report.
+
+[mgorman@techsingularity.net: updated changelog]
+Link: https://lkml.kernel.org/r/20230911181108.GA104295@cmpxchg.org
+Fixes: 4b23a68f9536 ("mm/page_alloc: protect PCP lists with a spinlock")
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reported-by: Joe Liu <joe.liu@mediatek.com>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_alloc.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -2438,7 +2438,7 @@ void free_unref_page(struct page *page,
+ struct per_cpu_pages *pcp;
+ struct zone *zone;
+ unsigned long pfn = page_to_pfn(page);
+- int migratetype;
++ int migratetype, pcpmigratetype;
+
+ if (!free_unref_page_prepare(page, pfn, order))
+ return;
+@@ -2446,24 +2446,24 @@ void free_unref_page(struct page *page,
+ /*
+ * We only track unmovable, reclaimable and movable on pcp lists.
+ * Place ISOLATE pages on the isolated list because they are being
+- * offlined but treat HIGHATOMIC as movable pages so we can get those
+- * areas back if necessary. Otherwise, we may have to free
++ * offlined but treat HIGHATOMIC and CMA as movable pages so we can
++ * get those areas back if necessary. Otherwise, we may have to free
+ * excessively into the page allocator
+ */
+- migratetype = get_pcppage_migratetype(page);
++ migratetype = pcpmigratetype = get_pcppage_migratetype(page);
+ if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
+ if (unlikely(is_migrate_isolate(migratetype))) {
+ free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);
+ return;
+ }
+- migratetype = MIGRATE_MOVABLE;
++ pcpmigratetype = MIGRATE_MOVABLE;
+ }
+
+ zone = page_zone(page);
+ pcp_trylock_prepare(UP_flags);
+ pcp = pcp_spin_trylock(zone->per_cpu_pageset);
+ if (pcp) {
+- free_unref_page_commit(zone, pcp, page, migratetype, order);
++ free_unref_page_commit(zone, pcp, page, pcpmigratetype, order);
+ pcp_spin_unlock(pcp);
+ } else {
+ free_one_page(zone, page, pfn, order, migratetype, FPI_NONE);
--- /dev/null
+From 46a9ea6681907a3be6b6b0d43776dccc62cad6cf Mon Sep 17 00:00:00 2001
+From: Rafael Aquini <aquini@redhat.com>
+Date: Fri, 8 Sep 2023 19:06:49 -0400
+Subject: mm/slab_common: fix slab_caches list corruption after kmem_cache_destroy()
+
+From: Rafael Aquini <aquini@redhat.com>
+
+commit 46a9ea6681907a3be6b6b0d43776dccc62cad6cf upstream.
+
+After the commit in Fixes:, if a module that created a slab cache does not
+release all of its allocated objects before destroying the cache (at rmmod
+time), we might end up releasing the kmem_cache object without removing it
+from the slab_caches list thus corrupting the list as kmem_cache_destroy()
+ignores the return value from shutdown_cache(), which in turn never removes
+the kmem_cache object from slabs_list in case __kmem_cache_shutdown() fails
+to release all of the cache's slabs.
+
+This is easily observable on a kernel built with CONFIG_DEBUG_LIST=y,
+as after that ill-fated release the system will immediately trip on
+list_add or list_del assertions similar to the one shown below as soon
+as another kmem_cache gets created or destroyed:
+
+ [ 1041.213632] list_del corruption. next->prev should be ffff89f596fb5768, but was 52f1e5016aeee75d. (next=ffff89f595a1b268)
+ [ 1041.219165] ------------[ cut here ]------------
+ [ 1041.221517] kernel BUG at lib/list_debug.c:62!
+ [ 1041.223452] invalid opcode: 0000 [#1] PREEMPT SMP PTI
+ [ 1041.225408] CPU: 2 PID: 1852 Comm: rmmod Kdump: loaded Tainted: G B W OE 6.5.0 #15
+ [ 1041.228244] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc37 05/24/2023
+ [ 1041.231212] RIP: 0010:__list_del_entry_valid+0xae/0xb0
+
+Another quick way to trigger this issue, in a kernel with CONFIG_SLUB=y,
+is to set slub_debug to poison the released objects and then just run
+cat /proc/slabinfo after removing the module that leaks slab objects,
+in which case the kernel will panic:
+
+ [ 50.954843] general protection fault, probably for non-canonical address 0xa56b6b6b6b6b6b8b: 0000 [#1] PREEMPT SMP PTI
+ [ 50.961545] CPU: 2 PID: 1495 Comm: cat Kdump: loaded Tainted: G B W OE 6.5.0 #15
+ [ 50.966808] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc37 05/24/2023
+ [ 50.972663] RIP: 0010:get_slabinfo+0x42/0xf0
+
+This patch fixes this issue by properly checking shutdown_cache()'s
+return value before taking the kmem_cache_release() branch.
+
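+The crux of the fix, reduced to its control flow (an abbreviated sketch
+of kmem_cache_destroy()):
+
+    err = shutdown_cache(s);    /* fails if objects are still allocated */
+    WARN(err, "%s %s: Slab cache still has objects when called from %pS",
+         __func__, s->name, (void *)_RET_IP_);
+    ...
+    if (!err && !rcu_set)    /* only release after a successful shutdown */
+        kmem_cache_release(s);
+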
+Fixes: 0495e337b703 ("mm/slab_common: Deleting kobject in kmem_cache_destroy() without holding slab_mutex/cpu_hotplug_lock")
+Signed-off-by: Rafael Aquini <aquini@redhat.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/slab_common.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/mm/slab_common.c
++++ b/mm/slab_common.c
+@@ -479,7 +479,7 @@ void slab_kmem_cache_release(struct kmem
+
+ void kmem_cache_destroy(struct kmem_cache *s)
+ {
+- int refcnt;
++ int err = -EBUSY;
+ bool rcu_set;
+
+ if (unlikely(!s) || !kasan_check_byte(s))
+@@ -490,17 +490,17 @@ void kmem_cache_destroy(struct kmem_cach
+
+ rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU;
+
+- refcnt = --s->refcount;
+- if (refcnt)
++ s->refcount--;
++ if (s->refcount)
+ goto out_unlock;
+
+- WARN(shutdown_cache(s),
+- "%s %s: Slab cache still has objects when called from %pS",
++ err = shutdown_cache(s);
++ WARN(err, "%s %s: Slab cache still has objects when called from %pS",
+ __func__, s->name, (void *)_RET_IP_);
+ out_unlock:
+ mutex_unlock(&slab_mutex);
+ cpus_read_unlock();
+- if (!refcnt && !rcu_set)
++ if (!err && !rcu_set)
+ kmem_cache_release(s);
+ }
+ EXPORT_SYMBOL(kmem_cache_destroy);
--- /dev/null
+From e0b65f9b81fef180cf5f103adecbe5505c961153 Mon Sep 17 00:00:00 2001
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+Date: Wed, 13 Sep 2023 08:26:47 +0300
+Subject: net: thunderbolt: Fix TCPv6 GSO checksum calculation
+
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+
+commit e0b65f9b81fef180cf5f103adecbe5505c961153 upstream.
+
+Alex reported that running ssh over IPv6 does not work with the
+Thunderbolt/USB4 networking driver. The reason is that the driver
+should call skb_is_gso() before calling skb_is_gso_v6(), and it should
+not return false after it calculates the checksum successfully. This
+was probably a copy-paste error from the original driver, where it was
+done properly.
+
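+For illustration, the intended ordering of the checks (a sketch; only
+GSO frames carry a gso_type that is valid to inspect):
+
+    if (skb_is_gso(skb) && skb_is_gso_v6(skb)) {
+        /* TCPv6 GSO: seed the TCP pseudo-header checksum */
+        *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+                                  &ipv6_hdr(skb)->daddr, 0,
+                                  IPPROTO_TCP, 0);
+        /* fall through to the common mapping code, do not bail out */
+    }
+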
+Reported-by: Alex Balcanquall <alex@alexbal.com>
+Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable")
+Cc: stable@vger.kernel.org
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/thunderbolt/main.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/thunderbolt/main.c
++++ b/drivers/net/thunderbolt/main.c
+@@ -1049,12 +1049,11 @@ static bool tbnet_xmit_csum_and_map(stru
+ *tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr, 0,
+ ip_hdr(skb)->protocol, 0);
+- } else if (skb_is_gso_v6(skb)) {
++ } else if (skb_is_gso(skb) && skb_is_gso_v6(skb)) {
+ tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data);
+ *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, 0,
+ IPPROTO_TCP, 0);
+- return false;
+ } else if (protocol == htons(ETH_P_IPV6)) {
+ tucso = dest + skb_checksum_start_offset(skb) + skb->csum_offset;
+ *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
--- /dev/null
+From 956fd46f97d238032cb5fa4771cdaccc6e760f9a Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Sun, 24 Sep 2023 13:14:15 -0400
+Subject: NFSv4: Fix a state manager thread deadlock regression
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+commit 956fd46f97d238032cb5fa4771cdaccc6e760f9a upstream.
+
+Commit 4dc73c679114 reintroduces the deadlock that was fixed by commit
+aeabb3c96186 ("NFSv4: Fix a NFSv4 state manager deadlock") because it
+prevents the setup of new threads to handle reboot recovery, while the
+older recovery thread is stuck returning delegations.
+
+Fixes: 4dc73c679114 ("NFSv4: keep state manager thread active if swap is enabled")
+Cc: stable@vger.kernel.org
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfs/nfs4proc.c | 4 +++-
+ fs/nfs/nfs4state.c | 38 ++++++++++++++++++++++++++------------
+ 2 files changed, 29 insertions(+), 13 deletions(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -10622,7 +10622,9 @@ static void nfs4_disable_swap(struct ino
+ */
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+
+- nfs4_schedule_state_manager(clp);
++ set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
++ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
++ wake_up_var(&clp->cl_state);
+ }
+
+ static const struct inode_operations nfs4_dir_inode_operations = {
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -1209,16 +1209,26 @@ void nfs4_schedule_state_manager(struct
+ {
+ struct task_struct *task;
+ char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
++ struct rpc_clnt *clnt = clp->cl_rpcclient;
++ bool swapon = false;
+
+- if (clp->cl_rpcclient->cl_shutdown)
++ if (clnt->cl_shutdown)
+ return;
+
+ set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
+- if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
+- wake_up_var(&clp->cl_state);
+- return;
++
++ if (atomic_read(&clnt->cl_swapper)) {
++ swapon = !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE,
++ &clp->cl_state);
++ if (!swapon) {
++ wake_up_var(&clp->cl_state);
++ return;
++ }
+ }
+- set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
++
++ if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
++ return;
++
+ __module_get(THIS_MODULE);
+ refcount_inc(&clp->cl_count);
+
+@@ -1235,8 +1245,9 @@ void nfs4_schedule_state_manager(struct
+ __func__, PTR_ERR(task));
+ if (!nfs_client_init_is_complete(clp))
+ nfs_mark_client_ready(clp, PTR_ERR(task));
++ if (swapon)
++ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+ nfs4_clear_state_manager_bit(clp);
+- clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+ nfs_put_client(clp);
+ module_put(THIS_MODULE);
+ }
+@@ -2741,22 +2752,25 @@ static int nfs4_run_state_manager(void *
+
+ allow_signal(SIGKILL);
+ again:
+- set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
+ nfs4_state_manager(clp);
+- if (atomic_read(&cl->cl_swapper)) {
++
++ if (test_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) &&
++ !test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) {
+ wait_var_event_interruptible(&clp->cl_state,
+ test_bit(NFS4CLNT_RUN_MANAGER,
+ &clp->cl_state));
+- if (atomic_read(&cl->cl_swapper) &&
+- test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
++ if (!atomic_read(&cl->cl_swapper))
++ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
++ if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
++ !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state))
+ goto again;
+ /* Either no longer a swapper, or were signalled */
++ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+ }
+- clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+
+ if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
+ test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) &&
+- !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state))
++ !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state))
+ goto again;
+
+ nfs_put_client(clp);
--- /dev/null
+From dc77721ea4aa1e8937e2436f230b5a69065cc508 Mon Sep 17 00:00:00 2001
+From: Linus Walleij <linus.walleij@linaro.org>
+Date: Tue, 13 Jun 2023 23:31:50 +0200
+Subject: power: supply: ab8500: Set typing and props
+
+From: Linus Walleij <linus.walleij@linaro.org>
+
+commit dc77721ea4aa1e8937e2436f230b5a69065cc508 upstream.
+
+I had the following weird phenomenon on a mobile phone: while
+the capacity in /sys/class/power_supply/ab8500_fg/capacity
+would reflect the actual charge and capacity of the battery,
+only 1/3 of the value was shown on the battery status
+indicator and warnings for low battery appeared.
+
+It turns out that UPower, the Freedesktop power daemon,
+will average all the power supplies of type "battery" in
+/sys/class/power_supply/* if there is more than one battery.
+
+For the AB8500, there were three "battery" supplies: ab8500_fg,
+ab8500_btemp and ab8500_chargalg. The latter two don't know
+anything about the battery and should not be considered. They
+were however averaged in, each with a capacity of 0.
+
+Flag ab8500_btemp and ab8500_chargalg with type "unknown"
+so they are not averaged as batteries.
+
+Remove the technology prop from ab8500_btemp as well; all
+it does is snoop in on knowledge from another supply.
+
+After this the battery indicator shows the right value.
+
+Cc: Stefan Hansson <newbyte@disroot.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/power/supply/ab8500_btemp.c | 9 +--------
+ drivers/power/supply/ab8500_chargalg.c | 2 +-
+ 2 files changed, 2 insertions(+), 9 deletions(-)
+
+--- a/drivers/power/supply/ab8500_btemp.c
++++ b/drivers/power/supply/ab8500_btemp.c
+@@ -115,7 +115,6 @@ struct ab8500_btemp {
+ static enum power_supply_property ab8500_btemp_props[] = {
+ POWER_SUPPLY_PROP_PRESENT,
+ POWER_SUPPLY_PROP_ONLINE,
+- POWER_SUPPLY_PROP_TECHNOLOGY,
+ POWER_SUPPLY_PROP_TEMP,
+ };
+
+@@ -532,12 +531,6 @@ static int ab8500_btemp_get_property(str
+ else
+ val->intval = 1;
+ break;
+- case POWER_SUPPLY_PROP_TECHNOLOGY:
+- if (di->bm->bi)
+- val->intval = di->bm->bi->technology;
+- else
+- val->intval = POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
+- break;
+ case POWER_SUPPLY_PROP_TEMP:
+ val->intval = ab8500_btemp_get_temp(di);
+ break;
+@@ -662,7 +655,7 @@ static char *supply_interface[] = {
+
+ static const struct power_supply_desc ab8500_btemp_desc = {
+ .name = "ab8500_btemp",
+- .type = POWER_SUPPLY_TYPE_BATTERY,
++ .type = POWER_SUPPLY_TYPE_UNKNOWN,
+ .properties = ab8500_btemp_props,
+ .num_properties = ARRAY_SIZE(ab8500_btemp_props),
+ .get_property = ab8500_btemp_get_property,
+--- a/drivers/power/supply/ab8500_chargalg.c
++++ b/drivers/power/supply/ab8500_chargalg.c
+@@ -1720,7 +1720,7 @@ static char *supply_interface[] = {
+
+ static const struct power_supply_desc ab8500_chargalg_desc = {
+ .name = "ab8500_chargalg",
+- .type = POWER_SUPPLY_TYPE_BATTERY,
++ .type = POWER_SUPPLY_TYPE_UNKNOWN,
+ .properties = ab8500_chargalg_props,
+ .num_properties = ARRAY_SIZE(ab8500_chargalg_props),
+ .get_property = ab8500_chargalg_get_property,
--- /dev/null
+From cbcdfbf5a6cd66e47e5ee5d49c4c5a27a07ba082 Mon Sep 17 00:00:00 2001
+From: Nicolas Frattaroli <frattaroli.nicolas@gmail.com>
+Date: Mon, 12 Jun 2023 16:36:52 +0200
+Subject: power: supply: rk817: Add missing module alias
+
+From: Nicolas Frattaroli <frattaroli.nicolas@gmail.com>
+
+commit cbcdfbf5a6cd66e47e5ee5d49c4c5a27a07ba082 upstream.
+
+Similar to the rk817 codec alias that was missing, the rk817 charger
+driver is missing a module alias as well. This absence prevents the
+driver from autoprobing on OF systems when it is built as a module.
+
+Add the right MODULE_ALIAS to fix this.
+
+Fixes: 11cb8da0189b ("power: supply: Add charger driver for Rockchip RK817")
+Cc: stable@vger.kernel.org
+Signed-off-by: Nicolas Frattaroli <frattaroli.nicolas@gmail.com>
+Reviewed-by: Chris Morgan <macromorgan@hotmail.com>
+Link: https://lore.kernel.org/r/20230612143651.959646-2-frattaroli.nicolas@gmail.com
+Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/power/supply/rk817_charger.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/power/supply/rk817_charger.c
++++ b/drivers/power/supply/rk817_charger.c
+@@ -1220,3 +1220,4 @@ MODULE_DESCRIPTION("Battery power supply
+ MODULE_AUTHOR("Maya Matuszczyk <maccraft123mc@gmail.com>");
+ MODULE_AUTHOR("Chris Morgan <macromorgan@hotmail.com>");
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS("platform:rk817-charger");
--- /dev/null
+From 510a7330c82a7754d5df0117a8589e8a539067c7 Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Tue, 19 Sep 2023 20:41:47 +0200
+Subject: rbd: decouple header read-in from updating rbd_dev->header
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 510a7330c82a7754d5df0117a8589e8a539067c7 upstream.
+
+Make rbd_dev_header_info() populate a passed struct rbd_image_header
+instead of rbd_dev->header and introduce rbd_dev_update_header() for
+updating mutable fields in rbd_dev->header upon refresh. The initial
+read-in of both mutable and immutable fields in rbd_dev_image_probe()
+passes in rbd_dev->header so no update step is required there.
+
+rbd_init_layout() is now called directly from rbd_dev_image_probe()
+instead of individually in format 1 and format 2 implementations.
+
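+After this change the refresh path reads into a local header first and
+only then folds the mutable fields back into rbd_dev->header, roughly
+(an abbreviated sketch of rbd_dev_refresh()):
+
+    struct rbd_image_header header = { 0 };
+
+    ret = rbd_dev_header_info(rbd_dev, &header, false);
+    if (ret)
+        return ret;
+
+    rbd_dev_update_header(rbd_dev, &header);    /* mutable fields only */
+    rbd_image_header_cleanup(&header);
+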
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/rbd.c | 206 ++++++++++++++++++++++++++++------------------------
+ 1 file changed, 114 insertions(+), 92 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -632,7 +632,8 @@ void rbd_warn(struct rbd_device *rbd_dev
+ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
+
+ static int rbd_dev_refresh(struct rbd_device *rbd_dev);
+-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
++static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
++ struct rbd_image_header *header);
+ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+ u64 snap_id);
+ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+@@ -993,15 +994,24 @@ static void rbd_init_layout(struct rbd_d
+ RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL);
+ }
+
++static void rbd_image_header_cleanup(struct rbd_image_header *header)
++{
++ kfree(header->object_prefix);
++ ceph_put_snap_context(header->snapc);
++ kfree(header->snap_sizes);
++ kfree(header->snap_names);
++
++ memset(header, 0, sizeof(*header));
++}
++
+ /*
+ * Fill an rbd image header with information from the given format 1
+ * on-disk header.
+ */
+-static int rbd_header_from_disk(struct rbd_device *rbd_dev,
+- struct rbd_image_header_ondisk *ondisk)
++static int rbd_header_from_disk(struct rbd_image_header *header,
++ struct rbd_image_header_ondisk *ondisk,
++ bool first_time)
+ {
+- struct rbd_image_header *header = &rbd_dev->header;
+- bool first_time = header->object_prefix == NULL;
+ struct ceph_snap_context *snapc;
+ char *object_prefix = NULL;
+ char *snap_names = NULL;
+@@ -1068,11 +1078,6 @@ static int rbd_header_from_disk(struct r
+ if (first_time) {
+ header->object_prefix = object_prefix;
+ header->obj_order = ondisk->options.order;
+- rbd_init_layout(rbd_dev);
+- } else {
+- ceph_put_snap_context(header->snapc);
+- kfree(header->snap_names);
+- kfree(header->snap_sizes);
+ }
+
+ /* The remaining fields always get updated (when we refresh) */
+@@ -4857,7 +4862,9 @@ out_req:
+ * return, the rbd_dev->header field will contain up-to-date
+ * information about the image.
+ */
+-static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
++static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev,
++ struct rbd_image_header *header,
++ bool first_time)
+ {
+ struct rbd_image_header_ondisk *ondisk = NULL;
+ u32 snap_count = 0;
+@@ -4905,7 +4912,7 @@ static int rbd_dev_v1_header_info(struct
+ snap_count = le32_to_cpu(ondisk->snap_count);
+ } while (snap_count != want_count);
+
+- ret = rbd_header_from_disk(rbd_dev, ondisk);
++ ret = rbd_header_from_disk(header, ondisk, first_time);
+ out:
+ kfree(ondisk);
+
+@@ -5468,17 +5475,12 @@ static int _rbd_dev_v2_snap_size(struct
+ return 0;
+ }
+
+-static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
+-{
+- return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
+- &rbd_dev->header.obj_order,
+- &rbd_dev->header.image_size);
+-}
+-
+-static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev,
++ char **pobject_prefix)
+ {
+ size_t size;
+ void *reply_buf;
++ char *object_prefix;
+ int ret;
+ void *p;
+
+@@ -5496,16 +5498,16 @@ static int rbd_dev_v2_object_prefix(stru
+ goto out;
+
+ p = reply_buf;
+- rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
+- p + ret, NULL, GFP_NOIO);
++ object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL,
++ GFP_NOIO);
++ if (IS_ERR(object_prefix)) {
++ ret = PTR_ERR(object_prefix);
++ goto out;
++ }
+ ret = 0;
+
+- if (IS_ERR(rbd_dev->header.object_prefix)) {
+- ret = PTR_ERR(rbd_dev->header.object_prefix);
+- rbd_dev->header.object_prefix = NULL;
+- } else {
+- dout(" object_prefix = %s\n", rbd_dev->header.object_prefix);
+- }
++ *pobject_prefix = object_prefix;
++ dout(" object_prefix = %s\n", object_prefix);
+ out:
+ kfree(reply_buf);
+
+@@ -5556,13 +5558,6 @@ static int _rbd_dev_v2_snap_features(str
+ return 0;
+ }
+
+-static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
+-{
+- return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
+- rbd_is_ro(rbd_dev),
+- &rbd_dev->header.features);
+-}
+-
+ /*
+ * These are generic image flags, but since they are used only for
+ * object map, store them in rbd_dev->object_map_flags.
+@@ -5837,14 +5832,14 @@ out_err:
+ return ret;
+ }
+
+-static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev,
++ u64 *stripe_unit, u64 *stripe_count)
+ {
+ struct {
+ __le64 stripe_unit;
+ __le64 stripe_count;
+ } __attribute__ ((packed)) striping_info_buf = { 0 };
+ size_t size = sizeof (striping_info_buf);
+- void *p;
+ int ret;
+
+ ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
+@@ -5856,27 +5851,33 @@ static int rbd_dev_v2_striping_info(stru
+ if (ret < size)
+ return -ERANGE;
+
+- p = &striping_info_buf;
+- rbd_dev->header.stripe_unit = ceph_decode_64(&p);
+- rbd_dev->header.stripe_count = ceph_decode_64(&p);
++ *stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit);
++ *stripe_count = le64_to_cpu(striping_info_buf.stripe_count);
++ dout(" stripe_unit = %llu stripe_count = %llu\n", *stripe_unit,
++ *stripe_count);
++
+ return 0;
+ }
+
+-static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id)
+ {
+- __le64 data_pool_id;
++ __le64 data_pool_buf;
+ int ret;
+
+ ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
+ &rbd_dev->header_oloc, "get_data_pool",
+- NULL, 0, &data_pool_id, sizeof(data_pool_id));
++ NULL, 0, &data_pool_buf,
++ sizeof(data_pool_buf));
++ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0)
+ return ret;
+- if (ret < sizeof(data_pool_id))
++ if (ret < sizeof(data_pool_buf))
+ return -EBADMSG;
+
+- rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id);
+- WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL);
++ *data_pool_id = le64_to_cpu(data_pool_buf);
++ dout(" data_pool_id = %lld\n", *data_pool_id);
++ WARN_ON(*data_pool_id == CEPH_NOPOOL);
++
+ return 0;
+ }
+
+@@ -6068,7 +6069,8 @@ out_err:
+ return ret;
+ }
+
+-static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev,
++ struct ceph_snap_context **psnapc)
+ {
+ size_t size;
+ int ret;
+@@ -6129,9 +6131,7 @@ static int rbd_dev_v2_snap_context(struc
+ for (i = 0; i < snap_count; i++)
+ snapc->snaps[i] = ceph_decode_64(&p);
+
+- ceph_put_snap_context(rbd_dev->header.snapc);
+- rbd_dev->header.snapc = snapc;
+-
++ *psnapc = snapc;
+ dout(" snap context seq = %llu, snap_count = %u\n",
+ (unsigned long long)seq, (unsigned int)snap_count);
+ out:
+@@ -6180,38 +6180,42 @@ out:
+ return snap_name;
+ }
+
+-static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev,
++ struct rbd_image_header *header,
++ bool first_time)
+ {
+- bool first_time = rbd_dev->header.object_prefix == NULL;
+ int ret;
+
+- ret = rbd_dev_v2_image_size(rbd_dev);
++ ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
++ first_time ? &header->obj_order : NULL,
++ &header->image_size);
+ if (ret)
+ return ret;
+
+ if (first_time) {
+- ret = rbd_dev_v2_header_onetime(rbd_dev);
++ ret = rbd_dev_v2_header_onetime(rbd_dev, header);
+ if (ret)
+ return ret;
+ }
+
+- ret = rbd_dev_v2_snap_context(rbd_dev);
+- if (ret && first_time) {
+- kfree(rbd_dev->header.object_prefix);
+- rbd_dev->header.object_prefix = NULL;
+- }
++ ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc);
++ if (ret)
++ return ret;
+
+- return ret;
++ return 0;
+ }
+
+-static int rbd_dev_header_info(struct rbd_device *rbd_dev)
++static int rbd_dev_header_info(struct rbd_device *rbd_dev,
++ struct rbd_image_header *header,
++ bool first_time)
+ {
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
++ rbd_assert(!header->object_prefix && !header->snapc);
+
+ if (rbd_dev->image_format == 1)
+- return rbd_dev_v1_header_info(rbd_dev);
++ return rbd_dev_v1_header_info(rbd_dev, header, first_time);
+
+- return rbd_dev_v2_header_info(rbd_dev);
++ return rbd_dev_v2_header_info(rbd_dev, header, first_time);
+ }
+
+ /*
+@@ -6699,60 +6703,49 @@ out:
+ */
+ static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
+ {
+- struct rbd_image_header *header;
+-
+ rbd_dev_parent_put(rbd_dev);
+ rbd_object_map_free(rbd_dev);
+ rbd_dev_mapping_clear(rbd_dev);
+
+ /* Free dynamic fields from the header, then zero it out */
+
+- header = &rbd_dev->header;
+- ceph_put_snap_context(header->snapc);
+- kfree(header->snap_sizes);
+- kfree(header->snap_names);
+- kfree(header->object_prefix);
+- memset(header, 0, sizeof (*header));
++ rbd_image_header_cleanup(&rbd_dev->header);
+ }
+
+-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
++ struct rbd_image_header *header)
+ {
+ int ret;
+
+- ret = rbd_dev_v2_object_prefix(rbd_dev);
++ ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix);
+ if (ret)
+- goto out_err;
++ return ret;
+
+ /*
+ * Get the and check features for the image. Currently the
+ * features are assumed to never change.
+ */
+- ret = rbd_dev_v2_features(rbd_dev);
++ ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
++ rbd_is_ro(rbd_dev), &header->features);
+ if (ret)
+- goto out_err;
++ return ret;
+
+ /* If the image supports fancy striping, get its parameters */
+
+- if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) {
+- ret = rbd_dev_v2_striping_info(rbd_dev);
+- if (ret < 0)
+- goto out_err;
++ if (header->features & RBD_FEATURE_STRIPINGV2) {
++ ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit,
++ &header->stripe_count);
++ if (ret)
++ return ret;
+ }
+
+- if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) {
+- ret = rbd_dev_v2_data_pool(rbd_dev);
++ if (header->features & RBD_FEATURE_DATA_POOL) {
++ ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id);
+ if (ret)
+- goto out_err;
++ return ret;
+ }
+
+- rbd_init_layout(rbd_dev);
+ return 0;
+-
+-out_err:
+- rbd_dev->header.features = 0;
+- kfree(rbd_dev->header.object_prefix);
+- rbd_dev->header.object_prefix = NULL;
+- return ret;
+ }
+
+ /*
+@@ -6947,13 +6940,15 @@ static int rbd_dev_image_probe(struct rb
+ if (!depth)
+ down_write(&rbd_dev->header_rwsem);
+
+- ret = rbd_dev_header_info(rbd_dev);
++ ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true);
+ if (ret) {
+ if (ret == -ENOENT && !need_watch)
+ rbd_print_dne(rbd_dev, false);
+ goto err_out_probe;
+ }
+
++ rbd_init_layout(rbd_dev);
++
+ /*
+ * If this image is the one being mapped, we have pool name and
+ * id, image name and id, and snap name - need to fill snap id.
+@@ -7008,15 +7003,39 @@ err_out_format:
+ return ret;
+ }
+
++static void rbd_dev_update_header(struct rbd_device *rbd_dev,
++ struct rbd_image_header *header)
++{
++ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
++ rbd_assert(rbd_dev->header.object_prefix); /* !first_time */
++
++ rbd_dev->header.image_size = header->image_size;
++
++ ceph_put_snap_context(rbd_dev->header.snapc);
++ rbd_dev->header.snapc = header->snapc;
++ header->snapc = NULL;
++
++ if (rbd_dev->image_format == 1) {
++ kfree(rbd_dev->header.snap_names);
++ rbd_dev->header.snap_names = header->snap_names;
++ header->snap_names = NULL;
++
++ kfree(rbd_dev->header.snap_sizes);
++ rbd_dev->header.snap_sizes = header->snap_sizes;
++ header->snap_sizes = NULL;
++ }
++}
++
+ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+ {
++ struct rbd_image_header header = { 0 };
+ u64 mapping_size;
+ int ret;
+
+ down_write(&rbd_dev->header_rwsem);
+ mapping_size = rbd_dev->mapping.size;
+
+- ret = rbd_dev_header_info(rbd_dev);
++ ret = rbd_dev_header_info(rbd_dev, &header, false);
+ if (ret)
+ goto out;
+
+@@ -7030,6 +7049,8 @@ static int rbd_dev_refresh(struct rbd_de
+ goto out;
+ }
+
++ rbd_dev_update_header(rbd_dev, &header);
++
+ rbd_assert(!rbd_is_snap(rbd_dev));
+ rbd_dev->mapping.size = rbd_dev->header.image_size;
+
+@@ -7038,6 +7059,7 @@ out:
+ if (!ret && mapping_size != rbd_dev->mapping.size)
+ rbd_dev_update_size(rbd_dev);
+
++ rbd_image_header_cleanup(&header);
+ return ret;
+ }
+
--- /dev/null
+From c10311776f0a8ddea2276df96e255625b07045a8 Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Wed, 20 Sep 2023 18:38:26 +0200
+Subject: rbd: decouple parent info read-in from updating rbd_dev
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit c10311776f0a8ddea2276df96e255625b07045a8 upstream.
+
+Unlike header read-in, parent info read-in is already decoupled in
+get_parent_info(), but it's buried in rbd_dev_v2_parent_info() along
+with the processing logic.
+
+Separate the initial read-in and the refresh-time update logic into
+rbd_dev_setup_parent() and rbd_dev_update_parent() respectively, and
+have rbd_dev_v2_parent_info() just populate struct parent_image_info
+(i.e. what get_parent_info() did). Some existing QoI issues, such as
+the flattening of a standalone clone being disregarded on refresh,
+remain.
+
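+A minimal sketch of the resulting refresh-path usage, built only from
+the helpers visible in the diff below:
+
+	struct parent_image_info pii = { 0 };
+	int ret;
+
+	ret = rbd_dev_v2_parent_info(rbd_dev, &pii);	/* read-in only */
+	if (!ret)
+		rbd_dev_update_parent(rbd_dev, &pii);	/* apply to rbd_dev */
+	rbd_parent_info_cleanup(&pii);
+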
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/rbd.c | 144 +++++++++++++++++++++++++++++-----------------------
+ 1 file changed, 81 insertions(+), 63 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -5594,6 +5594,14 @@ struct parent_image_info {
+ u64 overlap;
+ };
+
++static void rbd_parent_info_cleanup(struct parent_image_info *pii)
++{
++ kfree(pii->pool_ns);
++ kfree(pii->image_id);
++
++ memset(pii, 0, sizeof(*pii));
++}
++
+ /*
+ * The caller is responsible for @pii.
+ */
+@@ -5663,6 +5671,9 @@ static int __get_parent_info(struct rbd_
+ if (pii->has_overlap)
+ ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
+
++ dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
++ __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
++ pii->has_overlap, pii->overlap);
+ return 0;
+
+ e_inval:
+@@ -5701,14 +5712,17 @@ static int __get_parent_info_legacy(stru
+ pii->has_overlap = true;
+ ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
+
++ dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
++ __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
++ pii->has_overlap, pii->overlap);
+ return 0;
+
+ e_inval:
+ return -EINVAL;
+ }
+
+-static int get_parent_info(struct rbd_device *rbd_dev,
+- struct parent_image_info *pii)
++static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev,
++ struct parent_image_info *pii)
+ {
+ struct page *req_page, *reply_page;
+ void *p;
+@@ -5736,7 +5750,7 @@ static int get_parent_info(struct rbd_de
+ return ret;
+ }
+
+-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
++static int rbd_dev_setup_parent(struct rbd_device *rbd_dev)
+ {
+ struct rbd_spec *parent_spec;
+ struct parent_image_info pii = { 0 };
+@@ -5746,37 +5760,12 @@ static int rbd_dev_v2_parent_info(struct
+ if (!parent_spec)
+ return -ENOMEM;
+
+- ret = get_parent_info(rbd_dev, &pii);
++ ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
+ if (ret)
+ goto out_err;
+
+- dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
+- __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id,
+- pii.has_overlap, pii.overlap);
+-
+- if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) {
+- /*
+- * Either the parent never existed, or we have
+- * record of it but the image got flattened so it no
+- * longer has a parent. When the parent of a
+- * layered image disappears we immediately set the
+- * overlap to 0. The effect of this is that all new
+- * requests will be treated as if the image had no
+- * parent.
+- *
+- * If !pii.has_overlap, the parent image spec is not
+- * applicable. It's there to avoid duplication in each
+- * snapshot record.
+- */
+- if (rbd_dev->parent_overlap) {
+- rbd_dev->parent_overlap = 0;
+- rbd_dev_parent_put(rbd_dev);
+- pr_info("%s: clone image has been flattened\n",
+- rbd_dev->disk->disk_name);
+- }
+-
++ if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap)
+ goto out; /* No parent? No problem. */
+- }
+
+ /* The ceph file layout needs to fit pool id in 32 bits */
+
+@@ -5788,46 +5777,34 @@ static int rbd_dev_v2_parent_info(struct
+ }
+
+ /*
+- * The parent won't change (except when the clone is
+- * flattened, already handled that). So we only need to
+- * record the parent spec we have not already done so.
++ * The parent won't change except when the clone is flattened,
++ * so we only need to record the parent image spec once.
+ */
+- if (!rbd_dev->parent_spec) {
+- parent_spec->pool_id = pii.pool_id;
+- if (pii.pool_ns && *pii.pool_ns) {
+- parent_spec->pool_ns = pii.pool_ns;
+- pii.pool_ns = NULL;
+- }
+- parent_spec->image_id = pii.image_id;
+- pii.image_id = NULL;
+- parent_spec->snap_id = pii.snap_id;
+-
+- rbd_dev->parent_spec = parent_spec;
+- parent_spec = NULL; /* rbd_dev now owns this */
+- }
++ parent_spec->pool_id = pii.pool_id;
++ if (pii.pool_ns && *pii.pool_ns) {
++ parent_spec->pool_ns = pii.pool_ns;
++ pii.pool_ns = NULL;
++ }
++ parent_spec->image_id = pii.image_id;
++ pii.image_id = NULL;
++ parent_spec->snap_id = pii.snap_id;
++
++ rbd_assert(!rbd_dev->parent_spec);
++ rbd_dev->parent_spec = parent_spec;
++ parent_spec = NULL; /* rbd_dev now owns this */
+
+ /*
+- * We always update the parent overlap. If it's zero we issue
+- * a warning, as we will proceed as if there was no parent.
++ * Record the parent overlap. If it's zero, issue a warning as
++ * we will proceed as if there is no parent.
+ */
+- if (!pii.overlap) {
+- if (parent_spec) {
+- /* refresh, careful to warn just once */
+- if (rbd_dev->parent_overlap)
+- rbd_warn(rbd_dev,
+- "clone now standalone (overlap became 0)");
+- } else {
+- /* initial probe */
+- rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
+- }
+- }
++ if (!pii.overlap)
++ rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
+ rbd_dev->parent_overlap = pii.overlap;
+
+ out:
+ ret = 0;
+ out_err:
+- kfree(pii.pool_ns);
+- kfree(pii.image_id);
++ rbd_parent_info_cleanup(&pii);
+ rbd_spec_put(parent_spec);
+ return ret;
+ }
+@@ -6977,7 +6954,7 @@ static int rbd_dev_image_probe(struct rb
+ }
+
+ if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
+- ret = rbd_dev_v2_parent_info(rbd_dev);
++ ret = rbd_dev_setup_parent(rbd_dev);
+ if (ret)
+ goto err_out_probe;
+ }
+@@ -7026,9 +7003,47 @@ static void rbd_dev_update_header(struct
+ }
+ }
+
++static void rbd_dev_update_parent(struct rbd_device *rbd_dev,
++ struct parent_image_info *pii)
++{
++ if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) {
++ /*
++ * Either the parent never existed, or we have
++ * record of it but the image got flattened so it no
++ * longer has a parent. When the parent of a
++ * layered image disappears we immediately set the
++ * overlap to 0. The effect of this is that all new
++ * requests will be treated as if the image had no
++ * parent.
++ *
++ * If !pii.has_overlap, the parent image spec is not
++ * applicable. It's there to avoid duplication in each
++ * snapshot record.
++ */
++ if (rbd_dev->parent_overlap) {
++ rbd_dev->parent_overlap = 0;
++ rbd_dev_parent_put(rbd_dev);
++ pr_info("%s: clone has been flattened\n",
++ rbd_dev->disk->disk_name);
++ }
++ } else {
++ rbd_assert(rbd_dev->parent_spec);
++
++ /*
++ * Update the parent overlap. If it became zero, issue
++ * a warning as we will proceed as if there is no parent.
++ */
++ if (!pii->overlap && rbd_dev->parent_overlap)
++ rbd_warn(rbd_dev,
++ "clone has become standalone (overlap 0)");
++ rbd_dev->parent_overlap = pii->overlap;
++ }
++}
++
+ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+ {
+ struct rbd_image_header header = { 0 };
++ struct parent_image_info pii = { 0 };
+ u64 mapping_size;
+ int ret;
+
+@@ -7044,12 +7059,14 @@ static int rbd_dev_refresh(struct rbd_de
+ * mapped image getting flattened.
+ */
+ if (rbd_dev->parent) {
+- ret = rbd_dev_v2_parent_info(rbd_dev);
++ ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
+ if (ret)
+ goto out;
+ }
+
+ rbd_dev_update_header(rbd_dev, &header);
++ if (rbd_dev->parent)
++ rbd_dev_update_parent(rbd_dev, &pii);
+
+ rbd_assert(!rbd_is_snap(rbd_dev));
+ rbd_dev->mapping.size = rbd_dev->header.image_size;
+@@ -7059,6 +7076,7 @@ out:
+ if (!ret && mapping_size != rbd_dev->mapping.size)
+ rbd_dev_update_size(rbd_dev);
+
++ rbd_parent_info_cleanup(&pii);
+ rbd_image_header_cleanup(&header);
+ return ret;
+ }
--- /dev/null
+From 0b035401c57021fc6c300272cbb1c5a889d4fe45 Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Sun, 17 Sep 2023 15:07:40 +0200
+Subject: rbd: move rbd_dev_refresh() definition
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 0b035401c57021fc6c300272cbb1c5a889d4fe45 upstream.
+
+Move rbd_dev_refresh() definition further down to avoid having to
+move struct parent_image_info definition in the next commit. This
+spares some forward declarations too.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/rbd.c | 68 +++++++++++++++++++++++++---------------------------
+ 1 file changed, 33 insertions(+), 35 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -633,8 +633,6 @@ static void rbd_dev_remove_parent(struct
+
+ static int rbd_dev_refresh(struct rbd_device *rbd_dev);
+ static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
+-static int rbd_dev_header_info(struct rbd_device *rbd_dev);
+-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev);
+ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+ u64 snap_id);
+ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+@@ -4931,39 +4929,6 @@ static void rbd_dev_update_size(struct r
+ }
+ }
+
+-static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+-{
+- u64 mapping_size;
+- int ret;
+-
+- down_write(&rbd_dev->header_rwsem);
+- mapping_size = rbd_dev->mapping.size;
+-
+- ret = rbd_dev_header_info(rbd_dev);
+- if (ret)
+- goto out;
+-
+- /*
+- * If there is a parent, see if it has disappeared due to the
+- * mapped image getting flattened.
+- */
+- if (rbd_dev->parent) {
+- ret = rbd_dev_v2_parent_info(rbd_dev);
+- if (ret)
+- goto out;
+- }
+-
+- rbd_assert(!rbd_is_snap(rbd_dev));
+- rbd_dev->mapping.size = rbd_dev->header.image_size;
+-
+-out:
+- up_write(&rbd_dev->header_rwsem);
+- if (!ret && mapping_size != rbd_dev->mapping.size)
+- rbd_dev_update_size(rbd_dev);
+-
+- return ret;
+-}
+-
+ static const struct blk_mq_ops rbd_mq_ops = {
+ .queue_rq = rbd_queue_rq,
+ };
+@@ -7043,6 +7008,39 @@ err_out_format:
+ return ret;
+ }
+
++static int rbd_dev_refresh(struct rbd_device *rbd_dev)
++{
++ u64 mapping_size;
++ int ret;
++
++ down_write(&rbd_dev->header_rwsem);
++ mapping_size = rbd_dev->mapping.size;
++
++ ret = rbd_dev_header_info(rbd_dev);
++ if (ret)
++ goto out;
++
++ /*
++ * If there is a parent, see if it has disappeared due to the
++ * mapped image getting flattened.
++ */
++ if (rbd_dev->parent) {
++ ret = rbd_dev_v2_parent_info(rbd_dev);
++ if (ret)
++ goto out;
++ }
++
++ rbd_assert(!rbd_is_snap(rbd_dev));
++ rbd_dev->mapping.size = rbd_dev->header.image_size;
++
++out:
++ up_write(&rbd_dev->header_rwsem);
++ if (!ret && mapping_size != rbd_dev->mapping.size)
++ rbd_dev_update_size(rbd_dev);
++
++ return ret;
++}
++
+ static ssize_t do_rbd_add(const char *buf, size_t count)
+ {
+ struct rbd_device *rbd_dev = NULL;
--- /dev/null
+From 0b207d02bd9ab8dcc31b262ca9f60dbc1822500d Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Wed, 20 Sep 2023 19:01:03 +0200
+Subject: rbd: take header_rwsem in rbd_dev_refresh() only when updating
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 0b207d02bd9ab8dcc31b262ca9f60dbc1822500d upstream.
+
+rbd_dev_refresh() has been holding header_rwsem across header and
+parent info read-in unnecessarily for ages. With commit 870611e4877e
+("rbd: get snapshot context after exclusive lock is ensured to be
+held"), the potential for deadlocks became much more real owning to
+a) header_rwsem now nesting inside lock_rwsem and b) rw_semaphores
+not allowing new readers after a writer is registered.
+
+For example, assuming that I/O request 1, I/O request 2 and header
+read-in request all target the same OSD:
+
+1. I/O request 1 comes in and gets submitted
+2. watch error occurs
+3. rbd_watch_errcb() takes lock_rwsem for write, clears owner_cid and
+ releases lock_rwsem
+4. after reestablishing the watch, rbd_reregister_watch() calls
+ rbd_dev_refresh() which takes header_rwsem for write and submits
+ a header read-in request
+5. I/O request 2 comes in: after taking lock_rwsem for read in
+ __rbd_img_handle_request(), it blocks trying to take header_rwsem
+ for read in rbd_img_object_requests()
+6. another watch error occurs
+7. rbd_watch_errcb() blocks trying to take lock_rwsem for write
+8. I/O request 1 completion is received by the messenger but can't be
+ processed because lock_rwsem won't be granted anymore
+9. header read-in request completion can't be received, let alone
+ processed, because the messenger is stranded
+
+Change rbd_dev_refresh() to take header_rwsem only for actually
+updating rbd_dev->header. Header and parent info read-in don't need
+any locking.
+
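+Condensed from the diff below, the resulting shape of rbd_dev_refresh()
+is (read-in unlocked, only the update serialized):
+
+	ret = rbd_dev_header_info(rbd_dev, &header, false);	/* unlocked */
+	...
+	down_write(&rbd_dev->header_rwsem);
+	rbd_dev_update_header(rbd_dev, &header);
+	if (rbd_dev->parent)
+		rbd_dev_update_parent(rbd_dev, &pii);
+	up_write(&rbd_dev->header_rwsem);
+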
+Cc: stable@vger.kernel.org # 0b035401c570: rbd: move rbd_dev_refresh() definition
+Cc: stable@vger.kernel.org # 510a7330c82a: rbd: decouple header read-in from updating rbd_dev->header
+Cc: stable@vger.kernel.org # c10311776f0a: rbd: decouple parent info read-in from updating rbd_dev
+Cc: stable@vger.kernel.org
+Fixes: 870611e4877e ("rbd: get snapshot context after exclusive lock is ensured to be held")
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/rbd.c | 22 +++++++++++-----------
+ 1 file changed, 11 insertions(+), 11 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -6986,7 +6986,14 @@ static void rbd_dev_update_header(struct
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ rbd_assert(rbd_dev->header.object_prefix); /* !first_time */
+
+- rbd_dev->header.image_size = header->image_size;
++ if (rbd_dev->header.image_size != header->image_size) {
++ rbd_dev->header.image_size = header->image_size;
++
++ if (!rbd_is_snap(rbd_dev)) {
++ rbd_dev->mapping.size = header->image_size;
++ rbd_dev_update_size(rbd_dev);
++ }
++ }
+
+ ceph_put_snap_context(rbd_dev->header.snapc);
+ rbd_dev->header.snapc = header->snapc;
+@@ -7044,11 +7051,9 @@ static int rbd_dev_refresh(struct rbd_de
+ {
+ struct rbd_image_header header = { 0 };
+ struct parent_image_info pii = { 0 };
+- u64 mapping_size;
+ int ret;
+
+- down_write(&rbd_dev->header_rwsem);
+- mapping_size = rbd_dev->mapping.size;
++ dout("%s rbd_dev %p\n", __func__, rbd_dev);
+
+ ret = rbd_dev_header_info(rbd_dev, &header, false);
+ if (ret)
+@@ -7064,18 +7069,13 @@ static int rbd_dev_refresh(struct rbd_de
+ goto out;
+ }
+
++ down_write(&rbd_dev->header_rwsem);
+ rbd_dev_update_header(rbd_dev, &header);
+ if (rbd_dev->parent)
+ rbd_dev_update_parent(rbd_dev, &pii);
+-
+- rbd_assert(!rbd_is_snap(rbd_dev));
+- rbd_dev->mapping.size = rbd_dev->header.image_size;
+-
+-out:
+ up_write(&rbd_dev->header_rwsem);
+- if (!ret && mapping_size != rbd_dev->mapping.size)
+- rbd_dev_update_size(rbd_dev);
+
++out:
+ rbd_parent_info_cleanup(&pii);
+ rbd_image_header_cleanup(&header);
+ return ret;
--- /dev/null
+From a275ab62606bcd894ddff09460f7d253828313dc Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Sun, 17 Sep 2023 19:26:46 -0400
+Subject: Revert "SUNRPC dont update timeout value on connection reset"
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+commit a275ab62606bcd894ddff09460f7d253828313dc upstream.
+
+This reverts commit 88428cc4ae7abcc879295fbb19373dd76aad2bdd.
+
+The problem the reverted commit was intended to fix was comprehensively
+fixed in commit 7de62bc09fe6 ("SUNRPC dont update timeout value on
+connection reset"). Since then, the reverted commit has been preventing
+the correct timeout of soft-mounted requests.
+
+Cc: stable@vger.kernel.org # 5.9.x: 09252177d5f9: SUNRPC: Handle major timeout in xprt_adjust_timeout()
+Cc: stable@vger.kernel.org # 5.9.x: 7de62bc09fe6: SUNRPC dont update timeout value on connection reset
+Cc: stable@vger.kernel.org # 5.9.x
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sunrpc/clnt.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/net/sunrpc/clnt.c
++++ b/net/sunrpc/clnt.c
+@@ -2474,8 +2474,7 @@ call_status(struct rpc_task *task)
+ goto out_exit;
+ }
+ task->tk_action = call_encode;
+- if (status != -ECONNRESET && status != -ECONNABORTED)
+- rpc_check_timeout(task);
++ rpc_check_timeout(task);
+ return;
+ out_exit:
+ rpc_call_rpcerror(task, status);
--- /dev/null
+From 45d99ea451d0c30bfd4864f0fe485d7dac014902 Mon Sep 17 00:00:00 2001
+From: Zheng Yejian <zhengyejian1@huawei.com>
+Date: Thu, 21 Sep 2023 20:54:25 +0800
+Subject: ring-buffer: Fix bytes info in per_cpu buffer stats
+
+From: Zheng Yejian <zhengyejian1@huawei.com>
+
+commit 45d99ea451d0c30bfd4864f0fe485d7dac014902 upstream.
+
+The 'bytes' info in the file 'per_cpu/cpu<X>/stats' is the number of
+bytes in the cpu buffer that have not been consumed. However, currently,
+after consuming data by reading the file 'trace_pipe', the 'bytes' info
+is not updated as expected.
+
+ # cat per_cpu/cpu0/stats
+ entries: 0
+ overrun: 0
+ commit overrun: 0
+ bytes: 568 <--- 'bytes' is problematic !!!
+ oldest event ts: 8651.371479
+ now ts: 8653.912224
+ dropped events: 0
+ read events: 8
+
+The root cause is incorrect accounting of cpu_buffer->read_bytes. To fix it:
+ 1. When computing 'read_bytes', account for the consumed event in
+    rb_advance_reader();
+ 2. When computing 'entries_bytes', exclude the discarded padding event
+    that is smaller than the minimum size, because it is invisible to the
+    reader. Then use rb_page_commit() instead of BUF_PAGE_SIZE when
+    accounting for page-based read/remove/overrun.
+
+Also correct the comment of ring_buffer_bytes_cpu() in this patch.
+
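+For context, the stat is derived from the two counters this patch
+corrects; roughly what ring_buffer_bytes_cpu() computes:
+
+	/* bytes produced into the cpu buffer minus bytes already consumed */
+	ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
+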
+Link: https://lore.kernel.org/linux-trace-kernel/20230921125425.1708423-1-zhengyejian1@huawei.com
+
+Cc: stable@vger.kernel.org
+Fixes: c64e148a3be3 ("trace: Add ring buffer stats to measure rate of events")
+Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/ring_buffer.c | 28 +++++++++++++++-------------
+ 1 file changed, 15 insertions(+), 13 deletions(-)
+
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -354,6 +354,11 @@ static void rb_init_page(struct buffer_d
+ local_set(&bpage->commit, 0);
+ }
+
++static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage)
++{
++ return local_read(&bpage->page->commit);
++}
++
+ static void free_buffer_page(struct buffer_page *bpage)
+ {
+ free_page((unsigned long)bpage->page);
+@@ -2011,7 +2016,7 @@ rb_remove_pages(struct ring_buffer_per_c
+ * Increment overrun to account for the lost events.
+ */
+ local_add(page_entries, &cpu_buffer->overrun);
+- local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
++ local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes);
+ local_inc(&cpu_buffer->pages_lost);
+ }
+
+@@ -2375,11 +2380,6 @@ rb_reader_event(struct ring_buffer_per_c
+ cpu_buffer->reader_page->read);
+ }
+
+-static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
+-{
+- return local_read(&bpage->page->commit);
+-}
+-
+ static struct ring_buffer_event *
+ rb_iter_head_event(struct ring_buffer_iter *iter)
+ {
+@@ -2525,7 +2525,7 @@ rb_handle_head_page(struct ring_buffer_p
+ * the counters.
+ */
+ local_add(entries, &cpu_buffer->overrun);
+- local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
++ local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes);
+ local_inc(&cpu_buffer->pages_lost);
+
+ /*
+@@ -2668,9 +2668,6 @@ rb_reset_tail(struct ring_buffer_per_cpu
+
+ event = __rb_page_index(tail_page, tail);
+
+- /* account for padding bytes */
+- local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
+-
+ /*
+ * Save the original length to the meta data.
+ * This will be used by the reader to add lost event
+@@ -2684,7 +2681,8 @@ rb_reset_tail(struct ring_buffer_per_cpu
+ * write counter enough to allow another writer to slip
+ * in on this page.
+ * We put in a discarded commit instead, to make sure
+- * that this space is not used again.
++ * that this space is not used again, and this space will
++ * not be accounted into 'entries_bytes'.
+ *
+ * If we are less than the minimum size, we don't need to
+ * worry about it.
+@@ -2709,6 +2707,9 @@ rb_reset_tail(struct ring_buffer_per_cpu
+ /* time delta must be non zero */
+ event->time_delta = 1;
+
++ /* account for padding bytes */
++ local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
++
+ /* Make sure the padding is visible before the tail_page->write update */
+ smp_wmb();
+
+@@ -4223,7 +4224,7 @@ u64 ring_buffer_oldest_event_ts(struct t
+ EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
+
+ /**
+- * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer
++ * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to read from.
+ */
+@@ -4731,6 +4732,7 @@ static void rb_advance_reader(struct rin
+
+ length = rb_event_length(event);
+ cpu_buffer->reader_page->read += length;
++ cpu_buffer->read_bytes += length;
+ }
+
+ static void rb_advance_iter(struct ring_buffer_iter *iter)
+@@ -5824,7 +5826,7 @@ int ring_buffer_read_page(struct trace_b
+ } else {
+ /* update the entry counter */
+ cpu_buffer->read += rb_page_entries(reader);
+- cpu_buffer->read_bytes += BUF_PAGE_SIZE;
++ cpu_buffer->read_bytes += rb_page_commit(reader);
+
+ /* swap the pages */
+ rb_init_page(bpage);
--- /dev/null
+From 1e0cb399c7653462d9dadf8ab9425337c355d358 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Fri, 29 Sep 2023 18:01:13 -0400
+Subject: ring-buffer: Update "shortest_full" in polling
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit 1e0cb399c7653462d9dadf8ab9425337c355d358 upstream.
+
+It was discovered that ring buffer polling was incorrectly reporting
+that reads would not block, because polling did not take into account
+that reads will block when "buffer-percent" is set. Instead,
+the ring buffer polling would say reads would not block if there was any
+data in the ring buffer. This was incorrect behavior from a user space
+point of view. This was fixed by commit 42fb0a1e84ff by having the polling
+code check whether the ring buffer held more data than the user-specified
+"buffer percent" threshold.
+
+The problem now is that the polling code did not register itself with the
+writer that it wanted to wait for a specific "full" value of the ring
+buffer. The result was that the writer would wake the polling waiter
+whenever there was a new event. The polling waiter would then wake up, see
+that there's not enough data in the ring buffer to notify user space and
+then go back to sleep. The next event would wake it up again.
+
+Before the polling fix was added, the code would wake up around 100 times
+for a hackbench 30 benchmark. After the "fix", due to the constant waking
+by the writer, it would wake up over 11,000 times! It would never leave
+the kernel, so the user space behavior was still "correct", but this
+definitely is not the desired effect.
+
+To fix this, have the polling code add what it's waiting for to the
+"shortest_full" variable, to tell the writer not to wake it up if the
+buffer is not as full as it expects to be.
+
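+On the writer side this pairs, roughly, with a check built around the
+full_hit() helper introduced by commit 42fb0a1e84ff (a sketch, not a
+verbatim quote):
+
+	/* skip the full-waiter wakeup until the least-demanding waiter,
+	 * recorded in shortest_full, would actually be satisfied */
+	full = cpu_buffer->shortest_full;
+	if (full && !full_hit(buffer, cpu, full))
+		return;
+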
+Note, after this fix, it appears that the waiter is now woken up around
+twice as often as it was before (~200 times). This is a tremendous
+improvement over the 11,000 times, but I will need to spend some time to
+see why polling is
+more aggressive in its wakeups than the read blocking code.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20230929180113.01c2cae3@rorschach.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Fixes: 42fb0a1e84ff ("tracing/ring-buffer: Have polling block on watermark")
+Reported-by: Julia Lawall <julia.lawall@inria.fr>
+Tested-by: Julia Lawall <julia.lawall@inria.fr>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/ring_buffer.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -1142,6 +1142,9 @@ __poll_t ring_buffer_poll_wait(struct tr
+ if (full) {
+ poll_wait(filp, &work->full_waiters, poll_table);
+ work->full_waiters_pending = true;
++ if (!cpu_buffer->shortest_full ||
++ cpu_buffer->shortest_full > full)
++ cpu_buffer->shortest_full = full;
+ } else {
+ poll_wait(filp, &work->waiters, poll_table);
+ work->waiters_pending = true;
--- /dev/null
+From bbe246f875d064ecfb872fe4f66152e743dfd22d Mon Sep 17 00:00:00 2001
+From: Juntong Deng <juntong.deng@outlook.com>
+Date: Wed, 27 Sep 2023 02:19:44 +0800
+Subject: selftests/mm: fix awk usage in charge_reserved_hugetlb.sh and hugetlb_reparenting_test.sh that may cause error
+
+From: Juntong Deng <juntong.deng@outlook.com>
+
+commit bbe246f875d064ecfb872fe4f66152e743dfd22d upstream.
+
+According to the awk manual, the -e option does not need to be specified
+in front of 'program' (unless it needs to be mixed with -f program
+files).
+
+The redundant -e option can cause errors when users use awk
+implementations other than gawk (for example, mawk does not support the
+-e option).
+
+Error Example:
+awk: not an option: -e
+
+Link: https://lkml.kernel.org/r/VI1P193MB075228810591AF2FDD7D42C599C3A@VI1P193MB0752.EURP193.PROD.OUTLOOK.COM
+Signed-off-by: Juntong Deng <juntong.deng@outlook.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/mm/charge_reserved_hugetlb.sh | 4 ++--
+ tools/testing/selftests/mm/hugetlb_reparenting_test.sh | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
++++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
+@@ -25,7 +25,7 @@ if [[ "$1" == "-cgroup-v2" ]]; then
+ fi
+
+ if [[ $cgroup2 ]]; then
+- cgroup_path=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
++ cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=/dev/cgroup/memory
+ mount -t cgroup2 none $cgroup_path
+@@ -33,7 +33,7 @@ if [[ $cgroup2 ]]; then
+ fi
+ echo "+hugetlb" >$cgroup_path/cgroup.subtree_control
+ else
+- cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
++ cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=/dev/cgroup/memory
+ mount -t cgroup memory,hugetlb $cgroup_path
+--- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
++++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
+@@ -20,7 +20,7 @@ fi
+
+
+ if [[ $cgroup2 ]]; then
+- CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
++ CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=/dev/cgroup/memory
+ mount -t cgroup2 none $CGROUP_ROOT
+@@ -28,7 +28,7 @@ if [[ $cgroup2 ]]; then
+ fi
+ echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
+ else
+- CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
++ CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=/dev/cgroup/memory
+ mount -t cgroup memory,hugetlb $CGROUP_ROOT
asoc-amd-yc-fix-non-functional-mic-on-lenovo-82qf-and-82ug.patch
kernel-sched-modify-initial-boot-task-idle-setup.patch
sched-rt-fix-live-lock-between-select_fallback_rq-and-rt-push.patch
+revert-sunrpc-dont-update-timeout-value-on-connection-reset.patch
+nfsv4-fix-a-state-manager-thread-deadlock-regression.patch
+acpi-nfit-fix-incorrect-calculation-of-idt-size.patch
+timers-tag-hr-timer-softirq-as-hotplug-safe.patch
+drm-tests-fix-incorrect-argument-in-drm_test_mm_insert_range.patch
+cxl-mbox-fix-cel-logic-for-poison-and-security-commands.patch
+arm64-defconfig-remove-config_common_clk_npcm8xx-y.patch
+mm-damon-vaddr-test-fix-memory-leak-in-damon_do_test_apply_three_regions.patch
+selftests-mm-fix-awk-usage-in-charge_reserved_hugetlb.sh-and-hugetlb_reparenting_test.sh-that-may-cause-error.patch
+mm-mempolicy-keep-vma-walk-if-both-mpol_mf_strict-and-mpol_mf_move-are-specified.patch
+mm-slab_common-fix-slab_caches-list-corruption-after-kmem_cache_destroy.patch
+mm-page_alloc-fix-cma-and-highatomic-landing-on-the-wrong-buddy-list.patch
+mm-memcontrol-fix-gfp_nofs-recursion-in-memory.high-enforcement.patch
+cxl-port-fix-cxl_test-register-enumeration-regression.patch
+cxl-pci-fix-appropriate-checking-for-_osc-while-handling-cxl-ras-registers.patch
+ring-buffer-fix-bytes-info-in-per_cpu-buffer-stats.patch
+ring-buffer-update-shortest_full-in-polling.patch
+btrfs-refresh-dir-last-index-during-a-rewinddir-3-call.patch
+btrfs-file_remove_privs-needs-an-exclusive-lock-in-direct-io-write.patch
+btrfs-set-last-dir-index-to-the-current-last-index-when-opening-dir.patch
+btrfs-fix-race-between-reading-a-directory-and-adding-entries-to-it.patch
+btrfs-properly-report-0-avail-for-very-full-file-systems.patch
+media-uvcvideo-fix-oob-read.patch
+bpf-add-override-check-to-kprobe-multi-link-attach.patch
+bpf-fix-btf_id-symbol-generation-collision.patch
+bpf-fix-btf_id-symbol-generation-collision-in-tools.patch
+net-thunderbolt-fix-tcpv6-gso-checksum-calculation.patch
+thermal-sysfs-fix-trip_point_hyst_store.patch
+fs-smb-client-reset-password-pointer-to-null.patch
+tracing-user_events-align-set_bit-address-for-all-archs.patch
+ata-libata-core-fix-ata_port_request_pm-locking.patch
+ata-libata-core-fix-port-and-device-removal.patch
+ata-libata-core-do-not-register-pm-operations-for-sas-ports.patch
+ata-libata-sata-increase-pmp-srst-timeout-to-10s.patch
+i915-limit-the-length-of-an-sg-list-to-the-requested-length.patch
+drm-i915-gt-fix-reservation-address-in-ggtt_reserve_guc_top.patch
+power-supply-rk817-add-missing-module-alias.patch
+power-supply-ab8500-set-typing-and-props.patch
+fs-binfmt_elf_efpic-fix-personality-for-elf-fdpic.patch
+drm-amdkfd-use-gpu_offset-for-user-queue-s-wptr.patch
+drm-amd-display-fix-the-ability-to-use-lower-resolution-modes-on-edp.patch
+drm-meson-fix-memory-leak-on-hpd_notify-callback.patch
+rbd-move-rbd_dev_refresh-definition.patch
+rbd-decouple-header-read-in-from-updating-rbd_dev-header.patch
+rbd-decouple-parent-info-read-in-from-updating-rbd_dev.patch
+rbd-take-header_rwsem-in-rbd_dev_refresh-only-when-updating.patch
--- /dev/null
+From ea3105672c68a5b6d7368504067220682ee6c65c Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Fri, 15 Sep 2023 20:35:33 +0200
+Subject: thermal: sysfs: Fix trip_point_hyst_store()
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit ea3105672c68a5b6d7368504067220682ee6c65c upstream.
+
+After commit 2e38a2a981b2 ("thermal/core: Add a generic thermal_zone_set_trip()
+function") updating a trip point temperature doesn't actually work,
+because the value supplied by user space is subsequently overwritten
+with the current trip point hysteresis value.
+
+Fix this by changing the code to parse the number string supplied by
+user space after retrieving the current trip point data from the
+thermal zone.
+
+Also drop a redundant tab character from the code in question.
+
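+Condensed from the diff below, the corrected ordering is:
+
+	ret = __thermal_zone_get_trip(tz, trip_id, &trip);	/* fill trip */
+	if (!ret)
+		ret = kstrtoint(buf, 10, &trip.hysteresis);	/* then parse */
+	if (!ret)
+		ret = thermal_zone_set_trip(tz, trip_id, &trip);
+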
+Fixes: 2e38a2a981b2 ("thermal/core: Add a generic thermal_zone_set_trip() function")
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: 6.3+ <stable@vger.kernel.org> # 6.3+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/thermal/thermal_sysfs.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
+index 6c20c9f90a05..4e6a97db894e 100644
+--- a/drivers/thermal/thermal_sysfs.c
++++ b/drivers/thermal/thermal_sysfs.c
+@@ -185,9 +185,6 @@ trip_point_hyst_store(struct device *dev, struct device_attribute *attr,
+ if (sscanf(attr->attr.name, "trip_point_%d_hyst", &trip_id) != 1)
+ return -EINVAL;
+
+- if (kstrtoint(buf, 10, &trip.hysteresis))
+- return -EINVAL;
+-
+ mutex_lock(&tz->lock);
+
+ if (!device_is_registered(dev)) {
+@@ -198,7 +195,11 @@ trip_point_hyst_store(struct device *dev, struct device_attribute *attr,
+ ret = __thermal_zone_get_trip(tz, trip_id, &trip);
+ if (ret)
+ goto unlock;
+-
++
++ ret = kstrtoint(buf, 10, &trip.hysteresis);
++ if (ret)
++ goto unlock;
++
+ ret = thermal_zone_set_trip(tz, trip_id, &trip);
+ unlock:
+ mutex_unlock(&tz->lock);
+--
+2.42.0
+
--- /dev/null
+From 1a6a464774947920dcedcf7409be62495c7cedd0 Mon Sep 17 00:00:00 2001
+From: Frederic Weisbecker <frederic@kernel.org>
+Date: Tue, 12 Sep 2023 12:44:06 +0200
+Subject: timers: Tag (hr)timer softirq as hotplug safe
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+commit 1a6a464774947920dcedcf7409be62495c7cedd0 upstream.
+
+Specific stress involving frequent CPU-hotplug operations, such as
+running rcutorture, may trigger the following message:
+
+ NOHZ tick-stop error: local softirq work is pending, handler #02!!!
+
+This happens in the CPU-down hotplug process, after
+CPUHP_AP_SMPBOOT_THREADS whose teardown callback parks ksoftirqd, and
+before the target CPU shuts down through CPUHP_AP_IDLE_DEAD. In this
+fragile intermediate state, softirqs waiting for threaded handling may be
+forever ignored and eventually reported by the idle task as in the above
+example.
+
+However, some vectors are known to be safe as long as the corresponding
+subsystems have teardown callbacks handling the migration of their
+events. The above error message reports a pending timers softirq even
+though this vector can be considered hotplug safe, because the
+CPUHP_TIMERS_PREPARE teardown callback performs the necessary migration
+of timers after the death of the CPU. Hrtimers have similar hotplug
+handling.
+
+Therefore this error message, as far as (hr-)timers are concerned, can
+be considered spurious and the relevant softirq vectors can be marked as
+hotplug safe.
+
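+As a rough sketch of why the message becomes spurious (assuming the
+reporting helper added by the commit named in Fixes below; details may
+differ across versions), the idle path only complains about vectors
+outside the safe mask:
+
+	unsigned long pending = local_softirq_pending();
+
+	/* vectors whose teardown callbacks migrate their events are safe */
+	pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK;
+	if (!pending)
+		return false;	/* nothing to warn about */
+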
+Fixes: 0345691b24c0 ("tick/rcu: Stop allowing RCU_SOFTIRQ in idle")
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230912104406.312185-6-frederic@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/interrupt.h | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -569,8 +569,12 @@ enum
+ * 2) rcu_report_dead() reports the final quiescent states.
+ *
+ * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue
++ *
++ * _ (HR)TIMER_SOFTIRQ: (hr)timers_dead_cpu() migrates the queue
+ */
+-#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ))
++#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(TIMER_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ) |\
++ BIT(HRTIMER_SOFTIRQ) | BIT(RCU_SOFTIRQ))
++
+
+ /* map softirq index to softirq name. update 'softirq_to_name' in
+ * kernel/softirq.c when adding a new softirq.
--- /dev/null
+From 2de9ee94054263940122aee8720e902b30c27930 Mon Sep 17 00:00:00 2001
+From: Beau Belgrave <beaub@linux.microsoft.com>
+Date: Mon, 25 Sep 2023 23:08:28 +0000
+Subject: tracing/user_events: Align set_bit() address for all archs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Beau Belgrave <beaub@linux.microsoft.com>
+
+commit 2de9ee94054263940122aee8720e902b30c27930 upstream.
+
+All architectures should use a long aligned address passed to set_bit().
+User processes can pass either a 32-bit or 64-bit sized value to be
+updated when tracing is enabled when on a 64-bit kernel. Both cases are
+ensured to be naturally aligned, however, that is not enough. The
+address must be long aligned without affecting checks on the value
+within the user process which require different adjustments for the bit
+for little and big endian CPUs.
+
+Add a compat flag to user_event_enabler that indicates when a 32-bit
+value is being used on a 64-bit kernel. Long-align addresses and correct
+the bit to be used by set_bit() to account for this alignment. Ensure
+compat flags are copied during forks and used during deletion clears.
+
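+As a hypothetical example on a 64-bit little-endian kernel: a 32-bit
+enable word at byte offset 4 is naturally aligned but not long-aligned,
+so align_addr_bit() rounds the address down and shifts the bit into the
+upper half of the long:
+
+	/* hypothetical values: uaddr = base + 4, bit = 0 on entry */
+	uaddr = ALIGN_DOWN(uaddr, sizeof(long));	/* back to base */
+	bit += 32;	/* the word now occupies bits 32..63 of the long */
+	set_bit(bit, (unsigned long *)uaddr);
+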
+Link: https://lore.kernel.org/linux-trace-kernel/20230925230829.341-2-beaub@linux.microsoft.com
+Link: https://lore.kernel.org/linux-trace-kernel/20230914131102.179100-1-cleger@rivosinc.com/
+
+Cc: stable@vger.kernel.org
+Fixes: 7235759084a4 ("tracing/user_events: Use remote writes for event enablement")
+Reported-by: Clément Léger <cleger@rivosinc.com>
+Suggested-by: Clément Léger <cleger@rivosinc.com>
+Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_events_user.c | 58 ++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 51 insertions(+), 7 deletions(-)
+
+--- a/kernel/trace/trace_events_user.c
++++ b/kernel/trace/trace_events_user.c
+@@ -127,8 +127,13 @@ struct user_event_enabler {
+ /* Bit 7 is for freeing status of enablement */
+ #define ENABLE_VAL_FREEING_BIT 7
+
+-/* Only duplicate the bit value */
+-#define ENABLE_VAL_DUP_MASK ENABLE_VAL_BIT_MASK
++/* Bit 8 is for marking 32-bit on 64-bit */
++#define ENABLE_VAL_32_ON_64_BIT 8
++
++#define ENABLE_VAL_COMPAT_MASK (1 << ENABLE_VAL_32_ON_64_BIT)
++
++/* Only duplicate the bit and compat values */
++#define ENABLE_VAL_DUP_MASK (ENABLE_VAL_BIT_MASK | ENABLE_VAL_COMPAT_MASK)
+
+ #define ENABLE_BITOPS(e) (&(e)->values)
+
+@@ -174,6 +179,30 @@ struct user_event_validator {
+ int flags;
+ };
+
++static inline void align_addr_bit(unsigned long *addr, int *bit,
++ unsigned long *flags)
++{
++ if (IS_ALIGNED(*addr, sizeof(long))) {
++#ifdef __BIG_ENDIAN
++ /* 32 bit on BE 64 bit requires a 32 bit offset when aligned. */
++ if (test_bit(ENABLE_VAL_32_ON_64_BIT, flags))
++ *bit += 32;
++#endif
++ return;
++ }
++
++ *addr = ALIGN_DOWN(*addr, sizeof(long));
++
++ /*
++ * We only support 32 and 64 bit values. The only time we need
++ * to align is a 32 bit value on a 64 bit kernel, which on LE
++ * is always 32 bits, and on BE requires no change when unaligned.
++ */
++#ifdef __LITTLE_ENDIAN
++ *bit += 32;
++#endif
++}
++
+ typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i,
+ void *tpdata, bool *faulted);
+
+@@ -482,6 +511,7 @@ static int user_event_enabler_write(stru
+ unsigned long *ptr;
+ struct page *page;
+ void *kaddr;
++ int bit = ENABLE_BIT(enabler);
+ int ret;
+
+ lockdep_assert_held(&event_mutex);
+@@ -497,6 +527,8 @@ static int user_event_enabler_write(stru
+ test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler))))
+ return -EBUSY;
+
++ align_addr_bit(&uaddr, &bit, ENABLE_BITOPS(enabler));
++
+ ret = pin_user_pages_remote(mm->mm, uaddr, 1, FOLL_WRITE | FOLL_NOFAULT,
+ &page, NULL);
+
+@@ -515,9 +547,9 @@ static int user_event_enabler_write(stru
+
+ /* Update bit atomically, user tracers must be atomic as well */
+ if (enabler->event && enabler->event->status)
+- set_bit(ENABLE_BIT(enabler), ptr);
++ set_bit(bit, ptr);
+ else
+- clear_bit(ENABLE_BIT(enabler), ptr);
++ clear_bit(bit, ptr);
+
+ kunmap_local(kaddr);
+ unpin_user_pages_dirty_lock(&page, 1, true);
+@@ -849,6 +881,12 @@ static struct user_event_enabler
+ enabler->event = user;
+ enabler->addr = uaddr;
+ enabler->values = reg->enable_bit;
++
++#if BITS_PER_LONG >= 64
++ if (reg->enable_size == 4)
++ set_bit(ENABLE_VAL_32_ON_64_BIT, ENABLE_BITOPS(enabler));
++#endif
++
+ retry:
+ /* Prevents state changes from racing with new enablers */
+ mutex_lock(&event_mutex);
+@@ -2376,7 +2414,8 @@ static long user_unreg_get(struct user_u
+ }
+
+ static int user_event_mm_clear_bit(struct user_event_mm *user_mm,
+- unsigned long uaddr, unsigned char bit)
++ unsigned long uaddr, unsigned char bit,
++ unsigned long flags)
+ {
+ struct user_event_enabler enabler;
+ int result;
+@@ -2384,7 +2423,7 @@ static int user_event_mm_clear_bit(struc
+
+ memset(&enabler, 0, sizeof(enabler));
+ enabler.addr = uaddr;
+- enabler.values = bit;
++ enabler.values = bit | flags;
+ retry:
+ /* Prevents state changes from racing with new enablers */
+ mutex_lock(&event_mutex);
+@@ -2414,6 +2453,7 @@ static long user_events_ioctl_unreg(unsi
+ struct user_event_mm *mm = current->user_event_mm;
+ struct user_event_enabler *enabler, *next;
+ struct user_unreg reg;
++ unsigned long flags;
+ long ret;
+
+ ret = user_unreg_get(ureg, ®);
+@@ -2424,6 +2464,7 @@ static long user_events_ioctl_unreg(unsi
+ if (!mm)
+ return -ENOENT;
+
++ flags = 0;
+ ret = -ENOENT;
+
+ /*
+@@ -2440,6 +2481,9 @@ static long user_events_ioctl_unreg(unsi
+ ENABLE_BIT(enabler) == reg.disable_bit) {
+ set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler));
+
++ /* We must keep compat flags for the clear */
++ flags |= enabler->values & ENABLE_VAL_COMPAT_MASK;
++
+ if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)))
+ user_event_enabler_destroy(enabler, true);
+
+@@ -2453,7 +2497,7 @@ static long user_events_ioctl_unreg(unsi
+ /* Ensure bit is now cleared for user, regardless of event status */
+ if (!ret)
+ ret = user_event_mm_clear_bit(mm, reg.disable_addr,
+- reg.disable_bit);
++ reg.disable_bit, flags);
+
+ return ret;
+ }