--- /dev/null
+From 71bf41b8e913ec9fc91f0d39ab8fb320229ec604 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Mon, 12 Aug 2024 15:16:21 +0200
+Subject: ACPI: EC: Evaluate _REG outside the EC scope more carefully
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit 71bf41b8e913ec9fc91f0d39ab8fb320229ec604 upstream.
+
+Commit 60fa6ae6e6d0 ("ACPI: EC: Install address space handler at the
+namespace root") caused _REG methods for EC operation regions outside
+the EC device scope to be evaluated, which on some systems leads to the
+evaluation of _REG methods in the scopes of device objects representing
+devices that are not present and not functional according to the _STA
+return values. Some of those device objects represent EC "alternatives",
+and if _REG is evaluated for their operation regions, the platform
+firmware may be confused and the platform may start to behave
+incorrectly.
+
+To avoid this problem, only evaluate _REG for EC operation regions
+located in the scopes of device objects representing known-to-be-present
+devices.
+
+For this purpose, partially revert commit 60fa6ae6e6d0 and trigger the
+evaluation of _REG for EC operation regions from acpi_bus_attach() for
+the known-valid devices.
+
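+A condensed view of the resulting flow (assembled from the hunks below,
+not additional code): acpi_bus_attach() only reaches the new call for
+device objects that scanning considers valid, and the depth of 1 limits
+the _REG evaluation to the scope of the device object itself:
+
+    void acpi_ec_register_opregions(struct acpi_device *adev)
+    {
+            /* The EC itself had its _REG run in ec_install_handlers() */
+            if (first_ec && first_ec->handle != adev->handle)
+                    acpi_execute_reg_methods(adev->handle, 1,
+                                             ACPI_ADR_SPACE_EC);
+    }
+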
+Fixes: 60fa6ae6e6d0 ("ACPI: EC: Install address space handler at the namespace root")
+Link: https://lore.kernel.org/linux-acpi/1f76b7e2-1928-4598-8037-28a1785c2d13@redhat.com
+Link: https://bugzilla.redhat.com/show_bug.cgi?id=2298938
+Link: https://bugzilla.redhat.com/show_bug.cgi?id=2302253
+Reported-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Cc: All applicable <stable@vger.kernel.org>
+Link: https://patch.msgid.link/23612351.6Emhk5qWAg@rjwysocki.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/acpi/ec.c | 11 +++++++++--
+ drivers/acpi/internal.h | 1 +
+ drivers/acpi/scan.c | 2 ++
+ 3 files changed, 12 insertions(+), 2 deletions(-)
+
+--- a/drivers/acpi/ec.c
++++ b/drivers/acpi/ec.c
+@@ -1487,12 +1487,13 @@ static bool install_gpio_irq_event_handl
+ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device,
+ bool call_reg)
+ {
+- acpi_handle scope_handle = ec == first_ec ? ACPI_ROOT_OBJECT : ec->handle;
+ acpi_status status;
+
+ acpi_ec_start(ec, false);
+
+ if (!test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) {
++ acpi_handle scope_handle = ec == first_ec ? ACPI_ROOT_OBJECT : ec->handle;
++
+ acpi_ec_enter_noirq(ec);
+ status = acpi_install_address_space_handler_no_reg(scope_handle,
+ ACPI_ADR_SPACE_EC,
+@@ -1506,7 +1507,7 @@ static int ec_install_handlers(struct ac
+ }
+
+ if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) {
+- acpi_execute_reg_methods(scope_handle, ACPI_UINT32_MAX, ACPI_ADR_SPACE_EC);
++ acpi_execute_reg_methods(ec->handle, ACPI_UINT32_MAX, ACPI_ADR_SPACE_EC);
+ set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags);
+ }
+
+@@ -1721,6 +1722,12 @@ static void acpi_ec_remove(struct acpi_d
+ }
+ }
+
++void acpi_ec_register_opregions(struct acpi_device *adev)
++{
++ if (first_ec && first_ec->handle != adev->handle)
++ acpi_execute_reg_methods(adev->handle, 1, ACPI_ADR_SPACE_EC);
++}
++
+ static acpi_status
+ ec_parse_io_ports(struct acpi_resource *resource, void *context)
+ {
+--- a/drivers/acpi/internal.h
++++ b/drivers/acpi/internal.h
+@@ -223,6 +223,7 @@ int acpi_ec_add_query_handler(struct acp
+ acpi_handle handle, acpi_ec_query_func func,
+ void *data);
+ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit);
++void acpi_ec_register_opregions(struct acpi_device *adev);
+
+ #ifdef CONFIG_PM_SLEEP
+ void acpi_ec_flush_work(void);
+--- a/drivers/acpi/scan.c
++++ b/drivers/acpi/scan.c
+@@ -2264,6 +2264,8 @@ static int acpi_bus_attach(struct acpi_d
+ if (device->handler)
+ goto ok;
+
++ acpi_ec_register_opregions(device);
++
+ if (!device->flags.initialized) {
+ device->flags.power_manageable =
+ device->power.states[ACPI_STATE_D0].flags.valid;
--- /dev/null
+From cdf65d73e001fde600b18d7e45afadf559425ce5 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Mon, 12 Aug 2024 15:11:42 +0200
+Subject: ACPICA: Add a depth argument to acpi_execute_reg_methods()
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit cdf65d73e001fde600b18d7e45afadf559425ce5 upstream.
+
+A subsequent change will need to pass a depth argument to
+acpi_execute_reg_methods(), so prepare that function for it.
+
+No intentional functional changes.
+
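+All existing callers pass ACPI_UINT32_MAX as the new argument so the
+namespace walk depth is unchanged, e.g. (from the ec.c hunk below):
+
+    acpi_execute_reg_methods(scope_handle, ACPI_UINT32_MAX,
+                             ACPI_ADR_SPACE_EC);
+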
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Cc: All applicable <stable@vger.kernel.org>
+Link: https://patch.msgid.link/8451567.NyiUUSuA9g@rjwysocki.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/acpi/acpica/acevents.h | 2 +-
+ drivers/acpi/acpica/evregion.c | 6 ++++--
+ drivers/acpi/acpica/evxfregn.c | 10 +++++++---
+ drivers/acpi/ec.c | 2 +-
+ include/acpi/acpixf.h | 1 +
+ 5 files changed, 14 insertions(+), 7 deletions(-)
+
+--- a/drivers/acpi/acpica/acevents.h
++++ b/drivers/acpi/acpica/acevents.h
+@@ -188,7 +188,7 @@ acpi_ev_detach_region(union acpi_operand
+ u8 acpi_ns_is_locked);
+
+ void
+-acpi_ev_execute_reg_methods(struct acpi_namespace_node *node,
++acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, u32 max_depth,
+ acpi_adr_space_type space_id, u32 function);
+
+ acpi_status
+--- a/drivers/acpi/acpica/evregion.c
++++ b/drivers/acpi/acpica/evregion.c
+@@ -65,6 +65,7 @@ acpi_status acpi_ev_initialize_op_region
+ acpi_gbl_default_address_spaces
+ [i])) {
+ acpi_ev_execute_reg_methods(acpi_gbl_root_node,
++ ACPI_UINT32_MAX,
+ acpi_gbl_default_address_spaces
+ [i], ACPI_REG_CONNECT);
+ }
+@@ -672,6 +673,7 @@ cleanup1:
+ * FUNCTION: acpi_ev_execute_reg_methods
+ *
+ * PARAMETERS: node - Namespace node for the device
++ * max_depth - Depth to which search for _REG
+ * space_id - The address space ID
+ * function - Passed to _REG: On (1) or Off (0)
+ *
+@@ -683,7 +685,7 @@ cleanup1:
+ ******************************************************************************/
+
+ void
+-acpi_ev_execute_reg_methods(struct acpi_namespace_node *node,
++acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, u32 max_depth,
+ acpi_adr_space_type space_id, u32 function)
+ {
+ struct acpi_reg_walk_info info;
+@@ -717,7 +719,7 @@ acpi_ev_execute_reg_methods(struct acpi_
+ * regions and _REG methods. (i.e. handlers must be installed for all
+ * regions of this Space ID before we can run any _REG methods)
+ */
+- (void)acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, ACPI_UINT32_MAX,
++ (void)acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, max_depth,
+ ACPI_NS_WALK_UNLOCK, acpi_ev_reg_run, NULL,
+ &info, NULL);
+
+--- a/drivers/acpi/acpica/evxfregn.c
++++ b/drivers/acpi/acpica/evxfregn.c
+@@ -85,7 +85,8 @@ acpi_install_address_space_handler_inter
+ /* Run all _REG methods for this address space */
+
+ if (run_reg) {
+- acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT);
++ acpi_ev_execute_reg_methods(node, ACPI_UINT32_MAX, space_id,
++ ACPI_REG_CONNECT);
+ }
+
+ unlock_and_exit:
+@@ -263,6 +264,7 @@ ACPI_EXPORT_SYMBOL(acpi_remove_address_s
+ * FUNCTION: acpi_execute_reg_methods
+ *
+ * PARAMETERS: device - Handle for the device
++ * max_depth - Depth to which search for _REG
+ * space_id - The address space ID
+ *
+ * RETURN: Status
+@@ -271,7 +273,8 @@ ACPI_EXPORT_SYMBOL(acpi_remove_address_s
+ *
+ ******************************************************************************/
+ acpi_status
+-acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id)
++acpi_execute_reg_methods(acpi_handle device, u32 max_depth,
++ acpi_adr_space_type space_id)
+ {
+ struct acpi_namespace_node *node;
+ acpi_status status;
+@@ -296,7 +299,8 @@ acpi_execute_reg_methods(acpi_handle dev
+
+ /* Run all _REG methods for this address space */
+
+- acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT);
++ acpi_ev_execute_reg_methods(node, max_depth, space_id,
++ ACPI_REG_CONNECT);
+ } else {
+ status = AE_BAD_PARAMETER;
+ }
+--- a/drivers/acpi/ec.c
++++ b/drivers/acpi/ec.c
+@@ -1506,7 +1506,7 @@ static int ec_install_handlers(struct ac
+ }
+
+ if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) {
+- acpi_execute_reg_methods(scope_handle, ACPI_ADR_SPACE_EC);
++ acpi_execute_reg_methods(scope_handle, ACPI_UINT32_MAX, ACPI_ADR_SPACE_EC);
+ set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags);
+ }
+
+--- a/include/acpi/acpixf.h
++++ b/include/acpi/acpixf.h
+@@ -660,6 +660,7 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status
+ void *context))
+ ACPI_EXTERNAL_RETURN_STATUS(acpi_status
+ acpi_execute_reg_methods(acpi_handle device,
+						 u32 max_depth,
+ acpi_adr_space_type
+ space_id))
+ ACPI_EXTERNAL_RETURN_STATUS(acpi_status
--- /dev/null
+From a8fc28dad6d574582cdf2f7e78c73c59c623df30 Mon Sep 17 00:00:00 2001
+From: Suren Baghdasaryan <surenb@google.com>
+Date: Tue, 13 Aug 2024 08:07:56 -0700
+Subject: alloc_tag: introduce clear_page_tag_ref() helper function
+
+From: Suren Baghdasaryan <surenb@google.com>
+
+commit a8fc28dad6d574582cdf2f7e78c73c59c623df30 upstream.
+
+In several cases we are freeing pages which were not allocated using
+common page allocators. For such cases, in order to keep allocation
+accounting correct, we should clear the page tag to indicate that the page
+being freed is expected to not have a valid allocation tag. Introduce
+clear_page_tag_ref() helper function to be used for this.
+
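+A minimal usage sketch, mirroring the mm/mm_init.c hunk below: callers
+freeing pages that were reserved rather than allocated clear the tag
+reference right before handing the pages back:
+
+    /* pages were reserved and not allocated */
+    clear_page_tag_ref(page);
+    __free_pages_core(page, order);
+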
+Link: https://lkml.kernel.org/r/20240813150758.855881-1-surenb@google.com
+Fixes: d224eb0287fb ("codetag: debug: mark codetags for reserved pages as empty")
+Signed-off-by: Suren Baghdasaryan <surenb@google.com>
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Kent Overstreet <kent.overstreet@linux.dev>
+Cc: Sourav Panda <souravpanda@google.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org> [6.10]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/pgalloc_tag.h | 13 +++++++++++++
+ mm/mm_init.c | 10 +---------
+ mm/page_alloc.c | 9 +--------
+ 3 files changed, 15 insertions(+), 17 deletions(-)
+
+--- a/include/linux/pgalloc_tag.h
++++ b/include/linux/pgalloc_tag.h
+@@ -43,6 +43,18 @@ static inline void put_page_tag_ref(unio
+ page_ext_put(page_ext_from_codetag_ref(ref));
+ }
+
++static inline void clear_page_tag_ref(struct page *page)
++{
++ if (mem_alloc_profiling_enabled()) {
++ union codetag_ref *ref = get_page_tag_ref(page);
++
++ if (ref) {
++ set_codetag_empty(ref);
++ put_page_tag_ref(ref);
++ }
++ }
++}
++
+ static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
+ unsigned int nr)
+ {
+@@ -126,6 +138,7 @@ static inline void pgalloc_tag_sub_pages
+
+ static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; }
+ static inline void put_page_tag_ref(union codetag_ref *ref) {}
++static inline void clear_page_tag_ref(struct page *page) {}
+ static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
+ unsigned int nr) {}
+ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
+--- a/mm/mm_init.c
++++ b/mm/mm_init.c
+@@ -2507,15 +2507,7 @@ void __init memblock_free_pages(struct p
+ }
+
+ /* pages were reserved and not allocated */
+- if (mem_alloc_profiling_enabled()) {
+- union codetag_ref *ref = get_page_tag_ref(page);
+-
+- if (ref) {
+- set_codetag_empty(ref);
+- put_page_tag_ref(ref);
+- }
+- }
+-
++ clear_page_tag_ref(page);
+ __free_pages_core(page, order);
+ }
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -5806,14 +5806,7 @@ unsigned long free_reserved_area(void *s
+
+ void free_reserved_page(struct page *page)
+ {
+- if (mem_alloc_profiling_enabled()) {
+- union codetag_ref *ref = get_page_tag_ref(page);
+-
+- if (ref) {
+- set_codetag_empty(ref);
+- put_page_tag_ref(ref);
+- }
+- }
++ clear_page_tag_ref(page);
+ ClearPageReserved(page);
+ init_page_count(page);
+ __free_page(page);
--- /dev/null
+From 766c163c2068b45330664fb67df67268e588a22d Mon Sep 17 00:00:00 2001
+From: Suren Baghdasaryan <surenb@google.com>
+Date: Tue, 13 Aug 2024 08:07:57 -0700
+Subject: alloc_tag: mark pages reserved during CMA activation as not tagged
+
+From: Suren Baghdasaryan <surenb@google.com>
+
+commit 766c163c2068b45330664fb67df67268e588a22d upstream.
+
+During CMA activation, pages in CMA area are prepared and then freed
+without being allocated. This triggers warnings when memory allocation
+debug config (CONFIG_MEM_ALLOC_PROFILING_DEBUG) is enabled. Fix this by
+marking these pages as not tagged before freeing them.
+
+Link: https://lkml.kernel.org/r/20240813150758.855881-2-surenb@google.com
+Fixes: d224eb0287fb ("codetag: debug: mark codetags for reserved pages as empty")
+Signed-off-by: Suren Baghdasaryan <surenb@google.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Kent Overstreet <kent.overstreet@linux.dev>
+Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
+Cc: Sourav Panda <souravpanda@google.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org> [6.10]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mm_init.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/mm/mm_init.c
++++ b/mm/mm_init.c
+@@ -2293,6 +2293,8 @@ void __init init_cma_reserved_pageblock(
+
+ set_pageblock_migratetype(page, MIGRATE_CMA);
+ set_page_refcounted(page);
++ /* pages were reserved and not allocated */
++ clear_page_tag_ref(page);
+ __free_pages(page, pageblock_order);
+
+ adjust_managed_page_count(page, pageblock_nr_pages);
--- /dev/null
+From 3beddef84d90590270465a907de1cfe2539ac70d Mon Sep 17 00:00:00 2001
+From: Baojun Xu <baojun.xu@ti.com>
+Date: Tue, 13 Aug 2024 12:37:48 +0800
+Subject: ALSA: hda/tas2781: fix wrong calibrated data order
+
+From: Baojun Xu <baojun.xu@ti.com>
+
+commit 3beddef84d90590270465a907de1cfe2539ac70d upstream.
+
+Wrong calibration data order causes the sound to be too low on some
+devices. Fix the wrong calibrated data order by converting the
+calibration data with get_unaligned_be32() after reading it from UEFI.
+
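+For reference, get_unaligned_be32() loads a 32-bit big-endian value
+from a possibly unaligned address and returns it in CPU byte order,
+which is what the conversion in the hunk below relies on:
+
+    data = get_unaligned_be32(&tas_priv->cali_data.data[offset]);
+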
+Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Baojun Xu <baojun.xu@ti.com>
+Link: https://patch.msgid.link/20240813043749.108-1-shenghao-ding@ti.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/pci/hda/tas2781_hda_i2c.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/sound/pci/hda/tas2781_hda_i2c.c
++++ b/sound/pci/hda/tas2781_hda_i2c.c
+@@ -2,10 +2,12 @@
+ //
+ // TAS2781 HDA I2C driver
+ //
+-// Copyright 2023 Texas Instruments, Inc.
++// Copyright 2023 - 2024 Texas Instruments, Inc.
+ //
+ // Author: Shenghao Ding <shenghao-ding@ti.com>
++// Current maintainer: Baojun Xu <baojun.xu@ti.com>
+
++#include <asm/unaligned.h>
+ #include <linux/acpi.h>
+ #include <linux/crc8.h>
+ #include <linux/crc32.h>
+@@ -519,20 +521,22 @@ static void tas2781_apply_calib(struct t
+ static const unsigned char rgno_array[CALIB_MAX] = {
+ 0x74, 0x0c, 0x14, 0x70, 0x7c,
+ };
+- unsigned char *data;
++ int offset = 0;
+ int i, j, rc;
++ __be32 data;
+
+ for (i = 0; i < tas_priv->ndev; i++) {
+- data = tas_priv->cali_data.data +
+- i * TASDEVICE_SPEAKER_CALIBRATION_SIZE;
+ for (j = 0; j < CALIB_MAX; j++) {
++ data = get_unaligned_be32(
++ &tas_priv->cali_data.data[offset]);
+ rc = tasdevice_dev_bulk_write(tas_priv, i,
+ TASDEVICE_REG(0, page_array[j], rgno_array[j]),
+- &(data[4 * j]), 4);
++ (unsigned char *)&data, 4);
+ if (rc < 0)
+ dev_err(tas_priv->dev,
+ "chn %d calib %d bulk_wr err = %d\n",
+ i, j, rc);
++ offset += 4;
+ }
+ }
+ }
--- /dev/null
+From ccbfcac05866ebe6eb3bc6d07b51d4ed4fcde436 Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Sat, 10 Aug 2024 10:48:32 +0200
+Subject: ALSA: timer: Relax start tick time check for slave timer elements
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit ccbfcac05866ebe6eb3bc6d07b51d4ed4fcde436 upstream.
+
+The recent addition of a sanity check for a too low start tick time
+seems to break some applications that use aloop with a certain slave
+timer setup. They may have an initial resolution of 0, hence it's
+treated as if it were a too low value.
+
+Relax the check by skipping it for slave timer instances to address
+the regression.
+
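+For reference, the check operates in nanoseconds, so
+
+    snd_timer_hw_resolution(timer) * ticks < 100000
+
+corresponds to a start tick time under 100us; a slave timer may still
+report a resolution of 0 at this point, which made every start attempt
+look too low.
+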
+Fixes: 4a63bd179fa8 ("ALSA: timer: Set lower bound of start tick time")
+Cc: <stable@vger.kernel.org>
+Link: https://github.com/raspberrypi/linux/issues/6294
+Link: https://patch.msgid.link/20240810084833.10939-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/core/timer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/sound/core/timer.c
++++ b/sound/core/timer.c
+@@ -547,7 +547,7 @@ static int snd_timer_start1(struct snd_t
+ /* check the actual time for the start tick;
+ * bail out as error if it's way too low (< 100us)
+ */
+- if (start) {
++ if (start && !(timer->hw.flags & SNDRV_TIMER_HW_SLAVE)) {
+ if ((u64)snd_timer_hw_resolution(timer) * ticks < 100000)
+ return -EINVAL;
+ }
--- /dev/null
+From 004eb8ba776ccd3e296ea6f78f7ae7985b12824e Mon Sep 17 00:00:00 2001
+From: Lianqin Hu <hulianqin@vivo.com>
+Date: Sun, 11 Aug 2024 08:30:11 +0000
+Subject: ALSA: usb-audio: Add delay quirk for VIVO USB-C-XE710 HEADSET
+
+From: Lianqin Hu <hulianqin@vivo.com>
+
+commit 004eb8ba776ccd3e296ea6f78f7ae7985b12824e upstream.
+
+Audio control requests that set the sampling frequency sometimes fail
+on this card. Adding a delay between control messages eliminates that
+problem.
+
+Signed-off-by: Lianqin Hu <hulianqin@vivo.com>
+Cc: <stable@vger.kernel.org>
+Link: https://patch.msgid.link/TYUPR06MB6217FF67076AF3E49E12C877D2842@TYUPR06MB6217.apcprd06.prod.outlook.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/usb/quirks.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/sound/usb/quirks.c
++++ b/sound/usb/quirks.c
+@@ -2221,6 +2221,8 @@ static const struct usb_audio_quirk_flag
+ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+ DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */
+ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
++ DEVICE_FLG(0x2d95, 0x8021, /* VIVO USB-C-XE710 HEADSET */
++ QUIRK_FLAG_CTL_MSG_DELAY_1M),
+ DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */
+ QUIRK_FLAG_IGNORE_CTL_ERROR),
+ DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */
--- /dev/null
+From c286f204ce6ba7b48e3dcba53eda7df8eaa64dd9 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Juan=20Jos=C3=A9=20Arboleda?= <soyjuanarbol@gmail.com>
+Date: Tue, 13 Aug 2024 11:10:53 -0500
+Subject: ALSA: usb-audio: Support Yamaha P-125 quirk entry
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Juan José Arboleda <soyjuanarbol@gmail.com>
+
+commit c286f204ce6ba7b48e3dcba53eda7df8eaa64dd9 upstream.
+
+This patch adds a USB quirk for the Yamaha P-125 digital piano.
+
+Signed-off-by: Juan José Arboleda <soyjuanarbol@gmail.com>
+Cc: <stable@vger.kernel.org>
+Link: https://patch.msgid.link/20240813161053.70256-1-soyjuanarbol@gmail.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/usb/quirks-table.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/sound/usb/quirks-table.h
++++ b/sound/usb/quirks-table.h
+@@ -273,6 +273,7 @@ YAMAHA_DEVICE(0x105a, NULL),
+ YAMAHA_DEVICE(0x105b, NULL),
+ YAMAHA_DEVICE(0x105c, NULL),
+ YAMAHA_DEVICE(0x105d, NULL),
++YAMAHA_DEVICE(0x1718, "P-125"),
+ {
+ USB_DEVICE(0x0499, 0x1503),
+ .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
--- /dev/null
+From a21dcf0ea8566ebbe011c79d6ed08cdfea771de3 Mon Sep 17 00:00:00 2001
+From: Haibo Xu <haibo1.xu@intel.com>
+Date: Mon, 5 Aug 2024 11:30:24 +0800
+Subject: arm64: ACPI: NUMA: initialize all values of acpi_early_node_map to NUMA_NO_NODE
+
+From: Haibo Xu <haibo1.xu@intel.com>
+
+commit a21dcf0ea8566ebbe011c79d6ed08cdfea771de3 upstream.
+
+Currently, only acpi_early_node_map[0] is initialized to NUMA_NO_NODE.
+To ensure all the values are properly initialized, switch to
+initializing all of them to NUMA_NO_NODE.
+
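+The subtlety is that { NUMA_NO_NODE } only applies to element 0; the
+remaining elements are implicitly zero-initialized, and 0 is a valid
+node id. The fix uses a designated range initializer so every entry is
+set, as in the hunk below:
+
+    static int acpi_early_node_map[NR_CPUS] __initdata =
+            { [0 ... NR_CPUS - 1] = NUMA_NO_NODE };
+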
+Fixes: e18962491696 ("arm64: numa: rework ACPI NUMA initialization")
+Cc: <stable@vger.kernel.org> # 4.19.x
+Reported-by: Andrew Jones <ajones@ventanamicro.com>
+Suggested-by: Andrew Jones <ajones@ventanamicro.com>
+Signed-off-by: Haibo Xu <haibo1.xu@intel.com>
+Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
+Reviewed-by: Sunil V L <sunilvl@ventanamicro.com>
+Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
+Acked-by: Catalin Marinas <catalin.marinas@arm.com>
+Acked-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
+Reviewed-by: Hanjun Guo <guohanjun@huawei.com>
+Link: https://lore.kernel.org/r/853d7f74aa243f6f5999e203246f0d1ae92d2b61.1722828421.git.haibo1.xu@intel.com
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kernel/acpi_numa.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm64/kernel/acpi_numa.c
++++ b/arch/arm64/kernel/acpi_numa.c
+@@ -27,7 +27,7 @@
+
+ #include <asm/numa.h>
+
+-static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE };
++static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE };
+
+ int __init acpi_numa_get_nid(unsigned int cpu)
+ {
--- /dev/null
+From 42fac187b5c746227c92d024f1caf33bc1d337e4 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Thu, 11 Apr 2024 16:41:20 -0400
+Subject: btrfs: check delayed refs when we're checking if a ref exists
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 42fac187b5c746227c92d024f1caf33bc1d337e4 upstream.
+
+In the patch 78c52d9eb6b7 ("btrfs: check for refs on snapshot delete
+resume") I added some code to handle file systems that had been
+corrupted by a bug that incorrectly skipped updating the drop progress
+key while dropping a snapshot. This code would check to see if we had
+already deleted our reference for a child block, and skip the deletion
+if we had.
+
+Unfortunately there is a bug: the check would only look at the on-disk
+references. I made the incorrect assumption that blocks in an already
+deleted snapshot whose deletion was resumed on mount wouldn't be
+modified.
+
+If we have 2 pending deleted snapshots that share blocks, we can easily
+modify the rules for a block. Take the following example
+
+subvolume a exists, and subvolume b is a snapshot of subvolume a. They
+share references to block 1. Block 1 will have 2 full references, one
+for subvolume a and one for subvolume b, and it belongs to subvolume a
+(btrfs_header_owner(block 1) == subvolume a).
+
+When deleting subvolume a, we will drop our full reference for block 1,
+and because we are the owner we will drop our full reference for all of
+block 1's children, convert block 1 to FULL BACKREF, and add a shared
+reference to all of block 1's children.
+
+Then we will start the snapshot deletion of subvolume b. We look up the
+extent info for block 1, which checks delayed refs and tells us that
+FULL BACKREF is set, so sets parent to the bytenr of block 1. However
+because this is a resumed snapshot deletion, we call into
+check_ref_exists(). Because check_ref_exists() only looks at the disk,
+it doesn't find the shared backref for the child of block 1, and thus
+returns 0 and we skip deleting the reference for the child of block 1
+and continue. This orphans the child of block 1.
+
+The fix is to lookup the delayed refs, similar to what we do in
+btrfs_lookup_extent_info(). However we only care about whether the
+reference exists or not. If we fail to find our reference on disk, go
+look up the bytenr in the delayed refs, and if it exists look for an
+existing ref in the delayed ref head. If that exists then we know we
+can delete the reference safely and carry on. If it doesn't exist we
+know we have to skip over this block.
+
+This bug has existed since I introduced this fix; however, it requires
+having multiple deleted snapshots pending when we unmount. We noticed
+this in production because our shutdown path stops the container on the
+system, which deletes a bunch of subvolumes, and then reboots the box.
+This gives us plenty of opportunities to hit this issue. Looking at the
+history we've seen this occasionally in production, but we had a big
+spike recently thanks to faster machines getting jobs with multiple
+subvolumes in the job.
+
+Chris Mason wrote a reproducer which does the following
+
+mount /dev/nvme4n1 /btrfs
+btrfs subvol create /btrfs/s1
+simoop -E -f 4k -n 200000 -z /btrfs/s1
+while(true) ; do
+ btrfs subvol snap /btrfs/s1 /btrfs/s2
+ simoop -f 4k -n 200000 -r 10 -z /btrfs/s2
+ btrfs subvol snap /btrfs/s2 /btrfs/s3
+ btrfs balance start -dusage=80 /btrfs
+ btrfs subvol del /btrfs/s2 /btrfs/s3
+ umount /btrfs
+ btrfsck /dev/nvme4n1 || exit 1
+ mount /dev/nvme4n1 /btrfs
+done
+
+On the second loop this would fail consistently, with my patch it has
+been running for hours and hasn't failed.
+
+I also used dm-log-writes to capture the state of the failure so I could
+debug the problem. Using the existing failure case to test my patch
+validated that it fixes the problem.
+
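+Condensed from the extent-tree.c hunk below, the resulting lookup order
+in check_ref_exists() is (a sketch, locking and retries elided):
+
+    ret = lookup_extent_backref(...);           /* on-disk check */
+    if (ret != -ENOENT)
+            return (ret < 0) ? ret : 1;         /* found, or hard error */
+
+    head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
+    if (!head)
+            return 0;                           /* no delayed refs at all */
+
+    exists = btrfs_find_delayed_tree_ref(head, root_id, parent);
+    return exists ? 1 : 0;
+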
+Fixes: 78c52d9eb6b7 ("btrfs: check for refs on snapshot delete resume")
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/delayed-ref.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++
+ fs/btrfs/delayed-ref.h | 2 +
+ fs/btrfs/extent-tree.c | 51 ++++++++++++++++++++++++++++++++-----
+ 3 files changed, 114 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/delayed-ref.c
++++ b/fs/btrfs/delayed-ref.c
+@@ -1169,6 +1169,73 @@ btrfs_find_delayed_ref_head(struct btrfs
+ return find_ref_head(delayed_refs, bytenr, false);
+ }
+
++static int find_comp(struct btrfs_delayed_ref_node *entry, u64 root, u64 parent)
++{
++ int type = parent ? BTRFS_SHARED_BLOCK_REF_KEY : BTRFS_TREE_BLOCK_REF_KEY;
++
++ if (type < entry->type)
++ return -1;
++ if (type > entry->type)
++ return 1;
++
++ if (type == BTRFS_TREE_BLOCK_REF_KEY) {
++ if (root < entry->ref_root)
++ return -1;
++ if (root > entry->ref_root)
++ return 1;
++ } else {
++ if (parent < entry->parent)
++ return -1;
++ if (parent > entry->parent)
++ return 1;
++ }
++ return 0;
++}
++
++/*
++ * Check to see if a given root/parent reference is attached to the head. This
++ * only checks for BTRFS_ADD_DELAYED_REF references that match, as that
++ * indicates the reference exists for the given root or parent. This is for
++ * tree blocks only.
++ *
++ * @head: the head of the bytenr we're searching.
++ * @root: the root objectid of the reference if it is a normal reference.
++ * @parent: the parent if this is a shared backref.
++ */
++bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head,
++ u64 root, u64 parent)
++{
++ struct rb_node *node;
++ bool found = false;
++
++ lockdep_assert_held(&head->mutex);
++
++ spin_lock(&head->lock);
++ node = head->ref_tree.rb_root.rb_node;
++ while (node) {
++ struct btrfs_delayed_ref_node *entry;
++ int ret;
++
++ entry = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
++ ret = find_comp(entry, root, parent);
++ if (ret < 0) {
++ node = node->rb_left;
++ } else if (ret > 0) {
++ node = node->rb_right;
++ } else {
++ /*
++ * We only want to count ADD actions, as drops mean the
++ * ref doesn't exist.
++ */
++ if (entry->action == BTRFS_ADD_DELAYED_REF)
++ found = true;
++ break;
++ }
++ }
++ spin_unlock(&head->lock);
++ return found;
++}
++
+ void __cold btrfs_delayed_ref_exit(void)
+ {
+ kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
+--- a/fs/btrfs/delayed-ref.h
++++ b/fs/btrfs/delayed-ref.h
+@@ -389,6 +389,8 @@ int btrfs_delayed_refs_rsv_refill(struct
+ void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
+ u64 num_bytes);
+ bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
++bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head,
++ u64 root, u64 parent);
+
+ static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node)
+ {
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -5387,23 +5387,62 @@ static int check_ref_exists(struct btrfs
+ struct btrfs_root *root, u64 bytenr, u64 parent,
+ int level)
+ {
++ struct btrfs_delayed_ref_root *delayed_refs;
++ struct btrfs_delayed_ref_head *head;
+ struct btrfs_path *path;
+ struct btrfs_extent_inline_ref *iref;
+ int ret;
++ bool exists = false;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+-
++again:
+ ret = lookup_extent_backref(trans, path, &iref, bytenr,
+ root->fs_info->nodesize, parent,
+ btrfs_root_id(root), level, 0);
++ if (ret != -ENOENT) {
++ /*
++		 * If we get 0 then we found our reference, return 1, else
++		 * return the error if it's not -ENOENT.
++		 */
++		btrfs_free_path(path);
++		return (ret < 0) ? ret : 1;
++ }
++
++ /*
++ * We could have a delayed ref with this reference, so look it up while
++ * we're holding the path open to make sure we don't race with the
++ * delayed ref running.
++ */
++ delayed_refs = &trans->transaction->delayed_refs;
++ spin_lock(&delayed_refs->lock);
++ head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
++ if (!head)
++ goto out;
++ if (!mutex_trylock(&head->mutex)) {
++ /*
++ * We're contended, means that the delayed ref is running, get a
++ * reference and wait for the ref head to be complete and then
++ * try again.
++ */
++ refcount_inc(&head->refs);
++ spin_unlock(&delayed_refs->lock);
++
++ btrfs_release_path(path);
++
++ mutex_lock(&head->mutex);
++ mutex_unlock(&head->mutex);
++ btrfs_put_delayed_ref_head(head);
++ goto again;
++ }
++
++ exists = btrfs_find_delayed_tree_ref(head, root->root_key.objectid, parent);
++ mutex_unlock(&head->mutex);
++out:
++ spin_unlock(&delayed_refs->lock);
+ btrfs_free_path(path);
+- if (ret == -ENOENT)
+- return 0;
+- if (ret < 0)
+- return ret;
+- return 1;
++ return exists ? 1 : 0;
+ }
+
+ /*
--- /dev/null
+From 534f7eff9239c1b0af852fc33f5af2b62c00eddf Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 16 Aug 2024 10:40:38 +0930
+Subject: btrfs: only enable extent map shrinker for DEBUG builds
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 534f7eff9239c1b0af852fc33f5af2b62c00eddf upstream.
+
+Although there are several patches improving the extent map shrinker,
+there are still reports of too frequent shrinker behavior, taking too
+much CPU for the kswapd process.
+
+So let's only enable the extent map shrinker for DEBUG builds for now,
+until we have a more comprehensive understanding and a better solution.
+
+Link: https://lore.kernel.org/linux-btrfs/3df4acd616a07ef4d2dc6bad668701504b412ffc.camel@intelfx.name/
+Link: https://lore.kernel.org/linux-btrfs/c30fd6b3-ca7a-4759-8a53-d42878bf84f7@gmail.com/
+Fixes: 956a17d9d050 ("btrfs: add a shrinker for extent maps")
+CC: stable@vger.kernel.org # 6.10+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/super.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -2387,7 +2387,13 @@ static long btrfs_nr_cached_objects(stru
+
+ trace_btrfs_extent_map_shrinker_count(fs_info, nr);
+
+- return nr;
++ /*
++ * Only report the real number for DEBUG builds, as there are reports of
++ * serious performance degradation caused by too frequent shrinks.
++ */
++ if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
++ return nr;
++ return 0;
+ }
+
+ static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_control *sc)
--- /dev/null
+From ae1e766f623f7a2a889a0b09eb076dd9a60efbe9 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Sun, 11 Aug 2024 11:53:42 +0100
+Subject: btrfs: only run the extent map shrinker from kswapd tasks
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit ae1e766f623f7a2a889a0b09eb076dd9a60efbe9 upstream.
+
+Currently the extent map shrinker can be run by any task when attempting
+to allocate memory and there's enough memory pressure to trigger it.
+
+To avoid too much latency we stop iterating over extent maps and removing
+them once the task needs to reschedule. This logic was introduced in commit
+b3ebb9b7e92a ("btrfs: stop extent map shrinker if reschedule is needed").
+
+While that solved high latency problems for some use cases, it's still
+not enough because with a too high number of tasks entering the extent map
+shrinker code, either due to memory allocations or because they are a
+kswapd task, we end up having a very high level of contention on some
+spin locks, namely:
+
+1) The fs_info->fs_roots_radix_lock spin lock, which we need to find
+ roots to iterate over their inodes;
+
+2) The spin lock of the xarray used to track open inodes for a root
+ (struct btrfs_root::inodes) - on 6.10 kernels and below, it used to
+ be a red black tree and the spin lock was root->inode_lock;
+
+3) The fs_info->delayed_iput_lock spin lock since the shrinker adds
+ delayed iputs (calls btrfs_add_delayed_iput()).
+
+Instead of allowing the extent map shrinker to be run by any task, make
+it run only by kswapd tasks. This still solves the problem of running
+into OOM situations due to an unbounded extent map creation, which is
+simple to trigger by direct IO writes, as described in the changelog
+of commit 956a17d9d050 ("btrfs: add a shrinker for extent maps"), and
+by a similar case when doing buffered IO on files with a very large
+number of holes (keeping the file open and creating many holes, whose
+extent maps are only released when the file is closed).
+
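+Condensed from the super.c hunk below, the gate is a single check at
+the top of btrfs_free_cached_objects():
+
+    if (!current_is_kswapd())
+            return 0;
+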
+Reported-by: kzd <kzd@56709.net>
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=219121
+Reported-by: Octavia Togami <octavia.togami@gmail.com>
+Link: https://lore.kernel.org/linux-btrfs/CAHPNGSSt-a4ZZWrtJdVyYnJFscFjP9S7rMcvEMaNSpR556DdLA@mail.gmail.com/
+Fixes: 956a17d9d050 ("btrfs: add a shrinker for extent maps")
+CC: stable@vger.kernel.org # 6.10+
+Tested-by: kzd <kzd@56709.net>
+Tested-by: Octavia Togami <octavia.togami@gmail.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_map.c | 22 ++++++----------------
+ fs/btrfs/super.c | 10 ++++++++++
+ 2 files changed, 16 insertions(+), 16 deletions(-)
+
+--- a/fs/btrfs/extent_map.c
++++ b/fs/btrfs/extent_map.c
+@@ -1065,8 +1065,7 @@ static long btrfs_scan_inode(struct btrf
+ return 0;
+
+ /*
+- * We want to be fast because we can be called from any path trying to
+- * allocate memory, so if the lock is busy we don't want to spend time
++ * We want to be fast so if the lock is busy we don't want to spend time
+ * waiting for it - either some task is about to do IO for the inode or
+ * we may have another task shrinking extent maps, here in this code, so
+ * skip this inode.
+@@ -1109,9 +1108,7 @@ next:
+ /*
+ * Stop if we need to reschedule or there's contention on the
+ * lock. This is to avoid slowing other tasks trying to take the
+- * lock and because the shrinker might be called during a memory
+- * allocation path and we want to avoid taking a very long time
+- * and slowing down all sorts of tasks.
++ * lock.
+ */
+ if (need_resched() || rwlock_needbreak(&tree->lock))
+ break;
+@@ -1139,12 +1136,7 @@ static long btrfs_scan_root(struct btrfs
+ if (ctx->scanned >= ctx->nr_to_scan)
+ break;
+
+- /*
+- * We may be called from memory allocation paths, so we don't
+- * want to take too much time and slowdown tasks.
+- */
+- if (need_resched())
+- break;
++ cond_resched();
+
+ inode = btrfs_find_first_inode(root, min_ino);
+ }
+@@ -1202,14 +1194,12 @@ long btrfs_free_extent_maps(struct btrfs
+ ctx.last_ino);
+ }
+
+- /*
+- * We may be called from memory allocation paths, so we don't want to
+- * take too much time and slowdown tasks, so stop if we need reschedule.
+- */
+- while (ctx.scanned < ctx.nr_to_scan && !need_resched()) {
++ while (ctx.scanned < ctx.nr_to_scan) {
+ struct btrfs_root *root;
+ unsigned long count;
+
++ cond_resched();
++
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ count = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+ (void **)&root,
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -28,6 +28,7 @@
+ #include <linux/btrfs.h>
+ #include <linux/security.h>
+ #include <linux/fs_parser.h>
++#include <linux/swap.h>
+ #include "messages.h"
+ #include "delayed-inode.h"
+ #include "ctree.h"
+@@ -2394,6 +2395,15 @@ static long btrfs_free_cached_objects(st
+ const long nr_to_scan = min_t(unsigned long, LONG_MAX, sc->nr_to_scan);
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+
++ /*
++ * We may be called from any task trying to allocate memory and we don't
++ * want to slow it down with scanning and dropping extent maps. It would
++ * also cause heavy lock contention if many tasks concurrently enter
++ * here. Therefore only allow kswapd tasks to scan and drop extent maps.
++ */
++ if (!current_is_kswapd())
++ return 0;
++
+ return btrfs_free_extent_maps(fs_info, nr_to_scan);
+ }
+
--- /dev/null
+From 46a6e10a1ab16cc71d4a3cab73e79aabadd6b8ea Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 12 Aug 2024 14:18:06 +0100
+Subject: btrfs: send: allow cloning non-aligned extent if it ends at i_size
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 46a6e10a1ab16cc71d4a3cab73e79aabadd6b8ea upstream.
+
+If we find that an extent is shared but its end offset is not sector
+size aligned, then we don't clone it and issue write operations instead.
+This is because the reflink (remap_file_range) operation does not allow
+cloning unaligned ranges, except if the end offset of the range matches
+the i_size of the source and destination files (and the start offset is
+sector size aligned).
+
+While this is not incorrect because send can only guarantee that a file
+has the same data in the source and destination snapshots, it's not
+optimal and generates confusion and surprising behaviour for users.
+
+For example, running this test:
+
+ $ cat test.sh
+ #!/bin/bash
+
+ DEV=/dev/sdi
+ MNT=/mnt/sdi
+
+ mkfs.btrfs -f $DEV
+ mount $DEV $MNT
+
+ # Use a file size not aligned to any possible sector size.
+ file_size=$((1 * 1024 * 1024 + 5)) # 1MB + 5 bytes
+ dd if=/dev/random of=$MNT/foo bs=$file_size count=1
+ cp --reflink=always $MNT/foo $MNT/bar
+
+ btrfs subvolume snapshot -r $MNT/ $MNT/snap
+ rm -f /tmp/send-test
+ btrfs send -f /tmp/send-test $MNT/snap
+
+ umount $MNT
+ mkfs.btrfs -f $DEV
+ mount $DEV $MNT
+
+ btrfs receive -vv -f /tmp/send-test $MNT
+
+ xfs_io -r -c "fiemap -v" $MNT/snap/bar
+
+ umount $MNT
+
+Gives the following result:
+
+ (...)
+ mkfile o258-7-0
+ rename o258-7-0 -> bar
+ write bar - offset=0 length=49152
+ write bar - offset=49152 length=49152
+ write bar - offset=98304 length=49152
+ write bar - offset=147456 length=49152
+ write bar - offset=196608 length=49152
+ write bar - offset=245760 length=49152
+ write bar - offset=294912 length=49152
+ write bar - offset=344064 length=49152
+ write bar - offset=393216 length=49152
+ write bar - offset=442368 length=49152
+ write bar - offset=491520 length=49152
+ write bar - offset=540672 length=49152
+ write bar - offset=589824 length=49152
+ write bar - offset=638976 length=49152
+ write bar - offset=688128 length=49152
+ write bar - offset=737280 length=49152
+ write bar - offset=786432 length=49152
+ write bar - offset=835584 length=49152
+ write bar - offset=884736 length=49152
+ write bar - offset=933888 length=49152
+ write bar - offset=983040 length=49152
+ write bar - offset=1032192 length=16389
+ chown bar - uid=0, gid=0
+ chmod bar - mode=0644
+ utimes bar
+ utimes
+ BTRFS_IOC_SET_RECEIVED_SUBVOL uuid=06d640da-9ca1-604c-b87c-3375175a8eb3, stransid=7
+ /mnt/sdi/snap/bar:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [0..2055]: 26624..28679 2056 0x1
+
+There's no clone operation to clone extents from the file foo into file
+bar and fiemap confirms there's no shared flag (0x2000).
+
+So update send_write_or_clone() so that it proceeds with cloning if the
+source and destination ranges end at the i_size of the respective files.
+
+After this change, the result of the test is:
+
+ (...)
+ mkfile o258-7-0
+ rename o258-7-0 -> bar
+ clone bar - source=foo source offset=0 offset=0 length=1048581
+ chown bar - uid=0, gid=0
+ chmod bar - mode=0644
+ utimes bar
+ utimes
+ BTRFS_IOC_SET_RECEIVED_SUBVOL uuid=582420f3-ea7d-564e-bbe5-ce440d622190, stransid=7
+ /mnt/sdi/snap/bar:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [0..2055]: 26624..28679 2056 0x2001
+
+A test case for fstests will also follow up soon.
+
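+Condensed from the send.c hunk below, the clone-or-write decision in
+send_write_or_clone() becomes (a sketch):
+
+    if (!clone_root)
+            goto write_data;
+    if (IS_ALIGNED(end, bs))
+            goto clone_data;
+    /* Unaligned end: clone only if both ranges end at i_size. */
+    if (end != sctx->cur_inode_size)
+            goto write_data;
+    ret = get_inode_info(clone_root->root, clone_root->ino, &info);
+    ...
+    if (clone_root->offset + num_bytes == info.size)
+            goto clone_data;
+    goto write_data;
+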
+Link: https://github.com/kdave/btrfs-progs/issues/572#issuecomment-2282841416
+CC: stable@vger.kernel.org # 5.10+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/send.c | 54 ++++++++++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 40 insertions(+), 14 deletions(-)
+
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -6158,25 +6158,51 @@ static int send_write_or_clone(struct se
+ u64 offset = key->offset;
+ u64 end;
+ u64 bs = sctx->send_root->fs_info->sectorsize;
++ struct btrfs_file_extent_item *ei;
++ u64 disk_byte;
++ u64 data_offset;
++ u64 num_bytes;
++ struct btrfs_inode_info info = { 0 };
+
+ end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size);
+ if (offset >= end)
+ return 0;
+
+- if (clone_root && IS_ALIGNED(end, bs)) {
+- struct btrfs_file_extent_item *ei;
+- u64 disk_byte;
+- u64 data_offset;
+-
+- ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+- struct btrfs_file_extent_item);
+- disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
+- data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
+- ret = clone_range(sctx, path, clone_root, disk_byte,
+- data_offset, offset, end - offset);
+- } else {
+- ret = send_extent_data(sctx, path, offset, end - offset);
+- }
++ num_bytes = end - offset;
++
++ if (!clone_root)
++ goto write_data;
++
++ if (IS_ALIGNED(end, bs))
++ goto clone_data;
++
++ /*
++ * If the extent end is not aligned, we can clone if the extent ends at
++ * the i_size of the inode and the clone range ends at the i_size of the
++ * source inode, otherwise the clone operation fails with -EINVAL.
++ */
++ if (end != sctx->cur_inode_size)
++ goto write_data;
++
++ ret = get_inode_info(clone_root->root, clone_root->ino, &info);
++ if (ret < 0)
++ return ret;
++
++ if (clone_root->offset + num_bytes == info.size)
++ goto clone_data;
++
++write_data:
++ ret = send_extent_data(sctx, path, offset, num_bytes);
++ sctx->cur_inode_next_write_offset = end;
++ return ret;
++
++clone_data:
++ ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
++ struct btrfs_file_extent_item);
++ disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
++ data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
++ ret = clone_range(sctx, path, clone_root, disk_byte, data_offset, offset,
++ num_bytes);
+ sctx->cur_inode_next_write_offset = end;
+ return ret;
+ }
--- /dev/null
+From 008e2512dc5696ab2dc5bf264e98a9fe9ceb830e Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Sun, 11 Aug 2024 15:00:22 +0930
+Subject: btrfs: tree-checker: add dev extent item checks
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 008e2512dc5696ab2dc5bf264e98a9fe9ceb830e upstream.
+
+[REPORT]
+There is a corruption report that btrfs refused to mount a fs that has
+overlapping dev extents:
+
+ BTRFS error (device sdc): dev extent devid 4 physical offset 14263979671552 overlap with previous dev extent end 14263980982272
+ BTRFS error (device sdc): failed to verify dev extents against chunks: -117
+ BTRFS error (device sdc): open_ctree failed
+
+[CAUSE]
+The direct cause is very obvious, there is a bad dev extent item with
+incorrect length.
+
+With btrfs check reporting two overlapping extents, the second one shows
+some clue on the cause:
+
+ ERROR: dev extent devid 4 offset 14263979671552 len 6488064 overlap with previous dev extent end 14263980982272
+ ERROR: dev extent devid 13 offset 2257707008000 len 6488064 overlap with previous dev extent end 2257707270144
+ ERROR: errors found in extent allocation tree or chunk allocation
+
+The second one looks like a bitflip happened during new chunk
+allocation:
+hex(2257707008000) = 0x20da9d30000
+hex(2257707270144) = 0x20da9d70000
+diff = 0x00000040000
+
+So it looks like a bitflip happened during new dev extent allocation,
+resulting in the second overlap.
+
+Currently we only do the dev-extent verification at mount time, but if
+the corruption is caused by a memory bitflip, we really want to catch it
+before writing the corruption to storage.
+
+Furthermore the dev extent items has the following key definition:
+
+ (<device id> DEV_EXTENT <physical offset>)
+
+Thus we cannot just rely on the generic key order check to make sure
+there is no overlap.
+
+[ENHANCEMENT]
+Introduce dedicated dev extent checks, including:
+
+- Fixed member checks
+ * chunk_tree should always be BTRFS_CHUNK_TREE_OBJECTID (3)
+ * chunk_objectid should always be
+ BTRFS_FIRST_CHUNK_CHUNK_TREE_OBJECTID (256)
+
+- Alignment checks
+ * chunk_offset should be aligned to sectorsize
+ * length should be aligned to sectorsize
+ * key.offset should be aligned to sectorsize
+
+- Overlap checks
+ If the previous key is also a dev-extent item, with the same
+ device id, make sure we do not overlap with the previous dev extent.
+
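+The overlap check itself reduces to the following (from the hunk
+below): for two consecutive dev extent items with the same devid,
+
+    prev_key->offset + btrfs_dev_extent_length(leaf, prev_de) > key->offset
+
+means the previous dev extent runs past the start of the current one.
+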
+Reported-by: Stefan N <stefannnau@gmail.com>
+Link: https://lore.kernel.org/linux-btrfs/CA+W5K0rSO3koYTo=nzxxTm1-Pdu1HYgVxEpgJ=aGc7d=E8mGEg@mail.gmail.com/
+CC: stable@vger.kernel.org # 5.10+
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 69 insertions(+)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -1718,6 +1718,72 @@ static int check_raid_stripe_extent(cons
+ return 0;
+ }
+
++static int check_dev_extent_item(const struct extent_buffer *leaf,
++ const struct btrfs_key *key,
++ int slot,
++ struct btrfs_key *prev_key)
++{
++ struct btrfs_dev_extent *de;
++ const u32 sectorsize = leaf->fs_info->sectorsize;
++
++ de = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
++ /* Basic fixed member checks. */
++ if (unlikely(btrfs_dev_extent_chunk_tree(leaf, de) !=
++ BTRFS_CHUNK_TREE_OBJECTID)) {
++ generic_err(leaf, slot,
++ "invalid dev extent chunk tree id, has %llu expect %llu",
++ btrfs_dev_extent_chunk_tree(leaf, de),
++ BTRFS_CHUNK_TREE_OBJECTID);
++ return -EUCLEAN;
++ }
++ if (unlikely(btrfs_dev_extent_chunk_objectid(leaf, de) !=
++ BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
++ generic_err(leaf, slot,
++ "invalid dev extent chunk objectid, has %llu expect %llu",
++ btrfs_dev_extent_chunk_objectid(leaf, de),
++ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
++ return -EUCLEAN;
++ }
++ /* Alignment check. */
++ if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
++ generic_err(leaf, slot,
++ "invalid dev extent key.offset, has %llu not aligned to %u",
++ key->offset, sectorsize);
++ return -EUCLEAN;
++ }
++ if (unlikely(!IS_ALIGNED(btrfs_dev_extent_chunk_offset(leaf, de),
++ sectorsize))) {
++ generic_err(leaf, slot,
++ "invalid dev extent chunk offset, has %llu not aligned to %u",
++			    btrfs_dev_extent_chunk_offset(leaf, de),
++ sectorsize);
++ return -EUCLEAN;
++ }
++ if (unlikely(!IS_ALIGNED(btrfs_dev_extent_length(leaf, de),
++ sectorsize))) {
++ generic_err(leaf, slot,
++ "invalid dev extent length, has %llu not aligned to %u",
++ btrfs_dev_extent_length(leaf, de), sectorsize);
++ return -EUCLEAN;
++ }
++ /* Overlap check with previous dev extent. */
++ if (slot && prev_key->objectid == key->objectid &&
++ prev_key->type == key->type) {
++ struct btrfs_dev_extent *prev_de;
++ u64 prev_len;
++
++ prev_de = btrfs_item_ptr(leaf, slot - 1, struct btrfs_dev_extent);
++ prev_len = btrfs_dev_extent_length(leaf, prev_de);
++ if (unlikely(prev_key->offset + prev_len > key->offset)) {
++ generic_err(leaf, slot,
++ "dev extent overlap, prev offset %llu len %llu current offset %llu",
++				    prev_key->offset, prev_len, key->offset);
++ return -EUCLEAN;
++ }
++ }
++ return 0;
++}
++
+ /*
+ * Common point to switch the item-specific validation.
+ */
+@@ -1754,6 +1820,9 @@ static enum btrfs_tree_block_status chec
+ case BTRFS_DEV_ITEM_KEY:
+ ret = check_dev_item(leaf, key, slot);
+ break;
++ case BTRFS_DEV_EXTENT_KEY:
++ ret = check_dev_extent_item(leaf, key, slot, prev_key);
++ break;
+ case BTRFS_INODE_ITEM_KEY:
+ ret = check_inode_item(leaf, key, slot);
+ break;
--- /dev/null
+From 31723c9542dba1681cc3720571fdf12ffe0eddd9 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Mon, 12 Aug 2024 08:52:44 +0930
+Subject: btrfs: tree-checker: reject BTRFS_FT_UNKNOWN dir type
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 31723c9542dba1681cc3720571fdf12ffe0eddd9 upstream.
+
+[REPORT]
+There is a bug report that the kernel is rejecting a mismatching inode mode
+and its dir item:
+
+ [ 1881.553937] BTRFS critical (device dm-0): inode mode mismatch with
+ dir: inode mode=040700 btrfs type=2 dir type=0
+
+[CAUSE]
+It looks like the inode mode is correct, while the dir item type
+0 is BTRFS_FT_UNKNOWN, which should not be generated by btrfs at all.
+
+This may be caused by a memory bit flip.
+
+[ENHANCEMENT]
+Although tree-checker is not able to do any cross-leaf verification, for
+this particular case we can at least reject any dir type with
+BTRFS_FT_UNKNOWN.
+
+So here we enhance the dir type check from [0, BTRFS_FT_MAX), to
+(0, BTRFS_FT_MAX).
+Although the existing corruption cannot be fixed just by such enhanced
+checking, it should prevent the same 0x2->0x0 bitflip for the dir type
+from reaching disk in the future.
+
+Reported-by: Kota <nospam@kota.moe>
+Link: https://lore.kernel.org/linux-btrfs/CACsxjPYnQF9ZF-0OhH16dAx50=BXXOcP74MxBc3BG+xae4vTTw@mail.gmail.com/
+CC: stable@vger.kernel.org # 5.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -551,9 +551,10 @@ static int check_dir_item(struct extent_
+
+ /* dir type check */
+ dir_type = btrfs_dir_ftype(leaf, di);
+- if (unlikely(dir_type >= BTRFS_FT_MAX)) {
++ if (unlikely(dir_type <= BTRFS_FT_UNKNOWN ||
++ dir_type >= BTRFS_FT_MAX)) {
+ dir_item_err(leaf, slot,
+- "invalid dir item type, have %u expect [0, %u)",
++ "invalid dir item type, have %u expect (0, %u)",
+ dir_type, BTRFS_FT_MAX);
+ return -EUCLEAN;
+ }
--- /dev/null
+From e30729d4bd4001881be4d1ad4332a5d4985398f8 Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Thu, 1 Aug 2024 16:47:52 +0900
+Subject: btrfs: zoned: properly take lock to read/update block group's zoned variables
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit e30729d4bd4001881be4d1ad4332a5d4985398f8 upstream.
+
+__btrfs_add_free_space_zoned() references and modifies bg's alloc_offset,
+ro, and zone_unusable, but without taking the lock. It is mostly safe
+because they monotonically increase (at least for now) and this function is
+mostly called by a transaction commit, which is serialized by itself.
+
+Still, taking the lock is a safer and correct option and I'm going to add a
+change to reset zone_unusable while a block group is still alive. So, add
+locking around the operations.
+
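+Condensed from the hunk below, the resulting lock nesting takes
+block_group->lock around the whole computation, with the short
+ctl->tree_lock section nested inside for the free-space update:
+
+    spin_lock(&block_group->lock);
+    ...
+    spin_lock(&ctl->tree_lock);
+    ctl->free_space += to_free;
+    spin_unlock(&ctl->tree_lock);
+    ...
+    spin_unlock(&block_group->lock);
+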
+Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones")
+CC: stable@vger.kernel.org # 5.15+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/free-space-cache.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/free-space-cache.c
++++ b/fs/btrfs/free-space-cache.c
+@@ -2698,15 +2698,16 @@ static int __btrfs_add_free_space_zoned(
+ u64 offset = bytenr - block_group->start;
+ u64 to_free, to_unusable;
+ int bg_reclaim_threshold = 0;
+- bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0));
++ bool initial;
+ u64 reclaimable_unusable;
+
+- WARN_ON(!initial && offset + size > block_group->zone_capacity);
++ spin_lock(&block_group->lock);
+
++ initial = ((size == block_group->length) && (block_group->alloc_offset == 0));
++ WARN_ON(!initial && offset + size > block_group->zone_capacity);
+ if (!initial)
+ bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold);
+
+- spin_lock(&ctl->tree_lock);
+ if (!used)
+ to_free = size;
+ else if (initial)
+@@ -2719,7 +2720,9 @@ static int __btrfs_add_free_space_zoned(
+ to_free = offset + size - block_group->alloc_offset;
+ to_unusable = size - to_free;
+
++ spin_lock(&ctl->tree_lock);
+ ctl->free_space += to_free;
++ spin_unlock(&ctl->tree_lock);
+ /*
+ * If the block group is read-only, we should account freed space into
+ * bytes_readonly.
+@@ -2728,11 +2731,8 @@ static int __btrfs_add_free_space_zoned(
+ block_group->zone_unusable += to_unusable;
+ WARN_ON(block_group->zone_unusable > block_group->length);
+ }
+- spin_unlock(&ctl->tree_lock);
+ if (!used) {
+- spin_lock(&block_group->lock);
+ block_group->alloc_offset -= size;
+- spin_unlock(&block_group->lock);
+ }
+
+ reclaimable_unusable = block_group->zone_unusable -
+@@ -2746,6 +2746,8 @@ static int __btrfs_add_free_space_zoned(
+ btrfs_mark_bg_to_reclaim(block_group);
+ }
+
++ spin_unlock(&block_group->lock);
++
+ return 0;
+ }
+
--- /dev/null
+From 2374bf7558de915edc6ec8cb10ec3291dfab9594 Mon Sep 17 00:00:00 2001
+From: Eli Billauer <eli.billauer@gmail.com>
+Date: Fri, 16 Aug 2024 10:02:00 +0300
+Subject: char: xillybus: Check USB endpoints when probing device
+
+From: Eli Billauer <eli.billauer@gmail.com>
+
+commit 2374bf7558de915edc6ec8cb10ec3291dfab9594 upstream.
+
+Ensure, as the driver probes the device, that all endpoints that the
+driver may attempt to access exist and are of the correct type.
+
+All XillyUSB devices must have a Bulk IN and Bulk OUT endpoint at
+address 1. This is verified in xillyusb_setup_base_eps().
+
+On top of that, a XillyUSB device may have additional Bulk OUT
+endpoints. The information about these endpoints' addresses is deduced
+from a data structure (the IDT) that the driver fetches from the device
+while probing it. These endpoints are checked in setup_channels().
+
+A XillyUSB device never has more than one IN endpoint, as all data
+towards the host is multiplexed in this single Bulk IN endpoint. This is
+why setup_channels() only checks OUT endpoints.
+
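+The fundamental-endpoint verification is a pair of
+usb_pipe_type_check() calls (from the hunk below); the helper returns
+nonzero when the endpoint does not exist or does not match the pipe
+type, in which case probing fails with -ENODEV:
+
+    if (usb_pipe_type_check(udev, usb_sndbulkpipe(udev, MSG_EP_NUM)) ||
+        usb_pipe_type_check(udev, usb_rcvbulkpipe(udev, IN_EP_NUM)))
+            return -ENODEV;
+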
+Reported-by: syzbot+eac39cba052f2e750dbe@syzkaller.appspotmail.com
+Cc: stable <stable@kernel.org>
+Closes: https://lore.kernel.org/all/0000000000001d44a6061f7a54ee@google.com/T/
+Fixes: a53d1202aef1 ("char: xillybus: Add driver for XillyUSB (Xillybus variant for USB)")
+Signed-off-by: Eli Billauer <eli.billauer@gmail.com>
+Link: https://lore.kernel.org/r/20240816070200.50695-2-eli.billauer@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/xillybus/xillyusb.c | 22 ++++++++++++++++++++--
+ 1 file changed, 20 insertions(+), 2 deletions(-)
+
+--- a/drivers/char/xillybus/xillyusb.c
++++ b/drivers/char/xillybus/xillyusb.c
+@@ -1903,6 +1903,13 @@ static const struct file_operations xill
+
+ static int xillyusb_setup_base_eps(struct xillyusb_dev *xdev)
+ {
++ struct usb_device *udev = xdev->udev;
++
++ /* Verify that device has the two fundamental bulk in/out endpoints */
++ if (usb_pipe_type_check(udev, usb_sndbulkpipe(udev, MSG_EP_NUM)) ||
++ usb_pipe_type_check(udev, usb_rcvbulkpipe(udev, IN_EP_NUM)))
++ return -ENODEV;
++
+ xdev->msg_ep = endpoint_alloc(xdev, MSG_EP_NUM | USB_DIR_OUT,
+ bulk_out_work, 1, 2);
+ if (!xdev->msg_ep)
+@@ -1932,14 +1939,15 @@ static int setup_channels(struct xillyus
+ __le16 *chandesc,
+ int num_channels)
+ {
+- struct xillyusb_channel *chan;
++ struct usb_device *udev = xdev->udev;
++ struct xillyusb_channel *chan, *new_channels;
+ int i;
+
+ chan = kcalloc(num_channels, sizeof(*chan), GFP_KERNEL);
+ if (!chan)
+ return -ENOMEM;
+
+- xdev->channels = chan;
++ new_channels = chan;
+
+ for (i = 0; i < num_channels; i++, chan++) {
+ unsigned int in_desc = le16_to_cpu(*chandesc++);
+@@ -1968,6 +1976,15 @@ static int setup_channels(struct xillyus
+ */
+
+ if ((out_desc & 0x80) && i < 14) { /* Entry is valid */
++ if (usb_pipe_type_check(udev,
++ usb_sndbulkpipe(udev, i + 2))) {
++ dev_err(xdev->dev,
++ "Missing BULK OUT endpoint %d\n",
++ i + 2);
++ kfree(new_channels);
++ return -ENODEV;
++ }
++
+ chan->writable = 1;
+ chan->out_synchronous = !!(out_desc & 0x40);
+ chan->out_seekable = !!(out_desc & 0x20);
+@@ -1977,6 +1994,7 @@ static int setup_channels(struct xillyus
+ }
+ }
+
++ xdev->channels = new_channels;
+ return 0;
+ }
+
--- /dev/null
+From ccbde4b128ef9c73d14d0d7817d68ef795f6d131 Mon Sep 17 00:00:00 2001
+From: Eli Billauer <eli.billauer@gmail.com>
+Date: Thu, 1 Aug 2024 15:11:26 +0300
+Subject: char: xillybus: Don't destroy workqueue from work item running on it
+
+From: Eli Billauer <eli.billauer@gmail.com>
+
+commit ccbde4b128ef9c73d14d0d7817d68ef795f6d131 upstream.
+
+Triggered by a kref decrement, destroy_workqueue() may be called from
+within a work item for destroying its own workqueue. This illegal
+situation is averted by adding a module-global workqueue for exclusive
+use of the offending work item. Other work items continue to be queued
+on per-device workqueues to ensure performance.
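+
+As a minimal sketch of the resulting pattern (struct and function names
+hypothetical, not the driver's exact code):
+
+  #include <linux/slab.h>
+  #include <linux/workqueue.h>
+
+  static struct workqueue_struct *module_wq; /* outlives all devices */
+
+  struct dev_priv {
+          struct workqueue_struct *wq;       /* per-device workqueue */
+          struct work_struct teardown;
+  };
+
+  static void teardown_work(struct work_struct *work)
+  {
+          struct dev_priv *priv =
+                  container_of(work, struct dev_priv, teardown);
+
+          /* Safe: this item runs on module_wq, not on the queue it
+           * is about to destroy. */
+          destroy_workqueue(priv->wq);
+          kfree(priv);
+  }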
+
+Reported-by: syzbot+91dbdfecdd3287734d8e@syzkaller.appspotmail.com
+Cc: stable <stable@kernel.org>
+Closes: https://lore.kernel.org/lkml/0000000000000ab25a061e1dfe9f@google.com/
+Signed-off-by: Eli Billauer <eli.billauer@gmail.com>
+Link: https://lore.kernel.org/r/20240801121126.60183-1-eli.billauer@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/xillybus/xillyusb.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/drivers/char/xillybus/xillyusb.c
++++ b/drivers/char/xillybus/xillyusb.c
+@@ -50,6 +50,7 @@ MODULE_LICENSE("GPL v2");
+ static const char xillyname[] = "xillyusb";
+
+ static unsigned int fifo_buf_order;
++static struct workqueue_struct *wakeup_wq;
+
+ #define USB_VENDOR_ID_XILINX 0x03fd
+ #define USB_VENDOR_ID_ALTERA 0x09fb
+@@ -569,10 +570,6 @@ static void cleanup_dev(struct kref *kre
+ * errors if executed. The mechanism relies on that xdev->error is assigned
+ * a non-zero value by report_io_error() prior to queueing wakeup_all(),
+ * which prevents bulk_in_work() from calling process_bulk_in().
+- *
+- * The fact that wakeup_all() and bulk_in_work() are queued on the same
+- * workqueue makes their concurrent execution very unlikely, however the
+- * kernel's API doesn't seem to ensure this strictly.
+ */
+
+ static void wakeup_all(struct work_struct *work)
+@@ -627,7 +624,7 @@ static void report_io_error(struct xilly
+
+ if (do_once) {
+ kref_get(&xdev->kref); /* xdev is used by work item */
+- queue_work(xdev->workq, &xdev->wakeup_workitem);
++ queue_work(wakeup_wq, &xdev->wakeup_workitem);
+ }
+ }
+
+@@ -2258,6 +2255,10 @@ static int __init xillyusb_init(void)
+ {
+ int rc = 0;
+
++ wakeup_wq = alloc_workqueue(xillyname, 0, 0);
++ if (!wakeup_wq)
++ return -ENOMEM;
++
+ if (LOG2_INITIAL_FIFO_BUF_SIZE > PAGE_SHIFT)
+ fifo_buf_order = LOG2_INITIAL_FIFO_BUF_SIZE - PAGE_SHIFT;
+ else
+@@ -2265,11 +2266,16 @@ static int __init xillyusb_init(void)
+
+ rc = usb_register(&xillyusb_driver);
+
++ if (rc)
++ destroy_workqueue(wakeup_wq);
++
+ return rc;
+ }
+
+ static void __exit xillyusb_exit(void)
+ {
++ destroy_workqueue(wakeup_wq);
++
+ usb_deregister(&xillyusb_driver);
+ }
+
--- /dev/null
+From ad899c301c880766cc709aad277991b3ab671b66 Mon Sep 17 00:00:00 2001
+From: Eli Billauer <eli.billauer@gmail.com>
+Date: Fri, 16 Aug 2024 10:01:59 +0300
+Subject: char: xillybus: Refine workqueue handling
+
+From: Eli Billauer <eli.billauer@gmail.com>
+
+commit ad899c301c880766cc709aad277991b3ab671b66 upstream.
+
+As the wakeup work item now runs on a separate workqueue, it needs to be
+flushed separately along with flushing the device's workqueue.
+
+Also, move the destroy_workqueue() call to the end of the exit method,
+so that deinitialization is done in the opposite order of
+initialization.
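+
+The rule being applied is that the exit path undoes steps in the
+reverse order of the init path. A hedged sketch (driver symbols
+abbreviated and hypothetical, not the driver's exact code):
+
+  #include <linux/module.h>
+  #include <linux/usb.h>
+  #include <linux/workqueue.h>
+
+  static struct workqueue_struct *wakeup_wq;
+  static struct usb_driver mod_driver;        /* hypothetical driver */
+
+  static int __init mod_init(void)
+  {
+          int rc;
+
+          wakeup_wq = alloc_workqueue("mod", 0, 0);   /* step 1 */
+          if (!wakeup_wq)
+                  return -ENOMEM;
+
+          rc = usb_register(&mod_driver);             /* step 2 */
+          if (rc)
+                  destroy_workqueue(wakeup_wq);       /* undo step 1 */
+
+          return rc;
+  }
+
+  static void __exit mod_exit(void)
+  {
+          usb_deregister(&mod_driver);        /* undo step 2 first */
+          destroy_workqueue(wakeup_wq);       /* then undo step 1 */
+  }
+
+  module_init(mod_init);
+  module_exit(mod_exit);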
+
+Fixes: ccbde4b128ef ("char: xillybus: Don't destroy workqueue from work item running on it")
+Cc: stable <stable@kernel.org>
+Signed-off-by: Eli Billauer <eli.billauer@gmail.com>
+Link: https://lore.kernel.org/r/20240816070200.50695-1-eli.billauer@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/xillybus/xillyusb.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/char/xillybus/xillyusb.c
++++ b/drivers/char/xillybus/xillyusb.c
+@@ -2093,9 +2093,11 @@ static int xillyusb_discovery(struct usb
+ * just after responding with the IDT, there is no reason for any
+ * work item to be running now. To be sure that xdev->channels
+ * is updated on anything that might run in parallel, flush the
+- * workqueue, which rarely does anything.
++ * device's workqueue and the wakeup work item. This rarely
++ * does anything.
+ */
+ flush_workqueue(xdev->workq);
++ flush_work(&xdev->wakeup_workitem);
+
+ xdev->num_channels = num_channels;
+
+@@ -2274,9 +2276,9 @@ static int __init xillyusb_init(void)
+
+ static void __exit xillyusb_exit(void)
+ {
+- destroy_workqueue(wakeup_wq);
+-
+ usb_deregister(&xillyusb_driver);
++
++ destroy_workqueue(wakeup_wq);
+ }
+
+ module_init(xillyusb_init);
--- /dev/null
+From faada2174c08662ae98b439c69efe3e79382c538 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Tue, 13 Aug 2024 16:35:14 +0200
+Subject: dm persistent data: fix memory allocation failure
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit faada2174c08662ae98b439c69efe3e79382c538 upstream.
+
+kmalloc is unreliable when allocating more than 8 pages of memory. It may
+fail when there is plenty of free memory but the memory is fragmented.
+Zdenek Kabelac observed such failure in his tests.
+
+This commit changes kmalloc to kvmalloc - kvmalloc will fall back to
+vmalloc if the large allocation fails.
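+
+The pattern in general (a hedged sketch, not this driver's exact code):
+any allocation that may exceed a few pages pairs kvmalloc() with
+kvfree(), which correctly frees either backend:
+
+  #include <linux/mm.h>
+  #include <linux/slab.h>
+
+  static void *alloc_big(size_t len)
+  {
+          /* Tries kmalloc() first; falls back to vmalloc() when the
+           * physically contiguous allocation fails. */
+          return kvmalloc(len, GFP_KERNEL);
+  }
+
+  /* Free with kvfree(ptr) no matter which path was taken. */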
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Reported-by: Zdenek Kabelac <zkabelac@redhat.com>
+Reviewed-by: Mike Snitzer <snitzer@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/persistent-data/dm-space-map-metadata.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/persistent-data/dm-space-map-metadata.c
++++ b/drivers/md/persistent-data/dm-space-map-metadata.c
+@@ -277,7 +277,7 @@ static void sm_metadata_destroy(struct d
+ {
+ struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+- kfree(smm);
++ kvfree(smm);
+ }
+
+ static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
+@@ -772,7 +772,7 @@ struct dm_space_map *dm_sm_metadata_init
+ {
+ struct sm_metadata *smm;
+
+- smm = kmalloc(sizeof(*smm), GFP_KERNEL);
++ smm = kvmalloc(sizeof(*smm), GFP_KERNEL);
+ if (!smm)
+ return ERR_PTR(-ENOMEM);
+
--- /dev/null
+From 7a636b4f03af9d541205f69e373672e7b2b60a8a Mon Sep 17 00:00:00 2001
+From: Khazhismel Kumykov <khazhy@google.com>
+Date: Tue, 13 Aug 2024 12:39:52 +0200
+Subject: dm resume: don't return EINVAL when signalled
+
+From: Khazhismel Kumykov <khazhy@google.com>
+
+commit 7a636b4f03af9d541205f69e373672e7b2b60a8a upstream.
+
+If the dm_resume method is called on a device that is not suspended, the
+method will suspend the device briefly, before resuming it (so that the
+table will be swapped).
+
+However, there was a bug that the return value of dm_suspend was not
+checked. dm_suspend may return an error when it is interrupted by a
+signal. In this case, do_resume would call dm_swap_table, which would
+return -EINVAL.
+
+This commit fixes the logic, so that the error returned by dm_suspend is
+checked and the resume operation is undone.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Khazhismel Kumykov <khazhy@google.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-ioctl.c | 22 ++++++++++++++++++++--
+ 1 file changed, 20 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/dm-ioctl.c
++++ b/drivers/md/dm-ioctl.c
+@@ -1181,8 +1181,26 @@ static int do_resume(struct dm_ioctl *pa
+ suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG;
+ if (param->flags & DM_NOFLUSH_FLAG)
+ suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG;
+- if (!dm_suspended_md(md))
+- dm_suspend(md, suspend_flags);
++ if (!dm_suspended_md(md)) {
++ r = dm_suspend(md, suspend_flags);
++ if (r) {
++ down_write(&_hash_lock);
++ hc = dm_get_mdptr(md);
++ if (hc && !hc->new_map) {
++ hc->new_map = new_map;
++ new_map = NULL;
++ } else {
++ r = -ENXIO;
++ }
++ up_write(&_hash_lock);
++ if (new_map) {
++ dm_sync_table(md);
++ dm_table_destroy(new_map);
++ }
++ dm_put(md);
++ return r;
++ }
++ }
+
+ old_size = dm_get_size(md);
+ old_map = dm_swap_table(md, new_map);
--- /dev/null
+From 0573a1e2ea7e35bff08944a40f1adf2bb35cea61 Mon Sep 17 00:00:00 2001
+From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+Date: Tue, 6 Aug 2024 22:27:32 +0200
+Subject: drm/amdgpu: Actually check flags for all context ops.
+
+From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+
+commit 0573a1e2ea7e35bff08944a40f1adf2bb35cea61 upstream.
+
+Missing validation ...
+
+Checked libdrm and it clears all the structs, so we should be
+safe to just check everything.
+
+Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+(cherry picked from commit c6b86421f1f9ddf9d706f2453159813ee39d0cf9)
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+@@ -685,16 +685,24 @@ int amdgpu_ctx_ioctl(struct drm_device *
+
+ switch (args->in.op) {
+ case AMDGPU_CTX_OP_ALLOC_CTX:
++ if (args->in.flags)
++ return -EINVAL;
+ r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
+ args->out.alloc.ctx_id = id;
+ break;
+ case AMDGPU_CTX_OP_FREE_CTX:
++ if (args->in.flags)
++ return -EINVAL;
+ r = amdgpu_ctx_free(fpriv, id);
+ break;
+ case AMDGPU_CTX_OP_QUERY_STATE:
++ if (args->in.flags)
++ return -EINVAL;
+ r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
+ break;
+ case AMDGPU_CTX_OP_QUERY_STATE2:
++ if (args->in.flags)
++ return -EINVAL;
+ r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
+ break;
+ case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
--- /dev/null
+From 9a2fa1472083580b6c66bdaf291f591e1170123a Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Sat, 3 Aug 2024 18:02:00 -0400
+Subject: fix bitmap corruption on close_range() with CLOSE_RANGE_UNSHARE
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 9a2fa1472083580b6c66bdaf291f591e1170123a upstream.
+
+copy_fd_bitmaps(new, old, count) is expected to copy the first
+count/BITS_PER_LONG bits from old->full_fds_bits[] and fill
+the rest with zeroes. What it actually does is copy enough words
+(BITS_TO_LONGS(count/BITS_PER_LONG)) and then memset the rest.
+That works fine, *if* all bits past the cutoff point are
+clear. Otherwise we are risking garbage from the last word
+we'd copied.
+
+For most of the callers that is true - expand_fdtable() has
+count equal to old->max_fds, so there's no open descriptors
+past count, let alone fully occupied words in ->open_fds[],
+which is what bits in ->full_fds_bits[] correspond to.
+
+The other caller (dup_fd()) passes sane_fdtable_size(old_fdt, max_fds),
+which is the smallest multiple of BITS_PER_LONG that covers all
+opened descriptors below max_fds. In the common case (copying on
+fork()) max_fds is ~0U, so all opened descriptors will be below
+it and we are fine, by the same reasons why the call in expand_fdtable()
+is safe.
+
+Unfortunately, there is a case where max_fds is less than that
+and where we might, indeed, end up with junk in ->full_fds_bits[] -
+close_range(from, to, CLOSE_RANGE_UNSHARE) with
+ * descriptor table being currently shared
+ * 'to' being above the current capacity of descriptor table
+ * 'from' being just under some chunk of opened descriptors.
+In that case we end up with observably wrong behaviour - e.g. spawn
+a child with CLONE_FILES, get all descriptors in range 0..127 open,
+then close_range(64, ~0U, CLOSE_RANGE_UNSHARE) and watch dup(0) ending
+up with descriptor #128, despite #64 being observably not open.
+
+The minimally invasive fix would be to deal with that in dup_fd().
+If this proves to add measurable overhead, we can go that way, but
+let's try to fix copy_fd_bitmaps() first.
+
+* new helper: bitmap_copy_and_extend(to, from, bits_to_copy, size).
+* make copy_fd_bitmaps() take the bitmap size in words, rather than
+bits; its 'count' argument is always a multiple of BITS_PER_LONG,
+so we are not losing any information, and that way we can use the
+same helper for all three bitmaps - the compiler will see that count
+is a multiple of BITS_PER_LONG for the large ones, so it'll generate
+plain memcpy()+memset().
+
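+For illustration, the helper's semantics in a hedged usage sketch (not
+part of the patch): copy the first 'count' bits, clear the tail of the
+last copied word, and zero the remainder of a 'size'-bit destination:
+
+  #include <linux/bitmap.h>
+
+  static void extend_demo(void)
+  {
+          DECLARE_BITMAP(src, 128);
+          DECLARE_BITMAP(dst, 256);
+
+          bitmap_fill(src, 128);
+          bitmap_copy_and_extend(dst, src, 64, 256);
+          /* dst: bits 0..63 set, bits 64..255 cleared */
+  }
+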
+Reproducer added to tools/testing/selftests/core/close_range_test.c
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/file.c | 28 ++++++++-----------
+ include/linux/bitmap.h | 12 ++++++++
+ tools/testing/selftests/core/close_range_test.c | 35 ++++++++++++++++++++++++
+ 3 files changed, 59 insertions(+), 16 deletions(-)
+
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -46,27 +46,23 @@ static void free_fdtable_rcu(struct rcu_
+ #define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr))
+ #define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long))
+
++#define fdt_words(fdt) ((fdt)->max_fds / BITS_PER_LONG) // words in ->open_fds
+ /*
+ * Copy 'count' fd bits from the old table to the new table and clear the extra
+ * space if any. This does not copy the file pointers. Called with the files
+ * spinlock held for write.
+ */
+-static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
+- unsigned int count)
++static inline void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
++ unsigned int copy_words)
+ {
+- unsigned int cpy, set;
++ unsigned int nwords = fdt_words(nfdt);
+
+- cpy = count / BITS_PER_BYTE;
+- set = (nfdt->max_fds - count) / BITS_PER_BYTE;
+- memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
+- memset((char *)nfdt->open_fds + cpy, 0, set);
+- memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
+- memset((char *)nfdt->close_on_exec + cpy, 0, set);
+-
+- cpy = BITBIT_SIZE(count);
+- set = BITBIT_SIZE(nfdt->max_fds) - cpy;
+- memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy);
+- memset((char *)nfdt->full_fds_bits + cpy, 0, set);
++ bitmap_copy_and_extend(nfdt->open_fds, ofdt->open_fds,
++ copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG);
++ bitmap_copy_and_extend(nfdt->close_on_exec, ofdt->close_on_exec,
++ copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG);
++ bitmap_copy_and_extend(nfdt->full_fds_bits, ofdt->full_fds_bits,
++ copy_words, nwords);
+ }
+
+ /*
+@@ -84,7 +80,7 @@ static void copy_fdtable(struct fdtable
+ memcpy(nfdt->fd, ofdt->fd, cpy);
+ memset((char *)nfdt->fd + cpy, 0, set);
+
+- copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
++ copy_fd_bitmaps(nfdt, ofdt, fdt_words(ofdt));
+ }
+
+ /*
+@@ -379,7 +375,7 @@ struct files_struct *dup_fd(struct files
+ open_files = sane_fdtable_size(old_fdt, max_fds);
+ }
+
+- copy_fd_bitmaps(new_fdt, old_fdt, open_files);
++ copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG);
+
+ old_fds = old_fdt->fd;
+ new_fds = new_fdt->fd;
+--- a/include/linux/bitmap.h
++++ b/include/linux/bitmap.h
+@@ -270,6 +270,18 @@ static inline void bitmap_copy_clear_tai
+ dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits);
+ }
+
++static inline void bitmap_copy_and_extend(unsigned long *to,
++ const unsigned long *from,
++ unsigned int count, unsigned int size)
++{
++ unsigned int copy = BITS_TO_LONGS(count);
++
++ memcpy(to, from, copy * sizeof(long));
++ if (count % BITS_PER_LONG)
++ to[copy - 1] &= BITMAP_LAST_WORD_MASK(count);
++ memset(to + copy, 0, bitmap_size(size) - copy * sizeof(long));
++}
++
+ /*
+ * On 32-bit systems bitmaps are represented as u32 arrays internally. On LE64
+ * machines the order of hi and lo parts of numbers match the bitmap structure.
+--- a/tools/testing/selftests/core/close_range_test.c
++++ b/tools/testing/selftests/core/close_range_test.c
+@@ -589,4 +589,39 @@ TEST(close_range_cloexec_unshare_syzbot)
+ EXPECT_EQ(close(fd3), 0);
+ }
+
++TEST(close_range_bitmap_corruption)
++{
++ pid_t pid;
++ int status;
++ struct __clone_args args = {
++ .flags = CLONE_FILES,
++ .exit_signal = SIGCHLD,
++ };
++
++ /* get the first 128 descriptors open */
++ for (int i = 2; i < 128; i++)
++ EXPECT_GE(dup2(0, i), 0);
++
++ /* get descriptor table shared */
++ pid = sys_clone3(&args, sizeof(args));
++ ASSERT_GE(pid, 0);
++
++ if (pid == 0) {
++ /* unshare and truncate descriptor table down to 64 */
++ if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE))
++ exit(EXIT_FAILURE);
++
++ ASSERT_EQ(fcntl(64, F_GETFD), -1);
++ /* ... and verify that the range 64..127 is not
++ stuck "fully used" according to secondary bitmap */
++ EXPECT_EQ(dup(0), 64)
++ exit(EXIT_FAILURE);
++ exit(EXIT_SUCCESS);
++ }
++
++ EXPECT_EQ(waitpid(pid, &status, 0), pid);
++ EXPECT_EQ(true, WIFEXITED(status));
++ EXPECT_EQ(0, WEXITSTATUS(status));
++}
++
+ TEST_HARNESS_MAIN
--- /dev/null
+From f71aa06398aabc2e3eaac25acdf3d62e0094ba70 Mon Sep 17 00:00:00 2001
+From: Max Kellermann <max.kellermann@ionos.com>
+Date: Mon, 29 Jul 2024 17:19:30 +0100
+Subject: fs/netfs/fscache_cookie: add missing "n_accesses" check
+
+From: Max Kellermann <max.kellermann@ionos.com>
+
+commit f71aa06398aabc2e3eaac25acdf3d62e0094ba70 upstream.
+
+This fixes a NULL pointer dereference bug due to a data race which
+looks like this:
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000008
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 0 P4D 0
+ Oops: 0000 [#1] SMP PTI
+ CPU: 33 PID: 16573 Comm: kworker/u97:799 Not tainted 6.8.7-cm4all1-hp+ #43
+ Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 10/17/2018
+ Workqueue: events_unbound netfs_rreq_write_to_cache_work
+ RIP: 0010:cachefiles_prepare_write+0x30/0xa0
+ Code: 57 41 56 45 89 ce 41 55 49 89 cd 41 54 49 89 d4 55 53 48 89 fb 48 83 ec 08 48 8b 47 08 48 83 7f 10 00 48 89 34 24 48 8b 68 20 <48> 8b 45 08 4c 8b 38 74 45 49 8b 7f 50 e8 4e a9 b0 ff 48 8b 73 10
+ RSP: 0018:ffffb4e78113bde0 EFLAGS: 00010286
+ RAX: ffff976126be6d10 RBX: ffff97615cdb8438 RCX: 0000000000020000
+ RDX: ffff97605e6c4c68 RSI: ffff97605e6c4c60 RDI: ffff97615cdb8438
+ RBP: 0000000000000000 R08: 0000000000278333 R09: 0000000000000001
+ R10: ffff97605e6c4600 R11: 0000000000000001 R12: ffff97605e6c4c68
+ R13: 0000000000020000 R14: 0000000000000001 R15: ffff976064fe2c00
+ FS: 0000000000000000(0000) GS:ffff9776dfd40000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000000000008 CR3: 000000005942c002 CR4: 00000000001706f0
+ Call Trace:
+ <TASK>
+ ? __die+0x1f/0x70
+ ? page_fault_oops+0x15d/0x440
+ ? search_module_extables+0xe/0x40
+ ? fixup_exception+0x22/0x2f0
+ ? exc_page_fault+0x5f/0x100
+ ? asm_exc_page_fault+0x22/0x30
+ ? cachefiles_prepare_write+0x30/0xa0
+ netfs_rreq_write_to_cache_work+0x135/0x2e0
+ process_one_work+0x137/0x2c0
+ worker_thread+0x2e9/0x400
+ ? __pfx_worker_thread+0x10/0x10
+ kthread+0xcc/0x100
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork+0x30/0x50
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork_asm+0x1b/0x30
+ </TASK>
+ Modules linked in:
+ CR2: 0000000000000008
+ ---[ end trace 0000000000000000 ]---
+
+This happened because fscache_cookie_state_machine() was slow and was
+still running while another process invoked fscache_unuse_cookie();
+this led to a fscache_cookie_lru_do_one() call, setting the
+FSCACHE_COOKIE_DO_LRU_DISCARD flag, which was picked up by
+fscache_cookie_state_machine(), withdrawing the cookie via
+cachefiles_withdraw_cookie(), clearing cookie->cache_priv.
+
+At the same time, yet another process invoked
+cachefiles_prepare_write(), which found a NULL pointer in this code
+line:
+
+ struct cachefiles_object *object = cachefiles_cres_object(cres);
+
+The next line crashes, obviously:
+
+ struct cachefiles_cache *cache = object->volume->cache;
+
+During cachefiles_prepare_write(), the "n_accesses" counter is
+non-zero (via fscache_begin_operation()). The cookie must not be
+withdrawn until it drops to zero.
+
+The counter is checked by fscache_cookie_state_machine() before
+switching to FSCACHE_COOKIE_STATE_RELINQUISHING and
+FSCACHE_COOKIE_STATE_WITHDRAWING (in "case
+FSCACHE_COOKIE_STATE_FAILED"), but not for
+FSCACHE_COOKIE_STATE_LRU_DISCARDING ("case
+FSCACHE_COOKIE_STATE_ACTIVE").
+
+This patch adds the missing check. With a non-zero access counter,
+the function returns and the next fscache_end_cookie_access() call
+will queue another fscache_cookie_state_machine() call to handle the
+still-pending FSCACHE_COOKIE_DO_LRU_DISCARD.
+
+Fixes: 12bb21a29c19 ("fscache: Implement cookie user counting and resource pinning")
+Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Link: https://lore.kernel.org/r/20240729162002.3436763-2-dhowells@redhat.com
+cc: Jeff Layton <jlayton@kernel.org>
+cc: netfs@lists.linux.dev
+cc: linux-fsdevel@vger.kernel.org
+cc: stable@vger.kernel.org
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/netfs/fscache_cookie.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/netfs/fscache_cookie.c
++++ b/fs/netfs/fscache_cookie.c
+@@ -741,6 +741,10 @@ again_locked:
+ spin_lock(&cookie->lock);
+ }
+ if (test_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags)) {
++ if (atomic_read(&cookie->n_accesses) != 0)
++ /* still being accessed: postpone it */
++ break;
++
+ __fscache_set_cookie_state(cookie,
+ FSCACHE_COOKIE_STATE_LRU_DISCARDING);
+ wake = true;
--- /dev/null
+From 4e91fa1ef3ce6290b4c598e54b5eb6cf134fbec8 Mon Sep 17 00:00:00 2001
+From: Andi Shyti <andi.shyti@kernel.org>
+Date: Mon, 12 Aug 2024 21:40:28 +0200
+Subject: i2c: qcom-geni: Add missing geni_icc_disable in geni_i2c_runtime_resume
+
+From: Andi Shyti <andi.shyti@kernel.org>
+
+commit 4e91fa1ef3ce6290b4c598e54b5eb6cf134fbec8 upstream.
+
+Add the missing geni_icc_disable() call before returning in the
+geni_i2c_runtime_resume() function.
+
+Commit 9ba48db9f77c ("i2c: qcom-geni: Add missing
+geni_icc_enable in geni_i2c_runtime_resume") by Gaosheng missed
+disabling the interconnect in one case.
+
+Fixes: bf225ed357c6 ("i2c: i2c-qcom-geni: Add interconnect support")
+Cc: Gaosheng Cui <cuigaosheng1@huawei.com>
+Cc: stable@vger.kernel.org # v5.9+
+Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-qcom-geni.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/i2c/busses/i2c-qcom-geni.c
++++ b/drivers/i2c/busses/i2c-qcom-geni.c
+@@ -986,8 +986,10 @@ static int __maybe_unused geni_i2c_runti
+ return ret;
+
+ ret = clk_prepare_enable(gi2c->core_clk);
+- if (ret)
++ if (ret) {
++ geni_icc_disable(&gi2c->se);
+ return ret;
++ }
+
+ ret = geni_se_resources_on(&gi2c->se);
+ if (ret) {
--- /dev/null
+From 14d069d92951a3e150c0a81f2ca3b93e54da913b Mon Sep 17 00:00:00 2001
+From: Breno Leitao <leitao@debian.org>
+Date: Tue, 13 Aug 2024 09:12:53 -0700
+Subject: i2c: tegra: Do not mark ACPI devices as irq safe
+
+From: Breno Leitao <leitao@debian.org>
+
+commit 14d069d92951a3e150c0a81f2ca3b93e54da913b upstream.
+
+On ACPI machines, the tegra i2c module encounters an issue due to a
+mutex being taken inside a spinlock. This leads to the following bug:
+
+ BUG: sleeping function called from invalid context at kernel/locking/mutex.c:585
+ ...
+
+ Call trace:
+ __might_sleep
+ __mutex_lock_common
+ mutex_lock_nested
+ acpi_subsys_runtime_resume
+ rpm_resume
+ tegra_i2c_xfer
+
+The problem arises because during __pm_runtime_resume(), the spinlock
+&dev->power.lock is acquired before rpm_resume() is called. Later,
+rpm_resume() invokes acpi_subsys_runtime_resume(), which relies on
+mutexes, triggering the error.
+
+To address this issue, devices on ACPI are now marked as not IRQ-safe,
+considering the dependency of acpi_subsys_runtime_resume() on mutexes.
+
+Fixes: bd2fdedbf2ba ("i2c: tegra: Add the ACPI support")
+Cc: <stable@vger.kernel.org> # v5.17+
+Co-developed-by: Michael van der Westhuizen <rmikey@meta.com>
+Signed-off-by: Michael van der Westhuizen <rmikey@meta.com>
+Signed-off-by: Breno Leitao <leitao@debian.org>
+Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
+Reviewed-by: Andy Shevchenko <andy@kernel.org>
+Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-tegra.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-tegra.c
++++ b/drivers/i2c/busses/i2c-tegra.c
+@@ -1802,9 +1802,9 @@ static int tegra_i2c_probe(struct platfo
+ * domain.
+ *
+ * VI I2C device shouldn't be marked as IRQ-safe because VI I2C won't
+- * be used for atomic transfers.
++ * be used for atomic transfers. ACPI device is not IRQ safe also.
+ */
+- if (!IS_VI(i2c_dev))
++ if (!IS_VI(i2c_dev) && !has_acpi_companion(i2c_dev->dev))
+ pm_runtime_irq_safe(i2c_dev->dev);
+
+ pm_runtime_enable(i2c_dev->dev);
--- /dev/null
+From 0e28bf61a5f9ab30be3f3b4eafb8d097e39446bb Mon Sep 17 00:00:00 2001
+From: David Gstir <david@sigma-star.at>
+Date: Wed, 17 Jul 2024 13:28:45 +0200
+Subject: KEYS: trusted: dcp: fix leak of blob encryption key
+
+From: David Gstir <david@sigma-star.at>
+
+commit 0e28bf61a5f9ab30be3f3b4eafb8d097e39446bb upstream.
+
+Trusted keys unseal the key blob on load, but keep the sealed payload in
+the blob field so that every subsequent read (export) will simply
+convert this field to hex and send it to userspace.
+
+With DCP-based trusted keys, we decrypt the blob encryption key (BEK)
+in the kernel due to hardware limitations and then decrypt the blob payload.
+BEK decryption is done in-place, which means that the trusted key blob
+field is modified and consequently holds the BEK in plain text.
+Every subsequent read of that key thus sends the plain text BEK instead
+of the encrypted BEK to userspace.
+
+This issue only occurs when importing a trusted DCP-based key and
+then exporting it again. This should rarely happen as the common use cases
+are to either create a new trusted key and export it, or import a key
+blob and then just use it without exporting it again.
+
+Fix this by performing BEK decryption and encryption in a dedicated
+buffer. Further, always wipe the plain text BEK buffer to prevent leaking
+the key via uninitialized memory.
+
+Cc: stable@vger.kernel.org # v6.10+
+Fixes: 2e8a0f40a39c ("KEYS: trusted: Introduce NXP DCP-backed trusted keys")
+Signed-off-by: David Gstir <david@sigma-star.at>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/keys/trusted-keys/trusted_dcp.c | 33 +++++++++++++++---------
+ 1 file changed, 21 insertions(+), 12 deletions(-)
+
+diff --git a/security/keys/trusted-keys/trusted_dcp.c b/security/keys/trusted-keys/trusted_dcp.c
+index b0947f072a98..4edc5bbbcda3 100644
+--- a/security/keys/trusted-keys/trusted_dcp.c
++++ b/security/keys/trusted-keys/trusted_dcp.c
+@@ -186,20 +186,21 @@ static int do_aead_crypto(u8 *in, u8 *out, size_t len, u8 *key, u8 *nonce,
+ return ret;
+ }
+
+-static int decrypt_blob_key(u8 *key)
++static int decrypt_blob_key(u8 *encrypted_key, u8 *plain_key)
+ {
+- return do_dcp_crypto(key, key, false);
++ return do_dcp_crypto(encrypted_key, plain_key, false);
+ }
+
+-static int encrypt_blob_key(u8 *key)
++static int encrypt_blob_key(u8 *plain_key, u8 *encrypted_key)
+ {
+- return do_dcp_crypto(key, key, true);
++ return do_dcp_crypto(plain_key, encrypted_key, true);
+ }
+
+ static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob)
+ {
+ struct dcp_blob_fmt *b = (struct dcp_blob_fmt *)p->blob;
+ int blen, ret;
++ u8 plain_blob_key[AES_KEYSIZE_128];
+
+ blen = calc_blob_len(p->key_len);
+ if (blen > MAX_BLOB_SIZE)
+@@ -207,30 +208,36 @@ static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob)
+
+ b->fmt_version = DCP_BLOB_VERSION;
+ get_random_bytes(b->nonce, AES_KEYSIZE_128);
+- get_random_bytes(b->blob_key, AES_KEYSIZE_128);
++ get_random_bytes(plain_blob_key, AES_KEYSIZE_128);
+
+- ret = do_aead_crypto(p->key, b->payload, p->key_len, b->blob_key,
++ ret = do_aead_crypto(p->key, b->payload, p->key_len, plain_blob_key,
+ b->nonce, true);
+ if (ret) {
+ pr_err("Unable to encrypt blob payload: %i\n", ret);
+- return ret;
++ goto out;
+ }
+
+- ret = encrypt_blob_key(b->blob_key);
++ ret = encrypt_blob_key(plain_blob_key, b->blob_key);
+ if (ret) {
+ pr_err("Unable to encrypt blob key: %i\n", ret);
+- return ret;
++ goto out;
+ }
+
+ put_unaligned_le32(p->key_len, &b->payload_len);
+ p->blob_len = blen;
+- return 0;
++ ret = 0;
++
++out:
++ memzero_explicit(plain_blob_key, sizeof(plain_blob_key));
++
++ return ret;
+ }
+
+ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob)
+ {
+ struct dcp_blob_fmt *b = (struct dcp_blob_fmt *)p->blob;
+ int blen, ret;
++ u8 plain_blob_key[AES_KEYSIZE_128];
+
+ if (b->fmt_version != DCP_BLOB_VERSION) {
+ pr_err("DCP blob has bad version: %i, expected %i\n",
+@@ -248,14 +255,14 @@ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob)
+ goto out;
+ }
+
+- ret = decrypt_blob_key(b->blob_key);
++ ret = decrypt_blob_key(b->blob_key, plain_blob_key);
+ if (ret) {
+ pr_err("Unable to decrypt blob key: %i\n", ret);
+ goto out;
+ }
+
+ ret = do_aead_crypto(b->payload, p->key, p->key_len + DCP_BLOB_AUTHLEN,
+- b->blob_key, b->nonce, false);
++ plain_blob_key, b->nonce, false);
+ if (ret) {
+ pr_err("Unwrap of DCP payload failed: %i\n", ret);
+ goto out;
+@@ -263,6 +270,8 @@ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob)
+
+ ret = 0;
+ out:
++ memzero_explicit(plain_blob_key, sizeof(plain_blob_key));
++
+ return ret;
+ }
+
+--
+2.46.0
+
--- /dev/null
+From 6486cad00a8b7f8585983408c152bbe33dda529b Mon Sep 17 00:00:00 2001
+From: David Gstir <david@sigma-star.at>
+Date: Wed, 17 Jul 2024 13:28:44 +0200
+Subject: KEYS: trusted: fix DCP blob payload length assignment
+
+From: David Gstir <david@sigma-star.at>
+
+commit 6486cad00a8b7f8585983408c152bbe33dda529b upstream.
+
+The DCP trusted key type uses the wrong helper function to store
+the blob's payload length, which can lead to the wrong byte order
+being used if this ever runs on big endian architectures.
+
+Fix by using the correct helper function.
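+
+For illustration (a hedged sketch): the two helpers go in opposite
+directions, so storing a value with the getter only appears to work on
+little-endian machines:
+
+  #include <linux/types.h>
+  #include <asm/unaligned.h>
+
+  static void endian_demo(void)
+  {
+          u8 buf[4];
+          u32 len = 0x11223344;
+
+          put_unaligned_le32(len, buf);  /* cpu -> LE bytes 44 33 22 11 */
+          len = get_unaligned_le32(buf); /* LE bytes -> cpu value */
+  }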
+
+Cc: stable@vger.kernel.org # v6.10+
+Fixes: 2e8a0f40a39c ("KEYS: trusted: Introduce NXP DCP-backed trusted keys")
+Suggested-by: Richard Weinberger <richard@nod.at>
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202405240610.fj53EK0q-lkp@intel.com/
+Signed-off-by: David Gstir <david@sigma-star.at>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/keys/trusted-keys/trusted_dcp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/security/keys/trusted-keys/trusted_dcp.c b/security/keys/trusted-keys/trusted_dcp.c
+index b5f81a05be36..b0947f072a98 100644
+--- a/security/keys/trusted-keys/trusted_dcp.c
++++ b/security/keys/trusted-keys/trusted_dcp.c
+@@ -222,7 +222,7 @@ static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob)
+ return ret;
+ }
+
+- b->payload_len = get_unaligned_le32(&p->key_len);
++ put_unaligned_le32(p->key_len, &b->payload_len);
+ p->blob_len = blen;
+ return 0;
+ }
+--
+2.46.0
+
--- /dev/null
+From 5a44bb061d04b0306f2aa8add761d86d152b9377 Mon Sep 17 00:00:00 2001
+From: Michael Mueller <mimu@linux.ibm.com>
+Date: Thu, 1 Aug 2024 14:31:09 +0200
+Subject: KVM: s390: fix validity interception issue when gisa is switched off
+
+From: Michael Mueller <mimu@linux.ibm.com>
+
+commit 5a44bb061d04b0306f2aa8add761d86d152b9377 upstream.
+
+We might run into a SIE validity intercept if gisa has been disabled,
+either via the kernel parameter "kvm.use_gisa=0" or by setting the
+related sysfs attribute to N (echo N >/sys/module/kvm/parameters/use_gisa).
+
+The validity is caused by an invalid value in the SIE control block's
+gisa designation. That happens because we pass the uninitialized gisa
+origin to virt_to_phys() before writing it to the gisa designation.
+
+To fix this we return 0 in kvm_s390_get_gisa_desc() if the origin is 0.
+kvm_s390_get_gisa_desc() is used to determine which gisa designation to
+set in the SIE control block. A value of 0 in the gisa designation disables
+gisa usage.
+
+The issue surfaces in the host kernel with the following kernel message as
+soon as a new kvm guest start is attempted.
+
+kvm: unhandled validity intercept 0x1011
+WARNING: CPU: 0 PID: 781237 at arch/s390/kvm/intercept.c:101 kvm_handle_sie_intercept+0x42e/0x4d0 [kvm]
+Modules linked in: vhost_net tap tun xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT xt_tcpudp nft_compat x_tables nf_nat_tftp nf_conntrack_tftp vfio_pci_core irqbypass vhost_vsock vmw_vsock_virtio_transport_common vsock vhost vhost_iotlb kvm nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables sunrpc mlx5_ib ib_uverbs ib_core mlx5_core uvdevice s390_trng eadm_sch vfio_ccw zcrypt_cex4 mdev vfio_iommu_type1 vfio sch_fq_codel drm i2c_core loop drm_panel_orientation_quirks configfs nfnetlink lcs ctcm fsm dm_service_time ghash_s390 prng chacha_s390 libchacha aes_s390 des_s390 libdes sha3_512_s390 sha3_256_s390 sha512_s390 sha256_s390 sha1_s390 sha_common dm_mirror dm_region_hash dm_log zfcp scsi_transport_fc scsi_dh_rdac scsi_dh_emc scsi_dh_alua pkey zcrypt dm_multipath rng_core autofs4 [last unloaded: vfio_pci]
+CPU: 0 PID: 781237 Comm: CPU 0/KVM Not tainted 6.10.0-08682-gcad9f11498ea #6
+Hardware name: IBM 3931 A01 701 (LPAR)
+Krnl PSW : 0704c00180000000 000003d93deb0122 (kvm_handle_sie_intercept+0x432/0x4d0 [kvm])
+ R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3
+Krnl GPRS: 000003d900000027 000003d900000023 0000000000000028 000002cd00000000
+ 000002d063a00900 00000359c6daf708 00000000000bebb5 0000000000001eff
+ 000002cfd82e9000 000002cfd80bc000 0000000000001011 000003d93deda412
+ 000003ff8962df98 000003d93de77ce0 000003d93deb011e 00000359c6daf960
+Krnl Code: 000003d93deb0112: c020fffe7259 larl %r2,000003d93de7e5c4
+ 000003d93deb0118: c0e53fa8beac brasl %r14,000003d9bd3c7e70
+ #000003d93deb011e: af000000 mc 0,0
+ >000003d93deb0122: a728ffea lhi %r2,-22
+ 000003d93deb0126: a7f4fe24 brc 15,000003d93deafd6e
+ 000003d93deb012a: 9101f0b0 tm 176(%r15),1
+ 000003d93deb012e: a774fe48 brc 7,000003d93deafdbe
+ 000003d93deb0132: 40a0f0ae sth %r10,174(%r15)
+Call Trace:
+ [<000003d93deb0122>] kvm_handle_sie_intercept+0x432/0x4d0 [kvm]
+([<000003d93deb011e>] kvm_handle_sie_intercept+0x42e/0x4d0 [kvm])
+ [<000003d93deacc10>] vcpu_post_run+0x1d0/0x3b0 [kvm]
+ [<000003d93deaceda>] __vcpu_run+0xea/0x2d0 [kvm]
+ [<000003d93dead9da>] kvm_arch_vcpu_ioctl_run+0x16a/0x430 [kvm]
+ [<000003d93de93ee0>] kvm_vcpu_ioctl+0x190/0x7c0 [kvm]
+ [<000003d9bd728b4e>] vfs_ioctl+0x2e/0x70
+ [<000003d9bd72a092>] __s390x_sys_ioctl+0xc2/0xd0
+ [<000003d9be0e9222>] __do_syscall+0x1f2/0x2e0
+ [<000003d9be0f9a90>] system_call+0x70/0x98
+Last Breaking-Event-Address:
+ [<000003d9bd3c7f58>] __warn_printk+0xe8/0xf0
+
+Cc: stable@vger.kernel.org
+Reported-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Fixes: fe0ef0030463 ("KVM: s390: sort out physical vs virtual pointers usage")
+Signed-off-by: Michael Mueller <mimu@linux.ibm.com>
+Tested-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
+Link: https://lore.kernel.org/r/20240801123109.2782155-1-mimu@linux.ibm.com
+Message-ID: <20240801123109.2782155-1-mimu@linux.ibm.com>
+Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kvm/kvm-s390.h | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/kvm/kvm-s390.h
++++ b/arch/s390/kvm/kvm-s390.h
+@@ -267,7 +267,12 @@ static inline unsigned long kvm_s390_get
+
+ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
+ {
+- u32 gd = virt_to_phys(kvm->arch.gisa_int.origin);
++ u32 gd;
++
++ if (!kvm->arch.gisa_int.origin)
++ return 0;
++
++ gd = virt_to_phys(kvm->arch.gisa_int.origin);
+
+ if (gd && sclp.has_gisaf)
+ gd |= GISA_FORMAT1;
--- /dev/null
+From c916ca35308d3187c9928664f9be249b22a3a701 Mon Sep 17 00:00:00 2001
+From: Yu Kuai <yukuai3@huawei.com>
+Date: Sat, 3 Aug 2024 17:11:37 +0800
+Subject: md/raid1: Fix data corruption for degraded array with slow disk
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+commit c916ca35308d3187c9928664f9be249b22a3a701 upstream.
+
+read_balance() will avoid reading from slow disks as much as possible,
+however, if valid data only lands in slow disks, and a new normal disk
+is still in recovery, unrecovered data can be read:
+
+raid1_read_request
+ read_balance
+ raid1_should_read_first
+ -> return false
+ choose_best_rdev
+ -> normal disk is not recovered, return -1
+ choose_bb_rdev
+ -> missing the checking of recovery, return the normal disk
+ -> read unrecovered data
+
+The root cause is that the recovery check is missing in
+choose_bb_rdev(). Hence add such a check to fix the problem.
+
+Also fix a similar problem in choose_slow_rdev().
+
+Cc: stable@vger.kernel.org
+Fixes: 9f3ced792203 ("md/raid1: factor out choose_bb_rdev() from read_balance()")
+Fixes: dfa8ecd167c1 ("md/raid1: factor out choose_slow_rdev() from read_balance()")
+Reported-and-tested-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Closes: https://lore.kernel.org/all/9952f532-2554-44bf-b906-4880b2e88e3a@o2.pl/
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Link: https://lore.kernel.org/r/20240803091137.3197008-1-yukuai1@huaweicloud.com
+Signed-off-by: Song Liu <song@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/raid1.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
+index 7acfe7c9dc8d..761989d67906 100644
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -617,6 +617,12 @@ static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio,
+ return -1;
+ }
+
++static bool rdev_in_recovery(struct md_rdev *rdev, struct r1bio *r1_bio)
++{
++ return !test_bit(In_sync, &rdev->flags) &&
++ rdev->recovery_offset < r1_bio->sector + r1_bio->sectors;
++}
++
+ static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio,
+ int *max_sectors)
+ {
+@@ -635,6 +641,7 @@ static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio,
+
+ rdev = conf->mirrors[disk].rdev;
+ if (!rdev || test_bit(Faulty, &rdev->flags) ||
++ rdev_in_recovery(rdev, r1_bio) ||
+ test_bit(WriteMostly, &rdev->flags))
+ continue;
+
+@@ -673,7 +680,8 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio,
+
+ rdev = conf->mirrors[disk].rdev;
+ if (!rdev || test_bit(Faulty, &rdev->flags) ||
+- !test_bit(WriteMostly, &rdev->flags))
++ !test_bit(WriteMostly, &rdev->flags) ||
++ rdev_in_recovery(rdev, r1_bio))
+ continue;
+
+ /* there are no bad blocks, we can use this disk */
+@@ -733,9 +741,7 @@ static bool rdev_readable(struct md_rdev *rdev, struct r1bio *r1_bio)
+ if (!rdev || test_bit(Faulty, &rdev->flags))
+ return false;
+
+- /* still in recovery */
+- if (!test_bit(In_sync, &rdev->flags) &&
+- rdev->recovery_offset < r1_bio->sector + r1_bio->sectors)
++ if (rdev_in_recovery(rdev, r1_bio))
+ return false;
+
+ /* don't read from slow disk unless have to */
+--
+2.46.0
+
--- /dev/null
+From 63de936b513f7a9ce559194d3269ac291f4f4662 Mon Sep 17 00:00:00 2001
+From: Hans de Goede <hdegoede@redhat.com>
+Date: Sun, 21 Jul 2024 17:38:40 +0200
+Subject: media: atomisp: Fix streaming no longer working on BYT / ISP2400 devices
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+commit 63de936b513f7a9ce559194d3269ac291f4f4662 upstream.
+
+Commit a0821ca14bb8 ("media: atomisp: Remove test pattern generator (TPG)
+support") broke BYT support because it removed a seemingly unused field
+from struct sh_css_sp_config and a seemingly unused value from enum
+ia_css_input_mode.
+
+But these are part of the ABI between the kernel and firmware on ISP2400,
+so this part of the TPG support removal broke ISP2400 support.
+
+ISP2401 support was not affected because on ISP2401 only a part of
+struct sh_css_sp_config is used.
+
+Restore the removed field and enum value to fix this.
+
+Fixes: a0821ca14bb8 ("media: atomisp: Remove test pattern generator (TPG) support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/staging/media/atomisp/pci/ia_css_stream_public.h | 8 ++++--
+ drivers/staging/media/atomisp/pci/sh_css_internal.h | 19 ++++++++++++---
+ 2 files changed, 22 insertions(+), 5 deletions(-)
+
+--- a/drivers/staging/media/atomisp/pci/ia_css_stream_public.h
++++ b/drivers/staging/media/atomisp/pci/ia_css_stream_public.h
+@@ -27,12 +27,16 @@
+ #include "ia_css_prbs.h"
+ #include "ia_css_input_port.h"
+
+-/* Input modes, these enumerate all supported input modes.
+- * Note that not all ISP modes support all input modes.
++/*
++ * Input modes, these enumerate all supported input modes.
++ * This enum is part of the atomisp firmware ABI and must
++ * NOT be changed!
++ * Note that not all ISP modes support all input modes.
+ */
+ enum ia_css_input_mode {
+ IA_CSS_INPUT_MODE_SENSOR, /** data from sensor */
+ IA_CSS_INPUT_MODE_FIFO, /** data from input-fifo */
++ IA_CSS_INPUT_MODE_TPG, /** data from test-pattern generator */
+ IA_CSS_INPUT_MODE_PRBS, /** data from pseudo-random bit stream */
+ IA_CSS_INPUT_MODE_MEMORY, /** data from a frame in memory */
+ IA_CSS_INPUT_MODE_BUFFERED_SENSOR /** data is sent through mipi buffer */
+--- a/drivers/staging/media/atomisp/pci/sh_css_internal.h
++++ b/drivers/staging/media/atomisp/pci/sh_css_internal.h
+@@ -341,7 +341,14 @@ struct sh_css_sp_input_formatter_set {
+
+ #define IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT (3)
+
+-/* SP configuration information */
++/*
++ * SP configuration information
++ *
++ * This struct is part of the atomisp firmware ABI and is directly copied
++ * to ISP DRAM by sh_css_store_sp_group_to_ddr()
++ *
++ * Do NOT change this struct's layout or remove seemingly unused fields!
++ */
+ struct sh_css_sp_config {
+ u8 no_isp_sync; /* Signal host immediately after start */
+ u8 enable_raw_pool_locking; /** Enable Raw Buffer Locking for HALv3 Support */
+@@ -351,6 +358,10 @@ struct sh_css_sp_config {
+ host (true) or when they are passed to the preview/video pipe
+ (false). */
+
++ /*
++ * Note the fields below are only used on the ISP2400 not on the ISP2401,
++ * sh_css_store_sp_group_to_ddr() skip copying these when run on the ISP2401.
++ */
+ struct {
+ u8 a_changed;
+ u8 b_changed;
+@@ -360,11 +371,13 @@ struct sh_css_sp_config {
+ } input_formatter;
+
+ sync_generator_cfg_t sync_gen;
++ tpg_cfg_t tpg;
+ prbs_cfg_t prbs;
+ input_system_cfg_t input_circuit;
+ u8 input_circuit_cfg_changed;
+- u32 mipi_sizes_for_check[N_CSI_PORTS][IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT];
+- u8 enable_isys_event_queue;
++ u32 mipi_sizes_for_check[N_CSI_PORTS][IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT];
++ /* These last 2 fields are used on both the ISP2400 and the ISP2401 */
++ u8 enable_isys_event_queue;
+ u8 disable_cont_vf;
+ };
+
--- /dev/null
+From 046667c4d3196938e992fba0dfcde570aa85cd0e Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Sun, 21 Jul 2024 14:45:08 -0400
+Subject: memcg_write_event_control(): fix a user-triggerable oops
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 046667c4d3196938e992fba0dfcde570aa85cd0e upstream.
+
+we are *not* guaranteed that anything past the terminating NUL
+is mapped (let alone initialized with anything sane).
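+
+Concretely, the old parsing did:
+
+  cfd = simple_strtoul(buf, &endp, 10);
+  if ((*endp != ' ') && (*endp != '\0'))
+          return -EINVAL;
+  buf = endp + 1;   /* with *endp == '\0' this points past the NUL */
+
+so when the control string ended right after the number, 'buf' was
+advanced one byte beyond the terminating NUL before further use.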
+
+Fixes: 0dea116876ee ("cgroup: implement eventfd-based generic API for notifications")
+Cc: stable@vger.kernel.org
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memcontrol.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -5282,9 +5282,12 @@ static ssize_t memcg_write_event_control
+ buf = endp + 1;
+
+ cfd = simple_strtoul(buf, &endp, 10);
+- if ((*endp != ' ') && (*endp != '\0'))
++ if (*endp == '\0')
++ buf = endp;
++ else if (*endp == ' ')
++ buf = endp + 1;
++ else
+ return -EINVAL;
+- buf = endp + 1;
+
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (!event)
--- /dev/null
+From 807174a93d24c456503692dc3f5af322ee0b640a Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Fri, 9 Aug 2024 14:48:47 +0300
+Subject: mm: fix endless reclaim on machines with unaccepted memory
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit 807174a93d24c456503692dc3f5af322ee0b640a upstream.
+
+Unaccepted memory is considered unusable free memory, which is not counted
+as free on the zone watermark check. This causes get_page_from_freelist()
+to accept more memory to hit the high watermark, but it creates problems
+in the reclaim path.
+
+The reclaim path encounters a failed zone watermark check and attempts to
+reclaim memory. This is usually successful, but if there is little or no
+reclaimable memory, it can result in endless reclaim with little to no
+progress. This can occur early in the boot process, just after the start of
+the init process, when the only reclaimable memory is the page cache of the
+init executable and its libraries.
+
+Make unaccepted memory count as free from the watermark check's point of
+view. This way unaccepted memory will never be the trigger of memory
+reclaim. Accept more memory in get_page_from_freelist() if needed.
+
+Link: https://lkml.kernel.org/r/20240809114854.3745464-2-kirill.shutemov@linux.intel.com
+Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory")
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reported-by: Jianxiong Gao <jxgao@google.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org> [6.5+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_alloc.c | 42 ++++++++++++++++++++----------------------
+ 1 file changed, 20 insertions(+), 22 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes);
+
+ static bool page_contains_unaccepted(struct page *page, unsigned int order);
+ static void accept_page(struct page *page, unsigned int order);
+-static bool try_to_accept_memory(struct zone *zone, unsigned int order);
++static bool cond_accept_memory(struct zone *zone, unsigned int order);
+ static inline bool has_unaccepted_memory(void);
+ static bool __free_unaccepted(struct page *page);
+
+@@ -3059,9 +3059,6 @@ static inline long __zone_watermark_unus
+ if (!(alloc_flags & ALLOC_CMA))
+ unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
+ #endif
+-#ifdef CONFIG_UNACCEPTED_MEMORY
+- unusable_free += zone_page_state(z, NR_UNACCEPTED);
+-#endif
+
+ return unusable_free;
+ }
+@@ -3355,6 +3352,8 @@ retry:
+ }
+ }
+
++ cond_accept_memory(zone, order);
++
+ /*
+ * Detect whether the number of free pages is below high
+ * watermark. If so, we will decrease pcp->high and free
+@@ -3380,10 +3379,8 @@ check_alloc_wmark:
+ gfp_mask)) {
+ int ret;
+
+- if (has_unaccepted_memory()) {
+- if (try_to_accept_memory(zone, order))
+- goto try_this_zone;
+- }
++ if (cond_accept_memory(zone, order))
++ goto try_this_zone;
+
+ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+ /*
+@@ -3437,10 +3434,8 @@ try_this_zone:
+
+ return page;
+ } else {
+- if (has_unaccepted_memory()) {
+- if (try_to_accept_memory(zone, order))
+- goto try_this_zone;
+- }
++ if (cond_accept_memory(zone, order))
++ goto try_this_zone;
+
+ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+ /* Try again if zone has deferred pages */
+@@ -6933,9 +6928,6 @@ static bool try_to_accept_memory_one(str
+ struct page *page;
+ bool last;
+
+- if (list_empty(&zone->unaccepted_pages))
+- return false;
+-
+ spin_lock_irqsave(&zone->lock, flags);
+ page = list_first_entry_or_null(&zone->unaccepted_pages,
+ struct page, lru);
+@@ -6961,23 +6953,29 @@ static bool try_to_accept_memory_one(str
+ return true;
+ }
+
+-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
++static bool cond_accept_memory(struct zone *zone, unsigned int order)
+ {
+ long to_accept;
+- int ret = false;
++ bool ret = false;
++
++ if (!has_unaccepted_memory())
++ return false;
++
++ if (list_empty(&zone->unaccepted_pages))
++ return false;
+
+ /* How much to accept to get to high watermark? */
+ to_accept = high_wmark_pages(zone) -
+ (zone_page_state(zone, NR_FREE_PAGES) -
+- __zone_watermark_unusable_free(zone, order, 0));
++ __zone_watermark_unusable_free(zone, order, 0) -
++ zone_page_state(zone, NR_UNACCEPTED));
+
+- /* Accept at least one page */
+- do {
++ while (to_accept > 0) {
+ if (!try_to_accept_memory_one(zone))
+ break;
+ ret = true;
+ to_accept -= MAX_ORDER_NR_PAGES;
+- } while (to_accept > 0);
++ }
+
+ return ret;
+ }
+@@ -7020,7 +7018,7 @@ static void accept_page(struct page *pag
+ {
+ }
+
+-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
++static bool cond_accept_memory(struct zone *zone, unsigned int order)
+ {
+ return false;
+ }
--- /dev/null
+From 5f75cfbd6bb02295ddaed48adf667b6c828ce07b Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Thu, 1 Aug 2024 22:47:48 +0200
+Subject: mm/hugetlb: fix hugetlb vs. core-mm PT locking
+
+From: David Hildenbrand <david@redhat.com>
+
+commit 5f75cfbd6bb02295ddaed48adf667b6c828ce07b upstream.
+
+We recently made GUP's common page table walking code also walk hugetlb
+VMAs without most hugetlb special-casing, preparing for the future of
+having less hugetlb-specific page table walking code in the codebase.
+Turns out that we missed one page table locking detail: page table locking
+for hugetlb folios that are not mapped using a single PMD/PUD.
+
+Assume we have a hugetlb folio that spans multiple PTEs (e.g., 64 KiB
+hugetlb folios on arm64 with 4 KiB base page size). GUP, as it walks the
+page tables, will perform a pte_offset_map_lock() to grab the PTE table
+lock.
+
+However, hugetlb that concurrently modifies these page tables would
+actually grab the mm->page_table_lock: with USE_SPLIT_PTE_PTLOCKS, the
+locks would differ. Something similar can happen right now with hugetlb
+folios that span multiple PMDs when USE_SPLIT_PMD_PTLOCKS.
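+
+Illustratively, a hedged sketch of the mismatch (not the exact upstream
+call sites):
+
+  #include <linux/mm.h>
+  #include <linux/hugetlb.h>
+
+  static void lock_mismatch_sketch(struct mm_struct *mm, pmd_t *pmd,
+                                   unsigned long addr, struct hstate *h)
+  {
+          spinlock_t *ptl, *hptl;
+          pte_t *pte;
+
+          /* GUP-style walker: takes the split PTE page table lock */
+          pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+          if (!pte)
+                  return;
+
+          /* hugetlb before this fix: could pick mm->page_table_lock,
+           * a different lock for the very same PTE table */
+          hptl = huge_pte_lockptr(h, mm, pte);
+          WARN_ON(hptl != ptl);   /* the bug: these could differ */
+
+          pte_unmap_unlock(pte, ptl);
+  }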
+
+This issue can be reproduced [1], for example triggering:
+
+[ 3105.936100] ------------[ cut here ]------------
+[ 3105.939323] WARNING: CPU: 31 PID: 2732 at mm/gup.c:142 try_grab_folio+0x11c/0x188
+[ 3105.944634] Modules linked in: [...]
+[ 3105.974841] CPU: 31 PID: 2732 Comm: reproducer Not tainted 6.10.0-64.eln141.aarch64 #1
+[ 3105.980406] Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-4.fc40 05/24/2024
+[ 3105.986185] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+[ 3105.991108] pc : try_grab_folio+0x11c/0x188
+[ 3105.994013] lr : follow_page_pte+0xd8/0x430
+[ 3105.996986] sp : ffff80008eafb8f0
+[ 3105.999346] x29: ffff80008eafb900 x28: ffffffe8d481f380 x27: 00f80001207cff43
+[ 3106.004414] x26: 0000000000000001 x25: 0000000000000000 x24: ffff80008eafba48
+[ 3106.009520] x23: 0000ffff9372f000 x22: ffff7a54459e2000 x21: ffff7a546c1aa978
+[ 3106.014529] x20: ffffffe8d481f3c0 x19: 0000000000610041 x18: 0000000000000001
+[ 3106.019506] x17: 0000000000000001 x16: ffffffffffffffff x15: 0000000000000000
+[ 3106.024494] x14: ffffb85477fdfe08 x13: 0000ffff9372ffff x12: 0000000000000000
+[ 3106.029469] x11: 1fffef4a88a96be1 x10: ffff7a54454b5f0c x9 : ffffb854771b12f0
+[ 3106.034324] x8 : 0008000000000000 x7 : ffff7a546c1aa980 x6 : 0008000000000080
+[ 3106.038902] x5 : 00000000001207cf x4 : 0000ffff9372f000 x3 : ffffffe8d481f000
+[ 3106.043420] x2 : 0000000000610041 x1 : 0000000000000001 x0 : 0000000000000000
+[ 3106.047957] Call trace:
+[ 3106.049522] try_grab_folio+0x11c/0x188
+[ 3106.051996] follow_pmd_mask.constprop.0.isra.0+0x150/0x2e0
+[ 3106.055527] follow_page_mask+0x1a0/0x2b8
+[ 3106.058118] __get_user_pages+0xf0/0x348
+[ 3106.060647] faultin_page_range+0xb0/0x360
+[ 3106.063651] do_madvise+0x340/0x598
+
+Let's make huge_pte_lockptr() effectively use the same PT locks as any
+core-mm page table walker would. Add ptep_lockptr() to obtain the PTE
+page table lock using a pte pointer -- unfortunately we cannot convert
+pte_lockptr() because virt_to_page() doesn't work with kmap'ed page tables
+we can have with CONFIG_HIGHPTE.
+
+Handle CONFIG_PGTABLE_LEVELS correctly by checking in reverse order, such
+that when e.g., CONFIG_PGTABLE_LEVELS==2 with
+PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE will work as expected. Document
+why that works.
+
+There is one ugly case: powerpc 8xx, whereby we have an 8 MiB hugetlb
+folio being mapped using two PTE page tables. While hugetlb wants to take
+the PMD table lock, core-mm would grab the PTE table lock of one of both
+PTE page tables. In such corner cases, we have to make sure that both
+locks match, which is (fortunately!) currently guaranteed for 8xx as it
+does not support SMP and consequently doesn't use split PT locks.
+
+[1] https://lore.kernel.org/all/1bbfcc7f-f222-45a5-ac44-c5a1381c596d@redhat.com/
+
+Link: https://lkml.kernel.org/r/20240801204748.99107-1-david@redhat.com
+Fixes: 9cb28da54643 ("mm/gup: handle hugetlb in the generic follow_page_mask code")
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Acked-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Muchun Song <muchun.song@linux.dev>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/hugetlb.h | 33 ++++++++++++++++++++++++++++++---
+ include/linux/mm.h | 11 +++++++++++
+ 2 files changed, 41 insertions(+), 3 deletions(-)
+
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -967,10 +967,37 @@ static inline bool htlb_allow_alloc_fall
+ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
+ struct mm_struct *mm, pte_t *pte)
+ {
+- if (huge_page_size(h) == PMD_SIZE)
++ const unsigned long size = huge_page_size(h);
++
++ VM_WARN_ON(size == PAGE_SIZE);
++
++ /*
++ * hugetlb must use the exact same PT locks as core-mm page table
++ * walkers would. When modifying a PTE table, hugetlb must take the
++ * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD
++ * PT lock etc.
++ *
++ * The expectation is that any hugetlb folio smaller than a PMD is
++ * always mapped into a single PTE table and that any hugetlb folio
++ * smaller than a PUD (but at least as big as a PMD) is always mapped
++ * into a single PMD table.
++ *
++ * If that does not hold for an architecture, then that architecture
++ * must disable split PT locks such that all *_lockptr() functions
++ * will give us the same result: the per-MM PT lock.
++ *
++ * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where
++ * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr()
++ * and core-mm would use pmd_lockptr(). However, in such configurations
++ * split PMD locks are disabled -- they don't make sense on a single
++ * PGDIR page table -- and the end result is the same.
++ */
++ if (size >= PUD_SIZE)
++ return pud_lockptr(mm, (pud_t *) pte);
++ else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE))
+ return pmd_lockptr(mm, (pmd_t *) pte);
+- VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
+- return &mm->page_table_lock;
++ /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */
++ return ptep_lockptr(mm, pte);
+ }
+
+ #ifndef hugepages_supported
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -2960,6 +2960,13 @@ static inline spinlock_t *pte_lockptr(st
+ return ptlock_ptr(page_ptdesc(pmd_page(*pmd)));
+ }
+
++static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
++{
++ BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE));
++ BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE);
++ return ptlock_ptr(virt_to_ptdesc(pte));
++}
++
+ static inline bool ptlock_init(struct ptdesc *ptdesc)
+ {
+ /*
+@@ -2984,6 +2991,10 @@ static inline spinlock_t *pte_lockptr(st
+ {
+ return &mm->page_table_lock;
+ }
++static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
++{
++ return &mm->page_table_lock;
++}
+ static inline void ptlock_cache_init(void) {}
+ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; }
+ static inline void ptlock_free(struct ptdesc *ptdesc) {}
--- /dev/null
+From d75abd0d0bc29e6ebfebbf76d11b4067b35844af Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Tue, 6 Aug 2024 12:41:07 -0400
+Subject: mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu
+
+From: Waiman Long <longman@redhat.com>
+
+commit d75abd0d0bc29e6ebfebbf76d11b4067b35844af upstream.
+
+The memory_failure_cpu structure is a per-cpu structure. Access to its
+content requires the use of get_cpu_var() to lock in the current CPU and
+disable preemption. The use of a regular spinlock_t for locking purpose
+is fine for a non-RT kernel.
+
+Since the integration of RT spinlock support into the v5.15 kernel, a
+spinlock_t in an RT kernel becomes a sleeping lock, and taking a sleeping
+lock in a preemption-disabled context is illegal, resulting in the
+following kind of warning.
+
+ [12135.732244] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
+ [12135.732248] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 270076, name: kworker/0:0
+ [12135.732252] preempt_count: 1, expected: 0
+ [12135.732255] RCU nest depth: 2, expected: 2
+ :
+ [12135.732420] Hardware name: Dell Inc. PowerEdge R640/0HG0J8, BIOS 2.10.2 02/24/2021
+ [12135.732423] Workqueue: kacpi_notify acpi_os_execute_deferred
+ [12135.732433] Call Trace:
+ [12135.732436] <TASK>
+ [12135.732450] dump_stack_lvl+0x57/0x81
+ [12135.732461] __might_resched.cold+0xf4/0x12f
+ [12135.732479] rt_spin_lock+0x4c/0x100
+ [12135.732491] memory_failure_queue+0x40/0xe0
+ [12135.732503] ghes_do_memory_failure+0x53/0x390
+ [12135.732516] ghes_do_proc.constprop.0+0x229/0x3e0
+ [12135.732575] ghes_proc+0xf9/0x1a0
+ [12135.732591] ghes_notify_hed+0x6a/0x150
+ [12135.732602] notifier_call_chain+0x43/0xb0
+ [12135.732626] blocking_notifier_call_chain+0x43/0x60
+ [12135.732637] acpi_ev_notify_dispatch+0x47/0x70
+ [12135.732648] acpi_os_execute_deferred+0x13/0x20
+ [12135.732654] process_one_work+0x41f/0x500
+ [12135.732695] worker_thread+0x192/0x360
+ [12135.732715] kthread+0x111/0x140
+ [12135.732733] ret_from_fork+0x29/0x50
+ [12135.732779] </TASK>
+
+Fix it by using a raw_spinlock_t for locking instead.
+
+Also move the pr_err() out of the lock critical section to after
+put_cpu_var() to avoid indeterminate latency and the possibility of
+sleeping in that call.
+
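+As a rough userspace analogue of that ordering (pthread names used only
+for illustration; the kernel side uses raw_spin_lock_irqsave() and
+put_cpu_var()):
+
+    #include <pthread.h>
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    static pthread_spinlock_t lock;
+    static int fifo_free = 1;
+
+    /* Decide under the lock, report after dropping it, so no
+     * potentially sleeping call runs inside the critical section. */
+    static void queue_one(void)
+    {
+        bool overflow;
+
+        pthread_spin_lock(&lock);
+        overflow = (fifo_free == 0);    /* kfifo_put() failing */
+        if (!overflow)
+            fifo_free--;
+        pthread_spin_unlock(&lock);
+        if (overflow)
+            fprintf(stderr, "buffer overflow\n");
+    }
+
+    int main(void)
+    {
+        pthread_spin_init(&lock, 0);
+        queue_one();
+        queue_one();    /* the second one overflows */
+        return 0;
+    }
+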
+[longman@redhat.com: don't hold percpu ref across pr_err(), per Miaohe]
+ Link: https://lkml.kernel.org/r/20240807181130.1122660-1-longman@redhat.com
+Link: https://lkml.kernel.org/r/20240806164107.1044956-1-longman@redhat.com
+Fixes: 0f383b6dc96e ("locking/spinlock: Provide RT variant")
+Signed-off-by: Waiman Long <longman@redhat.com>
+Acked-by: Miaohe Lin <linmiaohe@huawei.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Juri Lelli <juri.lelli@redhat.com>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory-failure.c | 20 +++++++++++---------
+ 1 file changed, 11 insertions(+), 9 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -2406,7 +2406,7 @@ struct memory_failure_entry {
+ struct memory_failure_cpu {
+ DECLARE_KFIFO(fifo, struct memory_failure_entry,
+ MEMORY_FAILURE_FIFO_SIZE);
+- spinlock_t lock;
++ raw_spinlock_t lock;
+ struct work_struct work;
+ };
+
+@@ -2432,20 +2432,22 @@ void memory_failure_queue(unsigned long
+ {
+ struct memory_failure_cpu *mf_cpu;
+ unsigned long proc_flags;
++ bool buffer_overflow;
+ struct memory_failure_entry entry = {
+ .pfn = pfn,
+ .flags = flags,
+ };
+
+ mf_cpu = &get_cpu_var(memory_failure_cpu);
+- spin_lock_irqsave(&mf_cpu->lock, proc_flags);
+- if (kfifo_put(&mf_cpu->fifo, entry))
++ raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
++ buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry);
++ if (!buffer_overflow)
+ schedule_work_on(smp_processor_id(), &mf_cpu->work);
+- else
++ raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
++ put_cpu_var(memory_failure_cpu);
++ if (buffer_overflow)
+ pr_err("buffer overflow when queuing memory failure at %#lx\n",
+ pfn);
+- spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
+- put_cpu_var(memory_failure_cpu);
+ }
+ EXPORT_SYMBOL_GPL(memory_failure_queue);
+
+@@ -2458,9 +2460,9 @@ static void memory_failure_work_func(str
+
+ mf_cpu = container_of(work, struct memory_failure_cpu, work);
+ for (;;) {
+- spin_lock_irqsave(&mf_cpu->lock, proc_flags);
++ raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
+ gotten = kfifo_get(&mf_cpu->fifo, &entry);
+- spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
++ raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
+ if (!gotten)
+ break;
+ if (entry.flags & MF_SOFT_OFFLINE)
+@@ -2490,7 +2492,7 @@ static int __init memory_failure_init(vo
+
+ for_each_possible_cpu(cpu) {
+ mf_cpu = &per_cpu(memory_failure_cpu, cpu);
+- spin_lock_init(&mf_cpu->lock);
++ raw_spin_lock_init(&mf_cpu->lock);
+ INIT_KFIFO(mf_cpu->fifo);
+ INIT_WORK(&mf_cpu->work, memory_failure_work_func);
+ }
--- /dev/null
+From fd8c35a92910f4829b7c99841f39b1b952c259d5 Mon Sep 17 00:00:00 2001
+From: Zi Yan <ziy@nvidia.com>
+Date: Fri, 9 Aug 2024 10:59:05 -0400
+Subject: mm/numa: no task_numa_fault() call if PMD is changed
+
+From: Zi Yan <ziy@nvidia.com>
+
+commit fd8c35a92910f4829b7c99841f39b1b952c259d5 upstream.
+
+When handling a numa page fault, task_numa_fault() should be called by a
+process that restores the page table of the faulted folio to avoid
+duplicated stats counting. Commit c5b5a3dd2c1f ("mm: thp: refactor NUMA
+fault handling") restructured do_huge_pmd_numa_page() and did not avoid
+task_numa_fault() call in the second page table check after a numa
+migration failure. Fix it by making all !pmd_same() return immediately.
+
+This issue can cause task_numa_fault() to be called more often than
+necessary and lead to unexpected numa balancing results (it is hard to
+tell whether the issue causes a positive or negative performance impact
+due to the duplicated numa fault counting).
+
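+The accounting rule can be condensed into a toy model (plain C, with
+invented names; not kernel code):
+
+    #include <stdio.h>
+
+    static int numa_faults;    /* stand-in for the per-task stats */
+
+    /* Only the path that actually restores the mapping accounts the
+     * fault; a racing "page table changed" exit must not count it. */
+    static void handle_fault(int changed_under_us)
+    {
+        if (changed_under_us)
+            return;            /* someone else restored it */
+        /* ... restore the PMD ... */
+        numa_faults++;         /* task_numa_fault() in the kernel */
+    }
+
+    int main(void)
+    {
+        handle_fault(1);
+        handle_fault(0);
+        printf("faults counted: %d\n", numa_faults);    /* 1, not 2 */
+        return 0;
+    }
+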
+Link: https://lkml.kernel.org/r/20240809145906.1513458-3-ziy@nvidia.com
+Fixes: c5b5a3dd2c1f ("mm: thp: refactor NUMA fault handling")
+Reported-by: "Huang, Ying" <ying.huang@intel.com>
+Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@yhuang6-desk2.ccr.corp.intel.com/
+Signed-off-by: Zi Yan <ziy@nvidia.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/huge_memory.c | 29 +++++++++++++----------------
+ 1 file changed, 13 insertions(+), 16 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1672,7 +1672,7 @@ vm_fault_t do_huge_pmd_numa_page(struct
+ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+ if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
+ spin_unlock(vmf->ptl);
+- goto out;
++ return 0;
+ }
+
+ pmd = pmd_modify(oldpmd, vma->vm_page_prot);
+@@ -1715,22 +1715,16 @@ vm_fault_t do_huge_pmd_numa_page(struct
+ if (!migrate_misplaced_folio(folio, vma, target_nid)) {
+ flags |= TNF_MIGRATED;
+ nid = target_nid;
+- } else {
+- flags |= TNF_MIGRATE_FAIL;
+- vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+- if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
+- spin_unlock(vmf->ptl);
+- goto out;
+- }
+- goto out_map;
+- }
+-
+-out:
+- if (nid != NUMA_NO_NODE)
+ task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
++ return 0;
++ }
+
+- return 0;
+-
++ flags |= TNF_MIGRATE_FAIL;
++ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
++ if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
++ spin_unlock(vmf->ptl);
++ return 0;
++ }
+ out_map:
+ /* Restore the PMD */
+ pmd = pmd_modify(oldpmd, vma->vm_page_prot);
+@@ -1740,7 +1734,10 @@ out_map:
+ set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
+ update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
+ spin_unlock(vmf->ptl);
+- goto out;
++
++ if (nid != NUMA_NO_NODE)
++ task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
++ return 0;
+ }
+
+ /*
--- /dev/null
+From 40b760cfd44566bca791c80e0720d70d75382b84 Mon Sep 17 00:00:00 2001
+From: Zi Yan <ziy@nvidia.com>
+Date: Fri, 9 Aug 2024 10:59:04 -0400
+Subject: mm/numa: no task_numa_fault() call if PTE is changed
+
+From: Zi Yan <ziy@nvidia.com>
+
+commit 40b760cfd44566bca791c80e0720d70d75382b84 upstream.
+
+When handling a numa page fault, task_numa_fault() should be called by a
+process that restores the page table of the faulted folio to avoid
+duplicated stats counting. Commit b99a342d4f11 ("NUMA balancing: reduce
+TLB flush via delaying mapping on hint page fault") restructured
+do_numa_page() and did not avoid task_numa_fault() call in the second page
+table check after a numa migration failure. Fix it by making all
+!pte_same() return immediately.
+
+This issue can cause task_numa_fault() to be called more often than
+necessary and lead to unexpected numa balancing results (it is hard to
+tell whether the issue causes a positive or negative performance impact
+due to the duplicated numa fault counting).
+
+Link: https://lkml.kernel.org/r/20240809145906.1513458-2-ziy@nvidia.com
+Fixes: b99a342d4f11 ("NUMA balancing: reduce TLB flush via delaying mapping on hint page fault")
+Signed-off-by: Zi Yan <ziy@nvidia.com>
+Reported-by: "Huang, Ying" <ying.huang@intel.com>
+Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@yhuang6-desk2.ccr.corp.intel.com/
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory.c | 33 ++++++++++++++++-----------------
+ 1 file changed, 16 insertions(+), 17 deletions(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -5155,7 +5155,7 @@ static vm_fault_t do_numa_page(struct vm
+
+ if (unlikely(!pte_same(old_pte, vmf->orig_pte))) {
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+- goto out;
++ return 0;
+ }
+
+ pte = pte_modify(old_pte, vma->vm_page_prot);
+@@ -5218,23 +5218,19 @@ static vm_fault_t do_numa_page(struct vm
+ if (!migrate_misplaced_folio(folio, vma, target_nid)) {
+ nid = target_nid;
+ flags |= TNF_MIGRATED;
+- } else {
+- flags |= TNF_MIGRATE_FAIL;
+- vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+- vmf->address, &vmf->ptl);
+- if (unlikely(!vmf->pte))
+- goto out;
+- if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+- pte_unmap_unlock(vmf->pte, vmf->ptl);
+- goto out;
+- }
+- goto out_map;
++ task_numa_fault(last_cpupid, nid, nr_pages, flags);
++ return 0;
+ }
+
+-out:
+- if (nid != NUMA_NO_NODE)
+- task_numa_fault(last_cpupid, nid, nr_pages, flags);
+- return 0;
++ flags |= TNF_MIGRATE_FAIL;
++ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
++ vmf->address, &vmf->ptl);
++ if (unlikely(!vmf->pte))
++ return 0;
++ if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
++ pte_unmap_unlock(vmf->pte, vmf->ptl);
++ return 0;
++ }
+ out_map:
+ /*
+ * Make it present again, depending on how arch implements
+@@ -5247,7 +5243,10 @@ out_map:
+ numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte,
+ writable);
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+- goto out;
++
++ if (nid != NUMA_NO_NODE)
++ task_numa_fault(last_cpupid, nid, nr_pages, flags);
++ return 0;
+ }
+
+ static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
--- /dev/null
+From 61ebe5a747da649057c37be1c37eb934b4af79ca Mon Sep 17 00:00:00 2001
+From: Hailong Liu <hailong.liu@oppo.com>
+Date: Thu, 8 Aug 2024 20:19:56 +0800
+Subject: mm/vmalloc: fix page mapping if vm_area_alloc_pages() with high order fallback to order 0
+
+From: Hailong Liu <hailong.liu@oppo.com>
+
+commit 61ebe5a747da649057c37be1c37eb934b4af79ca upstream.
+
+__vmap_pages_range_noflush() assumes its pages** argument contains
+pages with the same page shift. However, since commit e9c3cda4d86e ("mm,
+vmalloc: fix high order __GFP_NOFAIL allocations"), if gfp_flags includes
+__GFP_NOFAIL with a high order in vm_area_alloc_pages() and the
+high-order page allocation fails, pages** may contain two different page
+shifts (high order and order-0). This could lead
+__vmap_pages_range_noflush() to perform incorrect mappings, potentially
+resulting in memory corruption.
+
+Users might encounter this as follows (vmap_allow_huge = true, 2M is for
+PMD_SIZE):
+
+kvmalloc(2M, __GFP_NOFAIL|GFP_X)
+ __vmalloc_node_range_noprof(vm_flags=VM_ALLOW_HUGE_VMAP)
+ vm_area_alloc_pages(order=9) ---> order-9 allocation failed and fallback to order-0
+ vmap_pages_range()
+ vmap_pages_range_noflush()
+ __vmap_pages_range_noflush(page_shift = 21) ----> wrong mapping happens
+
+If a high-order allocation fails, __vmalloc_node_range_noprof() will
+retry with order-0, so falling back to order-0 here is unnecessary.
+Fix this by removing the fallback code.
+
+Link: https://lkml.kernel.org/r/20240808122019.3361-1-hailong.liu@oppo.com
+Fixes: e9c3cda4d86e ("mm, vmalloc: fix high order __GFP_NOFAIL allocations")
+Signed-off-by: Hailong Liu <hailong.liu@oppo.com>
+Reported-by: Tangquan Zheng <zhengtangquan@oppo.com>
+Reviewed-by: Baoquan He <bhe@redhat.com>
+Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Acked-by: Barry Song <baohua@kernel.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/vmalloc.c | 11 ++---------
+ 1 file changed, 2 insertions(+), 9 deletions(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -3583,15 +3583,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
+ page = alloc_pages_noprof(alloc_gfp, order);
+ else
+ page = alloc_pages_node_noprof(nid, alloc_gfp, order);
+- if (unlikely(!page)) {
+- if (!nofail)
+- break;
+-
+- /* fall back to the zero order allocations */
+- alloc_gfp |= __GFP_NOFAIL;
+- order = 0;
+- continue;
+- }
++ if (unlikely(!page))
++ break;
+
+ /*
+ * Higher order allocations must be able to be treated as
--- /dev/null
+From e46bc2e7eb90a370bc27fa2fd98cb8251e7da1ec Mon Sep 17 00:00:00 2001
+From: Pedro Falcato <pedro.falcato@gmail.com>
+Date: Wed, 7 Aug 2024 18:33:35 +0100
+Subject: mseal: fix is_madv_discard()
+
+From: Pedro Falcato <pedro.falcato@gmail.com>
+
+commit e46bc2e7eb90a370bc27fa2fd98cb8251e7da1ec upstream.
+
+is_madv_discard() did its check wrong. MADV_ flags are not bitwise
+flags; they're ordinary sequential numbers. So, for instance:
+ behavior & (/* ... */ | MADV_REMOVE)
+
+tagged both MADV_REMOVE and MADV_RANDOM (bit 0 set) as discard
+operations.
+
+As a result, the kernel could erroneously block certain madvise calls
+(e.g. MADV_RANDOM or MADV_HUGEPAGE) on sealed VMAs because they share
+bits with blocked MADV operations (e.g. MADV_REMOVE or MADV_WIPEONFORK).
+
+This is obviously incorrect, so use a switch statement instead.
+
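+This is easy to verify from userspace: on Linux, MADV_RANDOM is 1 and
+MADV_REMOVE is 9, so the old bitwise test cannot tell them apart (a
+standalone demonstration, not part of the patch):
+
+    #include <stdio.h>
+    #include <sys/mman.h>
+
+    int main(void)
+    {
+        /* 9 & 1 == 1: the old test tagged MADV_RANDOM as discard */
+        printf("MADV_RANDOM=%d MADV_REMOVE=%d and=%d\n",
+               MADV_RANDOM, MADV_REMOVE, MADV_RANDOM & MADV_REMOVE);
+        return 0;
+    }
+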
+Link: https://lkml.kernel.org/r/20240807173336.2523757-1-pedro.falcato@gmail.com
+Link: https://lkml.kernel.org/r/20240807173336.2523757-2-pedro.falcato@gmail.com
+Fixes: 8be7258aad44 ("mseal: add mseal syscall")
+Signed-off-by: Pedro Falcato <pedro.falcato@gmail.com>
+Tested-by: Jeff Xu <jeffxu@chromium.org>
+Reviewed-by: Jeff Xu <jeffxu@chromium.org>
+Cc: Kees Cook <kees@kernel.org>
+Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mseal.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+diff --git a/mm/mseal.c b/mm/mseal.c
+index bf783bba8ed0..15bba28acc00 100644
+--- a/mm/mseal.c
++++ b/mm/mseal.c
+@@ -40,9 +40,17 @@ static bool can_modify_vma(struct vm_area_struct *vma)
+
+ static bool is_madv_discard(int behavior)
+ {
+- return behavior &
+- (MADV_FREE | MADV_DONTNEED | MADV_DONTNEED_LOCKED |
+- MADV_REMOVE | MADV_DONTFORK | MADV_WIPEONFORK);
++ switch (behavior) {
++ case MADV_FREE:
++ case MADV_DONTNEED:
++ case MADV_DONTNEED_LOCKED:
++ case MADV_REMOVE:
++ case MADV_DONTFORK:
++ case MADV_WIPEONFORK:
++ return true;
++ }
++
++ return false;
+ }
+
+ static bool is_ro_anon(struct vm_area_struct *vma)
+--
+2.46.0
+
--- /dev/null
+From 58a63729c957621f1990c3494c702711188ca347 Mon Sep 17 00:00:00 2001
+From: Long Li <longli@microsoft.com>
+Date: Fri, 9 Aug 2024 08:58:58 -0700
+Subject: net: mana: Fix doorbell out of order violation and avoid unnecessary doorbell rings
+
+From: Long Li <longli@microsoft.com>
+
+commit 58a63729c957621f1990c3494c702711188ca347 upstream.
+
+After napi_complete_done() is called while NAPI is polling in the current
+process context, another NAPI instance may be scheduled, start running in
+softirq on another CPU, and ring the doorbell before the current CPU
+does. Combined with unnecessary rings when there is no need to arm
+the CQ, this triggers error paths in the hardware.
+
+This patch fixes this by calling napi_complete_done() after the doorbell
+rings. It limits the number of unnecessary rings when there is
+no need to arm. MANA hardware specifies that there must be one doorbell
+ring every 8 CQ wraparounds. This driver guarantees one doorbell ring as
+soon as the number of consumed CQEs exceeds 4 CQ wraparounds. In practical
+workloads, 4 CQ wraparounds prove to be a big enough window that the
+limit is rarely exceeded before all the NAPI weight is consumed.
+
+To implement this, add a per-CQ counter cq->work_done_since_doorbell,
+and make sure the doorbell is rung as soon as 4 wraparounds of the CQ
+have been passed.
+
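+The threshold arithmetic, with made-up sizes for illustration (the real
+queue size and COMP_ENTRY_SIZE come from the hardware setup):
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+        unsigned int queue_size = 65536;      /* hypothetical CQ bytes */
+        unsigned int comp_entry_size = 64;    /* hypothetical CQE bytes */
+
+        /* ring at latest after 4 wraparounds' worth of CQEs */
+        printf("ring after %u consumed CQEs\n",
+               queue_size / comp_entry_size * 4);
+        return 0;
+    }
+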
+Cc: stable@vger.kernel.org
+Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: Long Li <longli@microsoft.com>
+Link: https://patch.msgid.link/1723219138-29887-1-git-send-email-longli@linuxonhyperv.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/microsoft/mana/mana_en.c | 22 ++++++++++++++--------
+ include/net/mana/mana.h | 1 +
+ 2 files changed, 15 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
+@@ -1777,7 +1777,6 @@ static void mana_poll_rx_cq(struct mana_
+ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
+ {
+ struct mana_cq *cq = context;
+- u8 arm_bit;
+ int w;
+
+ WARN_ON_ONCE(cq->gdma_cq != gdma_queue);
+@@ -1788,16 +1787,23 @@ static int mana_cq_handler(void *context
+ mana_poll_tx_cq(cq);
+
+ w = cq->work_done;
++ cq->work_done_since_doorbell += w;
+
+- if (w < cq->budget &&
+- napi_complete_done(&cq->napi, w)) {
+- arm_bit = SET_ARM_BIT;
+- } else {
+- arm_bit = 0;
++ if (w < cq->budget) {
++ mana_gd_ring_cq(gdma_queue, SET_ARM_BIT);
++ cq->work_done_since_doorbell = 0;
++ napi_complete_done(&cq->napi, w);
++ } else if (cq->work_done_since_doorbell >
++ cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) {
++ /* MANA hardware requires at least one doorbell ring every 8
++ * wraparounds of CQ even if there is no need to arm the CQ.
++ * This driver rings the doorbell as soon as we have exceeded
++ * 4 wraparounds.
++ */
++ mana_gd_ring_cq(gdma_queue, 0);
++ cq->work_done_since_doorbell = 0;
+ }
+
+- mana_gd_ring_cq(gdma_queue, arm_bit);
+-
+ return w;
+ }
+
+--- a/include/net/mana/mana.h
++++ b/include/net/mana/mana.h
+@@ -274,6 +274,7 @@ struct mana_cq {
+ /* NAPI data */
+ struct napi_struct napi;
+ int work_done;
++ int work_done_since_doorbell;
+ int budget;
+ };
+
--- /dev/null
+From 32316f676b4ee87c0404d333d248ccf777f739bc Mon Sep 17 00:00:00 2001
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Fri, 9 Aug 2024 14:01:24 -0700
+Subject: net: mana: Fix RX buf alloc_size alignment and atomic op panic
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+commit 32316f676b4ee87c0404d333d248ccf777f739bc upstream.
+
+The MANA driver's RX buffer alloc_size is passed into napi_build_skb() to
+create the SKB. skb_shinfo(skb) is located at the end of the skb, and its
+alignment is affected by the alloc_size passed into napi_build_skb(). The
+size needs to be aligned properly for better performance and for atomic
+operations. Otherwise, on ARM64 CPUs, for certain MTU settings such as
+4000, atomic operations may panic on skb_shinfo(skb)->dataref due to an
+alignment fault.
+
+To fix this bug, add proper alignment to the alloc_size calculation.
+
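+A sketch of the effect of the alignment (the 64-byte boundary below is
+an assumption for illustration; the kernel's SKB_DATA_ALIGN() rounds up
+to the real cacheline size):
+
+    #include <stdio.h>
+
+    #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))
+
+    int main(void)
+    {
+        unsigned long mtu = 4000, pad = 2, headroom = 256; /* made up */
+        unsigned long raw = mtu + pad + headroom;
+
+        /* an unaligned size puts skb_shinfo() at an odd offset */
+        printf("raw=%lu aligned=%lu\n", raw, ALIGN_UP(raw, 64));
+        return 0;
+    }
+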
+Sample panic info:
+[ 253.298819] Unable to handle kernel paging request at virtual address ffff000129ba5cce
+[ 253.300900] Mem abort info:
+[ 253.301760] ESR = 0x0000000096000021
+[ 253.302825] EC = 0x25: DABT (current EL), IL = 32 bits
+[ 253.304268] SET = 0, FnV = 0
+[ 253.305172] EA = 0, S1PTW = 0
+[ 253.306103] FSC = 0x21: alignment fault
+Call trace:
+ __skb_clone+0xfc/0x198
+ skb_clone+0x78/0xe0
+ raw6_local_deliver+0xfc/0x228
+ ip6_protocol_deliver_rcu+0x80/0x500
+ ip6_input_finish+0x48/0x80
+ ip6_input+0x48/0xc0
+ ip6_sublist_rcv_finish+0x50/0x78
+ ip6_sublist_rcv+0x1cc/0x2b8
+ ipv6_list_rcv+0x100/0x150
+ __netif_receive_skb_list_core+0x180/0x220
+ netif_receive_skb_list_internal+0x198/0x2a8
+ __napi_poll+0x138/0x250
+ net_rx_action+0x148/0x330
+ handle_softirqs+0x12c/0x3a0
+
+Cc: stable@vger.kernel.org
+Fixes: 80f6215b450e ("net: mana: Add support for jumbo frame")
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: Long Li <longli@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/microsoft/mana/mana_en.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
+@@ -599,7 +599,11 @@ static void mana_get_rxbuf_cfg(int mtu,
+ else
+ *headroom = XDP_PACKET_HEADROOM;
+
+- *alloc_size = mtu + MANA_RXBUF_PAD + *headroom;
++ *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom);
++
++ /* Using page pool in this case, so alloc_size is PAGE_SIZE */
++ if (*alloc_size < PAGE_SIZE)
++ *alloc_size = PAGE_SIZE;
+
+ *datasize = mtu + ETH_HLEN;
+ }
--- /dev/null
+From 100bff23818eb61751ed05d64a7df36ce9728a4d Mon Sep 17 00:00:00 2001
+From: Kyle Huey <me@kylehuey.com>
+Date: Tue, 13 Aug 2024 15:17:27 +0000
+Subject: perf/bpf: Don't call bpf_overflow_handler() for tracing events
+
+From: Kyle Huey <me@kylehuey.com>
+
+commit 100bff23818eb61751ed05d64a7df36ce9728a4d upstream.
+
+The regressing commit is new in 6.10. It assumed that any time
+event->prog is set, bpf_overflow_handler() should be invoked to execute
+the attached bpf program. This assumption is false for tracing events,
+and as a result the regressing commit broke bpftrace by invoking the bpf
+handler with garbage inputs on overflow.
+
+Prior to the regression the overflow handlers formed a chain (of length 0,
+1, or 2) and perf_event_set_bpf_handler() (the !tracing case) added
+bpf_overflow_handler() to that chain, while perf_event_attach_bpf_prog()
+(the tracing case) did not. Both set event->prog. The chain of overflow
+handlers was replaced by a single overflow handler slot and a fixed call to
+bpf_overflow_handler() when appropriate. This modifies the condition there
+to check event->prog->type == BPF_PROG_TYPE_PERF_EVENT, restoring the
+previous behavior and fixing bpftrace.
+
+Signed-off-by: Kyle Huey <khuey@kylehuey.com>
+Suggested-by: Andrii Nakryiko <andrii.nakryiko@gmail.com>
+Reported-by: Joe Damato <jdamato@fastly.com>
+Closes: https://lore.kernel.org/lkml/ZpFfocvyF3KHaSzF@LQ3V64L9R2/
+Fixes: f11f10bfa1ca ("perf/bpf: Call BPF handler directly, not through overflow machinery")
+Cc: stable@vger.kernel.org
+Tested-by: Joe Damato <jdamato@fastly.com> # bpftrace
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/r/20240813151727.28797-1-jdamato@fastly.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/events/core.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -9708,7 +9708,8 @@ static int __perf_event_overflow(struct
+
+ ret = __perf_event_account_interrupt(event, throttle);
+
+- if (event->prog && !bpf_overflow_handler(event, data, regs))
++ if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
++ !bpf_overflow_handler(event, data, regs))
+ return ret;
+
+ /*
--- /dev/null
+From 57d76bc51fd80824bcc0c84a5b5ec944f1b51edd Mon Sep 17 00:00:00 2001
+From: Nam Cao <namcao@linutronix.de>
+Date: Wed, 8 May 2024 21:19:17 +0200
+Subject: riscv: change XIP's kernel_map.size to be size of the entire kernel
+
+From: Nam Cao <namcao@linutronix.de>
+
+commit 57d76bc51fd80824bcc0c84a5b5ec944f1b51edd upstream.
+
+With an XIP kernel, kernel_map.size is set to only the size of the data
+part of the kernel. This is inconsistent with a "normal" kernel, which
+sets it to the size of the entire kernel.
+
+More importantly, an XIP kernel fails to boot if CONFIG_DEBUG_VIRTUAL is
+enabled, because there are checks on virtual addresses with the assumption
+that kernel_map.size is the size of the entire kernel (these checks are in
+arch/riscv/mm/physaddr.c).
+
+Change XIP's kernel_map.size to be the size of the entire kernel.
+
+Signed-off-by: Nam Cao <namcao@linutronix.de>
+Cc: <stable@vger.kernel.org> # v6.1+
+Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Link: https://lore.kernel.org/r/20240508191917.2892064-1-namcao@linutronix.de
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/mm/init.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/riscv/mm/init.c
++++ b/arch/riscv/mm/init.c
+@@ -931,7 +931,7 @@ static void __init create_kernel_page_ta
+ PMD_SIZE, PAGE_KERNEL_EXEC);
+
+ /* Map the data in RAM */
+- end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size;
++ end_va = kernel_map.virt_addr + kernel_map.size;
+ for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE)
+ create_pgd_mapping(pgdir, va,
+ kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)),
+@@ -1100,7 +1100,7 @@ asmlinkage void __init setup_vm(uintptr_
+
+ phys_ram_base = CONFIG_PHYS_RAM_BASE;
+ kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE;
+- kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata);
++ kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start);
+
+ kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom;
+ #else
--- /dev/null
+From 61119394631f219e23ce98bcc3eb993a64a8ea64 Mon Sep 17 00:00:00 2001
+From: Celeste Liu <coelacanthushex@gmail.com>
+Date: Thu, 27 Jun 2024 22:23:39 +0800
+Subject: riscv: entry: always initialize regs->a0 to -ENOSYS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Celeste Liu <coelacanthushex@gmail.com>
+
+commit 61119394631f219e23ce98bcc3eb993a64a8ea64 upstream.
+
+Otherwise, when the tracer changes the syscall number to -1, the kernel
+fails to initialize a0 with -ENOSYS and subsequently fails to return the
+error code of the failed syscall to userspace. For example, it will break
+strace syscall tampering.
+
+Fixes: 52449c17bdd1 ("riscv: entry: set a0 = -ENOSYS only when syscall != -1")
+Reported-by: "Dmitry V. Levin" <ldv@strace.io>
+Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Celeste Liu <CoelacanthusHex@gmail.com>
+Link: https://lore.kernel.org/r/20240627142338.5114-2-CoelacanthusHex@gmail.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/kernel/traps.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/riscv/kernel/traps.c
++++ b/arch/riscv/kernel/traps.c
+@@ -319,6 +319,7 @@ void do_trap_ecall_u(struct pt_regs *reg
+
+ regs->epc += 4;
+ regs->orig_a0 = regs->a0;
++ regs->a0 = -ENOSYS;
+
+ riscv_v_vstate_discard(regs);
+
+@@ -328,8 +329,7 @@ void do_trap_ecall_u(struct pt_regs *reg
+
+ if (syscall >= 0 && syscall < NR_syscalls)
+ syscall_handler(regs, syscall);
+- else if (syscall != -1)
+- regs->a0 = -ENOSYS;
++
+ /*
+ * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
+ * so the maximum stack offset is 1k bytes (10 bits).
--- /dev/null
+From 90574d2a675947858b47008df8d07f75ea50d0d0 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@linaro.org>
+Date: Fri, 9 Aug 2024 15:34:30 +0300
+Subject: rtla/osnoise: Prevent NULL dereference in error handling
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+commit 90574d2a675947858b47008df8d07f75ea50d0d0 upstream.
+
+If the "tool->data" allocation fails then there is no need to call
+osnoise_free_top() and, in fact, doing so will lead to a NULL dereference.
+
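+The fixed shape of the error path, reduced to a standalone sketch
+(struct and helper names simplified, not rtla's actual API):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    struct tool { void *data; };
+
+    static struct tool *init_top(size_t size)
+    {
+        struct tool *tool = calloc(1, sizeof(*tool));
+
+        if (!tool)
+            return NULL;
+        tool->data = calloc(1, size);
+        if (!tool->data) {
+            /* only destroy the tool itself: there is no data to
+             * free, so a free-data helper here would chase NULL */
+            free(tool);
+            return NULL;
+        }
+        return tool;
+    }
+
+    int main(void)
+    {
+        struct tool *tool = init_top(4096);
+
+        printf("%s\n", tool ? "ok" : "failed");
+        return 0;
+    }
+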
+Cc: stable@vger.kernel.org
+Cc: John Kacur <jkacur@redhat.com>
+Cc: "Luis Claudio R. Goncalves" <lgoncalv@redhat.com>
+Cc: Clark Williams <williams@redhat.com>
+Fixes: 1eceb2fc2ca5 ("rtla/osnoise: Add osnoise top mode")
+Link: https://lore.kernel.org/f964ed1f-64d2-4fde-ad3e-708331f8f358@stanley.mountain
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/tracing/rtla/src/osnoise_top.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+--- a/tools/tracing/rtla/src/osnoise_top.c
++++ b/tools/tracing/rtla/src/osnoise_top.c
+@@ -640,8 +640,10 @@ struct osnoise_tool *osnoise_init_top(st
+ return NULL;
+
+ tool->data = osnoise_alloc_top(nr_cpus);
+- if (!tool->data)
+- goto out_err;
++ if (!tool->data) {
++ osnoise_destroy_tool(tool);
++ return NULL;
++ }
+
+ tool->params = params;
+
+@@ -649,11 +651,6 @@ struct osnoise_tool *osnoise_init_top(st
+ osnoise_top_handler, NULL);
+
+ return tool;
+-
+-out_err:
+- osnoise_free_top(tool->data);
+- osnoise_destroy_tool(tool);
+- return NULL;
+ }
+
+ static int stop_tracing;
--- /dev/null
+From 7db4042336580dfd75cb5faa82c12cd51098c90b Mon Sep 17 00:00:00 2001
+From: Stefan Haberland <sth@linux.ibm.com>
+Date: Mon, 12 Aug 2024 14:57:33 +0200
+Subject: s390/dasd: fix error recovery leading to data corruption on ESE devices
+
+From: Stefan Haberland <sth@linux.ibm.com>
+
+commit 7db4042336580dfd75cb5faa82c12cd51098c90b upstream.
+
+Extent Space Efficient (ESE) or thin provisioned volumes need to be
+formatted on demand during usual IO processing.
+
+The dasd_ese_needs_format function checks for error codes that signal
+the non existence of a proper track format.
+
+The check for incorrect length is to imprecise since other error cases
+leading to transport of insufficient data also have this flag set.
+This might lead to data corruption in certain error cases for example
+during a storage server warmstart.
+
+Fix by removing the check for incorrect length and replacing by
+explicitly checking for invalid track format in transport mode.
+
+Also remove the check for file protected since this is not a valid
+ESE handling case.
+
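+The resulting check, reduced to a standalone sketch (the bit values
+below are placeholders chosen only so the sketch compiles; the kernel
+uses the s390 sense-byte definitions):
+
+    #include <stdio.h>
+
+    #define SNS1_NO_REC_FOUND     0x08    /* placeholder value */
+    #define SNS1_INV_TRACK_FORMAT 0x40    /* placeholder value */
+    #define SNS2_ENV_DATA_PRESENT 0x80    /* placeholder value */
+
+    /* Format on demand for "no record found", or for "invalid track
+     * format" in transport mode without environmental data present;
+     * incorrect length no longer implies "needs format". */
+    static int needs_format(const unsigned char *sense, int transport_mode)
+    {
+        if (sense[1] & SNS1_NO_REC_FOUND)
+            return 1;
+        if ((sense[1] & SNS1_INV_TRACK_FORMAT) && transport_mode &&
+            !(sense[2] & SNS2_ENV_DATA_PRESENT))
+            return 1;
+        return 0;
+    }
+
+    int main(void)
+    {
+        unsigned char sense[3] = { 0, SNS1_INV_TRACK_FORMAT, 0 };
+
+        printf("%d\n", needs_format(sense, 1));    /* 1 */
+        return 0;
+    }
+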
+Cc: stable@vger.kernel.org # 5.3+
+Fixes: 5e2b17e712cf ("s390/dasd: Add dynamic formatting support for ESE volumes")
+Reviewed-by: Jan Hoeppner <hoeppner@linux.ibm.com>
+Signed-off-by: Stefan Haberland <sth@linux.ibm.com>
+Link: https://lore.kernel.org/r/20240812125733.126431-3-sth@linux.ibm.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/block/dasd.c | 36 +++++++++++++++---------
+ drivers/s390/block/dasd_3990_erp.c | 10 +-----
+ drivers/s390/block/dasd_eckd.c | 55 ++++++++++++++++---------------------
+ drivers/s390/block/dasd_int.h | 2 -
+ 4 files changed, 50 insertions(+), 53 deletions(-)
+
+--- a/drivers/s390/block/dasd.c
++++ b/drivers/s390/block/dasd.c
+@@ -1601,9 +1601,15 @@ static int dasd_ese_needs_format(struct
+ if (!sense)
+ return 0;
+
+- return !!(sense[1] & SNS1_NO_REC_FOUND) ||
+- !!(sense[1] & SNS1_FILE_PROTECTED) ||
+- scsw_cstat(&irb->scsw) == SCHN_STAT_INCORR_LEN;
++ if (sense[1] & SNS1_NO_REC_FOUND)
++ return 1;
++
++ if ((sense[1] & SNS1_INV_TRACK_FORMAT) &&
++ scsw_is_tm(&irb->scsw) &&
++ !(sense[2] & SNS2_ENV_DATA_PRESENT))
++ return 1;
++
++ return 0;
+ }
+
+ static int dasd_ese_oos_cond(u8 *sense)
+@@ -1624,7 +1630,7 @@ void dasd_int_handler(struct ccw_device
+ struct dasd_device *device;
+ unsigned long now;
+ int nrf_suppressed = 0;
+- int fp_suppressed = 0;
++ int it_suppressed = 0;
+ struct request *req;
+ u8 *sense = NULL;
+ int expires;
+@@ -1679,8 +1685,9 @@ void dasd_int_handler(struct ccw_device
+ */
+ sense = dasd_get_sense(irb);
+ if (sense) {
+- fp_suppressed = (sense[1] & SNS1_FILE_PROTECTED) &&
+- test_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags);
++ it_suppressed = (sense[1] & SNS1_INV_TRACK_FORMAT) &&
++ !(sense[2] & SNS2_ENV_DATA_PRESENT) &&
++ test_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags);
+ nrf_suppressed = (sense[1] & SNS1_NO_REC_FOUND) &&
+ test_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags);
+
+@@ -1695,7 +1702,7 @@ void dasd_int_handler(struct ccw_device
+ return;
+ }
+ }
+- if (!(fp_suppressed || nrf_suppressed))
++ if (!(it_suppressed || nrf_suppressed))
+ device->discipline->dump_sense_dbf(device, irb, "int");
+
+ if (device->features & DASD_FEATURE_ERPLOG)
+@@ -2459,14 +2466,17 @@ retry:
+ rc = 0;
+ list_for_each_entry_safe(cqr, n, ccw_queue, blocklist) {
+ /*
+- * In some cases the 'File Protected' or 'Incorrect Length'
+- * error might be expected and error recovery would be
+- * unnecessary in these cases. Check if the according suppress
+- * bit is set.
++ * In some cases certain errors might be expected and
++ * error recovery would be unnecessary in these cases.
++ * Check if the according suppress bit is set.
+ */
+ sense = dasd_get_sense(&cqr->irb);
+- if (sense && sense[1] & SNS1_FILE_PROTECTED &&
+- test_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags))
++ if (sense && (sense[1] & SNS1_INV_TRACK_FORMAT) &&
++ !(sense[2] & SNS2_ENV_DATA_PRESENT) &&
++ test_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags))
++ continue;
++ if (sense && (sense[1] & SNS1_NO_REC_FOUND) &&
++ test_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags))
+ continue;
+ if (scsw_cstat(&cqr->irb.scsw) == 0x40 &&
+ test_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags))
+--- a/drivers/s390/block/dasd_3990_erp.c
++++ b/drivers/s390/block/dasd_3990_erp.c
+@@ -1386,14 +1386,8 @@ dasd_3990_erp_file_prot(struct dasd_ccw_
+
+ struct dasd_device *device = erp->startdev;
+
+- /*
+- * In some cases the 'File Protected' error might be expected and
+- * log messages shouldn't be written then.
+- * Check if the according suppress bit is set.
+- */
+- if (!test_bit(DASD_CQR_SUPPRESS_FP, &erp->flags))
+- dev_err(&device->cdev->dev,
+- "Accessing the DASD failed because of a hardware error\n");
++ dev_err(&device->cdev->dev,
++ "Accessing the DASD failed because of a hardware error\n");
+
+ return dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED);
+
+--- a/drivers/s390/block/dasd_eckd.c
++++ b/drivers/s390/block/dasd_eckd.c
+@@ -2274,6 +2274,7 @@ dasd_eckd_analysis_ccw(struct dasd_devic
+ cqr->status = DASD_CQR_FILLED;
+ /* Set flags to suppress output for expected errors */
+ set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags);
++ set_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags);
+
+ return cqr;
+ }
+@@ -2555,7 +2556,6 @@ dasd_eckd_build_check_tcw(struct dasd_de
+ cqr->buildclk = get_tod_clock();
+ cqr->status = DASD_CQR_FILLED;
+ /* Set flags to suppress output for expected errors */
+- set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags);
+ set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags);
+
+ return cqr;
+@@ -4129,8 +4129,6 @@ static struct dasd_ccw_req *dasd_eckd_bu
+
+ /* Set flags to suppress output for expected errors */
+ if (dasd_eckd_is_ese(basedev)) {
+- set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags);
+- set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags);
+ set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags);
+ }
+
+@@ -4632,9 +4630,8 @@ static struct dasd_ccw_req *dasd_eckd_bu
+
+ /* Set flags to suppress output for expected errors */
+ if (dasd_eckd_is_ese(basedev)) {
+- set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags);
+- set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags);
+ set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags);
++ set_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags);
+ }
+
+ return cqr;
+@@ -5779,36 +5776,32 @@ static void dasd_eckd_dump_sense(struct
+ {
+ u8 *sense = dasd_get_sense(irb);
+
+- if (scsw_is_tm(&irb->scsw)) {
+- /*
+- * In some cases the 'File Protected' or 'Incorrect Length'
+- * error might be expected and log messages shouldn't be written
+- * then. Check if the according suppress bit is set.
+- */
+- if (sense && (sense[1] & SNS1_FILE_PROTECTED) &&
+- test_bit(DASD_CQR_SUPPRESS_FP, &req->flags))
+- return;
+- if (scsw_cstat(&irb->scsw) == 0x40 &&
+- test_bit(DASD_CQR_SUPPRESS_IL, &req->flags))
+- return;
++ /*
++ * In some cases certain errors might be expected and
++ * log messages shouldn't be written then.
++ * Check if the according suppress bit is set.
++ */
++ if (sense && (sense[1] & SNS1_INV_TRACK_FORMAT) &&
++ !(sense[2] & SNS2_ENV_DATA_PRESENT) &&
++ test_bit(DASD_CQR_SUPPRESS_IT, &req->flags))
++ return;
+
+- dasd_eckd_dump_sense_tcw(device, req, irb);
+- } else {
+- /*
+- * In some cases the 'Command Reject' or 'No Record Found'
+- * error might be expected and log messages shouldn't be
+- * written then. Check if the according suppress bit is set.
+- */
+- if (sense && sense[0] & SNS0_CMD_REJECT &&
+- test_bit(DASD_CQR_SUPPRESS_CR, &req->flags))
+- return;
++ if (sense && sense[0] & SNS0_CMD_REJECT &&
++ test_bit(DASD_CQR_SUPPRESS_CR, &req->flags))
++ return;
+
+- if (sense && sense[1] & SNS1_NO_REC_FOUND &&
+- test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags))
+- return;
++ if (sense && sense[1] & SNS1_NO_REC_FOUND &&
++ test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags))
++ return;
+
++ if (scsw_cstat(&irb->scsw) == 0x40 &&
++ test_bit(DASD_CQR_SUPPRESS_IL, &req->flags))
++ return;
++
++ if (scsw_is_tm(&irb->scsw))
++ dasd_eckd_dump_sense_tcw(device, req, irb);
++ else
+ dasd_eckd_dump_sense_ccw(device, req, irb);
+- }
+ }
+
+ static int dasd_eckd_reload_device(struct dasd_device *device)
+--- a/drivers/s390/block/dasd_int.h
++++ b/drivers/s390/block/dasd_int.h
+@@ -196,7 +196,7 @@ struct dasd_ccw_req {
+ * The following flags are used to suppress output of certain errors.
+ */
+ #define DASD_CQR_SUPPRESS_NRF 4 /* Suppress 'No Record Found' error */
+-#define DASD_CQR_SUPPRESS_FP 5 /* Suppress 'File Protected' error*/
++#define DASD_CQR_SUPPRESS_IT 5 /* Suppress 'Invalid Track' error*/
+ #define DASD_CQR_SUPPRESS_IL 6 /* Suppress 'Incorrect Length' error */
+ #define DASD_CQR_SUPPRESS_CR 7 /* Suppress 'Command Reject' error */
+
--- /dev/null
+From 7c5e8d212d7d81991a580e7de3904ea213d9a852 Mon Sep 17 00:00:00 2001
+From: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Date: Fri, 9 Aug 2024 12:56:42 +0500
+Subject: selftests: memfd_secret: don't build memfd_secret test on unsupported arches
+
+From: Muhammad Usama Anjum <usama.anjum@collabora.com>
+
+commit 7c5e8d212d7d81991a580e7de3904ea213d9a852 upstream.
+
+[1] mentions that memfd_secret is only supported on arm64, riscv, x86 and
+x86_64 for now. It doesn't support other architectures. I found a
+build error on arm and decided to send a fix, as it was creating noise on
+KernelCI:
+
+memfd_secret.c: In function 'memfd_secret':
+memfd_secret.c:42:24: error: '__NR_memfd_secret' undeclared (first use in this function);
+did you mean 'memfd_secret'?
+ 42 | return syscall(__NR_memfd_secret, flags);
+ | ^~~~~~~~~~~~~~~~~
+ | memfd_secret
+
+Hence I'm adding a condition so that memfd_secret is only compiled on
+supported architectures.
+
+Also check in the run_vmtests script whether the memfd_secret binary is
+present before executing it.
+
+Link: https://lkml.kernel.org/r/20240812061522.1933054-1-usama.anjum@collabora.com
+Link: https://lore.kernel.org/all/20210518072034.31572-7-rppt@kernel.org/ [1]
+Link: https://lkml.kernel.org/r/20240809075642.403247-1-usama.anjum@collabora.com
+Fixes: 76fe17ef588a ("secretmem: test: add basic selftest for memfd_secret(2)")
+Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
+Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: Albert Ou <aou@eecs.berkeley.edu>
+Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
+Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: Palmer Dabbelt <palmer@dabbelt.com>
+Cc: Paul Walmsley <paul.walmsley@sifive.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/mm/Makefile | 2 ++
+ tools/testing/selftests/mm/run_vmtests.sh | 3 +++
+ 2 files changed, 5 insertions(+)
+
+--- a/tools/testing/selftests/mm/Makefile
++++ b/tools/testing/selftests/mm/Makefile
+@@ -51,7 +51,9 @@ TEST_GEN_FILES += madv_populate
+ TEST_GEN_FILES += map_fixed_noreplace
+ TEST_GEN_FILES += map_hugetlb
+ TEST_GEN_FILES += map_populate
++ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64))
+ TEST_GEN_FILES += memfd_secret
++endif
+ TEST_GEN_FILES += migration
+ TEST_GEN_FILES += mkdirty
+ TEST_GEN_FILES += mlock-random-test
+--- a/tools/testing/selftests/mm/run_vmtests.sh
++++ b/tools/testing/selftests/mm/run_vmtests.sh
+@@ -367,8 +367,11 @@ CATEGORY="hmm" run_test bash ./test_hmm.
+ # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
+ CATEGORY="madv_populate" run_test ./madv_populate
+
++if [ -x ./memfd_secret ]
++then
+ (echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix
+ CATEGORY="memfd_secret" run_test ./memfd_secret
++fi
+
+ # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100
+ CATEGORY="ksm" run_test ./ksm_tests -H -s 100
--- /dev/null
+From 6dd1e4c045afa6a4ba5d46f044c83bd357c593c2 Mon Sep 17 00:00:00 2001
+From: Zhen Lei <thunder.leizhen@huawei.com>
+Date: Wed, 7 Aug 2024 17:00:56 +0800
+Subject: selinux: add the processing of the failure of avc_add_xperms_decision()
+
+From: Zhen Lei <thunder.leizhen@huawei.com>
+
+commit 6dd1e4c045afa6a4ba5d46f044c83bd357c593c2 upstream.
+
+When avc_add_xperms_decision() fails, the information recorded by the new
+avc node is incomplete. In this case, the new avc node should be released
+instead of replacing the old avc node.
+
+Cc: stable@vger.kernel.org
+Fixes: fa1aa143ac4a ("selinux: extended permissions for ioctls")
+Suggested-by: Stephen Smalley <stephen.smalley.work@gmail.com>
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com>
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/selinux/avc.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/security/selinux/avc.c
++++ b/security/selinux/avc.c
+@@ -907,7 +907,11 @@ static int avc_update_node(u32 event, u3
+ node->ae.avd.auditdeny &= ~perms;
+ break;
+ case AVC_CALLBACK_ADD_XPERMS:
+- avc_add_xperms_decision(node, xpd);
++ rc = avc_add_xperms_decision(node, xpd);
++ if (rc) {
++ avc_node_kill(node);
++ goto out_unlock;
++ }
+ break;
+ }
+ avc_node_replace(node, orig);
--- /dev/null
+From 379d9af3f3da2da1bbfa67baf1820c72a080d1f1 Mon Sep 17 00:00:00 2001
+From: Zhen Lei <thunder.leizhen@huawei.com>
+Date: Tue, 6 Aug 2024 14:51:13 +0800
+Subject: selinux: fix potential counting error in avc_add_xperms_decision()
+
+From: Zhen Lei <thunder.leizhen@huawei.com>
+
+commit 379d9af3f3da2da1bbfa67baf1820c72a080d1f1 upstream.
+
+The count increases only when a node is successfully added to
+the linked list.
+
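+A toy model of the ordering (invented names, not the kernel's types):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    struct xp_node { int len; };
+
+    /* Bump len only after the allocation (and insertion) succeeded,
+     * so a failed -ENOMEM path leaves the count consistent with the
+     * list contents. */
+    static int add_decision(struct xp_node *xp, int simulate_enomem)
+    {
+        void *dest = simulate_enomem ? NULL : malloc(32);
+
+        if (!dest)
+            return -1;
+        /* ... copy the decision and list_add() it ... */
+        xp->len++;
+        return 0;
+    }
+
+    int main(void)
+    {
+        struct xp_node xp = { 0 };
+
+        add_decision(&xp, 1);    /* fails: len stays 0 */
+        add_decision(&xp, 0);
+        printf("len=%d\n", xp.len);    /* 1 */
+        return 0;
+    }
+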
+Cc: stable@vger.kernel.org
+Fixes: fa1aa143ac4a ("selinux: extended permissions for ioctls")
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com>
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/selinux/avc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/security/selinux/avc.c
++++ b/security/selinux/avc.c
+@@ -330,12 +330,12 @@ static int avc_add_xperms_decision(struc
+ {
+ struct avc_xperms_decision_node *dest_xpd;
+
+- node->ae.xp_node->xp.len++;
+ dest_xpd = avc_xperms_decision_alloc(src->used);
+ if (!dest_xpd)
+ return -ENOMEM;
+ avc_copy_xperms_decision(&dest_xpd->xpd, src);
+ list_add(&dest_xpd->xpd_list, &node->ae.xp_node->xpd_head);
++ node->ae.xp_node->xp.len++;
+ return 0;
+ }
+
selinux-revert-our-use-of-vma_is_initial_heap.patch
netfs-ceph-revert-netfs-remove-deprecated-use-of-pg_private_2-as-a-second-writeback-flag.patch
fuse-initialize-beyond-eof-page-contents-before-setting-uptodate.patch
+char-xillybus-don-t-destroy-workqueue-from-work-item-running-on-it.patch
+char-xillybus-refine-workqueue-handling.patch
+char-xillybus-check-usb-endpoints-when-probing-device.patch
+alsa-usb-audio-add-delay-quirk-for-vivo-usb-c-xe710-headset.patch
+alsa-usb-audio-support-yamaha-p-125-quirk-entry.patch
+usb-misc-ljca-add-lunar-lake-ljca-gpio-hid-to-ljca_gpio_hids.patch
+usb-xhci-check-for-xhci-interrupters-being-allocated-in-xhci_mem_clearup.patch
+xhci-fix-panther-point-null-pointer-deref-at-full-speed-re-enumeration.patch
+thunderbolt-mark-xdomain-as-unplugged-when-router-is-removed.patch
+alsa-hda-tas2781-fix-wrong-calibrated-data-order.patch
+alsa-timer-relax-start-tick-time-check-for-slave-timer-elements.patch
+s390-dasd-fix-error-recovery-leading-to-data-corruption-on-ese-devices.patch
+kvm-s390-fix-validity-interception-issue-when-gisa-is-switched-off.patch
+thermal-gov_bang_bang-call-__thermal_cdev_update-directly.patch
+keys-trusted-fix-dcp-blob-payload-length-assignment.patch
+keys-trusted-dcp-fix-leak-of-blob-encryption-key.patch
+riscv-change-xip-s-kernel_map.size-to-be-size-of-the-entire-kernel.patch
+riscv-entry-always-initialize-regs-a0-to-enosys.patch
+smb3-fix-lock-breakage-for-cached-writes.patch
+i2c-tegra-do-not-mark-acpi-devices-as-irq-safe.patch
+acpica-add-a-depth-argument-to-acpi_execute_reg_methods.patch
+acpi-ec-evaluate-_reg-outside-the-ec-scope-more-carefully.patch
+arm64-acpi-numa-initialize-all-values-of-acpi_early_node_map-to-numa_no_node.patch
+dm-resume-don-t-return-einval-when-signalled.patch
+dm-persistent-data-fix-memory-allocation-failure.patch
+vfs-don-t-evict-inode-under-the-inode-lru-traversing-context.patch
+fix-bitmap-corruption-on-close_range-with-close_range_unshare.patch
+i2c-qcom-geni-add-missing-geni_icc_disable-in-geni_i2c_runtime_resume.patch
+tracing-return-from-tracing_buffers_read-if-the-file-has-been-closed.patch
+perf-bpf-don-t-call-bpf_overflow_handler-for-tracing-events.patch
+mseal-fix-is_madv_discard.patch
+rtla-osnoise-prevent-null-dereference-in-error-handling.patch
+mm-fix-endless-reclaim-on-machines-with-unaccepted-memory.patch
+mm-hugetlb-fix-hugetlb-vs.-core-mm-pt-locking.patch
+md-raid1-fix-data-corruption-for-degraded-array-with-slow-disk.patch
+net-mana-fix-rx-buf-alloc_size-alignment-and-atomic-op-panic.patch
+media-atomisp-fix-streaming-no-longer-working-on-byt-isp2400-devices.patch
+net-mana-fix-doorbell-out-of-order-violation-and-avoid-unnecessary-doorbell-rings.patch
+wifi-brcmfmac-cfg80211-handle-ssid-based-pmksa-deletion.patch
+fs-netfs-fscache_cookie-add-missing-n_accesses-check.patch
+selinux-fix-potential-counting-error-in-avc_add_xperms_decision.patch
+selinux-add-the-processing-of-the-failure-of-avc_add_xperms_decision.patch
+alloc_tag-mark-pages-reserved-during-cma-activation-as-not-tagged.patch
+mm-memory-failure-use-raw_spinlock_t-in-struct-memory_failure_cpu.patch
+selftests-memfd_secret-don-t-build-memfd_secret-test-on-unsupported-arches.patch
+alloc_tag-introduce-clear_page_tag_ref-helper-function.patch
+mm-numa-no-task_numa_fault-call-if-pmd-is-changed.patch
+mm-vmalloc-fix-page-mapping-if-vm_area_alloc_pages-with-high-order-fallback-to-order-0.patch
+mm-numa-no-task_numa_fault-call-if-pte-is-changed.patch
+btrfs-tree-checker-reject-btrfs_ft_unknown-dir-type.patch
+btrfs-send-allow-cloning-non-aligned-extent-if-it-ends-at-i_size.patch
+btrfs-check-delayed-refs-when-we-re-checking-if-a-ref-exists.patch
+btrfs-only-run-the-extent-map-shrinker-from-kswapd-tasks.patch
+btrfs-zoned-properly-take-lock-to-read-update-block-group-s-zoned-variables.patch
+btrfs-tree-checker-add-dev-extent-item-checks.patch
+btrfs-only-enable-extent-map-shrinker-for-debug-builds.patch
+drm-amdgpu-actually-check-flags-for-all-context-ops.patch
+memcg_write_event_control-fix-a-user-triggerable-oops.patch
--- /dev/null
+From 836bb3268db405cf9021496ac4dbc26d3e4758fe Mon Sep 17 00:00:00 2001
+From: Steve French <stfrench@microsoft.com>
+Date: Thu, 15 Aug 2024 14:03:43 -0500
+Subject: smb3: fix lock breakage for cached writes
+
+From: Steve French <stfrench@microsoft.com>
+
+commit 836bb3268db405cf9021496ac4dbc26d3e4758fe upstream.
+
+Mandatory locking is enforced for cached writes, which violates
+default posix semantics, and it is also enforced inconsistently.
+This apparently breaks recent versions of libreoffice, but can
+also be demonstrated by opening a file twice from the same
+client, locking it from handle one and writing to it from
+handle two (which fails, returning EACCES).
+
+There was already a mount option "forcemandatorylock" (which
+defaults to off). With this change we only break posix semantics
+on a write to a locked range when the user intentionally
+specifies "forcemandatorylock" on mount, i.e. the write only
+fails in that case.
+
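+A rough repro sketch of the failing sequence (the path is hypothetical;
+run it against a file on a CIFS mount without "forcemandatorylock"):
+
+    #include <fcntl.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    int main(void)
+    {
+        int fd1 = open("/mnt/cifs/f", O_RDWR | O_CREAT, 0644);
+        int fd2 = open("/mnt/cifs/f", O_RDWR);
+        struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET };
+
+        if (fd1 < 0 || fd2 < 0)
+            return 1;
+        fcntl(fd1, F_SETLK, &fl);      /* lock from handle one */
+        if (write(fd2, "x", 1) < 0)    /* EACCES before the fix */
+            perror("write");
+        close(fd2);
+        close(fd1);
+        return 0;
+    }
+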
+Fixes: 85160e03a79e ("CIFS: Implement caching mechanism for mandatory brlocks")
+Cc: stable@vger.kernel.org
+Cc: Pavel Shilovsky <piastryyy@gmail.com>
+Reported-by: abartlet@samba.org
+Reported-by: Kevin Ottens <kevin.ottens@enioka.com>
+Reviewed-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/client/file.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/fs/smb/client/file.c
++++ b/fs/smb/client/file.c
+@@ -2719,6 +2719,7 @@ cifs_writev(struct kiocb *iocb, struct i
+ struct inode *inode = file->f_mapping->host;
+ struct cifsInodeInfo *cinode = CIFS_I(inode);
+ struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
++ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ ssize_t rc;
+
+ rc = netfs_start_io_write(inode);
+@@ -2735,12 +2736,16 @@ cifs_writev(struct kiocb *iocb, struct i
+ if (rc <= 0)
+ goto out;
+
+- if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
++ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) &&
++ (cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
+ server->vals->exclusive_lock_type, 0,
+- NULL, CIFS_WRITE_OP))
+- rc = netfs_buffered_write_iter_locked(iocb, from, NULL);
+- else
++ NULL, CIFS_WRITE_OP))) {
+ rc = -EACCES;
++ goto out;
++ }
++
++ rc = netfs_buffered_write_iter_locked(iocb, from, NULL);
++
+ out:
+ up_read(&cinode->lock_sem);
+ netfs_end_io_write(inode);
--- /dev/null
+From b9b6ee6fe258ce4d89592593efcd3d798c418859 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Tue, 13 Aug 2024 16:25:19 +0200
+Subject: thermal: gov_bang_bang: Call __thermal_cdev_update() directly
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit b9b6ee6fe258ce4d89592593efcd3d798c418859 upstream.
+
+Instead of clearing the "updated" flag for each cooling device
+affected by the trip point crossing in bang_bang_control() and
+walking all thermal instances to run thermal_cdev_update() for all
+of the affected cooling devices, call __thermal_cdev_update()
+directly for each of them.
+
+No intentional functional impact.
+
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Acked-by: Peter Kästle <peter@piie.net>
+Reviewed-by: Zhang Rui <rui.zhang@intel.com>
+Cc: 6.10+ <stable@vger.kernel.org> # 6.10+
+Link: https://patch.msgid.link/13583081.uLZWGnKmhe@rjwysocki.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/thermal/gov_bang_bang.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/drivers/thermal/gov_bang_bang.c
++++ b/drivers/thermal/gov_bang_bang.c
+@@ -79,12 +79,9 @@ static void bang_bang_control(struct the
+ dev_dbg(&instance->cdev->device, "target=%ld\n", instance->target);
+
+ mutex_lock(&instance->cdev->lock);
+- instance->cdev->updated = false; /* cdev needs update */
++ __thermal_cdev_update(instance->cdev);
+ mutex_unlock(&instance->cdev->lock);
+ }
+-
+- list_for_each_entry(instance, &tz->thermal_instances, tz_node)
+- thermal_cdev_update(instance->cdev);
+ }
+
+ static struct thermal_governor thermal_gov_bang_bang = {
--- /dev/null
+From e2006140ad2e01a02ed0aff49cc2ae3ceeb11f8d Mon Sep 17 00:00:00 2001
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+Date: Thu, 13 Jun 2024 15:05:03 +0300
+Subject: thunderbolt: Mark XDomain as unplugged when router is removed
+
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+
+commit e2006140ad2e01a02ed0aff49cc2ae3ceeb11f8d upstream.
+
+I noticed that when we do a discrete host router NVM upgrade and it gets
+hot-removed from the PCIe side as a result of NVM firmware authentication,
+if there is another host connected with enabled paths we hang in tearing
+them down. This is due to the fact that the Thunderbolt networking driver
+also tries to clean up the paths and ends up blocking in
+tb_disconnect_xdomain_paths() waiting for the domain lock.
+
+However, at this point we have already cleaned up the paths in tb_stop(),
+so there is really no need for tb_disconnect_xdomain_paths() to do that
+anymore. Furthermore, it already checks whether the XDomain is unplugged
+and bails out early, so take advantage of that and mark the XDomain as
+unplugged when we remove the parent router.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/thunderbolt/switch.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/thunderbolt/switch.c
++++ b/drivers/thunderbolt/switch.c
+@@ -3392,6 +3392,7 @@ void tb_switch_remove(struct tb_switch *
+ tb_switch_remove(port->remote->sw);
+ port->remote = NULL;
+ } else if (port->xdomain) {
++ port->xdomain->is_unplugged = true;
+ tb_xdomain_remove(port->xdomain);
+ port->xdomain = NULL;
+ }
--- /dev/null
+From d0949cd44a62c4c41b30ea7ae94d8c887f586882 Mon Sep 17 00:00:00 2001
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Thu, 8 Aug 2024 23:57:30 -0400
+Subject: tracing: Return from tracing_buffers_read() if the file has been closed
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+commit d0949cd44a62c4c41b30ea7ae94d8c887f586882 upstream.
+
+When running the following:
+
+ # cd /sys/kernel/tracing/
+ # echo 1 > events/sched/sched_waking/enable
+ # echo 1 > events/sched/sched_switch/enable
+ # echo 0 > tracing_on
+ # dd if=per_cpu/cpu0/trace_pipe_raw of=/tmp/raw0.dat
+
+The dd task would get stuck in an infinite loop in the kernel. What would
+happen is the following:
+
+When ring_buffer_read_page() returns -1 (no data), a check is made to
+see if the buffer is empty (as happens when the page is not full); if it
+is, wait_on_pipe() is called to wait until the ring buffer has data. When
+it does, the code tries again to read data (unless O_NONBLOCK is set).
+
+The issue happens when there is a reader and the file descriptor is closed.
+wait_on_pipe() does return in that case, but the loop simply tries again,
+wait_on_pipe() again returns immediately, and the loop never stops.
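+
+The shape of the loop, simplified (not verbatim):
+
+  again:
+        ret = ring_buffer_read_page(...);
+        if (ret < 0) {
+                if (trace_empty(iter)) {
+                        if (filp->f_flags & O_NONBLOCK)
+                                return -EAGAIN;
+                        ret = wait_on_pipe(iter, 0); /* also returns on close */
+                        if (ret)
+                                return ret;
+                        goto again; /* spins forever once the fd is closed */
+                }
+        }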
+
+Simply check if the file was closed before looping and exit out if it is.
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Link: https://lore.kernel.org/20240808235730.78bf63e5@rorschach.local.home
+Fixes: 2aa043a55b9a7 ("tracing/ring-buffer: Fix wait_on_pipe() race")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index 10cd38bce2f1..ebe7ce2f5f4a 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -7956,7 +7956,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
+ trace_access_unlock(iter->cpu_file);
+
+ if (ret < 0) {
+- if (trace_empty(iter)) {
++ if (trace_empty(iter) && !iter->closed) {
+ if ((filp->f_flags & O_NONBLOCK))
+ return -EAGAIN;
+
+--
+2.46.0
+
--- /dev/null
+From 3ed486e383ccee9b0c8d727608f12a937c6603ca Mon Sep 17 00:00:00 2001
+From: Hans de Goede <hdegoede@redhat.com>
+Date: Mon, 12 Aug 2024 11:50:38 +0200
+Subject: usb: misc: ljca: Add Lunar Lake ljca GPIO HID to ljca_gpio_hids[]
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+commit 3ed486e383ccee9b0c8d727608f12a937c6603ca upstream.
+
+Add LJCA GPIO support for the Lunar Lake platform.
+
+New HID taken from the out-of-tree ivsc-driver git repo.
+
+Link: https://github.com/intel/ivsc-driver/commit/47e7c4a446c8ea8c741ff5a32fa7b19f9e6fd47e
+Cc: stable <stable@kernel.org>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Link: https://lore.kernel.org/r/20240812095038.555837-1-hdegoede@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/misc/usb-ljca.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/usb/misc/usb-ljca.c
++++ b/drivers/usb/misc/usb-ljca.c
+@@ -169,6 +169,7 @@ static const struct acpi_device_id ljca_
+ { "INTC1096" },
+ { "INTC100B" },
+ { "INTC10D1" },
++ { "INTC10B5" },
+ {},
+ };
+
--- /dev/null
+From dcdb52d948f3a17ccd3fce757d9bd981d7c32039 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Fri, 9 Aug 2024 15:44:07 +0300
+Subject: usb: xhci: Check for xhci->interrupters being allocated in xhci_mem_clearup()
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit dcdb52d948f3a17ccd3fce757d9bd981d7c32039 upstream.
+
+If xhci_mem_init() fails, it calls into xhci_mem_cleanup() to mop
+up the damage. If it fails early enough, before xhci->interrupters
+is allocated but after xhci->max_interrupters has been set, which
+happens in most (all?) cases, things get uglier, as xhci_mem_cleanup()
+unconditionally dereferences xhci->interrupters. With prejudice.
+
+Gate the interrupt freeing loop with a check on xhci->interrupters
+being non-NULL.
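+
+The failure shape, spelled out (illustrative, not the actual code):
+
+  xhci->max_interrupters = n;  /* set early in xhci_mem_init() */
+  /* ... init fails before xhci->interrupters is allocated ... */
+
+  /* xhci_mem_cleanup() then walks a NULL array: */
+  for (i = 0; i < xhci->max_interrupters; i++)
+          if (xhci->interrupters[i])  /* NULL pointer dereference */
+                  ...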
+
+Found while debugging a DMA allocation issue that led the xHCI driver
+down this exact path.
+
+Fixes: c99b38c41234 ("xhci: add support to allocate several interrupters")
+Cc: Mathias Nyman <mathias.nyman@linux.intel.com>
+Cc: Wesley Cheng <quic_wcheng@quicinc.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org # 6.8+
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20240809124408.505786-2-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/host/xhci-mem.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/usb/host/xhci-mem.c
++++ b/drivers/usb/host/xhci-mem.c
+@@ -1877,7 +1877,7 @@ void xhci_mem_cleanup(struct xhci_hcd *x
+
+ cancel_delayed_work_sync(&xhci->cmd_timer);
+
+- for (i = 0; i < xhci->max_interrupters; i++) {
++ for (i = 0; xhci->interrupters && i < xhci->max_interrupters; i++) {
+ if (xhci->interrupters[i]) {
+ xhci_remove_interrupter(xhci, xhci->interrupters[i]);
+ xhci_free_interrupter(xhci, xhci->interrupters[i]);
--- /dev/null
+From 2a0629834cd82f05d424bbc193374f9a43d1f87d Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Fri, 9 Aug 2024 11:16:28 +0800
+Subject: vfs: Don't evict inode under the inode lru traversing context
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 2a0629834cd82f05d424bbc193374f9a43d1f87d upstream.
+
+The inode reclaiming process (see prune_icache_sb()) first collects all
+reclaimable inodes and marks them with the I_FREEING flag; at that
+point, other processes get stuck if they try to grab these inodes
+(see find_inode_fast()), and the reclaiming process then destroys the
+inodes via dispose_list(). Some filesystems (e.g. ext4 with the
+ea_inode feature, ubifs with xattrs) may do an inode lookup in the
+inode evicting callback, and if that lookup runs under the inode LRU
+traversing context, deadlock problems may happen.
+
+Case 1: In ext4_evict_inode(), an ea inode lookup can happen if the
+        ea_inode feature is enabled, and the lookup process gets stuck
+        under the evicting context like this:
+
+ 1. File A has inode i_reg and an ea inode i_ea
+ 2. getfattr(A, xattr_buf) // i_ea is added into lru // lru->i_ea
+ 3. Then, the following processes run like this:
+
+ PA PB
+ echo 2 > /proc/sys/vm/drop_caches
+ shrink_slab
+ prune_dcache_sb
+ // i_reg is added into lru, lru->i_ea->i_reg
+ prune_icache_sb
+ list_lru_walk_one
+ inode_lru_isolate
+ i_ea->i_state |= I_FREEING // set inode state
+ inode_lru_isolate
+ __iget(i_reg)
+ spin_unlock(&i_reg->i_lock)
+ spin_unlock(lru_lock)
+ rm file A
+ i_reg->nlink = 0
+ iput(i_reg) // i_reg->nlink is 0, do evict
+ ext4_evict_inode
+ ext4_xattr_delete_inode
+ ext4_xattr_inode_dec_ref_all
+ ext4_xattr_inode_iget
+ ext4_iget(i_ea->i_ino)
+ iget_locked
+ find_inode_fast
+ __wait_on_freeing_inode(i_ea) ----→ AA deadlock
+ dispose_list // cannot be executed by prune_icache_sb
+ wake_up_bit(&i_ea->i_state)
+
+Case 2: In the deleted inode writing function ubifs_jnl_write_inode(),
+        the file deleting process holds BASEHD's wbuf->io_mutex while
+        getting the xattr inode, which can race with the inode reclaiming
+        process (which may try to lock BASEHD's wbuf->io_mutex in the
+        inode evicting function); an ABBA deadlock problem then happens
+        as follows:
+
+ 1. File A has inode ia and an xattr (with inode ixa), regular file B
+    has inode ib and an xattr.
+ 2. getfattr(A, xattr_buf) // ixa is added into lru // lru->ixa
+ 3. Then, the following three processes run like this:
+
+ PA PB PC
+ echo 2 > /proc/sys/vm/drop_caches
+ shrink_slab
+ prune_dcache_sb
+ // ib and ia are added into lru, lru->ixa->ib->ia
+ prune_icache_sb
+ list_lru_walk_one
+ inode_lru_isolate
+ ixa->i_state |= I_FREEING // set inode state
+ inode_lru_isolate
+ __iget(ib)
+ spin_unlock(&ib->i_lock)
+ spin_unlock(lru_lock)
+ rm file B
+ ib->nlink = 0
+ rm file A
+ iput(ia)
+ ubifs_evict_inode(ia)
+ ubifs_jnl_delete_inode(ia)
+ ubifs_jnl_write_inode(ia)
+ make_reservation(BASEHD) // Lock wbuf->io_mutex
+ ubifs_iget(ixa->i_ino)
+ iget_locked
+ find_inode_fast
+ __wait_on_freeing_inode(ixa)
+ | iput(ib) // ib->nlink is 0, do evict
+ | ubifs_evict_inode
+ | ubifs_jnl_delete_inode(ib)
+ ↓ ubifs_jnl_write_inode
+ ABBA deadlock ←-----make_reservation(BASEHD)
+ dispose_list // cannot be executed by prune_icache_sb
+ wake_up_bit(&ixa->i_state)
+
+Fix the possible deadlock by using a new inode state flag,
+I_LRU_ISOLATING, to pin the inode in memory while inode_lru_isolate()
+reclaims its pages, instead of taking an ordinary inode reference. This
+way inode deletion cannot be triggered from inode_lru_isolate(), thus
+avoiding the deadlock. evict() is made to wait for I_LRU_ISOLATING to
+be cleared before proceeding with inode cleanup.
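+
+The pattern at work, in outline (a sketch of the idea; the exact code
+is in the hunks below):
+
+  /* pin: set under i_lock instead of taking __iget() */
+  inode->i_state |= I_LRU_ISOLATING;
+
+  /* unpin: clear the bit and wake any waiter sleeping in evict() */
+  inode->i_state &= ~I_LRU_ISOLATING;
+  smp_mb();
+  wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
+
+Unlike an __iget()/iput() pair, dropping this pin can never become the
+final iput() that starts eviction, which is what breaks the lock cycle.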
+
+Link: https://lore.kernel.org/all/37c29c42-7685-d1f0-067d-63582ffac405@huaweicloud.com/
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=219022
+Fixes: e50e5129f384 ("ext4: xattr-in-inode support")
+Fixes: 7959cf3a7506 ("ubifs: journal: Handle xattrs like files")
+Cc: stable@vger.kernel.org
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Link: https://lore.kernel.org/r/20240809031628.1069873-1-chengzhihao@huaweicloud.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Suggested-by: Jan Kara <jack@suse.cz>
+Suggested-by: Mateusz Guzik <mjguzik@gmail.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/inode.c | 39 +++++++++++++++++++++++++++++++++++++--
+ include/linux/fs.h | 5 +++++
+ 2 files changed, 42 insertions(+), 2 deletions(-)
+
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -486,6 +486,39 @@ static void inode_lru_list_del(struct in
+ this_cpu_dec(nr_unused);
+ }
+
++static void inode_pin_lru_isolating(struct inode *inode)
++{
++ lockdep_assert_held(&inode->i_lock);
++ WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE));
++ inode->i_state |= I_LRU_ISOLATING;
++}
++
++static void inode_unpin_lru_isolating(struct inode *inode)
++{
++ spin_lock(&inode->i_lock);
++ WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
++ inode->i_state &= ~I_LRU_ISOLATING;
++ smp_mb();
++ wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
++ spin_unlock(&inode->i_lock);
++}
++
++static void inode_wait_for_lru_isolating(struct inode *inode)
++{
++ spin_lock(&inode->i_lock);
++ if (inode->i_state & I_LRU_ISOLATING) {
++ DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING);
++ wait_queue_head_t *wqh;
++
++ wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING);
++ spin_unlock(&inode->i_lock);
++ __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
++ spin_lock(&inode->i_lock);
++ WARN_ON(inode->i_state & I_LRU_ISOLATING);
++ }
++ spin_unlock(&inode->i_lock);
++}
++
+ /**
+ * inode_sb_list_add - add inode to the superblock list of inodes
+ * @inode: inode to add
+@@ -655,6 +688,8 @@ static void evict(struct inode *inode)
+
+ inode_sb_list_del(inode);
+
++ inode_wait_for_lru_isolating(inode);
++
+ /*
+ * Wait for flusher thread to be done with the inode so that filesystem
+ * does not start destroying it while writeback is still running. Since
+@@ -843,7 +878,7 @@ static enum lru_status inode_lru_isolate
+ * be under pressure before the cache inside the highmem zone.
+ */
+ if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
+- __iget(inode);
++ inode_pin_lru_isolating(inode);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(lru_lock);
+ if (remove_inode_buffers(inode)) {
+@@ -855,7 +890,7 @@ static enum lru_status inode_lru_isolate
+ __count_vm_events(PGINODESTEAL, reap);
+ mm_account_reclaimed_pages(reap);
+ }
+- iput(inode);
++ inode_unpin_lru_isolating(inode);
+ spin_lock(lru_lock);
+ return LRU_RETRY;
+ }
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -2370,6 +2370,9 @@ static inline void kiocb_clone(struct ki
+ *
+ * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback.
+ *
++ * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding
++ * i_count.
++ *
+ * Q: What is the difference between I_WILL_FREE and I_FREEING?
+ */
+ #define I_DIRTY_SYNC (1 << 0)
+@@ -2393,6 +2396,8 @@ static inline void kiocb_clone(struct ki
+ #define I_DONTCACHE (1 << 16)
+ #define I_SYNC_QUEUED (1 << 17)
+ #define I_PINNING_NETFS_WB (1 << 18)
++#define __I_LRU_ISOLATING 19
++#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING)
+
+ #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
+ #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
--- /dev/null
+From 2ad4e1ada8eebafa2d75a4b75eeeca882de6ada1 Mon Sep 17 00:00:00 2001
+From: Janne Grunau <j@jannau.net>
+Date: Sat, 3 Aug 2024 21:52:55 +0200
+Subject: wifi: brcmfmac: cfg80211: Handle SSID based pmksa deletion
+
+From: Janne Grunau <j@jannau.net>
+
+commit 2ad4e1ada8eebafa2d75a4b75eeeca882de6ada1 upstream.
+
+Since 1efdba5fdc2c ("Handle PMKSA flush in the driver for SAE/OWE
+offload cases"), wpa_supplicant 2.11 sends SSID-based PMKSA del
+commands. brcmfmac is not prepared for these and tries to dereference
+the NULL bssid and pmkid pointers in cfg80211_pmksa. PMKID_V3
+operations support SSID-based updates, so copy the SSID.
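+
+In the SSID-based delete case, the struct cfg80211_pmksa passed in has
+bssid and pmkid both NULL and only ssid/ssid_len set, so each copy has
+to be guarded, as outlined here (see the hunk below for the real thing):
+
+  if (pmksa->bssid)
+          memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN);
+  if (pmksa->ssid && pmksa->ssid_len)
+          memcpy(pmk_op->pmk[0].ssid.SSID, pmksa->ssid, pmksa->ssid_len);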
+
+Fixes: a96202acaea4 ("wifi: brcmfmac: cfg80211: Add support for PMKID_V3 operations")
+Cc: stable@vger.kernel.org # 6.4.x
+Signed-off-by: Janne Grunau <j@jannau.net>
+Reviewed-by: Neal Gompa <neal@gompa.dev>
+Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
+Signed-off-by: Kalle Valo <kvalo@kernel.org>
+Link: https://patch.msgid.link/20240803-brcmfmac_pmksa_del_ssid-v1-1-4e85f19135e1@jannau.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 13 +++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+@@ -4320,9 +4320,16 @@ brcmf_pmksa_v3_op(struct brcmf_if *ifp,
+ /* Single PMK operation */
+ pmk_op->count = cpu_to_le16(1);
+ length += sizeof(struct brcmf_pmksa_v3);
+- memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN);
+- memcpy(pmk_op->pmk[0].pmkid, pmksa->pmkid, WLAN_PMKID_LEN);
+- pmk_op->pmk[0].pmkid_len = WLAN_PMKID_LEN;
++ if (pmksa->bssid)
++ memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN);
++ if (pmksa->pmkid) {
++ memcpy(pmk_op->pmk[0].pmkid, pmksa->pmkid, WLAN_PMKID_LEN);
++ pmk_op->pmk[0].pmkid_len = WLAN_PMKID_LEN;
++ }
++ if (pmksa->ssid && pmksa->ssid_len) {
++ memcpy(pmk_op->pmk[0].ssid.SSID, pmksa->ssid, pmksa->ssid_len);
++ pmk_op->pmk[0].ssid.SSID_len = pmksa->ssid_len;
++ }
+ pmk_op->pmk[0].time_left = cpu_to_le32(alive ? BRCMF_PMKSA_NO_EXPIRY : 0);
+ }
+
--- /dev/null
+From af8e119f52e9c13e556be9e03f27957554a84656 Mon Sep 17 00:00:00 2001
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+Date: Thu, 15 Aug 2024 17:11:17 +0300
+Subject: xhci: Fix Panther point NULL pointer deref at full-speed re-enumeration
+
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+
+commit af8e119f52e9c13e556be9e03f27957554a84656 upstream.
+
+Re-enumerating full-speed devices after a failed address device command
+can trigger a NULL pointer dereference.
+
+Full-speed devices may need to reconfigure the endpoint 0 Max Packet Size
+value during enumeration. USB core calls usb_ep0_reinit() in this case,
+which ends up calling xhci_configure_endpoint().
+
+On Panther Point xHC the xhci_configure_endpoint() function will
+additionally check and reserve bandwidth in software. Other hosts do
+this in hardware.
+
+If the xHC address device command fails, a new xhci_virt_device structure
+is allocated as part of re-enabling the slot, but the bandwidth table
+pointers are not set up properly here.
+This triggers the NULL pointer dereference the next time usb_ep0_reinit()
+is called and xhci_configure_endpoint() tries to check and reserve
+bandwidth.
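+
+The failing call chain, in outline:
+
+  usb_ep0_reinit()
+    xhci_configure_endpoint()
+      xhci_reserve_bandwidth()  /* XHCI_SW_BW_CHECKING hosts only */
+        /* walks bandwidth table pointers that were never set up */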
+
+[46710.713538] usb 3-1: new full-speed USB device number 5 using xhci_hcd
+[46710.713699] usb 3-1: Device not responding to setup address.
+[46710.917684] usb 3-1: Device not responding to setup address.
+[46711.125536] usb 3-1: device not accepting address 5, error -71
+[46711.125594] BUG: kernel NULL pointer dereference, address: 0000000000000008
+[46711.125600] #PF: supervisor read access in kernel mode
+[46711.125603] #PF: error_code(0x0000) - not-present page
+[46711.125606] PGD 0 P4D 0
+[46711.125610] Oops: Oops: 0000 [#1] PREEMPT SMP PTI
+[46711.125615] CPU: 1 PID: 25760 Comm: kworker/1:2 Not tainted 6.10.3_2 #1
+[46711.125620] Hardware name: Gigabyte Technology Co., Ltd.
+[46711.125623] Workqueue: usb_hub_wq hub_event [usbcore]
+[46711.125668] RIP: 0010:xhci_reserve_bandwidth (drivers/usb/host/xhci.c
+
+Fix this by making sure bandwidth table pointers are set up correctly
+after a failed address device command, and additionally by avoiding
+checking for bandwidth in cases like this where no actual endpoints are
+added or removed, i.e. only the context for the default control
+endpoint 0 is evaluated.
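+
+In outline, the two parts of the fix (simplified from the hunks below):
+
+  /* 1) skip the software bandwidth check when no endpoints change */
+  if ((xhci->quirks & XHCI_SW_BW_CHECKING) && !ctx_change && ...)
+
+  /* 2) after a failed address command, re-init the fresh virt device */
+  if (xhci_alloc_dev(hcd, udev) == 1)
+          xhci_setup_addressable_virt_dev(xhci, udev);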
+
+Reported-by: Karel Balej <balejk@matfyz.cz>
+Closes: https://lore.kernel.org/linux-usb/D3CKQQAETH47.1MUO22RTCH2O3@matfyz.cz/
+Cc: stable@vger.kernel.org
+Fixes: 651aaf36a7d7 ("usb: xhci: Handle USB transaction error on address command")
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20240815141117.2702314-2-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/host/xhci.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/usb/host/xhci.c
++++ b/drivers/usb/host/xhci.c
+@@ -2837,7 +2837,7 @@ static int xhci_configure_endpoint(struc
+ xhci->num_active_eps);
+ return -ENOMEM;
+ }
+- if ((xhci->quirks & XHCI_SW_BW_CHECKING) &&
++ if ((xhci->quirks & XHCI_SW_BW_CHECKING) && !ctx_change &&
+ xhci_reserve_bandwidth(xhci, virt_dev, command->in_ctx)) {
+ if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK))
+ xhci_free_host_resources(xhci, ctrl_ctx);
+@@ -4200,8 +4200,10 @@ static int xhci_setup_device(struct usb_
+ mutex_unlock(&xhci->mutex);
+ ret = xhci_disable_slot(xhci, udev->slot_id);
+ xhci_free_virt_device(xhci, udev->slot_id);
+- if (!ret)
+- xhci_alloc_dev(hcd, udev);
++ if (!ret) {
++ if (xhci_alloc_dev(hcd, udev) == 1)
++ xhci_setup_addressable_virt_dev(xhci, udev);
++ }
+ kfree(command->completion);
+ kfree(command);
+ return -EPROTO;