--- /dev/null
+From 71bf41b8e913ec9fc91f0d39ab8fb320229ec604 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Mon, 12 Aug 2024 15:16:21 +0200
+Subject: ACPI: EC: Evaluate _REG outside the EC scope more carefully
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit 71bf41b8e913ec9fc91f0d39ab8fb320229ec604 upstream.
+
+Commit 60fa6ae6e6d0 ("ACPI: EC: Install address space handler at the
+namespace root") caused _REG methods for EC operation regions outside
+the EC device scope to be evaluated, which on some systems leads to the
+evaluation of _REG methods in the scopes of device objects representing
+devices that are not present and not functional according to the _STA
+return values. Some of those device objects represent EC "alternatives",
+and if _REG is evaluated for their operation regions, the platform
+firmware may be confused and the platform may start to behave
+incorrectly.
+
+To avoid this problem, only evaluate _REG for EC operation regions
+located in the scopes of device objects representing known-to-be-present
+devices.
+
+For this purpose, partially revert commit 60fa6ae6e6d0 and trigger the
+evaluation of _REG for EC operation regions from acpi_bus_attach() for
+the known-valid devices.
+
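+A condensed view of the resulting flow (assembled from the hunks below,
+not additional code): acpi_bus_attach() only reaches the new call for
+device objects that scanning considers valid, and the depth of 1 limits
+the _REG evaluation to the scope of the device object itself:
+
+    void acpi_ec_register_opregions(struct acpi_device *adev)
+    {
+            /* The EC itself had its _REG run in ec_install_handlers() */
+            if (first_ec && first_ec->handle != adev->handle)
+                    acpi_execute_reg_methods(adev->handle, 1,
+                                             ACPI_ADR_SPACE_EC);
+    }
+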
+Fixes: 60fa6ae6e6d0 ("ACPI: EC: Install address space handler at the namespace root")
+Link: https://lore.kernel.org/linux-acpi/1f76b7e2-1928-4598-8037-28a1785c2d13@redhat.com
+Link: https://bugzilla.redhat.com/show_bug.cgi?id=2298938
+Link: https://bugzilla.redhat.com/show_bug.cgi?id=2302253
+Reported-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Cc: All applicable <stable@vger.kernel.org>
+Link: https://patch.msgid.link/23612351.6Emhk5qWAg@rjwysocki.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/acpi/ec.c | 11 +++++++++--
+ drivers/acpi/internal.h | 1 +
+ drivers/acpi/scan.c | 2 ++
+ 3 files changed, 12 insertions(+), 2 deletions(-)
+
+--- a/drivers/acpi/ec.c
++++ b/drivers/acpi/ec.c
+@@ -1487,12 +1487,13 @@ static bool install_gpio_irq_event_handl
+ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device,
+ bool call_reg)
+ {
+- acpi_handle scope_handle = ec == first_ec ? ACPI_ROOT_OBJECT : ec->handle;
+ acpi_status status;
+
+ acpi_ec_start(ec, false);
+
+ if (!test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) {
++ acpi_handle scope_handle = ec == first_ec ? ACPI_ROOT_OBJECT : ec->handle;
++
+ acpi_ec_enter_noirq(ec);
+ status = acpi_install_address_space_handler_no_reg(scope_handle,
+ ACPI_ADR_SPACE_EC,
+@@ -1506,7 +1507,7 @@ static int ec_install_handlers(struct ac
+ }
+
+ if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) {
+- acpi_execute_reg_methods(scope_handle, ACPI_UINT32_MAX, ACPI_ADR_SPACE_EC);
++ acpi_execute_reg_methods(ec->handle, ACPI_UINT32_MAX, ACPI_ADR_SPACE_EC);
+ set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags);
+ }
+
+@@ -1721,6 +1722,12 @@ static void acpi_ec_remove(struct acpi_d
+ }
+ }
+
++void acpi_ec_register_opregions(struct acpi_device *adev)
++{
++ if (first_ec && first_ec->handle != adev->handle)
++ acpi_execute_reg_methods(adev->handle, 1, ACPI_ADR_SPACE_EC);
++}
++
+ static acpi_status
+ ec_parse_io_ports(struct acpi_resource *resource, void *context)
+ {
+--- a/drivers/acpi/internal.h
++++ b/drivers/acpi/internal.h
+@@ -223,6 +223,7 @@ int acpi_ec_add_query_handler(struct acp
+ acpi_handle handle, acpi_ec_query_func func,
+ void *data);
+ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit);
++void acpi_ec_register_opregions(struct acpi_device *adev);
+
+ #ifdef CONFIG_PM_SLEEP
+ void acpi_ec_flush_work(void);
+--- a/drivers/acpi/scan.c
++++ b/drivers/acpi/scan.c
+@@ -2264,6 +2264,8 @@ static int acpi_bus_attach(struct acpi_d
+ if (device->handler)
+ goto ok;
+
++ acpi_ec_register_opregions(device);
++
+ if (!device->flags.initialized) {
+ device->flags.power_manageable =
+ device->power.states[ACPI_STATE_D0].flags.valid;
--- /dev/null
+From cdf65d73e001fde600b18d7e45afadf559425ce5 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Mon, 12 Aug 2024 15:11:42 +0200
+Subject: ACPICA: Add a depth argument to acpi_execute_reg_methods()
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit cdf65d73e001fde600b18d7e45afadf559425ce5 upstream.
+
+A subsequent change will need to pass a depth argument to
+acpi_execute_reg_methods(), so prepare that function for it.
+
+No intentional functional changes.
+
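+All existing callers pass ACPI_UINT32_MAX as the new argument so the
+namespace walk depth is unchanged, e.g. (from the ec.c hunk below):
+
+    acpi_execute_reg_methods(scope_handle, ACPI_UINT32_MAX,
+                             ACPI_ADR_SPACE_EC);
+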
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Cc: All applicable <stable@vger.kernel.org>
+Link: https://patch.msgid.link/8451567.NyiUUSuA9g@rjwysocki.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/acpi/acpica/acevents.h | 2 +-
+ drivers/acpi/acpica/evregion.c | 6 ++++--
+ drivers/acpi/acpica/evxfregn.c | 10 +++++++---
+ drivers/acpi/ec.c | 2 +-
+ include/acpi/acpixf.h | 1 +
+ 5 files changed, 14 insertions(+), 7 deletions(-)
+
+--- a/drivers/acpi/acpica/acevents.h
++++ b/drivers/acpi/acpica/acevents.h
+@@ -188,7 +188,7 @@ acpi_ev_detach_region(union acpi_operand
+ u8 acpi_ns_is_locked);
+
+ void
+-acpi_ev_execute_reg_methods(struct acpi_namespace_node *node,
++acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, u32 max_depth,
+ acpi_adr_space_type space_id, u32 function);
+
+ acpi_status
+--- a/drivers/acpi/acpica/evregion.c
++++ b/drivers/acpi/acpica/evregion.c
+@@ -65,6 +65,7 @@ acpi_status acpi_ev_initialize_op_region
+ acpi_gbl_default_address_spaces
+ [i])) {
+ acpi_ev_execute_reg_methods(acpi_gbl_root_node,
++ ACPI_UINT32_MAX,
+ acpi_gbl_default_address_spaces
+ [i], ACPI_REG_CONNECT);
+ }
+@@ -672,6 +673,7 @@ cleanup1:
+ * FUNCTION: acpi_ev_execute_reg_methods
+ *
+ * PARAMETERS: node - Namespace node for the device
++ * max_depth - Depth to which search for _REG
+ * space_id - The address space ID
+ * function - Passed to _REG: On (1) or Off (0)
+ *
+@@ -683,7 +685,7 @@ cleanup1:
+ ******************************************************************************/
+
+ void
+-acpi_ev_execute_reg_methods(struct acpi_namespace_node *node,
++acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, u32 max_depth,
+ acpi_adr_space_type space_id, u32 function)
+ {
+ struct acpi_reg_walk_info info;
+@@ -717,7 +719,7 @@ acpi_ev_execute_reg_methods(struct acpi_
+ * regions and _REG methods. (i.e. handlers must be installed for all
+ * regions of this Space ID before we can run any _REG methods)
+ */
+- (void)acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, ACPI_UINT32_MAX,
++ (void)acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, max_depth,
+ ACPI_NS_WALK_UNLOCK, acpi_ev_reg_run, NULL,
+ &info, NULL);
+
+--- a/drivers/acpi/acpica/evxfregn.c
++++ b/drivers/acpi/acpica/evxfregn.c
+@@ -85,7 +85,8 @@ acpi_install_address_space_handler_inter
+ /* Run all _REG methods for this address space */
+
+ if (run_reg) {
+- acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT);
++ acpi_ev_execute_reg_methods(node, ACPI_UINT32_MAX, space_id,
++ ACPI_REG_CONNECT);
+ }
+
+ unlock_and_exit:
+@@ -263,6 +264,7 @@ ACPI_EXPORT_SYMBOL(acpi_remove_address_s
+ * FUNCTION: acpi_execute_reg_methods
+ *
+ * PARAMETERS: device - Handle for the device
++ * max_depth - Depth to which search for _REG
+ * space_id - The address space ID
+ *
+ * RETURN: Status
+@@ -271,7 +273,8 @@ ACPI_EXPORT_SYMBOL(acpi_remove_address_s
+ *
+ ******************************************************************************/
+ acpi_status
+-acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id)
++acpi_execute_reg_methods(acpi_handle device, u32 max_depth,
++ acpi_adr_space_type space_id)
+ {
+ struct acpi_namespace_node *node;
+ acpi_status status;
+@@ -296,7 +299,8 @@ acpi_execute_reg_methods(acpi_handle dev
+
+ /* Run all _REG methods for this address space */
+
+- acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT);
++ acpi_ev_execute_reg_methods(node, max_depth, space_id,
++ ACPI_REG_CONNECT);
+ } else {
+ status = AE_BAD_PARAMETER;
+ }
+--- a/drivers/acpi/ec.c
++++ b/drivers/acpi/ec.c
+@@ -1506,7 +1506,7 @@ static int ec_install_handlers(struct ac
+ }
+
+ if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) {
+- acpi_execute_reg_methods(scope_handle, ACPI_ADR_SPACE_EC);
++ acpi_execute_reg_methods(scope_handle, ACPI_UINT32_MAX, ACPI_ADR_SPACE_EC);
+ set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags);
+ }
+
+--- a/include/acpi/acpixf.h
++++ b/include/acpi/acpixf.h
+@@ -660,6 +660,7 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status
+ void *context))
+ ACPI_EXTERNAL_RETURN_STATUS(acpi_status
+ acpi_execute_reg_methods(acpi_handle device,
+						 u32 max_depth,
+ acpi_adr_space_type
+ space_id))
+ ACPI_EXTERNAL_RETURN_STATUS(acpi_status
--- /dev/null
+From a8fc28dad6d574582cdf2f7e78c73c59c623df30 Mon Sep 17 00:00:00 2001
+From: Suren Baghdasaryan <surenb@google.com>
+Date: Tue, 13 Aug 2024 08:07:56 -0700
+Subject: alloc_tag: introduce clear_page_tag_ref() helper function
+
+From: Suren Baghdasaryan <surenb@google.com>
+
+commit a8fc28dad6d574582cdf2f7e78c73c59c623df30 upstream.
+
+In several cases we are freeing pages which were not allocated using
+common page allocators. For such cases, in order to keep allocation
+accounting correct, we should clear the page tag to indicate that the page
+being freed is expected to not have a valid allocation tag. Introduce
+clear_page_tag_ref() helper function to be used for this.
+
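+A minimal usage sketch, mirroring the mm/mm_init.c hunk below: callers
+freeing pages that were reserved rather than allocated clear the tag
+reference right before handing the pages back:
+
+    /* pages were reserved and not allocated */
+    clear_page_tag_ref(page);
+    __free_pages_core(page, order);
+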
+Link: https://lkml.kernel.org/r/20240813150758.855881-1-surenb@google.com
+Fixes: d224eb0287fb ("codetag: debug: mark codetags for reserved pages as empty")
+Signed-off-by: Suren Baghdasaryan <surenb@google.com>
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Kent Overstreet <kent.overstreet@linux.dev>
+Cc: Sourav Panda <souravpanda@google.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org> [6.10]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/pgalloc_tag.h | 13 +++++++++++++
+ mm/mm_init.c | 10 +---------
+ mm/page_alloc.c | 9 +--------
+ 3 files changed, 15 insertions(+), 17 deletions(-)
+
+--- a/include/linux/pgalloc_tag.h
++++ b/include/linux/pgalloc_tag.h
+@@ -43,6 +43,18 @@ static inline void put_page_tag_ref(unio
+ page_ext_put(page_ext_from_codetag_ref(ref));
+ }
+
++static inline void clear_page_tag_ref(struct page *page)
++{
++ if (mem_alloc_profiling_enabled()) {
++ union codetag_ref *ref = get_page_tag_ref(page);
++
++ if (ref) {
++ set_codetag_empty(ref);
++ put_page_tag_ref(ref);
++ }
++ }
++}
++
+ static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
+ unsigned int nr)
+ {
+@@ -126,6 +138,7 @@ static inline void pgalloc_tag_sub_pages
+
+ static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; }
+ static inline void put_page_tag_ref(union codetag_ref *ref) {}
++static inline void clear_page_tag_ref(struct page *page) {}
+ static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
+ unsigned int nr) {}
+ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
+--- a/mm/mm_init.c
++++ b/mm/mm_init.c
+@@ -2507,15 +2507,7 @@ void __init memblock_free_pages(struct p
+ }
+
+ /* pages were reserved and not allocated */
+- if (mem_alloc_profiling_enabled()) {
+- union codetag_ref *ref = get_page_tag_ref(page);
+-
+- if (ref) {
+- set_codetag_empty(ref);
+- put_page_tag_ref(ref);
+- }
+- }
+-
++ clear_page_tag_ref(page);
+ __free_pages_core(page, order);
+ }
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -5806,14 +5806,7 @@ unsigned long free_reserved_area(void *s
+
+ void free_reserved_page(struct page *page)
+ {
+- if (mem_alloc_profiling_enabled()) {
+- union codetag_ref *ref = get_page_tag_ref(page);
+-
+- if (ref) {
+- set_codetag_empty(ref);
+- put_page_tag_ref(ref);
+- }
+- }
++ clear_page_tag_ref(page);
+ ClearPageReserved(page);
+ init_page_count(page);
+ __free_page(page);
--- /dev/null
+From 766c163c2068b45330664fb67df67268e588a22d Mon Sep 17 00:00:00 2001
+From: Suren Baghdasaryan <surenb@google.com>
+Date: Tue, 13 Aug 2024 08:07:57 -0700
+Subject: alloc_tag: mark pages reserved during CMA activation as not tagged
+
+From: Suren Baghdasaryan <surenb@google.com>
+
+commit 766c163c2068b45330664fb67df67268e588a22d upstream.
+
+During CMA activation, pages in CMA area are prepared and then freed
+without being allocated. This triggers warnings when memory allocation
+debug config (CONFIG_MEM_ALLOC_PROFILING_DEBUG) is enabled. Fix this by
+marking these pages as not tagged before freeing them.
+
+Link: https://lkml.kernel.org/r/20240813150758.855881-2-surenb@google.com
+Fixes: d224eb0287fb ("codetag: debug: mark codetags for reserved pages as empty")
+Signed-off-by: Suren Baghdasaryan <surenb@google.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Kent Overstreet <kent.overstreet@linux.dev>
+Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
+Cc: Sourav Panda <souravpanda@google.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org> [6.10]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mm_init.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/mm/mm_init.c
++++ b/mm/mm_init.c
+@@ -2293,6 +2293,8 @@ void __init init_cma_reserved_pageblock(
+
+ set_pageblock_migratetype(page, MIGRATE_CMA);
+ set_page_refcounted(page);
++ /* pages were reserved and not allocated */
++ clear_page_tag_ref(page);
+ __free_pages(page, pageblock_order);
+
+ adjust_managed_page_count(page, pageblock_nr_pages);
--- /dev/null
+From 3beddef84d90590270465a907de1cfe2539ac70d Mon Sep 17 00:00:00 2001
+From: Baojun Xu <baojun.xu@ti.com>
+Date: Tue, 13 Aug 2024 12:37:48 +0800
+Subject: ALSA: hda/tas2781: fix wrong calibrated data order
+
+From: Baojun Xu <baojun.xu@ti.com>
+
+commit 3beddef84d90590270465a907de1cfe2539ac70d upstream.
+
+Wrong calibration data order causes the sound to be too low on some
+devices. Fix the wrong calibrated data order by converting the
+calibration data with get_unaligned_be32() after reading it from UEFI.
+
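+For reference, get_unaligned_be32() loads a 32-bit big-endian value
+from a possibly unaligned address and returns it in CPU byte order,
+which is what the conversion in the hunk below relies on:
+
+    data = get_unaligned_be32(&tas_priv->cali_data.data[offset]);
+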
+Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Baojun Xu <baojun.xu@ti.com>
+Link: https://patch.msgid.link/20240813043749.108-1-shenghao-ding@ti.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/pci/hda/tas2781_hda_i2c.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/sound/pci/hda/tas2781_hda_i2c.c
++++ b/sound/pci/hda/tas2781_hda_i2c.c
+@@ -2,10 +2,12 @@
+ //
+ // TAS2781 HDA I2C driver
+ //
+-// Copyright 2023 Texas Instruments, Inc.
++// Copyright 2023 - 2024 Texas Instruments, Inc.
+ //
+ // Author: Shenghao Ding <shenghao-ding@ti.com>
++// Current maintainer: Baojun Xu <baojun.xu@ti.com>
+
++#include <asm/unaligned.h>
+ #include <linux/acpi.h>
+ #include <linux/crc8.h>
+ #include <linux/crc32.h>
+@@ -519,20 +521,22 @@ static void tas2781_apply_calib(struct t
+ static const unsigned char rgno_array[CALIB_MAX] = {
+ 0x74, 0x0c, 0x14, 0x70, 0x7c,
+ };
+- unsigned char *data;
++ int offset = 0;
+ int i, j, rc;
++ __be32 data;
+
+ for (i = 0; i < tas_priv->ndev; i++) {
+- data = tas_priv->cali_data.data +
+- i * TASDEVICE_SPEAKER_CALIBRATION_SIZE;
+ for (j = 0; j < CALIB_MAX; j++) {
++ data = get_unaligned_be32(
++ &tas_priv->cali_data.data[offset]);
+ rc = tasdevice_dev_bulk_write(tas_priv, i,
+ TASDEVICE_REG(0, page_array[j], rgno_array[j]),
+- &(data[4 * j]), 4);
++ (unsigned char *)&data, 4);
+ if (rc < 0)
+ dev_err(tas_priv->dev,
+ "chn %d calib %d bulk_wr err = %d\n",
+ i, j, rc);
++ offset += 4;
+ }
+ }
+ }
--- /dev/null
+From ccbfcac05866ebe6eb3bc6d07b51d4ed4fcde436 Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Sat, 10 Aug 2024 10:48:32 +0200
+Subject: ALSA: timer: Relax start tick time check for slave timer elements
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit ccbfcac05866ebe6eb3bc6d07b51d4ed4fcde436 upstream.
+
+The recent addition of a sanity check for a too low start tick time
+seems to break some applications that use aloop with a certain slave
+timer setup. They may have an initial resolution of 0, hence it's
+treated as if it were a too low value.
+
+Relax the check by skipping it for slave timer instances to address
+the regression.
+
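+For reference, the check operates in nanoseconds, so
+
+    snd_timer_hw_resolution(timer) * ticks < 100000
+
+corresponds to a start tick time under 100us; a slave timer may still
+report a resolution of 0 at this point, which made every start attempt
+look too low.
+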
+Fixes: 4a63bd179fa8 ("ALSA: timer: Set lower bound of start tick time")
+Cc: <stable@vger.kernel.org>
+Link: https://github.com/raspberrypi/linux/issues/6294
+Link: https://patch.msgid.link/20240810084833.10939-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/core/timer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/sound/core/timer.c
++++ b/sound/core/timer.c
+@@ -547,7 +547,7 @@ static int snd_timer_start1(struct snd_t
+ /* check the actual time for the start tick;
+ * bail out as error if it's way too low (< 100us)
+ */
+- if (start) {
++ if (start && !(timer->hw.flags & SNDRV_TIMER_HW_SLAVE)) {
+ if ((u64)snd_timer_hw_resolution(timer) * ticks < 100000)
+ return -EINVAL;
+ }
--- /dev/null
+From 004eb8ba776ccd3e296ea6f78f7ae7985b12824e Mon Sep 17 00:00:00 2001
+From: Lianqin Hu <hulianqin@vivo.com>
+Date: Sun, 11 Aug 2024 08:30:11 +0000
+Subject: ALSA: usb-audio: Add delay quirk for VIVO USB-C-XE710 HEADSET
+
+From: Lianqin Hu <hulianqin@vivo.com>
+
+commit 004eb8ba776ccd3e296ea6f78f7ae7985b12824e upstream.
+
+Audio control requests that set the sampling frequency sometimes fail
+on this card. Adding a delay between control messages eliminates that
+problem.
+
+Signed-off-by: Lianqin Hu <hulianqin@vivo.com>
+Cc: <stable@vger.kernel.org>
+Link: https://patch.msgid.link/TYUPR06MB6217FF67076AF3E49E12C877D2842@TYUPR06MB6217.apcprd06.prod.outlook.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/usb/quirks.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/sound/usb/quirks.c
++++ b/sound/usb/quirks.c
+@@ -2221,6 +2221,8 @@ static const struct usb_audio_quirk_flag
+ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+ DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */
+ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
++ DEVICE_FLG(0x2d95, 0x8021, /* VIVO USB-C-XE710 HEADSET */
++ QUIRK_FLAG_CTL_MSG_DELAY_1M),
+ DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */
+ QUIRK_FLAG_IGNORE_CTL_ERROR),
+ DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */
--- /dev/null
+From c286f204ce6ba7b48e3dcba53eda7df8eaa64dd9 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Juan=20Jos=C3=A9=20Arboleda?= <soyjuanarbol@gmail.com>
+Date: Tue, 13 Aug 2024 11:10:53 -0500
+Subject: ALSA: usb-audio: Support Yamaha P-125 quirk entry
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Juan José Arboleda <soyjuanarbol@gmail.com>
+
+commit c286f204ce6ba7b48e3dcba53eda7df8eaa64dd9 upstream.
+
+This patch adds a USB quirk for the Yamaha P-125 digital piano.
+
+Signed-off-by: Juan José Arboleda <soyjuanarbol@gmail.com>
+Cc: <stable@vger.kernel.org>
+Link: https://patch.msgid.link/20240813161053.70256-1-soyjuanarbol@gmail.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/usb/quirks-table.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/sound/usb/quirks-table.h
++++ b/sound/usb/quirks-table.h
+@@ -273,6 +273,7 @@ YAMAHA_DEVICE(0x105a, NULL),
+ YAMAHA_DEVICE(0x105b, NULL),
+ YAMAHA_DEVICE(0x105c, NULL),
+ YAMAHA_DEVICE(0x105d, NULL),
++YAMAHA_DEVICE(0x1718, "P-125"),
+ {
+ USB_DEVICE(0x0499, 0x1503),
+ .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
--- /dev/null
+From a21dcf0ea8566ebbe011c79d6ed08cdfea771de3 Mon Sep 17 00:00:00 2001
+From: Haibo Xu <haibo1.xu@intel.com>
+Date: Mon, 5 Aug 2024 11:30:24 +0800
+Subject: arm64: ACPI: NUMA: initialize all values of acpi_early_node_map to NUMA_NO_NODE
+
+From: Haibo Xu <haibo1.xu@intel.com>
+
+commit a21dcf0ea8566ebbe011c79d6ed08cdfea771de3 upstream.
+
+Currently, only acpi_early_node_map[0] is initialized to NUMA_NO_NODE.
+To ensure all the values are properly initialized, switch to
+initializing all of them to NUMA_NO_NODE.
+
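+The subtlety is that { NUMA_NO_NODE } only applies to element 0; the
+remaining elements are implicitly zero-initialized, and 0 is a valid
+node id. The fix uses a designated range initializer so every entry is
+set, as in the hunk below:
+
+    static int acpi_early_node_map[NR_CPUS] __initdata =
+            { [0 ... NR_CPUS - 1] = NUMA_NO_NODE };
+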
+Fixes: e18962491696 ("arm64: numa: rework ACPI NUMA initialization")
+Cc: <stable@vger.kernel.org> # 4.19.x
+Reported-by: Andrew Jones <ajones@ventanamicro.com>
+Suggested-by: Andrew Jones <ajones@ventanamicro.com>
+Signed-off-by: Haibo Xu <haibo1.xu@intel.com>
+Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
+Reviewed-by: Sunil V L <sunilvl@ventanamicro.com>
+Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
+Acked-by: Catalin Marinas <catalin.marinas@arm.com>
+Acked-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
+Reviewed-by: Hanjun Guo <guohanjun@huawei.com>
+Link: https://lore.kernel.org/r/853d7f74aa243f6f5999e203246f0d1ae92d2b61.1722828421.git.haibo1.xu@intel.com
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kernel/acpi_numa.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm64/kernel/acpi_numa.c
++++ b/arch/arm64/kernel/acpi_numa.c
+@@ -27,7 +27,7 @@
+
+ #include <asm/numa.h>
+
+-static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE };
++static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE };
+
+ int __init acpi_numa_get_nid(unsigned int cpu)
+ {
--- /dev/null
+From 42fac187b5c746227c92d024f1caf33bc1d337e4 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Thu, 11 Apr 2024 16:41:20 -0400
+Subject: btrfs: check delayed refs when we're checking if a ref exists
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 42fac187b5c746227c92d024f1caf33bc1d337e4 upstream.
+
+In the patch 78c52d9eb6b7 ("btrfs: check for refs on snapshot delete
+resume") I added some code to handle file systems that had been
+corrupted by a bug that incorrectly skipped updating the drop progress
+key while dropping a snapshot. This code would check to see if we had
+already deleted our reference for a child block, and skip the deletion
+if we had.
+
+Unfortunately there is a bug: the check would only look at the on-disk
+references. I made the incorrect assumption that blocks in an already
+deleted snapshot whose deletion was resumed on mount wouldn't be
+modified.
+
+If we have 2 pending deleted snapshots that share blocks, we can easily
+modify the rules for a block. Take the following example
+
+subvolume a exists, and subvolume b is a snapshot of subvolume a. They
+share references to block 1. Block 1 will have 2 full references, one
+for subvolume a and one for subvolume b, and it belongs to subvolume a
+(btrfs_header_owner(block 1) == subvolume a).
+
+When deleting subvolume a, we will drop our full reference for block 1,
+and because we are the owner we will drop our full reference for all of
+block 1's children, convert block 1 to FULL BACKREF, and add a shared
+reference to all of block 1's children.
+
+Then we will start the snapshot deletion of subvolume b. We look up the
+extent info for block 1, which checks delayed refs and tells us that
+FULL BACKREF is set, so sets parent to the bytenr of block 1. However
+because this is a resumed snapshot deletion, we call into
+check_ref_exists(). Because check_ref_exists() only looks at the disk,
+it doesn't find the shared backref for the child of block 1, and thus
+returns 0 and we skip deleting the reference for the child of block 1
+and continue. This orphans the child of block 1.
+
+The fix is to lookup the delayed refs, similar to what we do in
+btrfs_lookup_extent_info(). However we only care about whether the
+reference exists or not. If we fail to find our reference on disk, go
+look up the bytenr in the delayed refs, and if it exists look for an
+existing ref in the delayed ref head. If that exists then we know we
+can delete the reference safely and carry on. If it doesn't exist we
+know we have to skip over this block.
+
+This bug has existed since I introduced this fix; however, it requires
+having multiple deleted snapshots pending when we unmount. We noticed
+this in production because our shutdown path stops the container on the
+system, which deletes a bunch of subvolumes, and then reboots the box.
+This gives us plenty of opportunities to hit this issue. Looking at the
+history we've seen this occasionally in production, but we had a big
+spike recently thanks to faster machines getting jobs with multiple
+subvolumes in the job.
+
+Chris Mason wrote a reproducer which does the following
+
+mount /dev/nvme4n1 /btrfs
+btrfs subvol create /btrfs/s1
+simoop -E -f 4k -n 200000 -z /btrfs/s1
+while(true) ; do
+ btrfs subvol snap /btrfs/s1 /btrfs/s2
+ simoop -f 4k -n 200000 -r 10 -z /btrfs/s2
+ btrfs subvol snap /btrfs/s2 /btrfs/s3
+ btrfs balance start -dusage=80 /btrfs
+ btrfs subvol del /btrfs/s2 /btrfs/s3
+ umount /btrfs
+ btrfsck /dev/nvme4n1 || exit 1
+ mount /dev/nvme4n1 /btrfs
+done
+
+On the second loop this would fail consistently, with my patch it has
+been running for hours and hasn't failed.
+
+I also used dm-log-writes to capture the state of the failure so I could
+debug the problem. Using the existing failure case to test my patch
+validated that it fixes the problem.
+
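+Condensed from the extent-tree.c hunk below, the resulting lookup order
+in check_ref_exists() is (a sketch, locking and retries elided):
+
+    ret = lookup_extent_backref(...);           /* on-disk check */
+    if (ret != -ENOENT)
+            return (ret < 0) ? ret : 1;         /* found, or hard error */
+
+    head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
+    if (!head)
+            return 0;                           /* no delayed refs at all */
+
+    exists = btrfs_find_delayed_tree_ref(head, root_id, parent);
+    return exists ? 1 : 0;
+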
+Fixes: 78c52d9eb6b7 ("btrfs: check for refs on snapshot delete resume")
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/delayed-ref.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++
+ fs/btrfs/delayed-ref.h | 2 +
+ fs/btrfs/extent-tree.c | 51 ++++++++++++++++++++++++++++++++-----
+ 3 files changed, 114 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/delayed-ref.c
++++ b/fs/btrfs/delayed-ref.c
+@@ -1169,6 +1169,73 @@ btrfs_find_delayed_ref_head(struct btrfs
+ return find_ref_head(delayed_refs, bytenr, false);
+ }
+
++static int find_comp(struct btrfs_delayed_ref_node *entry, u64 root, u64 parent)
++{
++ int type = parent ? BTRFS_SHARED_BLOCK_REF_KEY : BTRFS_TREE_BLOCK_REF_KEY;
++
++ if (type < entry->type)
++ return -1;
++ if (type > entry->type)
++ return 1;
++
++ if (type == BTRFS_TREE_BLOCK_REF_KEY) {
++ if (root < entry->ref_root)
++ return -1;
++ if (root > entry->ref_root)
++ return 1;
++ } else {
++ if (parent < entry->parent)
++ return -1;
++ if (parent > entry->parent)
++ return 1;
++ }
++ return 0;
++}
++
++/*
++ * Check to see if a given root/parent reference is attached to the head. This
++ * only checks for BTRFS_ADD_DELAYED_REF references that match, as that
++ * indicates the reference exists for the given root or parent. This is for
++ * tree blocks only.
++ *
++ * @head: the head of the bytenr we're searching.
++ * @root: the root objectid of the reference if it is a normal reference.
++ * @parent: the parent if this is a shared backref.
++ */
++bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head,
++ u64 root, u64 parent)
++{
++ struct rb_node *node;
++ bool found = false;
++
++ lockdep_assert_held(&head->mutex);
++
++ spin_lock(&head->lock);
++ node = head->ref_tree.rb_root.rb_node;
++ while (node) {
++ struct btrfs_delayed_ref_node *entry;
++ int ret;
++
++ entry = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
++ ret = find_comp(entry, root, parent);
++ if (ret < 0) {
++ node = node->rb_left;
++ } else if (ret > 0) {
++ node = node->rb_right;
++ } else {
++ /*
++ * We only want to count ADD actions, as drops mean the
++ * ref doesn't exist.
++ */
++ if (entry->action == BTRFS_ADD_DELAYED_REF)
++ found = true;
++ break;
++ }
++ }
++ spin_unlock(&head->lock);
++ return found;
++}
++
+ void __cold btrfs_delayed_ref_exit(void)
+ {
+ kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
+--- a/fs/btrfs/delayed-ref.h
++++ b/fs/btrfs/delayed-ref.h
+@@ -389,6 +389,8 @@ int btrfs_delayed_refs_rsv_refill(struct
+ void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
+ u64 num_bytes);
+ bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
++bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head,
++ u64 root, u64 parent);
+
+ static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node)
+ {
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -5387,23 +5387,62 @@ static int check_ref_exists(struct btrfs
+ struct btrfs_root *root, u64 bytenr, u64 parent,
+ int level)
+ {
++ struct btrfs_delayed_ref_root *delayed_refs;
++ struct btrfs_delayed_ref_head *head;
+ struct btrfs_path *path;
+ struct btrfs_extent_inline_ref *iref;
+ int ret;
++ bool exists = false;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+-
++again:
+ ret = lookup_extent_backref(trans, path, &iref, bytenr,
+ root->fs_info->nodesize, parent,
+ btrfs_root_id(root), level, 0);
++ if (ret != -ENOENT) {
++ /*
++		 * If we get 0 then we found our reference, return 1, else
++		 * return the error if it's not -ENOENT.
++		 */
++		btrfs_free_path(path);
++		return (ret < 0) ? ret : 1;
++ }
++
++ /*
++ * We could have a delayed ref with this reference, so look it up while
++ * we're holding the path open to make sure we don't race with the
++ * delayed ref running.
++ */
++ delayed_refs = &trans->transaction->delayed_refs;
++ spin_lock(&delayed_refs->lock);
++ head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
++ if (!head)
++ goto out;
++ if (!mutex_trylock(&head->mutex)) {
++ /*
++ * We're contended, means that the delayed ref is running, get a
++ * reference and wait for the ref head to be complete and then
++ * try again.
++ */
++ refcount_inc(&head->refs);
++ spin_unlock(&delayed_refs->lock);
++
++ btrfs_release_path(path);
++
++ mutex_lock(&head->mutex);
++ mutex_unlock(&head->mutex);
++ btrfs_put_delayed_ref_head(head);
++ goto again;
++ }
++
++ exists = btrfs_find_delayed_tree_ref(head, root->root_key.objectid, parent);
++ mutex_unlock(&head->mutex);
++out:
++ spin_unlock(&delayed_refs->lock);
+ btrfs_free_path(path);
+- if (ret == -ENOENT)
+- return 0;
+- if (ret < 0)
+- return ret;
+- return 1;
++ return exists ? 1 : 0;
+ }
+
+ /*
--- /dev/null
+From 534f7eff9239c1b0af852fc33f5af2b62c00eddf Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 16 Aug 2024 10:40:38 +0930
+Subject: btrfs: only enable extent map shrinker for DEBUG builds
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 534f7eff9239c1b0af852fc33f5af2b62c00eddf upstream.
+
+Although there are several patches improving the extent map shrinker,
+there are still reports of too frequent shrinker behavior, taking too
+much CPU for the kswapd process.
+
+So let's only enable the extent map shrinker for DEBUG builds for now,
+until we have a more comprehensive understanding and a better solution.
+
+Link: https://lore.kernel.org/linux-btrfs/3df4acd616a07ef4d2dc6bad668701504b412ffc.camel@intelfx.name/
+Link: https://lore.kernel.org/linux-btrfs/c30fd6b3-ca7a-4759-8a53-d42878bf84f7@gmail.com/
+Fixes: 956a17d9d050 ("btrfs: add a shrinker for extent maps")
+CC: stable@vger.kernel.org # 6.10+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/super.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -2387,7 +2387,13 @@ static long btrfs_nr_cached_objects(stru
+
+ trace_btrfs_extent_map_shrinker_count(fs_info, nr);
+
+- return nr;
++ /*
++ * Only report the real number for DEBUG builds, as there are reports of
++ * serious performance degradation caused by too frequent shrinks.
++ */
++ if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
++ return nr;
++ return 0;
+ }
+
+ static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_control *sc)
--- /dev/null
+From ae1e766f623f7a2a889a0b09eb076dd9a60efbe9 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Sun, 11 Aug 2024 11:53:42 +0100
+Subject: btrfs: only run the extent map shrinker from kswapd tasks
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit ae1e766f623f7a2a889a0b09eb076dd9a60efbe9 upstream.
+
+Currently the extent map shrinker can be run by any task when attempting
+to allocate memory and there's enough memory pressure to trigger it.
+
+To avoid too much latency we stop iterating over extent maps and removing
+them once the task needs to reschedule. This logic was introduced in commit
+b3ebb9b7e92a ("btrfs: stop extent map shrinker if reschedule is needed").
+
+While that solved high latency problems for some use cases, it's still
+not enough because with a too high number of tasks entering the extent map
+shrinker code, either due to memory allocations or because they are a
+kswapd task, we end up having a very high level of contention on some
+spin locks, namely:
+
+1) The fs_info->fs_roots_radix_lock spin lock, which we need to find
+ roots to iterate over their inodes;
+
+2) The spin lock of the xarray used to track open inodes for a root
+ (struct btrfs_root::inodes) - on 6.10 kernels and below, it used to
+ be a red black tree and the spin lock was root->inode_lock;
+
+3) The fs_info->delayed_iput_lock spin lock since the shrinker adds
+ delayed iputs (calls btrfs_add_delayed_iput()).
+
+Instead of allowing the extent map shrinker to be run by any task, make
+it run only by kswapd tasks. This still solves the problem of running
+into OOM situations due to an unbounded extent map creation, which is
+simple to trigger by direct IO writes, as described in the changelog
+of commit 956a17d9d050 ("btrfs: add a shrinker for extent maps"), and
+by a similar case when doing buffered IO on files with a very large
+number of holes (keeping the file open and creating many holes, whose
+extent maps are only released when the file is closed).
+
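+Condensed from the super.c hunk below, the gate is a single check at
+the top of btrfs_free_cached_objects():
+
+    if (!current_is_kswapd())
+            return 0;
+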
+Reported-by: kzd <kzd@56709.net>
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=219121
+Reported-by: Octavia Togami <octavia.togami@gmail.com>
+Link: https://lore.kernel.org/linux-btrfs/CAHPNGSSt-a4ZZWrtJdVyYnJFscFjP9S7rMcvEMaNSpR556DdLA@mail.gmail.com/
+Fixes: 956a17d9d050 ("btrfs: add a shrinker for extent maps")
+CC: stable@vger.kernel.org # 6.10+
+Tested-by: kzd <kzd@56709.net>
+Tested-by: Octavia Togami <octavia.togami@gmail.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_map.c | 22 ++++++----------------
+ fs/btrfs/super.c | 10 ++++++++++
+ 2 files changed, 16 insertions(+), 16 deletions(-)
+
+--- a/fs/btrfs/extent_map.c
++++ b/fs/btrfs/extent_map.c
+@@ -1065,8 +1065,7 @@ static long btrfs_scan_inode(struct btrf
+ return 0;
+
+ /*
+- * We want to be fast because we can be called from any path trying to
+- * allocate memory, so if the lock is busy we don't want to spend time
++ * We want to be fast so if the lock is busy we don't want to spend time
+ * waiting for it - either some task is about to do IO for the inode or
+ * we may have another task shrinking extent maps, here in this code, so
+ * skip this inode.
+@@ -1109,9 +1108,7 @@ next:
+ /*
+ * Stop if we need to reschedule or there's contention on the
+ * lock. This is to avoid slowing other tasks trying to take the
+- * lock and because the shrinker might be called during a memory
+- * allocation path and we want to avoid taking a very long time
+- * and slowing down all sorts of tasks.
++ * lock.
+ */
+ if (need_resched() || rwlock_needbreak(&tree->lock))
+ break;
+@@ -1139,12 +1136,7 @@ static long btrfs_scan_root(struct btrfs
+ if (ctx->scanned >= ctx->nr_to_scan)
+ break;
+
+- /*
+- * We may be called from memory allocation paths, so we don't
+- * want to take too much time and slowdown tasks.
+- */
+- if (need_resched())
+- break;
++ cond_resched();
+
+ inode = btrfs_find_first_inode(root, min_ino);
+ }
+@@ -1202,14 +1194,12 @@ long btrfs_free_extent_maps(struct btrfs
+ ctx.last_ino);
+ }
+
+- /*
+- * We may be called from memory allocation paths, so we don't want to
+- * take too much time and slowdown tasks, so stop if we need reschedule.
+- */
+- while (ctx.scanned < ctx.nr_to_scan && !need_resched()) {
++ while (ctx.scanned < ctx.nr_to_scan) {
+ struct btrfs_root *root;
+ unsigned long count;
+
++ cond_resched();
++
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ count = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+ (void **)&root,
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -28,6 +28,7 @@
+ #include <linux/btrfs.h>
+ #include <linux/security.h>
+ #include <linux/fs_parser.h>
++#include <linux/swap.h>
+ #include "messages.h"
+ #include "delayed-inode.h"
+ #include "ctree.h"
+@@ -2394,6 +2395,15 @@ static long btrfs_free_cached_objects(st
+ const long nr_to_scan = min_t(unsigned long, LONG_MAX, sc->nr_to_scan);
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+
++ /*
++ * We may be called from any task trying to allocate memory and we don't
++ * want to slow it down with scanning and dropping extent maps. It would
++ * also cause heavy lock contention if many tasks concurrently enter
++ * here. Therefore only allow kswapd tasks to scan and drop extent maps.
++ */
++ if (!current_is_kswapd())
++ return 0;
++
+ return btrfs_free_extent_maps(fs_info, nr_to_scan);
+ }
+
--- /dev/null
+From 46a6e10a1ab16cc71d4a3cab73e79aabadd6b8ea Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 12 Aug 2024 14:18:06 +0100
+Subject: btrfs: send: allow cloning non-aligned extent if it ends at i_size
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 46a6e10a1ab16cc71d4a3cab73e79aabadd6b8ea upstream.
+
+If we find that an extent is shared but its end offset is not sector
+size aligned, then we don't clone it and issue write operations instead.
+This is because the reflink (remap_file_range) operation does not allow
+cloning unaligned ranges, except if the end offset of the range matches
+the i_size of the source and destination files (and the start offset is
+sector size aligned).
+
+While this is not incorrect because send can only guarantee that a file
+has the same data in the source and destination snapshots, it's not
+optimal and generates confusion and surprising behaviour for users.
+
+For example, running this test:
+
+ $ cat test.sh
+ #!/bin/bash
+
+ DEV=/dev/sdi
+ MNT=/mnt/sdi
+
+ mkfs.btrfs -f $DEV
+ mount $DEV $MNT
+
+ # Use a file size not aligned to any possible sector size.
+ file_size=$((1 * 1024 * 1024 + 5)) # 1MB + 5 bytes
+ dd if=/dev/random of=$MNT/foo bs=$file_size count=1
+ cp --reflink=always $MNT/foo $MNT/bar
+
+ btrfs subvolume snapshot -r $MNT/ $MNT/snap
+ rm -f /tmp/send-test
+ btrfs send -f /tmp/send-test $MNT/snap
+
+ umount $MNT
+ mkfs.btrfs -f $DEV
+ mount $DEV $MNT
+
+ btrfs receive -vv -f /tmp/send-test $MNT
+
+ xfs_io -r -c "fiemap -v" $MNT/snap/bar
+
+ umount $MNT
+
+Gives the following result:
+
+ (...)
+ mkfile o258-7-0
+ rename o258-7-0 -> bar
+ write bar - offset=0 length=49152
+ write bar - offset=49152 length=49152
+ write bar - offset=98304 length=49152
+ write bar - offset=147456 length=49152
+ write bar - offset=196608 length=49152
+ write bar - offset=245760 length=49152
+ write bar - offset=294912 length=49152
+ write bar - offset=344064 length=49152
+ write bar - offset=393216 length=49152
+ write bar - offset=442368 length=49152
+ write bar - offset=491520 length=49152
+ write bar - offset=540672 length=49152
+ write bar - offset=589824 length=49152
+ write bar - offset=638976 length=49152
+ write bar - offset=688128 length=49152
+ write bar - offset=737280 length=49152
+ write bar - offset=786432 length=49152
+ write bar - offset=835584 length=49152
+ write bar - offset=884736 length=49152
+ write bar - offset=933888 length=49152
+ write bar - offset=983040 length=49152
+ write bar - offset=1032192 length=16389
+ chown bar - uid=0, gid=0
+ chmod bar - mode=0644
+ utimes bar
+ utimes
+ BTRFS_IOC_SET_RECEIVED_SUBVOL uuid=06d640da-9ca1-604c-b87c-3375175a8eb3, stransid=7
+ /mnt/sdi/snap/bar:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [0..2055]: 26624..28679 2056 0x1
+
+There's no clone operation to clone extents from the file foo into file
+bar and fiemap confirms there's no shared flag (0x2000).
+
+So update send_write_or_clone() so that it proceeds with cloning if the
+source and destination ranges end at the i_size of the respective files.
+
+After this change, the result of the test is:
+
+ (...)
+ mkfile o258-7-0
+ rename o258-7-0 -> bar
+ clone bar - source=foo source offset=0 offset=0 length=1048581
+ chown bar - uid=0, gid=0
+ chmod bar - mode=0644
+ utimes bar
+ utimes
+ BTRFS_IOC_SET_RECEIVED_SUBVOL uuid=582420f3-ea7d-564e-bbe5-ce440d622190, stransid=7
+ /mnt/sdi/snap/bar:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ 0: [0..2055]: 26624..28679 2056 0x2001
+
+A test case for fstests will also follow up soon.
+
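+Condensed from the send.c hunk below, the clone-or-write decision in
+send_write_or_clone() becomes (a sketch):
+
+    if (!clone_root)
+            goto write_data;
+    if (IS_ALIGNED(end, bs))
+            goto clone_data;
+    /* Unaligned end: clone only if both ranges end at i_size. */
+    if (end != sctx->cur_inode_size)
+            goto write_data;
+    ret = get_inode_info(clone_root->root, clone_root->ino, &info);
+    ...
+    if (clone_root->offset + num_bytes == info.size)
+            goto clone_data;
+    goto write_data;
+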
+Link: https://github.com/kdave/btrfs-progs/issues/572#issuecomment-2282841416
+CC: stable@vger.kernel.org # 5.10+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/send.c | 54 ++++++++++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 40 insertions(+), 14 deletions(-)
+
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -6158,25 +6158,51 @@ static int send_write_or_clone(struct se
+ u64 offset = key->offset;
+ u64 end;
+ u64 bs = sctx->send_root->fs_info->sectorsize;
++ struct btrfs_file_extent_item *ei;
++ u64 disk_byte;
++ u64 data_offset;
++ u64 num_bytes;
++ struct btrfs_inode_info info = { 0 };
+
+ end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size);
+ if (offset >= end)
+ return 0;
+
+- if (clone_root && IS_ALIGNED(end, bs)) {
+- struct btrfs_file_extent_item *ei;
+- u64 disk_byte;
+- u64 data_offset;
+-
+- ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+- struct btrfs_file_extent_item);
+- disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
+- data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
+- ret = clone_range(sctx, path, clone_root, disk_byte,
+- data_offset, offset, end - offset);
+- } else {
+- ret = send_extent_data(sctx, path, offset, end - offset);
+- }
++ num_bytes = end - offset;
++
++ if (!clone_root)
++ goto write_data;
++
++ if (IS_ALIGNED(end, bs))
++ goto clone_data;
++
++ /*
++ * If the extent end is not aligned, we can clone if the extent ends at
++ * the i_size of the inode and the clone range ends at the i_size of the
++ * source inode, otherwise the clone operation fails with -EINVAL.
++ */
++ if (end != sctx->cur_inode_size)
++ goto write_data;
++
++ ret = get_inode_info(clone_root->root, clone_root->ino, &info);
++ if (ret < 0)
++ return ret;
++
++ if (clone_root->offset + num_bytes == info.size)
++ goto clone_data;
++
++write_data:
++ ret = send_extent_data(sctx, path, offset, num_bytes);
++ sctx->cur_inode_next_write_offset = end;
++ return ret;
++
++clone_data:
++ ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
++ struct btrfs_file_extent_item);
++ disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
++ data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
++ ret = clone_range(sctx, path, clone_root, disk_byte, data_offset, offset,
++ num_bytes);
+ sctx->cur_inode_next_write_offset = end;
+ return ret;
+ }
--- /dev/null
+From 008e2512dc5696ab2dc5bf264e98a9fe9ceb830e Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Sun, 11 Aug 2024 15:00:22 +0930
+Subject: btrfs: tree-checker: add dev extent item checks
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 008e2512dc5696ab2dc5bf264e98a9fe9ceb830e upstream.
+
+[REPORT]
+There is a corruption report that btrfs refused to mount a fs that has
+overlapping dev extents:
+
+ BTRFS error (device sdc): dev extent devid 4 physical offset 14263979671552 overlap with previous dev extent end 14263980982272
+ BTRFS error (device sdc): failed to verify dev extents against chunks: -117
+ BTRFS error (device sdc): open_ctree failed
+
+[CAUSE]
+The direct cause is very obvious, there is a bad dev extent item with
+incorrect length.
+
+With btrfs check reporting two overlapping extents, the second one shows
+some clue on the cause:
+
+ ERROR: dev extent devid 4 offset 14263979671552 len 6488064 overlap with previous dev extent end 14263980982272
+ ERROR: dev extent devid 13 offset 2257707008000 len 6488064 overlap with previous dev extent end 2257707270144
+ ERROR: errors found in extent allocation tree or chunk allocation
+
+The second one looks like a bitflip happened during new chunk
+allocation:
+hex(2257707008000) = 0x20da9d30000
+hex(2257707270144) = 0x20da9d70000
+diff = 0x00000040000
+
+So it looks like a bitflip happened during new dev extent allocation,
+resulting in the second overlap.
+
+Currently we only do the dev-extent verification at mount time, but if
+the corruption is caused by a memory bitflip, we really want to catch it
+before writing the corruption to storage.
+
+Furthermore the dev extent items has the following key definition:
+
+ (<device id> DEV_EXTENT <physical offset>)
+
+Thus we cannot just rely on the generic key order check to make sure
+there is no overlap.
+
+[ENHANCEMENT]
+Introduce dedicated dev extent checks, including:
+
+- Fixed member checks
+ * chunk_tree should always be BTRFS_CHUNK_TREE_OBJECTID (3)
+ * chunk_objectid should always be
+ BTRFS_FIRST_CHUNK_CHUNK_TREE_OBJECTID (256)
+
+- Alignment checks
+ * chunk_offset should be aligned to sectorsize
+ * length should be aligned to sectorsize
+ * key.offset should be aligned to sectorsize
+
+- Overlap checks
+ If the previous key is also a dev-extent item, with the same
+ device id, make sure we do not overlap with the previous dev extent.
+
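+The overlap check itself reduces to the following (from the hunk
+below): for two consecutive dev extent items with the same devid,
+
+    prev_key->offset + btrfs_dev_extent_length(leaf, prev_de) > key->offset
+
+means the previous dev extent runs past the start of the current one.
+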
+Reported-by: Stefan N <stefannnau@gmail.com>
+Link: https://lore.kernel.org/linux-btrfs/CA+W5K0rSO3koYTo=nzxxTm1-Pdu1HYgVxEpgJ=aGc7d=E8mGEg@mail.gmail.com/
+CC: stable@vger.kernel.org # 5.10+
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 69 insertions(+)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -1718,6 +1718,72 @@ static int check_raid_stripe_extent(cons
+ return 0;
+ }
+
++static int check_dev_extent_item(const struct extent_buffer *leaf,
++ const struct btrfs_key *key,
++ int slot,
++ struct btrfs_key *prev_key)
++{
++ struct btrfs_dev_extent *de;
++ const u32 sectorsize = leaf->fs_info->sectorsize;
++
++ de = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
++ /* Basic fixed member checks. */
++ if (unlikely(btrfs_dev_extent_chunk_tree(leaf, de) !=
++ BTRFS_CHUNK_TREE_OBJECTID)) {
++ generic_err(leaf, slot,
++ "invalid dev extent chunk tree id, has %llu expect %llu",
++ btrfs_dev_extent_chunk_tree(leaf, de),
++ BTRFS_CHUNK_TREE_OBJECTID);
++ return -EUCLEAN;
++ }
++ if (unlikely(btrfs_dev_extent_chunk_objectid(leaf, de) !=
++ BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
++ generic_err(leaf, slot,
++ "invalid dev extent chunk objectid, has %llu expect %llu",
++ btrfs_dev_extent_chunk_objectid(leaf, de),
++ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
++ return -EUCLEAN;
++ }
++ /* Alignment check. */
++ if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
++ generic_err(leaf, slot,
++ "invalid dev extent key.offset, has %llu not aligned to %u",
++ key->offset, sectorsize);
++ return -EUCLEAN;
++ }
++ if (unlikely(!IS_ALIGNED(btrfs_dev_extent_chunk_offset(leaf, de),
++ sectorsize))) {
++ generic_err(leaf, slot,
++ "invalid dev extent chunk offset, has %llu not aligned to %u",
++			    btrfs_dev_extent_chunk_offset(leaf, de),
++ sectorsize);
++ return -EUCLEAN;
++ }
++ if (unlikely(!IS_ALIGNED(btrfs_dev_extent_length(leaf, de),
++ sectorsize))) {
++ generic_err(leaf, slot,
++ "invalid dev extent length, has %llu not aligned to %u",
++ btrfs_dev_extent_length(leaf, de), sectorsize);
++ return -EUCLEAN;
++ }
++ /* Overlap check with previous dev extent. */
++ if (slot && prev_key->objectid == key->objectid &&
++ prev_key->type == key->type) {
++ struct btrfs_dev_extent *prev_de;
++ u64 prev_len;
++
++ prev_de = btrfs_item_ptr(leaf, slot - 1, struct btrfs_dev_extent);
++ prev_len = btrfs_dev_extent_length(leaf, prev_de);
++ if (unlikely(prev_key->offset + prev_len > key->offset)) {
++ generic_err(leaf, slot,
++ "dev extent overlap, prev offset %llu len %llu current offset %llu",
++				    prev_key->offset, prev_len, key->offset);
++ return -EUCLEAN;
++ }
++ }
++ return 0;
++}
++
+ /*
+ * Common point to switch the item-specific validation.
+ */
+@@ -1754,6 +1820,9 @@ static enum btrfs_tree_block_status chec
+ case BTRFS_DEV_ITEM_KEY:
+ ret = check_dev_item(leaf, key, slot);
+ break;
++ case BTRFS_DEV_EXTENT_KEY:
++ ret = check_dev_extent_item(leaf, key, slot, prev_key);
++ break;
+ case BTRFS_INODE_ITEM_KEY:
+ ret = check_inode_item(leaf, key, slot);
+ break;
--- /dev/null
+From 31723c9542dba1681cc3720571fdf12ffe0eddd9 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Mon, 12 Aug 2024 08:52:44 +0930
+Subject: btrfs: tree-checker: reject BTRFS_FT_UNKNOWN dir type
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 31723c9542dba1681cc3720571fdf12ffe0eddd9 upstream.
+
+[REPORT]
+There is a bug report that the kernel is rejecting a mismatching inode mode
+and its dir item:
+
+ [ 1881.553937] BTRFS critical (device dm-0): inode mode mismatch with
+ dir: inode mode=040700 btrfs type=2 dir type=0
+
+[CAUSE]
+It looks like the inode mode is correct, while the dir item type
+0 is BTRFS_FT_UNKNOWN, which should not be generated by btrfs at all.
+
+This may be caused by a memory bit flip.
+
+[ENHANCEMENT]
+Although tree-checker is not able to do any cross-leaf verification, for
+this particular case we can at least reject any dir type with
+BTRFS_FT_UNKNOWN.
+
+So here we enhance the dir type check from [0, BTRFS_FT_MAX), to
+(0, BTRFS_FT_MAX).
+Although the existing corruption cannot be fixed just by such enhanced
+checking, it should prevent the same 0x2->0x0 bitflip for the dir type
+from reaching disk in the future.
+
+Reported-by: Kota <nospam@kota.moe>
+Link: https://lore.kernel.org/linux-btrfs/CACsxjPYnQF9ZF-0OhH16dAx50=BXXOcP74MxBc3BG+xae4vTTw@mail.gmail.com/
+CC: stable@vger.kernel.org # 5.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -551,9 +551,10 @@ static int check_dir_item(struct extent_
+
+ /* dir type check */
+ dir_type = btrfs_dir_ftype(leaf, di);
+- if (unlikely(dir_type >= BTRFS_FT_MAX)) {
++ if (unlikely(dir_type <= BTRFS_FT_UNKNOWN ||
++ dir_type >= BTRFS_FT_MAX)) {
+ dir_item_err(leaf, slot,
+- "invalid dir item type, have %u expect [0, %u)",
++ "invalid dir item type, have %u expect (0, %u)",
+ dir_type, BTRFS_FT_MAX);
+ return -EUCLEAN;
+ }
--- /dev/null
+From e30729d4bd4001881be4d1ad4332a5d4985398f8 Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Thu, 1 Aug 2024 16:47:52 +0900
+Subject: btrfs: zoned: properly take lock to read/update block group's zoned variables
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit e30729d4bd4001881be4d1ad4332a5d4985398f8 upstream.
+
+__btrfs_add_free_space_zoned() references and modifies bg's alloc_offset,
+ro, and zone_unusable, but without taking the lock. It is mostly safe
+because they monotonically increase (at least for now) and this function is
+mostly called by a transaction commit, which is serialized by itself.
+
+Still, taking the lock is a safer and correct option and I'm going to add a
+change to reset zone_unusable while a block group is still alive. So, add
+locking around the operations.
+
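+Condensed from the hunk below, the resulting lock nesting takes
+block_group->lock around the whole computation, with the short
+ctl->tree_lock section nested inside for the free-space update:
+
+    spin_lock(&block_group->lock);
+    ...
+    spin_lock(&ctl->tree_lock);
+    ctl->free_space += to_free;
+    spin_unlock(&ctl->tree_lock);
+    ...
+    spin_unlock(&block_group->lock);
+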
+Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones")
+CC: stable@vger.kernel.org # 5.15+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/free-space-cache.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/free-space-cache.c
++++ b/fs/btrfs/free-space-cache.c
+@@ -2698,15 +2698,16 @@ static int __btrfs_add_free_space_zoned(
+ u64 offset = bytenr - block_group->start;
+ u64 to_free, to_unusable;
+ int bg_reclaim_threshold = 0;
+- bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0));
++ bool initial;
+ u64 reclaimable_unusable;
+
+- WARN_ON(!initial && offset + size > block_group->zone_capacity);
++ spin_lock(&block_group->lock);
+
++ initial = ((size == block_group->length) && (block_group->alloc_offset == 0));
++ WARN_ON(!initial && offset + size > block_group->zone_capacity);
+ if (!initial)
+ bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold);
+
+- spin_lock(&ctl->tree_lock);
+ if (!used)
+ to_free = size;
+ else if (initial)
+@@ -2719,7 +2720,9 @@ static int __btrfs_add_free_space_zoned(
+ to_free = offset + size - block_group->alloc_offset;
+ to_unusable = size - to_free;
+
++ spin_lock(&ctl->tree_lock);
+ ctl->free_space += to_free;
++ spin_unlock(&ctl->tree_lock);
+ /*
+ * If the block group is read-only, we should account freed space into
+ * bytes_readonly.
+@@ -2728,11 +2731,8 @@ static int __btrfs_add_free_space_zoned(
+ block_group->zone_unusable += to_unusable;
+ WARN_ON(block_group->zone_unusable > block_group->length);
+ }
+- spin_unlock(&ctl->tree_lock);
+ if (!used) {
+- spin_lock(&block_group->lock);
+ block_group->alloc_offset -= size;
+- spin_unlock(&block_group->lock);
+ }
+
+ reclaimable_unusable = block_group->zone_unusable -
+@@ -2746,6 +2746,8 @@ static int __btrfs_add_free_space_zoned(
+ btrfs_mark_bg_to_reclaim(block_group);
+ }
+
++ spin_unlock(&block_group->lock);
++
+ return 0;
+ }
+
--- /dev/null
+From 2374bf7558de915edc6ec8cb10ec3291dfab9594 Mon Sep 17 00:00:00 2001
+From: Eli Billauer <eli.billauer@gmail.com>
+Date: Fri, 16 Aug 2024 10:02:00 +0300
+Subject: char: xillybus: Check USB endpoints when probing device
+
+From: Eli Billauer <eli.billauer@gmail.com>
+
+commit 2374bf7558de915edc6ec8cb10ec3291dfab9594 upstream.
+
+Ensure, as the driver probes the device, that all endpoints that the
+driver may attempt to access exist and are of the correct type.
+
+All XillyUSB devices must have a Bulk IN and Bulk OUT endpoint at
+address 1. This is verified in xillyusb_setup_base_eps().
+
+On top of that, a XillyUSB device may have additional Bulk OUT
+endpoints. The information about these endpoints' addresses is deduced
+from a data structure (the IDT) that the driver fetches from the device
+while probing it. These endpoints are checked in setup_channels().
+
+A XillyUSB device never has more than one IN endpoint, as all data
+towards the host is multiplexed in this single Bulk IN endpoint. This is
+why setup_channels() only checks OUT endpoints.
+
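+The fundamental-endpoint verification is a pair of
+usb_pipe_type_check() calls (from the hunk below); the helper returns
+nonzero when the endpoint does not exist or does not match the pipe
+type, in which case probing fails with -ENODEV:
+
+    if (usb_pipe_type_check(udev, usb_sndbulkpipe(udev, MSG_EP_NUM)) ||
+        usb_pipe_type_check(udev, usb_rcvbulkpipe(udev, IN_EP_NUM)))
+            return -ENODEV;
+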
+Reported-by: syzbot+eac39cba052f2e750dbe@syzkaller.appspotmail.com
+Cc: stable <stable@kernel.org>
+Closes: https://lore.kernel.org/all/0000000000001d44a6061f7a54ee@google.com/T/
+Fixes: a53d1202aef1 ("char: xillybus: Add driver for XillyUSB (Xillybus variant for USB)")
+Signed-off-by: Eli Billauer <eli.billauer@gmail.com>
+Link: https://lore.kernel.org/r/20240816070200.50695-2-eli.billauer@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/xillybus/xillyusb.c | 22 ++++++++++++++++++++--
+ 1 file changed, 20 insertions(+), 2 deletions(-)
+
+--- a/drivers/char/xillybus/xillyusb.c
++++ b/drivers/char/xillybus/xillyusb.c
+@@ -1903,6 +1903,13 @@ static const struct file_operations xill
+
+ static int xillyusb_setup_base_eps(struct xillyusb_dev *xdev)
+ {
++ struct usb_device *udev = xdev->udev;
++
++ /* Verify that device has the two fundamental bulk in/out endpoints */
++ if (usb_pipe_type_check(udev, usb_sndbulkpipe(udev, MSG_EP_NUM)) ||
++ usb_pipe_type_check(udev, usb_rcvbulkpipe(udev, IN_EP_NUM)))
++ return -ENODEV;
++
+ xdev->msg_ep = endpoint_alloc(xdev, MSG_EP_NUM | USB_DIR_OUT,
+ bulk_out_work, 1, 2);
+ if (!xdev->msg_ep)
+@@ -1932,14 +1939,15 @@ static int setup_channels(struct xillyus
+ __le16 *chandesc,
+ int num_channels)
+ {
+- struct xillyusb_channel *chan;
++ struct usb_device *udev = xdev->udev;
++ struct xillyusb_channel *chan, *new_channels;
+ int i;
+
+ chan = kcalloc(num_channels, sizeof(*chan), GFP_KERNEL);
+ if (!chan)
+ return -ENOMEM;
+
+- xdev->channels = chan;
++ new_channels = chan;
+
+ for (i = 0; i < num_channels; i++, chan++) {
+ unsigned int in_desc = le16_to_cpu(*chandesc++);
+@@ -1968,6 +1976,15 @@ static int setup_channels(struct xillyus
+ */
+
+ if ((out_desc & 0x80) && i < 14) { /* Entry is valid */
++ if (usb_pipe_type_check(udev,
++ usb_sndbulkpipe(udev, i + 2))) {
++ dev_err(xdev->dev,
++ "Missing BULK OUT endpoint %d\n",
++ i + 2);
++ kfree(new_channels);
++ return -ENODEV;
++ }
++
+ chan->writable = 1;
+ chan->out_synchronous = !!(out_desc & 0x40);
+ chan->out_seekable = !!(out_desc & 0x20);
+@@ -1977,6 +1994,7 @@ static int setup_channels(struct xillyus
+ }
+ }
+
++ xdev->channels = new_channels;
+ return 0;
+ }
+
--- /dev/null
+From ccbde4b128ef9c73d14d0d7817d68ef795f6d131 Mon Sep 17 00:00:00 2001
+From: Eli Billauer <eli.billauer@gmail.com>
+Date: Thu, 1 Aug 2024 15:11:26 +0300
+Subject: char: xillybus: Don't destroy workqueue from work item running on it
+
+From: Eli Billauer <eli.billauer@gmail.com>
+
+commit ccbde4b128ef9c73d14d0d7817d68ef795f6d131 upstream.
+
+Triggered by a kref decrement, destroy_workqueue() may be called from
+within a work item for destroying its own workqueue. This illegal
+situation is averted by adding a module-global workqueue for exclusive
+use of the offending work item. Other work items continue to be queued
+on per-device workqueues to ensure performance.
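+
+As a minimal sketch of the resulting pattern (struct and function names
+hypothetical, not the driver's exact code):
+
+  #include <linux/slab.h>
+  #include <linux/workqueue.h>
+
+  static struct workqueue_struct *module_wq; /* outlives all devices */
+
+  struct dev_priv {
+          struct workqueue_struct *wq;       /* per-device workqueue */
+          struct work_struct teardown;
+  };
+
+  static void teardown_work(struct work_struct *work)
+  {
+          struct dev_priv *priv =
+                  container_of(work, struct dev_priv, teardown);
+
+          /* Safe: this item runs on module_wq, not on the queue it
+           * is about to destroy. */
+          destroy_workqueue(priv->wq);
+          kfree(priv);
+  }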
+
+Reported-by: syzbot+91dbdfecdd3287734d8e@syzkaller.appspotmail.com
+Cc: stable <stable@kernel.org>
+Closes: https://lore.kernel.org/lkml/0000000000000ab25a061e1dfe9f@google.com/
+Signed-off-by: Eli Billauer <eli.billauer@gmail.com>
+Link: https://lore.kernel.org/r/20240801121126.60183-1-eli.billauer@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/xillybus/xillyusb.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/drivers/char/xillybus/xillyusb.c
++++ b/drivers/char/xillybus/xillyusb.c
+@@ -50,6 +50,7 @@ MODULE_LICENSE("GPL v2");
+ static const char xillyname[] = "xillyusb";
+
+ static unsigned int fifo_buf_order;
++static struct workqueue_struct *wakeup_wq;
+
+ #define USB_VENDOR_ID_XILINX 0x03fd
+ #define USB_VENDOR_ID_ALTERA 0x09fb
+@@ -569,10 +570,6 @@ static void cleanup_dev(struct kref *kre
+ * errors if executed. The mechanism relies on that xdev->error is assigned
+ * a non-zero value by report_io_error() prior to queueing wakeup_all(),
+ * which prevents bulk_in_work() from calling process_bulk_in().
+- *
+- * The fact that wakeup_all() and bulk_in_work() are queued on the same
+- * workqueue makes their concurrent execution very unlikely, however the
+- * kernel's API doesn't seem to ensure this strictly.
+ */
+
+ static void wakeup_all(struct work_struct *work)
+@@ -627,7 +624,7 @@ static void report_io_error(struct xilly
+
+ if (do_once) {
+ kref_get(&xdev->kref); /* xdev is used by work item */
+- queue_work(xdev->workq, &xdev->wakeup_workitem);
++ queue_work(wakeup_wq, &xdev->wakeup_workitem);
+ }
+ }
+
+@@ -2258,6 +2255,10 @@ static int __init xillyusb_init(void)
+ {
+ int rc = 0;
+
++ wakeup_wq = alloc_workqueue(xillyname, 0, 0);
++ if (!wakeup_wq)
++ return -ENOMEM;
++
+ if (LOG2_INITIAL_FIFO_BUF_SIZE > PAGE_SHIFT)
+ fifo_buf_order = LOG2_INITIAL_FIFO_BUF_SIZE - PAGE_SHIFT;
+ else
+@@ -2265,11 +2266,16 @@ static int __init xillyusb_init(void)
+
+ rc = usb_register(&xillyusb_driver);
+
++ if (rc)
++ destroy_workqueue(wakeup_wq);
++
+ return rc;
+ }
+
+ static void __exit xillyusb_exit(void)
+ {
++ destroy_workqueue(wakeup_wq);
++
+ usb_deregister(&xillyusb_driver);
+ }
+
--- /dev/null
+From ad899c301c880766cc709aad277991b3ab671b66 Mon Sep 17 00:00:00 2001
+From: Eli Billauer <eli.billauer@gmail.com>
+Date: Fri, 16 Aug 2024 10:01:59 +0300
+Subject: char: xillybus: Refine workqueue handling
+
+From: Eli Billauer <eli.billauer@gmail.com>
+
+commit ad899c301c880766cc709aad277991b3ab671b66 upstream.
+
+As the wakeup work item now runs on a separate workqueue, it needs to be
+flushed separately along with flushing the device's workqueue.
+
+Also, move the destroy_workqueue() call to the end of the exit method,
+so that deinitialization is done in the opposite order of
+initialization.
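+
+The rule being applied is that the exit path undoes steps in the
+reverse order of the init path. A hedged sketch (driver symbols
+abbreviated and hypothetical, not the driver's exact code):
+
+  #include <linux/module.h>
+  #include <linux/usb.h>
+  #include <linux/workqueue.h>
+
+  static struct workqueue_struct *wakeup_wq;
+  static struct usb_driver mod_driver;        /* hypothetical driver */
+
+  static int __init mod_init(void)
+  {
+          int rc;
+
+          wakeup_wq = alloc_workqueue("mod", 0, 0);   /* step 1 */
+          if (!wakeup_wq)
+                  return -ENOMEM;
+
+          rc = usb_register(&mod_driver);             /* step 2 */
+          if (rc)
+                  destroy_workqueue(wakeup_wq);       /* undo step 1 */
+
+          return rc;
+  }
+
+  static void __exit mod_exit(void)
+  {
+          usb_deregister(&mod_driver);        /* undo step 2 first */
+          destroy_workqueue(wakeup_wq);       /* then undo step 1 */
+  }
+
+  module_init(mod_init);
+  module_exit(mod_exit);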
+
+Fixes: ccbde4b128ef ("char: xillybus: Don't destroy workqueue from work item running on it")
+Cc: stable <stable@kernel.org>
+Signed-off-by: Eli Billauer <eli.billauer@gmail.com>
+Link: https://lore.kernel.org/r/20240816070200.50695-1-eli.billauer@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/xillybus/xillyusb.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/char/xillybus/xillyusb.c
++++ b/drivers/char/xillybus/xillyusb.c
+@@ -2093,9 +2093,11 @@ static int xillyusb_discovery(struct usb
+ * just after responding with the IDT, there is no reason for any
+ * work item to be running now. To be sure that xdev->channels
+ * is updated on anything that might run in parallel, flush the
+- * workqueue, which rarely does anything.
++ * device's workqueue and the wakeup work item. This rarely
++ * does anything.
+ */
+ flush_workqueue(xdev->workq);
++ flush_work(&xdev->wakeup_workitem);
+
+ xdev->num_channels = num_channels;
+
+@@ -2274,9 +2276,9 @@ static int __init xillyusb_init(void)
+
+ static void __exit xillyusb_exit(void)
+ {
+- destroy_workqueue(wakeup_wq);
+-
+ usb_deregister(&xillyusb_driver);
++
++ destroy_workqueue(wakeup_wq);
+ }
+
+ module_init(xillyusb_init);
--- /dev/null
+From faada2174c08662ae98b439c69efe3e79382c538 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Tue, 13 Aug 2024 16:35:14 +0200
+Subject: dm persistent data: fix memory allocation failure
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit faada2174c08662ae98b439c69efe3e79382c538 upstream.
+
+kmalloc is unreliable when allocating more than 8 pages of memory. It may
+fail when there is plenty of free memory but the memory is fragmented.
+Zdenek Kabelac observed such failure in his tests.
+
+This commit changes kmalloc to kvmalloc - kvmalloc will fall back to
+vmalloc if the large allocation fails.
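+
+The pattern in general (a hedged sketch, not this driver's exact code):
+any allocation that may exceed a few pages pairs kvmalloc() with
+kvfree(), which correctly frees either backend:
+
+  #include <linux/mm.h>
+  #include <linux/slab.h>
+
+  static void *alloc_big(size_t len)
+  {
+          /* Tries kmalloc() first; falls back to vmalloc() when the
+           * physically contiguous allocation fails. */
+          return kvmalloc(len, GFP_KERNEL);
+  }
+
+  /* Free with kvfree(ptr) no matter which path was taken. */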
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Reported-by: Zdenek Kabelac <zkabelac@redhat.com>
+Reviewed-by: Mike Snitzer <snitzer@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/persistent-data/dm-space-map-metadata.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/persistent-data/dm-space-map-metadata.c
++++ b/drivers/md/persistent-data/dm-space-map-metadata.c
+@@ -277,7 +277,7 @@ static void sm_metadata_destroy(struct d
+ {
+ struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+- kfree(smm);
++ kvfree(smm);
+ }
+
+ static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
+@@ -772,7 +772,7 @@ struct dm_space_map *dm_sm_metadata_init
+ {
+ struct sm_metadata *smm;
+
+- smm = kmalloc(sizeof(*smm), GFP_KERNEL);
++ smm = kvmalloc(sizeof(*smm), GFP_KERNEL);
+ if (!smm)
+ return ERR_PTR(-ENOMEM);
+
--- /dev/null
+From 7a636b4f03af9d541205f69e373672e7b2b60a8a Mon Sep 17 00:00:00 2001
+From: Khazhismel Kumykov <khazhy@google.com>
+Date: Tue, 13 Aug 2024 12:39:52 +0200
+Subject: dm resume: don't return EINVAL when signalled
+
+From: Khazhismel Kumykov <khazhy@google.com>
+
+commit 7a636b4f03af9d541205f69e373672e7b2b60a8a upstream.
+
+If the dm_resume method is called on a device that is not suspended, the
+method will suspend the device briefly, before resuming it (so that the
+table will be swapped).
+
+However, there was a bug that the return value of dm_suspend was not
+checked. dm_suspend may return an error when it is interrupted by a
+signal. In this case, do_resume would call dm_swap_table, which would
+return -EINVAL.
+
+This commit fixes the logic, so that the error returned by dm_suspend is
+checked and the resume operation is undone.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Khazhismel Kumykov <khazhy@google.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-ioctl.c | 22 ++++++++++++++++++++--
+ 1 file changed, 20 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/dm-ioctl.c
++++ b/drivers/md/dm-ioctl.c
+@@ -1181,8 +1181,26 @@ static int do_resume(struct dm_ioctl *pa
+ suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG;
+ if (param->flags & DM_NOFLUSH_FLAG)
+ suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG;
+- if (!dm_suspended_md(md))
+- dm_suspend(md, suspend_flags);
++ if (!dm_suspended_md(md)) {
++ r = dm_suspend(md, suspend_flags);
++ if (r) {
++ down_write(&_hash_lock);
++ hc = dm_get_mdptr(md);
++ if (hc && !hc->new_map) {
++ hc->new_map = new_map;
++ new_map = NULL;
++ } else {
++ r = -ENXIO;
++ }
++ up_write(&_hash_lock);
++ if (new_map) {
++ dm_sync_table(md);
++ dm_table_destroy(new_map);
++ }
++ dm_put(md);
++ return r;
++ }
++ }
+
+ old_size = dm_get_size(md);
+ old_map = dm_swap_table(md, new_map);
--- /dev/null
+From 0573a1e2ea7e35bff08944a40f1adf2bb35cea61 Mon Sep 17 00:00:00 2001
+From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+Date: Tue, 6 Aug 2024 22:27:32 +0200
+Subject: drm/amdgpu: Actually check flags for all context ops.
+
+From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+
+commit 0573a1e2ea7e35bff08944a40f1adf2bb35cea61 upstream.
+
+Missing validation ...
+
+Checked libdrm and it clears all the structs, so we should be
+safe to just check everything.
+
+Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+(cherry picked from commit c6b86421f1f9ddf9d706f2453159813ee39d0cf9)
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+@@ -685,16 +685,24 @@ int amdgpu_ctx_ioctl(struct drm_device *
+
+ switch (args->in.op) {
+ case AMDGPU_CTX_OP_ALLOC_CTX:
++ if (args->in.flags)
++ return -EINVAL;
+ r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
+ args->out.alloc.ctx_id = id;
+ break;
+ case AMDGPU_CTX_OP_FREE_CTX:
++ if (args->in.flags)
++ return -EINVAL;
+ r = amdgpu_ctx_free(fpriv, id);
+ break;
+ case AMDGPU_CTX_OP_QUERY_STATE:
++ if (args->in.flags)
++ return -EINVAL;
+ r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
+ break;
+ case AMDGPU_CTX_OP_QUERY_STATE2:
++ if (args->in.flags)
++ return -EINVAL;
+ r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
+ break;
+ case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
--- /dev/null
+From 9a2fa1472083580b6c66bdaf291f591e1170123a Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Sat, 3 Aug 2024 18:02:00 -0400
+Subject: fix bitmap corruption on close_range() with CLOSE_RANGE_UNSHARE
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 9a2fa1472083580b6c66bdaf291f591e1170123a upstream.
+
+copy_fd_bitmaps(new, old, count) is expected to copy the first
+count/BITS_PER_LONG bits from old->full_fds_bits[] and fill
+the rest with zeroes. What it actually does is copy enough words
+(BITS_TO_LONGS(count/BITS_PER_LONG)) and then memset the rest.
+That works fine, *if* all bits past the cutoff point are
+clear. Otherwise we are risking garbage from the last word
+we'd copied.
+
+For most of the callers that is true - expand_fdtable() has
+count equal to old->max_fds, so there's no open descriptors
+past count, let alone fully occupied words in ->open_fds[],
+which is what bits in ->full_fds_bits[] correspond to.
+
+The other caller (dup_fd()) passes sane_fdtable_size(old_fdt, max_fds),
+which is the smallest multiple of BITS_PER_LONG that covers all
+opened descriptors below max_fds. In the common case (copying on
+fork()) max_fds is ~0U, so all opened descriptors will be below
+it and we are fine, by the same reasons why the call in expand_fdtable()
+is safe.
+
+Unfortunately, there is a case where max_fds is less than that
+and where we might, indeed, end up with junk in ->full_fds_bits[] -
+close_range(from, to, CLOSE_RANGE_UNSHARE) with
+ * descriptor table being currently shared
+ * 'to' being above the current capacity of descriptor table
+ * 'from' being just under some chunk of opened descriptors.
+In that case we end up with observably wrong behaviour - e.g. spawn
+a child with CLONE_FILES, get all descriptors in range 0..127 open,
+then close_range(64, ~0U, CLOSE_RANGE_UNSHARE) and watch dup(0) ending
+up with descriptor #128, despite #64 being observably not open.
+
+The minimally invasive fix would be to deal with that in dup_fd().
+If this proves to add measurable overhead, we can go that way, but
+let's try to fix copy_fd_bitmaps() first.
+
+* new helper: bitmap_copy_and_extend(to, from, bits_to_copy, size).
+* make copy_fd_bitmaps() take the bitmap size in words, rather than
+bits; its 'count' argument is always a multiple of BITS_PER_LONG,
+so we are not losing any information, and that way we can use the
+same helper for all three bitmaps - the compiler will see that count
+is a multiple of BITS_PER_LONG for the large ones, so it'll generate
+plain memcpy()+memset().
+
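+For illustration, the helper's semantics in a hedged usage sketch (not
+part of the patch): copy the first 'count' bits, clear the tail of the
+last copied word, and zero the remainder of a 'size'-bit destination:
+
+  #include <linux/bitmap.h>
+
+  static void extend_demo(void)
+  {
+          DECLARE_BITMAP(src, 128);
+          DECLARE_BITMAP(dst, 256);
+
+          bitmap_fill(src, 128);
+          bitmap_copy_and_extend(dst, src, 64, 256);
+          /* dst: bits 0..63 set, bits 64..255 cleared */
+  }
+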
+Reproducer added to tools/testing/selftests/core/close_range_test.c
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/file.c | 28 ++++++++-----------
+ include/linux/bitmap.h | 12 ++++++++
+ tools/testing/selftests/core/close_range_test.c | 35 ++++++++++++++++++++++++
+ 3 files changed, 59 insertions(+), 16 deletions(-)
+
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -46,27 +46,23 @@ static void free_fdtable_rcu(struct rcu_
+ #define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr))
+ #define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long))
+
++#define fdt_words(fdt) ((fdt)->max_fds / BITS_PER_LONG) // words in ->open_fds
+ /*
+ * Copy 'count' fd bits from the old table to the new table and clear the extra
+ * space if any. This does not copy the file pointers. Called with the files
+ * spinlock held for write.
+ */
+-static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
+- unsigned int count)
++static inline void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
++ unsigned int copy_words)
+ {
+- unsigned int cpy, set;
++ unsigned int nwords = fdt_words(nfdt);
+
+- cpy = count / BITS_PER_BYTE;
+- set = (nfdt->max_fds - count) / BITS_PER_BYTE;
+- memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
+- memset((char *)nfdt->open_fds + cpy, 0, set);
+- memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
+- memset((char *)nfdt->close_on_exec + cpy, 0, set);
+-
+- cpy = BITBIT_SIZE(count);
+- set = BITBIT_SIZE(nfdt->max_fds) - cpy;
+- memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy);
+- memset((char *)nfdt->full_fds_bits + cpy, 0, set);
++ bitmap_copy_and_extend(nfdt->open_fds, ofdt->open_fds,
++ copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG);
++ bitmap_copy_and_extend(nfdt->close_on_exec, ofdt->close_on_exec,
++ copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG);
++ bitmap_copy_and_extend(nfdt->full_fds_bits, ofdt->full_fds_bits,
++ copy_words, nwords);
+ }
+
+ /*
+@@ -84,7 +80,7 @@ static void copy_fdtable(struct fdtable
+ memcpy(nfdt->fd, ofdt->fd, cpy);
+ memset((char *)nfdt->fd + cpy, 0, set);
+
+- copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
++ copy_fd_bitmaps(nfdt, ofdt, fdt_words(ofdt));
+ }
+
+ /*
+@@ -379,7 +375,7 @@ struct files_struct *dup_fd(struct files
+ open_files = sane_fdtable_size(old_fdt, max_fds);
+ }
+
+- copy_fd_bitmaps(new_fdt, old_fdt, open_files);
++ copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG);
+
+ old_fds = old_fdt->fd;
+ new_fds = new_fdt->fd;
+--- a/include/linux/bitmap.h
++++ b/include/linux/bitmap.h
+@@ -270,6 +270,18 @@ static inline void bitmap_copy_clear_tai
+ dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits);
+ }
+
++static inline void bitmap_copy_and_extend(unsigned long *to,
++ const unsigned long *from,
++ unsigned int count, unsigned int size)
++{
++ unsigned int copy = BITS_TO_LONGS(count);
++
++ memcpy(to, from, copy * sizeof(long));
++ if (count % BITS_PER_LONG)
++ to[copy - 1] &= BITMAP_LAST_WORD_MASK(count);
++ memset(to + copy, 0, bitmap_size(size) - copy * sizeof(long));
++}
++
+ /*
+ * On 32-bit systems bitmaps are represented as u32 arrays internally. On LE64
+ * machines the order of hi and lo parts of numbers match the bitmap structure.
+--- a/tools/testing/selftests/core/close_range_test.c
++++ b/tools/testing/selftests/core/close_range_test.c
+@@ -589,4 +589,39 @@ TEST(close_range_cloexec_unshare_syzbot)
+ EXPECT_EQ(close(fd3), 0);
+ }
+
++TEST(close_range_bitmap_corruption)
++{
++ pid_t pid;
++ int status;
++ struct __clone_args args = {
++ .flags = CLONE_FILES,
++ .exit_signal = SIGCHLD,
++ };
++
++ /* get the first 128 descriptors open */
++ for (int i = 2; i < 128; i++)
++ EXPECT_GE(dup2(0, i), 0);
++
++ /* get descriptor table shared */
++ pid = sys_clone3(&args, sizeof(args));
++ ASSERT_GE(pid, 0);
++
++ if (pid == 0) {
++ /* unshare and truncate descriptor table down to 64 */
++ if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE))
++ exit(EXIT_FAILURE);
++
++ ASSERT_EQ(fcntl(64, F_GETFD), -1);
++ /* ... and verify that the range 64..127 is not
++ stuck "fully used" according to secondary bitmap */
++ EXPECT_EQ(dup(0), 64)
++ exit(EXIT_FAILURE);
++ exit(EXIT_SUCCESS);
++ }
++
++ EXPECT_EQ(waitpid(pid, &status, 0), pid);
++ EXPECT_EQ(true, WIFEXITED(status));
++ EXPECT_EQ(0, WEXITSTATUS(status));
++}
++
+ TEST_HARNESS_MAIN
--- /dev/null
+From f71aa06398aabc2e3eaac25acdf3d62e0094ba70 Mon Sep 17 00:00:00 2001
+From: Max Kellermann <max.kellermann@ionos.com>
+Date: Mon, 29 Jul 2024 17:19:30 +0100
+Subject: fs/netfs/fscache_cookie: add missing "n_accesses" check
+
+From: Max Kellermann <max.kellermann@ionos.com>
+
+commit f71aa06398aabc2e3eaac25acdf3d62e0094ba70 upstream.
+
+This fixes a NULL pointer dereference bug due to a data race which
+looks like this:
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000008
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 0 P4D 0
+ Oops: 0000 [#1] SMP PTI
+ CPU: 33 PID: 16573 Comm: kworker/u97:799 Not tainted 6.8.7-cm4all1-hp+ #43
+ Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 10/17/2018
+ Workqueue: events_unbound netfs_rreq_write_to_cache_work
+ RIP: 0010:cachefiles_prepare_write+0x30/0xa0
+ Code: 57 41 56 45 89 ce 41 55 49 89 cd 41 54 49 89 d4 55 53 48 89 fb 48 83 ec 08 48 8b 47 08 48 83 7f 10 00 48 89 34 24 48 8b 68 20 <48> 8b 45 08 4c 8b 38 74 45 49 8b 7f 50 e8 4e a9 b0 ff 48 8b 73 10
+ RSP: 0018:ffffb4e78113bde0 EFLAGS: 00010286
+ RAX: ffff976126be6d10 RBX: ffff97615cdb8438 RCX: 0000000000020000
+ RDX: ffff97605e6c4c68 RSI: ffff97605e6c4c60 RDI: ffff97615cdb8438
+ RBP: 0000000000000000 R08: 0000000000278333 R09: 0000000000000001
+ R10: ffff97605e6c4600 R11: 0000000000000001 R12: ffff97605e6c4c68
+ R13: 0000000000020000 R14: 0000000000000001 R15: ffff976064fe2c00
+ FS: 0000000000000000(0000) GS:ffff9776dfd40000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000000000008 CR3: 000000005942c002 CR4: 00000000001706f0
+ Call Trace:
+ <TASK>
+ ? __die+0x1f/0x70
+ ? page_fault_oops+0x15d/0x440
+ ? search_module_extables+0xe/0x40
+ ? fixup_exception+0x22/0x2f0
+ ? exc_page_fault+0x5f/0x100
+ ? asm_exc_page_fault+0x22/0x30
+ ? cachefiles_prepare_write+0x30/0xa0
+ netfs_rreq_write_to_cache_work+0x135/0x2e0
+ process_one_work+0x137/0x2c0
+ worker_thread+0x2e9/0x400
+ ? __pfx_worker_thread+0x10/0x10
+ kthread+0xcc/0x100
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork+0x30/0x50
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork_asm+0x1b/0x30
+ </TASK>
+ Modules linked in:
+ CR2: 0000000000000008
+ ---[ end trace 0000000000000000 ]---
+
+This happened because fscache_cookie_state_machine() was slow and was
+still running while another process invoked fscache_unuse_cookie();
+this led to a fscache_cookie_lru_do_one() call, setting the
+FSCACHE_COOKIE_DO_LRU_DISCARD flag, which was picked up by
+fscache_cookie_state_machine(), withdrawing the cookie via
+cachefiles_withdraw_cookie(), clearing cookie->cache_priv.
+
+At the same time, yet another process invoked
+cachefiles_prepare_write(), which found a NULL pointer in this code
+line:
+
+ struct cachefiles_object *object = cachefiles_cres_object(cres);
+
+The next line crashes, obviously:
+
+ struct cachefiles_cache *cache = object->volume->cache;
+
+During cachefiles_prepare_write(), the "n_accesses" counter is
+non-zero (via fscache_begin_operation()). The cookie must not be
+withdrawn until it drops to zero.
+
+The counter is checked by fscache_cookie_state_machine() before
+switching to FSCACHE_COOKIE_STATE_RELINQUISHING and
+FSCACHE_COOKIE_STATE_WITHDRAWING (in "case
+FSCACHE_COOKIE_STATE_FAILED"), but not for
+FSCACHE_COOKIE_STATE_LRU_DISCARDING ("case
+FSCACHE_COOKIE_STATE_ACTIVE").
+
+This patch adds the missing check. With a non-zero access counter,
+the function returns and the next fscache_end_cookie_access() call
+will queue another fscache_cookie_state_machine() call to handle the
+still-pending FSCACHE_COOKIE_DO_LRU_DISCARD.
+
+Fixes: 12bb21a29c19 ("fscache: Implement cookie user counting and resource pinning")
+Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Link: https://lore.kernel.org/r/20240729162002.3436763-2-dhowells@redhat.com
+cc: Jeff Layton <jlayton@kernel.org>
+cc: netfs@lists.linux.dev
+cc: linux-fsdevel@vger.kernel.org
+cc: stable@vger.kernel.org
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/netfs/fscache_cookie.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/netfs/fscache_cookie.c
++++ b/fs/netfs/fscache_cookie.c
+@@ -741,6 +741,10 @@ again_locked:
+ spin_lock(&cookie->lock);
+ }
+ if (test_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags)) {
++ if (atomic_read(&cookie->n_accesses) != 0)
++ /* still being accessed: postpone it */
++ break;
++
+ __fscache_set_cookie_state(cookie,
+ FSCACHE_COOKIE_STATE_LRU_DISCARDING);
+ wake = true;
--- /dev/null
+From 4e91fa1ef3ce6290b4c598e54b5eb6cf134fbec8 Mon Sep 17 00:00:00 2001
+From: Andi Shyti <andi.shyti@kernel.org>
+Date: Mon, 12 Aug 2024 21:40:28 +0200
+Subject: i2c: qcom-geni: Add missing geni_icc_disable in geni_i2c_runtime_resume
+
+From: Andi Shyti <andi.shyti@kernel.org>
+
+commit 4e91fa1ef3ce6290b4c598e54b5eb6cf134fbec8 upstream.
+
+Add the missing geni_icc_disable() call before returning in the
+geni_i2c_runtime_resume() function.
+
+Commit 9ba48db9f77c ("i2c: qcom-geni: Add missing
+geni_icc_enable in geni_i2c_runtime_resume") by Gaosheng missed
+disabling the interconnect in one case.
+
+Fixes: bf225ed357c6 ("i2c: i2c-qcom-geni: Add interconnect support")
+Cc: Gaosheng Cui <cuigaosheng1@huawei.com>
+Cc: stable@vger.kernel.org # v5.9+
+Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-qcom-geni.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/i2c/busses/i2c-qcom-geni.c
++++ b/drivers/i2c/busses/i2c-qcom-geni.c
+@@ -986,8 +986,10 @@ static int __maybe_unused geni_i2c_runti
+ return ret;
+
+ ret = clk_prepare_enable(gi2c->core_clk);
+- if (ret)
++ if (ret) {
++ geni_icc_disable(&gi2c->se);
+ return ret;
++ }
+
+ ret = geni_se_resources_on(&gi2c->se);
+ if (ret) {
--- /dev/null
+From 14d069d92951a3e150c0a81f2ca3b93e54da913b Mon Sep 17 00:00:00 2001
+From: Breno Leitao <leitao@debian.org>
+Date: Tue, 13 Aug 2024 09:12:53 -0700
+Subject: i2c: tegra: Do not mark ACPI devices as irq safe
+
+From: Breno Leitao <leitao@debian.org>
+
+commit 14d069d92951a3e150c0a81f2ca3b93e54da913b upstream.
+
+On ACPI machines, the tegra i2c module encounters an issue due to a
+mutex being taken inside a spinlock. This leads to the following bug:
+
+ BUG: sleeping function called from invalid context at kernel/locking/mutex.c:585
+ ...
+
+ Call trace:
+ __might_sleep
+ __mutex_lock_common
+ mutex_lock_nested
+ acpi_subsys_runtime_resume
+ rpm_resume
+ tegra_i2c_xfer
+
+The problem arises because during __pm_runtime_resume(), the spinlock
+&dev->power.lock is acquired before rpm_resume() is called. Later,
+rpm_resume() invokes acpi_subsys_runtime_resume(), which relies on
+mutexes, triggering the error.
+
+To address this issue, devices on ACPI are now marked as not IRQ-safe,
+considering the dependency of acpi_subsys_runtime_resume() on mutexes.
+
+Fixes: bd2fdedbf2ba ("i2c: tegra: Add the ACPI support")
+Cc: <stable@vger.kernel.org> # v5.17+
+Co-developed-by: Michael van der Westhuizen <rmikey@meta.com>
+Signed-off-by: Michael van der Westhuizen <rmikey@meta.com>
+Signed-off-by: Breno Leitao <leitao@debian.org>
+Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
+Reviewed-by: Andy Shevchenko <andy@kernel.org>
+Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-tegra.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-tegra.c
++++ b/drivers/i2c/busses/i2c-tegra.c
+@@ -1802,9 +1802,9 @@ static int tegra_i2c_probe(struct platfo
+ * domain.
+ *
+ * VI I2C device shouldn't be marked as IRQ-safe because VI I2C won't
+- * be used for atomic transfers.
++ * be used for atomic transfers. ACPI device is not IRQ safe also.
+ */
+- if (!IS_VI(i2c_dev))
++ if (!IS_VI(i2c_dev) && !has_acpi_companion(i2c_dev->dev))
+ pm_runtime_irq_safe(i2c_dev->dev);
+
+ pm_runtime_enable(i2c_dev->dev);
--- /dev/null
+From 0e28bf61a5f9ab30be3f3b4eafb8d097e39446bb Mon Sep 17 00:00:00 2001
+From: David Gstir <david@sigma-star.at>
+Date: Wed, 17 Jul 2024 13:28:45 +0200
+Subject: KEYS: trusted: dcp: fix leak of blob encryption key
+
+From: David Gstir <david@sigma-star.at>
+
+commit 0e28bf61a5f9ab30be3f3b4eafb8d097e39446bb upstream.
+
+Trusted keys unseal the key blob on load, but keep the sealed payload in
+the blob field so that every subsequent read (export) will simply
+convert this field to hex and send it to userspace.
+
+With DCP-based trusted keys, we decrypt the blob encryption key (BEK)
+in the kernel due to hardware limitations and then decrypt the blob payload.
+BEK decryption is done in-place, which means that the trusted key blob
+field is modified and consequently holds the BEK in plain text.
+Every subsequent read of that key thus sends the plain text BEK instead
+of the encrypted BEK to userspace.
+
+This issue only occurs when importing a trusted DCP-based key and
+then exporting it again. This should rarely happen as the common use cases
+are to either create a new trusted key and export it, or import a key
+blob and then just use it without exporting it again.
+
+Fix this by performing BEK decryption and encryption in a dedicated
+buffer. Further, always wipe the plain text BEK buffer to prevent leaking
+the key via uninitialized memory.
+
+Cc: stable@vger.kernel.org # v6.10+
+Fixes: 2e8a0f40a39c ("KEYS: trusted: Introduce NXP DCP-backed trusted keys")
+Signed-off-by: David Gstir <david@sigma-star.at>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/keys/trusted-keys/trusted_dcp.c | 33 +++++++++++++++---------
+ 1 file changed, 21 insertions(+), 12 deletions(-)
+
+diff --git a/security/keys/trusted-keys/trusted_dcp.c b/security/keys/trusted-keys/trusted_dcp.c
+index b0947f072a98..4edc5bbbcda3 100644
+--- a/security/keys/trusted-keys/trusted_dcp.c
++++ b/security/keys/trusted-keys/trusted_dcp.c
+@@ -186,20 +186,21 @@ static int do_aead_crypto(u8 *in, u8 *out, size_t len, u8 *key, u8 *nonce,
+ return ret;
+ }
+
+-static int decrypt_blob_key(u8 *key)
++static int decrypt_blob_key(u8 *encrypted_key, u8 *plain_key)
+ {
+- return do_dcp_crypto(key, key, false);
++ return do_dcp_crypto(encrypted_key, plain_key, false);
+ }
+
+-static int encrypt_blob_key(u8 *key)
++static int encrypt_blob_key(u8 *plain_key, u8 *encrypted_key)
+ {
+- return do_dcp_crypto(key, key, true);
++ return do_dcp_crypto(plain_key, encrypted_key, true);
+ }
+
+ static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob)
+ {
+ struct dcp_blob_fmt *b = (struct dcp_blob_fmt *)p->blob;
+ int blen, ret;
++ u8 plain_blob_key[AES_KEYSIZE_128];
+
+ blen = calc_blob_len(p->key_len);
+ if (blen > MAX_BLOB_SIZE)
+@@ -207,30 +208,36 @@ static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob)
+
+ b->fmt_version = DCP_BLOB_VERSION;
+ get_random_bytes(b->nonce, AES_KEYSIZE_128);
+- get_random_bytes(b->blob_key, AES_KEYSIZE_128);
++ get_random_bytes(plain_blob_key, AES_KEYSIZE_128);
+
+- ret = do_aead_crypto(p->key, b->payload, p->key_len, b->blob_key,
++ ret = do_aead_crypto(p->key, b->payload, p->key_len, plain_blob_key,
+ b->nonce, true);
+ if (ret) {
+ pr_err("Unable to encrypt blob payload: %i\n", ret);
+- return ret;
++ goto out;
+ }
+
+- ret = encrypt_blob_key(b->blob_key);
++ ret = encrypt_blob_key(plain_blob_key, b->blob_key);
+ if (ret) {
+ pr_err("Unable to encrypt blob key: %i\n", ret);
+- return ret;
++ goto out;
+ }
+
+ put_unaligned_le32(p->key_len, &b->payload_len);
+ p->blob_len = blen;
+- return 0;
++ ret = 0;
++
++out:
++ memzero_explicit(plain_blob_key, sizeof(plain_blob_key));
++
++ return ret;
+ }
+
+ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob)
+ {
+ struct dcp_blob_fmt *b = (struct dcp_blob_fmt *)p->blob;
+ int blen, ret;
++ u8 plain_blob_key[AES_KEYSIZE_128];
+
+ if (b->fmt_version != DCP_BLOB_VERSION) {
+ pr_err("DCP blob has bad version: %i, expected %i\n",
+@@ -248,14 +255,14 @@ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob)
+ goto out;
+ }
+
+- ret = decrypt_blob_key(b->blob_key);
++ ret = decrypt_blob_key(b->blob_key, plain_blob_key);
+ if (ret) {
+ pr_err("Unable to decrypt blob key: %i\n", ret);
+ goto out;
+ }
+
+ ret = do_aead_crypto(b->payload, p->key, p->key_len + DCP_BLOB_AUTHLEN,
+- b->blob_key, b->nonce, false);
++ plain_blob_key, b->nonce, false);
+ if (ret) {
+ pr_err("Unwrap of DCP payload failed: %i\n", ret);
+ goto out;
+@@ -263,6 +270,8 @@ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob)
+
+ ret = 0;
+ out:
++ memzero_explicit(plain_blob_key, sizeof(plain_blob_key));
++
+ return ret;
+ }
+
+--
+2.46.0
+
--- /dev/null
+From 6486cad00a8b7f8585983408c152bbe33dda529b Mon Sep 17 00:00:00 2001
+From: David Gstir <david@sigma-star.at>
+Date: Wed, 17 Jul 2024 13:28:44 +0200
+Subject: KEYS: trusted: fix DCP blob payload length assignment
+
+From: David Gstir <david@sigma-star.at>
+
+commit 6486cad00a8b7f8585983408c152bbe33dda529b upstream.
+
+The DCP trusted key type uses the wrong helper function to store
+the blob's payload length, which can lead to the wrong byte order
+being used if this ever runs on big endian architectures.
+
+Fix by using the correct helper function.
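+
+For illustration (a hedged sketch): the two helpers go in opposite
+directions, so storing a value with the getter only appears to work on
+little-endian machines:
+
+  #include <linux/types.h>
+  #include <asm/unaligned.h>
+
+  static void endian_demo(void)
+  {
+          u8 buf[4];
+          u32 len = 0x11223344;
+
+          put_unaligned_le32(len, buf);  /* cpu -> LE bytes 44 33 22 11 */
+          len = get_unaligned_le32(buf); /* LE bytes -> cpu value */
+  }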
+
+Cc: stable@vger.kernel.org # v6.10+
+Fixes: 2e8a0f40a39c ("KEYS: trusted: Introduce NXP DCP-backed trusted keys")
+Suggested-by: Richard Weinberger <richard@nod.at>
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202405240610.fj53EK0q-lkp@intel.com/
+Signed-off-by: David Gstir <david@sigma-star.at>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/keys/trusted-keys/trusted_dcp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/security/keys/trusted-keys/trusted_dcp.c b/security/keys/trusted-keys/trusted_dcp.c
+index b5f81a05be36..b0947f072a98 100644
+--- a/security/keys/trusted-keys/trusted_dcp.c
++++ b/security/keys/trusted-keys/trusted_dcp.c
+@@ -222,7 +222,7 @@ static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob)
+ return ret;
+ }
+
+- b->payload_len = get_unaligned_le32(&p->key_len);
++ put_unaligned_le32(p->key_len, &b->payload_len);
+ p->blob_len = blen;
+ return 0;
+ }
+--
+2.46.0
+
--- /dev/null
+From 5a44bb061d04b0306f2aa8add761d86d152b9377 Mon Sep 17 00:00:00 2001
+From: Michael Mueller <mimu@linux.ibm.com>
+Date: Thu, 1 Aug 2024 14:31:09 +0200
+Subject: KVM: s390: fix validity interception issue when gisa is switched off
+
+From: Michael Mueller <mimu@linux.ibm.com>
+
+commit 5a44bb061d04b0306f2aa8add761d86d152b9377 upstream.
+
+We might run into a SIE validity intercept if gisa has been disabled,
+either via the kernel parameter "kvm.use_gisa=0" or by setting the
+related sysfs attribute to N (echo N >/sys/module/kvm/parameters/use_gisa).
+
+The validity is caused by an invalid value in the SIE control block's
+gisa designation. That happens because we pass the uninitialized gisa
+origin to virt_to_phys() before writing it to the gisa designation.
+
+To fix this we return 0 in kvm_s390_get_gisa_desc() if the origin is 0.
+kvm_s390_get_gisa_desc() is used to determine which gisa designation to
+set in the SIE control block. A value of 0 in the gisa designation disables
+gisa usage.
+
+The issue surfaces in the host kernel with the following kernel message as
+soon as a new kvm guest start is attempted.
+
+kvm: unhandled validity intercept 0x1011
+WARNING: CPU: 0 PID: 781237 at arch/s390/kvm/intercept.c:101 kvm_handle_sie_intercept+0x42e/0x4d0 [kvm]
+Modules linked in: vhost_net tap tun xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT xt_tcpudp nft_compat x_tables nf_nat_tftp nf_conntrack_tftp vfio_pci_core irqbypass vhost_vsock vmw_vsock_virtio_transport_common vsock vhost vhost_iotlb kvm nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables sunrpc mlx5_ib ib_uverbs ib_core mlx5_core uvdevice s390_trng eadm_sch vfio_ccw zcrypt_cex4 mdev vfio_iommu_type1 vfio sch_fq_codel drm i2c_core loop drm_panel_orientation_quirks configfs nfnetlink lcs ctcm fsm dm_service_time ghash_s390 prng chacha_s390 libchacha aes_s390 des_s390 libdes sha3_512_s390 sha3_256_s390 sha512_s390 sha256_s390 sha1_s390 sha_common dm_mirror dm_region_hash dm_log zfcp scsi_transport_fc scsi_dh_rdac scsi_dh_emc scsi_dh_alua pkey zcrypt dm_multipath rng_core autofs4 [last unloaded: vfio_pci]
+CPU: 0 PID: 781237 Comm: CPU 0/KVM Not tainted 6.10.0-08682-gcad9f11498ea #6
+Hardware name: IBM 3931 A01 701 (LPAR)
+Krnl PSW : 0704c00180000000 000003d93deb0122 (kvm_handle_sie_intercept+0x432/0x4d0 [kvm])
+ R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3
+Krnl GPRS: 000003d900000027 000003d900000023 0000000000000028 000002cd00000000
+ 000002d063a00900 00000359c6daf708 00000000000bebb5 0000000000001eff
+ 000002cfd82e9000 000002cfd80bc000 0000000000001011 000003d93deda412
+ 000003ff8962df98 000003d93de77ce0 000003d93deb011e 00000359c6daf960
+Krnl Code: 000003d93deb0112: c020fffe7259 larl %r2,000003d93de7e5c4
+ 000003d93deb0118: c0e53fa8beac brasl %r14,000003d9bd3c7e70
+ #000003d93deb011e: af000000 mc 0,0
+ >000003d93deb0122: a728ffea lhi %r2,-22
+ 000003d93deb0126: a7f4fe24 brc 15,000003d93deafd6e
+ 000003d93deb012a: 9101f0b0 tm 176(%r15),1
+ 000003d93deb012e: a774fe48 brc 7,000003d93deafdbe
+ 000003d93deb0132: 40a0f0ae sth %r10,174(%r15)
+Call Trace:
+ [<000003d93deb0122>] kvm_handle_sie_intercept+0x432/0x4d0 [kvm]
+([<000003d93deb011e>] kvm_handle_sie_intercept+0x42e/0x4d0 [kvm])
+ [<000003d93deacc10>] vcpu_post_run+0x1d0/0x3b0 [kvm]
+ [<000003d93deaceda>] __vcpu_run+0xea/0x2d0 [kvm]
+ [<000003d93dead9da>] kvm_arch_vcpu_ioctl_run+0x16a/0x430 [kvm]
+ [<000003d93de93ee0>] kvm_vcpu_ioctl+0x190/0x7c0 [kvm]
+ [<000003d9bd728b4e>] vfs_ioctl+0x2e/0x70
+ [<000003d9bd72a092>] __s390x_sys_ioctl+0xc2/0xd0
+ [<000003d9be0e9222>] __do_syscall+0x1f2/0x2e0
+ [<000003d9be0f9a90>] system_call+0x70/0x98
+Last Breaking-Event-Address:
+ [<000003d9bd3c7f58>] __warn_printk+0xe8/0xf0
+
+Cc: stable@vger.kernel.org
+Reported-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Fixes: fe0ef0030463 ("KVM: s390: sort out physical vs virtual pointers usage")
+Signed-off-by: Michael Mueller <mimu@linux.ibm.com>
+Tested-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
+Link: https://lore.kernel.org/r/20240801123109.2782155-1-mimu@linux.ibm.com
+Message-ID: <20240801123109.2782155-1-mimu@linux.ibm.com>
+Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kvm/kvm-s390.h | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/kvm/kvm-s390.h
++++ b/arch/s390/kvm/kvm-s390.h
+@@ -267,7 +267,12 @@ static inline unsigned long kvm_s390_get
+
+ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
+ {
+- u32 gd = virt_to_phys(kvm->arch.gisa_int.origin);
++ u32 gd;
++
++ if (!kvm->arch.gisa_int.origin)
++ return 0;
++
++ gd = virt_to_phys(kvm->arch.gisa_int.origin);
+
+ if (gd && sclp.has_gisaf)
+ gd |= GISA_FORMAT1;
--- /dev/null
+From c916ca35308d3187c9928664f9be249b22a3a701 Mon Sep 17 00:00:00 2001
+From: Yu Kuai <yukuai3@huawei.com>
+Date: Sat, 3 Aug 2024 17:11:37 +0800
+Subject: md/raid1: Fix data corruption for degraded array with slow disk
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+commit c916ca35308d3187c9928664f9be249b22a3a701 upstream.
+
+read_balance() will avoid reading from slow disks as much as possible,
+however, if valid data only lands in slow disks, and a new normal disk
+is still in recovery, unrecovered data can be read:
+
+raid1_read_request
+ read_balance
+ raid1_should_read_first
+ -> return false
+ choose_best_rdev
+ -> normal disk is not recovered, return -1
+ choose_bb_rdev
+ -> missing the checking of recovery, return the normal disk
+ -> read unrecovered data
+
+The root cause is that the recovery check is missing in
+choose_bb_rdev(). Hence add such a check to fix the problem.
+
+Also fix a similar problem in choose_slow_rdev().
+
+Cc: stable@vger.kernel.org
+Fixes: 9f3ced792203 ("md/raid1: factor out choose_bb_rdev() from read_balance()")
+Fixes: dfa8ecd167c1 ("md/raid1: factor out choose_slow_rdev() from read_balance()")
+Reported-and-tested-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Closes: https://lore.kernel.org/all/9952f532-2554-44bf-b906-4880b2e88e3a@o2.pl/
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Link: https://lore.kernel.org/r/20240803091137.3197008-1-yukuai1@huaweicloud.com
+Signed-off-by: Song Liu <song@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/raid1.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
+index 7acfe7c9dc8d..761989d67906 100644
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -617,6 +617,12 @@ static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio,
+ return -1;
+ }
+
++static bool rdev_in_recovery(struct md_rdev *rdev, struct r1bio *r1_bio)
++{
++ return !test_bit(In_sync, &rdev->flags) &&
++ rdev->recovery_offset < r1_bio->sector + r1_bio->sectors;
++}
++
+ static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio,
+ int *max_sectors)
+ {
+@@ -635,6 +641,7 @@ static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio,
+
+ rdev = conf->mirrors[disk].rdev;
+ if (!rdev || test_bit(Faulty, &rdev->flags) ||
++ rdev_in_recovery(rdev, r1_bio) ||
+ test_bit(WriteMostly, &rdev->flags))
+ continue;
+
+@@ -673,7 +680,8 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio,
+
+ rdev = conf->mirrors[disk].rdev;
+ if (!rdev || test_bit(Faulty, &rdev->flags) ||
+- !test_bit(WriteMostly, &rdev->flags))
++ !test_bit(WriteMostly, &rdev->flags) ||
++ rdev_in_recovery(rdev, r1_bio))
+ continue;
+
+ /* there are no bad blocks, we can use this disk */
+@@ -733,9 +741,7 @@ static bool rdev_readable(struct md_rdev *rdev, struct r1bio *r1_bio)
+ if (!rdev || test_bit(Faulty, &rdev->flags))
+ return false;
+
+- /* still in recovery */
+- if (!test_bit(In_sync, &rdev->flags) &&
+- rdev->recovery_offset < r1_bio->sector + r1_bio->sectors)
++ if (rdev_in_recovery(rdev, r1_bio))
+ return false;
+
+ /* don't read from slow disk unless have to */
+--
+2.46.0
+
--- /dev/null
+From 63de936b513f7a9ce559194d3269ac291f4f4662 Mon Sep 17 00:00:00 2001
+From: Hans de Goede <hdegoede@redhat.com>
+Date: Sun, 21 Jul 2024 17:38:40 +0200
+Subject: media: atomisp: Fix streaming no longer working on BYT / ISP2400 devices
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+commit 63de936b513f7a9ce559194d3269ac291f4f4662 upstream.
+
+Commit a0821ca14bb8 ("media: atomisp: Remove test pattern generator (TPG)
+support") broke BYT support because it removed a seemingly unused field
+from struct sh_css_sp_config and a seemingly unused value from enum
+ia_css_input_mode.
+
+But these are part of the ABI between the kernel and firmware on ISP2400,
+so this part of the TPG support removal broke ISP2400 support.
+
+ISP2401 support was not affected because on ISP2401 only a part of
+struct sh_css_sp_config is used.
+
+Restore the removed field and enum value to fix this.
+
+Fixes: a0821ca14bb8 ("media: atomisp: Remove test pattern generator (TPG) support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/staging/media/atomisp/pci/ia_css_stream_public.h | 8 ++++--
+ drivers/staging/media/atomisp/pci/sh_css_internal.h | 19 ++++++++++++---
+ 2 files changed, 22 insertions(+), 5 deletions(-)
+
+--- a/drivers/staging/media/atomisp/pci/ia_css_stream_public.h
++++ b/drivers/staging/media/atomisp/pci/ia_css_stream_public.h
+@@ -27,12 +27,16 @@
+ #include "ia_css_prbs.h"
+ #include "ia_css_input_port.h"
+
+-/* Input modes, these enumerate all supported input modes.
+- * Note that not all ISP modes support all input modes.
++/*
++ * Input modes, these enumerate all supported input modes.
++ * This enum is part of the atomisp firmware ABI and must
++ * NOT be changed!
++ * Note that not all ISP modes support all input modes.
+ */
+ enum ia_css_input_mode {
+ IA_CSS_INPUT_MODE_SENSOR, /** data from sensor */
+ IA_CSS_INPUT_MODE_FIFO, /** data from input-fifo */
++ IA_CSS_INPUT_MODE_TPG, /** data from test-pattern generator */
+ IA_CSS_INPUT_MODE_PRBS, /** data from pseudo-random bit stream */
+ IA_CSS_INPUT_MODE_MEMORY, /** data from a frame in memory */
+ IA_CSS_INPUT_MODE_BUFFERED_SENSOR /** data is sent through mipi buffer */
+--- a/drivers/staging/media/atomisp/pci/sh_css_internal.h
++++ b/drivers/staging/media/atomisp/pci/sh_css_internal.h
+@@ -341,7 +341,14 @@ struct sh_css_sp_input_formatter_set {
+
+ #define IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT (3)
+
+-/* SP configuration information */
++/*
++ * SP configuration information
++ *
++ * This struct is part of the atomisp firmware ABI and is directly copied
++ * to ISP DRAM by sh_css_store_sp_group_to_ddr()
++ *
++ * Do NOT change this struct's layout or remove seemingly unused fields!
++ */
+ struct sh_css_sp_config {
+ u8 no_isp_sync; /* Signal host immediately after start */
+ u8 enable_raw_pool_locking; /** Enable Raw Buffer Locking for HALv3 Support */
+@@ -351,6 +358,10 @@ struct sh_css_sp_config {
+ host (true) or when they are passed to the preview/video pipe
+ (false). */
+
++ /*
++ * Note the fields below are only used on the ISP2400 not on the ISP2401,
++ * sh_css_store_sp_group_to_ddr() skip copying these when run on the ISP2401.
++ */
+ struct {
+ u8 a_changed;
+ u8 b_changed;
+@@ -360,11 +371,13 @@ struct sh_css_sp_config {
+ } input_formatter;
+
+ sync_generator_cfg_t sync_gen;
++ tpg_cfg_t tpg;
+ prbs_cfg_t prbs;
+ input_system_cfg_t input_circuit;
+ u8 input_circuit_cfg_changed;
+- u32 mipi_sizes_for_check[N_CSI_PORTS][IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT];
+- u8 enable_isys_event_queue;
++ u32 mipi_sizes_for_check[N_CSI_PORTS][IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT];
++ /* These last 2 fields are used on both the ISP2400 and the ISP2401 */
++ u8 enable_isys_event_queue;
+ u8 disable_cont_vf;
+ };
+
--- /dev/null
+From 046667c4d3196938e992fba0dfcde570aa85cd0e Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Sun, 21 Jul 2024 14:45:08 -0400
+Subject: memcg_write_event_control(): fix a user-triggerable oops
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 046667c4d3196938e992fba0dfcde570aa85cd0e upstream.
+
+we are *not* guaranteed that anything past the terminating NUL
+is mapped (let alone initialized with anything sane).
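+
+Concretely, the old parsing did:
+
+  cfd = simple_strtoul(buf, &endp, 10);
+  if ((*endp != ' ') && (*endp != '\0'))
+          return -EINVAL;
+  buf = endp + 1;   /* with *endp == '\0' this points past the NUL */
+
+so when the control string ended right after the number, 'buf' was
+advanced one byte beyond the terminating NUL before further use.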
+
+Fixes: 0dea116876ee ("cgroup: implement eventfd-based generic API for notifications")
+Cc: stable@vger.kernel.org
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memcontrol.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -5282,9 +5282,12 @@ static ssize_t memcg_write_event_control
+ buf = endp + 1;
+
+ cfd = simple_strtoul(buf, &endp, 10);
+- if ((*endp != ' ') && (*endp != '\0'))
++ if (*endp == '\0')
++ buf = endp;
++ else if (*endp == ' ')
++ buf = endp + 1;
++ else
+ return -EINVAL;
+- buf = endp + 1;
+
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (!event)
--- /dev/null
+From 807174a93d24c456503692dc3f5af322ee0b640a Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Fri, 9 Aug 2024 14:48:47 +0300
+Subject: mm: fix endless reclaim on machines with unaccepted memory
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit 807174a93d24c456503692dc3f5af322ee0b640a upstream.
+
+Unaccepted memory is considered unusable free memory, which is not counted
+as free on the zone watermark check. This causes get_page_from_freelist()
+to accept more memory to hit the high watermark, but it creates problems
+in the reclaim path.
+
+The reclaim path encounters a failed zone watermark check and attempts to
+reclaim memory. This is usually successful, but if there is little or no
+reclaimable memory, it can result in endless reclaim with little to no
+progress. This can occur early in the boot process, just after the start of
+the init process, when the only reclaimable memory is the page cache of the
+init executable and its libraries.
+
+Make unaccepted memory count as free from the watermark check's point of
+view. This way unaccepted memory will never be the trigger of memory
+reclaim. Accept more memory in get_page_from_freelist() if needed.
+
+Link: https://lkml.kernel.org/r/20240809114854.3745464-2-kirill.shutemov@linux.intel.com
+Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory")
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reported-by: Jianxiong Gao <jxgao@google.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org> [6.5+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_alloc.c | 42 ++++++++++++++++++++----------------------
+ 1 file changed, 20 insertions(+), 22 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes);
+
+ static bool page_contains_unaccepted(struct page *page, unsigned int order);
+ static void accept_page(struct page *page, unsigned int order);
+-static bool try_to_accept_memory(struct zone *zone, unsigned int order);
++static bool cond_accept_memory(struct zone *zone, unsigned int order);
+ static inline bool has_unaccepted_memory(void);
+ static bool __free_unaccepted(struct page *page);
+
+@@ -3059,9 +3059,6 @@ static inline long __zone_watermark_unus
+ if (!(alloc_flags & ALLOC_CMA))
+ unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
+ #endif
+-#ifdef CONFIG_UNACCEPTED_MEMORY
+- unusable_free += zone_page_state(z, NR_UNACCEPTED);
+-#endif
+
+ return unusable_free;
+ }
+@@ -3355,6 +3352,8 @@ retry:
+ }
+ }
+
++ cond_accept_memory(zone, order);
++
+ /*
+ * Detect whether the number of free pages is below high
+ * watermark. If so, we will decrease pcp->high and free
+@@ -3380,10 +3379,8 @@ check_alloc_wmark:
+ gfp_mask)) {
+ int ret;
+
+- if (has_unaccepted_memory()) {
+- if (try_to_accept_memory(zone, order))
+- goto try_this_zone;
+- }
++ if (cond_accept_memory(zone, order))
++ goto try_this_zone;
+
+ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+ /*
+@@ -3437,10 +3434,8 @@ try_this_zone:
+
+ return page;
+ } else {
+- if (has_unaccepted_memory()) {
+- if (try_to_accept_memory(zone, order))
+- goto try_this_zone;
+- }
++ if (cond_accept_memory(zone, order))
++ goto try_this_zone;
+
+ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+ /* Try again if zone has deferred pages */
+@@ -6933,9 +6928,6 @@ static bool try_to_accept_memory_one(str
+ struct page *page;
+ bool last;
+
+- if (list_empty(&zone->unaccepted_pages))
+- return false;
+-
+ spin_lock_irqsave(&zone->lock, flags);
+ page = list_first_entry_or_null(&zone->unaccepted_pages,
+ struct page, lru);
+@@ -6961,23 +6953,29 @@ static bool try_to_accept_memory_one(str
+ return true;
+ }
+
+-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
++static bool cond_accept_memory(struct zone *zone, unsigned int order)
+ {
+ long to_accept;
+- int ret = false;
++ bool ret = false;
++
++ if (!has_unaccepted_memory())
++ return false;
++
++ if (list_empty(&zone->unaccepted_pages))
++ return false;
+
+ /* How much to accept to get to high watermark? */
+ to_accept = high_wmark_pages(zone) -
+ (zone_page_state(zone, NR_FREE_PAGES) -
+- __zone_watermark_unusable_free(zone, order, 0));
++ __zone_watermark_unusable_free(zone, order, 0) -
++ zone_page_state(zone, NR_UNACCEPTED));
+
+- /* Accept at least one page */
+- do {
++ while (to_accept > 0) {
+ if (!try_to_accept_memory_one(zone))
+ break;
+ ret = true;
+ to_accept -= MAX_ORDER_NR_PAGES;
+- } while (to_accept > 0);
++ }
+
+ return ret;
+ }
+@@ -7020,7 +7018,7 @@ static void accept_page(struct page *pag
+ {
+ }
+
+-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
++static bool cond_accept_memory(struct zone *zone, unsigned int order)
+ {
+ return false;
+ }
--- /dev/null
+From 5f75cfbd6bb02295ddaed48adf667b6c828ce07b Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Thu, 1 Aug 2024 22:47:48 +0200
+Subject: mm/hugetlb: fix hugetlb vs. core-mm PT locking
+
+From: David Hildenbrand <david@redhat.com>
+
+commit 5f75cfbd6bb02295ddaed48adf667b6c828ce07b upstream.
+
+We recently made GUP's common page table walking code also walk hugetlb
+VMAs without most hugetlb special-casing, preparing for the future of
+having less hugetlb-specific page table walking code in the codebase.
+Turns out that we missed one page table locking detail: page table locking
+for hugetlb folios that are not mapped using a single PMD/PUD.
+
+Assume we have a hugetlb folio that spans multiple PTEs (e.g., 64 KiB
+hugetlb folios on arm64 with 4 KiB base page size). GUP, as it walks the
+page tables, will perform a pte_offset_map_lock() to grab the PTE table
+lock.
+
+However, hugetlb that concurrently modifies these page tables would
+actually grab the mm->page_table_lock: with USE_SPLIT_PTE_PTLOCKS, the
+locks would differ. Something similar can happen right now with hugetlb
+folios that span multiple PMDs when USE_SPLIT_PMD_PTLOCKS.
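+
+Illustratively, a hedged sketch of the mismatch (not the exact upstream
+call sites):
+
+  #include <linux/mm.h>
+  #include <linux/hugetlb.h>
+
+  static void lock_mismatch_sketch(struct mm_struct *mm, pmd_t *pmd,
+                                   unsigned long addr, struct hstate *h)
+  {
+          spinlock_t *ptl, *hptl;
+          pte_t *pte;
+
+          /* GUP-style walker: takes the split PTE page table lock */
+          pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+          if (!pte)
+                  return;
+
+          /* hugetlb before this fix: could pick mm->page_table_lock,
+           * a different lock for the very same PTE table */
+          hptl = huge_pte_lockptr(h, mm, pte);
+          WARN_ON(hptl != ptl);   /* the bug: these could differ */
+
+          pte_unmap_unlock(pte, ptl);
+  }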
+
+This issue can be reproduced [1], for example triggering:
+
+[ 3105.936100] ------------[ cut here ]------------
+[ 3105.939323] WARNING: CPU: 31 PID: 2732 at mm/gup.c:142 try_grab_folio+0x11c/0x188
+[ 3105.944634] Modules linked in: [...]
+[ 3105.974841] CPU: 31 PID: 2732 Comm: reproducer Not tainted 6.10.0-64.eln141.aarch64 #1
+[ 3105.980406] Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-4.fc40 05/24/2024
+[ 3105.986185] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+[ 3105.991108] pc : try_grab_folio+0x11c/0x188
+[ 3105.994013] lr : follow_page_pte+0xd8/0x430
+[ 3105.996986] sp : ffff80008eafb8f0
+[ 3105.999346] x29: ffff80008eafb900 x28: ffffffe8d481f380 x27: 00f80001207cff43
+[ 3106.004414] x26: 0000000000000001 x25: 0000000000000000 x24: ffff80008eafba48
+[ 3106.009520] x23: 0000ffff9372f000 x22: ffff7a54459e2000 x21: ffff7a546c1aa978
+[ 3106.014529] x20: ffffffe8d481f3c0 x19: 0000000000610041 x18: 0000000000000001
+[ 3106.019506] x17: 0000000000000001 x16: ffffffffffffffff x15: 0000000000000000
+[ 3106.024494] x14: ffffb85477fdfe08 x13: 0000ffff9372ffff x12: 0000000000000000
+[ 3106.029469] x11: 1fffef4a88a96be1 x10: ffff7a54454b5f0c x9 : ffffb854771b12f0
+[ 3106.034324] x8 : 0008000000000000 x7 : ffff7a546c1aa980 x6 : 0008000000000080
+[ 3106.038902] x5 : 00000000001207cf x4 : 0000ffff9372f000 x3 : ffffffe8d481f000
+[ 3106.043420] x2 : 0000000000610041 x1 : 0000000000000001 x0 : 0000000000000000
+[ 3106.047957] Call trace:
+[ 3106.049522] try_grab_folio+0x11c/0x188
+[ 3106.051996] follow_pmd_mask.constprop.0.isra.0+0x150/0x2e0
+[ 3106.055527] follow_page_mask+0x1a0/0x2b8
+[ 3106.058118] __get_user_pages+0xf0/0x348
+[ 3106.060647] faultin_page_range+0xb0/0x360
+[ 3106.063651] do_madvise+0x340/0x598
+
+Let's make huge_pte_lockptr() effectively use the same PT locks as any
+core-mm page table walker would. Add ptep_lockptr() to obtain the PTE
+page table lock using a pte pointer -- unfortunately we cannot convert
+pte_lockptr() because virt_to_page() doesn't work with kmap'ed page tables
+we can have with CONFIG_HIGHPTE.
+
+Handle CONFIG_PGTABLE_LEVELS correctly by checking in reverse order, such
+that when e.g., CONFIG_PGTABLE_LEVELS==2 with
+PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE will work as expected. Document
+why that works.
+
+There is one ugly case: powerpc 8xx, whereby we have an 8 MiB hugetlb
+folio being mapped using two PTE page tables. While hugetlb wants to take
+the PMD table lock, core-mm would grab the PTE table lock of one of both
+PTE page tables. In such corner cases, we have to make sure that both
+locks match, which is (fortunately!) currently guaranteed for 8xx as it
+does not support SMP and consequently doesn't use split PT locks.
+
+[1] https://lore.kernel.org/all/1bbfcc7f-f222-45a5-ac44-c5a1381c596d@redhat.com/
+
+Link: https://lkml.kernel.org/r/20240801204748.99107-1-david@redhat.com
+Fixes: 9cb28da54643 ("mm/gup: handle hugetlb in the generic follow_page_mask code")
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Acked-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Muchun Song <muchun.song@linux.dev>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/hugetlb.h | 33 ++++++++++++++++++++++++++++++---
+ include/linux/mm.h | 11 +++++++++++
+ 2 files changed, 41 insertions(+), 3 deletions(-)
+
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -967,10 +967,37 @@ static inline bool htlb_allow_alloc_fall
+ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
+ struct mm_struct *mm, pte_t *pte)
+ {
+- if (huge_page_size(h) == PMD_SIZE)
++ const unsigned long size = huge_page_size(h);
++
++ VM_WARN_ON(size == PAGE_SIZE);
++
++ /*
++ * hugetlb must use the exact same PT locks as core-mm page table
++ * walkers would. When modifying a PTE table, hugetlb must take the
++ * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD
++ * PT lock etc.
++ *
++ * The expectation is that any hugetlb folio smaller than a PMD is
++ * always mapped into a single PTE table and that any hugetlb folio
++ * smaller than a PUD (but at least as big as a PMD) is always mapped
++ * into a single PMD table.
++ *
++ * If that does not hold for an architecture, then that architecture
++ * must disable split PT locks such that all *_lockptr() functions
++ * will give us the same result: the per-MM PT lock.
++ *
++ * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where
++ * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr()
++ * and core-mm would use pmd_lockptr(). However, in such configurations
++ * split PMD locks are disabled -- they don't make sense on a single
++ * PGDIR page table -- and the end result is the same.
++ */
++ if (size >= PUD_SIZE)
++ return pud_lockptr(mm, (pud_t *) pte);
++ else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE))
+ return pmd_lockptr(mm, (pmd_t *) pte);
+- VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
+- return &mm->page_table_lock;
++ /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */
++ return ptep_lockptr(mm, pte);
+ }
+
+ #ifndef hugepages_supported
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -2960,6 +2960,13 @@ static inline spinlock_t *pte_lockptr(st
+ return ptlock_ptr(page_ptdesc(pmd_page(*pmd)));
+ }
+
++static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
++{
++ BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE));
++ BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE);
++ return ptlock_ptr(virt_to_ptdesc(pte));
++}
++
+ static inline bool ptlock_init(struct ptdesc *ptdesc)
+ {
+ /*
+@@ -2984,6 +2991,10 @@ static inline spinlock_t *pte_lockptr(st
+ {
+ return &mm->page_table_lock;
+ }
++static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
++{
++ return &mm->page_table_lock;
++}
+ static inline void ptlock_cache_init(void) {}
+ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; }
+ static inline void ptlock_free(struct ptdesc *ptdesc) {}
--- /dev/null
+From d75abd0d0bc29e6ebfebbf76d11b4067b35844af Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Tue, 6 Aug 2024 12:41:07 -0400
+Subject: mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu
+
+From: Waiman Long <longman@redhat.com>
+
+commit d75abd0d0bc29e6ebfebbf76d11b4067b35844af upstream.
+
+The memory_failure_cpu structure is a per-cpu structure. Access to its
+content requires the use of get_cpu_var() to lock in the current CPU and
+disable preemption. The use of a regular spinlock_t for locking purpose
+is fine for a non-RT kernel.
+
+Since the integration of RT spinlock support into the v5.15 kernel, a
+spinlock_t in an RT kernel becomes a sleeping lock, and taking a sleeping
+lock in a preemption-disabled context is illegal, resulting in the
+following kind of warning.
+
+ [12135.732244] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
+ [12135.732248] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 270076, name: kworker/0:0
+ [12135.732252] preempt_count: 1, expected: 0
+ [12135.732255] RCU nest depth: 2, expected: 2
+ :
+ [12135.732420] Hardware name: Dell Inc. PowerEdge R640/0HG0J8, BIOS 2.10.2 02/24/2021
+ [12135.732423] Workqueue: kacpi_notify acpi_os_execute_deferred
+ [12135.732433] Call Trace:
+ [12135.732436] <TASK>
+ [12135.732450] dump_stack_lvl+0x57/0x81
+ [12135.732461] __might_resched.cold+0xf4/0x12f
+ [12135.732479] rt_spin_lock+0x4c/0x100
+ [12135.732491] memory_failure_queue+0x40/0xe0
+ [12135.732503] ghes_do_memory_failure+0x53/0x390
+ [12135.732516] ghes_do_proc.constprop.0+0x229/0x3e0
+ [12135.732575] ghes_proc+0xf9/0x1a0
+ [12135.732591] ghes_notify_hed+0x6a/0x150
+ [12135.732602] notifier_call_chain+0x43/0xb0
+ [12135.732626] blocking_notifier_call_chain+0x43/0x60
+ [12135.732637] acpi_ev_notify_dispatch+0x47/0x70
+ [12135.732648] acpi_os_execute_deferred+0x13/0x20
+ [12135.732654] process_one_work+0x41f/0x500
+ [12135.732695] worker_thread+0x192/0x360
+ [12135.732715] kthread+0x111/0x140
+ [12135.732733] ret_from_fork+0x29/0x50
+ [12135.732779] </TASK>
+
+Fix it by using a raw_spinlock_t for locking instead.
+
+Also move the pr_err() out of the lock critical section to after
+put_cpu_var() to avoid indeterminate latency and the possibility of
+sleeping in that call.
+
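+As a rough userspace analogue of that ordering (pthread names used only
+for illustration; the kernel side uses raw_spin_lock_irqsave() and
+put_cpu_var()):
+
+    #include <pthread.h>
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    static pthread_spinlock_t lock;
+    static int fifo_free = 1;
+
+    /* Decide under the lock, report after dropping it, so no
+     * potentially sleeping call runs inside the critical section. */
+    static void queue_one(void)
+    {
+        bool overflow;
+
+        pthread_spin_lock(&lock);
+        overflow = (fifo_free == 0);    /* kfifo_put() failing */
+        if (!overflow)
+            fifo_free--;
+        pthread_spin_unlock(&lock);
+        if (overflow)
+            fprintf(stderr, "buffer overflow\n");
+    }
+
+    int main(void)
+    {
+        pthread_spin_init(&lock, 0);
+        queue_one();
+        queue_one();    /* the second one overflows */
+        return 0;
+    }
+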
+[longman@redhat.com: don't hold percpu ref across pr_err(), per Miaohe]
+ Link: https://lkml.kernel.org/r/20240807181130.1122660-1-longman@redhat.com
+Link: https://lkml.kernel.org/r/20240806164107.1044956-1-longman@redhat.com
+Fixes: 0f383b6dc96e ("locking/spinlock: Provide RT variant")
+Signed-off-by: Waiman Long <longman@redhat.com>
+Acked-by: Miaohe Lin <linmiaohe@huawei.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Juri Lelli <juri.lelli@redhat.com>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory-failure.c | 20 +++++++++++---------
+ 1 file changed, 11 insertions(+), 9 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -2406,7 +2406,7 @@ struct memory_failure_entry {
+ struct memory_failure_cpu {
+ DECLARE_KFIFO(fifo, struct memory_failure_entry,
+ MEMORY_FAILURE_FIFO_SIZE);
+- spinlock_t lock;
++ raw_spinlock_t lock;
+ struct work_struct work;
+ };
+
+@@ -2432,20 +2432,22 @@ void memory_failure_queue(unsigned long
+ {
+ struct memory_failure_cpu *mf_cpu;
+ unsigned long proc_flags;
++ bool buffer_overflow;
+ struct memory_failure_entry entry = {
+ .pfn = pfn,
+ .flags = flags,
+ };
+
+ mf_cpu = &get_cpu_var(memory_failure_cpu);
+- spin_lock_irqsave(&mf_cpu->lock, proc_flags);
+- if (kfifo_put(&mf_cpu->fifo, entry))
++ raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
++ buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry);
++ if (!buffer_overflow)
+ schedule_work_on(smp_processor_id(), &mf_cpu->work);
+- else
++ raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
++ put_cpu_var(memory_failure_cpu);
++ if (buffer_overflow)
+ pr_err("buffer overflow when queuing memory failure at %#lx\n",
+ pfn);
+- spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
+- put_cpu_var(memory_failure_cpu);
+ }
+ EXPORT_SYMBOL_GPL(memory_failure_queue);
+
+@@ -2458,9 +2460,9 @@ static void memory_failure_work_func(str
+
+ mf_cpu = container_of(work, struct memory_failure_cpu, work);
+ for (;;) {
+- spin_lock_irqsave(&mf_cpu->lock, proc_flags);
++ raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
+ gotten = kfifo_get(&mf_cpu->fifo, &entry);
+- spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
++ raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
+ if (!gotten)
+ break;
+ if (entry.flags & MF_SOFT_OFFLINE)
+@@ -2490,7 +2492,7 @@ static int __init memory_failure_init(vo
+
+ for_each_possible_cpu(cpu) {
+ mf_cpu = &per_cpu(memory_failure_cpu, cpu);
+- spin_lock_init(&mf_cpu->lock);
++ raw_spin_lock_init(&mf_cpu->lock);
+ INIT_KFIFO(mf_cpu->fifo);
+ INIT_WORK(&mf_cpu->work, memory_failure_work_func);
+ }
--- /dev/null
+From fd8c35a92910f4829b7c99841f39b1b952c259d5 Mon Sep 17 00:00:00 2001
+From: Zi Yan <ziy@nvidia.com>
+Date: Fri, 9 Aug 2024 10:59:05 -0400
+Subject: mm/numa: no task_numa_fault() call if PMD is changed
+
+From: Zi Yan <ziy@nvidia.com>
+
+commit fd8c35a92910f4829b7c99841f39b1b952c259d5 upstream.
+
+When handling a numa page fault, task_numa_fault() should be called by a
+process that restores the page table of the faulted folio to avoid
+duplicated stats counting. Commit c5b5a3dd2c1f ("mm: thp: refactor NUMA
+fault handling") restructured do_huge_pmd_numa_page() and did not avoid
+task_numa_fault() call in the second page table check after a numa
+migration failure. Fix it by making all !pmd_same() return immediately.
+
+This issue can cause task_numa_fault() to be called more often than
+necessary and lead to unexpected numa balancing results (it is hard to
+tell whether the issue causes a positive or negative performance impact
+due to the duplicated numa fault counting).
+
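+The accounting rule can be condensed into a toy model (plain C, with
+invented names; not kernel code):
+
+    #include <stdio.h>
+
+    static int numa_faults;    /* stand-in for the per-task stats */
+
+    /* Only the path that actually restores the mapping accounts the
+     * fault; a racing "page table changed" exit must not count it. */
+    static void handle_fault(int changed_under_us)
+    {
+        if (changed_under_us)
+            return;            /* someone else restored it */
+        /* ... restore the PMD ... */
+        numa_faults++;         /* task_numa_fault() in the kernel */
+    }
+
+    int main(void)
+    {
+        handle_fault(1);
+        handle_fault(0);
+        printf("faults counted: %d\n", numa_faults);    /* 1, not 2 */
+        return 0;
+    }
+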
+Link: https://lkml.kernel.org/r/20240809145906.1513458-3-ziy@nvidia.com
+Fixes: c5b5a3dd2c1f ("mm: thp: refactor NUMA fault handling")
+Reported-by: "Huang, Ying" <ying.huang@intel.com>
+Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@yhuang6-desk2.ccr.corp.intel.com/
+Signed-off-by: Zi Yan <ziy@nvidia.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/huge_memory.c | 29 +++++++++++++----------------
+ 1 file changed, 13 insertions(+), 16 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1672,7 +1672,7 @@ vm_fault_t do_huge_pmd_numa_page(struct
+ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+ if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
+ spin_unlock(vmf->ptl);
+- goto out;
++ return 0;
+ }
+
+ pmd = pmd_modify(oldpmd, vma->vm_page_prot);
+@@ -1715,22 +1715,16 @@ vm_fault_t do_huge_pmd_numa_page(struct
+ if (!migrate_misplaced_folio(folio, vma, target_nid)) {
+ flags |= TNF_MIGRATED;
+ nid = target_nid;
+- } else {
+- flags |= TNF_MIGRATE_FAIL;
+- vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+- if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
+- spin_unlock(vmf->ptl);
+- goto out;
+- }
+- goto out_map;
+- }
+-
+-out:
+- if (nid != NUMA_NO_NODE)
+ task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
++ return 0;
++ }
+
+- return 0;
+-
++ flags |= TNF_MIGRATE_FAIL;
++ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
++ if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
++ spin_unlock(vmf->ptl);
++ return 0;
++ }
+ out_map:
+ /* Restore the PMD */
+ pmd = pmd_modify(oldpmd, vma->vm_page_prot);
+@@ -1740,7 +1734,10 @@ out_map:
+ set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
+ update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
+ spin_unlock(vmf->ptl);
+- goto out;
++
++ if (nid != NUMA_NO_NODE)
++ task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
++ return 0;
+ }
+
+ /*
--- /dev/null
+From 40b760cfd44566bca791c80e0720d70d75382b84 Mon Sep 17 00:00:00 2001
+From: Zi Yan <ziy@nvidia.com>
+Date: Fri, 9 Aug 2024 10:59:04 -0400
+Subject: mm/numa: no task_numa_fault() call if PTE is changed
+
+From: Zi Yan <ziy@nvidia.com>
+
+commit 40b760cfd44566bca791c80e0720d70d75382b84 upstream.
+
+When handling a numa page fault, task_numa_fault() should be called by a
+process that restores the page table of the faulted folio to avoid
+duplicated stats counting. Commit b99a342d4f11 ("NUMA balancing: reduce
+TLB flush via delaying mapping on hint page fault") restructured
+do_numa_page() and did not avoid task_numa_fault() call in the second page
+table check after a numa migration failure. Fix it by making all
+!pte_same() return immediately.
+
+This issue can cause task_numa_fault() to be called more often than
+necessary and lead to unexpected numa balancing results (it is hard to
+tell whether the issue causes a positive or negative performance impact
+due to the duplicated numa fault counting).
+
+Link: https://lkml.kernel.org/r/20240809145906.1513458-2-ziy@nvidia.com
+Fixes: b99a342d4f11 ("NUMA balancing: reduce TLB flush via delaying mapping on hint page fault")
+Signed-off-by: Zi Yan <ziy@nvidia.com>
+Reported-by: "Huang, Ying" <ying.huang@intel.com>
+Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@yhuang6-desk2.ccr.corp.intel.com/
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory.c | 33 ++++++++++++++++-----------------
+ 1 file changed, 16 insertions(+), 17 deletions(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -5155,7 +5155,7 @@ static vm_fault_t do_numa_page(struct vm
+
+ if (unlikely(!pte_same(old_pte, vmf->orig_pte))) {
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+- goto out;
++ return 0;
+ }
+
+ pte = pte_modify(old_pte, vma->vm_page_prot);
+@@ -5218,23 +5218,19 @@ static vm_fault_t do_numa_page(struct vm
+ if (!migrate_misplaced_folio(folio, vma, target_nid)) {
+ nid = target_nid;
+ flags |= TNF_MIGRATED;
+- } else {
+- flags |= TNF_MIGRATE_FAIL;
+- vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+- vmf->address, &vmf->ptl);
+- if (unlikely(!vmf->pte))
+- goto out;
+- if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+- pte_unmap_unlock(vmf->pte, vmf->ptl);
+- goto out;
+- }
+- goto out_map;
++ task_numa_fault(last_cpupid, nid, nr_pages, flags);
++ return 0;
+ }
+
+-out:
+- if (nid != NUMA_NO_NODE)
+- task_numa_fault(last_cpupid, nid, nr_pages, flags);
+- return 0;
++ flags |= TNF_MIGRATE_FAIL;
++ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
++ vmf->address, &vmf->ptl);
++ if (unlikely(!vmf->pte))
++ return 0;
++ if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
++ pte_unmap_unlock(vmf->pte, vmf->ptl);
++ return 0;
++ }
+ out_map:
+ /*
+ * Make it present again, depending on how arch implements
+@@ -5247,7 +5243,10 @@ out_map:
+ numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte,
+ writable);
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+- goto out;
++
++ if (nid != NUMA_NO_NODE)
++ task_numa_fault(last_cpupid, nid, nr_pages, flags);
++ return 0;
+ }
+
+ static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
--- /dev/null
+From 61ebe5a747da649057c37be1c37eb934b4af79ca Mon Sep 17 00:00:00 2001
+From: Hailong Liu <hailong.liu@oppo.com>
+Date: Thu, 8 Aug 2024 20:19:56 +0800
+Subject: mm/vmalloc: fix page mapping if vm_area_alloc_pages() with high order fallback to order 0
+
+From: Hailong Liu <hailong.liu@oppo.com>
+
+commit 61ebe5a747da649057c37be1c37eb934b4af79ca upstream.
+
+__vmap_pages_range_noflush() assumes its pages** argument contains
+pages with the same page shift. However, since commit e9c3cda4d86e ("mm,
+vmalloc: fix high order __GFP_NOFAIL allocations"), if gfp_flags includes
+__GFP_NOFAIL with a high order in vm_area_alloc_pages() and the
+high-order page allocation fails, pages** may contain two different page
+shifts (high order and order-0). This could lead
+__vmap_pages_range_noflush() to perform incorrect mappings, potentially
+resulting in memory corruption.
+
+Users might encounter this as follows (vmap_allow_huge = true, 2M is for
+PMD_SIZE):
+
+kvmalloc(2M, __GFP_NOFAIL|GFP_X)
+ __vmalloc_node_range_noprof(vm_flags=VM_ALLOW_HUGE_VMAP)
+ vm_area_alloc_pages(order=9) ---> order-9 allocation failed and fallback to order-0
+ vmap_pages_range()
+ vmap_pages_range_noflush()
+ __vmap_pages_range_noflush(page_shift = 21) ----> wrong mapping happens
+
+If a high-order allocation fails, __vmalloc_node_range_noprof() will
+retry with order-0, so falling back to order-0 here is unnecessary.
+Fix this by removing the fallback code.
+
+Link: https://lkml.kernel.org/r/20240808122019.3361-1-hailong.liu@oppo.com
+Fixes: e9c3cda4d86e ("mm, vmalloc: fix high order __GFP_NOFAIL allocations")
+Signed-off-by: Hailong Liu <hailong.liu@oppo.com>
+Reported-by: Tangquan Zheng <zhengtangquan@oppo.com>
+Reviewed-by: Baoquan He <bhe@redhat.com>
+Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Acked-by: Barry Song <baohua@kernel.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/vmalloc.c | 11 ++---------
+ 1 file changed, 2 insertions(+), 9 deletions(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -3583,15 +3583,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
+ page = alloc_pages_noprof(alloc_gfp, order);
+ else
+ page = alloc_pages_node_noprof(nid, alloc_gfp, order);
+- if (unlikely(!page)) {
+- if (!nofail)
+- break;
+-
+- /* fall back to the zero order allocations */
+- alloc_gfp |= __GFP_NOFAIL;
+- order = 0;
+- continue;
+- }
++ if (unlikely(!page))
++ break;
+
+ /*
+ * Higher order allocations must be able to be treated as
--- /dev/null
+From e46bc2e7eb90a370bc27fa2fd98cb8251e7da1ec Mon Sep 17 00:00:00 2001
+From: Pedro Falcato <pedro.falcato@gmail.com>
+Date: Wed, 7 Aug 2024 18:33:35 +0100
+Subject: mseal: fix is_madv_discard()
+
+From: Pedro Falcato <pedro.falcato@gmail.com>
+
+commit e46bc2e7eb90a370bc27fa2fd98cb8251e7da1ec upstream.
+
+is_madv_discard() did its check wrong. MADV_ flags are not bitwise
+flags; they're ordinary sequential numbers. So, for instance:
+ behavior & (/* ... */ | MADV_REMOVE)
+
+tagged both MADV_REMOVE and MADV_RANDOM (bit 0 set) as discard
+operations.
+
+As a result, the kernel could erroneously block certain madvise calls
+(e.g. MADV_RANDOM or MADV_HUGEPAGE) on sealed VMAs because they share
+bits with blocked MADV operations (e.g. MADV_REMOVE or MADV_WIPEONFORK).
+
+This is obviously incorrect, so use a switch statement instead.
+
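+This is easy to verify from userspace: on Linux, MADV_RANDOM is 1 and
+MADV_REMOVE is 9, so the old bitwise test cannot tell them apart (a
+standalone demonstration, not part of the patch):
+
+    #include <stdio.h>
+    #include <sys/mman.h>
+
+    int main(void)
+    {
+        /* 9 & 1 == 1: the old test tagged MADV_RANDOM as discard */
+        printf("MADV_RANDOM=%d MADV_REMOVE=%d and=%d\n",
+               MADV_RANDOM, MADV_REMOVE, MADV_RANDOM & MADV_REMOVE);
+        return 0;
+    }
+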
+Link: https://lkml.kernel.org/r/20240807173336.2523757-1-pedro.falcato@gmail.com
+Link: https://lkml.kernel.org/r/20240807173336.2523757-2-pedro.falcato@gmail.com
+Fixes: 8be7258aad44 ("mseal: add mseal syscall")
+Signed-off-by: Pedro Falcato <pedro.falcato@gmail.com>
+Tested-by: Jeff Xu <jeffxu@chromium.org>
+Reviewed-by: Jeff Xu <jeffxu@chromium.org>
+Cc: Kees Cook <kees@kernel.org>
+Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mseal.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+diff --git a/mm/mseal.c b/mm/mseal.c
+index bf783bba8ed0..15bba28acc00 100644
+--- a/mm/mseal.c
++++ b/mm/mseal.c
+@@ -40,9 +40,17 @@ static bool can_modify_vma(struct vm_area_struct *vma)
+
+ static bool is_madv_discard(int behavior)
+ {
+- return behavior &
+- (MADV_FREE | MADV_DONTNEED | MADV_DONTNEED_LOCKED |
+- MADV_REMOVE | MADV_DONTFORK | MADV_WIPEONFORK);
++ switch (behavior) {
++ case MADV_FREE:
++ case MADV_DONTNEED:
++ case MADV_DONTNEED_LOCKED:
++ case MADV_REMOVE:
++ case MADV_DONTFORK:
++ case MADV_WIPEONFORK:
++ return true;
++ }
++
++ return false;
+ }
+
+ static bool is_ro_anon(struct vm_area_struct *vma)
+--
+2.46.0
+
--- /dev/null
+From 58a63729c957621f1990c3494c702711188ca347 Mon Sep 17 00:00:00 2001
+From: Long Li <longli@microsoft.com>
+Date: Fri, 9 Aug 2024 08:58:58 -0700
+Subject: net: mana: Fix doorbell out of order violation and avoid unnecessary doorbell rings
+
+From: Long Li <longli@microsoft.com>
+
+commit 58a63729c957621f1990c3494c702711188ca347 upstream.
+
+After napi_complete_done() is called while NAPI is polling in the current
+process context, another NAPI instance may be scheduled, start running in
+softirq on another CPU, and ring the doorbell before the current CPU
+does. Combined with unnecessary rings when there is no need to arm
+the CQ, this triggers error paths in the hardware.
+
+This patch fixes this by calling napi_complete_done() after the doorbell
+rings. It limits the number of unnecessary rings when there is
+no need to arm. MANA hardware specifies that there must be one doorbell
+ring every 8 CQ wraparounds. This driver guarantees one doorbell ring as
+soon as the number of consumed CQEs exceeds 4 CQ wraparounds. In practical
+workloads, 4 CQ wraparounds prove to be a big enough window that the
+limit is rarely exceeded before all the NAPI weight is consumed.
+
+To implement this, add a per-CQ counter cq->work_done_since_doorbell,
+and make sure the doorbell is rung as soon as 4 wraparounds of the CQ
+have been passed.
+
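+The threshold arithmetic, with made-up sizes for illustration (the real
+queue size and COMP_ENTRY_SIZE come from the hardware setup):
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+        unsigned int queue_size = 65536;      /* hypothetical CQ bytes */
+        unsigned int comp_entry_size = 64;    /* hypothetical CQE bytes */
+
+        /* ring at latest after 4 wraparounds' worth of CQEs */
+        printf("ring after %u consumed CQEs\n",
+               queue_size / comp_entry_size * 4);
+        return 0;
+    }
+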
+Cc: stable@vger.kernel.org
+Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: Long Li <longli@microsoft.com>
+Link: https://patch.msgid.link/1723219138-29887-1-git-send-email-longli@linuxonhyperv.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/microsoft/mana/mana_en.c | 22 ++++++++++++++--------
+ include/net/mana/mana.h | 1 +
+ 2 files changed, 15 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
+@@ -1777,7 +1777,6 @@ static void mana_poll_rx_cq(struct mana_
+ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
+ {
+ struct mana_cq *cq = context;
+- u8 arm_bit;
+ int w;
+
+ WARN_ON_ONCE(cq->gdma_cq != gdma_queue);
+@@ -1788,16 +1787,23 @@ static int mana_cq_handler(void *context
+ mana_poll_tx_cq(cq);
+
+ w = cq->work_done;
++ cq->work_done_since_doorbell += w;
+
+- if (w < cq->budget &&
+- napi_complete_done(&cq->napi, w)) {
+- arm_bit = SET_ARM_BIT;
+- } else {
+- arm_bit = 0;
++ if (w < cq->budget) {
++ mana_gd_ring_cq(gdma_queue, SET_ARM_BIT);
++ cq->work_done_since_doorbell = 0;
++ napi_complete_done(&cq->napi, w);
++ } else if (cq->work_done_since_doorbell >
++ cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) {
++ /* MANA hardware requires at least one doorbell ring every 8
++ * wraparounds of CQ even if there is no need to arm the CQ.
++ * This driver rings the doorbell as soon as we have exceeded
++ * 4 wraparounds.
++ */
++ mana_gd_ring_cq(gdma_queue, 0);
++ cq->work_done_since_doorbell = 0;
+ }
+
+- mana_gd_ring_cq(gdma_queue, arm_bit);
+-
+ return w;
+ }
+
+--- a/include/net/mana/mana.h
++++ b/include/net/mana/mana.h
+@@ -274,6 +274,7 @@ struct mana_cq {
+ /* NAPI data */
+ struct napi_struct napi;
+ int work_done;
++ int work_done_since_doorbell;
+ int budget;
+ };
+
--- /dev/null
+From 32316f676b4ee87c0404d333d248ccf777f739bc Mon Sep 17 00:00:00 2001
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Fri, 9 Aug 2024 14:01:24 -0700
+Subject: net: mana: Fix RX buf alloc_size alignment and atomic op panic
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+commit 32316f676b4ee87c0404d333d248ccf777f739bc upstream.
+
+The MANA driver's RX buffer alloc_size is passed into napi_build_skb() to
+create the SKB. skb_shinfo(skb) is located at the end of the skb, and its
+alignment is affected by the alloc_size passed into napi_build_skb(). The
+size needs to be aligned properly for better performance and for atomic
+operations. Otherwise, on ARM64 CPUs, for certain MTU settings such as
+4000, atomic operations may panic on skb_shinfo(skb)->dataref due to an
+alignment fault.
+
+To fix this bug, add proper alignment to the alloc_size calculation.
+
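+A sketch of the effect of the alignment (the 64-byte boundary below is
+an assumption for illustration; the kernel's SKB_DATA_ALIGN() rounds up
+to the real cacheline size):
+
+    #include <stdio.h>
+
+    #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))
+
+    int main(void)
+    {
+        unsigned long mtu = 4000, pad = 2, headroom = 256; /* made up */
+        unsigned long raw = mtu + pad + headroom;
+
+        /* an unaligned size puts skb_shinfo() at an odd offset */
+        printf("raw=%lu aligned=%lu\n", raw, ALIGN_UP(raw, 64));
+        return 0;
+    }
+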
+Sample panic info:
+[ 253.298819] Unable to handle kernel paging request at virtual address ffff000129ba5cce
+[ 253.300900] Mem abort info:
+[ 253.301760] ESR = 0x0000000096000021
+[ 253.302825] EC = 0x25: DABT (current EL), IL = 32 bits
+[ 253.304268] SET = 0, FnV = 0
+[ 253.305172] EA = 0, S1PTW = 0
+[ 253.306103] FSC = 0x21: alignment fault
+Call trace:
+ __skb_clone+0xfc/0x198
+ skb_clone+0x78/0xe0
+ raw6_local_deliver+0xfc/0x228
+ ip6_protocol_deliver_rcu+0x80/0x500
+ ip6_input_finish+0x48/0x80
+ ip6_input+0x48/0xc0
+ ip6_sublist_rcv_finish+0x50/0x78
+ ip6_sublist_rcv+0x1cc/0x2b8
+ ipv6_list_rcv+0x100/0x150
+ __netif_receive_skb_list_core+0x180/0x220
+ netif_receive_skb_list_internal+0x198/0x2a8
+ __napi_poll+0x138/0x250
+ net_rx_action+0x148/0x330
+ handle_softirqs+0x12c/0x3a0
+
+Cc: stable@vger.kernel.org
+Fixes: 80f6215b450e ("net: mana: Add support for jumbo frame")
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: Long Li <longli@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/microsoft/mana/mana_en.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
+@@ -599,7 +599,11 @@ static void mana_get_rxbuf_cfg(int mtu,
+ else
+ *headroom = XDP_PACKET_HEADROOM;
+
+- *alloc_size = mtu + MANA_RXBUF_PAD + *headroom;
++ *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom);
++
++ /* Using page pool in this case, so alloc_size is PAGE_SIZE */
++ if (*alloc_size < PAGE_SIZE)
++ *alloc_size = PAGE_SIZE;
+
+ *datasize = mtu + ETH_HLEN;
+ }
--- /dev/null
+From 100bff23818eb61751ed05d64a7df36ce9728a4d Mon Sep 17 00:00:00 2001
+From: Kyle Huey <me@kylehuey.com>
+Date: Tue, 13 Aug 2024 15:17:27 +0000
+Subject: perf/bpf: Don't call bpf_overflow_handler() for tracing events
+
+From: Kyle Huey <me@kylehuey.com>
+
+commit 100bff23818eb61751ed05d64a7df36ce9728a4d upstream.
+
+The regressing commit is new in 6.10. It assumed that any time
+event->prog is set, bpf_overflow_handler() should be invoked to execute
+the attached bpf program. This assumption is false for tracing events,
+and as a result the regressing commit broke bpftrace by invoking the bpf
+handler with garbage inputs on overflow.
+
+Prior to the regression the overflow handlers formed a chain (of length 0,
+1, or 2) and perf_event_set_bpf_handler() (the !tracing case) added
+bpf_overflow_handler() to that chain, while perf_event_attach_bpf_prog()
+(the tracing case) did not. Both set event->prog. The chain of overflow
+handlers was replaced by a single overflow handler slot and a fixed call to
+bpf_overflow_handler() when appropriate. This modifies the condition there
+to check event->prog->type == BPF_PROG_TYPE_PERF_EVENT, restoring the
+previous behavior and fixing bpftrace.
+
+Signed-off-by: Kyle Huey <khuey@kylehuey.com>
+Suggested-by: Andrii Nakryiko <andrii.nakryiko@gmail.com>
+Reported-by: Joe Damato <jdamato@fastly.com>
+Closes: https://lore.kernel.org/lkml/ZpFfocvyF3KHaSzF@LQ3V64L9R2/
+Fixes: f11f10bfa1ca ("perf/bpf: Call BPF handler directly, not through overflow machinery")
+Cc: stable@vger.kernel.org
+Tested-by: Joe Damato <jdamato@fastly.com> # bpftrace
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/r/20240813151727.28797-1-jdamato@fastly.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/events/core.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -9708,7 +9708,8 @@ static int __perf_event_overflow(struct
+
+ ret = __perf_event_account_interrupt(event, throttle);
+
+- if (event->prog && !bpf_overflow_handler(event, data, regs))
++ if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
++ !bpf_overflow_handler(event, data, regs))
+ return ret;
+
+ /*
--- /dev/null
+From 57d76bc51fd80824bcc0c84a5b5ec944f1b51edd Mon Sep 17 00:00:00 2001
+From: Nam Cao <namcao@linutronix.de>
+Date: Wed, 8 May 2024 21:19:17 +0200
+Subject: riscv: change XIP's kernel_map.size to be size of the entire kernel
+
+From: Nam Cao <namcao@linutronix.de>
+
+commit 57d76bc51fd80824bcc0c84a5b5ec944f1b51edd upstream.
+
+With an XIP kernel, kernel_map.size is set to only the size of the data
+part of the kernel. This is inconsistent with a "normal" kernel, which
+sets it to the size of the entire kernel.
+
+More importantly, an XIP kernel fails to boot if CONFIG_DEBUG_VIRTUAL is
+enabled, because there are checks on virtual addresses with the assumption
+that kernel_map.size is the size of the entire kernel (these checks are in
+arch/riscv/mm/physaddr.c).
+
+Change XIP's kernel_map.size to be the size of the entire kernel.
+
+Signed-off-by: Nam Cao <namcao@linutronix.de>
+Cc: <stable@vger.kernel.org> # v6.1+
+Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Link: https://lore.kernel.org/r/20240508191917.2892064-1-namcao@linutronix.de
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/mm/init.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/riscv/mm/init.c
++++ b/arch/riscv/mm/init.c
+@@ -931,7 +931,7 @@ static void __init create_kernel_page_ta
+ PMD_SIZE, PAGE_KERNEL_EXEC);
+
+ /* Map the data in RAM */
+- end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size;
++ end_va = kernel_map.virt_addr + kernel_map.size;
+ for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE)
+ create_pgd_mapping(pgdir, va,
+ kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)),
+@@ -1100,7 +1100,7 @@ asmlinkage void __init setup_vm(uintptr_
+
+ phys_ram_base = CONFIG_PHYS_RAM_BASE;
+ kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE;
+- kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata);
++ kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start);
+
+ kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom;
+ #else
--- /dev/null
+From 61119394631f219e23ce98bcc3eb993a64a8ea64 Mon Sep 17 00:00:00 2001
+From: Celeste Liu <coelacanthushex@gmail.com>
+Date: Thu, 27 Jun 2024 22:23:39 +0800
+Subject: riscv: entry: always initialize regs->a0 to -ENOSYS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Celeste Liu <coelacanthushex@gmail.com>
+
+commit 61119394631f219e23ce98bcc3eb993a64a8ea64 upstream.
+
+Otherwise, when the tracer changes the syscall number to -1, the kernel
+fails to initialize a0 with -ENOSYS and subsequently fails to return the
+error code of the failed syscall to userspace. For example, it will break
+strace syscall tampering.
+
+Fixes: 52449c17bdd1 ("riscv: entry: set a0 = -ENOSYS only when syscall != -1")
+Reported-by: "Dmitry V. Levin" <ldv@strace.io>
+Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Celeste Liu <CoelacanthusHex@gmail.com>
+Link: https://lore.kernel.org/r/20240627142338.5114-2-CoelacanthusHex@gmail.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/kernel/traps.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/riscv/kernel/traps.c
++++ b/arch/riscv/kernel/traps.c
+@@ -319,6 +319,7 @@ void do_trap_ecall_u(struct pt_regs *reg
+
+ regs->epc += 4;
+ regs->orig_a0 = regs->a0;
++ regs->a0 = -ENOSYS;
+
+ riscv_v_vstate_discard(regs);
+
+@@ -328,8 +329,7 @@ void do_trap_ecall_u(struct pt_regs *reg
+
+ if (syscall >= 0 && syscall < NR_syscalls)
+ syscall_handler(regs, syscall);
+- else if (syscall != -1)
+- regs->a0 = -ENOSYS;
++
+ /*
+ * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
+ * so the maximum stack offset is 1k bytes (10 bits).
--- /dev/null
+From 90574d2a675947858b47008df8d07f75ea50d0d0 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@linaro.org>
+Date: Fri, 9 Aug 2024 15:34:30 +0300
+Subject: rtla/osnoise: Prevent NULL dereference in error handling
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+commit 90574d2a675947858b47008df8d07f75ea50d0d0 upstream.
+
+If the "tool->data" allocation fails then there is no need to call
+osnoise_free_top() and, in fact, doing so will lead to a NULL dereference.
+
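+The fixed shape of the error path, reduced to a standalone sketch
+(struct and helper names simplified, not rtla's actual API):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    struct tool { void *data; };
+
+    static struct tool *init_top(size_t size)
+    {
+        struct tool *tool = calloc(1, sizeof(*tool));
+
+        if (!tool)
+            return NULL;
+        tool->data = calloc(1, size);
+        if (!tool->data) {
+            /* only destroy the tool itself: there is no data to
+             * free, so a free-data helper here would chase NULL */
+            free(tool);
+            return NULL;
+        }
+        return tool;
+    }
+
+    int main(void)
+    {
+        struct tool *tool = init_top(4096);
+
+        printf("%s\n", tool ? "ok" : "failed");
+        return 0;
+    }
+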
+Cc: stable@vger.kernel.org
+Cc: John Kacur <jkacur@redhat.com>
+Cc: "Luis Claudio R. Goncalves" <lgoncalv@redhat.com>
+Cc: Clark Williams <williams@redhat.com>
+Fixes: 1eceb2fc2ca5 ("rtla/osnoise: Add osnoise top mode")
+Link: https://lore.kernel.org/f964ed1f-64d2-4fde-ad3e-708331f8f358@stanley.mountain
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/tracing/rtla/src/osnoise_top.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+--- a/tools/tracing/rtla/src/osnoise_top.c
++++ b/tools/tracing/rtla/src/osnoise_top.c
+@@ -640,8 +640,10 @@ struct osnoise_tool *osnoise_init_top(st
+ return NULL;
+
+ tool->data = osnoise_alloc_top(nr_cpus);
+- if (!tool->data)
+- goto out_err;
++ if (!tool->data) {
++ osnoise_destroy_tool(tool);
++ return NULL;
++ }
+
+ tool->params = params;
+
+@@ -649,11 +651,6 @@ struct osnoise_tool *osnoise_init_top(st
+ osnoise_top_handler, NULL);
+
+ return tool;
+-
+-out_err:
+- osnoise_free_top(tool->data);
+- osnoise_destroy_tool(tool);
+- return NULL;
+ }
+
+ static int stop_tracing;
--- /dev/null
+From 7db4042336580dfd75cb5faa82c12cd51098c90b Mon Sep 17 00:00:00 2001
+From: Stefan Haberland <sth@linux.ibm.com>
+Date: Mon, 12 Aug 2024 14:57:33 +0200
+Subject: s390/dasd: fix error recovery leading to data corruption on ESE devices
+
+From: Stefan Haberland <sth@linux.ibm.com>
+
+commit 7db4042336580dfd75cb5faa82c12cd51098c90b upstream.
+
+Extent Space Efficient (ESE) or thin provisioned volumes need to be
+formatted on demand during usual IO processing.
+
+The dasd_ese_needs_format function checks for error codes that signal
+the non existence of a proper track format.
+
+The check for incorrect length is to imprecise since other error cases
+leading to transport of insufficient data also have this flag set.
+This might lead to data corruption in certain error cases for example
+during a storage server warmstart.
+
+Fix by removing the check for incorrect length and replacing by
+explicitly checking for invalid track format in transport mode.
+
+Also remove the check for file protected since this is not a valid
+ESE handling case.
+
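+The resulting check, reduced to a standalone sketch (the bit values
+below are placeholders chosen only so the sketch compiles; the kernel
+uses the s390 sense-byte definitions):
+
+    #include <stdio.h>
+
+    #define SNS1_NO_REC_FOUND     0x08    /* placeholder value */
+    #define SNS1_INV_TRACK_FORMAT 0x40    /* placeholder value */
+    #define SNS2_ENV_DATA_PRESENT 0x80    /* placeholder value */
+
+    /* Format on demand for "no record found", or for "invalid track
+     * format" in transport mode without environmental data present;
+     * incorrect length no longer implies "needs format". */
+    static int needs_format(const unsigned char *sense, int transport_mode)
+    {
+        if (sense[1] & SNS1_NO_REC_FOUND)
+            return 1;
+        if ((sense[1] & SNS1_INV_TRACK_FORMAT) && transport_mode &&
+            !(sense[2] & SNS2_ENV_DATA_PRESENT))
+            return 1;
+        return 0;
+    }
+
+    int main(void)
+    {
+        unsigned char sense[3] = { 0, SNS1_INV_TRACK_FORMAT, 0 };
+
+        printf("%d\n", needs_format(sense, 1));    /* 1 */
+        return 0;
+    }
+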
+Cc: stable@vger.kernel.org # 5.3+
+Fixes: 5e2b17e712cf ("s390/dasd: Add dynamic formatting support for ESE volumes")
+Reviewed-by: Jan Hoeppner <hoeppner@linux.ibm.com>
+Signed-off-by: Stefan Haberland <sth@linux.ibm.com>
+Link: https://lore.kernel.org/r/20240812125733.126431-3-sth@linux.ibm.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/block/dasd.c | 36 +++++++++++++++---------
+ drivers/s390/block/dasd_3990_erp.c | 10 +-----
+ drivers/s390/block/dasd_eckd.c | 55 ++++++++++++++++---------------------
+ drivers/s390/block/dasd_int.h | 2 -
+ 4 files changed, 50 insertions(+), 53 deletions(-)
+
+--- a/drivers/s390/block/dasd.c
++++ b/drivers/s390/block/dasd.c
+@@ -1601,9 +1601,15 @@ static int dasd_ese_needs_format(struct
+ if (!sense)
+ return 0;
+
+- return !!(sense[1] & SNS1_NO_REC_FOUND) ||
+- !!(sense[1] & SNS1_FILE_PROTECTED) ||
+- scsw_cstat(&irb->scsw) == SCHN_STAT_INCORR_LEN;
++ if (sense[1] & SNS1_NO_REC_FOUND)
++ return 1;
++
++ if ((sense[1] & SNS1_INV_TRACK_FORMAT) &&
++ scsw_is_tm(&irb->scsw) &&
++ !(sense[2] & SNS2_ENV_DATA_PRESENT))
++ return 1;
++
++ return 0;
+ }
+
+ static int dasd_ese_oos_cond(u8 *sense)
+@@ -1624,7 +1630,7 @@ void dasd_int_handler(struct ccw_device
+ struct dasd_device *device;
+ unsigned long now;
+ int nrf_suppressed = 0;
+- int fp_suppressed = 0;
++ int it_suppressed = 0;
+ struct request *req;
+ u8 *sense = NULL;
+ int expires;
+@@ -1679,8 +1685,9 @@ void dasd_int_handler(struct ccw_device
+ */
+ sense = dasd_get_sense(irb);
+ if (sense) {
+- fp_suppressed = (sense[1] & SNS1_FILE_PROTECTED) &&
+- test_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags);
++ it_suppressed = (sense[1] & SNS1_INV_TRACK_FORMAT) &&
++ !(sense[2] & SNS2_ENV_DATA_PRESENT) &&
++ test_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags);
+ nrf_suppressed = (sense[1] & SNS1_NO_REC_FOUND) &&
+ test_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags);
+
+@@ -1695,7 +1702,7 @@ void dasd_int_handler(struct ccw_device
+ return;
+ }
+ }
+- if (!(fp_suppressed || nrf_suppressed))
++ if (!(it_suppressed || nrf_suppressed))
+ device->discipline->dump_sense_dbf(device, irb, "int");
+
+ if (device->features & DASD_FEATURE_ERPLOG)
+@@ -2459,14 +2466,17 @@ retry:
+ rc = 0;
+ list_for_each_entry_safe(cqr, n, ccw_queue, blocklist) {
+ /*
+- * In some cases the 'File Protected' or 'Incorrect Length'
+- * error might be expected and error recovery would be
+- * unnecessary in these cases. Check if the according suppress
+- * bit is set.
++ * In some cases certain errors might be expected and
++ * error recovery would be unnecessary in these cases.
++ * Check if the according suppress bit is set.
+ */
+ sense = dasd_get_sense(&cqr->irb);
+- if (sense && sense[1] & SNS1_FILE_PROTECTED &&
+- test_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags))
++ if (sense && (sense[1] & SNS1_INV_TRACK_FORMAT) &&
++ !(sense[2] & SNS2_ENV_DATA_PRESENT) &&
++ test_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags))
++ continue;
++ if (sense && (sense[1] & SNS1_NO_REC_FOUND) &&
++ test_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags))
+ continue;
+ if (scsw_cstat(&cqr->irb.scsw) == 0x40 &&
+ test_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags))
+--- a/drivers/s390/block/dasd_3990_erp.c
++++ b/drivers/s390/block/dasd_3990_erp.c
+@@ -1386,14 +1386,8 @@ dasd_3990_erp_file_prot(struct dasd_ccw_
+
+ struct dasd_device *device = erp->startdev;
+
+- /*
+- * In some cases the 'File Protected' error might be expected and
+- * log messages shouldn't be written then.
+- * Check if the according suppress bit is set.
+- */
+- if (!test_bit(DASD_CQR_SUPPRESS_FP, &erp->flags))
+- dev_err(&device->cdev->dev,
+- "Accessing the DASD failed because of a hardware error\n");
++ dev_err(&device->cdev->dev,
++ "Accessing the DASD failed because of a hardware error\n");
+
+ return dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED);
+
+--- a/drivers/s390/block/dasd_eckd.c
++++ b/drivers/s390/block/dasd_eckd.c
+@@ -2274,6 +2274,7 @@ dasd_eckd_analysis_ccw(struct dasd_devic
+ cqr->status = DASD_CQR_FILLED;
+ /* Set flags to suppress output for expected errors */
+ set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags);
++ set_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags);
+
+ return cqr;
+ }
+@@ -2555,7 +2556,6 @@ dasd_eckd_build_check_tcw(struct dasd_de
+ cqr->buildclk = get_tod_clock();
+ cqr->status = DASD_CQR_FILLED;
+ /* Set flags to suppress output for expected errors */
+- set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags);
+ set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags);
+
+ return cqr;
+@@ -4129,8 +4129,6 @@ static struct dasd_ccw_req *dasd_eckd_bu
+
+ /* Set flags to suppress output for expected errors */
+ if (dasd_eckd_is_ese(basedev)) {
+- set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags);
+- set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags);
+ set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags);
+ }
+
+@@ -4632,9 +4630,8 @@ static struct dasd_ccw_req *dasd_eckd_bu
+
+ /* Set flags to suppress output for expected errors */
+ if (dasd_eckd_is_ese(basedev)) {
+- set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags);
+- set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags);
+ set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags);
++ set_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags);
+ }
+
+ return cqr;
+@@ -5779,36 +5776,32 @@ static void dasd_eckd_dump_sense(struct
+ {
+ u8 *sense = dasd_get_sense(irb);
+
+- if (scsw_is_tm(&irb->scsw)) {
+- /*
+- * In some cases the 'File Protected' or 'Incorrect Length'
+- * error might be expected and log messages shouldn't be written
+- * then. Check if the according suppress bit is set.
+- */
+- if (sense && (sense[1] & SNS1_FILE_PROTECTED) &&
+- test_bit(DASD_CQR_SUPPRESS_FP, &req->flags))
+- return;
+- if (scsw_cstat(&irb->scsw) == 0x40 &&
+- test_bit(DASD_CQR_SUPPRESS_IL, &req->flags))
+- return;
++ /*
++ * In some cases certain errors might be expected and
++ * log messages shouldn't be written then.
++ * Check if the according suppress bit is set.
++ */
++ if (sense && (sense[1] & SNS1_INV_TRACK_FORMAT) &&
++ !(sense[2] & SNS2_ENV_DATA_PRESENT) &&
++ test_bit(DASD_CQR_SUPPRESS_IT, &req->flags))
++ return;
+
+- dasd_eckd_dump_sense_tcw(device, req, irb);
+- } else {
+- /*
+- * In some cases the 'Command Reject' or 'No Record Found'
+- * error might be expected and log messages shouldn't be
+- * written then. Check if the according suppress bit is set.
+- */
+- if (sense && sense[0] & SNS0_CMD_REJECT &&
+- test_bit(DASD_CQR_SUPPRESS_CR, &req->flags))
+- return;
++ if (sense && sense[0] & SNS0_CMD_REJECT &&
++ test_bit(DASD_CQR_SUPPRESS_CR, &req->flags))
++ return;
+
+- if (sense && sense[1] & SNS1_NO_REC_FOUND &&
+- test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags))
+- return;
++ if (sense && sense[1] & SNS1_NO_REC_FOUND &&
++ test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags))
++ return;
+
++ if (scsw_cstat(&irb->scsw) == 0x40 &&
++ test_bit(DASD_CQR_SUPPRESS_IL, &req->flags))
++ return;
++
++ if (scsw_is_tm(&irb->scsw))
++ dasd_eckd_dump_sense_tcw(device, req, irb);
++ else
+ dasd_eckd_dump_sense_ccw(device, req, irb);
+- }
+ }
+
+ static int dasd_eckd_reload_device(struct dasd_device *device)
+--- a/drivers/s390/block/dasd_int.h
++++ b/drivers/s390/block/dasd_int.h
+@@ -196,7 +196,7 @@ struct dasd_ccw_req {
+ * The following flags are used to suppress output of certain errors.
+ */
+ #define DASD_CQR_SUPPRESS_NRF 4 /* Suppress 'No Record Found' error */
+-#define DASD_CQR_SUPPRESS_FP 5 /* Suppress 'File Protected' error*/
++#define DASD_CQR_SUPPRESS_IT 5 /* Suppress 'Invalid Track' error*/
+ #define DASD_CQR_SUPPRESS_IL 6 /* Suppress 'Incorrect Length' error */
+ #define DASD_CQR_SUPPRESS_CR 7 /* Suppress 'Command Reject' error */
+
--- /dev/null
+From 7c5e8d212d7d81991a580e7de3904ea213d9a852 Mon Sep 17 00:00:00 2001
+From: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Date: Fri, 9 Aug 2024 12:56:42 +0500
+Subject: selftests: memfd_secret: don't build memfd_secret test on unsupported arches
+
+From: Muhammad Usama Anjum <usama.anjum@collabora.com>
+
+commit 7c5e8d212d7d81991a580e7de3904ea213d9a852 upstream.
+
+[1] mentions that memfd_secret is only supported on arm64, riscv, x86 and
+x86_64 for now. It doesn't support other architectures. I found a
+build error on arm and decided to send a fix, as it was creating noise on
+KernelCI:
+
+memfd_secret.c: In function 'memfd_secret':
+memfd_secret.c:42:24: error: '__NR_memfd_secret' undeclared (first use in this function);
+did you mean 'memfd_secret'?
+ 42 | return syscall(__NR_memfd_secret, flags);
+ | ^~~~~~~~~~~~~~~~~
+ | memfd_secret
+
+Hence I'm adding a condition so that memfd_secret is only compiled on
+supported architectures.
+
+Also check in the run_vmtests script whether the memfd_secret binary is
+present before executing it.
+
+Link: https://lkml.kernel.org/r/20240812061522.1933054-1-usama.anjum@collabora.com
+Link: https://lore.kernel.org/all/20210518072034.31572-7-rppt@kernel.org/ [1]
+Link: https://lkml.kernel.org/r/20240809075642.403247-1-usama.anjum@collabora.com
+Fixes: 76fe17ef588a ("secretmem: test: add basic selftest for memfd_secret(2)")
+Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
+Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: Albert Ou <aou@eecs.berkeley.edu>
+Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
+Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: Palmer Dabbelt <palmer@dabbelt.com>
+Cc: Paul Walmsley <paul.walmsley@sifive.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/mm/Makefile | 2 ++
+ tools/testing/selftests/mm/run_vmtests.sh | 3 +++
+ 2 files changed, 5 insertions(+)
+
+--- a/tools/testing/selftests/mm/Makefile
++++ b/tools/testing/selftests/mm/Makefile
+@@ -51,7 +51,9 @@ TEST_GEN_FILES += madv_populate
+ TEST_GEN_FILES += map_fixed_noreplace
+ TEST_GEN_FILES += map_hugetlb
+ TEST_GEN_FILES += map_populate
++ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64))
+ TEST_GEN_FILES += memfd_secret
++endif
+ TEST_GEN_FILES += migration
+ TEST_GEN_FILES += mkdirty
+ TEST_GEN_FILES += mlock-random-test
+--- a/tools/testing/selftests/mm/run_vmtests.sh
++++ b/tools/testing/selftests/mm/run_vmtests.sh
+@@ -367,8 +367,11 @@ CATEGORY="hmm" run_test bash ./test_hmm.
+ # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
+ CATEGORY="madv_populate" run_test ./madv_populate
+
++if [ -x ./memfd_secret ]
++then
+ (echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix
+ CATEGORY="memfd_secret" run_test ./memfd_secret
++fi
+
+ # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100
+ CATEGORY="ksm" run_test ./ksm_tests -H -s 100
--- /dev/null
+From 6dd1e4c045afa6a4ba5d46f044c83bd357c593c2 Mon Sep 17 00:00:00 2001
+From: Zhen Lei <thunder.leizhen@huawei.com>
+Date: Wed, 7 Aug 2024 17:00:56 +0800
+Subject: selinux: add the processing of the failure of avc_add_xperms_decision()
+
+From: Zhen Lei <thunder.leizhen@huawei.com>
+
+commit 6dd1e4c045afa6a4ba5d46f044c83bd357c593c2 upstream.
+
+When avc_add_xperms_decision() fails, the information recorded by the new
+avc node is incomplete. In this case, the new avc node should be released
+instead of replacing the old avc node.
+
+Cc: stable@vger.kernel.org
+Fixes: fa1aa143ac4a ("selinux: extended permissions for ioctls")
+Suggested-by: Stephen Smalley <stephen.smalley.work@gmail.com>
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com>
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/selinux/avc.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/security/selinux/avc.c
++++ b/security/selinux/avc.c
+@@ -907,7 +907,11 @@ static int avc_update_node(u32 event, u3
+ node->ae.avd.auditdeny &= ~perms;
+ break;
+ case AVC_CALLBACK_ADD_XPERMS:
+- avc_add_xperms_decision(node, xpd);
++ rc = avc_add_xperms_decision(node, xpd);
++ if (rc) {
++ avc_node_kill(node);
++ goto out_unlock;
++ }
+ break;
+ }
+ avc_node_replace(node, orig);
--- /dev/null
+From 379d9af3f3da2da1bbfa67baf1820c72a080d1f1 Mon Sep 17 00:00:00 2001
+From: Zhen Lei <thunder.leizhen@huawei.com>
+Date: Tue, 6 Aug 2024 14:51:13 +0800
+Subject: selinux: fix potential counting error in avc_add_xperms_decision()
+
+From: Zhen Lei <thunder.leizhen@huawei.com>
+
+commit 379d9af3f3da2da1bbfa67baf1820c72a080d1f1 upstream.
+
+The count increases only when a node is successfully added to
+the linked list.
+
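+A toy model of the ordering (invented names, not the kernel's types):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    struct xp_node { int len; };
+
+    /* Bump len only after the allocation (and insertion) succeeded,
+     * so a failed -ENOMEM path leaves the count consistent with the
+     * list contents. */
+    static int add_decision(struct xp_node *xp, int simulate_enomem)
+    {
+        void *dest = simulate_enomem ? NULL : malloc(32);
+
+        if (!dest)
+            return -1;
+        /* ... copy the decision and list_add() it ... */
+        xp->len++;
+        return 0;
+    }
+
+    int main(void)
+    {
+        struct xp_node xp = { 0 };
+
+        add_decision(&xp, 1);    /* fails: len stays 0 */
+        add_decision(&xp, 0);
+        printf("len=%d\n", xp.len);    /* 1 */
+        return 0;
+    }
+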
+Cc: stable@vger.kernel.org
+Fixes: fa1aa143ac4a ("selinux: extended permissions for ioctls")
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com>
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/selinux/avc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/security/selinux/avc.c
++++ b/security/selinux/avc.c
+@@ -330,12 +330,12 @@ static int avc_add_xperms_decision(struc
+ {
+ struct avc_xperms_decision_node *dest_xpd;
+
+- node->ae.xp_node->xp.len++;
+ dest_xpd = avc_xperms_decision_alloc(src->used);
+ if (!dest_xpd)
+ return -ENOMEM;
+ avc_copy_xperms_decision(&dest_xpd->xpd, src);
+ list_add(&dest_xpd->xpd_list, &node->ae.xp_node->xpd_head);
++ node->ae.xp_node->xp.len++;
+ return 0;
+ }
+
selinux-revert-our-use-of-vma_is_initial_heap.patch
netfs-ceph-revert-netfs-remove-deprecated-use-of-pg_private_2-as-a-second-writeback-flag.patch
fuse-initialize-beyond-eof-page-contents-before-setting-uptodate.patch
+char-xillybus-don-t-destroy-workqueue-from-work-item-running-on-it.patch
+char-xillybus-refine-workqueue-handling.patch
+char-xillybus-check-usb-endpoints-when-probing-device.patch
+alsa-usb-audio-add-delay-quirk-for-vivo-usb-c-xe710-headset.patch
+alsa-usb-audio-support-yamaha-p-125-quirk-entry.patch
+usb-misc-ljca-add-lunar-lake-ljca-gpio-hid-to-ljca_gpio_hids.patch
+usb-xhci-check-for-xhci-interrupters-being-allocated-in-xhci_mem_clearup.patch
+xhci-fix-panther-point-null-pointer-deref-at-full-speed-re-enumeration.patch
+thunderbolt-mark-xdomain-as-unplugged-when-router-is-removed.patch
+alsa-hda-tas2781-fix-wrong-calibrated-data-order.patch
+alsa-timer-relax-start-tick-time-check-for-slave-timer-elements.patch
+s390-dasd-fix-error-recovery-leading-to-data-corruption-on-ese-devices.patch
+kvm-s390-fix-validity-interception-issue-when-gisa-is-switched-off.patch
+thermal-gov_bang_bang-call-__thermal_cdev_update-directly.patch
+keys-trusted-fix-dcp-blob-payload-length-assignment.patch
+keys-trusted-dcp-fix-leak-of-blob-encryption-key.patch
+riscv-change-xip-s-kernel_map.size-to-be-size-of-the-entire-kernel.patch
+riscv-entry-always-initialize-regs-a0-to-enosys.patch
+smb3-fix-lock-breakage-for-cached-writes.patch
+i2c-tegra-do-not-mark-acpi-devices-as-irq-safe.patch
+acpica-add-a-depth-argument-to-acpi_execute_reg_methods.patch
+acpi-ec-evaluate-_reg-outside-the-ec-scope-more-carefully.patch
+arm64-acpi-numa-initialize-all-values-of-acpi_early_node_map-to-numa_no_node.patch
+dm-resume-don-t-return-einval-when-signalled.patch
+dm-persistent-data-fix-memory-allocation-failure.patch
+vfs-don-t-evict-inode-under-the-inode-lru-traversing-context.patch
+fix-bitmap-corruption-on-close_range-with-close_range_unshare.patch
+i2c-qcom-geni-add-missing-geni_icc_disable-in-geni_i2c_runtime_resume.patch
+tracing-return-from-tracing_buffers_read-if-the-file-has-been-closed.patch
+perf-bpf-don-t-call-bpf_overflow_handler-for-tracing-events.patch
+mseal-fix-is_madv_discard.patch
+rtla-osnoise-prevent-null-dereference-in-error-handling.patch
+mm-fix-endless-reclaim-on-machines-with-unaccepted-memory.patch
+mm-hugetlb-fix-hugetlb-vs.-core-mm-pt-locking.patch
+md-raid1-fix-data-corruption-for-degraded-array-with-slow-disk.patch
+net-mana-fix-rx-buf-alloc_size-alignment-and-atomic-op-panic.patch
+media-atomisp-fix-streaming-no-longer-working-on-byt-isp2400-devices.patch
+net-mana-fix-doorbell-out-of-order-violation-and-avoid-unnecessary-doorbell-rings.patch
+wifi-brcmfmac-cfg80211-handle-ssid-based-pmksa-deletion.patch
+fs-netfs-fscache_cookie-add-missing-n_accesses-check.patch
+selinux-fix-potential-counting-error-in-avc_add_xperms_decision.patch
+selinux-add-the-processing-of-the-failure-of-avc_add_xperms_decision.patch
+alloc_tag-mark-pages-reserved-during-cma-activation-as-not-tagged.patch
+mm-memory-failure-use-raw_spinlock_t-in-struct-memory_failure_cpu.patch
+selftests-memfd_secret-don-t-build-memfd_secret-test-on-unsupported-arches.patch
+alloc_tag-introduce-clear_page_tag_ref-helper-function.patch
+mm-numa-no-task_numa_fault-call-if-pmd-is-changed.patch
+mm-vmalloc-fix-page-mapping-if-vm_area_alloc_pages-with-high-order-fallback-to-order-0.patch
+mm-numa-no-task_numa_fault-call-if-pte-is-changed.patch
+btrfs-tree-checker-reject-btrfs_ft_unknown-dir-type.patch
+btrfs-send-allow-cloning-non-aligned-extent-if-it-ends-at-i_size.patch
+btrfs-check-delayed-refs-when-we-re-checking-if-a-ref-exists.patch
+btrfs-only-run-the-extent-map-shrinker-from-kswapd-tasks.patch
+btrfs-zoned-properly-take-lock-to-read-update-block-group-s-zoned-variables.patch
+btrfs-tree-checker-add-dev-extent-item-checks.patch
+btrfs-only-enable-extent-map-shrinker-for-debug-builds.patch
+drm-amdgpu-actually-check-flags-for-all-context-ops.patch
+memcg_write_event_control-fix-a-user-triggerable-oops.patch
--- /dev/null
+From 836bb3268db405cf9021496ac4dbc26d3e4758fe Mon Sep 17 00:00:00 2001
+From: Steve French <stfrench@microsoft.com>
+Date: Thu, 15 Aug 2024 14:03:43 -0500
+Subject: smb3: fix lock breakage for cached writes
+
+From: Steve French <stfrench@microsoft.com>
+
+commit 836bb3268db405cf9021496ac4dbc26d3e4758fe upstream.
+
+Mandatory locking is enforced for cached writes, which violates
+default posix semantics, and it is also enforced inconsistently.
+This apparently breaks recent versions of libreoffice, but can
+also be demonstrated by opening a file twice from the same
+client, locking it from handle one and writing to it from
+handle two (which fails, returning EACCES).
+
+There was already a mount option "forcemandatorylock" (which
+defaults to off). With this change we only break posix semantics
+on a write to a locked range when the user intentionally
+specifies "forcemandatorylock" on mount, i.e. the write only
+fails in that case.
+
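+A rough repro sketch of the failing sequence (the path is hypothetical;
+run it against a file on a CIFS mount without "forcemandatorylock"):
+
+    #include <fcntl.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    int main(void)
+    {
+        int fd1 = open("/mnt/cifs/f", O_RDWR | O_CREAT, 0644);
+        int fd2 = open("/mnt/cifs/f", O_RDWR);
+        struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET };
+
+        if (fd1 < 0 || fd2 < 0)
+            return 1;
+        fcntl(fd1, F_SETLK, &fl);      /* lock from handle one */
+        if (write(fd2, "x", 1) < 0)    /* EACCES before the fix */
+            perror("write");
+        close(fd2);
+        close(fd1);
+        return 0;
+    }
+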
+Fixes: 85160e03a79e ("CIFS: Implement caching mechanism for mandatory brlocks")
+Cc: stable@vger.kernel.org
+Cc: Pavel Shilovsky <piastryyy@gmail.com>
+Reported-by: abartlet@samba.org
+Reported-by: Kevin Ottens <kevin.ottens@enioka.com>
+Reviewed-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/client/file.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/fs/smb/client/file.c
++++ b/fs/smb/client/file.c
+@@ -2719,6 +2719,7 @@ cifs_writev(struct kiocb *iocb, struct i
+ struct inode *inode = file->f_mapping->host;
+ struct cifsInodeInfo *cinode = CIFS_I(inode);
+ struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
++ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ ssize_t rc;
+
+ rc = netfs_start_io_write(inode);
+@@ -2735,12 +2736,16 @@ cifs_writev(struct kiocb *iocb, struct i
+ if (rc <= 0)
+ goto out;
+
+- if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
++ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) &&
++ (cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
+ server->vals->exclusive_lock_type, 0,
+- NULL, CIFS_WRITE_OP))
+- rc = netfs_buffered_write_iter_locked(iocb, from, NULL);
+- else
++ NULL, CIFS_WRITE_OP))) {
+ rc = -EACCES;
++ goto out;
++ }
++
++ rc = netfs_buffered_write_iter_locked(iocb, from, NULL);
++
+ out:
+ up_read(&cinode->lock_sem);
+ netfs_end_io_write(inode);
--- /dev/null
+From b9b6ee6fe258ce4d89592593efcd3d798c418859 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Tue, 13 Aug 2024 16:25:19 +0200
+Subject: thermal: gov_bang_bang: Call __thermal_cdev_update() directly
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit b9b6ee6fe258ce4d89592593efcd3d798c418859 upstream.
+
+Instead of clearing the "updated" flag for each cooling device
+affected by the trip point crossing in bang_bang_control() and
+walking all thermal instances to run thermal_cdev_update() for all
+of the affected cooling devices, call __thermal_cdev_update()
+directly for each of them.
+
+No intentional functional impact.
+
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Acked-by: Peter Kästle <peter@piie.net>
+Reviewed-by: Zhang Rui <rui.zhang@intel.com>
+Cc: 6.10+ <stable@vger.kernel.org> # 6.10+
+Link: https://patch.msgid.link/13583081.uLZWGnKmhe@rjwysocki.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/thermal/gov_bang_bang.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/drivers/thermal/gov_bang_bang.c
++++ b/drivers/thermal/gov_bang_bang.c
+@@ -79,12 +79,9 @@ static void bang_bang_control(struct the
+ dev_dbg(&instance->cdev->device, "target=%ld\n", instance->target);
+
+ mutex_lock(&instance->cdev->lock);
+- instance->cdev->updated = false; /* cdev needs update */
++ __thermal_cdev_update(instance->cdev);
+ mutex_unlock(&instance->cdev->lock);
+ }
+-
+- list_for_each_entry(instance, &tz->thermal_instances, tz_node)
+- thermal_cdev_update(instance->cdev);
+ }
+
+ static struct thermal_governor thermal_gov_bang_bang = {
--- /dev/null
+From e2006140ad2e01a02ed0aff49cc2ae3ceeb11f8d Mon Sep 17 00:00:00 2001
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+Date: Thu, 13 Jun 2024 15:05:03 +0300
+Subject: thunderbolt: Mark XDomain as unplugged when router is removed
+
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+
+commit e2006140ad2e01a02ed0aff49cc2ae3ceeb11f8d upstream.
+
+I noticed that when we do a discrete host router NVM upgrade and it gets
+hot-removed from the PCIe side as a result of NVM firmware authentication,
+if there is another host connected with enabled paths we hang in tearing
+them down. This is due to the fact that the Thunderbolt networking driver
+also tries to clean up the paths and ends up blocking in
+tb_disconnect_xdomain_paths() waiting for the domain lock.
+
+However, at this point we have already cleaned up the paths in tb_stop(),
+so there is really no need for tb_disconnect_xdomain_paths() to do that
+anymore. Furthermore, it already checks whether the XDomain is unplugged
+and bails out early, so take advantage of that and mark the XDomain as
+unplugged when we remove the parent router.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/thunderbolt/switch.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/thunderbolt/switch.c
++++ b/drivers/thunderbolt/switch.c
+@@ -3392,6 +3392,7 @@ void tb_switch_remove(struct tb_switch *
+ tb_switch_remove(port->remote->sw);
+ port->remote = NULL;
+ } else if (port->xdomain) {
++ port->xdomain->is_unplugged = true;
+ tb_xdomain_remove(port->xdomain);
+ port->xdomain = NULL;
+ }
--- /dev/null
+From d0949cd44a62c4c41b30ea7ae94d8c887f586882 Mon Sep 17 00:00:00 2001
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Thu, 8 Aug 2024 23:57:30 -0400
+Subject: tracing: Return from tracing_buffers_read() if the file has been closed
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+commit d0949cd44a62c4c41b30ea7ae94d8c887f586882 upstream.
+
+When running the following:
+
+ # cd /sys/kernel/tracing/
+ # echo 1 > events/sched/sched_waking/enable
+ # echo 1 > events/sched/sched_switch/enable
+ # echo 0 > tracing_on
+ # dd if=per_cpu/cpu0/trace_pipe_raw of=/tmp/raw0.dat
+
+The dd task would get stuck in an infinite loop in the kernel. What would
+happen is the following:
+
+When ring_buffer_read_page() returns -1 (no data), a check is made to
+see if the buffer is empty (as happens when the page is not full); if it
+is, wait_on_pipe() is called to wait until the ring buffer has data. When
+it does, the code tries again to read data (unless O_NONBLOCK is set).
+
+The issue happens when there is a reader and the file descriptor is closed.
+wait_on_pipe() does return in that case, but the loop simply tries again,
+wait_on_pipe() again returns immediately, and the loop never stops.
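+
+The shape of the loop, simplified (not verbatim):
+
+  again:
+        ret = ring_buffer_read_page(...);
+        if (ret < 0) {
+                if (trace_empty(iter)) {
+                        if (filp->f_flags & O_NONBLOCK)
+                                return -EAGAIN;
+                        ret = wait_on_pipe(iter, 0); /* also returns on close */
+                        if (ret)
+                                return ret;
+                        goto again; /* spins forever once the fd is closed */
+                }
+        }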
+
+Simply check if the file was closed before looping and exit out if it is.
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Link: https://lore.kernel.org/20240808235730.78bf63e5@rorschach.local.home
+Fixes: 2aa043a55b9a7 ("tracing/ring-buffer: Fix wait_on_pipe() race")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index 10cd38bce2f1..ebe7ce2f5f4a 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -7956,7 +7956,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
+ trace_access_unlock(iter->cpu_file);
+
+ if (ret < 0) {
+- if (trace_empty(iter)) {
++ if (trace_empty(iter) && !iter->closed) {
+ if ((filp->f_flags & O_NONBLOCK))
+ return -EAGAIN;
+
+--
+2.46.0
+
--- /dev/null
+From 3ed486e383ccee9b0c8d727608f12a937c6603ca Mon Sep 17 00:00:00 2001
+From: Hans de Goede <hdegoede@redhat.com>
+Date: Mon, 12 Aug 2024 11:50:38 +0200
+Subject: usb: misc: ljca: Add Lunar Lake ljca GPIO HID to ljca_gpio_hids[]
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+commit 3ed486e383ccee9b0c8d727608f12a937c6603ca upstream.
+
+Add LJCA GPIO support for the Lunar Lake platform.
+
+New HID taken from the out-of-tree ivsc-driver git repo.
+
+Link: https://github.com/intel/ivsc-driver/commit/47e7c4a446c8ea8c741ff5a32fa7b19f9e6fd47e
+Cc: stable <stable@kernel.org>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Link: https://lore.kernel.org/r/20240812095038.555837-1-hdegoede@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/misc/usb-ljca.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/usb/misc/usb-ljca.c
++++ b/drivers/usb/misc/usb-ljca.c
+@@ -169,6 +169,7 @@ static const struct acpi_device_id ljca_
+ { "INTC1096" },
+ { "INTC100B" },
+ { "INTC10D1" },
++ { "INTC10B5" },
+ {},
+ };
+
--- /dev/null
+From dcdb52d948f3a17ccd3fce757d9bd981d7c32039 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Fri, 9 Aug 2024 15:44:07 +0300
+Subject: usb: xhci: Check for xhci->interrupters being allocated in xhci_mem_clearup()
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit dcdb52d948f3a17ccd3fce757d9bd981d7c32039 upstream.
+
+If xhci_mem_init() fails, it calls into xhci_mem_cleanup() to mop
+up the damage. If it fails early enough, before xhci->interrupters
+is allocated but after xhci->max_interrupters has been set, which
+happens in most (all?) cases, things get uglier, as xhci_mem_cleanup()
+unconditionally dereferences xhci->interrupters. With prejudice.
+
+Gate the interrupt freeing loop with a check on xhci->interrupters
+being non-NULL.
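+
+The failure shape, spelled out (illustrative, not the actual code):
+
+  xhci->max_interrupters = n;  /* set early in xhci_mem_init() */
+  /* ... init fails before xhci->interrupters is allocated ... */
+
+  /* xhci_mem_cleanup() then walks a NULL array: */
+  for (i = 0; i < xhci->max_interrupters; i++)
+          if (xhci->interrupters[i])  /* NULL pointer dereference */
+                  ...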
+
+Found while debugging a DMA allocation issue that led the xHCI driver
+down this exact path.
+
+Fixes: c99b38c41234 ("xhci: add support to allocate several interrupters")
+Cc: Mathias Nyman <mathias.nyman@linux.intel.com>
+Cc: Wesley Cheng <quic_wcheng@quicinc.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org # 6.8+
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20240809124408.505786-2-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/host/xhci-mem.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/usb/host/xhci-mem.c
++++ b/drivers/usb/host/xhci-mem.c
+@@ -1877,7 +1877,7 @@ void xhci_mem_cleanup(struct xhci_hcd *x
+
+ cancel_delayed_work_sync(&xhci->cmd_timer);
+
+- for (i = 0; i < xhci->max_interrupters; i++) {
++ for (i = 0; xhci->interrupters && i < xhci->max_interrupters; i++) {
+ if (xhci->interrupters[i]) {
+ xhci_remove_interrupter(xhci, xhci->interrupters[i]);
+ xhci_free_interrupter(xhci, xhci->interrupters[i]);
--- /dev/null
+From 2a0629834cd82f05d424bbc193374f9a43d1f87d Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Fri, 9 Aug 2024 11:16:28 +0800
+Subject: vfs: Don't evict inode under the inode lru traversing context
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 2a0629834cd82f05d424bbc193374f9a43d1f87d upstream.
+
+The inode reclaiming process (see prune_icache_sb()) first collects all
+reclaimable inodes and marks them with the I_FREEING flag; at that
+point, other processes get stuck if they try to grab these inodes
+(see find_inode_fast()), and the reclaiming process then destroys the
+inodes via dispose_list(). Some filesystems (e.g. ext4 with the
+ea_inode feature, ubifs with xattrs) may do an inode lookup in the
+inode evicting callback, and if that lookup runs under the inode LRU
+traversing context, deadlock problems may happen.
+
+Case 1: In ext4_evict_inode(), an ea inode lookup can happen if the
+        ea_inode feature is enabled, and the lookup process gets stuck
+        under the evicting context like this:
+
+ 1. File A has inode i_reg and an ea inode i_ea
+ 2. getfattr(A, xattr_buf) // i_ea is added into lru // lru->i_ea
+ 3. Then, the following processes run like this:
+
+ PA PB
+ echo 2 > /proc/sys/vm/drop_caches
+ shrink_slab
+ prune_dcache_sb
+ // i_reg is added into lru, lru->i_ea->i_reg
+ prune_icache_sb
+ list_lru_walk_one
+ inode_lru_isolate
+ i_ea->i_state |= I_FREEING // set inode state
+ inode_lru_isolate
+ __iget(i_reg)
+ spin_unlock(&i_reg->i_lock)
+ spin_unlock(lru_lock)
+ rm file A
+ i_reg->nlink = 0
+ iput(i_reg) // i_reg->nlink is 0, do evict
+ ext4_evict_inode
+ ext4_xattr_delete_inode
+ ext4_xattr_inode_dec_ref_all
+ ext4_xattr_inode_iget
+ ext4_iget(i_ea->i_ino)
+ iget_locked
+ find_inode_fast
+ __wait_on_freeing_inode(i_ea) ----→ AA deadlock
+ dispose_list // cannot be executed by prune_icache_sb
+ wake_up_bit(&i_ea->i_state)
+
+Case 2: In the deleted inode writing function ubifs_jnl_write_inode(),
+        the file deleting process holds BASEHD's wbuf->io_mutex while
+        getting the xattr inode, which can race with the inode reclaiming
+        process (which may try to lock BASEHD's wbuf->io_mutex in the
+        inode evicting function); an ABBA deadlock problem then happens
+        as follows:
+
+ 1. File A has inode ia and an xattr (with inode ixa), regular file B
+    has inode ib and an xattr.
+ 2. getfattr(A, xattr_buf) // ixa is added into lru // lru->ixa
+ 3. Then, the following three processes run like this:
+
+ PA PB PC
+ echo 2 > /proc/sys/vm/drop_caches
+ shrink_slab
+ prune_dcache_sb
+ // ib and ia are added into lru, lru->ixa->ib->ia
+ prune_icache_sb
+ list_lru_walk_one
+ inode_lru_isolate
+ ixa->i_state |= I_FREEING // set inode state
+ inode_lru_isolate
+ __iget(ib)
+ spin_unlock(&ib->i_lock)
+ spin_unlock(lru_lock)
+ rm file B
+ ib->nlink = 0
+ rm file A
+ iput(ia)
+ ubifs_evict_inode(ia)
+ ubifs_jnl_delete_inode(ia)
+ ubifs_jnl_write_inode(ia)
+ make_reservation(BASEHD) // Lock wbuf->io_mutex
+ ubifs_iget(ixa->i_ino)
+ iget_locked
+ find_inode_fast
+ __wait_on_freeing_inode(ixa)
+ | iput(ib) // ib->nlink is 0, do evict
+ | ubifs_evict_inode
+ | ubifs_jnl_delete_inode(ib)
+ ↓ ubifs_jnl_write_inode
+ ABBA deadlock ←-----make_reservation(BASEHD)
+ dispose_list // cannot be executed by prune_icache_sb
+ wake_up_bit(&ixa->i_state)
+
+Fix the possible deadlock by using a new inode state flag,
+I_LRU_ISOLATING, to pin the inode in memory while inode_lru_isolate()
+reclaims its pages, instead of taking an ordinary inode reference. This
+way inode deletion cannot be triggered from inode_lru_isolate(), thus
+avoiding the deadlock. evict() is made to wait for I_LRU_ISOLATING to
+be cleared before proceeding with inode cleanup.
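+
+The pattern at work, in outline (a sketch of the idea; the exact code
+is in the hunks below):
+
+  /* pin: set under i_lock instead of taking __iget() */
+  inode->i_state |= I_LRU_ISOLATING;
+
+  /* unpin: clear the bit and wake any waiter sleeping in evict() */
+  inode->i_state &= ~I_LRU_ISOLATING;
+  smp_mb();
+  wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
+
+Unlike an __iget()/iput() pair, dropping this pin can never become the
+final iput() that starts eviction, which is what breaks the lock cycle.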
+
+Link: https://lore.kernel.org/all/37c29c42-7685-d1f0-067d-63582ffac405@huaweicloud.com/
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=219022
+Fixes: e50e5129f384 ("ext4: xattr-in-inode support")
+Fixes: 7959cf3a7506 ("ubifs: journal: Handle xattrs like files")
+Cc: stable@vger.kernel.org
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Link: https://lore.kernel.org/r/20240809031628.1069873-1-chengzhihao@huaweicloud.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Suggested-by: Jan Kara <jack@suse.cz>
+Suggested-by: Mateusz Guzik <mjguzik@gmail.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/inode.c | 39 +++++++++++++++++++++++++++++++++++++--
+ include/linux/fs.h | 5 +++++
+ 2 files changed, 42 insertions(+), 2 deletions(-)
+
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -486,6 +486,39 @@ static void inode_lru_list_del(struct in
+ this_cpu_dec(nr_unused);
+ }
+
++static void inode_pin_lru_isolating(struct inode *inode)
++{
++ lockdep_assert_held(&inode->i_lock);
++ WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE));
++ inode->i_state |= I_LRU_ISOLATING;
++}
++
++static void inode_unpin_lru_isolating(struct inode *inode)
++{
++ spin_lock(&inode->i_lock);
++ WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
++ inode->i_state &= ~I_LRU_ISOLATING;
++ smp_mb();
++ wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
++ spin_unlock(&inode->i_lock);
++}
++
++static void inode_wait_for_lru_isolating(struct inode *inode)
++{
++ spin_lock(&inode->i_lock);
++ if (inode->i_state & I_LRU_ISOLATING) {
++ DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING);
++ wait_queue_head_t *wqh;
++
++ wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING);
++ spin_unlock(&inode->i_lock);
++ __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
++ spin_lock(&inode->i_lock);
++ WARN_ON(inode->i_state & I_LRU_ISOLATING);
++ }
++ spin_unlock(&inode->i_lock);
++}
++
+ /**
+ * inode_sb_list_add - add inode to the superblock list of inodes
+ * @inode: inode to add
+@@ -655,6 +688,8 @@ static void evict(struct inode *inode)
+
+ inode_sb_list_del(inode);
+
++ inode_wait_for_lru_isolating(inode);
++
+ /*
+ * Wait for flusher thread to be done with the inode so that filesystem
+ * does not start destroying it while writeback is still running. Since
+@@ -843,7 +878,7 @@ static enum lru_status inode_lru_isolate
+ * be under pressure before the cache inside the highmem zone.
+ */
+ if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
+- __iget(inode);
++ inode_pin_lru_isolating(inode);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(lru_lock);
+ if (remove_inode_buffers(inode)) {
+@@ -855,7 +890,7 @@ static enum lru_status inode_lru_isolate
+ __count_vm_events(PGINODESTEAL, reap);
+ mm_account_reclaimed_pages(reap);
+ }
+- iput(inode);
++ inode_unpin_lru_isolating(inode);
+ spin_lock(lru_lock);
+ return LRU_RETRY;
+ }
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -2370,6 +2370,9 @@ static inline void kiocb_clone(struct ki
+ *
+ * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback.
+ *
++ * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding
++ * i_count.
++ *
+ * Q: What is the difference between I_WILL_FREE and I_FREEING?
+ */
+ #define I_DIRTY_SYNC (1 << 0)
+@@ -2393,6 +2396,8 @@ static inline void kiocb_clone(struct ki
+ #define I_DONTCACHE (1 << 16)
+ #define I_SYNC_QUEUED (1 << 17)
+ #define I_PINNING_NETFS_WB (1 << 18)
++#define __I_LRU_ISOLATING 19
++#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING)
+
+ #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
+ #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
--- /dev/null
+From 2ad4e1ada8eebafa2d75a4b75eeeca882de6ada1 Mon Sep 17 00:00:00 2001
+From: Janne Grunau <j@jannau.net>
+Date: Sat, 3 Aug 2024 21:52:55 +0200
+Subject: wifi: brcmfmac: cfg80211: Handle SSID based pmksa deletion
+
+From: Janne Grunau <j@jannau.net>
+
+commit 2ad4e1ada8eebafa2d75a4b75eeeca882de6ada1 upstream.
+
+Since 1efdba5fdc2c ("Handle PMKSA flush in the driver for SAE/OWE
+offload cases"), wpa_supplicant 2.11 sends SSID-based PMKSA del
+commands. brcmfmac is not prepared for these and tries to dereference
+the NULL bssid and pmkid pointers in cfg80211_pmksa. PMKID_V3
+operations support SSID-based updates, so copy the SSID.
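+
+In the SSID-based delete case, the struct cfg80211_pmksa passed in has
+bssid and pmkid both NULL and only ssid/ssid_len set, so each copy has
+to be guarded, as outlined here (see the hunk below for the real thing):
+
+  if (pmksa->bssid)
+          memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN);
+  if (pmksa->ssid && pmksa->ssid_len)
+          memcpy(pmk_op->pmk[0].ssid.SSID, pmksa->ssid, pmksa->ssid_len);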
+
+Fixes: a96202acaea4 ("wifi: brcmfmac: cfg80211: Add support for PMKID_V3 operations")
+Cc: stable@vger.kernel.org # 6.4.x
+Signed-off-by: Janne Grunau <j@jannau.net>
+Reviewed-by: Neal Gompa <neal@gompa.dev>
+Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
+Signed-off-by: Kalle Valo <kvalo@kernel.org>
+Link: https://patch.msgid.link/20240803-brcmfmac_pmksa_del_ssid-v1-1-4e85f19135e1@jannau.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 13 +++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+@@ -4320,9 +4320,16 @@ brcmf_pmksa_v3_op(struct brcmf_if *ifp,
+ /* Single PMK operation */
+ pmk_op->count = cpu_to_le16(1);
+ length += sizeof(struct brcmf_pmksa_v3);
+- memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN);
+- memcpy(pmk_op->pmk[0].pmkid, pmksa->pmkid, WLAN_PMKID_LEN);
+- pmk_op->pmk[0].pmkid_len = WLAN_PMKID_LEN;
++ if (pmksa->bssid)
++ memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN);
++ if (pmksa->pmkid) {
++ memcpy(pmk_op->pmk[0].pmkid, pmksa->pmkid, WLAN_PMKID_LEN);
++ pmk_op->pmk[0].pmkid_len = WLAN_PMKID_LEN;
++ }
++ if (pmksa->ssid && pmksa->ssid_len) {
++ memcpy(pmk_op->pmk[0].ssid.SSID, pmksa->ssid, pmksa->ssid_len);
++ pmk_op->pmk[0].ssid.SSID_len = pmksa->ssid_len;
++ }
+ pmk_op->pmk[0].time_left = cpu_to_le32(alive ? BRCMF_PMKSA_NO_EXPIRY : 0);
+ }
+
--- /dev/null
+From af8e119f52e9c13e556be9e03f27957554a84656 Mon Sep 17 00:00:00 2001
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+Date: Thu, 15 Aug 2024 17:11:17 +0300
+Subject: xhci: Fix Panther point NULL pointer deref at full-speed re-enumeration
+
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+
+commit af8e119f52e9c13e556be9e03f27957554a84656 upstream.
+
+Re-enumerating full-speed devices after a failed address device command
+can trigger a NULL pointer dereference.
+
+Full-speed devices may need to reconfigure the endpoint 0 Max Packet Size
+value during enumeration. USB core calls usb_ep0_reinit() in this case,
+which ends up calling xhci_configure_endpoint().
+
+On Panther Point xHC the xhci_configure_endpoint() function will
+additionally check and reserve bandwidth in software. Other hosts do
+this in hardware.
+
+If the xHC address device command fails, a new xhci_virt_device structure
+is allocated as part of re-enabling the slot, but the bandwidth table
+pointers are not set up properly here.
+This triggers the NULL pointer dereference the next time usb_ep0_reinit()
+is called and xhci_configure_endpoint() tries to check and reserve
+bandwidth.
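+
+The failing call chain, in outline:
+
+  usb_ep0_reinit()
+    xhci_configure_endpoint()
+      xhci_reserve_bandwidth()  /* XHCI_SW_BW_CHECKING hosts only */
+        /* walks bandwidth table pointers that were never set up */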
+
+[46710.713538] usb 3-1: new full-speed USB device number 5 using xhci_hcd
+[46710.713699] usb 3-1: Device not responding to setup address.
+[46710.917684] usb 3-1: Device not responding to setup address.
+[46711.125536] usb 3-1: device not accepting address 5, error -71
+[46711.125594] BUG: kernel NULL pointer dereference, address: 0000000000000008
+[46711.125600] #PF: supervisor read access in kernel mode
+[46711.125603] #PF: error_code(0x0000) - not-present page
+[46711.125606] PGD 0 P4D 0
+[46711.125610] Oops: Oops: 0000 [#1] PREEMPT SMP PTI
+[46711.125615] CPU: 1 PID: 25760 Comm: kworker/1:2 Not tainted 6.10.3_2 #1
+[46711.125620] Hardware name: Gigabyte Technology Co., Ltd.
+[46711.125623] Workqueue: usb_hub_wq hub_event [usbcore]
+[46711.125668] RIP: 0010:xhci_reserve_bandwidth (drivers/usb/host/xhci.c
+
+Fix this by making sure bandwidth table pointers are set up correctly
+after a failed address device command, and additionally by avoiding
+checking for bandwidth in cases like this where no actual endpoints are
+added or removed, i.e. only the context for the default control
+endpoint 0 is evaluated.
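+
+In outline, the two parts of the fix (simplified from the hunks below):
+
+  /* 1) skip the software bandwidth check when no endpoints change */
+  if ((xhci->quirks & XHCI_SW_BW_CHECKING) && !ctx_change && ...)
+
+  /* 2) after a failed address command, re-init the fresh virt device */
+  if (xhci_alloc_dev(hcd, udev) == 1)
+          xhci_setup_addressable_virt_dev(xhci, udev);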
+
+Reported-by: Karel Balej <balejk@matfyz.cz>
+Closes: https://lore.kernel.org/linux-usb/D3CKQQAETH47.1MUO22RTCH2O3@matfyz.cz/
+Cc: stable@vger.kernel.org
+Fixes: 651aaf36a7d7 ("usb: xhci: Handle USB transaction error on address command")
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20240815141117.2702314-2-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/host/xhci.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/usb/host/xhci.c
++++ b/drivers/usb/host/xhci.c
+@@ -2837,7 +2837,7 @@ static int xhci_configure_endpoint(struc
+ xhci->num_active_eps);
+ return -ENOMEM;
+ }
+- if ((xhci->quirks & XHCI_SW_BW_CHECKING) &&
++ if ((xhci->quirks & XHCI_SW_BW_CHECKING) && !ctx_change &&
+ xhci_reserve_bandwidth(xhci, virt_dev, command->in_ctx)) {
+ if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK))
+ xhci_free_host_resources(xhci, ctrl_ctx);
+@@ -4200,8 +4200,10 @@ static int xhci_setup_device(struct usb_
+ mutex_unlock(&xhci->mutex);
+ ret = xhci_disable_slot(xhci, udev->slot_id);
+ xhci_free_virt_device(xhci, udev->slot_id);
+- if (!ret)
+- xhci_alloc_dev(hcd, udev);
++ if (!ret) {
++ if (xhci_alloc_dev(hcd, udev) == 1)
++ xhci_setup_addressable_virt_dev(xhci, udev);
++ }
+ kfree(command->completion);
+ kfree(command);
+ return -EPROTO;