From: Greg Kroah-Hartman Date: Mon, 7 Oct 2024 17:35:40 +0000 (+0200) Subject: 6.11-stable patches X-Git-Tag: v6.6.55~64 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=cbb200c06a260918c109cdbc0ac1c9365f82ede5;p=thirdparty%2Fkernel%2Fstable-queue.git 6.11-stable patches added patches: acpi-resource-add-asus-expertbook-b2502cva-to-irq1_level_low_skip_override.patch acpi-resource-add-asus-vivobook-x1704vap-to-irq1_level_low_skip_override.patch acpi-resource-loosen-the-asus-e1404gab-dmi-match-to-also-cover-the-e1404ga.patch acpi-resource-remove-duplicate-asus-e1504gab-irq-override.patch acpi-video-add-backlight-native-quirk-for-dell-optiplex-5480-aio.patch bluetooth-hci_event-align-br-edr-just_works-paring-with-le.patch btrfs-drop-the-backref-cache-during-relocation-if-we-commit.patch btrfs-fix-a-null-pointer-dereference-when-failed-to-start-a-new-trasacntion.patch btrfs-send-fix-buffer-overflow-detection-when-copying-path-to-cache-entry.patch btrfs-send-fix-invalid-clone-operation-for-file-that-got-its-size-decreased.patch btrfs-wait-for-fixup-workers-before-stopping-cleaner-kthread-during-umount.patch ceph-fix-cap-ref-leak-via-netfs-init_request.patch cpufreq-avoid-a-bad-reference-count-on-cpu-node.patch cpufreq-intel_pstate-make-hwp_notify_lock-a-raw-spinlock.patch firmware-sysfb-disable-sysfb-for-firmware-buffers-with-unknown-parent.patch gpio-davinci-fix-lazy-disable.patch io_uring-net-harden-multishot-termination-case-for-recv.patch mac802154-fix-potential-rcu-dereference-issue-in-mac802154_scan_worker.patch net-pcs-xpcs-fix-the-wrong-register-that-was-written-back.patch pidfs-check-for-valid-pid-namespace.patch rtla-fix-the-help-text-in-osnoise-and-timerlat-top-tools.patch tracing-hwlat-fix-a-race-during-cpuhp-processing.patch tracing-timerlat-drop-interface_lock-in-stop_kthread.patch tracing-timerlat-fix-a-race-during-cpuhp-processing.patch tracing-timerlat-fix-duplicated-kthread-creation-due-to-cpu-online-offline.patch --- diff --git 
a/queue-6.11/acpi-resource-add-asus-expertbook-b2502cva-to-irq1_level_low_skip_override.patch b/queue-6.11/acpi-resource-add-asus-expertbook-b2502cva-to-irq1_level_low_skip_override.patch new file mode 100644 index 00000000000..56677df5843 --- /dev/null +++ b/queue-6.11/acpi-resource-add-asus-expertbook-b2502cva-to-irq1_level_low_skip_override.patch @@ -0,0 +1,42 @@ +From 056301e7c7c886f96d799edd36f3406cc30e1822 Mon Sep 17 00:00:00 2001 +From: Hans de Goede +Date: Fri, 27 Sep 2024 16:16:06 +0200 +Subject: ACPI: resource: Add Asus ExpertBook B2502CVA to irq1_level_low_skip_override[] + +From: Hans de Goede + +commit 056301e7c7c886f96d799edd36f3406cc30e1822 upstream. + +Like other Asus ExpertBook models the B2502CVA has its keyboard IRQ (1) +described as ActiveLow in the DSDT, which the kernel overrides to EdgeHigh +which breaks the keyboard. + +Add the B2502CVA to the irq1_level_low_skip_override[] quirk table to fix +this. + +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217760 +Cc: All applicable +Signed-off-by: Hans de Goede +Link: https://patch.msgid.link/20240927141606.66826-4-hdegoede@redhat.com +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/resource.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/acpi/resource.c ++++ b/drivers/acpi/resource.c +@@ -511,6 +511,13 @@ static const struct dmi_system_id irq1_l + }, + }, + { ++ /* Asus ExpertBook B2502CVA */ ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), ++ DMI_MATCH(DMI_BOARD_NAME, "B2502CVA"), ++ }, ++ }, ++ { + /* Asus Vivobook Go E1404GA* */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), diff --git a/queue-6.11/acpi-resource-add-asus-vivobook-x1704vap-to-irq1_level_low_skip_override.patch b/queue-6.11/acpi-resource-add-asus-vivobook-x1704vap-to-irq1_level_low_skip_override.patch new file mode 100644 index 00000000000..05f3b06cfac --- /dev/null +++ b/queue-6.11/acpi-resource-add-asus-vivobook-x1704vap-to-irq1_level_low_skip_override.patch @@ -0,0 +1,44 @@ +From 2f80ce0b78c340e332f04a5801dee5e4ac8cfaeb Mon Sep 17 00:00:00 2001 +From: Hans de Goede +Date: Fri, 27 Sep 2024 16:16:05 +0200 +Subject: ACPI: resource: Add Asus Vivobook X1704VAP to irq1_level_low_skip_override[] + +From: Hans de Goede + +commit 2f80ce0b78c340e332f04a5801dee5e4ac8cfaeb upstream. + +Like other Asus Vivobook models the X1704VAP has its keyboard IRQ (1) +described as ActiveLow in the DSDT, which the kernel overrides to EdgeHigh +which breaks the keyboard. + +Add the X1704VAP to the irq1_level_low_skip_override[] quirk table to fix +this. + +Reported-by: Lamome Julien +Closes: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1078696 +Closes: https://lore.kernel.org/all/1226760b-4699-4529-bf57-6423938157a3@wanadoo.fr/ +Cc: All applicable +Signed-off-by: Hans de Goede +Link: https://patch.msgid.link/20240927141606.66826-3-hdegoede@redhat.com +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/resource.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/acpi/resource.c ++++ b/drivers/acpi/resource.c +@@ -441,6 +441,13 @@ static const struct dmi_system_id irq1_l + }, + }, + { ++ /* Asus Vivobook X1704VAP */ ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), ++ DMI_MATCH(DMI_BOARD_NAME, "X1704VAP"), ++ }, ++ }, ++ { + /* Asus ExpertBook B1402CBA */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), diff --git a/queue-6.11/acpi-resource-loosen-the-asus-e1404gab-dmi-match-to-also-cover-the-e1404ga.patch b/queue-6.11/acpi-resource-loosen-the-asus-e1404gab-dmi-match-to-also-cover-the-e1404ga.patch new file mode 100644 index 00000000000..dcb2923b24d --- /dev/null +++ b/queue-6.11/acpi-resource-loosen-the-asus-e1404gab-dmi-match-to-also-cover-the-e1404ga.patch @@ -0,0 +1,50 @@ +From 63539defee17bf0cbd8e24078cf103efee9c6633 Mon Sep 17 00:00:00 2001 +From: Hans de Goede +Date: Fri, 27 Sep 2024 16:16:04 +0200 +Subject: ACPI: resource: Loosen the Asus E1404GAB DMI match to also cover the E1404GA + +From: Hans de Goede + +commit 63539defee17bf0cbd8e24078cf103efee9c6633 upstream. + +Like other Asus Vivobooks, the Asus Vivobook Go E1404GA has a DSDT +describing IRQ 1 as ActiveLow, while the kernel overrides to Edge_High. + + $ sudo dmesg | grep DMI:.*BIOS + [ 0.000000] DMI: ASUSTeK COMPUTER INC. Vivobook Go E1404GA_E1404GA/E1404GA, BIOS E1404GA.302 08/23/2023 + $ sudo cp /sys/firmware/acpi/tables/DSDT dsdt.dat + $ iasl -d dsdt.dat + $ grep -A 30 PS2K dsdt.dsl | grep IRQ -A 1 + IRQ (Level, ActiveLow, Exclusive, ) + {1} + +There already is an entry in the irq1_level_low_skip_override[] DMI match +table for the "E1404GAB", change this to match on "E1404GA" to cover +the E1404GA model as well (DMI_MATCH() does a substring match). 
+ +Reported-by: Paul Menzel +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219224 +Cc: All applicable +Signed-off-by: Hans de Goede +Link: https://patch.msgid.link/20240927141606.66826-2-hdegoede@redhat.com +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/resource.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/acpi/resource.c ++++ b/drivers/acpi/resource.c +@@ -504,10 +504,10 @@ static const struct dmi_system_id irq1_l + }, + }, + { +- /* Asus Vivobook Go E1404GAB */ ++ /* Asus Vivobook Go E1404GA* */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), +- DMI_MATCH(DMI_BOARD_NAME, "E1404GAB"), ++ DMI_MATCH(DMI_BOARD_NAME, "E1404GA"), + }, + }, + { diff --git a/queue-6.11/acpi-resource-remove-duplicate-asus-e1504gab-irq-override.patch b/queue-6.11/acpi-resource-remove-duplicate-asus-e1504gab-irq-override.patch new file mode 100644 index 00000000000..d1bb5f48307 --- /dev/null +++ b/queue-6.11/acpi-resource-remove-duplicate-asus-e1504gab-irq-override.patch @@ -0,0 +1,53 @@ +From 65bdebf38e5fac7c56a9e05d3479a707e6dc783c Mon Sep 17 00:00:00 2001 +From: Hans de Goede +Date: Fri, 27 Sep 2024 16:16:03 +0200 +Subject: ACPI: resource: Remove duplicate Asus E1504GAB IRQ override + +From: Hans de Goede + +commit 65bdebf38e5fac7c56a9e05d3479a707e6dc783c upstream. + +Commit d2aaf1996504 ("ACPI: resource: Add DMI quirks for ASUS Vivobook +E1504GA and E1504GAB") does exactly what the subject says, adding DMI +matches for both the E1504GA and E1504GAB. + +But DMI_MATCH() does a substring match, so checking for E1504GA will also +match E1504GAB. + +Drop the unnecessary E1504GAB entry since that is covered already by +the E1504GA entry. + +Fixes: d2aaf1996504 ("ACPI: resource: Add DMI quirks for ASUS Vivobook E1504GA and E1504GAB") +Cc: All applicable +Signed-off-by: Hans de Goede +Link: https://patch.msgid.link/20240927141606.66826-1-hdegoede@redhat.com +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/resource.c | 9 +-------- + 1 file changed, 1 insertion(+), 8 deletions(-) + +--- a/drivers/acpi/resource.c ++++ b/drivers/acpi/resource.c +@@ -511,20 +511,13 @@ static const struct dmi_system_id irq1_l + }, + }, + { +- /* Asus Vivobook E1504GA */ ++ /* Asus Vivobook E1504GA* */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "E1504GA"), + }, + }, + { +- /* Asus Vivobook E1504GAB */ +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), +- DMI_MATCH(DMI_BOARD_NAME, "E1504GAB"), +- }, +- }, +- { + /* Asus Vivobook Pro N6506MV */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), diff --git a/queue-6.11/acpi-video-add-backlight-native-quirk-for-dell-optiplex-5480-aio.patch b/queue-6.11/acpi-video-add-backlight-native-quirk-for-dell-optiplex-5480-aio.patch new file mode 100644 index 00000000000..51cbfc04941 --- /dev/null +++ b/queue-6.11/acpi-video-add-backlight-native-quirk-for-dell-optiplex-5480-aio.patch @@ -0,0 +1,59 @@ +From ac78288fe062b64e45a479eaae74aaaafcc8ecdd Mon Sep 17 00:00:00 2001 +From: Hans de Goede +Date: Wed, 18 Sep 2024 17:38:49 +0200 +Subject: ACPI: video: Add backlight=native quirk for Dell OptiPlex 5480 AIO + +From: Hans de Goede + +commit ac78288fe062b64e45a479eaae74aaaafcc8ecdd upstream. + +Dell All In One (AIO) models released after 2017 may use a backlight +controller board connected to an UART. + +In DSDT this uart port will be defined as: + + Name (_HID, "DELL0501") + Name (_CID, EisaId ("PNP0501") + +The Dell OptiPlex 5480 AIO has an ACPI device for one of its UARTs with +the above _HID + _CID. Loading the dell-uart-backlight driver fails with +the following errors: + +[ 18.261353] dell_uart_backlight serial0-0: Timed out waiting for response. 
+[ 18.261356] dell_uart_backlight serial0-0: error -ETIMEDOUT: getting firmware version +[ 18.261359] dell_uart_backlight serial0-0: probe with driver dell_uart_backlight failed with error -110 + +Indicating that there is no backlight controller board attached to +the UART, while the GPU's native backlight control method does work. + +Add a quirk to use the GPU's native backlight control method on this model. + +Fixes: cd8e468efb4f ("ACPI: video: Add Dell UART backlight controller detection") +Cc: All applicable +Signed-off-by: Hans de Goede +Link: https://patch.msgid.link/20240918153849.37221-1-hdegoede@redhat.com +[ rjw: Changelog edit ] +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/video_detect.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/drivers/acpi/video_detect.c ++++ b/drivers/acpi/video_detect.c +@@ -845,6 +845,15 @@ static const struct dmi_system_id video_ + * which need native backlight control nevertheless. + */ + { ++ /* https://github.com/zabbly/linux/issues/26 */ ++ .callback = video_detect_force_native, ++ /* Dell OptiPlex 5480 AIO */ ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 5480 AIO"), ++ }, ++ }, ++ { + /* https://bugzilla.redhat.com/show_bug.cgi?id=2303936 */ + .callback = video_detect_force_native, + /* Dell OptiPlex 7760 AIO */ diff --git a/queue-6.11/bluetooth-hci_event-align-br-edr-just_works-paring-with-le.patch b/queue-6.11/bluetooth-hci_event-align-br-edr-just_works-paring-with-le.patch new file mode 100644 index 00000000000..4cd38a0747d --- /dev/null +++ b/queue-6.11/bluetooth-hci_event-align-br-edr-just_works-paring-with-le.patch @@ -0,0 +1,52 @@ +From b25e11f978b63cb7857890edb3a698599cddb10e Mon Sep 17 00:00:00 2001 +From: Luiz Augusto von Dentz +Date: Thu, 12 Sep 2024 12:17:00 -0400 +Subject: Bluetooth: hci_event: Align BR/EDR JUST_WORKS paring with LE + +From: Luiz Augusto von Dentz + +commit 
b25e11f978b63cb7857890edb3a698599cddb10e upstream. + +This aligned BR/EDR JUST_WORKS method with LE which since 92516cd97fd4 +("Bluetooth: Always request for user confirmation for Just Works") +always request user confirmation with confirm_hint set since the +likes of bluetoothd have dedicated policy around JUST_WORKS method +(e.g. main.conf:JustWorksRepairing). + +CVE: CVE-2024-8805 +Cc: stable@vger.kernel.org +Fixes: ba15a58b179e ("Bluetooth: Fix SSP acceptor just-works confirmation without MITM") +Signed-off-by: Luiz Augusto von Dentz +Tested-by: Kiran K +Signed-off-by: Greg Kroah-Hartman +--- + net/bluetooth/hci_event.c | 13 +++++-------- + 1 file changed, 5 insertions(+), 8 deletions(-) + +--- a/net/bluetooth/hci_event.c ++++ b/net/bluetooth/hci_event.c +@@ -5324,19 +5324,16 @@ static void hci_user_confirm_request_evt + goto unlock; + } + +- /* If no side requires MITM protection; auto-accept */ ++ /* If no side requires MITM protection; use JUST_CFM method */ + if ((!loc_mitm || conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) && + (!rem_mitm || conn->io_capability == HCI_IO_NO_INPUT_OUTPUT)) { + +- /* If we're not the initiators request authorization to +- * proceed from user space (mgmt_user_confirm with +- * confirm_hint set to 1). The exception is if neither +- * side had MITM or if the local IO capability is +- * NoInputNoOutput, in which case we do auto-accept ++ /* If we're not the initiator of request authorization and the ++ * local IO capability is not NoInputNoOutput, use JUST_WORKS ++ * method (mgmt_user_confirm with confirm_hint set to 1). 
+ */ + if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) && +- conn->io_capability != HCI_IO_NO_INPUT_OUTPUT && +- (loc_mitm || rem_mitm)) { ++ conn->io_capability != HCI_IO_NO_INPUT_OUTPUT) { + bt_dev_dbg(hdev, "Confirming auto-accept as acceptor"); + confirm_hint = 1; + goto confirm; diff --git a/queue-6.11/btrfs-drop-the-backref-cache-during-relocation-if-we-commit.patch b/queue-6.11/btrfs-drop-the-backref-cache-during-relocation-if-we-commit.patch new file mode 100644 index 00000000000..d6f13337724 --- /dev/null +++ b/queue-6.11/btrfs-drop-the-backref-cache-during-relocation-if-we-commit.patch @@ -0,0 +1,207 @@ +From db7e68b522c01eb666cfe1f31637775f18997811 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Tue, 24 Sep 2024 16:50:22 -0400 +Subject: btrfs: drop the backref cache during relocation if we commit + +From: Josef Bacik + +commit db7e68b522c01eb666cfe1f31637775f18997811 upstream. + +Since the inception of relocation we have maintained the backref cache +across transaction commits, updating the backref cache with the new +bytenr whenever we COWed blocks that were in the cache, and then +updating their bytenr once we detected a transaction id change. + +This works as long as we're only ever modifying blocks, not changing the +structure of the tree. + +However relocation does in fact change the structure of the tree. For +example, if we are relocating a data extent, we will look up all the +leaves that point to this data extent. We will then call +do_relocation() on each of these leaves, which will COW down to the leaf +and then update the file extent location. + +But, a key feature of do_relocation() is the pending list. This is all +the pending nodes that we modified when we updated the file extent item. +We will then process all of these blocks via finish_pending_nodes, which +calls do_relocation() on all of the nodes that led up to that leaf. + +The purpose of this is to make sure we don't break sharing unless we +absolutely have to. 
Consider the case that we have 3 snapshots that all +point to this leaf through the same nodes, the initial COW would have +created a whole new path. If we did this for all 3 snapshots we would +end up with 3x the number of nodes we had originally. To avoid this we +will cycle through each of the snapshots that point to each of these +nodes and update their pointers to point at the new nodes. + +Once we update the pointer to the new node we will drop the node we +removed the link for and all of its children via btrfs_drop_subtree(). +This is essentially just btrfs_drop_snapshot(), but for an arbitrary +point in the snapshot. + +The problem with this is that we will never reflect this in the backref +cache. If we do this btrfs_drop_snapshot() for a node that is in the +backref tree, we will leave the node in the backref tree. This becomes +a problem when we change the transid, as now the backref cache has +entire subtrees that no longer exist, but exist as if they still are +pointed to by the same roots. + +In the best case scenario you end up with "adding refs to an existing +tree ref" errors from insert_inline_extent_backref(), where we attempt +to link in nodes on roots that are no longer valid. + +Worst case you will double free some random block and re-use it when +there's still references to the block. + +This is extremely subtle, and the consequences are quite bad. There +isn't a way to make sure our backref cache is consistent between +transid's. + +In order to fix this we need to simply evict the entire backref cache +anytime we cross transid's. This reduces performance in that we have to +rebuild this backref cache every time we change transid's, but fixes the +bug. + +This has existed since relocation was added, and is a pretty critical +bug. 
There's a lot more cleanup that can be done now that this +functionality is going away, but this patch is as small as possible in +order to fix the problem and make it easy for us to backport it to all +the kernels it needs to be backported to. + +Followup series will dismantle more of this code and simplify relocation +drastically to remove this functionality. + +We have a reproducer that reproduced the corruption within a few minutes +of running. With this patch it survives several iterations/hours of +running the reproducer. + +Fixes: 3fd0a5585eb9 ("Btrfs: Metadata ENOSPC handling for balance") +CC: stable@vger.kernel.org +Reviewed-by: Boris Burkov +Signed-off-by: Josef Bacik +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/backref.c | 12 +++++--- + fs/btrfs/relocation.c | 75 ++------------------------------------------------ + 2 files changed, 11 insertions(+), 76 deletions(-) + +--- a/fs/btrfs/backref.c ++++ b/fs/btrfs/backref.c +@@ -3179,10 +3179,14 @@ void btrfs_backref_release_cache(struct + btrfs_backref_cleanup_node(cache, node); + } + +- cache->last_trans = 0; +- +- for (i = 0; i < BTRFS_MAX_LEVEL; i++) +- ASSERT(list_empty(&cache->pending[i])); ++ for (i = 0; i < BTRFS_MAX_LEVEL; i++) { ++ while (!list_empty(&cache->pending[i])) { ++ node = list_first_entry(&cache->pending[i], ++ struct btrfs_backref_node, ++ list); ++ btrfs_backref_cleanup_node(cache, node); ++ } ++ } + ASSERT(list_empty(&cache->pending_edge)); + ASSERT(list_empty(&cache->useless_node)); + ASSERT(list_empty(&cache->changed)); +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -232,70 +232,6 @@ static struct btrfs_backref_node *walk_d + return NULL; + } + +-static void update_backref_node(struct btrfs_backref_cache *cache, +- struct btrfs_backref_node *node, u64 bytenr) +-{ +- struct rb_node *rb_node; +- rb_erase(&node->rb_node, &cache->rb_root); +- node->bytenr = bytenr; +- rb_node = rb_simple_insert(&cache->rb_root, node->bytenr, 
&node->rb_node); +- if (rb_node) +- btrfs_backref_panic(cache->fs_info, bytenr, -EEXIST); +-} +- +-/* +- * update backref cache after a transaction commit +- */ +-static int update_backref_cache(struct btrfs_trans_handle *trans, +- struct btrfs_backref_cache *cache) +-{ +- struct btrfs_backref_node *node; +- int level = 0; +- +- if (cache->last_trans == 0) { +- cache->last_trans = trans->transid; +- return 0; +- } +- +- if (cache->last_trans == trans->transid) +- return 0; +- +- /* +- * detached nodes are used to avoid unnecessary backref +- * lookup. transaction commit changes the extent tree. +- * so the detached nodes are no longer useful. +- */ +- while (!list_empty(&cache->detached)) { +- node = list_entry(cache->detached.next, +- struct btrfs_backref_node, list); +- btrfs_backref_cleanup_node(cache, node); +- } +- +- while (!list_empty(&cache->changed)) { +- node = list_entry(cache->changed.next, +- struct btrfs_backref_node, list); +- list_del_init(&node->list); +- BUG_ON(node->pending); +- update_backref_node(cache, node, node->new_bytenr); +- } +- +- /* +- * some nodes can be left in the pending list if there were +- * errors during processing the pending nodes. 
+- */ +- for (level = 0; level < BTRFS_MAX_LEVEL; level++) { +- list_for_each_entry(node, &cache->pending[level], list) { +- BUG_ON(!node->pending); +- if (node->bytenr == node->new_bytenr) +- continue; +- update_backref_node(cache, node, node->new_bytenr); +- } +- } +- +- cache->last_trans = 0; +- return 1; +-} +- + static bool reloc_root_is_dead(const struct btrfs_root *root) + { + /* +@@ -551,9 +487,6 @@ static int clone_backref_node(struct btr + struct btrfs_backref_edge *new_edge; + struct rb_node *rb_node; + +- if (cache->last_trans > 0) +- update_backref_cache(trans, cache); +- + rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start); + if (rb_node) { + node = rb_entry(rb_node, struct btrfs_backref_node, rb_node); +@@ -3698,11 +3631,9 @@ static noinline_for_stack int relocate_b + break; + } + restart: +- if (update_backref_cache(trans, &rc->backref_cache)) { +- btrfs_end_transaction(trans); +- trans = NULL; +- continue; +- } ++ if (rc->backref_cache.last_trans != trans->transid) ++ btrfs_backref_release_cache(&rc->backref_cache); ++ rc->backref_cache.last_trans = trans->transid; + + ret = find_next_extent(rc, path, &key); + if (ret < 0) diff --git a/queue-6.11/btrfs-fix-a-null-pointer-dereference-when-failed-to-start-a-new-trasacntion.patch b/queue-6.11/btrfs-fix-a-null-pointer-dereference-when-failed-to-start-a-new-trasacntion.patch new file mode 100644 index 00000000000..bfb81a20346 --- /dev/null +++ b/queue-6.11/btrfs-fix-a-null-pointer-dereference-when-failed-to-start-a-new-trasacntion.patch @@ -0,0 +1,89 @@ +From c3b47f49e83197e8dffd023ec568403bcdbb774b Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Sat, 28 Sep 2024 08:05:58 +0930 +Subject: btrfs: fix a NULL pointer dereference when failed to start a new trasacntion + +From: Qu Wenruo + +commit c3b47f49e83197e8dffd023ec568403bcdbb774b upstream. + +[BUG] +Syzbot reported a NULL pointer dereference with the following crash: + + FAULT_INJECTION: forcing a failure. 
+ start_transaction+0x830/0x1670 fs/btrfs/transaction.c:676 + prepare_to_relocate+0x31f/0x4c0 fs/btrfs/relocation.c:3642 + relocate_block_group+0x169/0xd20 fs/btrfs/relocation.c:3678 + ... + BTRFS info (device loop0): balance: ended with status: -12 + Oops: general protection fault, probably for non-canonical address 0xdffffc00000000cc: 0000 [#1] PREEMPT SMP KASAN NOPTI + KASAN: null-ptr-deref in range [0x0000000000000660-0x0000000000000667] + RIP: 0010:btrfs_update_reloc_root+0x362/0xa80 fs/btrfs/relocation.c:926 + Call Trace: + + commit_fs_roots+0x2ee/0x720 fs/btrfs/transaction.c:1496 + btrfs_commit_transaction+0xfaf/0x3740 fs/btrfs/transaction.c:2430 + del_balance_item fs/btrfs/volumes.c:3678 [inline] + reset_balance_state+0x25e/0x3c0 fs/btrfs/volumes.c:3742 + btrfs_balance+0xead/0x10c0 fs/btrfs/volumes.c:4574 + btrfs_ioctl_balance+0x493/0x7c0 fs/btrfs/ioctl.c:3673 + vfs_ioctl fs/ioctl.c:51 [inline] + __do_sys_ioctl fs/ioctl.c:907 [inline] + __se_sys_ioctl+0xf9/0x170 fs/ioctl.c:893 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + +[CAUSE] +The allocation failure happens at the start_transaction() inside +prepare_to_relocate(), and during the error handling we call +unset_reloc_control(), which sets fs_info->reloc_ctl to NULL. + +Then we continue the error path cleanup in btrfs_balance() by calling +reset_balance_state() which will call del_balance_item() to fully delete +the balance item in the root tree. + +However during the small window between set_reloc_control() and +unset_reloc_control(), we can have a subvolume tree update and create a +reloc_root for that subvolume. + +Then we go into the final btrfs_commit_transaction() of +del_balance_item(), and into btrfs_update_reloc_root() inside +commit_fs_roots(). 
+ +That function checks if fs_info->reloc_ctl is in the merge_reloc_tree +stage, but since fs_info->reloc_ctl is NULL, it results in a NULL pointer +dereference. + +[FIX] +Just add an extra check on fs_info->reloc_ctl inside +btrfs_update_reloc_root(), before checking +fs_info->reloc_ctl->merge_reloc_tree. + +That DEAD_RELOC_TREE handling is to prevent further modification to the +reloc tree during merge stage, but since there is no reloc_ctl at all, +we do not need to bother with that. + +Reported-by: syzbot+283673dbc38527ef9f3d@syzkaller.appspotmail.com +Link: https://lore.kernel.org/linux-btrfs/66f6bfa7.050a0220.38ace9.0019.GAE@google.com/ +CC: stable@vger.kernel.org # 4.19+ +Reviewed-by: Josef Bacik +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/relocation.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -923,7 +923,7 @@ int btrfs_update_reloc_root(struct btrfs + btrfs_grab_root(reloc_root); + + /* root->reloc_root will stay until current relocation finished */ +- if (fs_info->reloc_ctl->merge_reloc_tree && ++ if (fs_info->reloc_ctl && fs_info->reloc_ctl->merge_reloc_tree && + btrfs_root_refs(root_item) == 0) { + set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state); + /* diff --git a/queue-6.11/btrfs-send-fix-buffer-overflow-detection-when-copying-path-to-cache-entry.patch b/queue-6.11/btrfs-send-fix-buffer-overflow-detection-when-copying-path-to-cache-entry.patch new file mode 100644 index 00000000000..20d3ad5c2a7 --- /dev/null +++ b/queue-6.11/btrfs-send-fix-buffer-overflow-detection-when-copying-path-to-cache-entry.patch @@ -0,0 +1,142 @@ +From 96c6ca71572a3556ed0c37237305657ff47174b7 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Thu, 19 Sep 2024 22:20:34 +0100 +Subject: btrfs: send: fix buffer overflow detection when copying path to cache entry + +From: Filipe Manana + +commit 
96c6ca71572a3556ed0c37237305657ff47174b7 upstream. + +Starting with commit c0247d289e73 ("btrfs: send: annotate struct +name_cache_entry with __counted_by()") we annotated the variable length +array "name" from the name_cache_entry structure with __counted_by() to +improve overflow detection. However that alone was not correct, because +the length of that array does not match the "name_len" field - it matches +that plus 1 to include the NUL string terminator, so that makes a +fortified kernel think there's an overflow and report a splat like this: + + strcpy: detected buffer overflow: 20 byte write of buffer size 19 + WARNING: CPU: 3 PID: 3310 at __fortify_report+0x45/0x50 + CPU: 3 UID: 0 PID: 3310 Comm: btrfs Not tainted 6.11.0-prnet #1 + Hardware name: CompuLab Ltd. sbc-ihsw/Intense-PC2 (IPC2), BIOS IPC2_3.330.7 X64 03/15/2018 + RIP: 0010:__fortify_report+0x45/0x50 + Code: 48 8b 34 (...) + RSP: 0018:ffff97ebc0d6f650 EFLAGS: 00010246 + RAX: 7749924ef60fa600 RBX: ffff8bf5446a521a RCX: 0000000000000027 + RDX: 00000000ffffdfff RSI: ffff97ebc0d6f548 RDI: ffff8bf84e7a1cc8 + RBP: ffff8bf548574080 R08: ffffffffa8c40e10 R09: 0000000000005ffd + R10: 0000000000000004 R11: ffffffffa8c70e10 R12: ffff8bf551eef400 + R13: 0000000000000000 R14: 0000000000000013 R15: 00000000000003a8 + FS: 00007fae144de8c0(0000) GS:ffff8bf84e780000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007fae14691690 CR3: 00000001027a2003 CR4: 00000000001706f0 + Call Trace: + + ? __warn+0x12a/0x1d0 + ? __fortify_report+0x45/0x50 + ? report_bug+0x154/0x1c0 + ? handle_bug+0x42/0x70 + ? exc_invalid_op+0x1a/0x50 + ? asm_exc_invalid_op+0x1a/0x20 + ? __fortify_report+0x45/0x50 + __fortify_panic+0x9/0x10 + __get_cur_name_and_parent+0x3bc/0x3c0 + get_cur_path+0x207/0x3b0 + send_extent_data+0x709/0x10d0 + ? find_parent_nodes+0x22df/0x25d0 + ? mas_nomem+0x13/0x90 + ? mtree_insert_range+0xa5/0x110 + ? btrfs_lru_cache_store+0x5f/0x1e0 + ? 
iterate_extent_inodes+0x52d/0x5a0 + process_extent+0xa96/0x11a0 + ? __pfx_lookup_backref_cache+0x10/0x10 + ? __pfx_store_backref_cache+0x10/0x10 + ? __pfx_iterate_backrefs+0x10/0x10 + ? __pfx_check_extent_item+0x10/0x10 + changed_cb+0x6fa/0x930 + ? tree_advance+0x362/0x390 + ? memcmp_extent_buffer+0xd7/0x160 + send_subvol+0xf0a/0x1520 + btrfs_ioctl_send+0x106b/0x11d0 + ? __pfx___clone_root_cmp_sort+0x10/0x10 + _btrfs_ioctl_send+0x1ac/0x240 + btrfs_ioctl+0x75b/0x850 + __se_sys_ioctl+0xca/0x150 + do_syscall_64+0x85/0x160 + ? __count_memcg_events+0x69/0x100 + ? handle_mm_fault+0x1327/0x15c0 + ? __se_sys_rt_sigprocmask+0xf1/0x180 + ? syscall_exit_to_user_mode+0x75/0xa0 + ? do_syscall_64+0x91/0x160 + ? do_user_addr_fault+0x21d/0x630 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + RIP: 0033:0x7fae145eeb4f + Code: 00 48 89 (...) + RSP: 002b:00007ffdf1cb09b0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 + RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007fae145eeb4f + RDX: 00007ffdf1cb0ad0 RSI: 0000000040489426 RDI: 0000000000000004 + RBP: 00000000000078fe R08: 00007fae144006c0 R09: 00007ffdf1cb0927 + R10: 0000000000000008 R11: 0000000000000246 R12: 00007ffdf1cb1ce8 + R13: 0000000000000003 R14: 000055c499fab2e0 R15: 0000000000000004 + + +Fix this by not storing the NUL string terminator since we don't actually +need it for name cache entries, this way "name_len" corresponds to the +actual size of the "name" array. 
This requires marking the "name" array +field with __nonstring and using memcpy() instead of strcpy() as +recommended by the guidelines at: + + https://github.com/KSPP/linux/issues/90 + +Reported-by: David Arendt +Link: https://lore.kernel.org/linux-btrfs/cee4591a-3088-49ba-99b8-d86b4242b8bd@prnet.org/ +Fixes: c0247d289e73 ("btrfs: send: annotate struct name_cache_entry with __counted_by()") +CC: stable@vger.kernel.org # 6.11 +Tested-by: David Arendt +Reviewed-by: Josef Bacik +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/send.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c +index 7f48ba6c1c77..5871ca845b0e 100644 +--- a/fs/btrfs/send.c ++++ b/fs/btrfs/send.c +@@ -346,8 +346,10 @@ struct name_cache_entry { + u64 parent_gen; + int ret; + int need_later_update; ++ /* Name length without NUL terminator. */ + int name_len; +- char name[] __counted_by(name_len); ++ /* Not NUL terminated. */ ++ char name[] __counted_by(name_len) __nonstring; + }; + + /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */ +@@ -2388,7 +2390,7 @@ out_cache: + /* + * Store the result of the lookup in the name cache. 
+ */ +- nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL); ++ nce = kmalloc(sizeof(*nce) + fs_path_len(dest), GFP_KERNEL); + if (!nce) { + ret = -ENOMEM; + goto out; +@@ -2400,7 +2402,7 @@ out_cache: + nce->parent_gen = *parent_gen; + nce->name_len = fs_path_len(dest); + nce->ret = ret; +- strcpy(nce->name, dest->start); ++ memcpy(nce->name, dest->start, nce->name_len); + + if (ino < sctx->send_progress) + nce->need_later_update = 0; +-- +2.46.2 + diff --git a/queue-6.11/btrfs-send-fix-invalid-clone-operation-for-file-that-got-its-size-decreased.patch b/queue-6.11/btrfs-send-fix-invalid-clone-operation-for-file-that-got-its-size-decreased.patch new file mode 100644 index 00000000000..cdbbdf69177 --- /dev/null +++ b/queue-6.11/btrfs-send-fix-invalid-clone-operation-for-file-that-got-its-size-decreased.patch @@ -0,0 +1,128 @@ +From fa630df665aa9ddce3a96ce7b54e10a38e4d2a2b Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Fri, 27 Sep 2024 10:50:12 +0100 +Subject: btrfs: send: fix invalid clone operation for file that got its size decreased + +From: Filipe Manana + +commit fa630df665aa9ddce3a96ce7b54e10a38e4d2a2b upstream. + +During an incremental send we may end up sending an invalid clone +operation, for the last extent of a file which ends at an unaligned offset +that matches the final i_size of the file in the send snapshot, in case +the file had its initial size (the size in the parent snapshot) decreased +in the send snapshot. In this case the destination will fail to apply the +clone operation because its end offset is not sector size aligned and it +ends before the current size of the file. + +Sending the truncate operation always happens when we finish processing an +inode, after we process all its extents (and xattrs, names, etc). So fix +this by ensuring the file has a valid size before we send a clone +operation for an unaligned extent that ends at the final i_size of the +file. 
The size we truncate to matches the start offset of the clone range +but it could be any value between that start offset and the final size of +the file since the clone operation will expand the i_size if the current +size is smaller than the end offset. The start offset of the range was +chosen because it's always sector size aligned and avoids a truncation +into the middle of a page, which results in dirtying the page due to +filling part of it with zeroes and then making the clone operation at the +receiver trigger IO. + +The following test reproduces the issue: + + $ cat test.sh + #!/bin/bash + + DEV=/dev/sdi + MNT=/mnt/sdi + + mkfs.btrfs -f $DEV + mount $DEV $MNT + + # Create a file with a size of 256K + 5 bytes, having two extents, one + # with a size of 128K and another one with a size of 128K + 5 bytes. + last_ext_size=$((128 * 1024 + 5)) + xfs_io -f -d -c "pwrite -S 0xab -b 128K 0 128K" \ + -c "pwrite -S 0xcd -b $last_ext_size 128K $last_ext_size" \ + $MNT/foo + + # Another file which we will later clone foo into, but initially with + # a larger size than foo. + xfs_io -f -c "pwrite -S 0xef 0 1M" $MNT/bar + + btrfs subvolume snapshot -r $MNT/ $MNT/snap1 + + # Now resize bar and clone foo into it. + xfs_io -c "truncate 0" \ + -c "reflink $MNT/foo" $MNT/bar + + btrfs subvolume snapshot -r $MNT/ $MNT/snap2 + + rm -f /tmp/send-full /tmp/send-inc + btrfs send -f /tmp/send-full $MNT/snap1 + btrfs send -p $MNT/snap1 -f /tmp/send-inc $MNT/snap2 + + umount $MNT + mkfs.btrfs -f $DEV + mount $DEV $MNT + + btrfs receive -f /tmp/send-full $MNT + btrfs receive -f /tmp/send-inc $MNT + + umount $MNT + +Running it before this patch: + + $ ./test.sh + (...) + At subvol snap1 + At snapshot snap2 + ERROR: failed to clone extents to bar: Invalid argument + +A test case for fstests will be sent soon. 
+ +Reported-by: Ben Millwood +Link: https://lore.kernel.org/linux-btrfs/CAJhrHS2z+WViO2h=ojYvBPDLsATwLbg+7JaNCyYomv0fUxEpQQ@mail.gmail.com/ +Fixes: 46a6e10a1ab1 ("btrfs: send: allow cloning non-aligned extent if it ends at i_size") +CC: stable@vger.kernel.org # 6.11 +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/send.c | 23 ++++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/send.c ++++ b/fs/btrfs/send.c +@@ -6189,8 +6189,29 @@ static int send_write_or_clone(struct se + if (ret < 0) + return ret; + +- if (clone_root->offset + num_bytes == info.size) ++ if (clone_root->offset + num_bytes == info.size) { ++ /* ++ * The final size of our file matches the end offset, but it may ++ * be that its current size is larger, so we have to truncate it ++ * to any value between the start offset of the range and the ++ * final i_size, otherwise the clone operation is invalid ++ * because it's unaligned and it ends before the current EOF. ++ * We do this truncate to the final i_size when we finish ++ * processing the inode, but it's too late by then. And here we ++ * truncate to the start offset of the range because it's always ++ * sector size aligned while if it were the final i_size it ++ * would result in dirtying part of a page, filling part of a ++ * page with zeroes and then having the clone operation at the ++ * receiver trigger IO and wait for it due to the dirty page. 
++ */ ++ if (sctx->parent_root != NULL) { ++ ret = send_truncate(sctx, sctx->cur_ino, ++ sctx->cur_inode_gen, offset); ++ if (ret < 0) ++ return ret; ++ } + goto clone_data; ++ } + + write_data: + ret = send_extent_data(sctx, path, offset, num_bytes); diff --git a/queue-6.11/btrfs-wait-for-fixup-workers-before-stopping-cleaner-kthread-during-umount.patch b/queue-6.11/btrfs-wait-for-fixup-workers-before-stopping-cleaner-kthread-during-umount.patch new file mode 100644 index 00000000000..d364ccc9cf3 --- /dev/null +++ b/queue-6.11/btrfs-wait-for-fixup-workers-before-stopping-cleaner-kthread-during-umount.patch @@ -0,0 +1,227 @@ +From 41fd1e94066a815a7ab0a7025359e9b40e4b3576 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Tue, 1 Oct 2024 11:06:52 +0100 +Subject: btrfs: wait for fixup workers before stopping cleaner kthread during umount + +From: Filipe Manana + +commit 41fd1e94066a815a7ab0a7025359e9b40e4b3576 upstream. + +During unmount, at close_ctree(), we have the following steps in this order: + +1) Park the cleaner kthread - this doesn't destroy the kthread, it basically + halts its execution (wake ups against it work but do nothing); + +2) We stop the cleaner kthread - this results in freeing the respective + struct task_struct; + +3) We call btrfs_stop_all_workers() which waits for any jobs running in all + the work queues and then frees the work queues. + +Syzbot reported a case where a fixup worker resulted in a crash when doing +a delayed iput on its inode while attempting to wake up the cleaner at +btrfs_add_delayed_iput(), because the task_struct of the cleaner kthread +was already freed. This can happen during unmount because we don't wait +for any fixup workers still running before we call kthread_stop() against +the cleaner kthread, which stops and frees all its resources. + +Fix this by waiting for any fixup workers at close_ctree() before we call +kthread_stop() against the cleaner and run pending delayed iputs.
+ +The stack traces reported by syzbot were the following: + + BUG: KASAN: slab-use-after-free in __lock_acquire+0x77/0x2050 kernel/locking/lockdep.c:5065 + Read of size 8 at addr ffff8880272a8a18 by task kworker/u8:3/52 + + CPU: 1 UID: 0 PID: 52 Comm: kworker/u8:3 Not tainted 6.12.0-rc1-syzkaller #0 + Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 + Workqueue: btrfs-fixup btrfs_work_helper + Call Trace: + + __dump_stack lib/dump_stack.c:94 [inline] + dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 + print_address_description mm/kasan/report.c:377 [inline] + print_report+0x169/0x550 mm/kasan/report.c:488 + kasan_report+0x143/0x180 mm/kasan/report.c:601 + __lock_acquire+0x77/0x2050 kernel/locking/lockdep.c:5065 + lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5825 + __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] + _raw_spin_lock_irqsave+0xd5/0x120 kernel/locking/spinlock.c:162 + class_raw_spinlock_irqsave_constructor include/linux/spinlock.h:551 [inline] + try_to_wake_up+0xb0/0x1480 kernel/sched/core.c:4154 + btrfs_writepage_fixup_worker+0xc16/0xdf0 fs/btrfs/inode.c:2842 + btrfs_work_helper+0x390/0xc50 fs/btrfs/async-thread.c:314 + process_one_work kernel/workqueue.c:3229 [inline] + process_scheduled_works+0xa63/0x1850 kernel/workqueue.c:3310 + worker_thread+0x870/0xd30 kernel/workqueue.c:3391 + kthread+0x2f0/0x390 kernel/kthread.c:389 + ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 + ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 + + + Allocated by task 2: + kasan_save_stack mm/kasan/common.c:47 [inline] + kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 + unpoison_slab_object mm/kasan/common.c:319 [inline] + __kasan_slab_alloc+0x66/0x80 mm/kasan/common.c:345 + kasan_slab_alloc include/linux/kasan.h:247 [inline] + slab_post_alloc_hook mm/slub.c:4086 [inline] + slab_alloc_node mm/slub.c:4135 [inline] + kmem_cache_alloc_node_noprof+0x16b/0x320 mm/slub.c:4187 + 
alloc_task_struct_node kernel/fork.c:180 [inline] + dup_task_struct+0x57/0x8c0 kernel/fork.c:1107 + copy_process+0x5d1/0x3d50 kernel/fork.c:2206 + kernel_clone+0x223/0x880 kernel/fork.c:2787 + kernel_thread+0x1bc/0x240 kernel/fork.c:2849 + create_kthread kernel/kthread.c:412 [inline] + kthreadd+0x60d/0x810 kernel/kthread.c:765 + ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 + ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 + + Freed by task 61: + kasan_save_stack mm/kasan/common.c:47 [inline] + kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 + kasan_save_free_info+0x40/0x50 mm/kasan/generic.c:579 + poison_slab_object mm/kasan/common.c:247 [inline] + __kasan_slab_free+0x59/0x70 mm/kasan/common.c:264 + kasan_slab_free include/linux/kasan.h:230 [inline] + slab_free_hook mm/slub.c:2343 [inline] + slab_free mm/slub.c:4580 [inline] + kmem_cache_free+0x1a2/0x420 mm/slub.c:4682 + put_task_struct include/linux/sched/task.h:144 [inline] + delayed_put_task_struct+0x125/0x300 kernel/exit.c:228 + rcu_do_batch kernel/rcu/tree.c:2567 [inline] + rcu_core+0xaaa/0x17a0 kernel/rcu/tree.c:2823 + handle_softirqs+0x2c5/0x980 kernel/softirq.c:554 + __do_softirq kernel/softirq.c:588 [inline] + invoke_softirq kernel/softirq.c:428 [inline] + __irq_exit_rcu+0xf4/0x1c0 kernel/softirq.c:637 + irq_exit_rcu+0x9/0x30 kernel/softirq.c:649 + instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1037 [inline] + sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1037 + asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702 + + Last potentially related work creation: + kasan_save_stack+0x3f/0x60 mm/kasan/common.c:47 + __kasan_record_aux_stack+0xac/0xc0 mm/kasan/generic.c:541 + __call_rcu_common kernel/rcu/tree.c:3086 [inline] + call_rcu+0x167/0xa70 kernel/rcu/tree.c:3190 + context_switch kernel/sched/core.c:5318 [inline] + __schedule+0x184b/0x4ae0 kernel/sched/core.c:6675 + schedule_idle+0x56/0x90 kernel/sched/core.c:6793 + 
do_idle+0x56a/0x5d0 kernel/sched/idle.c:354 + cpu_startup_entry+0x42/0x60 kernel/sched/idle.c:424 + start_secondary+0x102/0x110 arch/x86/kernel/smpboot.c:314 + common_startup_64+0x13e/0x147 + + The buggy address belongs to the object at ffff8880272a8000 + which belongs to the cache task_struct of size 7424 + The buggy address is located 2584 bytes inside of + freed 7424-byte region [ffff8880272a8000, ffff8880272a9d00) + + The buggy address belongs to the physical page: + page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x272a8 + head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 + flags: 0xfff00000000040(head|node=0|zone=1|lastcpupid=0x7ff) + page_type: f5(slab) + raw: 00fff00000000040 ffff88801bafa500 dead000000000122 0000000000000000 + raw: 0000000000000000 0000000080040004 00000001f5000000 0000000000000000 + head: 00fff00000000040 ffff88801bafa500 dead000000000122 0000000000000000 + head: 0000000000000000 0000000080040004 00000001f5000000 0000000000000000 + head: 00fff00000000003 ffffea00009caa01 ffffffffffffffff 0000000000000000 + head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000 + page dumped because: kasan: bad access detected + page_owner tracks the page as allocated + page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 2, tgid 2 (kthreadd), ts 71247381401, free_ts 71214998153 + set_page_owner include/linux/page_owner.h:32 [inline] + post_alloc_hook+0x1f3/0x230 mm/page_alloc.c:1537 + prep_new_page mm/page_alloc.c:1545 [inline] + get_page_from_freelist+0x3039/0x3180 mm/page_alloc.c:3457 + __alloc_pages_noprof+0x256/0x6c0 mm/page_alloc.c:4733 + alloc_pages_mpol_noprof+0x3e8/0x680 mm/mempolicy.c:2265 + alloc_slab_page+0x6a/0x120 mm/slub.c:2413 + allocate_slab+0x5a/0x2f0 mm/slub.c:2579 + new_slab mm/slub.c:2632 [inline] + ___slab_alloc+0xcd1/0x14b0 mm/slub.c:3819 + __slab_alloc+0x58/0xa0 mm/slub.c:3909 
+ __slab_alloc_node mm/slub.c:3962 [inline] + slab_alloc_node mm/slub.c:4123 [inline] + kmem_cache_alloc_node_noprof+0x1fe/0x320 mm/slub.c:4187 + alloc_task_struct_node kernel/fork.c:180 [inline] + dup_task_struct+0x57/0x8c0 kernel/fork.c:1107 + copy_process+0x5d1/0x3d50 kernel/fork.c:2206 + kernel_clone+0x223/0x880 kernel/fork.c:2787 + kernel_thread+0x1bc/0x240 kernel/fork.c:2849 + create_kthread kernel/kthread.c:412 [inline] + kthreadd+0x60d/0x810 kernel/kthread.c:765 + ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 + ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 + page last free pid 5230 tgid 5230 stack trace: + reset_page_owner include/linux/page_owner.h:25 [inline] + free_pages_prepare mm/page_alloc.c:1108 [inline] + free_unref_page+0xcd0/0xf00 mm/page_alloc.c:2638 + discard_slab mm/slub.c:2678 [inline] + __put_partials+0xeb/0x130 mm/slub.c:3146 + put_cpu_partial+0x17c/0x250 mm/slub.c:3221 + __slab_free+0x2ea/0x3d0 mm/slub.c:4450 + qlink_free mm/kasan/quarantine.c:163 [inline] + qlist_free_all+0x9a/0x140 mm/kasan/quarantine.c:179 + kasan_quarantine_reduce+0x14f/0x170 mm/kasan/quarantine.c:286 + __kasan_slab_alloc+0x23/0x80 mm/kasan/common.c:329 + kasan_slab_alloc include/linux/kasan.h:247 [inline] + slab_post_alloc_hook mm/slub.c:4086 [inline] + slab_alloc_node mm/slub.c:4135 [inline] + kmem_cache_alloc_noprof+0x135/0x2a0 mm/slub.c:4142 + getname_flags+0xb7/0x540 fs/namei.c:139 + do_sys_openat2+0xd2/0x1d0 fs/open.c:1409 + do_sys_open fs/open.c:1430 [inline] + __do_sys_openat fs/open.c:1446 [inline] + __se_sys_openat fs/open.c:1441 [inline] + __x64_sys_openat+0x247/0x2a0 fs/open.c:1441 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + + Memory state around the buggy address: + ffff8880272a8900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8880272a8980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + >ffff8880272a8a00: fb fb fb fb 
fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff8880272a8a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8880272a8b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ================================================================== + +Reported-by: syzbot+8aaf2df2ef0164ffe1fb@syzkaller.appspotmail.com +Link: https://lore.kernel.org/linux-btrfs/66fb36b1.050a0220.aab67.003b.GAE@google.com/ +CC: stable@vger.kernel.org # 4.19+ +Reviewed-by: Qu Wenruo +Reviewed-by: Johannes Thumshirn +Reviewed-by: David Sterba +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/disk-io.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -4266,6 +4266,17 @@ void __cold close_ctree(struct btrfs_fs_ + btrfs_cleanup_defrag_inodes(fs_info); + + /* ++ * Wait for any fixup workers to complete. ++ * If we don't wait for them here and they are still running by the time ++ * we call kthread_stop() against the cleaner kthread further below, we ++ * get an use-after-free on the cleaner because the fixup worker adds an ++ * inode to the list of delayed iputs and then attempts to wakeup the ++ * cleaner kthread, which was already stopped and destroyed. We parked ++ * already the cleaner, but below we run all pending delayed iputs. ++ */ ++ btrfs_flush_workqueue(fs_info->fixup_workers); ++ ++ /* + * After we parked the cleaner kthread, ordered extents may have + * completed and created new delayed iputs. 
If one of the async reclaim + * tasks is running and in the RUN_DELAYED_IPUTS flush state, then we diff --git a/queue-6.11/ceph-fix-cap-ref-leak-via-netfs-init_request.patch b/queue-6.11/ceph-fix-cap-ref-leak-via-netfs-init_request.patch new file mode 100644 index 00000000000..e1fddb6b89d --- /dev/null +++ b/queue-6.11/ceph-fix-cap-ref-leak-via-netfs-init_request.patch @@ -0,0 +1,58 @@ +From ccda9910d8490f4fb067131598e4b2e986faa5a0 Mon Sep 17 00:00:00 2001 +From: Patrick Donnelly +Date: Wed, 2 Oct 2024 21:05:12 -0400 +Subject: ceph: fix cap ref leak via netfs init_request + +From: Patrick Donnelly + +commit ccda9910d8490f4fb067131598e4b2e986faa5a0 upstream. + +Log recovered from a user's cluster: + + <7>[ 5413.970692] ceph: get_cap_refs 00000000958c114b ret 1 got Fr + <7>[ 5413.970695] ceph: start_read 00000000958c114b, no cache cap + ... + <7>[ 5473.934609] ceph: my wanted = Fr, used = Fr, dirty - + <7>[ 5473.934616] ceph: revocation: pAsLsXsFr -> pAsLsXs (revoking Fr) + <7>[ 5473.934632] ceph: __ceph_caps_issued 00000000958c114b cap 00000000f7784259 issued pAsLsXs + <7>[ 5473.934638] ceph: check_caps 10000000e68.fffffffffffffffe file_want - used Fr dirty - flushing - issued pAsLsXs revoking Fr retain pAsLsXsFsr AUTHONLY NOINVAL FLUSH_FORCE + +The MDS subsequently complains that the kernel client is late releasing +caps. + +Approximately, a series of changes to this code by commits 49870056005c +("ceph: convert ceph_readpages to ceph_readahead"), 2de160417315 +("netfs: Change ->init_request() to return an error code") and +a5c9dc445139 ("ceph: Make ceph_init_request() check caps on readahead") +resulted in subtle resource cleanup to be missed. The main culprit is +the change in error handling in 2de160417315 which meant that a failure +in init_request() would no longer cause cleanup to be called. That +would prevent the ceph_put_cap_refs() call which would cleanup the +leaked cap ref. 
+ +Cc: stable@vger.kernel.org +Fixes: a5c9dc445139 ("ceph: Make ceph_init_request() check caps on readahead") +Link: https://tracker.ceph.com/issues/67008 +Signed-off-by: Patrick Donnelly +Reviewed-by: Ilya Dryomov +Signed-off-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/addr.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/fs/ceph/addr.c ++++ b/fs/ceph/addr.c +@@ -473,8 +473,11 @@ static int ceph_init_request(struct netf + rreq->netfs_priv = priv; + + out: +- if (ret < 0) ++ if (ret < 0) { ++ if (got) ++ ceph_put_cap_refs(ceph_inode(inode), got); + kfree(priv); ++ } + + return ret; + } diff --git a/queue-6.11/cpufreq-avoid-a-bad-reference-count-on-cpu-node.patch b/queue-6.11/cpufreq-avoid-a-bad-reference-count-on-cpu-node.patch new file mode 100644 index 00000000000..e8b97953906 --- /dev/null +++ b/queue-6.11/cpufreq-avoid-a-bad-reference-count-on-cpu-node.patch @@ -0,0 +1,54 @@ +From c0f02536fffbbec71aced36d52a765f8c4493dc2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Miquel=20Sabat=C3=A9=20Sol=C3=A0?= +Date: Tue, 17 Sep 2024 15:42:46 +0200 +Subject: cpufreq: Avoid a bad reference count on CPU node +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Miquel Sabaté Solà + +commit c0f02536fffbbec71aced36d52a765f8c4493dc2 upstream. + +In the parse_perf_domain function, if the call to +of_parse_phandle_with_args returns an error, then the reference to the +CPU device node that was acquired at the start of the function would not +be properly decremented. + +Address this by declaring the variable with the __free(device_node) +cleanup attribute. + +Signed-off-by: Miquel Sabaté Solà +Acked-by: Viresh Kumar +Link: https://patch.msgid.link/20240917134246.584026-1-mikisabate@gmail.com +Cc: All applicable +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/cpufreq.h | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +--- a/include/linux/cpufreq.h ++++ b/include/linux/cpufreq.h +@@ -1113,10 +1113,9 @@ static inline int parse_perf_domain(int + const char *cell_name, + struct of_phandle_args *args) + { +- struct device_node *cpu_np; + int ret; + +- cpu_np = of_cpu_device_node_get(cpu); ++ struct device_node *cpu_np __free(device_node) = of_cpu_device_node_get(cpu); + if (!cpu_np) + return -ENODEV; + +@@ -1124,9 +1123,6 @@ static inline int parse_perf_domain(int + args); + if (ret < 0) + return ret; +- +- of_node_put(cpu_np); +- + return 0; + } + diff --git a/queue-6.11/cpufreq-intel_pstate-make-hwp_notify_lock-a-raw-spinlock.patch b/queue-6.11/cpufreq-intel_pstate-make-hwp_notify_lock-a-raw-spinlock.patch new file mode 100644 index 00000000000..c9df5660205 --- /dev/null +++ b/queue-6.11/cpufreq-intel_pstate-make-hwp_notify_lock-a-raw-spinlock.patch @@ -0,0 +1,92 @@ +From 8b4865cd904650cbed7f2407e653934c621b8127 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= +Date: Thu, 19 Sep 2024 10:11:21 +0200 +Subject: cpufreq: intel_pstate: Make hwp_notify_lock a raw spinlock +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Uwe Kleine-König + +commit 8b4865cd904650cbed7f2407e653934c621b8127 upstream. + +notify_hwp_interrupt() is called via sysvec_thermal() -> +smp_thermal_vector() -> intel_thermal_interrupt() in hard irq context. +For this reason it must not use a simple spin_lock that sleeps with +PREEMPT_RT enabled. So convert it to a raw spinlock. + +Reported-by: xiao sheng wen +Link: https://bugs.debian.org/1076483 +Signed-off-by: Uwe Kleine-König +Acked-by: Srinivas Pandruvada +Acked-by: Sebastian Andrzej Siewior +Tested-by: xiao sheng wen +Link: https://patch.msgid.link/20240919081121.10784-2-ukleinek@debian.org +Cc: All applicable +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cpufreq/intel_pstate.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +--- a/drivers/cpufreq/intel_pstate.c ++++ b/drivers/cpufreq/intel_pstate.c +@@ -1623,7 +1623,7 @@ static void intel_pstate_notify_work(str + wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0); + } + +-static DEFINE_SPINLOCK(hwp_notify_lock); ++static DEFINE_RAW_SPINLOCK(hwp_notify_lock); + static cpumask_t hwp_intr_enable_mask; + + #define HWP_GUARANTEED_PERF_CHANGE_STATUS BIT(0) +@@ -1646,7 +1646,7 @@ void notify_hwp_interrupt(void) + if (!(value & status_mask)) + return; + +- spin_lock_irqsave(&hwp_notify_lock, flags); ++ raw_spin_lock_irqsave(&hwp_notify_lock, flags); + + if (!cpumask_test_cpu(this_cpu, &hwp_intr_enable_mask)) + goto ack_intr; +@@ -1654,13 +1654,13 @@ void notify_hwp_interrupt(void) + schedule_delayed_work(&all_cpu_data[this_cpu]->hwp_notify_work, + msecs_to_jiffies(10)); + +- spin_unlock_irqrestore(&hwp_notify_lock, flags); ++ raw_spin_unlock_irqrestore(&hwp_notify_lock, flags); + + return; + + ack_intr: + wrmsrl_safe(MSR_HWP_STATUS, 0); +- spin_unlock_irqrestore(&hwp_notify_lock, flags); ++ raw_spin_unlock_irqrestore(&hwp_notify_lock, flags); + } + + static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata) +@@ -1673,9 +1673,9 @@ static void intel_pstate_disable_hwp_int + /* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */ + wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); + +- spin_lock_irq(&hwp_notify_lock); ++ raw_spin_lock_irq(&hwp_notify_lock); + cancel_work = cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask); +- spin_unlock_irq(&hwp_notify_lock); ++ raw_spin_unlock_irq(&hwp_notify_lock); + + if (cancel_work) + cancel_delayed_work_sync(&cpudata->hwp_notify_work); +@@ -1690,10 +1690,10 @@ static void intel_pstate_enable_hwp_inte + if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) { + u64 interrupt_mask = HWP_GUARANTEED_PERF_CHANGE_REQ; + +- 
spin_lock_irq(&hwp_notify_lock); ++ raw_spin_lock_irq(&hwp_notify_lock); + INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work); + cpumask_set_cpu(cpudata->cpu, &hwp_intr_enable_mask); +- spin_unlock_irq(&hwp_notify_lock); ++ raw_spin_unlock_irq(&hwp_notify_lock); + + if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE)) + interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ; diff --git a/queue-6.11/firmware-sysfb-disable-sysfb-for-firmware-buffers-with-unknown-parent.patch b/queue-6.11/firmware-sysfb-disable-sysfb-for-firmware-buffers-with-unknown-parent.patch new file mode 100644 index 00000000000..5f1e9e081c6 --- /dev/null +++ b/queue-6.11/firmware-sysfb-disable-sysfb-for-firmware-buffers-with-unknown-parent.patch @@ -0,0 +1,54 @@ +From ad604f0a4c040dcb8faf44dc72db25e457c28076 Mon Sep 17 00:00:00 2001 +From: Thomas Zimmermann +Date: Tue, 24 Sep 2024 10:41:03 +0200 +Subject: firmware/sysfb: Disable sysfb for firmware buffers with unknown parent + +From: Thomas Zimmermann + +commit ad604f0a4c040dcb8faf44dc72db25e457c28076 upstream. + +The sysfb framebuffer handling only operates on graphics devices +that provide the system's firmware framebuffer. If that device is +not known, assume that any graphics device has been initialized by +firmware. + +Fixes a problem on i915 where sysfb does not release the firmware +framebuffer after the native graphics driver loaded. 
+ +Reported-by: Borah, Chaitanya Kumar +Closes: https://lore.kernel.org/dri-devel/SJ1PR11MB6129EFB8CE63D1EF6D932F94B96F2@SJ1PR11MB6129.namprd11.prod.outlook.com/ +Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12160 +Signed-off-by: Thomas Zimmermann +Fixes: b49420d6a1ae ("video/aperture: optionally match the device in sysfb_disable()") +Cc: Javier Martinez Canillas +Cc: Thomas Zimmermann +Cc: Helge Deller +Cc: Sam Ravnborg +Cc: Daniel Vetter +Cc: Alex Deucher +Cc: dri-devel@lists.freedesktop.org +Cc: Linux regression tracking (Thorsten Leemhuis) +Cc: # v6.11+ +Acked-by: Alex Deucher +Reviewed-by: Javier Martinez Canillas +Link: https://patchwork.freedesktop.org/patch/msgid/20240924084227.262271-1-tzimmermann@suse.de +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/sysfb.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/firmware/sysfb.c ++++ b/drivers/firmware/sysfb.c +@@ -67,9 +67,11 @@ static bool sysfb_unregister(void) + void sysfb_disable(struct device *dev) + { + struct screen_info *si = &screen_info; ++ struct device *parent; + + mutex_lock(&disable_lock); +- if (!dev || dev == sysfb_parent_dev(si)) { ++ parent = sysfb_parent_dev(si); ++ if (!dev || !parent || dev == parent) { + sysfb_unregister(); + disabled = true; + } diff --git a/queue-6.11/gpio-davinci-fix-lazy-disable.patch b/queue-6.11/gpio-davinci-fix-lazy-disable.patch new file mode 100644 index 00000000000..24ebde3fd5b --- /dev/null +++ b/queue-6.11/gpio-davinci-fix-lazy-disable.patch @@ -0,0 +1,61 @@ +From 3360d41f4ac490282fddc3ccc0b58679aa5c065d Mon Sep 17 00:00:00 2001 +From: Emanuele Ghidoli +Date: Wed, 28 Aug 2024 15:32:07 +0200 +Subject: gpio: davinci: fix lazy disable + +From: Emanuele Ghidoli + +commit 3360d41f4ac490282fddc3ccc0b58679aa5c065d upstream. + +On a few platforms such as TI's AM69 device, disable_irq() fails to keep +track of the interrupts that happen between disable_irq() and +enable_irq() and those interrupts are missed. 
Use the ->irq_unmask() and +->irq_mask() methods instead of ->irq_enable() and ->irq_disable() to +correctly keep track of edges when disable_irq is called. + +This solves the issue of disable_irq() not working as expected on such +platforms. + +Fixes: 23265442b02b ("ARM: davinci: irq_data conversion.") +Signed-off-by: Emanuele Ghidoli +Signed-off-by: Parth Pancholi +Acked-by: Keerthy +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240828133207.493961-1-parth105105@gmail.com +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpio/gpio-davinci.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/gpio/gpio-davinci.c ++++ b/drivers/gpio/gpio-davinci.c +@@ -289,7 +289,7 @@ static int davinci_gpio_probe(struct pla + * serve as EDMA event triggers. + */ + +-static void gpio_irq_disable(struct irq_data *d) ++static void gpio_irq_mask(struct irq_data *d) + { + struct davinci_gpio_regs __iomem *g = irq2regs(d); + uintptr_t mask = (uintptr_t)irq_data_get_irq_handler_data(d); +@@ -298,7 +298,7 @@ static void gpio_irq_disable(struct irq_ + writel_relaxed(mask, &g->clr_rising); + } + +-static void gpio_irq_enable(struct irq_data *d) ++static void gpio_irq_unmask(struct irq_data *d) + { + struct davinci_gpio_regs __iomem *g = irq2regs(d); + uintptr_t mask = (uintptr_t)irq_data_get_irq_handler_data(d); +@@ -324,8 +324,8 @@ static int gpio_irq_type(struct irq_data + + static struct irq_chip gpio_irqchip = { + .name = "GPIO", +- .irq_enable = gpio_irq_enable, +- .irq_disable = gpio_irq_disable, ++ .irq_unmask = gpio_irq_unmask, ++ .irq_mask = gpio_irq_mask, + .irq_set_type = gpio_irq_type, + .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE, + }; diff --git a/queue-6.11/io_uring-net-harden-multishot-termination-case-for-recv.patch b/queue-6.11/io_uring-net-harden-multishot-termination-case-for-recv.patch new file mode 100644 index 00000000000..85f3673c7d4 --- /dev/null +++ 
b/queue-6.11/io_uring-net-harden-multishot-termination-case-for-recv.patch @@ -0,0 +1,56 @@ +From c314094cb4cfa6fc5a17f4881ead2dfebfa717a7 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Thu, 26 Sep 2024 07:08:10 -0600 +Subject: io_uring/net: harden multishot termination case for recv + +From: Jens Axboe + +commit c314094cb4cfa6fc5a17f4881ead2dfebfa717a7 upstream. + +If the recv returns zero, or an error, then it doesn't matter if more +data has already been received for this buffer. A condition like that +should terminate the multishot receive. Rather than pass in the +collected return value, pass in whether to terminate or keep the recv +going separately. + +Note that this isn't a bug right now, as the only way to get there is +via setting MSG_WAITALL with multishot receive. And if an application +does that, then -EINVAL is returned anyway. But it seems like an easy +bug to introduce, so let's make it a bit more explicit. + +Link: https://github.com/axboe/liburing/issues/1246 +Cc: stable@vger.kernel.org +Fixes: b3fdea6ecb55 ("io_uring: multishot recv") +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/net.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -1126,6 +1126,7 @@ int io_recv(struct io_kiocb *req, unsign + int ret, min_ret = 0; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + size_t len = sr->len; ++ bool mshot_finished; + + if (!(req->flags & REQ_F_POLLED) && + (sr->flags & IORING_RECVSEND_POLL_FIRST)) +@@ -1180,6 +1181,7 @@ out_free: + req_set_fail(req); + } + ++ mshot_finished = ret <= 0; + if (ret > 0) + ret += sr->done_io; + else if (sr->done_io) +@@ -1187,7 +1189,7 @@ out_free: + else + io_kbuf_recycle(req, issue_flags); + +- if (!io_recv_finish(req, &ret, kmsg, ret <= 0, issue_flags)) ++ if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags)) + goto retry_multishot; + + return ret; diff --git 
a/queue-6.11/mac802154-fix-potential-rcu-dereference-issue-in-mac802154_scan_worker.patch b/queue-6.11/mac802154-fix-potential-rcu-dereference-issue-in-mac802154_scan_worker.patch new file mode 100644 index 00000000000..e3c6610523c --- /dev/null +++ b/queue-6.11/mac802154-fix-potential-rcu-dereference-issue-in-mac802154_scan_worker.patch @@ -0,0 +1,63 @@ +From bff1709b3980bd7f80be6786f64cc9a9ee9e56da Mon Sep 17 00:00:00 2001 +From: Jiawei Ye +Date: Tue, 24 Sep 2024 06:58:05 +0000 +Subject: mac802154: Fix potential RCU dereference issue in mac802154_scan_worker + +From: Jiawei Ye + +commit bff1709b3980bd7f80be6786f64cc9a9ee9e56da upstream. + +In the `mac802154_scan_worker` function, the `scan_req->type` field was +accessed after the RCU read-side critical section was unlocked. According +to RCU usage rules, this is illegal and can lead to unpredictable +behavior, such as accessing memory that has been updated or causing +use-after-free issues. + +This possible bug was identified using a static analysis tool developed +by myself, specifically designed to detect RCU-related issues. + +To address this, the `scan_req->type` value is now stored in a local +variable `scan_req_type` while still within the RCU read-side critical +section. The `scan_req_type` is then used after the RCU lock is released, +ensuring that the type value is safely accessed without violating RCU +rules. 
+ +Fixes: e2c3e6f53a7a ("mac802154: Handle active scanning") +Cc: stable@vger.kernel.org +Signed-off-by: Jiawei Ye +Acked-by: Miquel Raynal +Reviewed-by: Przemek Kitszel +Link: https://lore.kernel.org/tencent_3B2F4F2B4DA30FAE2F51A9634A16B3AD4908@qq.com +Signed-off-by: Stefan Schmidt +Signed-off-by: Greg Kroah-Hartman +--- + net/mac802154/scan.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/mac802154/scan.c ++++ b/net/mac802154/scan.c +@@ -176,6 +176,7 @@ void mac802154_scan_worker(struct work_s + struct ieee802154_local *local = + container_of(work, struct ieee802154_local, scan_work.work); + struct cfg802154_scan_request *scan_req; ++ enum nl802154_scan_types scan_req_type; + struct ieee802154_sub_if_data *sdata; + unsigned int scan_duration = 0; + struct wpan_phy *wpan_phy; +@@ -209,6 +210,7 @@ void mac802154_scan_worker(struct work_s + } + + wpan_phy = scan_req->wpan_phy; ++ scan_req_type = scan_req->type; + scan_req_duration = scan_req->duration; + + /* Look for the next valid chan */ +@@ -246,7 +248,7 @@ void mac802154_scan_worker(struct work_s + goto end_scan; + } + +- if (scan_req->type == NL802154_SCAN_ACTIVE) { ++ if (scan_req_type == NL802154_SCAN_ACTIVE) { + ret = mac802154_transmit_beacon_req(local, sdata); + if (ret) + dev_err(&sdata->dev->dev, diff --git a/queue-6.11/net-pcs-xpcs-fix-the-wrong-register-that-was-written-back.patch b/queue-6.11/net-pcs-xpcs-fix-the-wrong-register-that-was-written-back.patch new file mode 100644 index 00000000000..ba1c437a731 --- /dev/null +++ b/queue-6.11/net-pcs-xpcs-fix-the-wrong-register-that-was-written-back.patch @@ -0,0 +1,35 @@ +From 93ef6ee5c20e9330477930ec6347672c9e0cf5a6 Mon Sep 17 00:00:00 2001 +From: Jiawen Wu +Date: Tue, 24 Sep 2024 10:28:57 +0800 +Subject: net: pcs: xpcs: fix the wrong register that was written back + +From: Jiawen Wu + +commit 93ef6ee5c20e9330477930ec6347672c9e0cf5a6 upstream. 
+ +The value is read from the register TXGBE_RX_GEN_CTL3, and it should be +written back to TXGBE_RX_GEN_CTL3 when it changes some fields. + +Cc: stable@vger.kernel.org +Fixes: f629acc6f210 ("net: pcs: xpcs: support to switch mode for Wangxun NICs") +Signed-off-by: Jiawen Wu +Reported-by: Russell King (Oracle) +Reviewed-by: Russell King (Oracle) +Link: https://patch.msgid.link/20240924022857.865422-1-jiawenwu@trustnetic.com +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/pcs/pcs-xpcs-wx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/pcs/pcs-xpcs-wx.c ++++ b/drivers/net/pcs/pcs-xpcs-wx.c +@@ -109,7 +109,7 @@ static void txgbe_pma_config_1g(struct d + txgbe_write_pma(xpcs, TXGBE_DFE_TAP_CTL0, 0); + val = txgbe_read_pma(xpcs, TXGBE_RX_GEN_CTL3); + val = u16_replace_bits(val, 0x4, TXGBE_RX_GEN_CTL3_LOS_TRSHLD0); +- txgbe_write_pma(xpcs, TXGBE_RX_EQ_ATTN_CTL, val); ++ txgbe_write_pma(xpcs, TXGBE_RX_GEN_CTL3, val); + + txgbe_write_pma(xpcs, TXGBE_MPLLA_CTL0, 0x20); + txgbe_write_pma(xpcs, TXGBE_MPLLA_CTL3, 0x46); diff --git a/queue-6.11/pidfs-check-for-valid-pid-namespace.patch b/queue-6.11/pidfs-check-for-valid-pid-namespace.patch new file mode 100644 index 00000000000..26f0203c402 --- /dev/null +++ b/queue-6.11/pidfs-check-for-valid-pid-namespace.patch @@ -0,0 +1,51 @@ +From 8a46067783bdff222d1fb8f8c20e3b7b711e3ce5 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Thu, 26 Sep 2024 18:51:46 +0200 +Subject: pidfs: check for valid pid namespace + +From: Christian Brauner + +commit 8a46067783bdff222d1fb8f8c20e3b7b711e3ce5 upstream. + +When we access a non-current task's pid namespace we need to check that the +task hasn't been reaped in the meantime, in which case its pid namespace isn't +accessible anymore. + +The user namespace is fine because it is only released when the last +reference to struct task_struct is put and exit_creds() is called. 
+ +Link: https://lore.kernel.org/r/20240926-klebt-altgedienten-0415ad4d273c@brauner +Fixes: 5b08bd408534 ("pidfs: allow retrieval of namespace file descriptors") +CC: stable@vger.kernel.org # v6.11 +Signed-off-by: Christian Brauner +Signed-off-by: Greg Kroah-Hartman +--- + fs/pidfs.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/fs/pidfs.c b/fs/pidfs.c +index 7ffdc88dfb52..80675b6bf884 100644 +--- a/fs/pidfs.c ++++ b/fs/pidfs.c +@@ -120,6 +120,7 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) + struct nsproxy *nsp __free(put_nsproxy) = NULL; + struct pid *pid = pidfd_pid(file); + struct ns_common *ns_common = NULL; ++ struct pid_namespace *pid_ns; + + if (arg) + return -EINVAL; +@@ -202,7 +203,9 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) + case PIDFD_GET_PID_NAMESPACE: + if (IS_ENABLED(CONFIG_PID_NS)) { + rcu_read_lock(); +- ns_common = to_ns_common( get_pid_ns(task_active_pid_ns(task))); ++ pid_ns = task_active_pid_ns(task); ++ if (pid_ns) ++ ns_common = to_ns_common(get_pid_ns(pid_ns)); + rcu_read_unlock(); + } + break; +-- +2.46.2 + diff --git a/queue-6.11/rtla-fix-the-help-text-in-osnoise-and-timerlat-top-tools.patch b/queue-6.11/rtla-fix-the-help-text-in-osnoise-and-timerlat-top-tools.patch new file mode 100644 index 00000000000..ce827874a2b --- /dev/null +++ b/queue-6.11/rtla-fix-the-help-text-in-osnoise-and-timerlat-top-tools.patch @@ -0,0 +1,58 @@ +From 3d7b8ea7a8a20a45d019382c4dc6ed79e8bb95cf Mon Sep 17 00:00:00 2001 +From: Eder Zulian +Date: Tue, 13 Aug 2024 17:58:31 +0200 +Subject: rtla: Fix the help text in osnoise and timerlat top tools + +From: Eder Zulian + +commit 3d7b8ea7a8a20a45d019382c4dc6ed79e8bb95cf upstream. + +The help text in osnoise top and timerlat top had some minor errors +and omissions. The -d option was missing the 's' (second) abbreviation and +the error message for '-d' used '-D'. 
+ +Cc: stable@vger.kernel.org +Fixes: 1eceb2fc2ca54 ("rtla/osnoise: Add osnoise top mode") +Fixes: a828cd18bc4ad ("rtla: Add timerlat tool and timelart top mode") +Link: https://lore.kernel.org/20240813155831.384446-1-ezulian@redhat.com +Suggested-by: Tomas Glozar +Reviewed-by: Tomas Glozar +Signed-off-by: Eder Zulian +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + tools/tracing/rtla/src/osnoise_top.c | 2 +- + tools/tracing/rtla/src/timerlat_top.c | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +--- a/tools/tracing/rtla/src/osnoise_top.c ++++ b/tools/tracing/rtla/src/osnoise_top.c +@@ -442,7 +442,7 @@ struct osnoise_top_params *osnoise_top_p + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) +- osnoise_top_usage(params, "Invalid -D duration\n"); ++ osnoise_top_usage(params, "Invalid -d duration\n"); + break; + case 'e': + tevent = trace_event_alloc(optarg); +--- a/tools/tracing/rtla/src/timerlat_top.c ++++ b/tools/tracing/rtla/src/timerlat_top.c +@@ -459,7 +459,7 @@ static void timerlat_top_usage(char *usa + " -c/--cpus cpus: run the tracer only on the given cpus", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", +- " -d/--duration time[m|h|d]: duration of the session in seconds", ++ " -d/--duration time[s|m|h|d]: duration of the session", + " -D/--debug: print debug info", + " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)", + " -t/--trace[file]: save the stopped trace to [file|timerlat_trace.txt]", +@@ -613,7 +613,7 @@ static struct timerlat_top_params + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) +- timerlat_top_usage("Invalid -D duration\n"); ++ timerlat_top_usage("Invalid -d duration\n"); + break; + case 'e': + tevent = 
trace_event_alloc(optarg); diff --git a/queue-6.11/series b/queue-6.11/series index f9767e6678a..739de981705 100644 --- a/queue-6.11/series +++ b/queue-6.11/series @@ -488,3 +488,28 @@ input-adp5589-keys-fix-null-pointer-dereference.patch input-adp5589-keys-fix-adp5589_gpio_get_value.patch hid-bpf-fix-cfi-stubs-for-hid_bpf_ops.patch cachefiles-fix-dentry-leak-in-cachefiles_open_file.patch +pidfs-check-for-valid-pid-namespace.patch +acpi-video-add-backlight-native-quirk-for-dell-optiplex-5480-aio.patch +acpi-resource-remove-duplicate-asus-e1504gab-irq-override.patch +acpi-resource-loosen-the-asus-e1404gab-dmi-match-to-also-cover-the-e1404ga.patch +acpi-resource-add-asus-vivobook-x1704vap-to-irq1_level_low_skip_override.patch +acpi-resource-add-asus-expertbook-b2502cva-to-irq1_level_low_skip_override.patch +btrfs-send-fix-buffer-overflow-detection-when-copying-path-to-cache-entry.patch +btrfs-fix-a-null-pointer-dereference-when-failed-to-start-a-new-trasacntion.patch +btrfs-drop-the-backref-cache-during-relocation-if-we-commit.patch +btrfs-send-fix-invalid-clone-operation-for-file-that-got-its-size-decreased.patch +btrfs-wait-for-fixup-workers-before-stopping-cleaner-kthread-during-umount.patch +cpufreq-avoid-a-bad-reference-count-on-cpu-node.patch +cpufreq-intel_pstate-make-hwp_notify_lock-a-raw-spinlock.patch +gpio-davinci-fix-lazy-disable.patch +net-pcs-xpcs-fix-the-wrong-register-that-was-written-back.patch +bluetooth-hci_event-align-br-edr-just_works-paring-with-le.patch +mac802154-fix-potential-rcu-dereference-issue-in-mac802154_scan_worker.patch +io_uring-net-harden-multishot-termination-case-for-recv.patch +ceph-fix-cap-ref-leak-via-netfs-init_request.patch +tracing-hwlat-fix-a-race-during-cpuhp-processing.patch +tracing-timerlat-drop-interface_lock-in-stop_kthread.patch +tracing-timerlat-fix-a-race-during-cpuhp-processing.patch +tracing-timerlat-fix-duplicated-kthread-creation-due-to-cpu-online-offline.patch 
+rtla-fix-the-help-text-in-osnoise-and-timerlat-top-tools.patch +firmware-sysfb-disable-sysfb-for-firmware-buffers-with-unknown-parent.patch diff --git a/queue-6.11/tracing-hwlat-fix-a-race-during-cpuhp-processing.patch b/queue-6.11/tracing-hwlat-fix-a-race-during-cpuhp-processing.patch new file mode 100644 index 00000000000..5804bccdf16 --- /dev/null +++ b/queue-6.11/tracing-hwlat-fix-a-race-during-cpuhp-processing.patch @@ -0,0 +1,46 @@ +From 2a13ca2e8abb12ee43ada8a107dadca83f140937 Mon Sep 17 00:00:00 2001 +From: Wei Li +Date: Tue, 24 Sep 2024 17:45:14 +0800 +Subject: tracing/hwlat: Fix a race during cpuhp processing + +From: Wei Li + +commit 2a13ca2e8abb12ee43ada8a107dadca83f140937 upstream. + +The cpuhp online/offline processing race also exists in percpu-mode hwlat +tracer in theory, apply the fix too. That is: + + T1 | T2 + [CPUHP_ONLINE] | cpu_device_down() + hwlat_hotplug_workfn() | + | cpus_write_lock() + | takedown_cpu(1) + | cpus_write_unlock() + [CPUHP_OFFLINE] | + cpus_read_lock() | + start_kthread(1) | + cpus_read_unlock() | + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Link: https://lore.kernel.org/20240924094515.3561410-5-liwei391@huawei.com +Fixes: ba998f7d9531 ("trace/hwlat: Support hotplug operations") +Signed-off-by: Wei Li +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_hwlat.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/trace/trace_hwlat.c ++++ b/kernel/trace/trace_hwlat.c +@@ -520,6 +520,8 @@ static void hwlat_hotplug_workfn(struct + if (!hwlat_busy || hwlat_data.thread_mode != MODE_PER_CPU) + goto out_unlock; + ++ if (!cpu_online(cpu)) ++ goto out_unlock; + if (!cpumask_test_cpu(cpu, tr->tracing_cpumask)) + goto out_unlock; + diff --git a/queue-6.11/tracing-timerlat-drop-interface_lock-in-stop_kthread.patch b/queue-6.11/tracing-timerlat-drop-interface_lock-in-stop_kthread.patch new file mode 100644 index 00000000000..46b36e7cc0b --- /dev/null +++ 
b/queue-6.11/tracing-timerlat-drop-interface_lock-in-stop_kthread.patch @@ -0,0 +1,76 @@ +From b484a02c9cedf8703eff8f0756f94618004bd165 Mon Sep 17 00:00:00 2001 +From: Wei Li +Date: Tue, 24 Sep 2024 17:45:12 +0800 +Subject: tracing/timerlat: Drop interface_lock in stop_kthread() + +From: Wei Li + +commit b484a02c9cedf8703eff8f0756f94618004bd165 upstream. + +stop_kthread() is the offline callback for "trace/osnoise:online". Since +commit 5bfbcd1ee57b ("tracing/timerlat: Add interface_lock around clearing +of kthread in stop_kthread()"), the following ABBA deadlock scenario has been +introduced: + +T1 | T2 [BP] | T3 [AP] +osnoise_hotplug_workfn() | work_for_cpu_fn() | cpuhp_thread_fun() + | _cpu_down() | osnoise_cpu_die() + mutex_lock(&interface_lock) | | stop_kthread() + | cpus_write_lock() | mutex_lock(&interface_lock) + cpus_read_lock() | cpuhp_kick_ap() | + +As the interface_lock here is just for protecting the "kthread" field of +the osn_var, use xchg() instead to fix this issue. Also go back to using +for_each_online_cpu() in stop_per_cpu_kthreads(), as it can take +cpus_read_lock() again. 
+ +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Link: https://lore.kernel.org/20240924094515.3561410-3-liwei391@huawei.com +Fixes: 5bfbcd1ee57b ("tracing/timerlat: Add interface_lock around clearing of kthread in stop_kthread()") +Signed-off-by: Wei Li +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_osnoise.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +--- a/kernel/trace/trace_osnoise.c ++++ b/kernel/trace/trace_osnoise.c +@@ -1953,12 +1953,8 @@ static void stop_kthread(unsigned int cp + { + struct task_struct *kthread; + +- mutex_lock(&interface_lock); +- kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; ++ kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL); + if (kthread) { +- per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; +- mutex_unlock(&interface_lock); +- + if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) && + !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) { + kthread_stop(kthread); +@@ -1972,7 +1968,6 @@ static void stop_kthread(unsigned int cp + put_task_struct(kthread); + } + } else { +- mutex_unlock(&interface_lock); + /* if no workload, just return */ + if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { + /* +@@ -1994,8 +1989,12 @@ static void stop_per_cpu_kthreads(void) + { + int cpu; + +- for_each_possible_cpu(cpu) ++ cpus_read_lock(); ++ ++ for_each_online_cpu(cpu) + stop_kthread(cpu); ++ ++ cpus_read_unlock(); + } + + /* diff --git a/queue-6.11/tracing-timerlat-fix-a-race-during-cpuhp-processing.patch b/queue-6.11/tracing-timerlat-fix-a-race-during-cpuhp-processing.patch new file mode 100644 index 00000000000..f349ee12e86 --- /dev/null +++ b/queue-6.11/tracing-timerlat-fix-a-race-during-cpuhp-processing.patch @@ -0,0 +1,88 @@ +From 829e0c9f0855f26b3ae830d17b24aec103f7e915 Mon Sep 17 00:00:00 2001 +From: Wei Li +Date: Tue, 24 Sep 2024 17:45:13 +0800 +Subject: tracing/timerlat: Fix a race during cpuhp 
processing + +From: Wei Li + +commit 829e0c9f0855f26b3ae830d17b24aec103f7e915 upstream. + +Another exception was found: the "timerlat/1" thread was +scheduled on CPU0, which finally led to timer corruption: + +``` +ODEBUG: init active (active state 0) object: ffff888237c2e108 object type: hrtimer hint: timerlat_irq+0x0/0x220 +WARNING: CPU: 0 PID: 426 at lib/debugobjects.c:518 debug_print_object+0x7d/0xb0 +Modules linked in: +CPU: 0 UID: 0 PID: 426 Comm: timerlat/1 Not tainted 6.11.0-rc7+ #45 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 +RIP: 0010:debug_print_object+0x7d/0xb0 +... +Call Trace: + + ? __warn+0x7c/0x110 + ? debug_print_object+0x7d/0xb0 + ? report_bug+0xf1/0x1d0 + ? prb_read_valid+0x17/0x20 + ? handle_bug+0x3f/0x70 + ? exc_invalid_op+0x13/0x60 + ? asm_exc_invalid_op+0x16/0x20 + ? debug_print_object+0x7d/0xb0 + ? debug_print_object+0x7d/0xb0 + ? __pfx_timerlat_irq+0x10/0x10 + __debug_object_init+0x110/0x150 + hrtimer_init+0x1d/0x60 + timerlat_main+0xab/0x2d0 + ? __pfx_timerlat_main+0x10/0x10 + kthread+0xb7/0xe0 + ? __pfx_kthread+0x10/0x10 + ret_from_fork+0x2d/0x40 + ? __pfx_kthread+0x10/0x10 + ret_from_fork_asm+0x1a/0x30 + +``` + +After tracing the scheduling event, it was discovered that the migration +of the "timerlat/1" thread was performed during thread creation. Further +analysis confirmed that it is because the CPU online processing for +osnoise is implemented through workers, which is asynchronous with the +offline processing. 
When the worker was scheduled to create a thread, the +CPU may have already been removed from the cpu_online_mask during the offline +process, resulting in the inability to select the right CPU: + +T1 | T2 +[CPUHP_ONLINE] | cpu_device_down() +osnoise_hotplug_workfn() | + | cpus_write_lock() + | takedown_cpu(1) + | cpus_write_unlock() +[CPUHP_OFFLINE] | + cpus_read_lock() | + start_kthread(1) | + cpus_read_unlock() | + +To fix this, skip online processing if the CPU is already offline. + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Link: https://lore.kernel.org/20240924094515.3561410-4-liwei391@huawei.com +Fixes: c8895e271f79 ("trace/osnoise: Support hotplug operations") +Signed-off-by: Wei Li +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_osnoise.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/trace/trace_osnoise.c ++++ b/kernel/trace/trace_osnoise.c +@@ -2094,6 +2094,8 @@ static void osnoise_hotplug_workfn(struc + mutex_lock(&interface_lock); + cpus_read_lock(); + ++ if (!cpu_online(cpu)) ++ goto out_unlock; + if (!cpumask_test_cpu(cpu, &osnoise_cpumask)) + goto out_unlock; + diff --git a/queue-6.11/tracing-timerlat-fix-duplicated-kthread-creation-due-to-cpu-online-offline.patch b/queue-6.11/tracing-timerlat-fix-duplicated-kthread-creation-due-to-cpu-online-offline.patch new file mode 100644 index 00000000000..1a3031e6920 --- /dev/null +++ b/queue-6.11/tracing-timerlat-fix-duplicated-kthread-creation-due-to-cpu-online-offline.patch @@ -0,0 +1,55 @@ +From 0bb0a5c12ecf36ad561542bbb95f96355e036a02 Mon Sep 17 00:00:00 2001 +From: Wei Li +Date: Tue, 24 Sep 2024 17:45:11 +0800 +Subject: tracing/timerlat: Fix duplicated kthread creation due to CPU online/offline + +From: Wei Li + +commit 0bb0a5c12ecf36ad561542bbb95f96355e036a02 upstream. + +osnoise_hotplug_workfn() is the asynchronous online callback for +"trace/osnoise:online". 
It may be congested when a CPU goes online and +offline repeatedly, and then be invoked multiple times after a single +online event. + +This will lead to a kthread leak and timer corruption. Add a check +in start_kthread() to prevent this situation. + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Link: https://lore.kernel.org/20240924094515.3561410-2-liwei391@huawei.com +Fixes: c8895e271f79 ("trace/osnoise: Support hotplug operations") +Signed-off-by: Wei Li +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_osnoise.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/kernel/trace/trace_osnoise.c ++++ b/kernel/trace/trace_osnoise.c +@@ -2006,6 +2006,10 @@ static int start_kthread(unsigned int cp + void *main = osnoise_main; + char comm[24]; + ++ /* Do not start a new thread if it is already running */ ++ if (per_cpu(per_cpu_osnoise_var, cpu).kthread) ++ return 0; ++ + if (timerlat_enabled()) { + snprintf(comm, 24, "timerlat/%d", cpu); + main = timerlat_main; +@@ -2060,11 +2064,10 @@ static int start_per_cpu_kthreads(void) + if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) { + struct task_struct *kthread; + +- kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; ++ kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL); + if (!WARN_ON(!kthread)) + kthread_stop(kthread); + } +- per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; + } + + for_each_cpu(cpu, current_mask) {