From: Greg Kroah-Hartman Date: Thu, 25 Jun 2026 11:34:11 +0000 (+0100) Subject: 6.12-stable patches X-Git-Tag: v6.18.37~17 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=81010433811727afbb99f45351852d661a1ad7a3;p=thirdparty%2Fkernel%2Fstable-queue.git 6.12-stable patches added patches: bonding-3ad-implement-proper-rcu-rules-for-port-aggregator.patch bonding-add-support-for-per-port-lacp-actor-priority.patch bonding-fix-null-pointer-dereference-in-actor_port_prio-setting.patch bonding-print-churn-state-via-netlink.patch dlm-prevent-npd-when-writing-a-positive-value-to-event_done.patch drivers-hv-vmbus-improve-the-logic-of-reserving-fb_mmio-on-gen2-vms.patch fhandle-fix-uaf-due-to-unlocked-mnt_ns-read-in-may_decode_fh.patch fuse-re-lock-request-before-replacing-page-cache-folio.patch futex-requeue-prevent-null-pointer-dereference-in-remove_waiter-on-self-deadlock.patch hv-utils-handle-and-propagate-errors-in-kvp_register.patch locking-mutex-remove-wakeups-from-under-mutex-wait_lock.patch locking-rtmutex-skip-remove_waiter-when-waiter-is-not-enqueued.patch net-bonding-add-broadcast_neighbor-option-for-802.3ad.patch net-bonding-fix-use-after-free-in-bond_xmit_broadcast.patch net-phonet-free-phonet_device-after-rcu-grace-period.patch phonet-pass-ifindex-to-fill_addr.patch phonet-pass-net-and-ifindex-to-phonet_address_notify.patch revert-net-bonding-fix-use-after-free-in-bond_xmit_broadcast.patch rxrpc-fix-the-ack-parser-to-extract-the-sack-table-for-parsing.patch staging-rtl8723bs-fix-buffer-over-read-in-rtw_update_protection.patch xfs-fix-error-returns-in-cow-fork-repair.patch xfs-remove-the-expr-argument-to-xfs_test_error.patch --- diff --git a/queue-6.12/bonding-3ad-implement-proper-rcu-rules-for-port-aggregator.patch b/queue-6.12/bonding-3ad-implement-proper-rcu-rules-for-port-aggregator.patch new file mode 100644 index 0000000000..752775536f --- /dev/null +++ b/queue-6.12/bonding-3ad-implement-proper-rcu-rules-for-port-aggregator.patch @@ 
-0,0 +1,561 @@ +From 3xnExagcKBscx2or44Bt11tyr.p1zt4rtxuyv07As170qn6v10.14t@flex--kpberry.bounces.google.com Tue Jun 16 16:54:48 2026 +From: Kevin Berry +Date: Tue, 16 Jun 2026 15:54:28 +0000 +Subject: bonding: 3ad: implement proper RCU rules for port->aggregator +To: stable@vger.kernel.org +Cc: gregkh@linuxfoundation.org, bestswngs@gmail.com, chenglongtang@google.com, joneslee@google.com, kpberry@google.com, pabeni@redhat.com, rnj@google.com, sashal@kernel.org, xmei5@asu.edu, Eric Dumazet , syzbot+9bb2ff2a4ab9e17307e1@syzkaller.appspotmail.com, Jay Vosburgh , Andrew Lunn , Jakub Kicinski +Message-ID: <20260616155432.2093908-6-kpberry@google.com> + +From: Eric Dumazet + +[ Upstream commit c4f050ce06c56cfb5993268af4a5cb66ed1cd04e ] + +syzbot found a data-race in bond_3ad_get_active_agg_info / +bond_3ad_state_machine_handler [1] which hints at lack of proper +RCU implementation. + +Add __rcu qualifier to port->aggregator, and add proper RCU API. + +[1] + +BUG: KCSAN: data-race in bond_3ad_get_active_agg_info / bond_3ad_state_machine_handler + +write to 0xffff88813cf5c4b0 of 8 bytes by task 36 on cpu 0: + ad_port_selection_logic drivers/net/bonding/bond_3ad.c:1659 [inline] + bond_3ad_state_machine_handler+0x9d5/0x2d60 drivers/net/bonding/bond_3ad.c:2569 + process_one_work kernel/workqueue.c:3302 [inline] + process_scheduled_works+0x4f0/0x9c0 kernel/workqueue.c:3385 + worker_thread+0x58a/0x780 kernel/workqueue.c:3466 + kthread+0x22a/0x280 kernel/kthread.c:436 + ret_from_fork+0x146/0x330 arch/x86/kernel/process.c:158 + ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 + +read to 0xffff88813cf5c4b0 of 8 bytes by task 22063 on cpu 1: + __bond_3ad_get_active_agg_info drivers/net/bonding/bond_3ad.c:2858 [inline] + bond_3ad_get_active_agg_info+0x8c/0x230 drivers/net/bonding/bond_3ad.c:2881 + bond_fill_info+0xe0f/0x10f0 drivers/net/bonding/bond_netlink.c:853 + rtnl_link_info_fill net/core/rtnetlink.c:906 [inline] + rtnl_link_fill+0x1d7/0x4e0 net/core/rtnetlink.c:927 + 
rtnl_fill_ifinfo+0xf8e/0x1380 net/core/rtnetlink.c:2168 + rtmsg_ifinfo_build_skb+0x11c/0x1b0 net/core/rtnetlink.c:4453 + rtmsg_ifinfo_event net/core/rtnetlink.c:4486 [inline] + rtmsg_ifinfo+0x6d/0x110 net/core/rtnetlink.c:4495 + __dev_notify_flags+0x76/0x390 net/core/dev.c:9790 + netif_change_flags+0xac/0xd0 net/core/dev.c:9823 + do_setlink+0x905/0x2950 net/core/rtnetlink.c:3180 + rtnl_group_changelink net/core/rtnetlink.c:3813 [inline] + __rtnl_newlink net/core/rtnetlink.c:3981 [inline] + rtnl_newlink+0xf55/0x1400 net/core/rtnetlink.c:4109 + rtnetlink_rcv_msg+0x64b/0x720 net/core/rtnetlink.c:6995 + netlink_rcv_skb+0x123/0x220 net/netlink/af_netlink.c:2550 + rtnetlink_rcv+0x1c/0x30 net/core/rtnetlink.c:7022 + netlink_unicast_kernel net/netlink/af_netlink.c:1318 [inline] + netlink_unicast+0x5a8/0x680 net/netlink/af_netlink.c:1344 + netlink_sendmsg+0x5c8/0x6f0 net/netlink/af_netlink.c:1894 + sock_sendmsg_nosec net/socket.c:787 [inline] + __sock_sendmsg net/socket.c:802 [inline] + ____sys_sendmsg+0x563/0x5b0 net/socket.c:2698 + ___sys_sendmsg+0x195/0x1e0 net/socket.c:2752 + __sys_sendmsg net/socket.c:2784 [inline] + __do_sys_sendmsg net/socket.c:2789 [inline] + __se_sys_sendmsg net/socket.c:2787 [inline] + __x64_sys_sendmsg+0xd4/0x160 net/socket.c:2787 + x64_sys_call+0x194c/0x3020 arch/x86/include/generated/asm/syscalls_64.h:47 + do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] + do_syscall_64+0x12c/0x3b0 arch/x86/entry/syscall_64.c:94 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + +value changed: 0x0000000000000000 -> 0xffff88813cf5c400 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 UID: 0 PID: 22063 Comm: syz.0.31122 Tainted: G W syzkaller #0 PREEMPT(full) +Tainted: [W]=WARN +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/18/2026 + +Fixes: 47e91f56008b ("bonding: use RCU protection for 3ad xmit path") +Reported-by: syzbot+9bb2ff2a4ab9e17307e1@syzkaller.appspotmail.com +Closes: 
https://lore.kernel.org/netdev/69f0a82f.050a0220.3aadc4.0000.GAE@google.com/ +Signed-off-by: Eric Dumazet +Cc: Jay Vosburgh +Cc: Andrew Lunn +Link: https://patch.msgid.link/20260428123207.3809211-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Kevin Berry +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_3ad.c | 109 ++++++++++++++++++--------------- + drivers/net/bonding/bond_main.c | 8 +- + drivers/net/bonding/bond_netlink.c | 16 +++- + drivers/net/bonding/bond_procfs.c | 3 + drivers/net/bonding/bond_sysfs_slave.c | 17 +++-- + include/net/bond_3ad.h | 2 + 6 files changed, 89 insertions(+), 66 deletions(-) + +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -991,6 +991,7 @@ static int ad_marker_send(struct port *p + static void ad_mux_machine(struct port *port, bool *update_slave_arr) + { + struct bonding *bond = __get_bond_by_port(port); ++ struct aggregator *aggregator; + mux_states_t last_state; + + /* keep current State Machine state to compare later if it was +@@ -998,6 +999,7 @@ static void ad_mux_machine(struct port * + */ + last_state = port->sm_mux_state; + ++ aggregator = rcu_dereference(port->aggregator); + if (port->sm_vars & AD_PORT_BEGIN) { + port->sm_mux_state = AD_MUX_DETACHED; + } else { +@@ -1017,7 +1019,7 @@ static void ad_mux_machine(struct port * + * cycle to update ready variable, we check + * READY_N and update READY here + */ +- __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); ++ __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator)); + port->sm_mux_state = AD_MUX_DETACHED; + break; + } +@@ -1032,7 +1034,7 @@ static void ad_mux_machine(struct port * + * update ready variable, we check READY_N and update + * READY here + */ +- __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); ++ __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator)); + + /* if the wait_while_timer expired, and the 
port is + * in READY state, move to ATTACHED state +@@ -1048,7 +1050,7 @@ static void ad_mux_machine(struct port * + if ((port->sm_vars & AD_PORT_SELECTED) && + (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) && + !__check_agg_selection_timer(port)) { +- if (port->aggregator->is_active) { ++ if (aggregator->is_active) { + int state = AD_MUX_COLLECTING_DISTRIBUTING; + + if (!bond->params.coupled_control) +@@ -1064,9 +1066,9 @@ static void ad_mux_machine(struct port * + * cycle to update ready variable, we check + * READY_N and update READY here + */ +- __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); ++ __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator)); + port->sm_mux_state = AD_MUX_DETACHED; +- } else if (port->aggregator->is_active) { ++ } else if (aggregator->is_active) { + port->actor_oper_port_state |= + LACP_STATE_SYNCHRONIZATION; + } +@@ -1077,7 +1079,7 @@ static void ad_mux_machine(struct port * + * sure that a collecting distributing + * port in an active aggregator is enabled + */ +- if (port->aggregator->is_active && ++ if (aggregator->is_active && + !__port_is_collecting_distributing(port)) { + __enable_port(port); + *update_slave_arr = true; +@@ -1096,7 +1098,7 @@ static void ad_mux_machine(struct port * + */ + struct slave *slave = port->slave; + +- if (port->aggregator->is_active && ++ if (aggregator->is_active && + bond_is_slave_rx_disabled(slave)) { + ad_enable_collecting(port); + *update_slave_arr = true; +@@ -1116,8 +1118,8 @@ static void ad_mux_machine(struct port * + * sure that a collecting distributing + * port in an active aggregator is enabled + */ +- if (port->aggregator && +- port->aggregator->is_active && ++ if (aggregator && ++ aggregator->is_active && + !__port_is_collecting_distributing(port)) { + __enable_port(port); + *update_slave_arr = true; +@@ -1149,7 +1151,7 @@ static void ad_mux_machine(struct port * + port->sm_mux_timer_counter = 
__ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0); + break; + case AD_MUX_ATTACHED: +- if (port->aggregator->is_active) ++ if (aggregator->is_active) + port->actor_oper_port_state |= + LACP_STATE_SYNCHRONIZATION; + else +@@ -1522,9 +1524,9 @@ static void ad_port_selection_logic(stru + bond = __get_bond_by_port(port); + + /* if the port is connected to other aggregator, detach it */ +- if (port->aggregator) { ++ temp_aggregator = rcu_dereference(port->aggregator); ++ if (temp_aggregator) { + /* detach the port from its former aggregator */ +- temp_aggregator = port->aggregator; + for (curr_port = temp_aggregator->lag_ports; curr_port; + last_port = curr_port, + curr_port = curr_port->next_port_in_aggregator) { +@@ -1547,7 +1549,7 @@ static void ad_port_selection_logic(stru + /* clear the port's relations to this + * aggregator + */ +- port->aggregator = NULL; ++ RCU_INIT_POINTER(port->aggregator, NULL); + port->next_port_in_aggregator = NULL; + port->actor_port_aggregator_identifier = 0; + +@@ -1570,7 +1572,7 @@ static void ad_port_selection_logic(stru + port->slave->bond->dev->name, + port->slave->dev->name, + port->actor_port_number, +- port->aggregator->aggregator_identifier); ++ temp_aggregator->aggregator_identifier); + } + } + /* search on all aggregators for a suitable aggregator for this port */ +@@ -1594,15 +1596,15 @@ static void ad_port_selection_logic(stru + ) + ) { + /* attach to the founded aggregator */ +- port->aggregator = aggregator; ++ rcu_assign_pointer(port->aggregator, aggregator); + port->actor_port_aggregator_identifier = +- port->aggregator->aggregator_identifier; ++ aggregator->aggregator_identifier; + port->next_port_in_aggregator = aggregator->lag_ports; +- port->aggregator->num_of_ports++; ++ aggregator->num_of_ports++; + aggregator->lag_ports = port; + slave_dbg(bond->dev, slave->dev, "Port %d joined LAG %d (existing LAG)\n", + port->actor_port_number, +- port->aggregator->aggregator_identifier); ++ aggregator->aggregator_identifier); + + /* 
mark this port as selected */ + port->sm_vars |= AD_PORT_SELECTED; +@@ -1617,39 +1619,40 @@ static void ad_port_selection_logic(stru + if (!found) { + if (free_aggregator) { + /* assign port a new aggregator */ +- port->aggregator = free_aggregator; + port->actor_port_aggregator_identifier = +- port->aggregator->aggregator_identifier; ++ free_aggregator->aggregator_identifier; + + /* update the new aggregator's parameters + * if port was responsed from the end-user + */ + if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS) + /* if port is full duplex */ +- port->aggregator->is_individual = false; ++ free_aggregator->is_individual = false; + else +- port->aggregator->is_individual = true; ++ free_aggregator->is_individual = true; + +- port->aggregator->actor_admin_aggregator_key = ++ free_aggregator->actor_admin_aggregator_key = + port->actor_admin_port_key; +- port->aggregator->actor_oper_aggregator_key = ++ free_aggregator->actor_oper_aggregator_key = + port->actor_oper_port_key; +- port->aggregator->partner_system = ++ free_aggregator->partner_system = + port->partner_oper.system; +- port->aggregator->partner_system_priority = ++ free_aggregator->partner_system_priority = + port->partner_oper.system_priority; +- port->aggregator->partner_oper_aggregator_key = port->partner_oper.key; +- port->aggregator->receive_state = 1; +- port->aggregator->transmit_state = 1; +- port->aggregator->lag_ports = port; +- port->aggregator->num_of_ports++; ++ free_aggregator->partner_oper_aggregator_key = port->partner_oper.key; ++ free_aggregator->receive_state = 1; ++ free_aggregator->transmit_state = 1; ++ free_aggregator->lag_ports = port; ++ free_aggregator->num_of_ports++; ++ ++ rcu_assign_pointer(port->aggregator, free_aggregator); + + /* mark this port as selected */ + port->sm_vars |= AD_PORT_SELECTED; + + slave_dbg(bond->dev, port->slave->dev, "Port %d joined LAG %d (new LAG)\n", + port->actor_port_number, +- port->aggregator->aggregator_identifier); ++ 
free_aggregator->aggregator_identifier); + } else { + slave_err(bond->dev, port->slave->dev, + "Port %d did not find a suitable aggregator\n", +@@ -1661,13 +1664,12 @@ static void ad_port_selection_logic(stru + * in all aggregator's ports, else set ready=FALSE in all + * aggregator's ports + */ +- __set_agg_ports_ready(port->aggregator, +- __agg_ports_are_ready(port->aggregator)); ++ aggregator = rcu_dereference(port->aggregator); ++ __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator)); + +- aggregator = __get_first_agg(port); +- ad_agg_selection_logic(aggregator, update_slave_arr); ++ ad_agg_selection_logic(__get_first_agg(port), update_slave_arr); + +- if (!port->aggregator->is_active) ++ if (!aggregator->is_active) + port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; + } + +@@ -2021,13 +2023,15 @@ static void ad_initialize_port(struct po + */ + static void ad_enable_collecting(struct port *port) + { +- if (port->aggregator->is_active) { ++ struct aggregator *aggregator = rcu_dereference(port->aggregator); ++ ++ if (aggregator->is_active) { + struct slave *slave = port->slave; + + slave_dbg(slave->bond->dev, slave->dev, + "Enabling collecting on port %d (LAG %d)\n", + port->actor_port_number, +- port->aggregator->aggregator_identifier); ++ aggregator->aggregator_identifier); + __enable_collecting_port(port); + } + } +@@ -2039,11 +2043,13 @@ static void ad_enable_collecting(struct + */ + static void ad_disable_distributing(struct port *port, bool *update_slave_arr) + { +- if (port->aggregator && __agg_has_partner(port->aggregator)) { ++ struct aggregator *aggregator = rcu_dereference(port->aggregator); ++ ++ if (aggregator && __agg_has_partner(aggregator)) { + slave_dbg(port->slave->bond->dev, port->slave->dev, + "Disabling distributing on port %d (LAG %d)\n", + port->actor_port_number, +- port->aggregator->aggregator_identifier); ++ aggregator->aggregator_identifier); + __disable_distributing_port(port); + /* Slave array needs an update 
*/ + *update_slave_arr = true; +@@ -2060,11 +2066,13 @@ static void ad_disable_distributing(stru + static void ad_enable_collecting_distributing(struct port *port, + bool *update_slave_arr) + { +- if (port->aggregator->is_active) { ++ struct aggregator *aggregator = rcu_dereference(port->aggregator); ++ ++ if (aggregator->is_active) { + slave_dbg(port->slave->bond->dev, port->slave->dev, + "Enabling port %d (LAG %d)\n", + port->actor_port_number, +- port->aggregator->aggregator_identifier); ++ aggregator->aggregator_identifier); + __enable_port(port); + /* Slave array needs update */ + *update_slave_arr = true; +@@ -2079,11 +2087,13 @@ static void ad_enable_collecting_distrib + static void ad_disable_collecting_distributing(struct port *port, + bool *update_slave_arr) + { +- if (port->aggregator && __agg_has_partner(port->aggregator)) { ++ struct aggregator *aggregator = rcu_dereference(port->aggregator); ++ ++ if (aggregator && __agg_has_partner(aggregator)) { + slave_dbg(port->slave->bond->dev, port->slave->dev, + "Disabling port %d (LAG %d)\n", + port->actor_port_number, +- port->aggregator->aggregator_identifier); ++ aggregator->aggregator_identifier); + __disable_port(port); + /* Slave array needs an update */ + *update_slave_arr = true; +@@ -2323,7 +2333,7 @@ void bond_3ad_unbind_slave(struct slave + */ + for (temp_port = aggregator->lag_ports; temp_port; + temp_port = temp_port->next_port_in_aggregator) { +- temp_port->aggregator = new_aggregator; ++ rcu_assign_pointer(temp_port->aggregator, new_aggregator); + temp_port->actor_port_aggregator_identifier = new_aggregator->aggregator_identifier; + } + +@@ -2792,15 +2802,16 @@ out: + int __bond_3ad_get_active_agg_info(struct bonding *bond, + struct ad_info *ad_info) + { +- struct aggregator *aggregator = NULL; ++ struct aggregator *aggregator = NULL, *tmp; + struct list_head *iter; + struct slave *slave; + struct port *port; + + bond_for_each_slave_rcu(bond, slave, iter) { + port = 
&(SLAVE_AD_INFO(slave)->port); +- if (port->aggregator && port->aggregator->is_active) { +- aggregator = port->aggregator; ++ tmp = rcu_dereference(port->aggregator); ++ if (tmp && tmp->is_active) { ++ aggregator = tmp; + break; + } + } +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1470,7 +1470,7 @@ static void bond_poll_controller(struct + + if (BOND_MODE(bond) == BOND_MODE_8023AD) { + struct aggregator *agg = +- SLAVE_AD_INFO(slave)->port.aggregator; ++ rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator); + + if (agg && + agg->aggregator_identifier != ad_info.aggregator_id) +@@ -5244,15 +5244,16 @@ int bond_update_slave_arr(struct bonding + spin_unlock_bh(&bond->mode_lock); + agg_id = ad_info.aggregator_id; + } ++ rcu_read_lock(); + bond_for_each_slave(bond, slave, iter) { + if (skipslave == slave) + continue; + + all_slaves->arr[all_slaves->count++] = slave; + if (BOND_MODE(bond) == BOND_MODE_8023AD) { +- struct aggregator *agg; ++ const struct aggregator *agg; + +- agg = SLAVE_AD_INFO(slave)->port.aggregator; ++ agg = rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator); + if (!agg || agg->aggregator_identifier != agg_id) + continue; + } +@@ -5264,6 +5265,7 @@ int bond_update_slave_arr(struct bonding + + usable_slaves->arr[usable_slaves->count++] = slave; + } ++ rcu_read_unlock(); + + bond_set_slave_arr(bond, usable_slaves, all_slaves); + return ret; +--- a/drivers/net/bonding/bond_netlink.c ++++ b/drivers/net/bonding/bond_netlink.c +@@ -66,27 +66,29 @@ static int bond_fill_slave_info(struct s + const struct port *ad_port; + + ad_port = &SLAVE_AD_INFO(slave)->port; +- agg = SLAVE_AD_INFO(slave)->port.aggregator; ++ rcu_read_lock(); ++ agg = rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator); + if (agg) { + if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, + agg->aggregator_identifier)) +- goto nla_put_failure; ++ goto nla_put_failure_rcu; + if (nla_put_u8(skb, + IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, + 
ad_port->actor_oper_port_state)) +- goto nla_put_failure; ++ goto nla_put_failure_rcu; + if (nla_put_u16(skb, + IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, + ad_port->partner_oper.port_state)) +- goto nla_put_failure; ++ goto nla_put_failure_rcu; + + if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE, + ad_port->sm_churn_actor_state)) +- goto nla_put_failure; ++ goto nla_put_failure_rcu; + if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE, + ad_port->sm_churn_partner_state)) +- goto nla_put_failure; ++ goto nla_put_failure_rcu; + } ++ rcu_read_unlock(); + + if (nla_put_u16(skb, IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, + SLAVE_AD_INFO(slave)->port_priority)) +@@ -95,6 +97,8 @@ static int bond_fill_slave_info(struct s + + return 0; + ++nla_put_failure_rcu: ++ rcu_read_unlock(); + nla_put_failure: + return -EMSGSIZE; + } +--- a/drivers/net/bonding/bond_procfs.c ++++ b/drivers/net/bonding/bond_procfs.c +@@ -187,6 +187,7 @@ static void bond_info_show_master(struct + } + } + ++/* Note: runs under rcu_read_lock() */ + static void bond_info_show_slave(struct seq_file *seq, + const struct slave *slave) + { +@@ -213,7 +214,7 @@ static void bond_info_show_slave(struct + + if (BOND_MODE(bond) == BOND_MODE_8023AD) { + const struct port *port = &SLAVE_AD_INFO(slave)->port; +- const struct aggregator *agg = port->aggregator; ++ const struct aggregator *agg = rcu_dereference(port->aggregator); + + if (agg) { + seq_printf(seq, "Aggregator ID: %d\n", +--- a/drivers/net/bonding/bond_sysfs_slave.c ++++ b/drivers/net/bonding/bond_sysfs_slave.c +@@ -62,10 +62,15 @@ static ssize_t ad_aggregator_id_show(str + const struct aggregator *agg; + + if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { +- agg = SLAVE_AD_INFO(slave)->port.aggregator; +- if (agg) +- return sysfs_emit(buf, "%d\n", +- agg->aggregator_identifier); ++ rcu_read_lock(); ++ agg = rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator); ++ if (agg) { ++ ssize_t res = sysfs_emit(buf, "%d\n", ++ 
agg->aggregator_identifier); ++ rcu_read_unlock(); ++ return res; ++ } ++ rcu_read_unlock(); + } + + return sysfs_emit(buf, "N/A\n"); +@@ -78,7 +83,7 @@ static ssize_t ad_actor_oper_port_state_ + + if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { + ad_port = &SLAVE_AD_INFO(slave)->port; +- if (ad_port->aggregator) ++ if (rcu_access_pointer(ad_port->aggregator)) + return sysfs_emit(buf, "%u\n", + ad_port->actor_oper_port_state); + } +@@ -93,7 +98,7 @@ static ssize_t ad_partner_oper_port_stat + + if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { + ad_port = &SLAVE_AD_INFO(slave)->port; +- if (ad_port->aggregator) ++ if (rcu_access_pointer(ad_port->aggregator)) + return sysfs_emit(buf, "%u\n", + ad_port->partner_oper.port_state); + } +--- a/include/net/bond_3ad.h ++++ b/include/net/bond_3ad.h +@@ -242,7 +242,7 @@ typedef struct port { + churn_state_t sm_churn_actor_state; + churn_state_t sm_churn_partner_state; + struct slave *slave; /* pointer to the bond slave that this port belongs to */ +- struct aggregator *aggregator; /* pointer to an aggregator that this port related to */ ++ struct aggregator __rcu *aggregator; /* pointer to an aggregator that this port related to */ + struct port *next_port_in_aggregator; /* Next port on the linked list of the parent aggregator */ + u32 transaction_id; /* continuous number for identification of Marker PDU's; */ + struct lacpdu lacpdu; /* the lacpdu that will be sent for this port */ diff --git a/queue-6.12/bonding-add-support-for-per-port-lacp-actor-priority.patch b/queue-6.12/bonding-add-support-for-per-port-lacp-actor-priority.patch new file mode 100644 index 0000000000..5c06efa79c --- /dev/null +++ b/queue-6.12/bonding-add-support-for-per-port-lacp-actor-priority.patch @@ -0,0 +1,215 @@ +From 3w3ExagcKBsQuzlo118qyyqvo.mywq1oqurvsx47py4xnk3syx.y1q@flex--kpberry.bounces.google.com Tue Jun 16 16:54:44 2026 +From: Kevin Berry +Date: Tue, 16 Jun 2026 15:54:26 +0000 +Subject: bonding: add support for per-port LACP actor 
priority +To: stable@vger.kernel.org +Cc: gregkh@linuxfoundation.org, bestswngs@gmail.com, chenglongtang@google.com, joneslee@google.com, kpberry@google.com, pabeni@redhat.com, rnj@google.com, sashal@kernel.org, xmei5@asu.edu, Hangbin Liu +Message-ID: <20260616155432.2093908-4-kpberry@google.com> + +From: Hangbin Liu + +[ Upstream commit 6b6dc81ee7e8ca87c71a533e1d69cf96a4f1e986 ] + +Introduce a new netlink attribute 'actor_port_prio' to allow setting +the LACP actor port priority on a per-slave basis. This extends the +existing bonding infrastructure to support more granular control over +LACP negotiations. + +The priority value is embedded in LACPDU packets and will be used by +subsequent patches to influence aggregator selection policies. + +Signed-off-by: Hangbin Liu +Link: https://patch.msgid.link/20250902064501.360822-2-liuhangbin@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Kevin Berry +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/networking/bonding.rst | 9 ++++++++ + drivers/net/bonding/bond_3ad.c | 4 +++ + drivers/net/bonding/bond_netlink.c | 16 +++++++++++++++ + drivers/net/bonding/bond_options.c | 36 +++++++++++++++++++++++++++++++++++ + include/net/bond_3ad.h | 1 + include/net/bond_options.h | 1 + include/uapi/linux/if_link.h | 1 + 7 files changed, 68 insertions(+) + +--- a/Documentation/networking/bonding.rst ++++ b/Documentation/networking/bonding.rst +@@ -193,6 +193,15 @@ ad_actor_sys_prio + This parameter has effect only in 802.3ad mode and is available through + SysFs interface. + ++actor_port_prio ++ ++ In an AD system, this specifies the port priority. The allowed range ++ is 1 - 65535. If the value is not specified, it takes 255 as the ++ default value. ++ ++ This parameter has effect only in 802.3ad mode and is available through ++ netlink interface. 
++ + ad_actor_system + + In an AD system, this specifies the mac-address for the actor in +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -436,6 +436,7 @@ static void __ad_actor_update_port(struc + + port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; + port->actor_system_priority = BOND_AD_INFO(bond).system.sys_priority; ++ port->actor_port_priority = SLAVE_AD_INFO(port->slave)->port_priority; + } + + /* Conversions */ +@@ -2195,6 +2196,9 @@ void bond_3ad_bind_slave(struct slave *s + + ad_initialize_port(port, &bond->params); + ++ /* Port priority is initialized. Update it to slave's ad info */ ++ SLAVE_AD_INFO(slave)->port_priority = port->actor_port_priority; ++ + port->slave = slave; + port->actor_port_number = SLAVE_AD_INFO(slave)->id; + /* key is determined according to the link speed, duplex and +--- a/drivers/net/bonding/bond_netlink.c ++++ b/drivers/net/bonding/bond_netlink.c +@@ -28,6 +28,7 @@ static size_t bond_get_slave_size(const + nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */ + nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */ + nla_total_size(sizeof(s32)) + /* IFLA_BOND_SLAVE_PRIO */ ++ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_ACTOR_PORT_PRIO */ + 0; + } + +@@ -77,6 +78,10 @@ static int bond_fill_slave_info(struct s + ad_port->partner_oper.port_state)) + goto nla_put_failure; + } ++ ++ if (nla_put_u16(skb, IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, ++ SLAVE_AD_INFO(slave)->port_priority)) ++ goto nla_put_failure; + } + + return 0; +@@ -129,6 +134,7 @@ static const struct nla_policy bond_poli + static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { + [IFLA_BOND_SLAVE_QUEUE_ID] = { .type = NLA_U16 }, + [IFLA_BOND_SLAVE_PRIO] = { .type = NLA_S32 }, ++ [IFLA_BOND_SLAVE_ACTOR_PORT_PRIO] = { .type = NLA_U16 }, + }; + + static int bond_validate(struct nlattr *tb[], struct nlattr *data[], +@@ -178,6 +184,16 @@ static int 
bond_slave_changelink(struct + if (err) + return err; + } ++ ++ if (data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO]) { ++ u16 ad_prio = nla_get_u16(data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO]); ++ ++ bond_opt_slave_initval(&newval, &slave_dev, ad_prio); ++ err = __bond_opt_set(bond, BOND_OPT_ACTOR_PORT_PRIO, &newval, ++ data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO], extack); ++ if (err) ++ return err; ++ } + + return 0; + } +--- a/drivers/net/bonding/bond_options.c ++++ b/drivers/net/bonding/bond_options.c +@@ -79,6 +79,8 @@ static int bond_option_tlb_dynamic_lb_se + const struct bond_opt_value *newval); + static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, + const struct bond_opt_value *newval); ++static int bond_option_actor_port_prio_set(struct bonding *bond, ++ const struct bond_opt_value *newval); + static int bond_option_ad_actor_system_set(struct bonding *bond, + const struct bond_opt_value *newval); + static int bond_option_ad_user_port_key_set(struct bonding *bond, +@@ -223,6 +225,13 @@ static const struct bond_opt_value bond_ + { NULL, -1, 0}, + }; + ++static const struct bond_opt_value bond_actor_port_prio_tbl[] = { ++ { "minval", 0, BOND_VALFLAG_MIN}, ++ { "maxval", 65535, BOND_VALFLAG_MAX}, ++ { "default", 255, BOND_VALFLAG_DEFAULT}, ++ { NULL, -1, 0}, ++}; ++ + static const struct bond_opt_value bond_ad_user_port_key_tbl[] = { + { "minval", 0, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, + { "maxval", 1023, BOND_VALFLAG_MAX}, +@@ -484,6 +493,13 @@ static const struct bond_option bond_opt + .values = bond_ad_actor_sys_prio_tbl, + .set = bond_option_ad_actor_sys_prio_set, + }, ++ [BOND_OPT_ACTOR_PORT_PRIO] = { ++ .id = BOND_OPT_ACTOR_PORT_PRIO, ++ .name = "actor_port_prio", ++ .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), ++ .values = bond_actor_port_prio_tbl, ++ .set = bond_option_actor_port_prio_set, ++ }, + [BOND_OPT_AD_ACTOR_SYSTEM] = { + .id = BOND_OPT_AD_ACTOR_SYSTEM, + .name = "ad_actor_system", +@@ -1817,6 +1833,26 @@ static int 
bond_option_ad_actor_sys_prio + bond_3ad_update_ad_actor_settings(bond); + + return 0; ++} ++ ++static int bond_option_actor_port_prio_set(struct bonding *bond, ++ const struct bond_opt_value *newval) ++{ ++ struct slave *slave; ++ ++ slave = bond_slave_get_rtnl(newval->slave_dev); ++ if (!slave) { ++ netdev_dbg(bond->dev, "%s called on NULL slave\n", __func__); ++ return -ENODEV; ++ } ++ ++ netdev_dbg(newval->slave_dev, "Setting actor_port_prio to %llu\n", ++ newval->value); ++ ++ SLAVE_AD_INFO(slave)->port_priority = newval->value; ++ bond_3ad_update_ad_actor_settings(bond); ++ ++ return 0; + } + + static int bond_option_ad_actor_system_set(struct bonding *bond, +--- a/include/net/bond_3ad.h ++++ b/include/net/bond_3ad.h +@@ -274,6 +274,7 @@ struct ad_slave_info { + struct port port; /* 802.3ad port structure */ + struct bond_3ad_stats stats; + u16 id; ++ u16 port_priority; + }; + + static inline const char *bond_3ad_churn_desc(churn_state_t state) +--- a/include/net/bond_options.h ++++ b/include/net/bond_options.h +@@ -78,6 +78,7 @@ enum { + BOND_OPT_PRIO, + BOND_OPT_COUPLED_CONTROL, + BOND_OPT_BROADCAST_NEIGH, ++ BOND_OPT_ACTOR_PORT_PRIO, + BOND_OPT_LAST + }; + +--- a/include/uapi/linux/if_link.h ++++ b/include/uapi/linux/if_link.h +@@ -1551,6 +1551,7 @@ enum { + IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, + IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, + IFLA_BOND_SLAVE_PRIO, ++ IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, + __IFLA_BOND_SLAVE_MAX, + }; + diff --git a/queue-6.12/bonding-fix-null-pointer-dereference-in-actor_port_prio-setting.patch b/queue-6.12/bonding-fix-null-pointer-dereference-in-actor_port_prio-setting.patch new file mode 100644 index 0000000000..db5e76e65b --- /dev/null +++ b/queue-6.12/bonding-fix-null-pointer-dereference-in-actor_port_prio-setting.patch @@ -0,0 +1,61 @@ +From 3yXExagcKBso05ru77Ew44w1u.s42w7uw0x1y3ADv4A3tq9y43.47w@flex--kpberry.bounces.google.com Tue Jun 16 16:54:50 2026 +From: Kevin Berry +Date: Tue, 16 Jun 2026 15:54:30 +0000 +Subject: 
bonding: fix NULL pointer dereference in actor_port_prio setting +To: stable@vger.kernel.org +Cc: gregkh@linuxfoundation.org, bestswngs@gmail.com, chenglongtang@google.com, joneslee@google.com, kpberry@google.com, pabeni@redhat.com, rnj@google.com, sashal@kernel.org, xmei5@asu.edu, Hangbin Liu , Liang Li , Jakub Kicinski +Message-ID: <20260616155432.2093908-8-kpberry@google.com> + +From: Hangbin Liu + +[ Upstream commit 067bf016e99ad72aa4ff869d6dec1fd62a9c6202 ] + +Liang reported an issue where setting a slave’s actor_port_prio to +predefined values such as 0, 255, or 65535 would cause a system crash. + +The problem occurs because in bond_opt_parse(), when the provided value +matches a predefined table entry, the function returns that table entry, +which does not contain slave information. Later, in +bond_option_actor_port_prio_set(), calling bond_slave_get_rtnl() leads +to a NULL pointer dereference. + +Since actor_port_prio is defined as a u16 and initialized to the default +value of 255 in ad_initialize_port(), there is no need for the +bond_actor_port_prio_tbl. Using the BOND_OPTFLAG_RAWVAL flag is sufficient. 
+ +Fixes: 6b6dc81ee7e8 ("bonding: add support for per-port LACP actor priority") +Reported-by: Liang Li +Signed-off-by: Hangbin Liu +Link: https://patch.msgid.link/20251105072620.164841-1-liuhangbin@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Kevin Berry +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_options.c | 9 +-------- + 1 file changed, 1 insertion(+), 8 deletions(-) + +--- a/drivers/net/bonding/bond_options.c ++++ b/drivers/net/bonding/bond_options.c +@@ -225,13 +225,6 @@ static const struct bond_opt_value bond_ + { NULL, -1, 0}, + }; + +-static const struct bond_opt_value bond_actor_port_prio_tbl[] = { +- { "minval", 0, BOND_VALFLAG_MIN}, +- { "maxval", 65535, BOND_VALFLAG_MAX}, +- { "default", 255, BOND_VALFLAG_DEFAULT}, +- { NULL, -1, 0}, +-}; +- + static const struct bond_opt_value bond_ad_user_port_key_tbl[] = { + { "minval", 0, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, + { "maxval", 1023, BOND_VALFLAG_MAX}, +@@ -497,7 +490,7 @@ static const struct bond_option bond_opt + .id = BOND_OPT_ACTOR_PORT_PRIO, + .name = "actor_port_prio", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), +- .values = bond_actor_port_prio_tbl, ++ .flags = BOND_OPTFLAG_RAWVAL, + .set = bond_option_actor_port_prio_set, + }, + [BOND_OPT_AD_ACTOR_SYSTEM] = { diff --git a/queue-6.12/bonding-print-churn-state-via-netlink.patch b/queue-6.12/bonding-print-churn-state-via-netlink.patch new file mode 100644 index 0000000000..a3d0905a00 --- /dev/null +++ b/queue-6.12/bonding-print-churn-state-via-netlink.patch @@ -0,0 +1,61 @@ +From 3xXExagcKBsYw1nq33As00sxq.o0ys3qswtxuz69r06zpm5u0z.03s@flex--kpberry.bounces.google.com Tue Jun 16 16:54:46 2026 +From: Kevin Berry +Date: Tue, 16 Jun 2026 15:54:27 +0000 +Subject: bonding: print churn state via netlink +To: stable@vger.kernel.org +Cc: gregkh@linuxfoundation.org, bestswngs@gmail.com, chenglongtang@google.com, joneslee@google.com, kpberry@google.com, pabeni@redhat.com, rnj@google.com, sashal@kernel.org, 
xmei5@asu.edu, Hangbin Liu +Message-ID: <20260616155432.2093908-5-kpberry@google.com> + +From: Hangbin Liu + +[ Upstream commit 4916f2e2f3fc9aef289fcd07949301e5c29094c2 ] + +Currently, the churn state is printed only in sysfs. Add netlink support +so users could get the state via netlink. + +Signed-off-by: Hangbin Liu +Link: https://patch.msgid.link/20260224020215.6012-1-liuhangbin@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Kevin Berry +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_netlink.c | 9 +++++++++ + include/uapi/linux/if_link.h | 2 ++ + 2 files changed, 11 insertions(+) + +--- a/drivers/net/bonding/bond_netlink.c ++++ b/drivers/net/bonding/bond_netlink.c +@@ -29,6 +29,8 @@ static size_t bond_get_slave_size(const + nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */ + nla_total_size(sizeof(s32)) + /* IFLA_BOND_SLAVE_PRIO */ + nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_ACTOR_PORT_PRIO */ ++ nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE */ ++ nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE */ + 0; + } + +@@ -77,6 +79,13 @@ static int bond_fill_slave_info(struct s + IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, + ad_port->partner_oper.port_state)) + goto nla_put_failure; ++ ++ if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE, ++ ad_port->sm_churn_actor_state)) ++ goto nla_put_failure; ++ if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE, ++ ad_port->sm_churn_partner_state)) ++ goto nla_put_failure; + } + + if (nla_put_u16(skb, IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, +--- a/include/uapi/linux/if_link.h ++++ b/include/uapi/linux/if_link.h +@@ -1552,6 +1552,8 @@ enum { + IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, + IFLA_BOND_SLAVE_PRIO, + IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, ++ IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE, ++ IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE, + __IFLA_BOND_SLAVE_MAX, + }; + diff --git 
a/queue-6.12/dlm-prevent-npd-when-writing-a-positive-value-to-event_done.patch b/queue-6.12/dlm-prevent-npd-when-writing-a-positive-value-to-event_done.patch new file mode 100644 index 0000000000..faf2694bcc --- /dev/null +++ b/queue-6.12/dlm-prevent-npd-when-writing-a-positive-value-to-event_done.patch @@ -0,0 +1,42 @@ +From 8e2bad543eca5c25cd02cbc63d72557934d45f13 Mon Sep 17 00:00:00 2001 +From: Thadeu Lima de Souza Cascardo +Date: Mon, 10 Feb 2025 13:16:22 -0600 +Subject: dlm: prevent NPD when writing a positive value to event_done + +From: Thadeu Lima de Souza Cascardo + +commit 8e2bad543eca5c25cd02cbc63d72557934d45f13 upstream. + +do_uevent returns the value written to event_done. In case it is a +positive value, new_lockspace would undo all the work, and lockspace +would not be set. __dlm_new_lockspace, however, would treat that +positive value as a success due to commit 8511a2728ab8 ("dlm: fix use +count with multiple joins"). + +Down the line, device_create_lockspace would pass that NULL lockspace to +dlm_find_lockspace_local, leading to a NULL pointer dereference. + +Treating such positive values as successes prevents the problem. Given +this has been broken for so long, this is unlikely to break userspace +expectations. + +Fixes: 8511a2728ab8 ("dlm: fix use count with multiple joins") +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: David Teigland +Signed-off-by: Nazar Kalashnikov +Signed-off-by: Greg Kroah-Hartman +--- + fs/dlm/lockspace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/dlm/lockspace.c ++++ b/fs/dlm/lockspace.c +@@ -576,7 +576,7 @@ static int new_lockspace(const char *nam + lockspace to start running (via sysfs) in dlm_ls_start(). 
*/ + + error = do_uevent(ls, 1); +- if (error) ++ if (error < 0) + goto out_recoverd; + + /* wait until recovery is successful or failed */ diff --git a/queue-6.12/drivers-hv-vmbus-improve-the-logic-of-reserving-fb_mmio-on-gen2-vms.patch b/queue-6.12/drivers-hv-vmbus-improve-the-logic-of-reserving-fb_mmio-on-gen2-vms.patch new file mode 100644 index 0000000000..5cfdad3c97 --- /dev/null +++ b/queue-6.12/drivers-hv-vmbus-improve-the-logic-of-reserving-fb_mmio-on-gen2-vms.patch @@ -0,0 +1,153 @@ +From stable+bounces-265062-greg=kroah.com@vger.kernel.org Tue Jun 16 18:13:39 2026 +From: Sasha Levin +Date: Tue, 16 Jun 2026 13:01:22 -0400 +Subject: Drivers: hv: vmbus: Improve the logic of reserving fb_mmio on Gen2 VMs +To: stable@vger.kernel.org +Cc: Dexuan Cui , Michael Kelley , Krister Johansen , Matthew Ruffell , Wei Liu , Sasha Levin +Message-ID: <20260616170122.3368542-1-sashal@kernel.org> + +From: Dexuan Cui + +[ Upstream commit 016a25e4b0df4d77e7c258edee4aaf982e4ee809 ] + +If vmbus_reserve_fb() in the kdump/kexec kernel fails to properly reserve +the framebuffer MMIO range (which is below 4GB) due to a Gen2 VM's +screen.lfb_base being zero [1], there is an MMIO conflict between the +drivers hyperv-drm and pci-hyperv: when the driver pci-hyperv's +hv_allocate_config_window() calls vmbus_allocate_mmio() to get an +MMIO range, typically it gets a 32-bit MMIO range that overlaps with the +framebuffer MMIO range, and later hv_pci_enter_d0() fails with an +error message "PCI Pass-through VSP failed D0 Entry with status" since +the host thinks that PCI devices must not use MMIO space that the +host has assigned to the framebuffer. + +This is especially an issue if pci-hyperv is built-in and hyperv-drm is +built as a module. Consequently, the kdump/kexec kernel fails to detect +PCI devices via pci-hyperv, and may fail to mount the root file system, +which may reside in a NVMe disk. 
The issue described here has existed +for SR-IOV VF NICs since day one of the pci-hyperv driver, and has been +worked around on x64 when possible. With the recent introduction of +ARM64 VMs that boot from NVMe, there is no workaround, so we need a +formal fix. + +On Gen2 VMs, if the screen.lfb_base is 0 in the kdump/kexec kernel [1], +fall back to the low MMIO base, which should be equal to the framebuffer +MMIO base [2] (the statement is true according to my testing on x64 +Windows Server 2016, and on x64 and ARM64 Windows Server 2025 and on +Azure. I checked with the Hyper-V team and they said the statement should +continue to be true for Gen2 VMs). In the first kernel, screen.lfb_base +is not 0; if the user specifies a very high resolution, it's not enough +to only reserve 8MB: let's always reserve half of the space below 4GB, +but cap the reservation to 128MB, which is the required framebuffer size +of the highest resolution 7680*4320 supported by Hyper-V. + +While at it, fix the comparison "end > VTPM_BASE_ADDRESS" by changing +the > to >=. Here the 'end' is an inclusive end (typically, it's +0xFFFF_FFFF for the low MMIO range). + +Note: vmbus_reserve_fb() now also reserves an MMIO range at the beginning +of the low MMIO range on CVMs, which have no framebuffers (the +'screen.lfb_base' in vmbus_reserve_fb() is 0 for CVMs), just in case the +host might treat the beginning of the low MMIO range specially [3]. BTW, +the OpenHCL kernel is not affected by the change, because that kernel +boots with DeviceTree rather than ACPI (so vmbus_reserve_fb() won't run +there), and there is no framebuffer device for that kernel. 
+ +Note: normally Gen1 VMs don't have the MMIO conflict issue because the +framebuffer MMIO range (which is hardcoded to base=4GB-128MB and +size=64MB for Gen1 VMs by the host) is always reported via the legacy PCI +graphics device's BAR, so the kdump/kexec kernel can reserve the 64MB +MMIO range; however, if the VM is configured to use a very high resolution +and the required framebuffer size exceeds 64MB (AFAIK, in practice, this +isn't a typical configuration by users), the hyperv-drm driver may need to +allocate an MMIO range above 4GB and change the framebuffer MMIO location +to the allocated MMIO range -- in this case, there can still be issues [4] +which can't be easily fixed: any possible affected Gen1 users would have +to use a resolution whose framebuffer size is <= 64MB, or switch to Gen2 +VMs. + +[1] https://lore.kernel.org/all/SA1PR21MB692176C1BC53BFC9EAE5CF8EBF51A@SA1PR21MB6921.namprd21.prod.outlook.com/ +[2] https://lore.kernel.org/all/SA1PR21MB69218F955B62DFF62E3E88D2BF222@SA1PR21MB6921.namprd21.prod.outlook.com/ +[3] https://lore.kernel.org/all/SN6PR02MB415726B17D5A6027CD1717E8D4342@SN6PR02MB4157.namprd02.prod.outlook.com/ +[4] https://lore.kernel.org/all/SA1PR21MB69213486F821CA5A2C793C81BF342@SA1PR21MB6921.namprd21.prod.outlook.com/ + +Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs") +CC: stable@vger.kernel.org +Reviewed-by: Michael Kelley +Tested-by: Krister Johansen +Tested-by: Matthew Ruffell +Signed-off-by: Dexuan Cui +Signed-off-by: Wei Liu +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hv/vmbus_drv.c | 29 ++++++++++++++++++++++++++--- + 1 file changed, 26 insertions(+), 3 deletions(-) + +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -2205,8 +2205,8 @@ static acpi_status vmbus_walk_resources( + return AE_NO_MEMORY; + + /* If this range overlaps the virtual TPM, truncate it. 
*/ +- if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS) +- end = VTPM_BASE_ADDRESS; ++ if (end >= VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS) ++ end = VTPM_BASE_ADDRESS - 1; + + new_res->name = "hyperv mmio"; + new_res->flags = IORESOURCE_MEM; +@@ -2273,6 +2273,7 @@ static void vmbus_mmio_remove(void) + static void __maybe_unused vmbus_reserve_fb(void) + { + resource_size_t start = 0, size; ++ resource_size_t low_mmio_base; + struct pci_dev *pdev; + + if (efi_enabled(EFI_BOOT)) { +@@ -2280,6 +2281,24 @@ static void __maybe_unused vmbus_reserve + if (IS_ENABLED(CONFIG_SYSFB)) { + start = screen_info.lfb_base; + size = max_t(__u32, screen_info.lfb_size, 0x800000); ++ ++ low_mmio_base = hyperv_mmio->start; ++ if (!low_mmio_base || upper_32_bits(low_mmio_base) || ++ (start && start < low_mmio_base)) { ++ pr_warn("Unexpected low mmio base %pa\n", &low_mmio_base); ++ } else { ++ /* ++ * If the kdump/kexec or CVM kernel's lfb_base ++ * is 0, fall back to the low mmio base. ++ */ ++ if (!start) ++ start = low_mmio_base; ++ /* ++ * Reserve half of the space below 4GB for high ++ * resolutions, but cap the reservation to 128MB. 
++ */ ++ size = min((SZ_4G - start) / 2, SZ_128M); ++ } + } + } else { + /* Gen1 VM: get FB base from PCI */ +@@ -2300,8 +2319,10 @@ static void __maybe_unused vmbus_reserve + pci_dev_put(pdev); + } + +- if (!start) ++ if (!start) { ++ pr_warn("Unexpected framebuffer mmio base of zero\n"); + return; ++ } + + /* + * Make a claim for the frame buffer in the resource tree under the +@@ -2311,6 +2332,8 @@ static void __maybe_unused vmbus_reserve + */ + for (; !fb_mmio && (size >= 0x100000); size >>= 1) + fb_mmio = __request_region(hyperv_mmio, start, size, fb_mmio_name, 0); ++ ++ pr_info("hv_mmio=%pR,%pR fb=%pR\n", hyperv_mmio, hyperv_mmio->sibling, fb_mmio); + } + + /** diff --git a/queue-6.12/fhandle-fix-uaf-due-to-unlocked-mnt_ns-read-in-may_decode_fh.patch b/queue-6.12/fhandle-fix-uaf-due-to-unlocked-mnt_ns-read-in-may_decode_fh.patch new file mode 100644 index 0000000000..26e4c66978 --- /dev/null +++ b/queue-6.12/fhandle-fix-uaf-due-to-unlocked-mnt_ns-read-in-may_decode_fh.patch @@ -0,0 +1,153 @@ +From stable+bounces-264764-greg=kroah.com@vger.kernel.org Tue Jun 16 17:44:22 2026 +From: Sasha Levin +Date: Tue, 16 Jun 2026 12:36:09 -0400 +Subject: fhandle: fix UAF due to unlocked ->mnt_ns read in may_decode_fh() +To: stable@vger.kernel.org +Cc: Jann Horn , "Christian Brauner (Amutable)" , Sasha Levin +Message-ID: <20260616163609.3352096-1-sashal@kernel.org> + +From: Jann Horn + +[ Upstream commit 40ab6644b99685755f740b872c00ef40d9aa870e ] + +may_decode_fh() accesses mount::mnt_ns without holding any locks; that +means the mount can concurrently be unmounted, and the mnt_namespace can +concurrently be freed after an RCU grace period. 
+ +This race can happen as follows, assuming that the mount point was +created by open_tree(..., OPEN_TREE_CLONE): + +thread 1 thread 2 RCU + __do_sys_open_by_handle_at + do_handle_open + handle_to_path + may_decode_fh + is_mounted + [mount::mnt_ns access] + [mount::mnt_ns access] +__do_sys_close + fput_close_sync + __fput + dissolve_on_fput + umount_tree + class_namespace_excl_destructor + namespace_unlock + free_mnt_ns + mnt_ns_tree_remove + call_rcu(mnt_ns_release_rcu) + mnt_ns_release_rcu + mnt_ns_release + kfree + [mnt_namespace::user_ns access] **UAF** + +Fix it by taking rcu_read_lock() around the mount::mnt_ns access, like +in __prepend_path(). +Additionally, document the semantics of mount::mnt_ns, and use WRITE_ONCE() +for writers that can race with lockless readers. + +This bug is unreachable unless one of the following is set: + + - CONFIG_PREEMPTION + - CONFIG_RCU_STRICT_GRACE_PERIOD + +because it requires an RCU grace period to happen during a syscall without +an explicit preemption. + +This doesn't seem to have interesting security impact; worst-case, it could +leak the result of an integer comparison to userspace (from the level +check in cap_capable()), cause an endless loop, or crash the kernel by +dereferencing an invalid address. 
+ +Fixes: 620c266f3949 ("fhandle: relax open_by_handle_at() permission checks") +Cc: stable@vger.kernel.org +Signed-off-by: Jann Horn +Link: https://patch.msgid.link/20260603-vfs-fhandle-uaf-fix-v2-1-d05db76a5084@google.com +Signed-off-by: Christian Brauner (Amutable) +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/fhandle.c | 16 ++++++++++++++-- + fs/mount.h | 10 +++++++++- + fs/namespace.c | 6 +++--- + 3 files changed, 26 insertions(+), 6 deletions(-) + +--- a/fs/fhandle.c ++++ b/fs/fhandle.c +@@ -242,6 +242,19 @@ static int do_handle_to_path(struct file + return 0; + } + ++static bool capable_wrt_mount(struct mount *mount) ++{ ++ struct mnt_namespace *mnt_ns; ++ ++ /* ++ * For ->mnt_ns access. ++ * The following READ_ONCE() is semantically rcu_dereference(). ++ */ ++ guard(rcu)(); ++ mnt_ns = READ_ONCE(mount->mnt_ns); ++ return ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN); ++} ++ + /* + * Allow relaxed permissions of file handles if the caller has the + * ability to mount the filesystem or create a bind-mount of the +@@ -273,8 +286,7 @@ static inline bool may_decode_fh(struct + if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN)) + ctx->flags = HANDLE_CHECK_PERMS; + else if (is_mounted(root->mnt) && +- ns_capable(real_mount(root->mnt)->mnt_ns->user_ns, +- CAP_SYS_ADMIN) && ++ capable_wrt_mount(real_mount(root->mnt)) && + !has_locked_children(real_mount(root->mnt), root->dentry)) + ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE; + else +--- a/fs/mount.h ++++ b/fs/mount.h +@@ -58,7 +58,15 @@ struct mount { + struct list_head mnt_slave_list;/* list of slave mounts */ + struct list_head mnt_slave; /* slave list entry */ + struct mount *mnt_master; /* slave is on master->mnt_slave_list */ +- struct mnt_namespace *mnt_ns; /* containing namespace */ ++ /* ++ * Containing namespace (active or deactivating, non-refcounted). ++ * Normally protected by namespace_sem. ++ * Can also be accessed locklessly under RCU. 
RCU readers can't rely on ++ * the namespace still being active, but implicitly hold a passive ++ * reference (because an RCU delay happens between a namespace being ++ * deactivated and the corresponding passive refcount drop). ++ */ ++ struct mnt_namespace *mnt_ns; + struct mountpoint *mnt_mp; /* where is it mounted */ + union { + struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -1132,7 +1132,7 @@ static void mnt_add_to_ns(struct mnt_nam + struct rb_node *parent = NULL; + + WARN_ON(mnt_ns_attached(mnt)); +- mnt->mnt_ns = ns; ++ WRITE_ONCE(mnt->mnt_ns, ns); + while (*link) { + parent = *link; + if (mnt->mnt_id_unique < node_to_mount(parent)->mnt_id_unique) +@@ -1493,7 +1493,7 @@ EXPORT_SYMBOL(mntget); + void mnt_make_shortterm(struct vfsmount *mnt) + { + if (mnt) +- real_mount(mnt)->mnt_ns = NULL; ++ WRITE_ONCE(real_mount(mnt)->mnt_ns, NULL); + } + + /** +@@ -1805,7 +1805,7 @@ static void umount_tree(struct mount *mn + ns->nr_mounts--; + __touch_mnt_namespace(ns); + } +- p->mnt_ns = NULL; ++ WRITE_ONCE(p->mnt_ns, NULL); + if (how & UMOUNT_SYNC) + p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; + diff --git a/queue-6.12/fuse-re-lock-request-before-replacing-page-cache-folio.patch b/queue-6.12/fuse-re-lock-request-before-replacing-page-cache-folio.patch new file mode 100644 index 0000000000..120470bae8 --- /dev/null +++ b/queue-6.12/fuse-re-lock-request-before-replacing-page-cache-folio.patch @@ -0,0 +1,71 @@ +From stable+bounces-267936-greg=kroah.com@vger.kernel.org Tue Jun 23 13:05:39 2026 +From: Sasha Levin +Date: Tue, 23 Jun 2026 08:05:32 -0400 +Subject: fuse: re-lock request before replacing page cache folio +To: stable@vger.kernel.org +Cc: Joanne Koong , Lei Lu , Miklos Szeredi , Sasha Levin +Message-ID: <20260623120532.1152295-1-sashal@kernel.org> + +From: Joanne Koong + +[ Upstream commit a078484921052d0badd827fcc2770b5cfc1d4120 ] + +fuse_try_move_folio() unlocks the request on entry but does not 
+re-lock it on the success path. This means fuse_chan_abort() can end the +request and free the fuse_io_args (eg fuse_readpages_end()) while the +subsequent copy chain logic after fuse_try_move_folio() accesses the +fuse_io_args, leading to use-after-free issues. + +Fix this by calling lock_request() before replace_page_cache_folio(). +This ensures the request is locked on the success path which will +prevent the fuse_io_args from being freed while the later copying logic +runs, and also ensures that the ap->folios[i]->mapping is never null +since ap->folios[i] will always point to the newfolio after +replace_page_cache_folio(). + +Fixes: ce534fb05292 ("fuse: allow splice to move pages") +Cc: stable@vger.kernel.org +Reported-by: Lei Lu +Signed-off-by: Joanne Koong +Signed-off-by: Miklos Szeredi +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/dev.c | 19 +++++-------------- + 1 file changed, 5 insertions(+), 14 deletions(-) + +--- a/fs/fuse/dev.c ++++ b/fs/fuse/dev.c +@@ -891,6 +891,10 @@ static int fuse_try_move_page(struct fus + if (WARN_ON(folio_test_mlocked(oldfolio))) + goto out_fallback_unlock; + ++ err = lock_request(cs->req); ++ if (err) ++ goto out_fallback_unlock; ++ + replace_page_cache_folio(oldfolio, newfolio); + + folio_get(newfolio); +@@ -904,20 +908,7 @@ static int fuse_try_move_page(struct fus + */ + pipe_buf_release(cs->pipe, buf); + +- err = 0; +- spin_lock(&cs->req->waitq.lock); +- if (test_bit(FR_ABORTED, &cs->req->flags)) +- err = -ENOENT; +- else +- *pagep = &newfolio->page; +- spin_unlock(&cs->req->waitq.lock); +- +- if (err) { +- folio_unlock(newfolio); +- folio_put(newfolio); +- goto out_put_old; +- } +- ++ *pagep = &newfolio->page; + folio_unlock(oldfolio); + /* Drop ref for ap->pages[] array */ + folio_put(oldfolio); diff --git a/queue-6.12/futex-requeue-prevent-null-pointer-dereference-in-remove_waiter-on-self-deadlock.patch 
b/queue-6.12/futex-requeue-prevent-null-pointer-dereference-in-remove_waiter-on-self-deadlock.patch new file mode 100644 index 0000000000..2ca1ebb5db --- /dev/null +++ b/queue-6.12/futex-requeue-prevent-null-pointer-dereference-in-remove_waiter-on-self-deadlock.patch @@ -0,0 +1,48 @@ +From stable+bounces-266506-greg=kroah.com@vger.kernel.org Tue Jun 16 20:06:16 2026 +From: Sasha Levin +Date: Tue, 16 Jun 2026 15:05:55 -0400 +Subject: futex/requeue: Prevent NULL pointer dereference in remove_waiter() on self-deadlock +To: stable@vger.kernel.org +Cc: Ji'an Zhou , Thomas Gleixner , Sasha Levin +Message-ID: <20260616190556.3487341-1-sashal@kernel.org> + +From: Ji'an Zhou + +[ Upstream commit 74e144274af39935b0f410c0ee4d2b91c3730414 ] + +When FUTEX_CMP_REQUEUE_PI requeues a non-top waiter that already owns the +target PI futex, task_blocks_on_rt_mutex() returns -EDEADLK before setting +waiter->task. + +The subsequent remove_waiter() in rt_mutex_start_proxy_lock() dereferences +the NULL waiter->task, causing a kernel crash. + +Add a self-deadlock check for non-top waiters before calling +rt_mutex_start_proxy_lock(), analogous to the top-waiter check in +futex_lock_pi_atomic(). + +Fixes: 3bfdc63936dd4773109b7b8c280c0f3b5ae7d349 ("rtmutex: Use waiter::task instead of current in remove_waiter()") +Signed-off-by: Ji'an Zhou +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex/requeue.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/kernel/futex/requeue.c ++++ b/kernel/futex/requeue.c +@@ -633,6 +633,12 @@ retry_private: + continue; + } + ++ /* Self-deadlock: non-top waiter already owns the PI futex. 
*/ ++ if (rt_mutex_owner(&pi_state->pi_mutex) == this->task) { ++ ret = -EDEADLK; ++ break; ++ } ++ + ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, + this->rt_waiter, + this->task); diff --git a/queue-6.12/hv-utils-handle-and-propagate-errors-in-kvp_register.patch b/queue-6.12/hv-utils-handle-and-propagate-errors-in-kvp_register.patch new file mode 100644 index 0000000000..271419fa31 --- /dev/null +++ b/queue-6.12/hv-utils-handle-and-propagate-errors-in-kvp_register.patch @@ -0,0 +1,89 @@ +From stable+bounces-265063-greg=kroah.com@vger.kernel.org Tue Jun 16 18:13:34 2026 +From: Sasha Levin +Date: Tue, 16 Jun 2026 13:01:25 -0400 +Subject: hv: utils: handle and propagate errors in kvp_register +To: stable@vger.kernel.org +Cc: Thorsten Blum , Long Li , Wei Liu , Sasha Levin +Message-ID: <20260616170125.3368588-1-sashal@kernel.org> + +From: Thorsten Blum + +[ Upstream commit 3fcf923302a8f5c0dc3af3d2ca2657cb5fae4297 ] + +Make kvp_register() return an error code instead of silently ignoring +failures, and propagate the error from kvp_handle_handshake() instead of +returning success. + +This propagates both kzalloc_obj() and hvutil_transport_send() failures +to kvp_handle_handshake() and thus to kvp_on_msg(). 
+ +Fixes: 245ba56a52a3 ("Staging: hv: Implement key/value pair (KVP)") +Cc: stable@vger.kernel.org +Signed-off-by: Thorsten Blum +Reviewed-by: Long Li +Signed-off-by: Wei Liu +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hv/hv_kvp.c | 27 ++++++++++++++------------- + 1 file changed, 14 insertions(+), 13 deletions(-) + +--- a/drivers/hv/hv_kvp.c ++++ b/drivers/hv/hv_kvp.c +@@ -93,7 +93,7 @@ static void kvp_send_key(struct work_str + static void kvp_respond_to_host(struct hv_kvp_msg *msg, int error); + static void kvp_timeout_func(struct work_struct *dummy); + static void kvp_host_handshake_func(struct work_struct *dummy); +-static void kvp_register(int); ++static int kvp_register(int); + + static DECLARE_DELAYED_WORK(kvp_timeout_work, kvp_timeout_func); + static DECLARE_DELAYED_WORK(kvp_host_handshake_work, kvp_host_handshake_func); +@@ -127,24 +127,26 @@ static void kvp_register_done(void) + hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper); + } + +-static void ++static int + kvp_register(int reg_value) + { + + struct hv_kvp_msg *kvp_msg; + char *version; ++ int ret; + + kvp_msg = kzalloc(sizeof(*kvp_msg), GFP_KERNEL); ++ if (!kvp_msg) ++ return -ENOMEM; + +- if (kvp_msg) { +- version = kvp_msg->body.kvp_register.version; +- kvp_msg->kvp_hdr.operation = reg_value; +- strcpy(version, HV_DRV_VERSION); +- +- hvutil_transport_send(hvt, kvp_msg, sizeof(*kvp_msg), +- kvp_register_done); +- kfree(kvp_msg); +- } ++ version = kvp_msg->body.kvp_register.version; ++ kvp_msg->kvp_hdr.operation = reg_value; ++ strcpy(version, HV_DRV_VERSION); ++ ++ ret = hvutil_transport_send(hvt, kvp_msg, sizeof(*kvp_msg), ++ kvp_register_done); ++ kfree(kvp_msg); ++ return ret; + } + + static void kvp_timeout_func(struct work_struct *dummy) +@@ -186,9 +188,8 @@ static int kvp_handle_handshake(struct h + */ + pr_debug("KVP: userspace daemon ver. 
%d connected\n", + msg->kvp_hdr.operation); +- kvp_register(dm_reg_value); + +- return 0; ++ return kvp_register(dm_reg_value); + } + + diff --git a/queue-6.12/locking-mutex-remove-wakeups-from-under-mutex-wait_lock.patch b/queue-6.12/locking-mutex-remove-wakeups-from-under-mutex-wait_lock.patch new file mode 100644 index 0000000000..50e460faad --- /dev/null +++ b/queue-6.12/locking-mutex-remove-wakeups-from-under-mutex-wait_lock.patch @@ -0,0 +1,550 @@ +From stable+bounces-266509-greg=kroah.com@vger.kernel.org Tue Jun 16 20:06:10 2026 +From: Sasha Levin +Date: Tue, 16 Jun 2026 15:06:00 -0400 +Subject: locking/mutex: Remove wakeups from under mutex::wait_lock +To: stable@vger.kernel.org +Cc: Peter Zijlstra , Juri Lelli , John Stultz , Metin Kaya , Davidlohr Bueso , K Prateek Nayak , Sasha Levin +Message-ID: <20260616190601.3487860-1-sashal@kernel.org> + +From: Peter Zijlstra + +[ Upstream commit 894d1b3db41cf7e6ae0304429a1747b3c3f390bc ] + +In preparation to nest mutex::wait_lock under rq::lock we need +to remove wakeups from under it. + +Do this by utilizing wake_qs to defer the wakeup until after the +lock is dropped. 
+ +[Heavily changed after 55f036ca7e74 ("locking: WW mutex cleanup") and +08295b3b5bee ("locking: Implement an algorithm choice for Wound-Wait +mutexes")] +[jstultz: rebased to mainline, added extra wake_up_q & init + to avoid hangs, similar to Connor's rework of this patch] + +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Juri Lelli +Signed-off-by: John Stultz +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Metin Kaya +Acked-by: Davidlohr Bueso +Tested-by: K Prateek Nayak +Tested-by: Metin Kaya +Link: https://lore.kernel.org/r/20241009235352.1614323-2-jstultz@google.com +Stable-dep-of: 40a25d59e85b ("locking/rtmutex: Skip remove_waiter() when waiter is not enqueued") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex/pi.c | 6 +++- + kernel/locking/mutex.c | 16 +++++++++--- + kernel/locking/rtmutex.c | 51 +++++++++++++++++++++++++++++----------- + kernel/locking/rtmutex_api.c | 12 +++++++-- + kernel/locking/rtmutex_common.h | 3 +- + kernel/locking/rwbase_rt.c | 8 +++++- + kernel/locking/rwsem.c | 4 +-- + kernel/locking/spinlock_rt.c | 5 ++- + kernel/locking/ww_mutex.h | 30 ++++++++++++++--------- + 9 files changed, 96 insertions(+), 39 deletions(-) + +--- a/kernel/futex/pi.c ++++ b/kernel/futex/pi.c +@@ -922,6 +922,7 @@ int futex_lock_pi(u32 __user *uaddr, uns + struct rt_mutex_waiter rt_waiter; + struct futex_hash_bucket *hb; + struct futex_q q = futex_q_init; ++ DEFINE_WAKE_Q(wake_q); + int res, ret; + + if (!IS_ENABLED(CONFIG_FUTEX_PI)) +@@ -1019,8 +1020,11 @@ retry_private: + * such that futex_unlock_pi() is guaranteed to observe the waiter when + * it sees the futex_q::pi_state. 
+ */ +- ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); ++ ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current, &wake_q); ++ preempt_disable(); + raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); ++ wake_up_q(&wake_q); ++ preempt_enable(); + + if (ret) { + if (ret == 1) +--- a/kernel/locking/mutex.c ++++ b/kernel/locking/mutex.c +@@ -575,6 +575,7 @@ __mutex_lock_common(struct mutex *lock, + struct lockdep_map *nest_lock, unsigned long ip, + struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) + { ++ DEFINE_WAKE_Q(wake_q); + struct mutex_waiter waiter; + struct ww_mutex *ww; + int ret; +@@ -625,7 +626,7 @@ __mutex_lock_common(struct mutex *lock, + */ + if (__mutex_trylock(lock)) { + if (ww_ctx) +- __ww_mutex_check_waiters(lock, ww_ctx); ++ __ww_mutex_check_waiters(lock, ww_ctx, &wake_q); + + goto skip_wait; + } +@@ -645,7 +646,7 @@ __mutex_lock_common(struct mutex *lock, + * Add in stamp order, waking up waiters that must kill + * themselves. 
+ */ +- ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx); ++ ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx, &wake_q); + if (ret) + goto err_early_kill; + } +@@ -681,6 +682,10 @@ __mutex_lock_common(struct mutex *lock, + } + + raw_spin_unlock(&lock->wait_lock); ++ /* Make sure we do wakeups before calling schedule */ ++ wake_up_q(&wake_q); ++ wake_q_init(&wake_q); ++ + schedule_preempt_disabled(); + + first = __mutex_waiter_is_first(lock, &waiter); +@@ -714,7 +719,7 @@ acquired: + */ + if (!ww_ctx->is_wait_die && + !__mutex_waiter_is_first(lock, &waiter)) +- __ww_mutex_check_waiters(lock, ww_ctx); ++ __ww_mutex_check_waiters(lock, ww_ctx, &wake_q); + } + + __mutex_remove_waiter(lock, &waiter); +@@ -730,6 +735,7 @@ skip_wait: + ww_mutex_lock_acquired(ww, ww_ctx); + + raw_spin_unlock(&lock->wait_lock); ++ wake_up_q(&wake_q); + preempt_enable(); + return 0; + +@@ -741,6 +747,7 @@ err_early_kill: + raw_spin_unlock(&lock->wait_lock); + debug_mutex_free_waiter(&waiter); + mutex_release(&lock->dep_map, ip); ++ wake_up_q(&wake_q); + preempt_enable(); + return ret; + } +@@ -951,9 +958,10 @@ static noinline void __sched __mutex_unl + if (owner & MUTEX_FLAG_HANDOFF) + __mutex_handoff(lock, next); + ++ preempt_disable(); + raw_spin_unlock(&lock->wait_lock); +- + wake_up_q(&wake_q); ++ preempt_enable(); + } + + #ifndef CONFIG_DEBUG_LOCK_ALLOC +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -34,13 +34,15 @@ + + static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter, + struct rt_mutex *lock, +- struct ww_acquire_ctx *ww_ctx) ++ struct ww_acquire_ctx *ww_ctx, ++ struct wake_q_head *wake_q) + { + return 0; + } + + static inline void __ww_mutex_check_waiters(struct rt_mutex *lock, +- struct ww_acquire_ctx *ww_ctx) ++ struct ww_acquire_ctx *ww_ctx, ++ struct wake_q_head *wake_q) + { + } + +@@ -1201,7 +1203,8 @@ static int __sched task_blocks_on_rt_mut + struct rt_mutex_waiter *waiter, + struct task_struct *task, + struct ww_acquire_ctx 
*ww_ctx, +- enum rtmutex_chainwalk chwalk) ++ enum rtmutex_chainwalk chwalk, ++ struct wake_q_head *wake_q) + { + struct task_struct *owner = rt_mutex_owner(lock); + struct rt_mutex_waiter *top_waiter = waiter; +@@ -1245,7 +1248,10 @@ static int __sched task_blocks_on_rt_mut + + /* Check whether the waiter should back out immediately */ + rtm = container_of(lock, struct rt_mutex, rtmutex); +- res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx); ++ preempt_disable(); ++ res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx, wake_q); ++ wake_up_q(wake_q); ++ preempt_enable(); + if (res) { + raw_spin_lock(&task->pi_lock); + rt_mutex_dequeue(lock, waiter); +@@ -1677,12 +1683,14 @@ static void __sched rt_mutex_handle_dead + * @state: The task state for sleeping + * @chwalk: Indicator whether full or partial chainwalk is requested + * @waiter: Initializer waiter for blocking ++ * @wake_q: The wake_q to wake tasks after we release the wait_lock + */ + static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, + struct ww_acquire_ctx *ww_ctx, + unsigned int state, + enum rtmutex_chainwalk chwalk, +- struct rt_mutex_waiter *waiter) ++ struct rt_mutex_waiter *waiter, ++ struct wake_q_head *wake_q) + { + struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex); + struct ww_mutex *ww = ww_container_of(rtm); +@@ -1693,7 +1701,7 @@ static int __sched __rt_mutex_slowlock(s + /* Try to acquire the lock again: */ + if (try_to_take_rt_mutex(lock, current, NULL)) { + if (build_ww_mutex() && ww_ctx) { +- __ww_mutex_check_waiters(rtm, ww_ctx); ++ __ww_mutex_check_waiters(rtm, ww_ctx, wake_q); + ww_mutex_lock_acquired(ww, ww_ctx); + } + return 0; +@@ -1703,7 +1711,7 @@ static int __sched __rt_mutex_slowlock(s + + trace_contention_begin(lock, LCB_F_RT); + +- ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk); ++ ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk, wake_q); + if (likely(!ret)) + ret = rt_mutex_slowlock_block(lock, ww_ctx, state, 
NULL, waiter); + +@@ -1711,7 +1719,7 @@ static int __sched __rt_mutex_slowlock(s + /* acquired the lock */ + if (build_ww_mutex() && ww_ctx) { + if (!ww_ctx->is_wait_die) +- __ww_mutex_check_waiters(rtm, ww_ctx); ++ __ww_mutex_check_waiters(rtm, ww_ctx, wake_q); + ww_mutex_lock_acquired(ww, ww_ctx); + } + } else { +@@ -1733,7 +1741,8 @@ static int __sched __rt_mutex_slowlock(s + + static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock, + struct ww_acquire_ctx *ww_ctx, +- unsigned int state) ++ unsigned int state, ++ struct wake_q_head *wake_q) + { + struct rt_mutex_waiter waiter; + int ret; +@@ -1742,7 +1751,7 @@ static inline int __rt_mutex_slowlock_lo + waiter.ww_ctx = ww_ctx; + + ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK, +- &waiter); ++ &waiter, wake_q); + + debug_rt_mutex_free_waiter(&waiter); + return ret; +@@ -1758,6 +1767,7 @@ static int __sched rt_mutex_slowlock(str + struct ww_acquire_ctx *ww_ctx, + unsigned int state) + { ++ DEFINE_WAKE_Q(wake_q); + unsigned long flags; + int ret; + +@@ -1779,8 +1789,11 @@ static int __sched rt_mutex_slowlock(str + * irqsave/restore variants. 
+ */ + raw_spin_lock_irqsave(&lock->wait_lock, flags); +- ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state); ++ ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state, &wake_q); ++ preempt_disable(); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ wake_up_q(&wake_q); ++ preempt_enable(); + rt_mutex_post_schedule(); + + return ret; +@@ -1806,8 +1819,10 @@ static __always_inline int __rt_mutex_lo + /** + * rtlock_slowlock_locked - Slow path lock acquisition for RT locks + * @lock: The underlying RT mutex ++ * @wake_q: The wake_q to wake tasks after we release the wait_lock + */ +-static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) ++static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock, ++ struct wake_q_head *wake_q) + { + struct rt_mutex_waiter waiter; + struct task_struct *owner; +@@ -1824,7 +1839,7 @@ static void __sched rtlock_slowlock_lock + + trace_contention_begin(lock, LCB_F_RT); + +- task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK); ++ task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK, wake_q); + + for (;;) { + /* Try to acquire the lock again */ +@@ -1835,7 +1850,11 @@ static void __sched rtlock_slowlock_lock + owner = rt_mutex_owner(lock); + else + owner = NULL; ++ preempt_disable(); + raw_spin_unlock_irq(&lock->wait_lock); ++ wake_up_q(wake_q); ++ wake_q_init(wake_q); ++ preempt_enable(); + + if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner)) + schedule_rtlock(); +@@ -1860,10 +1879,14 @@ static void __sched rtlock_slowlock_lock + static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock) + { + unsigned long flags; ++ DEFINE_WAKE_Q(wake_q); + + raw_spin_lock_irqsave(&lock->wait_lock, flags); +- rtlock_slowlock_locked(lock); ++ rtlock_slowlock_locked(lock, &wake_q); ++ preempt_disable(); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ wake_up_q(&wake_q); ++ preempt_enable(); + } + + #endif /* RT_MUTEX_BUILD_SPINLOCKS 
*/ +--- a/kernel/locking/rtmutex_api.c ++++ b/kernel/locking/rtmutex_api.c +@@ -275,6 +275,7 @@ void __sched rt_mutex_proxy_unlock(struc + * @lock: the rt_mutex to take + * @waiter: the pre-initialized rt_mutex_waiter + * @task: the task to prepare ++ * @wake_q: the wake_q to wake tasks after we release the wait_lock + * + * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock + * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. +@@ -291,7 +292,8 @@ void __sched rt_mutex_proxy_unlock(struc + */ + int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, + struct rt_mutex_waiter *waiter, +- struct task_struct *task) ++ struct task_struct *task, ++ struct wake_q_head *wake_q) + { + int ret; + +@@ -302,7 +304,7 @@ int __sched __rt_mutex_start_proxy_lock( + + /* We enforce deadlock detection for futexes */ + ret = task_blocks_on_rt_mutex(lock, waiter, task, NULL, +- RT_MUTEX_FULL_CHAINWALK); ++ RT_MUTEX_FULL_CHAINWALK, wake_q); + + if (ret && !rt_mutex_owner(lock)) { + /* +@@ -341,12 +343,16 @@ int __sched rt_mutex_start_proxy_lock(st + struct task_struct *task) + { + int ret; ++ DEFINE_WAKE_Q(wake_q); + + raw_spin_lock_irq(&lock->wait_lock); +- ret = __rt_mutex_start_proxy_lock(lock, waiter, task); ++ ret = __rt_mutex_start_proxy_lock(lock, waiter, task, &wake_q); + if (unlikely(ret)) + remove_waiter(lock, waiter); ++ preempt_disable(); + raw_spin_unlock_irq(&lock->wait_lock); ++ wake_up_q(&wake_q); ++ preempt_enable(); + + return ret; + } +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -83,7 +83,8 @@ extern void rt_mutex_init_proxy_locked(s + extern void rt_mutex_proxy_unlock(struct rt_mutex_base *lock); + extern int __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, + struct rt_mutex_waiter *waiter, +- struct task_struct *task); ++ struct task_struct *task, ++ struct wake_q_head *); + extern int rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, + struct rt_mutex_waiter 
*waiter, + struct task_struct *task); +--- a/kernel/locking/rwbase_rt.c ++++ b/kernel/locking/rwbase_rt.c +@@ -69,6 +69,7 @@ static int __sched __rwbase_read_lock(st + unsigned int state) + { + struct rt_mutex_base *rtm = &rwb->rtmutex; ++ DEFINE_WAKE_Q(wake_q); + int ret; + + rwbase_pre_schedule(); +@@ -110,7 +111,7 @@ static int __sched __rwbase_read_lock(st + * For rwlocks this returns 0 unconditionally, so the below + * !ret conditionals are optimized out. + */ +- ret = rwbase_rtmutex_slowlock_locked(rtm, state); ++ ret = rwbase_rtmutex_slowlock_locked(rtm, state, &wake_q); + + /* + * On success the rtmutex is held, so there can't be a writer +@@ -121,7 +122,12 @@ static int __sched __rwbase_read_lock(st + */ + if (!ret) + atomic_inc(&rwb->readers); ++ ++ preempt_disable(); + raw_spin_unlock_irq(&rtm->wait_lock); ++ wake_up_q(&wake_q); ++ preempt_enable(); ++ + if (!ret) + rwbase_rtmutex_unlock(rtm); + +--- a/kernel/locking/rwsem.c ++++ b/kernel/locking/rwsem.c +@@ -1413,8 +1413,8 @@ static inline void __downgrade_write(str + #define rwbase_rtmutex_lock_state(rtm, state) \ + __rt_mutex_lock(rtm, state) + +-#define rwbase_rtmutex_slowlock_locked(rtm, state) \ +- __rt_mutex_slowlock_locked(rtm, NULL, state) ++#define rwbase_rtmutex_slowlock_locked(rtm, state, wq) \ ++ __rt_mutex_slowlock_locked(rtm, NULL, state, wq) + + #define rwbase_rtmutex_unlock(rtm) \ + __rt_mutex_unlock(rtm) +--- a/kernel/locking/spinlock_rt.c ++++ b/kernel/locking/spinlock_rt.c +@@ -162,9 +162,10 @@ rwbase_rtmutex_lock_state(struct rt_mute + } + + static __always_inline int +-rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state) ++rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state, ++ struct wake_q_head *wake_q) + { +- rtlock_slowlock_locked(rtm); ++ rtlock_slowlock_locked(rtm, wake_q); + return 0; + } + +--- a/kernel/locking/ww_mutex.h ++++ b/kernel/locking/ww_mutex.h +@@ -275,7 +275,7 @@ __ww_ctx_less(struct ww_acquire_ctx *a, + */ + 
static bool + __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter, +- struct ww_acquire_ctx *ww_ctx) ++ struct ww_acquire_ctx *ww_ctx, struct wake_q_head *wake_q) + { + if (!ww_ctx->is_wait_die) + return false; +@@ -284,7 +284,7 @@ __ww_mutex_die(struct MUTEX *lock, struc + #ifndef WW_RT + debug_mutex_wake_waiter(lock, waiter); + #endif +- wake_up_process(waiter->task); ++ wake_q_add(wake_q, waiter->task); + } + + return true; +@@ -299,7 +299,8 @@ __ww_mutex_die(struct MUTEX *lock, struc + */ + static bool __ww_mutex_wound(struct MUTEX *lock, + struct ww_acquire_ctx *ww_ctx, +- struct ww_acquire_ctx *hold_ctx) ++ struct ww_acquire_ctx *hold_ctx, ++ struct wake_q_head *wake_q) + { + struct task_struct *owner = __ww_mutex_owner(lock); + +@@ -331,7 +332,7 @@ static bool __ww_mutex_wound(struct MUTE + * wakeup pending to re-read the wounded state. + */ + if (owner != current) +- wake_up_process(owner); ++ wake_q_add(wake_q, owner); + + return true; + } +@@ -352,7 +353,8 @@ static bool __ww_mutex_wound(struct MUTE + * The current task must not be on the wait list. + */ + static void +-__ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx) ++__ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx, ++ struct wake_q_head *wake_q) + { + struct MUTEX_WAITER *cur; + +@@ -364,8 +366,8 @@ __ww_mutex_check_waiters(struct MUTEX *l + if (!cur->ww_ctx) + continue; + +- if (__ww_mutex_die(lock, cur, ww_ctx) || +- __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx)) ++ if (__ww_mutex_die(lock, cur, ww_ctx, wake_q) || ++ __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx, wake_q)) + break; + } + } +@@ -377,6 +379,8 @@ __ww_mutex_check_waiters(struct MUTEX *l + static __always_inline void + ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) + { ++ DEFINE_WAKE_Q(wake_q); ++ + ww_mutex_lock_acquired(lock, ctx); + + /* +@@ -405,8 +409,11 @@ ww_mutex_set_context_fastpath(struct ww_ + * die or wound us. 
+ */ + lock_wait_lock(&lock->base); +- __ww_mutex_check_waiters(&lock->base, ctx); ++ __ww_mutex_check_waiters(&lock->base, ctx, &wake_q); ++ preempt_disable(); + unlock_wait_lock(&lock->base); ++ wake_up_q(&wake_q); ++ preempt_enable(); + } + + static __always_inline int +@@ -488,7 +495,8 @@ __ww_mutex_check_kill(struct MUTEX *lock + static inline int + __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter, + struct MUTEX *lock, +- struct ww_acquire_ctx *ww_ctx) ++ struct ww_acquire_ctx *ww_ctx, ++ struct wake_q_head *wake_q) + { + struct MUTEX_WAITER *cur, *pos = NULL; + bool is_wait_die; +@@ -532,7 +540,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITE + pos = cur; + + /* Wait-Die: ensure younger waiters die. */ +- __ww_mutex_die(lock, cur, ww_ctx); ++ __ww_mutex_die(lock, cur, ww_ctx, wake_q); + } + + __ww_waiter_add(lock, waiter, pos); +@@ -550,7 +558,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITE + * such that either we or the fastpath will wound @ww->ctx. + */ + smp_mb(); +- __ww_mutex_wound(lock, ww_ctx, ww->ctx); ++ __ww_mutex_wound(lock, ww_ctx, ww->ctx, wake_q); + } + + return 0; diff --git a/queue-6.12/locking-rtmutex-skip-remove_waiter-when-waiter-is-not-enqueued.patch b/queue-6.12/locking-rtmutex-skip-remove_waiter-when-waiter-is-not-enqueued.patch new file mode 100644 index 0000000000..85b245b55d --- /dev/null +++ b/queue-6.12/locking-rtmutex-skip-remove_waiter-when-waiter-is-not-enqueued.patch @@ -0,0 +1,69 @@ +From stable+bounces-266508-greg=kroah.com@vger.kernel.org Tue Jun 16 20:06:09 2026 +From: Sasha Levin +Date: Tue, 16 Jun 2026 15:06:01 -0400 +Subject: locking/rtmutex: Skip remove_waiter() when waiter is not enqueued +To: stable@vger.kernel.org +Cc: Davidlohr Bueso , syzbot+78147abe6c524f183ee9@syzkaller.appspotmail.com, Thomas Gleixner , Sasha Levin +Message-ID: <20260616190601.3487860-2-sashal@kernel.org> + +From: Davidlohr Bueso + +[ Upstream commit 40a25d59e85b3c8709ac2424d44f65610467871e ] + +syzbot triggered the following splat in 
remove_waiter() via +FUTEX_CMP_REQUEUE_PI: + + KASAN: null-ptr-deref in range [0x0000000000000a88-0x0000000000000a8f] + class_raw_spinlock_constructor + remove_waiter+0x159/0x1200 kernel/locking/rtmutex.c:1561 + rt_mutex_start_proxy_lock+0x103/0x120 + futex_requeue+0x10e4/0x20d0 + __x64_sys_futex+0x34f/0x4d0 + +task_blocks_on_rt_mutex() does not arm the waiter upon deadlock detection, +leaving waiter->task nil, where 3bfdc63936dd ("rtmutex: Use waiter::task instead +of current in remove_waiter()") made this fatal. + +Furthermore, rt_mutex_start_proxy_lock() should not be calling into remove_waiter() +upon successfully grabbing the rtmutex. 1a1fb985f2e2 ("futex: Handle early deadlock +return correctly"), moved the remove_waiter() out of __rt_mutex_start_proxy_lock() +(where 'ret' was only ever 0 or < 0) into the wrapper. Tighten this check to +account for try_to_take_rt_mutex(). + +Fixes: 3bfdc63936dd ("rtmutex: Use waiter::task instead of current in remove_waiter()") +Reported-by: syzbot+78147abe6c524f183ee9@syzkaller.appspotmail.com +Signed-off-by: Davidlohr Bueso +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Closes: https://lore.kernel.org/all/69f114ac.050a0220.ac8b.0003.GAE@google.com/ +Link: https://patch.msgid.link/20260507112913.1019537-1-dave@stgolabs.net +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + kernel/locking/rtmutex.c | 3 +++ + kernel/locking/rtmutex_api.c | 2 +- + 2 files changed, 4 insertions(+), 1 deletion(-) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1550,6 +1550,9 @@ static void __sched remove_waiter(struct + + lockdep_assert_held(&lock->wait_lock); + ++ if (!waiter_task) /* never enqueued */ ++ return; ++ + scoped_guard(raw_spinlock, &waiter_task->pi_lock) { + rt_mutex_dequeue(lock, waiter); + waiter_task->pi_blocked_on = NULL; +--- a/kernel/locking/rtmutex_api.c ++++ b/kernel/locking/rtmutex_api.c +@@ -347,7 +347,7 @@ int __sched rt_mutex_start_proxy_lock(st + + 
raw_spin_lock_irq(&lock->wait_lock); + ret = __rt_mutex_start_proxy_lock(lock, waiter, task, &wake_q); +- if (unlikely(ret)) ++ if (unlikely(ret < 0)) + remove_waiter(lock, waiter); + preempt_disable(); + raw_spin_unlock_irq(&lock->wait_lock); diff --git a/queue-6.12/net-bonding-add-broadcast_neighbor-option-for-802.3ad.patch b/queue-6.12/net-bonding-add-broadcast_neighbor-option-for-802.3ad.patch new file mode 100644 index 0000000000..0b33f9641e --- /dev/null +++ b/queue-6.12/net-bonding-add-broadcast_neighbor-option-for-802.3ad.patch @@ -0,0 +1,332 @@ +From 3wnExagcKBsMtykn007pxxpun.lxvp0nptqurw36ox3wmj2rxw.x0p@flex--kpberry.bounces.google.com Tue Jun 16 16:54:43 2026 +From: Kevin Berry +Date: Tue, 16 Jun 2026 15:54:25 +0000 +Subject: net: bonding: add broadcast_neighbor option for 802.3ad +To: stable@vger.kernel.org +Cc: gregkh@linuxfoundation.org, bestswngs@gmail.com, chenglongtang@google.com, joneslee@google.com, kpberry@google.com, pabeni@redhat.com, rnj@google.com, sashal@kernel.org, xmei5@asu.edu, Tonghao Zhang , Jay Vosburgh , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Simon Horman , Jonathan Corbet , Andrew Lunn , Steven Rostedt , Masami Hiramatsu , Mathieu Desnoyers , Nikolay Aleksandrov , Zengbing Tu +Message-ID: <20260616155432.2093908-3-kpberry@google.com> + +From: Tonghao Zhang + +[ Upstream commit ce7a381697cb3958ffe0b45e5028ac69444e9288 ] + +Stacking technology is a type of technology used to expand ports on +Ethernet switches. It is widely used as a common access method in +large-scale Internet data center architectures. Years of practice +have proved that stacking technology has advantages and disadvantages +in high-reliability network architecture scenarios. For instance, +in stacking networking arch, conventional switch system upgrades +require multiple stacked devices to restart at the same time. +Therefore, it is inevitable that the business will be interrupted +for a while. 
It is for this reason that "no-stacking" in data centers +has become a trend. Additionally, when the stacking link connecting +the switches fails or is abnormal, the stack will split. Although it is +not common, it still happens in actual operation. The problem is that +after the split, it is equivalent to two switches with the same +configuration appearing in the network, causing network configuration +conflicts and ultimately interrupting the services carried by the +stacking system. + +To improve network stability, "non-stacking" solutions have been +increasingly adopted, particularly by public cloud providers and +tech companies like Alibaba, Tencent, and Didi. "non-stacking" is +a method of mimicking switch stacking that convinces a LACP peer, +bonding in this case, connected to a set of "non-stacked" switches +that all of its ports are connected to a single switch +(i.e., LACP aggregator), as if those switches were stacked. This +enables the LACP peer's ports to aggregate together, and requires +(a) special switch configuration, described in the linked article, +and (b) modifications to the bonding 802.3ad (LACP) mode to send +all ARP/ND packets across all ports of the active aggregator. + +Note that, with multiple aggregators, the current broadcast mode +logic will send only packets to the selected aggregator(s). + + +-----------+ +-----------+ + | switch1 | | switch2 | + +-----------+ +-----------+ + ^ ^ + | | + +-----------------+ + | bond4 lacp | + +-----------------+ + | | + | NIC1 | NIC2 + +-----------------+ + | server | + +-----------------+ + +- https://www.ruijie.com/fr-fr/support/tech-gallery/de-stack-data-center-network-architecture/ + +Cc: Jay Vosburgh +Cc: "David S. 
Miller" +Cc: Eric Dumazet +Cc: Jakub Kicinski +Cc: Paolo Abeni +Cc: Simon Horman +Cc: Jonathan Corbet +Cc: Andrew Lunn +Cc: Steven Rostedt +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Cc: Nikolay Aleksandrov +Signed-off-by: Tonghao Zhang +Signed-off-by: Zengbing Tu +Link: https://patch.msgid.link/84d0a044514157bb856a10b6d03a1028c4883561.1751031306.git.tonghao@bamaicloud.com +Signed-off-by: Paolo Abeni +Signed-off-by: Kevin Berry +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/networking/bonding.rst | 6 +++ + drivers/net/bonding/bond_main.c | 66 +++++++++++++++++++++++++++++++---- + drivers/net/bonding/bond_options.c | 42 ++++++++++++++++++++++ + include/net/bond_options.h | 1 + include/net/bonding.h | 3 + + 5 files changed, 112 insertions(+), 6 deletions(-) + +--- a/Documentation/networking/bonding.rst ++++ b/Documentation/networking/bonding.rst +@@ -562,6 +562,12 @@ lacp_rate + + The default is slow. + ++broadcast_neighbor ++ ++ Option specifying whether to broadcast ARP/ND packets to all ++ active slaves. This option has no effect in modes other than ++ 802.3ad mode. The default is off (0). 
++ + max_bonds + + Specifies the number of bonding devices to create for this +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -211,6 +211,8 @@ atomic_t netpoll_block_tx = ATOMIC_INIT( + + unsigned int bond_net_id __read_mostly; + ++DEFINE_STATIC_KEY_FALSE(bond_bcast_neigh_enabled); ++ + static const struct flow_dissector_key flow_keys_bonding_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_CONTROL, +@@ -4445,6 +4447,9 @@ static int bond_open(struct net_device * + + bond_for_each_slave(bond, slave, iter) + dev_mc_add(slave->dev, lacpdu_mcast_addr); ++ ++ if (bond->params.broadcast_neighbor) ++ static_branch_inc(&bond_bcast_neigh_enabled); + } + + if (bond_mode_can_use_xmit_hash(bond)) +@@ -4468,6 +4473,10 @@ static int bond_close(struct net_device + if (bond_is_lb(bond)) + bond_alb_deinitialize(bond); + ++ if (BOND_MODE(bond) == BOND_MODE_8023AD && ++ bond->params.broadcast_neighbor) ++ static_branch_dec(&bond_bcast_neigh_enabled); ++ + if (bond_uses_primary(bond)) { + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); +@@ -5304,6 +5313,37 @@ static struct slave *bond_xdp_xmit_3ad_x + return slaves->arr[hash % count]; + } + ++static bool bond_should_broadcast_neighbor(struct sk_buff *skb, ++ struct net_device *dev) ++{ ++ struct bonding *bond = netdev_priv(dev); ++ struct { ++ struct ipv6hdr ip6; ++ struct icmp6hdr icmp6; ++ } *combined, _combined; ++ ++ if (!static_branch_unlikely(&bond_bcast_neigh_enabled)) ++ return false; ++ ++ if (!bond->params.broadcast_neighbor) ++ return false; ++ ++ if (skb->protocol == htons(ETH_P_ARP)) ++ return true; ++ ++ if (skb->protocol == htons(ETH_P_IPV6)) { ++ combined = skb_header_pointer(skb, skb_mac_header_len(skb), ++ sizeof(_combined), ++ &_combined); ++ if (combined && combined->ip6.nexthdr == NEXTHDR_ICMP && ++ (combined->icmp6.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || ++ combined->icmp6.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) ++ return true; ++ } ++ ++ return 
false; ++} ++ + /* Use this Xmit function for 3AD as well as XOR modes. The current + * usable slave array is formed in the control path. The xmit function + * just calculates hash and sends the packet out. +@@ -5323,17 +5363,27 @@ static netdev_tx_t bond_3ad_xor_xmit(str + return bond_tx_drop(dev, skb); + } + +-/* in broadcast mode, we send everything to all usable interfaces. */ ++/* in broadcast mode, we send everything to all or usable slave interfaces. ++ * under rcu_read_lock when this function is called. ++ */ + static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb, +- struct net_device *bond_dev) ++ struct net_device *bond_dev, ++ bool all_slaves) + { + struct bonding *bond = netdev_priv(bond_dev); +- struct slave *slave = NULL; +- struct list_head *iter; ++ struct bond_up_slave *slaves; + bool xmit_suc = false; + bool skb_used = false; ++ int slaves_count, i; + +- bond_for_each_slave_rcu(bond, slave, iter) { ++ if (all_slaves) ++ slaves = rcu_dereference(bond->all_slaves); ++ else ++ slaves = rcu_dereference(bond->usable_slaves); ++ ++ slaves_count = slaves ? 
READ_ONCE(slaves->count) : 0; ++ for (i = 0; i < slaves_count; i++) { ++ struct slave *slave = slaves->arr[i]; + struct sk_buff *skb2; + + if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)) +@@ -5571,10 +5621,13 @@ static netdev_tx_t __bond_start_xmit(str + case BOND_MODE_ACTIVEBACKUP: + return bond_xmit_activebackup(skb, dev); + case BOND_MODE_8023AD: ++ if (bond_should_broadcast_neighbor(skb, dev)) ++ return bond_xmit_broadcast(skb, dev, false); ++ fallthrough; + case BOND_MODE_XOR: + return bond_3ad_xor_xmit(skb, dev); + case BOND_MODE_BROADCAST: +- return bond_xmit_broadcast(skb, dev); ++ return bond_xmit_broadcast(skb, dev, true); + case BOND_MODE_ALB: + return bond_alb_xmit(skb, dev); + case BOND_MODE_TLB: +@@ -6450,6 +6503,7 @@ static int __init bond_check_params(stru + eth_zero_addr(params->ad_actor_system); + params->ad_user_port_key = ad_user_port_key; + params->coupled_control = 1; ++ params->broadcast_neighbor = 0; + if (packets_per_slave > 0) { + params->reciprocal_packets_per_slave = + reciprocal_value(packets_per_slave); +--- a/drivers/net/bonding/bond_options.c ++++ b/drivers/net/bonding/bond_options.c +@@ -87,6 +87,8 @@ static int bond_option_missed_max_set(st + const struct bond_opt_value *newval); + static int bond_option_coupled_control_set(struct bonding *bond, + const struct bond_opt_value *newval); ++static int bond_option_broadcast_neigh_set(struct bonding *bond, ++ const struct bond_opt_value *newval); + + static const struct bond_opt_value bond_mode_tbl[] = { + { "balance-rr", BOND_MODE_ROUNDROBIN, BOND_VALFLAG_DEFAULT}, +@@ -240,6 +242,12 @@ static const struct bond_opt_value bond_ + { NULL, -1, 0}, + }; + ++static const struct bond_opt_value bond_broadcast_neigh_tbl[] = { ++ { "off", 0, BOND_VALFLAG_DEFAULT}, ++ { "on", 1, 0}, ++ { NULL, -1, 0} ++}; ++ + static const struct bond_option bond_opts[BOND_OPT_LAST] = { + [BOND_OPT_MODE] = { + .id = BOND_OPT_MODE, +@@ -513,6 +521,14 @@ static const struct bond_option bond_opt + 
.flags = BOND_OPTFLAG_IFDOWN, + .values = bond_coupled_control_tbl, + .set = bond_option_coupled_control_set, ++ }, ++ [BOND_OPT_BROADCAST_NEIGH] = { ++ .id = BOND_OPT_BROADCAST_NEIGH, ++ .name = "broadcast_neighbor", ++ .desc = "Broadcast neighbor packets to all active slaves", ++ .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), ++ .values = bond_broadcast_neigh_tbl, ++ .set = bond_option_broadcast_neigh_set, + } + }; + +@@ -894,6 +910,13 @@ static int bond_option_mode_set(struct b + bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; + bond->params.mode = newval->value; + ++ /* When changing mode, the bond device is down, we may reduce ++ * the bond_bcast_neigh_enabled in bond_close() if broadcast_neighbor ++ * enabled in 8023ad mode. Therefore, only clear broadcast_neighbor ++ * to 0. ++ */ ++ bond->params.broadcast_neighbor = 0; ++ + if (bond->dev->reg_state == NETREG_REGISTERED) { + bool update = false; + +@@ -1843,3 +1866,22 @@ static int bond_option_coupled_control_s + bond->params.coupled_control = newval->value; + return 0; + } ++ ++static int bond_option_broadcast_neigh_set(struct bonding *bond, ++ const struct bond_opt_value *newval) ++{ ++ if (bond->params.broadcast_neighbor == newval->value) ++ return 0; ++ ++ bond->params.broadcast_neighbor = newval->value; ++ if (bond->dev->flags & IFF_UP) { ++ if (bond->params.broadcast_neighbor) ++ static_branch_inc(&bond_bcast_neigh_enabled); ++ else ++ static_branch_dec(&bond_bcast_neigh_enabled); ++ } ++ ++ netdev_dbg(bond->dev, "Setting broadcast_neighbor to %s (%llu)\n", ++ newval->string, newval->value); ++ return 0; ++} +--- a/include/net/bond_options.h ++++ b/include/net/bond_options.h +@@ -77,6 +77,7 @@ enum { + BOND_OPT_NS_TARGETS, + BOND_OPT_PRIO, + BOND_OPT_COUPLED_CONTROL, ++ BOND_OPT_BROADCAST_NEIGH, + BOND_OPT_LAST + }; + +--- a/include/net/bonding.h ++++ b/include/net/bonding.h +@@ -115,6 +115,8 @@ static inline int is_netpoll_tx_blocked( + #define is_netpoll_tx_blocked(dev) (0) + #endif + 
++DECLARE_STATIC_KEY_FALSE(bond_bcast_neigh_enabled); ++ + struct bond_params { + int mode; + int xmit_policy; +@@ -149,6 +151,7 @@ struct bond_params { + struct in6_addr ns_targets[BOND_MAX_NS_TARGETS]; + #endif + int coupled_control; ++ int broadcast_neighbor; + + /* 2 bytes of padding : see ether_addr_equal_64bits() */ + u8 ad_actor_system[ETH_ALEN + 2]; diff --git a/queue-6.12/net-bonding-fix-use-after-free-in-bond_xmit_broadcast.patch b/queue-6.12/net-bonding-fix-use-after-free-in-bond_xmit_broadcast.patch new file mode 100644 index 0000000000..001aa78fed --- /dev/null +++ b/queue-6.12/net-bonding-fix-use-after-free-in-bond_xmit_broadcast.patch @@ -0,0 +1,93 @@ +From 3yHExagcKBskz4qt66Dv33v0t.r31v6tvzw0x29Cu392sp8x32.36v@flex--kpberry.bounces.google.com Tue Jun 16 16:54:49 2026 +From: Kevin Berry +Date: Tue, 16 Jun 2026 15:54:29 +0000 +Subject: net: bonding: fix use-after-free in bond_xmit_broadcast() +To: stable@vger.kernel.org +Cc: gregkh@linuxfoundation.org, bestswngs@gmail.com, chenglongtang@google.com, joneslee@google.com, kpberry@google.com, pabeni@redhat.com, rnj@google.com, sashal@kernel.org, xmei5@asu.edu +Message-ID: <20260616155432.2093908-7-kpberry@google.com> + +From: Xiang Mei + +[ Upstream commit 2884bf72fb8f03409e423397319205de48adca16 ] + +bond_xmit_broadcast() reuses the original skb for the last slave +(determined by bond_is_last_slave()) and clones it for others. +Concurrent slave enslave/release can mutate the slave list during +RCU-protected iteration, changing which slave is "last" mid-loop. +This causes the original skb to be double-consumed (double-freed). + +Replace the racy bond_is_last_slave() check with a simple index +comparison (i + 1 == slaves_count) against the pre-snapshot slave +count taken via READ_ONCE() before the loop. This preserves the +zero-copy optimization for the last slave while making the "last" +determination stable against concurrent list mutations. 
+ +The UAF can trigger the following crash: + +================================================================== +BUG: KASAN: slab-use-after-free in skb_clone +Read of size 8 at addr ffff888100ef8d40 by task exploit/147 + +CPU: 1 UID: 0 PID: 147 Comm: exploit Not tainted 7.0.0-rc3+ #4 PREEMPTLAZY +Call Trace: + + dump_stack_lvl (lib/dump_stack.c:123) + print_report (mm/kasan/report.c:379 mm/kasan/report.c:482) + kasan_report (mm/kasan/report.c:597) + skb_clone (include/linux/skbuff.h:1724 include/linux/skbuff.h:1792 include/linux/skbuff.h:3396 net/core/skbuff.c:2108) + bond_xmit_broadcast (drivers/net/bonding/bond_main.c:5334) + bond_start_xmit (drivers/net/bonding/bond_main.c:5567 drivers/net/bonding/bond_main.c:5593) + dev_hard_start_xmit (include/linux/netdevice.h:5325 include/linux/netdevice.h:5334 net/core/dev.c:3871 net/core/dev.c:3887) + __dev_queue_xmit (include/linux/netdevice.h:3601 net/core/dev.c:4838) + ip6_finish_output2 (include/net/neighbour.h:540 include/net/neighbour.h:554 net/ipv6/ip6_output.c:136) + ip6_finish_output (net/ipv6/ip6_output.c:208 net/ipv6/ip6_output.c:219) + ip6_output (net/ipv6/ip6_output.c:250) + ip6_send_skb (net/ipv6/ip6_output.c:1985) + udp_v6_send_skb (net/ipv6/udp.c:1442) + udpv6_sendmsg (net/ipv6/udp.c:1733) + __sys_sendto (net/socket.c:730 net/socket.c:742 net/socket.c:2206) + __x64_sys_sendto (net/socket.c:2209) + do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94) + entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) + + +Allocated by task 147: + +Freed by task 147: + +The buggy address belongs to the object at ffff888100ef8c80 + which belongs to the cache skbuff_head_cache of size 224 +The buggy address is located 192 bytes inside of + freed 224-byte region [ffff888100ef8c80, ffff888100ef8d60) + +Memory state around the buggy address: + ffff888100ef8c00: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc + ffff888100ef8c80: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb 
+>ffff888100ef8d00: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc + ^ + ffff888100ef8d80: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb + ffff888100ef8e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +================================================================== + +Fixes: 4e5bd03ae346 ("net: bonding: fix bond_xmit_broadcast return value error bug") +Reported-by: Weiming Shi +Signed-off-by: Xiang Mei +Link: https://patch.msgid.link/20260326075553.3960562-1-xmei5@asu.edu +Signed-off-by: Paolo Abeni +Signed-off-by: Kevin Berry +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -5391,7 +5391,7 @@ static netdev_tx_t bond_xmit_broadcast(s + if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)) + continue; + +- if (bond_is_last_slave(bond, slave)) { ++ if (i + 1 == slaves_count) { + skb2 = skb; + skb_used = true; + } else { diff --git a/queue-6.12/net-phonet-free-phonet_device-after-rcu-grace-period.patch b/queue-6.12/net-phonet-free-phonet_device-after-rcu-grace-period.patch new file mode 100644 index 0000000000..afd550cb37 --- /dev/null +++ b/queue-6.12/net-phonet-free-phonet_device-after-rcu-grace-period.patch @@ -0,0 +1,43 @@ +From stable+bounces-266792-greg=kroah.com@vger.kernel.org Wed Jun 17 15:28:38 2026 +From: Sasha Levin +Date: Wed, 17 Jun 2026 10:28:30 -0400 +Subject: net: phonet: free phonet_device after RCU grace period +To: stable@vger.kernel.org +Cc: "Santosh Kalluri" , "Rémi Denis-Courmont" , "Simon Horman" , "Jakub Kicinski" , "Sasha Levin" +Message-ID: <20260617142830.3939916-3-sashal@kernel.org> + +From: Santosh Kalluri + +[ Upstream commit 71de0177b28da751f407581a4515cf4d762f6296 ] + +phonet_device_destroy() removes a phonet_device from the per-net device +list with list_del_rcu(), but frees it immediately. 
RCU readers walking +the same list can still hold a pointer to the object after it has been +removed, leading to a slab-use-after-free. + +Use kfree_rcu(), matching the lifetime rule already used by +phonet_address_del() for the same object type. + +Fixes: eeb74a9d45f7 ("Phonet: convert devices list to RCU") +Cc: stable@vger.kernel.org +Signed-off-by: Santosh Kalluri +Acked-by: Rémi Denis-Courmont +Reviewed-by: Simon Horman +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/phonet/pn_dev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/phonet/pn_dev.c ++++ b/net/phonet/pn_dev.c +@@ -105,7 +105,7 @@ static void phonet_device_destroy(struct + for_each_set_bit(addr, pnd->addrs, 64) + phonet_address_notify(net, RTM_DELADDR, ifindex, addr); + +- kfree(pnd); ++ kfree_rcu(pnd, rcu); + } + } + diff --git a/queue-6.12/phonet-pass-ifindex-to-fill_addr.patch b/queue-6.12/phonet-pass-ifindex-to-fill_addr.patch new file mode 100644 index 0000000000..589071ae4b --- /dev/null +++ b/queue-6.12/phonet-pass-ifindex-to-fill_addr.patch @@ -0,0 +1,80 @@ +From stable+bounces-266790-greg=kroah.com@vger.kernel.org Wed Jun 17 15:29:18 2026 +From: Sasha Levin +Date: Wed, 17 Jun 2026 10:28:28 -0400 +Subject: phonet: Pass ifindex to fill_addr(). +To: stable@vger.kernel.org +Cc: Kuniyuki Iwashima , Eric Dumazet , Paolo Abeni , Sasha Levin +Message-ID: <20260617142830.3939916-1-sashal@kernel.org> + +From: Kuniyuki Iwashima + +[ Upstream commit 08a9572be36819b5d9011604edfa5db6c5062a7a ] + +We will convert addr_doit() and getaddr_dumpit() to RCU, both +of which call fill_addr(). + +The former will call phonet_address_notify() outside of RCU +due to GFP_KERNEL, so dev will not be available in fill_addr(). + +Let's pass ifindex directly to fill_addr(). 
+ +Signed-off-by: Kuniyuki Iwashima +Reviewed-by: Eric Dumazet +Signed-off-by: Paolo Abeni +Stable-dep-of: 71de0177b28d ("net: phonet: free phonet_device after RCU grace period") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/phonet/pn_netlink.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +--- a/net/phonet/pn_netlink.c ++++ b/net/phonet/pn_netlink.c +@@ -19,7 +19,7 @@ + + /* Device address handling */ + +-static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, ++static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr, + u32 portid, u32 seq, int event); + + void phonet_address_notify(int event, struct net_device *dev, u8 addr) +@@ -31,7 +31,8 @@ void phonet_address_notify(int event, st + nla_total_size(1), GFP_KERNEL); + if (skb == NULL) + goto errout; +- err = fill_addr(skb, dev, addr, 0, 0, event); ++ ++ err = fill_addr(skb, dev->ifindex, addr, 0, 0, event); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); +@@ -92,8 +93,8 @@ static int addr_doit(struct sk_buff *skb + return err; + } + +-static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, +- u32 portid, u32 seq, int event) ++static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr, ++ u32 portid, u32 seq, int event) + { + struct ifaddrmsg *ifm; + struct nlmsghdr *nlh; +@@ -107,7 +108,7 @@ static int fill_addr(struct sk_buff *skb + ifm->ifa_prefixlen = 0; + ifm->ifa_flags = IFA_F_PERMANENT; + ifm->ifa_scope = RT_SCOPE_LINK; +- ifm->ifa_index = dev->ifindex; ++ ifm->ifa_index = ifindex; + if (nla_put_u8(skb, IFA_LOCAL, addr)) + goto nla_put_failure; + nlmsg_end(skb, nlh); +@@ -140,7 +141,7 @@ static int getaddr_dumpit(struct sk_buff + if (addr_idx++ < addr_start_idx) + continue; + +- if (fill_addr(skb, pnd->netdev, addr << 2, ++ if (fill_addr(skb, pnd->netdev->ifindex, addr << 2, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWADDR) < 0) + goto out; diff --git 
a/queue-6.12/phonet-pass-net-and-ifindex-to-phonet_address_notify.patch b/queue-6.12/phonet-pass-net-and-ifindex-to-phonet_address_notify.patch new file mode 100644 index 0000000000..c10cf79e34 --- /dev/null +++ b/queue-6.12/phonet-pass-net-and-ifindex-to-phonet_address_notify.patch @@ -0,0 +1,114 @@ +From stable+bounces-266791-greg=kroah.com@vger.kernel.org Wed Jun 17 15:28:37 2026 +From: Sasha Levin +Date: Wed, 17 Jun 2026 10:28:29 -0400 +Subject: phonet: Pass net and ifindex to phonet_address_notify(). +To: stable@vger.kernel.org +Cc: Kuniyuki Iwashima , Eric Dumazet , Paolo Abeni , Sasha Levin +Message-ID: <20260617142830.3939916-2-sashal@kernel.org> + +From: Kuniyuki Iwashima + +[ Upstream commit 68ed5c38b512b734caf3da1f87db4a99fcfe3002 ] + +Currently, phonet_address_notify() fetches netns and ifindex from dev. + +Once addr_doit() is converted to RCU, phonet_address_notify() will be +called outside of RCU due to GFP_KERNEL, and dev will be unavailable +there. + +Let's pass net and ifindex to phonet_address_notify(). 
+ +Signed-off-by: Kuniyuki Iwashima +Reviewed-by: Eric Dumazet +Signed-off-by: Paolo Abeni +Stable-dep-of: 71de0177b28d ("net: phonet: free phonet_device after RCU grace period") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + include/net/phonet/pn_dev.h | 2 +- + net/phonet/pn_dev.c | 10 +++++++--- + net/phonet/pn_netlink.c | 12 ++++++------ + 3 files changed, 14 insertions(+), 10 deletions(-) + +--- a/include/net/phonet/pn_dev.h ++++ b/include/net/phonet/pn_dev.h +@@ -38,7 +38,7 @@ int phonet_address_add(struct net_device + int phonet_address_del(struct net_device *dev, u8 addr); + u8 phonet_address_get(struct net_device *dev, u8 addr); + int phonet_address_lookup(struct net *net, u8 addr); +-void phonet_address_notify(int event, struct net_device *dev, u8 addr); ++void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr); + + int phonet_route_add(struct net_device *dev, u8 daddr); + int phonet_route_del(struct net_device *dev, u8 daddr); +--- a/net/phonet/pn_dev.c ++++ b/net/phonet/pn_dev.c +@@ -98,10 +98,13 @@ static void phonet_device_destroy(struct + mutex_unlock(&pndevs->lock); + + if (pnd) { ++ struct net *net = dev_net(dev); ++ u32 ifindex = dev->ifindex; + u8 addr; + + for_each_set_bit(addr, pnd->addrs, 64) +- phonet_address_notify(RTM_DELADDR, dev, addr); ++ phonet_address_notify(net, RTM_DELADDR, ifindex, addr); ++ + kfree(pnd); + } + } +@@ -244,8 +247,9 @@ static int phonet_device_autoconf(struct + ret = phonet_address_add(dev, req.ifr_phonet_autoconf.device); + if (ret) + return ret; +- phonet_address_notify(RTM_NEWADDR, dev, +- req.ifr_phonet_autoconf.device); ++ ++ phonet_address_notify(dev_net(dev), RTM_NEWADDR, dev->ifindex, ++ req.ifr_phonet_autoconf.device); + return 0; + } + +--- a/net/phonet/pn_netlink.c ++++ b/net/phonet/pn_netlink.c +@@ -22,7 +22,7 @@ + static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr, + u32 portid, u32 seq, int event); + +-void phonet_address_notify(int event, struct 
net_device *dev, u8 addr) ++void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr) + { + struct sk_buff *skb; + int err = -ENOBUFS; +@@ -32,17 +32,17 @@ void phonet_address_notify(int event, st + if (skb == NULL) + goto errout; + +- err = fill_addr(skb, dev->ifindex, addr, 0, 0, event); ++ err = fill_addr(skb, ifindex, addr, 0, 0, event); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } +- rtnl_notify(skb, dev_net(dev), 0, +- RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); ++ ++ rtnl_notify(skb, net, 0, RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); + return; + errout: +- rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); ++ rtnl_set_sk_err(net, RTNLGRP_PHONET_IFADDR, err); + } + + static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = { +@@ -89,7 +89,7 @@ static int addr_doit(struct sk_buff *skb + else + err = phonet_address_del(dev, pnaddr); + if (!err) +- phonet_address_notify(nlh->nlmsg_type, dev, pnaddr); ++ phonet_address_notify(net, nlh->nlmsg_type, ifm->ifa_index, pnaddr); + return err; + } + diff --git a/queue-6.12/revert-net-bonding-fix-use-after-free-in-bond_xmit_broadcast.patch b/queue-6.12/revert-net-bonding-fix-use-after-free-in-bond_xmit_broadcast.patch new file mode 100644 index 0000000000..008c0e589c --- /dev/null +++ b/queue-6.12/revert-net-bonding-fix-use-after-free-in-bond_xmit_broadcast.patch @@ -0,0 +1,63 @@ +From 3wHExagcKBsErwilyy5nvvnsl.jvtnylnrospu14mv1ukh0pvu.vyn@flex--kpberry.bounces.google.com Tue Jun 16 16:54:41 2026 +From: Kevin Berry +Date: Tue, 16 Jun 2026 15:54:24 +0000 +Subject: Revert "net: bonding: fix use-after-free in bond_xmit_broadcast()" +To: stable@vger.kernel.org +Cc: gregkh@linuxfoundation.org, bestswngs@gmail.com, chenglongtang@google.com, joneslee@google.com, kpberry@google.com, pabeni@redhat.com, rnj@google.com, sashal@kernel.org, xmei5@asu.edu +Message-ID: <20260616155432.2093908-2-kpberry@google.com> + +From: Kevin Berry + +This reverts commit 
3453882f36c40d2339267093676585a89808a73d. + +There are two versions of this use-after-free fix commit: this one, +which was written to avoid taking a dependency on ce7a381697cb3 ("net: +bonding: add broadcast_neighbor option for 802.3ad"), and the original, +simpler version 2884bf72fb8f ("net: bonding: fix use-after-free in +bond_xmit_broadcast()"), which implicitly depends on the slave counting +changes in ce7a381697cb3. In both the 6.1 and 6.6 stable branches, +commit ce7a381697cb3 was included as a stable dep of c4f050ce06c56 +("bonding: 3ad: implement proper RCU rules for port->aggregator"), and +the original version of this fix was subsequently applied. + +For consistency, and to be able to apply both bug fixes, we should +revert this commit, apply the series for ce7a381697cb3 ("net: bonding: +add broadcast_neighbor option for 802.3ad"), and then apply +the original version of this fix, 2884bf72fb8f ("net: bonding: fix +use-after-free in bond_xmit_broadcast()"). + +Signed-off-by: Kevin Berry +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 12 ++++-------- + 1 file changed, 4 insertions(+), 8 deletions(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -5328,22 +5328,18 @@ static netdev_tx_t bond_xmit_broadcast(s + struct net_device *bond_dev) + { + struct bonding *bond = netdev_priv(bond_dev); +- struct bond_up_slave *slaves; ++ struct slave *slave = NULL; ++ struct list_head *iter; + bool xmit_suc = false; + bool skb_used = false; +- int slaves_count, i; + +- slaves = rcu_dereference(bond->all_slaves); +- +- slaves_count = slaves ? 
READ_ONCE(slaves->count) : 0; +- for (i = 0; i < slaves_count; i++) { +- struct slave *slave = slaves->arr[i]; ++ bond_for_each_slave_rcu(bond, slave, iter) { + struct sk_buff *skb2; + + if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)) + continue; + +- if (i + 1 == slaves_count) { ++ if (bond_is_last_slave(bond, slave)) { + skb2 = skb; + skb_used = true; + } else { diff --git a/queue-6.12/rxrpc-fix-the-ack-parser-to-extract-the-sack-table-for-parsing.patch b/queue-6.12/rxrpc-fix-the-ack-parser-to-extract-the-sack-table-for-parsing.patch new file mode 100644 index 0000000000..c18e6d57ea --- /dev/null +++ b/queue-6.12/rxrpc-fix-the-ack-parser-to-extract-the-sack-table-for-parsing.patch @@ -0,0 +1,89 @@ +From stable+bounces-266865-greg=kroah.com@vger.kernel.org Wed Jun 17 18:21:35 2026 +From: Sasha Levin +Date: Wed, 17 Jun 2026 13:21:26 -0400 +Subject: rxrpc: Fix the ACK parser to extract the SACK table for parsing +To: stable@vger.kernel.org +Cc: David Howells , Michael Bommarito , Marc Dionne , Jeffrey Altman , Eric Dumazet , "David S. Miller" , Jakub Kicinski , Paolo Abeni , Simon Horman , linux-afs@lists.infradead.org, netdev@vger.kernel.org, stable@kernel.org, Sasha Levin +Message-ID: <20260617172126.254222-1-sashal@kernel.org> + +From: David Howells + +[ Upstream commit 333b6d5bb9f87827ac2639c737bf9613dbae7253 ] + +Fix modification of the received skbuff in rxrpc_input_soft_acks() and a +potential incorrect access of the buffer in a fragmented UDP packet (the +packet would probably have to be deliberately pre-generated as fragmented) +when AF_RXRPC tries to extract the contents of the SACK table by copying +out the contents of the SACK table into a buffer before attempting to parse +it. + +AF_RXRPC assumes that it can just call skb_condense() and then validly +access the SACK table from skb->data and that it will be a flat buffer - +but skb_condense() can silently fail to do anything under some +circumstances. 
+ +Note that whilst rxrpc_input_soft_acks() should be able to parse extended +ACKs, the rest of AF_RXRPC doesn't currently support that. + +Further, there's then no need to call skb_condense() in rxrpc_input_ack(), +so don't. + +Fixes: d57a3a151660 ("rxrpc: Save last ACK's SACK table rather than marking txbufs") +Reported-by: Michael Bommarito +Link: https://lore.kernel.org/r/20260513180907.2061972-1-michael.bommarito@gmail.com +Signed-off-by: David Howells +cc: Marc Dionne +cc: Jeffrey Altman +cc: Eric Dumazet +cc: "David S. Miller" +cc: Jakub Kicinski +cc: Paolo Abeni +cc: Simon Horman +cc: linux-afs@lists.infradead.org +cc: netdev@vger.kernel.org +cc: stable@kernel.org +Link: https://patch.msgid.link/105362.1780573560@warthog.procyon.org.uk +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/rxrpc/input.c | 21 ++++++++++++++++----- + 1 file changed, 16 insertions(+), 5 deletions(-) + +--- a/net/rxrpc/input.c ++++ b/net/rxrpc/input.c +@@ -775,9 +775,23 @@ static void rxrpc_input_soft_acks(struct + rxrpc_seq_t since) + { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); +- unsigned int i, old_nacks = 0; ++ unsigned int i, old_nacks = 0, nsack; + rxrpc_seq_t lowest_nak = seq + sp->ack.nr_acks; +- u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); ++ u8 sack[256] __aligned(sizeof(unsigned long)); ++ u8 *acks = sack; ++ ++ /* AF_RXRPC assumes that it can access the SACK table directly from ++ * skb->data as a flat buffer, but the skb may be non-linear (e.g. a ++ * fragmented UDP packet) and skb_condense() can silently fail to ++ * linearise it. Copy the SACK table out into a local buffer before ++ * parsing it. 
++ */ ++ memset(sack, 0, sizeof(sack)); ++ nsack = umin(sp->ack.nr_acks, 256); ++ if (skb_copy_bits(skb, ++ sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket), ++ sack, nsack) < 0) ++ return; + + for (i = 0; i < sp->ack.nr_acks; i++) { + if (acks[i] == RXRPC_ACK_TYPE_ACK) { +@@ -934,9 +948,6 @@ static void rxrpc_input_ack(struct rxrpc + skb_copy_bits(skb, ioffset, &trailer, sizeof(trailer)) < 0) + return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_trailer); + +- if (nr_acks > 0) +- skb_condense(skb); +- + if (call->cong_last_nack) { + since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack); + rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); diff --git a/queue-6.12/series b/queue-6.12/series index d402d8dd5b..498c40e653 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -32,3 +32,25 @@ bpf-reject-sleepable-kprobe_multi-programs-at-attach.patch selftests-bpf-add-test-to-ensure-kprobe_multi-is-not.patch acpi-scan-use-async-schedule-function-in-acpi_scan_c.patch regulator-core-fix-locking-in-regulator_resolve_supply-error-path.patch +dlm-prevent-npd-when-writing-a-positive-value-to-event_done.patch +xfs-remove-the-expr-argument-to-xfs_test_error.patch +xfs-fix-error-returns-in-cow-fork-repair.patch +revert-net-bonding-fix-use-after-free-in-bond_xmit_broadcast.patch +net-bonding-add-broadcast_neighbor-option-for-802.3ad.patch +bonding-add-support-for-per-port-lacp-actor-priority.patch +bonding-print-churn-state-via-netlink.patch +bonding-3ad-implement-proper-rcu-rules-for-port-aggregator.patch +net-bonding-fix-use-after-free-in-bond_xmit_broadcast.patch +bonding-fix-null-pointer-dereference-in-actor_port_prio-setting.patch +staging-rtl8723bs-fix-buffer-over-read-in-rtw_update_protection.patch +fhandle-fix-uaf-due-to-unlocked-mnt_ns-read-in-may_decode_fh.patch +drivers-hv-vmbus-improve-the-logic-of-reserving-fb_mmio-on-gen2-vms.patch +hv-utils-handle-and-propagate-errors-in-kvp_register.patch 
+futex-requeue-prevent-null-pointer-dereference-in-remove_waiter-on-self-deadlock.patch +locking-mutex-remove-wakeups-from-under-mutex-wait_lock.patch +locking-rtmutex-skip-remove_waiter-when-waiter-is-not-enqueued.patch +phonet-pass-ifindex-to-fill_addr.patch +phonet-pass-net-and-ifindex-to-phonet_address_notify.patch +net-phonet-free-phonet_device-after-rcu-grace-period.patch +rxrpc-fix-the-ack-parser-to-extract-the-sack-table-for-parsing.patch +fuse-re-lock-request-before-replacing-page-cache-folio.patch diff --git a/queue-6.12/staging-rtl8723bs-fix-buffer-over-read-in-rtw_update_protection.patch b/queue-6.12/staging-rtl8723bs-fix-buffer-over-read-in-rtw_update_protection.patch new file mode 100644 index 0000000000..801e2868a2 --- /dev/null +++ b/queue-6.12/staging-rtl8723bs-fix-buffer-over-read-in-rtw_update_protection.patch @@ -0,0 +1,59 @@ +From sashal@kernel.org Tue Jun 16 16:55:23 2026 +From: Sasha Levin +Date: Tue, 16 Jun 2026 11:55:20 -0400 +Subject: staging: rtl8723bs: fix buffer over-read in rtw_update_protection +To: stable@vger.kernel.org +Cc: Salman Alghamdi , Luka Gejak , Greg Kroah-Hartman , Sasha Levin +Message-ID: <20260616155520.3322698-1-sashal@kernel.org> + +From: Salman Alghamdi + +[ Upstream commit 514ab98364595007d4557ecc85d7e5f012c504d3 ] + +rtw_update_protection() is called with a pointer offset into the +ies buffer but the full ie_length is passed, causing a potential +buffer over-read. 
+ +Fixes: e945c43df60b ("Staging: rtl8723bs: Delete dead code from update_current_network()") +Fixes: d3fcee1b78a5 ("staging: rtl8723bs: fix camel case in struct wlan_bssid_ex") +Reported-by: Luka Gejak +Closes: https://lore.kernel.org/linux-staging/DI2H39EAAFBZ.3KI5NWN02AQ2S@linux.dev +Cc: stable@vger.kernel.org +Signed-off-by: Salman Alghamdi +Reviewed-by: Luka Gejak +Link: https://patch.msgid.link/20260508222649.23989-1-me@cipherat.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/staging/rtl8723bs/core/rtw_mlme.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/drivers/staging/rtl8723bs/core/rtw_mlme.c ++++ b/drivers/staging/rtl8723bs/core/rtw_mlme.c +@@ -449,8 +449,11 @@ static void update_current_network(struc + + if ((check_fwstate(pmlmepriv, _FW_LINKED) == true) && (is_same_network(&(pmlmepriv->cur_network.network), pnetwork, 0))) { + update_network(&(pmlmepriv->cur_network.network), pnetwork, adapter, true); ++ if (pmlmepriv->cur_network.network.ie_length < sizeof(struct ndis_802_11_fix_ie)) ++ return; ++ + rtw_update_protection(adapter, (pmlmepriv->cur_network.network.ies) + sizeof(struct ndis_802_11_fix_ie), +- pmlmepriv->cur_network.network.ie_length); ++ pmlmepriv->cur_network.network.ie_length - sizeof(struct ndis_802_11_fix_ie)); + } + } + +@@ -1070,8 +1073,11 @@ static void rtw_joinbss_update_network(s + break; + } + ++ if (cur_network->network.ie_length < sizeof(struct ndis_802_11_fix_ie)) ++ return; ++ + rtw_update_protection(padapter, (cur_network->network.ies) + sizeof(struct ndis_802_11_fix_ie), +- (cur_network->network.ie_length)); ++ (cur_network->network.ie_length - sizeof(struct ndis_802_11_fix_ie))); + + rtw_update_ht_cap(padapter, cur_network->network.ies, cur_network->network.ie_length, (u8) cur_network->network.configuration.ds_config); + } diff --git a/queue-6.12/xfs-fix-error-returns-in-cow-fork-repair.patch 
b/queue-6.12/xfs-fix-error-returns-in-cow-fork-repair.patch new file mode 100644 index 0000000000..f72807d61c --- /dev/null +++ b/queue-6.12/xfs-fix-error-returns-in-cow-fork-repair.patch @@ -0,0 +1,58 @@ +From stable+bounces-263757-greg=kroah.com@vger.kernel.org Tue Jun 16 15:05:54 2026 +From: Sasha Levin +Date: Tue, 16 Jun 2026 10:02:14 -0400 +Subject: xfs: fix error returns in CoW fork repair +To: stable@vger.kernel.org +Cc: Yingjie Gao , "Darrick J. Wong" , Carlos Maiolino , Sasha Levin +Message-ID: <20260616140214.3285019-2-sashal@kernel.org> + +From: Yingjie Gao + +[ Upstream commit fcf4faba9f986b3bb528da11913c9ec5d6e8f689 ] + +xrep_cow_find_bad() returns success after the cleanup labels even if +AG setup, btree queries, or bitmap updates failed. This can make +repair continue with an incomplete bad-file-offset bitmap instead of +stopping at the original error. + +The force-rebuild path has a related cleanup problem. If +xrep_cow_mark_file_range() fails, the function returns directly and +skips the scrub AG context and perag cleanup. + +Let the force-rebuild path fall through to the existing cleanup code +and return the saved error after cleanup. + +Fixes: dbbdbd008632 ("xfs: repair problems in CoW forks") +Cc: <stable@vger.kernel.org> # v6.8 +Signed-off-by: Yingjie Gao +Reviewed-by: "Darrick J. Wong" +Signed-off-by: Carlos Maiolino +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/scrub/cow_repair.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +--- a/fs/xfs/scrub/cow_repair.c ++++ b/fs/xfs/scrub/cow_repair.c +@@ -297,18 +297,15 @@ xrep_cow_find_bad( + * on the debugging knob, replace everything in the CoW fork. 
+ */ + if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) || +- XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) { ++ XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) + error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock, + xc->irec.br_blockcount); +- if (error) +- return error; +- } + + out_sa: + xchk_ag_free(sc, &sc->sa); + out_pag: + xfs_perag_put(pag); +- return 0; ++ return error; + } + + /* diff --git a/queue-6.12/xfs-remove-the-expr-argument-to-xfs_test_error.patch b/queue-6.12/xfs-remove-the-expr-argument-to-xfs_test_error.patch new file mode 100644 index 0000000000..94538d7d1e --- /dev/null +++ b/queue-6.12/xfs-remove-the-expr-argument-to-xfs_test_error.patch @@ -0,0 +1,515 @@ +From stable+bounces-263756-greg=kroah.com@vger.kernel.org Tue Jun 16 15:05:50 2026 +From: Sasha Levin +Date: Tue, 16 Jun 2026 10:02:13 -0400 +Subject: xfs: remove the expr argument to XFS_TEST_ERROR +To: stable@vger.kernel.org +Cc: Christoph Hellwig , "Darrick J. Wong" , Carlos Maiolino , Sasha Levin +Message-ID: <20260616140214.3285019-1-sashal@kernel.org> + +From: Christoph Hellwig + +[ Upstream commit 807df3227d7674d7957c576551d552acf15bb96f ] + +Don't pass expr to XFS_TEST_ERROR. Most calls pass a constant false, +and the places that do pass an expression become cleaner by moving it +out. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. 
Wong +Signed-off-by: Carlos Maiolino +Stable-dep-of: fcf4faba9f98 ("xfs: fix error returns in CoW fork repair") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_ag_resv.c | 8 ++++---- + fs/xfs/libxfs/xfs_alloc.c | 5 ++--- + fs/xfs/libxfs/xfs_attr_leaf.c | 2 +- + fs/xfs/libxfs/xfs_bmap.c | 17 ++++++++--------- + fs/xfs/libxfs/xfs_btree.c | 2 +- + fs/xfs/libxfs/xfs_da_btree.c | 2 +- + fs/xfs/libxfs/xfs_dir2.c | 2 +- + fs/xfs/libxfs/xfs_exchmaps.c | 4 ++-- + fs/xfs/libxfs/xfs_ialloc.c | 2 +- + fs/xfs/libxfs/xfs_inode_buf.c | 4 ++-- + fs/xfs/libxfs/xfs_inode_fork.c | 3 +-- + fs/xfs/libxfs/xfs_refcount.c | 5 ++--- + fs/xfs/libxfs/xfs_rmap.c | 2 +- + fs/xfs/scrub/cow_repair.c | 2 +- + fs/xfs/scrub/repair.c | 2 +- + fs/xfs/xfs_attr_item.c | 2 +- + fs/xfs/xfs_buf.c | 4 ++-- + fs/xfs/xfs_error.c | 5 ++--- + fs/xfs/xfs_error.h | 10 +++++----- + fs/xfs/xfs_inode.c | 28 +++++++++++++--------------- + fs/xfs/xfs_iomap.c | 2 +- + fs/xfs/xfs_log.c | 8 ++++---- + fs/xfs/xfs_trans_ail.c | 2 +- + 23 files changed, 58 insertions(+), 65 deletions(-) + +--- a/fs/xfs/libxfs/xfs_ag_resv.c ++++ b/fs/xfs/libxfs/xfs_ag_resv.c +@@ -91,9 +91,9 @@ xfs_ag_resv_critical( + trace_xfs_ag_resv_critical(pag, type, avail); + + /* Critically low if less than 10% or max btree height remains. 
*/ +- return XFS_TEST_ERROR(avail < orig / 10 || +- avail < pag->pag_mount->m_agbtree_maxlevels, +- pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL); ++ return avail < orig / 10 || ++ avail < pag->pag_mount->m_agbtree_maxlevels || ++ XFS_TEST_ERROR(pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL); + } + + /* +@@ -201,7 +201,7 @@ __xfs_ag_resv_init( + return -EINVAL; + } + +- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_AG_RESV_FAIL)) ++ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_AG_RESV_FAIL)) + error = -ENOSPC; + else + error = xfs_dec_fdblocks(mp, hidden_space, true); +--- a/fs/xfs/libxfs/xfs_alloc.c ++++ b/fs/xfs/libxfs/xfs_alloc.c +@@ -3312,7 +3312,7 @@ xfs_agf_read_verify( + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_agf_verify(bp); +- if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF)) ++ if (fa || XFS_TEST_ERROR(mp, XFS_ERRTAG_ALLOC_READ_AGF)) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } + } +@@ -3986,8 +3986,7 @@ __xfs_free_extent( + ASSERT(len != 0); + ASSERT(type != XFS_AG_RESV_AGFL); + +- if (XFS_TEST_ERROR(false, mp, +- XFS_ERRTAG_FREE_EXTENT)) ++ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_FREE_EXTENT)) + return -EIO; + + error = xfs_free_extent_fix_freelist(tp, pag, &agbp); +--- a/fs/xfs/libxfs/xfs_attr_leaf.c ++++ b/fs/xfs/libxfs/xfs_attr_leaf.c +@@ -1225,7 +1225,7 @@ xfs_attr3_leaf_to_node( + + trace_xfs_attr_leaf_to_node(args); + +- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_ATTR_LEAF_TO_NODE)) { ++ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_ATTR_LEAF_TO_NODE)) { + error = -EIO; + goto out; + } +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -3766,8 +3766,7 @@ xfs_bmap_btalloc( + /* Trim the allocation back to the maximum an AG can fit. 
*/ + args.maxlen = min(ap->length, mp->m_ag_max_usable); + +- if (unlikely(XFS_TEST_ERROR(false, mp, +- XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT))) ++ if (unlikely(XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT))) + error = xfs_bmap_exact_minlen_extent_alloc(ap, &args); + else if ((ap->datatype & XFS_ALLOC_USERDATA) && + xfs_inode_is_filestream(ap->ip)) +@@ -3953,7 +3952,7 @@ xfs_bmapi_read( + } + + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || +- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { ++ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) { + xfs_bmap_mark_sick(ip, whichfork); + return -EFSCORRUPTED; + } +@@ -4442,7 +4441,7 @@ xfs_bmapi_write( + (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)); + + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || +- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { ++ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) { + xfs_bmap_mark_sick(ip, whichfork); + return -EFSCORRUPTED; + } +@@ -4785,7 +4784,7 @@ xfs_bmapi_remap( + (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)); + + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || +- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { ++ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) { + xfs_bmap_mark_sick(ip, whichfork); + return -EFSCORRUPTED; + } +@@ -5873,7 +5872,7 @@ xfs_bmap_collapse_extents( + int logflags = 0; + + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || +- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { ++ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) { + xfs_bmap_mark_sick(ip, whichfork); + return -EFSCORRUPTED; + } +@@ -5988,7 +5987,7 @@ xfs_bmap_insert_extents( + int logflags = 0; + + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || +- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { ++ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) { + xfs_bmap_mark_sick(ip, whichfork); + return -EFSCORRUPTED; + } +@@ -6092,7 +6091,7 @@ xfs_bmap_split_extent( + int i = 0; + + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || +- XFS_TEST_ERROR(false, mp, 
XFS_ERRTAG_BMAPIFORMAT)) { ++ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) { + xfs_bmap_mark_sick(ip, whichfork); + return -EFSCORRUPTED; + } +@@ -6257,7 +6256,7 @@ xfs_bmap_finish_one( + + trace_xfs_bmap_deferred(bi); + +- if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE)) ++ if (XFS_TEST_ERROR(tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE)) + return -EIO; + + switch (bi->bi_type) { +--- a/fs/xfs/libxfs/xfs_btree.c ++++ b/fs/xfs/libxfs/xfs_btree.c +@@ -300,7 +300,7 @@ xfs_btree_check_block( + + fa = __xfs_btree_check_block(cur, block, level, bp); + if (XFS_IS_CORRUPT(mp, fa != NULL) || +- XFS_TEST_ERROR(false, mp, xfs_btree_block_errtag(cur))) { ++ XFS_TEST_ERROR(mp, xfs_btree_block_errtag(cur))) { + if (bp) + trace_xfs_btree_corrupt(bp, _RET_IP_); + xfs_btree_mark_sick(cur); +--- a/fs/xfs/libxfs/xfs_da_btree.c ++++ b/fs/xfs/libxfs/xfs_da_btree.c +@@ -565,7 +565,7 @@ xfs_da3_split( + + trace_xfs_da_split(state->args); + +- if (XFS_TEST_ERROR(false, state->mp, XFS_ERRTAG_DA_LEAF_SPLIT)) ++ if (XFS_TEST_ERROR(state->mp, XFS_ERRTAG_DA_LEAF_SPLIT)) + return -EIO; + + /* +--- a/fs/xfs/libxfs/xfs_dir2.c ++++ b/fs/xfs/libxfs/xfs_dir2.c +@@ -223,7 +223,7 @@ xfs_dir_ino_validate( + bool ino_ok = xfs_verify_dir_ino(mp, ino); + + if (XFS_IS_CORRUPT(mp, !ino_ok) || +- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DIR_INO_VALIDATE)) { ++ XFS_TEST_ERROR(mp, XFS_ERRTAG_DIR_INO_VALIDATE)) { + xfs_warn(mp, "Invalid inode number 0x%Lx", + (unsigned long long) ino); + return -EFSCORRUPTED; +--- a/fs/xfs/libxfs/xfs_exchmaps.c ++++ b/fs/xfs/libxfs/xfs_exchmaps.c +@@ -616,7 +616,7 @@ xfs_exchmaps_finish_one( + return error; + } + +- if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE)) ++ if (XFS_TEST_ERROR(tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE)) + return -EIO; + + /* If we still have work to do, ask for a new transaction. 
*/ +@@ -880,7 +880,7 @@ xmi_ensure_delta_nextents( + &new_nextents)) + return -EFBIG; + +- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) && ++ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) && + new_nextents > 10) + return -EFBIG; + +--- a/fs/xfs/libxfs/xfs_ialloc.c ++++ b/fs/xfs/libxfs/xfs_ialloc.c +@@ -2690,7 +2690,7 @@ xfs_agi_read_verify( + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_agi_verify(bp); +- if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI)) ++ if (fa || XFS_TEST_ERROR(mp, XFS_ERRTAG_IALLOC_READ_AGI)) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } + } +--- a/fs/xfs/libxfs/xfs_inode_buf.c ++++ b/fs/xfs/libxfs/xfs_inode_buf.c +@@ -60,8 +60,8 @@ xfs_inode_buf_verify( + di_ok = xfs_verify_magic16(bp, dip->di_magic) && + xfs_dinode_good_version(mp, dip->di_version) && + xfs_verify_agino_or_null(bp->b_pag, unlinked_ino); +- if (unlikely(XFS_TEST_ERROR(!di_ok, mp, +- XFS_ERRTAG_ITOBP_INOTOBP))) { ++ if (unlikely(!di_ok || ++ XFS_TEST_ERROR(mp, XFS_ERRTAG_ITOBP_INOTOBP))) { + if (readahead) { + bp->b_flags &= ~XBF_DONE; + xfs_buf_ioerror(bp, -EIO); +--- a/fs/xfs/libxfs/xfs_inode_fork.c ++++ b/fs/xfs/libxfs/xfs_inode_fork.c +@@ -795,8 +795,7 @@ xfs_iext_count_extend( + if (nr_exts < ifp->if_nextents) + return -EFBIG; + +- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) && +- nr_exts > 10) ++ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) && nr_exts > 10) + return -EFBIG; + + if (nr_exts > xfs_iext_max_nextents(has_large, whichfork)) { +--- a/fs/xfs/libxfs/xfs_refcount.c ++++ b/fs/xfs/libxfs/xfs_refcount.c +@@ -1073,8 +1073,7 @@ xfs_refcount_still_have_space( + * refcount continue update "error" has been injected. 
+ */ + if (cur->bc_refc.nr_ops > 2 && +- XFS_TEST_ERROR(false, cur->bc_mp, +- XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE)) ++ XFS_TEST_ERROR(cur->bc_mp, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE)) + return false; + + if (cur->bc_refc.nr_ops == 0) +@@ -1353,7 +1352,7 @@ xfs_refcount_finish_one( + + trace_xfs_refcount_deferred(mp, ri); + +- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) ++ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) + return -EIO; + + /* +--- a/fs/xfs/libxfs/xfs_rmap.c ++++ b/fs/xfs/libxfs/xfs_rmap.c +@@ -2579,7 +2579,7 @@ xfs_rmap_finish_one( + + trace_xfs_rmap_deferred(mp, ri); + +- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE)) ++ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_RMAP_FINISH_ONE)) + return -EIO; + + /* +--- a/fs/xfs/scrub/cow_repair.c ++++ b/fs/xfs/scrub/cow_repair.c +@@ -297,7 +297,7 @@ xrep_cow_find_bad( + * on the debugging knob, replace everything in the CoW fork. + */ + if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) || +- XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) { ++ XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) { + error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock, + xc->irec.br_blockcount); + if (error) +--- a/fs/xfs/scrub/repair.c ++++ b/fs/xfs/scrub/repair.c +@@ -990,7 +990,7 @@ xrep_will_attempt( + return true; + + /* Let debug users force us into the repair routines. */ +- if (XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) ++ if (XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) + return true; + + /* Metadata is corrupt or failed cross-referencing. 
*/ +--- a/fs/xfs/xfs_attr_item.c ++++ b/fs/xfs/xfs_attr_item.c +@@ -490,7 +490,7 @@ xfs_attr_finish_item( + /* Reset trans after EAGAIN cycle since the transaction is new */ + args->trans = tp; + +- if (XFS_TEST_ERROR(false, args->dp->i_mount, XFS_ERRTAG_LARP)) { ++ if (XFS_TEST_ERROR(args->dp->i_mount, XFS_ERRTAG_LARP)) { + error = -EIO; + goto out; + } +--- a/fs/xfs/xfs_buf.c ++++ b/fs/xfs/xfs_buf.c +@@ -1498,7 +1498,7 @@ xfs_buf_bio_end_io( + + if (!bio->bi_status && + (bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) && +- XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR)) ++ XFS_TEST_ERROR(bp->b_mount, XFS_ERRTAG_BUF_IOERROR)) + bio->bi_status = BLK_STS_IOERR; + + /* +@@ -2451,7 +2451,7 @@ void xfs_buf_set_ref(struct xfs_buf *bp, + * This allows userspace to disrupt buffer caching for debug/testing + * purposes. + */ +- if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF)) ++ if (XFS_TEST_ERROR(bp->b_mount, XFS_ERRTAG_BUF_LRU_REF)) + lru_ref = 0; + + atomic_set(&bp->b_lru_ref, lru_ref); +--- a/fs/xfs/xfs_error.c ++++ b/fs/xfs/xfs_error.c +@@ -292,7 +292,6 @@ xfs_errortag_enabled( + bool + xfs_errortag_test( + struct xfs_mount *mp, +- const char *expression, + const char *file, + int line, + unsigned int error_tag) +@@ -318,8 +317,8 @@ xfs_errortag_test( + return false; + + xfs_warn_ratelimited(mp, +-"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"", +- expression, file, line, mp->m_super->s_id); ++"Injecting error at file %s, line %d, on filesystem \"%s\"", ++ file, line, mp->m_super->s_id); + return true; + } + +--- a/fs/xfs/xfs_error.h ++++ b/fs/xfs/xfs_error.h +@@ -41,10 +41,10 @@ extern void xfs_inode_verifier_error(str + #ifdef DEBUG + extern int xfs_errortag_init(struct xfs_mount *mp); + extern void xfs_errortag_del(struct xfs_mount *mp); +-extern bool xfs_errortag_test(struct xfs_mount *mp, const char *expression, +- const char *file, int line, unsigned int error_tag); +-#define XFS_TEST_ERROR(expr, mp, tag) \ +- 
((expr) || xfs_errortag_test((mp), #expr, __FILE__, __LINE__, (tag))) ++bool xfs_errortag_test(struct xfs_mount *mp, const char *file, int line, ++ unsigned int error_tag); ++#define XFS_TEST_ERROR(mp, tag) \ ++ xfs_errortag_test((mp), __FILE__, __LINE__, (tag)) + bool xfs_errortag_enabled(struct xfs_mount *mp, unsigned int tag); + #define XFS_ERRORTAG_DELAY(mp, tag) \ + do { \ +@@ -66,7 +66,7 @@ extern int xfs_errortag_clearall(struct + #else + #define xfs_errortag_init(mp) (0) + #define xfs_errortag_del(mp) +-#define XFS_TEST_ERROR(expr, mp, tag) (expr) ++#define XFS_TEST_ERROR(mp, tag) (false) + #define XFS_ERRORTAG_DELAY(mp, tag) ((void)0) + #define xfs_errortag_set(mp, tag, val) (ENOSYS) + #define xfs_errortag_add(mp, tag) (ENOSYS) +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -2367,37 +2367,35 @@ xfs_iflush( + * error handling as the caller will shutdown and fail the buffer. + */ + error = -EFSCORRUPTED; +- if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), +- mp, XFS_ERRTAG_IFLUSH_1)) { ++ if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC) || ++ XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_1)) { + xfs_alert_tag(mp, XFS_PTAG_IFLUSH, + "%s: Bad inode %llu magic number 0x%x, ptr "PTR_FMT, + __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); + goto flush_out; + } + if (S_ISREG(VFS_I(ip)->i_mode)) { +- if (XFS_TEST_ERROR( +- ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS && +- ip->i_df.if_format != XFS_DINODE_FMT_BTREE, +- mp, XFS_ERRTAG_IFLUSH_3)) { ++ if ((ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS && ++ ip->i_df.if_format != XFS_DINODE_FMT_BTREE) || ++ XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_3)) { + xfs_alert_tag(mp, XFS_PTAG_IFLUSH, + "%s: Bad regular inode %llu, ptr "PTR_FMT, + __func__, ip->i_ino, ip); + goto flush_out; + } + } else if (S_ISDIR(VFS_I(ip)->i_mode)) { +- if (XFS_TEST_ERROR( +- ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS && +- ip->i_df.if_format != XFS_DINODE_FMT_BTREE && +- ip->i_df.if_format != XFS_DINODE_FMT_LOCAL, +- 
mp, XFS_ERRTAG_IFLUSH_4)) { ++ if ((ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS && ++ ip->i_df.if_format != XFS_DINODE_FMT_BTREE && ++ ip->i_df.if_format != XFS_DINODE_FMT_LOCAL) || ++ XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_4)) { + xfs_alert_tag(mp, XFS_PTAG_IFLUSH, + "%s: Bad directory inode %llu, ptr "PTR_FMT, + __func__, ip->i_ino, ip); + goto flush_out; + } + } +- if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af) > +- ip->i_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { ++ if (ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af) > ++ ip->i_nblocks || XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_5)) { + xfs_alert_tag(mp, XFS_PTAG_IFLUSH, + "%s: detected corrupt incore inode %llu, " + "total extents = %llu nblocks = %lld, ptr "PTR_FMT, +@@ -2406,8 +2404,8 @@ xfs_iflush( + ip->i_nblocks, ip); + goto flush_out; + } +- if (XFS_TEST_ERROR(ip->i_forkoff > mp->m_sb.sb_inodesize, +- mp, XFS_ERRTAG_IFLUSH_6)) { ++ if (ip->i_forkoff > mp->m_sb.sb_inodesize || ++ XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_6)) { + xfs_alert_tag(mp, XFS_PTAG_IFLUSH, + "%s: bad inode %llu, forkoff 0x%x, ptr "PTR_FMT, + __func__, ip->i_ino, ip->i_forkoff, ip); +--- a/fs/xfs/xfs_iomap.c ++++ b/fs/xfs/xfs_iomap.c +@@ -993,7 +993,7 @@ xfs_buffered_write_iomap_begin( + return error; + + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(&ip->i_df)) || +- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { ++ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) { + xfs_bmap_mark_sick(ip, XFS_DATA_FORK); + error = -EFSCORRUPTED; + goto out_unlock; +--- a/fs/xfs/xfs_log.c ++++ b/fs/xfs/xfs_log.c +@@ -968,8 +968,8 @@ xfs_log_unmount_write( + * counters will be recalculated. Refer to xlog_check_unmount_rec for + * more details. 
+ */ +- if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp, +- XFS_ERRTAG_FORCE_SUMMARY_RECALC)) { ++ if (xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS) || ++ XFS_TEST_ERROR(mp, XFS_ERRTAG_FORCE_SUMMARY_RECALC)) { + xfs_alert(mp, "%s: will fix summary counters at next mount", + __func__); + return; +@@ -1239,7 +1239,7 @@ xlog_ioend_work( + /* + * Race to shutdown the filesystem if we see an error. + */ +- if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) { ++ if (error || XFS_TEST_ERROR(log->l_mp, XFS_ERRTAG_IODONE_IOERR)) { + xfs_alert(log->l_mp, "log I/O error %d", error); + xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); + } +@@ -1848,7 +1848,7 @@ xlog_sync( + * detects the bad CRC and attempts to recover. + */ + #ifdef DEBUG +- if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) { ++ if (XFS_TEST_ERROR(log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) { + iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA); + iclog->ic_fail_crc = true; + xfs_warn(log->l_mp, +--- a/fs/xfs/xfs_trans_ail.c ++++ b/fs/xfs/xfs_trans_ail.c +@@ -385,7 +385,7 @@ xfsaild_push_item( + * If log item pinning is enabled, skip the push and track the item as + * pinned. This can help induce head-behind-tail conditions. + */ +- if (XFS_TEST_ERROR(false, ailp->ail_log->l_mp, XFS_ERRTAG_LOG_ITEM_PIN)) ++ if (XFS_TEST_ERROR(ailp->ail_log->l_mp, XFS_ERRTAG_LOG_ITEM_PIN)) + return XFS_ITEM_PINNED; + + /*