--- /dev/null
+From 3xnExagcKBscx2or44Bt11tyr.p1zt4rtxuyv07As170qn6v10.14t@flex--kpberry.bounces.google.com Tue Jun 16 16:54:48 2026
+From: Kevin Berry <kpberry@google.com>
+Date: Tue, 16 Jun 2026 15:54:28 +0000
+Subject: bonding: 3ad: implement proper RCU rules for port->aggregator
+To: stable@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, bestswngs@gmail.com, chenglongtang@google.com, joneslee@google.com, kpberry@google.com, pabeni@redhat.com, rnj@google.com, sashal@kernel.org, xmei5@asu.edu, Eric Dumazet <edumazet@google.com>, syzbot+9bb2ff2a4ab9e17307e1@syzkaller.appspotmail.com, Jay Vosburgh <jv@jvosburgh.net>, Andrew Lunn <andrew+netdev@lunn.ch>, Jakub Kicinski <kuba@kernel.org>
+Message-ID: <20260616155432.2093908-6-kpberry@google.com>
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit c4f050ce06c56cfb5993268af4a5cb66ed1cd04e ]
+
+syzbot found a data-race in bond_3ad_get_active_agg_info /
+bond_3ad_state_machine_handler [1] which hints at the lack of a proper
+RCU implementation.
+
+Add the __rcu qualifier to port->aggregator, and use the proper RCU API.
+
+[1]
+
+BUG: KCSAN: data-race in bond_3ad_get_active_agg_info / bond_3ad_state_machine_handler
+
+write to 0xffff88813cf5c4b0 of 8 bytes by task 36 on cpu 0:
+ ad_port_selection_logic drivers/net/bonding/bond_3ad.c:1659 [inline]
+ bond_3ad_state_machine_handler+0x9d5/0x2d60 drivers/net/bonding/bond_3ad.c:2569
+ process_one_work kernel/workqueue.c:3302 [inline]
+ process_scheduled_works+0x4f0/0x9c0 kernel/workqueue.c:3385
+ worker_thread+0x58a/0x780 kernel/workqueue.c:3466
+ kthread+0x22a/0x280 kernel/kthread.c:436
+ ret_from_fork+0x146/0x330 arch/x86/kernel/process.c:158
+ ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
+
+read to 0xffff88813cf5c4b0 of 8 bytes by task 22063 on cpu 1:
+ __bond_3ad_get_active_agg_info drivers/net/bonding/bond_3ad.c:2858 [inline]
+ bond_3ad_get_active_agg_info+0x8c/0x230 drivers/net/bonding/bond_3ad.c:2881
+ bond_fill_info+0xe0f/0x10f0 drivers/net/bonding/bond_netlink.c:853
+ rtnl_link_info_fill net/core/rtnetlink.c:906 [inline]
+ rtnl_link_fill+0x1d7/0x4e0 net/core/rtnetlink.c:927
+ rtnl_fill_ifinfo+0xf8e/0x1380 net/core/rtnetlink.c:2168
+ rtmsg_ifinfo_build_skb+0x11c/0x1b0 net/core/rtnetlink.c:4453
+ rtmsg_ifinfo_event net/core/rtnetlink.c:4486 [inline]
+ rtmsg_ifinfo+0x6d/0x110 net/core/rtnetlink.c:4495
+ __dev_notify_flags+0x76/0x390 net/core/dev.c:9790
+ netif_change_flags+0xac/0xd0 net/core/dev.c:9823
+ do_setlink+0x905/0x2950 net/core/rtnetlink.c:3180
+ rtnl_group_changelink net/core/rtnetlink.c:3813 [inline]
+ __rtnl_newlink net/core/rtnetlink.c:3981 [inline]
+ rtnl_newlink+0xf55/0x1400 net/core/rtnetlink.c:4109
+ rtnetlink_rcv_msg+0x64b/0x720 net/core/rtnetlink.c:6995
+ netlink_rcv_skb+0x123/0x220 net/netlink/af_netlink.c:2550
+ rtnetlink_rcv+0x1c/0x30 net/core/rtnetlink.c:7022
+ netlink_unicast_kernel net/netlink/af_netlink.c:1318 [inline]
+ netlink_unicast+0x5a8/0x680 net/netlink/af_netlink.c:1344
+ netlink_sendmsg+0x5c8/0x6f0 net/netlink/af_netlink.c:1894
+ sock_sendmsg_nosec net/socket.c:787 [inline]
+ __sock_sendmsg net/socket.c:802 [inline]
+ ____sys_sendmsg+0x563/0x5b0 net/socket.c:2698
+ ___sys_sendmsg+0x195/0x1e0 net/socket.c:2752
+ __sys_sendmsg net/socket.c:2784 [inline]
+ __do_sys_sendmsg net/socket.c:2789 [inline]
+ __se_sys_sendmsg net/socket.c:2787 [inline]
+ __x64_sys_sendmsg+0xd4/0x160 net/socket.c:2787
+ x64_sys_call+0x194c/0x3020 arch/x86/include/generated/asm/syscalls_64.h:47
+ do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
+ do_syscall_64+0x12c/0x3b0 arch/x86/entry/syscall_64.c:94
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+value changed: 0x0000000000000000 -> 0xffff88813cf5c400
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 UID: 0 PID: 22063 Comm: syz.0.31122 Tainted: G W syzkaller #0 PREEMPT(full)
+Tainted: [W]=WARN
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/18/2026
+
+Fixes: 47e91f56008b ("bonding: use RCU protection for 3ad xmit path")
+Reported-by: syzbot+9bb2ff2a4ab9e17307e1@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/netdev/69f0a82f.050a0220.3aadc4.0000.GAE@google.com/
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Jay Vosburgh <jv@jvosburgh.net>
+Cc: Andrew Lunn <andrew+netdev@lunn.ch>
+Link: https://patch.msgid.link/20260428123207.3809211-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Kevin Berry <kpberry@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_3ad.c | 109 ++++++++++++++++++---------------
+ drivers/net/bonding/bond_main.c | 8 +-
+ drivers/net/bonding/bond_netlink.c | 16 +++-
+ drivers/net/bonding/bond_procfs.c | 3
+ drivers/net/bonding/bond_sysfs_slave.c | 17 +++--
+ include/net/bond_3ad.h | 2
+ 6 files changed, 89 insertions(+), 66 deletions(-)
+
+--- a/drivers/net/bonding/bond_3ad.c
++++ b/drivers/net/bonding/bond_3ad.c
+@@ -991,6 +991,7 @@ static int ad_marker_send(struct port *p
+ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
+ {
+ struct bonding *bond = __get_bond_by_port(port);
++ struct aggregator *aggregator;
+ mux_states_t last_state;
+
+ /* keep current State Machine state to compare later if it was
+@@ -998,6 +999,7 @@ static void ad_mux_machine(struct port *
+ */
+ last_state = port->sm_mux_state;
+
++ aggregator = rcu_dereference(port->aggregator);
+ if (port->sm_vars & AD_PORT_BEGIN) {
+ port->sm_mux_state = AD_MUX_DETACHED;
+ } else {
+@@ -1017,7 +1019,7 @@ static void ad_mux_machine(struct port *
+ * cycle to update ready variable, we check
+ * READY_N and update READY here
+ */
+- __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator));
++ __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator));
+ port->sm_mux_state = AD_MUX_DETACHED;
+ break;
+ }
+@@ -1032,7 +1034,7 @@ static void ad_mux_machine(struct port *
+ * update ready variable, we check READY_N and update
+ * READY here
+ */
+- __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator));
++ __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator));
+
+ /* if the wait_while_timer expired, and the port is
+ * in READY state, move to ATTACHED state
+@@ -1048,7 +1050,7 @@ static void ad_mux_machine(struct port *
+ if ((port->sm_vars & AD_PORT_SELECTED) &&
+ (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) &&
+ !__check_agg_selection_timer(port)) {
+- if (port->aggregator->is_active) {
++ if (aggregator->is_active) {
+ int state = AD_MUX_COLLECTING_DISTRIBUTING;
+
+ if (!bond->params.coupled_control)
+@@ -1064,9 +1066,9 @@ static void ad_mux_machine(struct port *
+ * cycle to update ready variable, we check
+ * READY_N and update READY here
+ */
+- __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator));
++ __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator));
+ port->sm_mux_state = AD_MUX_DETACHED;
+- } else if (port->aggregator->is_active) {
++ } else if (aggregator->is_active) {
+ port->actor_oper_port_state |=
+ LACP_STATE_SYNCHRONIZATION;
+ }
+@@ -1077,7 +1079,7 @@ static void ad_mux_machine(struct port *
+ * sure that a collecting distributing
+ * port in an active aggregator is enabled
+ */
+- if (port->aggregator->is_active &&
++ if (aggregator->is_active &&
+ !__port_is_collecting_distributing(port)) {
+ __enable_port(port);
+ *update_slave_arr = true;
+@@ -1096,7 +1098,7 @@ static void ad_mux_machine(struct port *
+ */
+ struct slave *slave = port->slave;
+
+- if (port->aggregator->is_active &&
++ if (aggregator->is_active &&
+ bond_is_slave_rx_disabled(slave)) {
+ ad_enable_collecting(port);
+ *update_slave_arr = true;
+@@ -1116,8 +1118,8 @@ static void ad_mux_machine(struct port *
+ * sure that a collecting distributing
+ * port in an active aggregator is enabled
+ */
+- if (port->aggregator &&
+- port->aggregator->is_active &&
++ if (aggregator &&
++ aggregator->is_active &&
+ !__port_is_collecting_distributing(port)) {
+ __enable_port(port);
+ *update_slave_arr = true;
+@@ -1149,7 +1151,7 @@ static void ad_mux_machine(struct port *
+ port->sm_mux_timer_counter = __ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0);
+ break;
+ case AD_MUX_ATTACHED:
+- if (port->aggregator->is_active)
++ if (aggregator->is_active)
+ port->actor_oper_port_state |=
+ LACP_STATE_SYNCHRONIZATION;
+ else
+@@ -1522,9 +1524,9 @@ static void ad_port_selection_logic(stru
+ bond = __get_bond_by_port(port);
+
+ /* if the port is connected to other aggregator, detach it */
+- if (port->aggregator) {
++ temp_aggregator = rcu_dereference(port->aggregator);
++ if (temp_aggregator) {
+ /* detach the port from its former aggregator */
+- temp_aggregator = port->aggregator;
+ for (curr_port = temp_aggregator->lag_ports; curr_port;
+ last_port = curr_port,
+ curr_port = curr_port->next_port_in_aggregator) {
+@@ -1547,7 +1549,7 @@ static void ad_port_selection_logic(stru
+ /* clear the port's relations to this
+ * aggregator
+ */
+- port->aggregator = NULL;
++ RCU_INIT_POINTER(port->aggregator, NULL);
+ port->next_port_in_aggregator = NULL;
+ port->actor_port_aggregator_identifier = 0;
+
+@@ -1570,7 +1572,7 @@ static void ad_port_selection_logic(stru
+ port->slave->bond->dev->name,
+ port->slave->dev->name,
+ port->actor_port_number,
+- port->aggregator->aggregator_identifier);
++ temp_aggregator->aggregator_identifier);
+ }
+ }
+ /* search on all aggregators for a suitable aggregator for this port */
+@@ -1594,15 +1596,15 @@ static void ad_port_selection_logic(stru
+ )
+ ) {
+ /* attach to the founded aggregator */
+- port->aggregator = aggregator;
++ rcu_assign_pointer(port->aggregator, aggregator);
+ port->actor_port_aggregator_identifier =
+- port->aggregator->aggregator_identifier;
++ aggregator->aggregator_identifier;
+ port->next_port_in_aggregator = aggregator->lag_ports;
+- port->aggregator->num_of_ports++;
++ aggregator->num_of_ports++;
+ aggregator->lag_ports = port;
+ slave_dbg(bond->dev, slave->dev, "Port %d joined LAG %d (existing LAG)\n",
+ port->actor_port_number,
+- port->aggregator->aggregator_identifier);
++ aggregator->aggregator_identifier);
+
+ /* mark this port as selected */
+ port->sm_vars |= AD_PORT_SELECTED;
+@@ -1617,39 +1619,40 @@ static void ad_port_selection_logic(stru
+ if (!found) {
+ if (free_aggregator) {
+ /* assign port a new aggregator */
+- port->aggregator = free_aggregator;
+ port->actor_port_aggregator_identifier =
+- port->aggregator->aggregator_identifier;
++ free_aggregator->aggregator_identifier;
+
+ /* update the new aggregator's parameters
+ * if port was responsed from the end-user
+ */
+ if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS)
+ /* if port is full duplex */
+- port->aggregator->is_individual = false;
++ free_aggregator->is_individual = false;
+ else
+- port->aggregator->is_individual = true;
++ free_aggregator->is_individual = true;
+
+- port->aggregator->actor_admin_aggregator_key =
++ free_aggregator->actor_admin_aggregator_key =
+ port->actor_admin_port_key;
+- port->aggregator->actor_oper_aggregator_key =
++ free_aggregator->actor_oper_aggregator_key =
+ port->actor_oper_port_key;
+- port->aggregator->partner_system =
++ free_aggregator->partner_system =
+ port->partner_oper.system;
+- port->aggregator->partner_system_priority =
++ free_aggregator->partner_system_priority =
+ port->partner_oper.system_priority;
+- port->aggregator->partner_oper_aggregator_key = port->partner_oper.key;
+- port->aggregator->receive_state = 1;
+- port->aggregator->transmit_state = 1;
+- port->aggregator->lag_ports = port;
+- port->aggregator->num_of_ports++;
++ free_aggregator->partner_oper_aggregator_key = port->partner_oper.key;
++ free_aggregator->receive_state = 1;
++ free_aggregator->transmit_state = 1;
++ free_aggregator->lag_ports = port;
++ free_aggregator->num_of_ports++;
++
++ rcu_assign_pointer(port->aggregator, free_aggregator);
+
+ /* mark this port as selected */
+ port->sm_vars |= AD_PORT_SELECTED;
+
+ slave_dbg(bond->dev, port->slave->dev, "Port %d joined LAG %d (new LAG)\n",
+ port->actor_port_number,
+- port->aggregator->aggregator_identifier);
++ free_aggregator->aggregator_identifier);
+ } else {
+ slave_err(bond->dev, port->slave->dev,
+ "Port %d did not find a suitable aggregator\n",
+@@ -1661,13 +1664,12 @@ static void ad_port_selection_logic(stru
+ * in all aggregator's ports, else set ready=FALSE in all
+ * aggregator's ports
+ */
+- __set_agg_ports_ready(port->aggregator,
+- __agg_ports_are_ready(port->aggregator));
++ aggregator = rcu_dereference(port->aggregator);
++ __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator));
+
+- aggregator = __get_first_agg(port);
+- ad_agg_selection_logic(aggregator, update_slave_arr);
++ ad_agg_selection_logic(__get_first_agg(port), update_slave_arr);
+
+- if (!port->aggregator->is_active)
++ if (!aggregator->is_active)
+ port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION;
+ }
+
+@@ -2021,13 +2023,15 @@ static void ad_initialize_port(struct po
+ */
+ static void ad_enable_collecting(struct port *port)
+ {
+- if (port->aggregator->is_active) {
++ struct aggregator *aggregator = rcu_dereference(port->aggregator);
++
++ if (aggregator->is_active) {
+ struct slave *slave = port->slave;
+
+ slave_dbg(slave->bond->dev, slave->dev,
+ "Enabling collecting on port %d (LAG %d)\n",
+ port->actor_port_number,
+- port->aggregator->aggregator_identifier);
++ aggregator->aggregator_identifier);
+ __enable_collecting_port(port);
+ }
+ }
+@@ -2039,11 +2043,13 @@ static void ad_enable_collecting(struct
+ */
+ static void ad_disable_distributing(struct port *port, bool *update_slave_arr)
+ {
+- if (port->aggregator && __agg_has_partner(port->aggregator)) {
++ struct aggregator *aggregator = rcu_dereference(port->aggregator);
++
++ if (aggregator && __agg_has_partner(aggregator)) {
+ slave_dbg(port->slave->bond->dev, port->slave->dev,
+ "Disabling distributing on port %d (LAG %d)\n",
+ port->actor_port_number,
+- port->aggregator->aggregator_identifier);
++ aggregator->aggregator_identifier);
+ __disable_distributing_port(port);
+ /* Slave array needs an update */
+ *update_slave_arr = true;
+@@ -2060,11 +2066,13 @@ static void ad_disable_distributing(stru
+ static void ad_enable_collecting_distributing(struct port *port,
+ bool *update_slave_arr)
+ {
+- if (port->aggregator->is_active) {
++ struct aggregator *aggregator = rcu_dereference(port->aggregator);
++
++ if (aggregator->is_active) {
+ slave_dbg(port->slave->bond->dev, port->slave->dev,
+ "Enabling port %d (LAG %d)\n",
+ port->actor_port_number,
+- port->aggregator->aggregator_identifier);
++ aggregator->aggregator_identifier);
+ __enable_port(port);
+ /* Slave array needs update */
+ *update_slave_arr = true;
+@@ -2079,11 +2087,13 @@ static void ad_enable_collecting_distrib
+ static void ad_disable_collecting_distributing(struct port *port,
+ bool *update_slave_arr)
+ {
+- if (port->aggregator && __agg_has_partner(port->aggregator)) {
++ struct aggregator *aggregator = rcu_dereference(port->aggregator);
++
++ if (aggregator && __agg_has_partner(aggregator)) {
+ slave_dbg(port->slave->bond->dev, port->slave->dev,
+ "Disabling port %d (LAG %d)\n",
+ port->actor_port_number,
+- port->aggregator->aggregator_identifier);
++ aggregator->aggregator_identifier);
+ __disable_port(port);
+ /* Slave array needs an update */
+ *update_slave_arr = true;
+@@ -2323,7 +2333,7 @@ void bond_3ad_unbind_slave(struct slave
+ */
+ for (temp_port = aggregator->lag_ports; temp_port;
+ temp_port = temp_port->next_port_in_aggregator) {
+- temp_port->aggregator = new_aggregator;
++ rcu_assign_pointer(temp_port->aggregator, new_aggregator);
+ temp_port->actor_port_aggregator_identifier = new_aggregator->aggregator_identifier;
+ }
+
+@@ -2792,15 +2802,16 @@ out:
+ int __bond_3ad_get_active_agg_info(struct bonding *bond,
+ struct ad_info *ad_info)
+ {
+- struct aggregator *aggregator = NULL;
++ struct aggregator *aggregator = NULL, *tmp;
+ struct list_head *iter;
+ struct slave *slave;
+ struct port *port;
+
+ bond_for_each_slave_rcu(bond, slave, iter) {
+ port = &(SLAVE_AD_INFO(slave)->port);
+- if (port->aggregator && port->aggregator->is_active) {
+- aggregator = port->aggregator;
++ tmp = rcu_dereference(port->aggregator);
++ if (tmp && tmp->is_active) {
++ aggregator = tmp;
+ break;
+ }
+ }
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -1470,7 +1470,7 @@ static void bond_poll_controller(struct
+
+ if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+ struct aggregator *agg =
+- SLAVE_AD_INFO(slave)->port.aggregator;
++ rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator);
+
+ if (agg &&
+ agg->aggregator_identifier != ad_info.aggregator_id)
+@@ -5244,15 +5244,16 @@ int bond_update_slave_arr(struct bonding
+ spin_unlock_bh(&bond->mode_lock);
+ agg_id = ad_info.aggregator_id;
+ }
++ rcu_read_lock();
+ bond_for_each_slave(bond, slave, iter) {
+ if (skipslave == slave)
+ continue;
+
+ all_slaves->arr[all_slaves->count++] = slave;
+ if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+- struct aggregator *agg;
++ const struct aggregator *agg;
+
+- agg = SLAVE_AD_INFO(slave)->port.aggregator;
++ agg = rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator);
+ if (!agg || agg->aggregator_identifier != agg_id)
+ continue;
+ }
+@@ -5264,6 +5265,7 @@ int bond_update_slave_arr(struct bonding
+
+ usable_slaves->arr[usable_slaves->count++] = slave;
+ }
++ rcu_read_unlock();
+
+ bond_set_slave_arr(bond, usable_slaves, all_slaves);
+ return ret;
+--- a/drivers/net/bonding/bond_netlink.c
++++ b/drivers/net/bonding/bond_netlink.c
+@@ -66,27 +66,29 @@ static int bond_fill_slave_info(struct s
+ const struct port *ad_port;
+
+ ad_port = &SLAVE_AD_INFO(slave)->port;
+- agg = SLAVE_AD_INFO(slave)->port.aggregator;
++ rcu_read_lock();
++ agg = rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator);
+ if (agg) {
+ if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
+ agg->aggregator_identifier))
+- goto nla_put_failure;
++ goto nla_put_failure_rcu;
+ if (nla_put_u8(skb,
+ IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
+ ad_port->actor_oper_port_state))
+- goto nla_put_failure;
++ goto nla_put_failure_rcu;
+ if (nla_put_u16(skb,
+ IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
+ ad_port->partner_oper.port_state))
+- goto nla_put_failure;
++ goto nla_put_failure_rcu;
+
+ if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE,
+ ad_port->sm_churn_actor_state))
+- goto nla_put_failure;
++ goto nla_put_failure_rcu;
+ if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE,
+ ad_port->sm_churn_partner_state))
+- goto nla_put_failure;
++ goto nla_put_failure_rcu;
+ }
++ rcu_read_unlock();
+
+ if (nla_put_u16(skb, IFLA_BOND_SLAVE_ACTOR_PORT_PRIO,
+ SLAVE_AD_INFO(slave)->port_priority))
+@@ -95,6 +97,8 @@ static int bond_fill_slave_info(struct s
+
+ return 0;
+
++nla_put_failure_rcu:
++ rcu_read_unlock();
+ nla_put_failure:
+ return -EMSGSIZE;
+ }
+--- a/drivers/net/bonding/bond_procfs.c
++++ b/drivers/net/bonding/bond_procfs.c
+@@ -187,6 +187,7 @@ static void bond_info_show_master(struct
+ }
+ }
+
++/* Note: runs under rcu_read_lock() */
+ static void bond_info_show_slave(struct seq_file *seq,
+ const struct slave *slave)
+ {
+@@ -213,7 +214,7 @@ static void bond_info_show_slave(struct
+
+ if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+ const struct port *port = &SLAVE_AD_INFO(slave)->port;
+- const struct aggregator *agg = port->aggregator;
++ const struct aggregator *agg = rcu_dereference(port->aggregator);
+
+ if (agg) {
+ seq_printf(seq, "Aggregator ID: %d\n",
+--- a/drivers/net/bonding/bond_sysfs_slave.c
++++ b/drivers/net/bonding/bond_sysfs_slave.c
+@@ -62,10 +62,15 @@ static ssize_t ad_aggregator_id_show(str
+ const struct aggregator *agg;
+
+ if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) {
+- agg = SLAVE_AD_INFO(slave)->port.aggregator;
+- if (agg)
+- return sysfs_emit(buf, "%d\n",
+- agg->aggregator_identifier);
++ rcu_read_lock();
++ agg = rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator);
++ if (agg) {
++ ssize_t res = sysfs_emit(buf, "%d\n",
++ agg->aggregator_identifier);
++ rcu_read_unlock();
++ return res;
++ }
++ rcu_read_unlock();
+ }
+
+ return sysfs_emit(buf, "N/A\n");
+@@ -78,7 +83,7 @@ static ssize_t ad_actor_oper_port_state_
+
+ if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) {
+ ad_port = &SLAVE_AD_INFO(slave)->port;
+- if (ad_port->aggregator)
++ if (rcu_access_pointer(ad_port->aggregator))
+ return sysfs_emit(buf, "%u\n",
+ ad_port->actor_oper_port_state);
+ }
+@@ -93,7 +98,7 @@ static ssize_t ad_partner_oper_port_stat
+
+ if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) {
+ ad_port = &SLAVE_AD_INFO(slave)->port;
+- if (ad_port->aggregator)
++ if (rcu_access_pointer(ad_port->aggregator))
+ return sysfs_emit(buf, "%u\n",
+ ad_port->partner_oper.port_state);
+ }
+--- a/include/net/bond_3ad.h
++++ b/include/net/bond_3ad.h
+@@ -242,7 +242,7 @@ typedef struct port {
+ churn_state_t sm_churn_actor_state;
+ churn_state_t sm_churn_partner_state;
+ struct slave *slave; /* pointer to the bond slave that this port belongs to */
+- struct aggregator *aggregator; /* pointer to an aggregator that this port related to */
++ struct aggregator __rcu *aggregator; /* pointer to an aggregator that this port related to */
+ struct port *next_port_in_aggregator; /* Next port on the linked list of the parent aggregator */
+ u32 transaction_id; /* continuous number for identification of Marker PDU's; */
+ struct lacpdu lacpdu; /* the lacpdu that will be sent for this port */
--- /dev/null
+From 3w3ExagcKBsQuzlo118qyyqvo.mywq1oqurvsx47py4xnk3syx.y1q@flex--kpberry.bounces.google.com Tue Jun 16 16:54:44 2026
+From: Kevin Berry <kpberry@google.com>
+Date: Tue, 16 Jun 2026 15:54:26 +0000
+Subject: bonding: add support for per-port LACP actor priority
+To: stable@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, bestswngs@gmail.com, chenglongtang@google.com, joneslee@google.com, kpberry@google.com, pabeni@redhat.com, rnj@google.com, sashal@kernel.org, xmei5@asu.edu, Hangbin Liu <liuhangbin@gmail.com>
+Message-ID: <20260616155432.2093908-4-kpberry@google.com>
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 6b6dc81ee7e8ca87c71a533e1d69cf96a4f1e986 ]
+
+Introduce a new netlink attribute 'actor_port_prio' to allow setting
+the LACP actor port priority on a per-slave basis. This extends the
+existing bonding infrastructure to support more granular control over
+LACP negotiations.
+
+The priority value is embedded in LACPDU packets and will be used by
+subsequent patches to influence aggregator selection policies.
+
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Link: https://patch.msgid.link/20250902064501.360822-2-liuhangbin@gmail.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Kevin Berry <kpberry@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/networking/bonding.rst | 9 ++++++++
+ drivers/net/bonding/bond_3ad.c | 4 +++
+ drivers/net/bonding/bond_netlink.c | 16 +++++++++++++++
+ drivers/net/bonding/bond_options.c | 36 +++++++++++++++++++++++++++++++++++
+ include/net/bond_3ad.h | 1
+ include/net/bond_options.h | 1
+ include/uapi/linux/if_link.h | 1
+ 7 files changed, 68 insertions(+)
+
+--- a/Documentation/networking/bonding.rst
++++ b/Documentation/networking/bonding.rst
+@@ -193,6 +193,15 @@ ad_actor_sys_prio
+ This parameter has effect only in 802.3ad mode and is available through
+ SysFs interface.
+
++actor_port_prio
++
+ In an AD system, this specifies the port priority. The allowed range
+ is 0 - 65535. If the value is not specified, it takes 255 as the
+ default value.
++
++ This parameter has effect only in 802.3ad mode and is available through
++ netlink interface.
++
+ ad_actor_system
+
+ In an AD system, this specifies the mac-address for the actor in
+--- a/drivers/net/bonding/bond_3ad.c
++++ b/drivers/net/bonding/bond_3ad.c
+@@ -436,6 +436,7 @@ static void __ad_actor_update_port(struc
+
+ port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr;
+ port->actor_system_priority = BOND_AD_INFO(bond).system.sys_priority;
++ port->actor_port_priority = SLAVE_AD_INFO(port->slave)->port_priority;
+ }
+
+ /* Conversions */
+@@ -2195,6 +2196,9 @@ void bond_3ad_bind_slave(struct slave *s
+
+ ad_initialize_port(port, &bond->params);
+
++ /* Port priority is initialized. Update it to slave's ad info */
++ SLAVE_AD_INFO(slave)->port_priority = port->actor_port_priority;
++
+ port->slave = slave;
+ port->actor_port_number = SLAVE_AD_INFO(slave)->id;
+ /* key is determined according to the link speed, duplex and
+--- a/drivers/net/bonding/bond_netlink.c
++++ b/drivers/net/bonding/bond_netlink.c
+@@ -28,6 +28,7 @@ static size_t bond_get_slave_size(const
+ nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */
+ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */
+ nla_total_size(sizeof(s32)) + /* IFLA_BOND_SLAVE_PRIO */
++ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_ACTOR_PORT_PRIO */
+ 0;
+ }
+
+@@ -77,6 +78,10 @@ static int bond_fill_slave_info(struct s
+ ad_port->partner_oper.port_state))
+ goto nla_put_failure;
+ }
++
++ if (nla_put_u16(skb, IFLA_BOND_SLAVE_ACTOR_PORT_PRIO,
++ SLAVE_AD_INFO(slave)->port_priority))
++ goto nla_put_failure;
+ }
+
+ return 0;
+@@ -129,6 +134,7 @@ static const struct nla_policy bond_poli
+ static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = {
+ [IFLA_BOND_SLAVE_QUEUE_ID] = { .type = NLA_U16 },
+ [IFLA_BOND_SLAVE_PRIO] = { .type = NLA_S32 },
++ [IFLA_BOND_SLAVE_ACTOR_PORT_PRIO] = { .type = NLA_U16 },
+ };
+
+ static int bond_validate(struct nlattr *tb[], struct nlattr *data[],
+@@ -178,6 +184,16 @@ static int bond_slave_changelink(struct
+ if (err)
+ return err;
+ }
++
++ if (data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO]) {
++ u16 ad_prio = nla_get_u16(data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO]);
++
++ bond_opt_slave_initval(&newval, &slave_dev, ad_prio);
++ err = __bond_opt_set(bond, BOND_OPT_ACTOR_PORT_PRIO, &newval,
++ data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO], extack);
++ if (err)
++ return err;
++ }
+
+ return 0;
+ }
+--- a/drivers/net/bonding/bond_options.c
++++ b/drivers/net/bonding/bond_options.c
+@@ -79,6 +79,8 @@ static int bond_option_tlb_dynamic_lb_se
+ const struct bond_opt_value *newval);
+ static int bond_option_ad_actor_sys_prio_set(struct bonding *bond,
+ const struct bond_opt_value *newval);
++static int bond_option_actor_port_prio_set(struct bonding *bond,
++ const struct bond_opt_value *newval);
+ static int bond_option_ad_actor_system_set(struct bonding *bond,
+ const struct bond_opt_value *newval);
+ static int bond_option_ad_user_port_key_set(struct bonding *bond,
+@@ -223,6 +225,13 @@ static const struct bond_opt_value bond_
+ { NULL, -1, 0},
+ };
+
++static const struct bond_opt_value bond_actor_port_prio_tbl[] = {
++ { "minval", 0, BOND_VALFLAG_MIN},
++ { "maxval", 65535, BOND_VALFLAG_MAX},
++ { "default", 255, BOND_VALFLAG_DEFAULT},
++ { NULL, -1, 0},
++};
++
+ static const struct bond_opt_value bond_ad_user_port_key_tbl[] = {
+ { "minval", 0, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT},
+ { "maxval", 1023, BOND_VALFLAG_MAX},
+@@ -484,6 +493,13 @@ static const struct bond_option bond_opt
+ .values = bond_ad_actor_sys_prio_tbl,
+ .set = bond_option_ad_actor_sys_prio_set,
+ },
++ [BOND_OPT_ACTOR_PORT_PRIO] = {
++ .id = BOND_OPT_ACTOR_PORT_PRIO,
++ .name = "actor_port_prio",
++ .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)),
++ .values = bond_actor_port_prio_tbl,
++ .set = bond_option_actor_port_prio_set,
++ },
+ [BOND_OPT_AD_ACTOR_SYSTEM] = {
+ .id = BOND_OPT_AD_ACTOR_SYSTEM,
+ .name = "ad_actor_system",
+@@ -1817,6 +1833,26 @@ static int bond_option_ad_actor_sys_prio
+ bond_3ad_update_ad_actor_settings(bond);
+
+ return 0;
++}
++
++static int bond_option_actor_port_prio_set(struct bonding *bond,
++ const struct bond_opt_value *newval)
++{
++ struct slave *slave;
++
++ slave = bond_slave_get_rtnl(newval->slave_dev);
++ if (!slave) {
++ netdev_dbg(bond->dev, "%s called on NULL slave\n", __func__);
++ return -ENODEV;
++ }
++
++ netdev_dbg(newval->slave_dev, "Setting actor_port_prio to %llu\n",
++ newval->value);
++
++ SLAVE_AD_INFO(slave)->port_priority = newval->value;
++ bond_3ad_update_ad_actor_settings(bond);
++
++ return 0;
+ }
+
+ static int bond_option_ad_actor_system_set(struct bonding *bond,
+--- a/include/net/bond_3ad.h
++++ b/include/net/bond_3ad.h
+@@ -274,6 +274,7 @@ struct ad_slave_info {
+ struct port port; /* 802.3ad port structure */
+ struct bond_3ad_stats stats;
+ u16 id;
++ u16 port_priority;
+ };
+
+ static inline const char *bond_3ad_churn_desc(churn_state_t state)
+--- a/include/net/bond_options.h
++++ b/include/net/bond_options.h
+@@ -78,6 +78,7 @@ enum {
+ BOND_OPT_PRIO,
+ BOND_OPT_COUPLED_CONTROL,
+ BOND_OPT_BROADCAST_NEIGH,
++ BOND_OPT_ACTOR_PORT_PRIO,
+ BOND_OPT_LAST
+ };
+
+--- a/include/uapi/linux/if_link.h
++++ b/include/uapi/linux/if_link.h
+@@ -1551,6 +1551,7 @@ enum {
+ IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
+ IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
+ IFLA_BOND_SLAVE_PRIO,
++ IFLA_BOND_SLAVE_ACTOR_PORT_PRIO,
+ __IFLA_BOND_SLAVE_MAX,
+ };
+
--- /dev/null
+From 3yXExagcKBso05ru77Ew44w1u.s42w7uw0x1y3ADv4A3tq9y43.47w@flex--kpberry.bounces.google.com Tue Jun 16 16:54:50 2026
+From: Kevin Berry <kpberry@google.com>
+Date: Tue, 16 Jun 2026 15:54:30 +0000
+Subject: bonding: fix NULL pointer dereference in actor_port_prio setting
+To: stable@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, bestswngs@gmail.com, chenglongtang@google.com, joneslee@google.com, kpberry@google.com, pabeni@redhat.com, rnj@google.com, sashal@kernel.org, xmei5@asu.edu, Hangbin Liu <liuhangbin@gmail.com>, Liang Li <liali@redhat.com>, Jakub Kicinski <kuba@kernel.org>
+Message-ID: <20260616155432.2093908-8-kpberry@google.com>
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 067bf016e99ad72aa4ff869d6dec1fd62a9c6202 ]
+
+Liang reported an issue where setting a slave’s actor_port_prio to
+predefined values such as 0, 255, or 65535 would cause a system crash.
+
+The problem occurs because in bond_opt_parse(), when the provided value
+matches a predefined table entry, the function returns that table entry,
+which does not contain slave information. Later, in
+bond_option_actor_port_prio_set(), calling bond_slave_get_rtnl() leads
+to a NULL pointer dereference.
+
+Since actor_port_prio is defined as a u16 and initialized to the default
+value of 255 in ad_initialize_port(), there is no need for the
+bond_actor_port_prio_tbl. Using the BOND_OPTFLAG_RAWVAL flag is sufficient.
+
+Fixes: 6b6dc81ee7e8 ("bonding: add support for per-port LACP actor priority")
+Reported-by: Liang Li <liali@redhat.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Link: https://patch.msgid.link/20251105072620.164841-1-liuhangbin@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Kevin Berry <kpberry@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_options.c | 9 +--------
+ 1 file changed, 1 insertion(+), 8 deletions(-)
+
+--- a/drivers/net/bonding/bond_options.c
++++ b/drivers/net/bonding/bond_options.c
+@@ -225,13 +225,6 @@ static const struct bond_opt_value bond_
+ { NULL, -1, 0},
+ };
+
+-static const struct bond_opt_value bond_actor_port_prio_tbl[] = {
+- { "minval", 0, BOND_VALFLAG_MIN},
+- { "maxval", 65535, BOND_VALFLAG_MAX},
+- { "default", 255, BOND_VALFLAG_DEFAULT},
+- { NULL, -1, 0},
+-};
+-
+ static const struct bond_opt_value bond_ad_user_port_key_tbl[] = {
+ { "minval", 0, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT},
+ { "maxval", 1023, BOND_VALFLAG_MAX},
+@@ -497,7 +490,7 @@ static const struct bond_option bond_opt
+ .id = BOND_OPT_ACTOR_PORT_PRIO,
+ .name = "actor_port_prio",
+ .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)),
+- .values = bond_actor_port_prio_tbl,
++ .flags = BOND_OPTFLAG_RAWVAL,
+ .set = bond_option_actor_port_prio_set,
+ },
+ [BOND_OPT_AD_ACTOR_SYSTEM] = {
--- /dev/null
+From 3xXExagcKBsYw1nq33As00sxq.o0ys3qswtxuz69r06zpm5u0z.03s@flex--kpberry.bounces.google.com Tue Jun 16 16:54:46 2026
+From: Kevin Berry <kpberry@google.com>
+Date: Tue, 16 Jun 2026 15:54:27 +0000
+Subject: bonding: print churn state via netlink
+To: stable@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, bestswngs@gmail.com, chenglongtang@google.com, joneslee@google.com, kpberry@google.com, pabeni@redhat.com, rnj@google.com, sashal@kernel.org, xmei5@asu.edu, Hangbin Liu <liuhangbin@gmail.com>
+Message-ID: <20260616155432.2093908-5-kpberry@google.com>
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 4916f2e2f3fc9aef289fcd07949301e5c29094c2 ]
+
+Currently, the churn state is printed only in sysfs. Add netlink support
+so users can get the state via netlink.
+
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Link: https://patch.msgid.link/20260224020215.6012-1-liuhangbin@gmail.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Kevin Berry <kpberry@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_netlink.c | 9 +++++++++
+ include/uapi/linux/if_link.h | 2 ++
+ 2 files changed, 11 insertions(+)
+
+--- a/drivers/net/bonding/bond_netlink.c
++++ b/drivers/net/bonding/bond_netlink.c
+@@ -29,6 +29,8 @@ static size_t bond_get_slave_size(const
+ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */
+ nla_total_size(sizeof(s32)) + /* IFLA_BOND_SLAVE_PRIO */
+ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_ACTOR_PORT_PRIO */
++ nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE */
++ nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE */
+ 0;
+ }
+
+@@ -77,6 +79,13 @@ static int bond_fill_slave_info(struct s
+ IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
+ ad_port->partner_oper.port_state))
+ goto nla_put_failure;
++
++ if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE,
++ ad_port->sm_churn_actor_state))
++ goto nla_put_failure;
++ if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE,
++ ad_port->sm_churn_partner_state))
++ goto nla_put_failure;
+ }
+
+ if (nla_put_u16(skb, IFLA_BOND_SLAVE_ACTOR_PORT_PRIO,
+--- a/include/uapi/linux/if_link.h
++++ b/include/uapi/linux/if_link.h
+@@ -1552,6 +1552,8 @@ enum {
+ IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
+ IFLA_BOND_SLAVE_PRIO,
+ IFLA_BOND_SLAVE_ACTOR_PORT_PRIO,
++ IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE,
++ IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE,
+ __IFLA_BOND_SLAVE_MAX,
+ };
+
--- /dev/null
+From 8e2bad543eca5c25cd02cbc63d72557934d45f13 Mon Sep 17 00:00:00 2001
+From: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
+Date: Mon, 10 Feb 2025 13:16:22 -0600
+Subject: dlm: prevent NPD when writing a positive value to event_done
+
+From: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
+
+commit 8e2bad543eca5c25cd02cbc63d72557934d45f13 upstream.
+
+do_uevent returns the value written to event_done. In case it is a
+positive value, new_lockspace would undo all the work, and lockspace
+would not be set. __dlm_new_lockspace, however, would treat that
+positive value as a success due to commit 8511a2728ab8 ("dlm: fix use
+count with multiple joins").
+
+Down the line, device_create_lockspace would pass that NULL lockspace to
+dlm_find_lockspace_local, leading to a NULL pointer dereference.
+
+Treating such positive values as successes prevents the problem. Given
+this has been broken for so long, this is unlikely to break userspace
+expectations.
+
+Fixes: 8511a2728ab8 ("dlm: fix use count with multiple joins")
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
+Signed-off-by: David Teigland <teigland@redhat.com>
+Signed-off-by: Nazar Kalashnikov <nazarkalashnikov0@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/dlm/lockspace.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/dlm/lockspace.c
++++ b/fs/dlm/lockspace.c
+@@ -576,7 +576,7 @@ static int new_lockspace(const char *nam
+ lockspace to start running (via sysfs) in dlm_ls_start(). */
+
+ error = do_uevent(ls, 1);
+- if (error)
++ if (error < 0)
+ goto out_recoverd;
+
+ /* wait until recovery is successful or failed */
--- /dev/null
+From stable+bounces-265062-greg=kroah.com@vger.kernel.org Tue Jun 16 18:13:39 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jun 2026 13:01:22 -0400
+Subject: Drivers: hv: vmbus: Improve the logic of reserving fb_mmio on Gen2 VMs
+To: stable@vger.kernel.org
+Cc: Dexuan Cui <decui@microsoft.com>, Michael Kelley <mhklinux@outlook.com>, Krister Johansen <kjlx@templeofstupid.com>, Matthew Ruffell <matthew.ruffell@canonical.com>, Wei Liu <wei.liu@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260616170122.3368542-1-sashal@kernel.org>
+
+From: Dexuan Cui <decui@microsoft.com>
+
+[ Upstream commit 016a25e4b0df4d77e7c258edee4aaf982e4ee809 ]
+
+If vmbus_reserve_fb() in the kdump/kexec kernel fails to properly reserve
+the framebuffer MMIO range (which is below 4GB) due to a Gen2 VM's
+screen.lfb_base being zero [1], there is an MMIO conflict between the
+drivers hyperv-drm and pci-hyperv: when the driver pci-hyperv's
+hv_allocate_config_window() calls vmbus_allocate_mmio() to get an
+MMIO range, typically it gets a 32-bit MMIO range that overlaps with the
+framebuffer MMIO range, and later hv_pci_enter_d0() fails with an
+error message "PCI Pass-through VSP failed D0 Entry with status" since
+the host thinks that PCI devices must not use MMIO space that the
+host has assigned to the framebuffer.
+
+This is especially an issue if pci-hyperv is built-in and hyperv-drm is
+built as a module. Consequently, the kdump/kexec kernel fails to detect
+PCI devices via pci-hyperv, and may fail to mount the root file system,
+which may reside in a NVMe disk. The issue described here has existed
+for SR-IOV VF NICs since day one of the pci-hyperv driver, and has been
+worked around on x64 when possible. With the recent introduction of
+ARM64 VMs that boot from NVMe, there is no workaround, so we need a
+formal fix.
+
+On Gen2 VMs, if the screen.lfb_base is 0 in the kdump/kexec kernel [1],
+fall back to the low MMIO base, which should be equal to the framebuffer
+MMIO base [2] (the statement is true according to my testing on x64
+Windows Server 2016, and on x64 and ARM64 Windows Server 2025 and on
+Azure. I checked with the Hyper-V team and they said the statement should
+continue to be true for Gen2 VMs). In the first kernel, screen.lfb_base
+is not 0; if the user specifies a very high resolution, it's not enough
+to only reserve 8MB: let's always reserve half of the space below 4GB,
+but cap the reservation to 128MB, which is the required framebuffer size
+of the highest resolution 7680*4320 supported by Hyper-V.
+
+While at it, fix the comparison "end > VTPM_BASE_ADDRESS" by changing
+the > to >=. Here the 'end' is an inclusive end (typically, it's
+0xFFFF_FFFF for the low MMIO range).
+
+Note: vmbus_reserve_fb() now also reserves an MMIO range at the beginning
+of the low MMIO range on CVMs, which have no framebuffers (the
+'screen.lfb_base' in vmbus_reserve_fb() is 0 for CVMs), just in case the
+host might treat the beginning of the low MMIO range specially [3]. BTW,
+the OpenHCL kernel is not affected by the change, because that kernel
+boots with DeviceTree rather than ACPI (so vmbus_reserve_fb() won't run
+there), and there is no framebuffer device for that kernel.
+
+Note: normally Gen1 VMs don't have the MMIO conflict issue because the
+framebuffer MMIO range (which is hardcoded to base=4GB-128MB and
+size=64MB for Gen1 VMs by the host) is always reported via the legacy PCI
+graphics device's BAR, so the kdump/kexec kernel can reserve the 64MB
+MMIO range; however, if the VM is configured to use a very high resolution
+and the required framebuffer size exceeds 64MB (AFAIK, in practice, this
+isn't a typical configuration by users), the hyperv-drm driver may need to
+allocate an MMIO range above 4GB and change the framebuffer MMIO location
+to the allocated MMIO range -- in this case, there can still be issues [4]
+which can't be easily fixed: any possible affected Gen1 users would have
+to use a resolution whose framebuffer size is <= 64MB, or switch to Gen2
+VMs.
+
+[1] https://lore.kernel.org/all/SA1PR21MB692176C1BC53BFC9EAE5CF8EBF51A@SA1PR21MB6921.namprd21.prod.outlook.com/
+[2] https://lore.kernel.org/all/SA1PR21MB69218F955B62DFF62E3E88D2BF222@SA1PR21MB6921.namprd21.prod.outlook.com/
+[3] https://lore.kernel.org/all/SN6PR02MB415726B17D5A6027CD1717E8D4342@SN6PR02MB4157.namprd02.prod.outlook.com/
+[4] https://lore.kernel.org/all/SA1PR21MB69213486F821CA5A2C793C81BF342@SA1PR21MB6921.namprd21.prod.outlook.com/
+
+Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs")
+CC: stable@vger.kernel.org
+Reviewed-by: Michael Kelley <mhklinux@outlook.com>
+Tested-by: Krister Johansen <kjlx@templeofstupid.com>
+Tested-by: Matthew Ruffell <matthew.ruffell@canonical.com>
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hv/vmbus_drv.c | 29 ++++++++++++++++++++++++++---
+ 1 file changed, 26 insertions(+), 3 deletions(-)
+
+--- a/drivers/hv/vmbus_drv.c
++++ b/drivers/hv/vmbus_drv.c
+@@ -2205,8 +2205,8 @@ static acpi_status vmbus_walk_resources(
+ return AE_NO_MEMORY;
+
+ /* If this range overlaps the virtual TPM, truncate it. */
+- if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS)
+- end = VTPM_BASE_ADDRESS;
++ if (end >= VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS)
++ end = VTPM_BASE_ADDRESS - 1;
+
+ new_res->name = "hyperv mmio";
+ new_res->flags = IORESOURCE_MEM;
+@@ -2273,6 +2273,7 @@ static void vmbus_mmio_remove(void)
+ static void __maybe_unused vmbus_reserve_fb(void)
+ {
+ resource_size_t start = 0, size;
++ resource_size_t low_mmio_base;
+ struct pci_dev *pdev;
+
+ if (efi_enabled(EFI_BOOT)) {
+@@ -2280,6 +2281,24 @@ static void __maybe_unused vmbus_reserve
+ if (IS_ENABLED(CONFIG_SYSFB)) {
+ start = screen_info.lfb_base;
+ size = max_t(__u32, screen_info.lfb_size, 0x800000);
++
++ low_mmio_base = hyperv_mmio->start;
++ if (!low_mmio_base || upper_32_bits(low_mmio_base) ||
++ (start && start < low_mmio_base)) {
++ pr_warn("Unexpected low mmio base %pa\n", &low_mmio_base);
++ } else {
++ /*
++ * If the kdump/kexec or CVM kernel's lfb_base
++ * is 0, fall back to the low mmio base.
++ */
++ if (!start)
++ start = low_mmio_base;
++ /*
++ * Reserve half of the space below 4GB for high
++ * resolutions, but cap the reservation to 128MB.
++ */
++ size = min((SZ_4G - start) / 2, SZ_128M);
++ }
+ }
+ } else {
+ /* Gen1 VM: get FB base from PCI */
+@@ -2300,8 +2319,10 @@ static void __maybe_unused vmbus_reserve
+ pci_dev_put(pdev);
+ }
+
+- if (!start)
++ if (!start) {
++ pr_warn("Unexpected framebuffer mmio base of zero\n");
+ return;
++ }
+
+ /*
+ * Make a claim for the frame buffer in the resource tree under the
+@@ -2311,6 +2332,8 @@ static void __maybe_unused vmbus_reserve
+ */
+ for (; !fb_mmio && (size >= 0x100000); size >>= 1)
+ fb_mmio = __request_region(hyperv_mmio, start, size, fb_mmio_name, 0);
++
++ pr_info("hv_mmio=%pR,%pR fb=%pR\n", hyperv_mmio, hyperv_mmio->sibling, fb_mmio);
+ }
+
+ /**
--- /dev/null
+From stable+bounces-264764-greg=kroah.com@vger.kernel.org Tue Jun 16 17:44:22 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jun 2026 12:36:09 -0400
+Subject: fhandle: fix UAF due to unlocked ->mnt_ns read in may_decode_fh()
+To: stable@vger.kernel.org
+Cc: Jann Horn <jannh@google.com>, "Christian Brauner (Amutable)" <brauner@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260616163609.3352096-1-sashal@kernel.org>
+
+From: Jann Horn <jannh@google.com>
+
+[ Upstream commit 40ab6644b99685755f740b872c00ef40d9aa870e ]
+
+may_decode_fh() accesses mount::mnt_ns without holding any locks; that
+means the mount can concurrently be unmounted, and the mnt_namespace can
+concurrently be freed after an RCU grace period.
+
+This race can happen as follows, assuming that the mount point was
+created by open_tree(..., OPEN_TREE_CLONE):
+
+thread 1 thread 2 RCU
+ __do_sys_open_by_handle_at
+ do_handle_open
+ handle_to_path
+ may_decode_fh
+ is_mounted
+ [mount::mnt_ns access]
+ [mount::mnt_ns access]
+__do_sys_close
+ fput_close_sync
+ __fput
+ dissolve_on_fput
+ umount_tree
+ class_namespace_excl_destructor
+ namespace_unlock
+ free_mnt_ns
+ mnt_ns_tree_remove
+ call_rcu(mnt_ns_release_rcu)
+ mnt_ns_release_rcu
+ mnt_ns_release
+ kfree
+ [mnt_namespace::user_ns access] **UAF**
+
+Fix it by taking rcu_read_lock() around the mount::mnt_ns access, like
+in __prepend_path().
+Additionally, document the semantics of mount::mnt_ns, and use WRITE_ONCE()
+for writers that can race with lockless readers.
+
+This bug is unreachable unless one of the following is set:
+
+ - CONFIG_PREEMPTION
+ - CONFIG_RCU_STRICT_GRACE_PERIOD
+
+because it requires an RCU grace period to happen during a syscall without
+an explicit preemption.
+
+This doesn't seem to have interesting security impact; worst-case, it could
+leak the result of an integer comparison to userspace (from the level
+check in cap_capable()), cause an endless loop, or crash the kernel by
+dereferencing an invalid address.
+
+Fixes: 620c266f3949 ("fhandle: relax open_by_handle_at() permission checks")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jann Horn <jannh@google.com>
+Link: https://patch.msgid.link/20260603-vfs-fhandle-uaf-fix-v2-1-d05db76a5084@google.com
+Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fhandle.c | 16 ++++++++++++++--
+ fs/mount.h | 10 +++++++++-
+ fs/namespace.c | 6 +++---
+ 3 files changed, 26 insertions(+), 6 deletions(-)
+
+--- a/fs/fhandle.c
++++ b/fs/fhandle.c
+@@ -242,6 +242,19 @@ static int do_handle_to_path(struct file
+ return 0;
+ }
+
++static bool capable_wrt_mount(struct mount *mount)
++{
++ struct mnt_namespace *mnt_ns;
++
++ /*
++ * For ->mnt_ns access.
++ * The following READ_ONCE() is semantically rcu_dereference().
++ */
++ guard(rcu)();
++ mnt_ns = READ_ONCE(mount->mnt_ns);
++ return ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN);
++}
++
+ /*
+ * Allow relaxed permissions of file handles if the caller has the
+ * ability to mount the filesystem or create a bind-mount of the
+@@ -273,8 +286,7 @@ static inline bool may_decode_fh(struct
+ if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
+ ctx->flags = HANDLE_CHECK_PERMS;
+ else if (is_mounted(root->mnt) &&
+- ns_capable(real_mount(root->mnt)->mnt_ns->user_ns,
+- CAP_SYS_ADMIN) &&
++ capable_wrt_mount(real_mount(root->mnt)) &&
+ !has_locked_children(real_mount(root->mnt), root->dentry))
+ ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE;
+ else
+--- a/fs/mount.h
++++ b/fs/mount.h
+@@ -58,7 +58,15 @@ struct mount {
+ struct list_head mnt_slave_list;/* list of slave mounts */
+ struct list_head mnt_slave; /* slave list entry */
+ struct mount *mnt_master; /* slave is on master->mnt_slave_list */
+- struct mnt_namespace *mnt_ns; /* containing namespace */
++ /*
++ * Containing namespace (active or deactivating, non-refcounted).
++ * Normally protected by namespace_sem.
++ * Can also be accessed locklessly under RCU. RCU readers can't rely on
++ * the namespace still being active, but implicitly hold a passive
++ * reference (because an RCU delay happens between a namespace being
++ * deactivated and the corresponding passive refcount drop).
++ */
++ struct mnt_namespace *mnt_ns;
+ struct mountpoint *mnt_mp; /* where is it mounted */
+ union {
+ struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1132,7 +1132,7 @@ static void mnt_add_to_ns(struct mnt_nam
+ struct rb_node *parent = NULL;
+
+ WARN_ON(mnt_ns_attached(mnt));
+- mnt->mnt_ns = ns;
++ WRITE_ONCE(mnt->mnt_ns, ns);
+ while (*link) {
+ parent = *link;
+ if (mnt->mnt_id_unique < node_to_mount(parent)->mnt_id_unique)
+@@ -1493,7 +1493,7 @@ EXPORT_SYMBOL(mntget);
+ void mnt_make_shortterm(struct vfsmount *mnt)
+ {
+ if (mnt)
+- real_mount(mnt)->mnt_ns = NULL;
++ WRITE_ONCE(real_mount(mnt)->mnt_ns, NULL);
+ }
+
+ /**
+@@ -1805,7 +1805,7 @@ static void umount_tree(struct mount *mn
+ ns->nr_mounts--;
+ __touch_mnt_namespace(ns);
+ }
+- p->mnt_ns = NULL;
++ WRITE_ONCE(p->mnt_ns, NULL);
+ if (how & UMOUNT_SYNC)
+ p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
+
--- /dev/null
+From stable+bounces-267936-greg=kroah.com@vger.kernel.org Tue Jun 23 13:05:39 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jun 2026 08:05:32 -0400
+Subject: fuse: re-lock request before replacing page cache folio
+To: stable@vger.kernel.org
+Cc: Joanne Koong <joannelkoong@gmail.com>, Lei Lu <llfamsec@gmail.com>, Miklos Szeredi <mszeredi@redhat.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260623120532.1152295-1-sashal@kernel.org>
+
+From: Joanne Koong <joannelkoong@gmail.com>
+
+[ Upstream commit a078484921052d0badd827fcc2770b5cfc1d4120 ]
+
+fuse_try_move_folio() unlocks the request on entry but does not
+re-lock it on the success path. This means fuse_chan_abort() can end the
+request and free the fuse_io_args (eg fuse_readpages_end()) while the
+subsequent copy chain logic after fuse_try_move_folio() accesses the
+fuse_io_args, leading to use-after-free issues.
+
+Fix this by calling lock_request() before replace_page_cache_folio().
+This ensures the request is locked on the success path which will
+prevent the fuse_io_args from being freed while the later copying logic
+runs, and also ensures that the ap->folios[i]->mapping is never null
+since ap->folios[i] will always point to the newfolio after
+replace_page_cache_folio().
+
+Fixes: ce534fb05292 ("fuse: allow splice to move pages")
+Cc: stable@vger.kernel.org
+Reported-by: Lei Lu <llfamsec@gmail.com>
+Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/dev.c | 19 +++++--------------
+ 1 file changed, 5 insertions(+), 14 deletions(-)
+
+--- a/fs/fuse/dev.c
++++ b/fs/fuse/dev.c
+@@ -891,6 +891,10 @@ static int fuse_try_move_page(struct fus
+ if (WARN_ON(folio_test_mlocked(oldfolio)))
+ goto out_fallback_unlock;
+
++ err = lock_request(cs->req);
++ if (err)
++ goto out_fallback_unlock;
++
+ replace_page_cache_folio(oldfolio, newfolio);
+
+ folio_get(newfolio);
+@@ -904,20 +908,7 @@ static int fuse_try_move_page(struct fus
+ */
+ pipe_buf_release(cs->pipe, buf);
+
+- err = 0;
+- spin_lock(&cs->req->waitq.lock);
+- if (test_bit(FR_ABORTED, &cs->req->flags))
+- err = -ENOENT;
+- else
+- *pagep = &newfolio->page;
+- spin_unlock(&cs->req->waitq.lock);
+-
+- if (err) {
+- folio_unlock(newfolio);
+- folio_put(newfolio);
+- goto out_put_old;
+- }
+-
++ *pagep = &newfolio->page;
+ folio_unlock(oldfolio);
+ /* Drop ref for ap->pages[] array */
+ folio_put(oldfolio);
--- /dev/null
+From stable+bounces-266506-greg=kroah.com@vger.kernel.org Tue Jun 16 20:06:16 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jun 2026 15:05:55 -0400
+Subject: futex/requeue: Prevent NULL pointer dereference in remove_waiter() on self-deadlock
+To: stable@vger.kernel.org
+Cc: Ji'an Zhou <eilaimemedsnaimel@gmail.com>, Thomas Gleixner <tglx@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260616190556.3487341-1-sashal@kernel.org>
+
+From: Ji'an Zhou <eilaimemedsnaimel@gmail.com>
+
+[ Upstream commit 74e144274af39935b0f410c0ee4d2b91c3730414 ]
+
+When FUTEX_CMP_REQUEUE_PI requeues a non-top waiter that already owns the
+target PI futex, task_blocks_on_rt_mutex() returns -EDEADLK before setting
+waiter->task.
+
+The subsequent remove_waiter() in rt_mutex_start_proxy_lock() dereferences
+the NULL waiter->task, causing a kernel crash.
+
+Add a self-deadlock check for non-top waiters before calling
+rt_mutex_start_proxy_lock(), analogous to the top-waiter check in
+futex_lock_pi_atomic().
+
+Fixes: 3bfdc63936dd4773109b7b8c280c0f3b5ae7d349 ("rtmutex: Use waiter::task instead of current in remove_waiter()")
+Signed-off-by: Ji'an Zhou <eilaimemedsnaimel@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex/requeue.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/kernel/futex/requeue.c
++++ b/kernel/futex/requeue.c
+@@ -633,6 +633,12 @@ retry_private:
+ continue;
+ }
+
++ /* Self-deadlock: non-top waiter already owns the PI futex. */
++ if (rt_mutex_owner(&pi_state->pi_mutex) == this->task) {
++ ret = -EDEADLK;
++ break;
++ }
++
+ ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
+ this->rt_waiter,
+ this->task);
--- /dev/null
+From stable+bounces-265063-greg=kroah.com@vger.kernel.org Tue Jun 16 18:13:34 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jun 2026 13:01:25 -0400
+Subject: hv: utils: handle and propagate errors in kvp_register
+To: stable@vger.kernel.org
+Cc: Thorsten Blum <thorsten.blum@linux.dev>, Long Li <longli@microsoft.com>, Wei Liu <wei.liu@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260616170125.3368588-1-sashal@kernel.org>
+
+From: Thorsten Blum <thorsten.blum@linux.dev>
+
+[ Upstream commit 3fcf923302a8f5c0dc3af3d2ca2657cb5fae4297 ]
+
+Make kvp_register() return an error code instead of silently ignoring
+failures, and propagate the error from kvp_handle_handshake() instead of
+returning success.
+
+This propagates both kzalloc_obj() and hvutil_transport_send() failures
+to kvp_handle_handshake() and thus to kvp_on_msg().
+
+Fixes: 245ba56a52a3 ("Staging: hv: Implement key/value pair (KVP)")
+Cc: stable@vger.kernel.org
+Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
+Reviewed-by: Long Li <longli@microsoft.com>
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hv/hv_kvp.c | 27 ++++++++++++++-------------
+ 1 file changed, 14 insertions(+), 13 deletions(-)
+
+--- a/drivers/hv/hv_kvp.c
++++ b/drivers/hv/hv_kvp.c
+@@ -93,7 +93,7 @@ static void kvp_send_key(struct work_str
+ static void kvp_respond_to_host(struct hv_kvp_msg *msg, int error);
+ static void kvp_timeout_func(struct work_struct *dummy);
+ static void kvp_host_handshake_func(struct work_struct *dummy);
+-static void kvp_register(int);
++static int kvp_register(int);
+
+ static DECLARE_DELAYED_WORK(kvp_timeout_work, kvp_timeout_func);
+ static DECLARE_DELAYED_WORK(kvp_host_handshake_work, kvp_host_handshake_func);
+@@ -127,24 +127,26 @@ static void kvp_register_done(void)
+ hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
+ }
+
+-static void
++static int
+ kvp_register(int reg_value)
+ {
+
+ struct hv_kvp_msg *kvp_msg;
+ char *version;
++ int ret;
+
+ kvp_msg = kzalloc(sizeof(*kvp_msg), GFP_KERNEL);
++ if (!kvp_msg)
++ return -ENOMEM;
+
+- if (kvp_msg) {
+- version = kvp_msg->body.kvp_register.version;
+- kvp_msg->kvp_hdr.operation = reg_value;
+- strcpy(version, HV_DRV_VERSION);
+-
+- hvutil_transport_send(hvt, kvp_msg, sizeof(*kvp_msg),
+- kvp_register_done);
+- kfree(kvp_msg);
+- }
++ version = kvp_msg->body.kvp_register.version;
++ kvp_msg->kvp_hdr.operation = reg_value;
++ strcpy(version, HV_DRV_VERSION);
++
++ ret = hvutil_transport_send(hvt, kvp_msg, sizeof(*kvp_msg),
++ kvp_register_done);
++ kfree(kvp_msg);
++ return ret;
+ }
+
+ static void kvp_timeout_func(struct work_struct *dummy)
+@@ -186,9 +188,8 @@ static int kvp_handle_handshake(struct h
+ */
+ pr_debug("KVP: userspace daemon ver. %d connected\n",
+ msg->kvp_hdr.operation);
+- kvp_register(dm_reg_value);
+
+- return 0;
++ return kvp_register(dm_reg_value);
+ }
+
+
--- /dev/null
+From stable+bounces-266509-greg=kroah.com@vger.kernel.org Tue Jun 16 20:06:10 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jun 2026 15:06:00 -0400
+Subject: locking/mutex: Remove wakeups from under mutex::wait_lock
+To: stable@vger.kernel.org
+Cc: Peter Zijlstra <peterz@infradead.org>, Juri Lelli <juri.lelli@redhat.com>, John Stultz <jstultz@google.com>, Metin Kaya <metin.kaya@arm.com>, Davidlohr Bueso <dave@stgolabs.net>, K Prateek Nayak <kprateek.nayak@amd.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260616190601.3487860-1-sashal@kernel.org>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 894d1b3db41cf7e6ae0304429a1747b3c3f390bc ]
+
+In preparation to nest mutex::wait_lock under rq::lock we need
+to remove wakeups from under it.
+
+Do this by utilizing wake_qs to defer the wakeup until after the
+lock is dropped.
+
+[Heavily changed after 55f036ca7e74 ("locking: WW mutex cleanup") and
+08295b3b5bee ("locking: Implement an algorithm choice for Wound-Wait
+mutexes")]
+[jstultz: rebased to mainline, added extra wake_up_q & init
+ to avoid hangs, similar to Connor's rework of this patch]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
+Signed-off-by: John Stultz <jstultz@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Metin Kaya <metin.kaya@arm.com>
+Acked-by: Davidlohr Bueso <dave@stgolabs.net>
+Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Tested-by: Metin Kaya <metin.kaya@arm.com>
+Link: https://lore.kernel.org/r/20241009235352.1614323-2-jstultz@google.com
+Stable-dep-of: 40a25d59e85b ("locking/rtmutex: Skip remove_waiter() when waiter is not enqueued")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/futex/pi.c | 6 +++-
+ kernel/locking/mutex.c | 16 +++++++++---
+ kernel/locking/rtmutex.c | 51 +++++++++++++++++++++++++++++-----------
+ kernel/locking/rtmutex_api.c | 12 +++++++--
+ kernel/locking/rtmutex_common.h | 3 +-
+ kernel/locking/rwbase_rt.c | 8 +++++-
+ kernel/locking/rwsem.c | 4 +--
+ kernel/locking/spinlock_rt.c | 5 ++-
+ kernel/locking/ww_mutex.h | 30 ++++++++++++++---------
+ 9 files changed, 96 insertions(+), 39 deletions(-)
+
+--- a/kernel/futex/pi.c
++++ b/kernel/futex/pi.c
+@@ -922,6 +922,7 @@ int futex_lock_pi(u32 __user *uaddr, uns
+ struct rt_mutex_waiter rt_waiter;
+ struct futex_hash_bucket *hb;
+ struct futex_q q = futex_q_init;
++ DEFINE_WAKE_Q(wake_q);
+ int res, ret;
+
+ if (!IS_ENABLED(CONFIG_FUTEX_PI))
+@@ -1019,8 +1020,11 @@ retry_private:
+ * such that futex_unlock_pi() is guaranteed to observe the waiter when
+ * it sees the futex_q::pi_state.
+ */
+- ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
++ ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current, &wake_q);
++ preempt_disable();
+ raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
++ wake_up_q(&wake_q);
++ preempt_enable();
+
+ if (ret) {
+ if (ret == 1)
+--- a/kernel/locking/mutex.c
++++ b/kernel/locking/mutex.c
+@@ -575,6 +575,7 @@ __mutex_lock_common(struct mutex *lock,
+ struct lockdep_map *nest_lock, unsigned long ip,
+ struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
+ {
++ DEFINE_WAKE_Q(wake_q);
+ struct mutex_waiter waiter;
+ struct ww_mutex *ww;
+ int ret;
+@@ -625,7 +626,7 @@ __mutex_lock_common(struct mutex *lock,
+ */
+ if (__mutex_trylock(lock)) {
+ if (ww_ctx)
+- __ww_mutex_check_waiters(lock, ww_ctx);
++ __ww_mutex_check_waiters(lock, ww_ctx, &wake_q);
+
+ goto skip_wait;
+ }
+@@ -645,7 +646,7 @@ __mutex_lock_common(struct mutex *lock,
+ * Add in stamp order, waking up waiters that must kill
+ * themselves.
+ */
+- ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx);
++ ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx, &wake_q);
+ if (ret)
+ goto err_early_kill;
+ }
+@@ -681,6 +682,10 @@ __mutex_lock_common(struct mutex *lock,
+ }
+
+ raw_spin_unlock(&lock->wait_lock);
++ /* Make sure we do wakeups before calling schedule */
++ wake_up_q(&wake_q);
++ wake_q_init(&wake_q);
++
+ schedule_preempt_disabled();
+
+ first = __mutex_waiter_is_first(lock, &waiter);
+@@ -714,7 +719,7 @@ acquired:
+ */
+ if (!ww_ctx->is_wait_die &&
+ !__mutex_waiter_is_first(lock, &waiter))
+- __ww_mutex_check_waiters(lock, ww_ctx);
++ __ww_mutex_check_waiters(lock, ww_ctx, &wake_q);
+ }
+
+ __mutex_remove_waiter(lock, &waiter);
+@@ -730,6 +735,7 @@ skip_wait:
+ ww_mutex_lock_acquired(ww, ww_ctx);
+
+ raw_spin_unlock(&lock->wait_lock);
++ wake_up_q(&wake_q);
+ preempt_enable();
+ return 0;
+
+@@ -741,6 +747,7 @@ err_early_kill:
+ raw_spin_unlock(&lock->wait_lock);
+ debug_mutex_free_waiter(&waiter);
+ mutex_release(&lock->dep_map, ip);
++ wake_up_q(&wake_q);
+ preempt_enable();
+ return ret;
+ }
+@@ -951,9 +958,10 @@ static noinline void __sched __mutex_unl
+ if (owner & MUTEX_FLAG_HANDOFF)
+ __mutex_handoff(lock, next);
+
++ preempt_disable();
+ raw_spin_unlock(&lock->wait_lock);
+-
+ wake_up_q(&wake_q);
++ preempt_enable();
+ }
+
+ #ifndef CONFIG_DEBUG_LOCK_ALLOC
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -34,13 +34,15 @@
+
+ static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
+ struct rt_mutex *lock,
+- struct ww_acquire_ctx *ww_ctx)
++ struct ww_acquire_ctx *ww_ctx,
++ struct wake_q_head *wake_q)
+ {
+ return 0;
+ }
+
+ static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
+- struct ww_acquire_ctx *ww_ctx)
++ struct ww_acquire_ctx *ww_ctx,
++ struct wake_q_head *wake_q)
+ {
+ }
+
+@@ -1201,7 +1203,8 @@ static int __sched task_blocks_on_rt_mut
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *task,
+ struct ww_acquire_ctx *ww_ctx,
+- enum rtmutex_chainwalk chwalk)
++ enum rtmutex_chainwalk chwalk,
++ struct wake_q_head *wake_q)
+ {
+ struct task_struct *owner = rt_mutex_owner(lock);
+ struct rt_mutex_waiter *top_waiter = waiter;
+@@ -1245,7 +1248,10 @@ static int __sched task_blocks_on_rt_mut
+
+ /* Check whether the waiter should back out immediately */
+ rtm = container_of(lock, struct rt_mutex, rtmutex);
+- res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
++ preempt_disable();
++ res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx, wake_q);
++ wake_up_q(wake_q);
++ preempt_enable();
+ if (res) {
+ raw_spin_lock(&task->pi_lock);
+ rt_mutex_dequeue(lock, waiter);
+@@ -1677,12 +1683,14 @@ static void __sched rt_mutex_handle_dead
+ * @state: The task state for sleeping
+ * @chwalk: Indicator whether full or partial chainwalk is requested
+ * @waiter: Initializer waiter for blocking
++ * @wake_q: The wake_q to wake tasks after we release the wait_lock
+ */
+ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
+ struct ww_acquire_ctx *ww_ctx,
+ unsigned int state,
+ enum rtmutex_chainwalk chwalk,
+- struct rt_mutex_waiter *waiter)
++ struct rt_mutex_waiter *waiter,
++ struct wake_q_head *wake_q)
+ {
+ struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
+ struct ww_mutex *ww = ww_container_of(rtm);
+@@ -1693,7 +1701,7 @@ static int __sched __rt_mutex_slowlock(s
+ /* Try to acquire the lock again: */
+ if (try_to_take_rt_mutex(lock, current, NULL)) {
+ if (build_ww_mutex() && ww_ctx) {
+- __ww_mutex_check_waiters(rtm, ww_ctx);
++ __ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
+ ww_mutex_lock_acquired(ww, ww_ctx);
+ }
+ return 0;
+@@ -1703,7 +1711,7 @@ static int __sched __rt_mutex_slowlock(s
+
+ trace_contention_begin(lock, LCB_F_RT);
+
+- ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
++ ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk, wake_q);
+ if (likely(!ret))
+ ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
+
+@@ -1711,7 +1719,7 @@ static int __sched __rt_mutex_slowlock(s
+ /* acquired the lock */
+ if (build_ww_mutex() && ww_ctx) {
+ if (!ww_ctx->is_wait_die)
+- __ww_mutex_check_waiters(rtm, ww_ctx);
++ __ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
+ ww_mutex_lock_acquired(ww, ww_ctx);
+ }
+ } else {
+@@ -1733,7 +1741,8 @@ static int __sched __rt_mutex_slowlock(s
+
+ static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
+ struct ww_acquire_ctx *ww_ctx,
+- unsigned int state)
++ unsigned int state,
++ struct wake_q_head *wake_q)
+ {
+ struct rt_mutex_waiter waiter;
+ int ret;
+@@ -1742,7 +1751,7 @@ static inline int __rt_mutex_slowlock_lo
+ waiter.ww_ctx = ww_ctx;
+
+ ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
+- &waiter);
++ &waiter, wake_q);
+
+ debug_rt_mutex_free_waiter(&waiter);
+ return ret;
+@@ -1758,6 +1767,7 @@ static int __sched rt_mutex_slowlock(str
+ struct ww_acquire_ctx *ww_ctx,
+ unsigned int state)
+ {
++ DEFINE_WAKE_Q(wake_q);
+ unsigned long flags;
+ int ret;
+
+@@ -1779,8 +1789,11 @@ static int __sched rt_mutex_slowlock(str
+ * irqsave/restore variants.
+ */
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+- ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
++ ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state, &wake_q);
++ preempt_disable();
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
++ wake_up_q(&wake_q);
++ preempt_enable();
+ rt_mutex_post_schedule();
+
+ return ret;
+@@ -1806,8 +1819,10 @@ static __always_inline int __rt_mutex_lo
+ /**
+ * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
+ * @lock: The underlying RT mutex
++ * @wake_q: The wake_q to wake tasks after we release the wait_lock
+ */
+-static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
++static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock,
++ struct wake_q_head *wake_q)
+ {
+ struct rt_mutex_waiter waiter;
+ struct task_struct *owner;
+@@ -1824,7 +1839,7 @@ static void __sched rtlock_slowlock_lock
+
+ trace_contention_begin(lock, LCB_F_RT);
+
+- task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
++ task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK, wake_q);
+
+ for (;;) {
+ /* Try to acquire the lock again */
+@@ -1835,7 +1850,11 @@ static void __sched rtlock_slowlock_lock
+ owner = rt_mutex_owner(lock);
+ else
+ owner = NULL;
++ preempt_disable();
+ raw_spin_unlock_irq(&lock->wait_lock);
++ wake_up_q(wake_q);
++ wake_q_init(wake_q);
++ preempt_enable();
+
+ if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
+ schedule_rtlock();
+@@ -1860,10 +1879,14 @@ static void __sched rtlock_slowlock_lock
+ static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
+ {
+ unsigned long flags;
++ DEFINE_WAKE_Q(wake_q);
+
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+- rtlock_slowlock_locked(lock);
++ rtlock_slowlock_locked(lock, &wake_q);
++ preempt_disable();
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
++ wake_up_q(&wake_q);
++ preempt_enable();
+ }
+
+ #endif /* RT_MUTEX_BUILD_SPINLOCKS */
+--- a/kernel/locking/rtmutex_api.c
++++ b/kernel/locking/rtmutex_api.c
+@@ -275,6 +275,7 @@ void __sched rt_mutex_proxy_unlock(struc
+ * @lock: the rt_mutex to take
+ * @waiter: the pre-initialized rt_mutex_waiter
+ * @task: the task to prepare
++ * @wake_q: the wake_q to wake tasks after we release the wait_lock
+ *
+ * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
+ * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
+@@ -291,7 +292,8 @@ void __sched rt_mutex_proxy_unlock(struc
+ */
+ int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter,
+- struct task_struct *task)
++ struct task_struct *task,
++ struct wake_q_head *wake_q)
+ {
+ int ret;
+
+@@ -302,7 +304,7 @@ int __sched __rt_mutex_start_proxy_lock(
+
+ /* We enforce deadlock detection for futexes */
+ ret = task_blocks_on_rt_mutex(lock, waiter, task, NULL,
+- RT_MUTEX_FULL_CHAINWALK);
++ RT_MUTEX_FULL_CHAINWALK, wake_q);
+
+ if (ret && !rt_mutex_owner(lock)) {
+ /*
+@@ -341,12 +343,16 @@ int __sched rt_mutex_start_proxy_lock(st
+ struct task_struct *task)
+ {
+ int ret;
++ DEFINE_WAKE_Q(wake_q);
+
+ raw_spin_lock_irq(&lock->wait_lock);
+- ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
++ ret = __rt_mutex_start_proxy_lock(lock, waiter, task, &wake_q);
+ if (unlikely(ret))
+ remove_waiter(lock, waiter);
++ preempt_disable();
+ raw_spin_unlock_irq(&lock->wait_lock);
++ wake_up_q(&wake_q);
++ preempt_enable();
+
+ return ret;
+ }
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -83,7 +83,8 @@ extern void rt_mutex_init_proxy_locked(s
+ extern void rt_mutex_proxy_unlock(struct rt_mutex_base *lock);
+ extern int __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter,
+- struct task_struct *task);
++ struct task_struct *task,
++ struct wake_q_head *);
+ extern int rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *task);
+--- a/kernel/locking/rwbase_rt.c
++++ b/kernel/locking/rwbase_rt.c
+@@ -69,6 +69,7 @@ static int __sched __rwbase_read_lock(st
+ unsigned int state)
+ {
+ struct rt_mutex_base *rtm = &rwb->rtmutex;
++ DEFINE_WAKE_Q(wake_q);
+ int ret;
+
+ rwbase_pre_schedule();
+@@ -110,7 +111,7 @@ static int __sched __rwbase_read_lock(st
+ * For rwlocks this returns 0 unconditionally, so the below
+ * !ret conditionals are optimized out.
+ */
+- ret = rwbase_rtmutex_slowlock_locked(rtm, state);
++ ret = rwbase_rtmutex_slowlock_locked(rtm, state, &wake_q);
+
+ /*
+ * On success the rtmutex is held, so there can't be a writer
+@@ -121,7 +122,12 @@ static int __sched __rwbase_read_lock(st
+ */
+ if (!ret)
+ atomic_inc(&rwb->readers);
++
++ preempt_disable();
+ raw_spin_unlock_irq(&rtm->wait_lock);
++ wake_up_q(&wake_q);
++ preempt_enable();
++
+ if (!ret)
+ rwbase_rtmutex_unlock(rtm);
+
+--- a/kernel/locking/rwsem.c
++++ b/kernel/locking/rwsem.c
+@@ -1413,8 +1413,8 @@ static inline void __downgrade_write(str
+ #define rwbase_rtmutex_lock_state(rtm, state) \
+ __rt_mutex_lock(rtm, state)
+
+-#define rwbase_rtmutex_slowlock_locked(rtm, state) \
+- __rt_mutex_slowlock_locked(rtm, NULL, state)
++#define rwbase_rtmutex_slowlock_locked(rtm, state, wq) \
++ __rt_mutex_slowlock_locked(rtm, NULL, state, wq)
+
+ #define rwbase_rtmutex_unlock(rtm) \
+ __rt_mutex_unlock(rtm)
+--- a/kernel/locking/spinlock_rt.c
++++ b/kernel/locking/spinlock_rt.c
+@@ -162,9 +162,10 @@ rwbase_rtmutex_lock_state(struct rt_mute
+ }
+
+ static __always_inline int
+-rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state)
++rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state,
++ struct wake_q_head *wake_q)
+ {
+- rtlock_slowlock_locked(rtm);
++ rtlock_slowlock_locked(rtm, wake_q);
+ return 0;
+ }
+
+--- a/kernel/locking/ww_mutex.h
++++ b/kernel/locking/ww_mutex.h
+@@ -275,7 +275,7 @@ __ww_ctx_less(struct ww_acquire_ctx *a,
+ */
+ static bool
+ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
+- struct ww_acquire_ctx *ww_ctx)
++ struct ww_acquire_ctx *ww_ctx, struct wake_q_head *wake_q)
+ {
+ if (!ww_ctx->is_wait_die)
+ return false;
+@@ -284,7 +284,7 @@ __ww_mutex_die(struct MUTEX *lock, struc
+ #ifndef WW_RT
+ debug_mutex_wake_waiter(lock, waiter);
+ #endif
+- wake_up_process(waiter->task);
++ wake_q_add(wake_q, waiter->task);
+ }
+
+ return true;
+@@ -299,7 +299,8 @@ __ww_mutex_die(struct MUTEX *lock, struc
+ */
+ static bool __ww_mutex_wound(struct MUTEX *lock,
+ struct ww_acquire_ctx *ww_ctx,
+- struct ww_acquire_ctx *hold_ctx)
++ struct ww_acquire_ctx *hold_ctx,
++ struct wake_q_head *wake_q)
+ {
+ struct task_struct *owner = __ww_mutex_owner(lock);
+
+@@ -331,7 +332,7 @@ static bool __ww_mutex_wound(struct MUTE
+ * wakeup pending to re-read the wounded state.
+ */
+ if (owner != current)
+- wake_up_process(owner);
++ wake_q_add(wake_q, owner);
+
+ return true;
+ }
+@@ -352,7 +353,8 @@ static bool __ww_mutex_wound(struct MUTE
+ * The current task must not be on the wait list.
+ */
+ static void
+-__ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx)
++__ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx,
++ struct wake_q_head *wake_q)
+ {
+ struct MUTEX_WAITER *cur;
+
+@@ -364,8 +366,8 @@ __ww_mutex_check_waiters(struct MUTEX *l
+ if (!cur->ww_ctx)
+ continue;
+
+- if (__ww_mutex_die(lock, cur, ww_ctx) ||
+- __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx))
++ if (__ww_mutex_die(lock, cur, ww_ctx, wake_q) ||
++ __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx, wake_q))
+ break;
+ }
+ }
+@@ -377,6 +379,8 @@ __ww_mutex_check_waiters(struct MUTEX *l
+ static __always_inline void
+ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+ {
++ DEFINE_WAKE_Q(wake_q);
++
+ ww_mutex_lock_acquired(lock, ctx);
+
+ /*
+@@ -405,8 +409,11 @@ ww_mutex_set_context_fastpath(struct ww_
+ * die or wound us.
+ */
+ lock_wait_lock(&lock->base);
+- __ww_mutex_check_waiters(&lock->base, ctx);
++ __ww_mutex_check_waiters(&lock->base, ctx, &wake_q);
++ preempt_disable();
+ unlock_wait_lock(&lock->base);
++ wake_up_q(&wake_q);
++ preempt_enable();
+ }
+
+ static __always_inline int
+@@ -488,7 +495,8 @@ __ww_mutex_check_kill(struct MUTEX *lock
+ static inline int
+ __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter,
+ struct MUTEX *lock,
+- struct ww_acquire_ctx *ww_ctx)
++ struct ww_acquire_ctx *ww_ctx,
++ struct wake_q_head *wake_q)
+ {
+ struct MUTEX_WAITER *cur, *pos = NULL;
+ bool is_wait_die;
+@@ -532,7 +540,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITE
+ pos = cur;
+
+ /* Wait-Die: ensure younger waiters die. */
+- __ww_mutex_die(lock, cur, ww_ctx);
++ __ww_mutex_die(lock, cur, ww_ctx, wake_q);
+ }
+
+ __ww_waiter_add(lock, waiter, pos);
+@@ -550,7 +558,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITE
+ * such that either we or the fastpath will wound @ww->ctx.
+ */
+ smp_mb();
+- __ww_mutex_wound(lock, ww_ctx, ww->ctx);
++ __ww_mutex_wound(lock, ww_ctx, ww->ctx, wake_q);
+ }
+
+ return 0;
--- /dev/null
+From stable+bounces-266508-greg=kroah.com@vger.kernel.org Tue Jun 16 20:06:09 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jun 2026 15:06:01 -0400
+Subject: locking/rtmutex: Skip remove_waiter() when waiter is not enqueued
+To: stable@vger.kernel.org
+Cc: Davidlohr Bueso <dave@stgolabs.net>, syzbot+78147abe6c524f183ee9@syzkaller.appspotmail.com, Thomas Gleixner <tglx@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260616190601.3487860-2-sashal@kernel.org>
+
+From: Davidlohr Bueso <dave@stgolabs.net>
+
+[ Upstream commit 40a25d59e85b3c8709ac2424d44f65610467871e ]
+
+syzbot triggered the following splat in remove_waiter() via
+FUTEX_CMP_REQUEUE_PI:
+
+ KASAN: null-ptr-deref in range [0x0000000000000a88-0x0000000000000a8f]
+ class_raw_spinlock_constructor
+ remove_waiter+0x159/0x1200 kernel/locking/rtmutex.c:1561
+ rt_mutex_start_proxy_lock+0x103/0x120
+ futex_requeue+0x10e4/0x20d0
+ __x64_sys_futex+0x34f/0x4d0
+
+task_blocks_on_rt_mutex() does not arm the waiter upon deadlock detection,
+leaving waiter->task nil, where 3bfdc63936dd ("rtmutex: Use waiter::task instead
+of current in remove_waiter()") made this fatal.
+
+Furthermore, rt_mutex_start_proxy_lock() should not be calling into remove_waiter()
+upon successfully grabbing the rtmutex. 1a1fb985f2e2 ("futex: Handle early deadlock
+return correctly") moved the remove_waiter() out of __rt_mutex_start_proxy_lock()
+(where 'ret' was only ever 0 or < 0) into the wrapper. Tighten this check to
+account for try_to_take_rt_mutex().
+
+Fixes: 3bfdc63936dd ("rtmutex: Use waiter::task instead of current in remove_waiter()")
+Reported-by: syzbot+78147abe6c524f183ee9@syzkaller.appspotmail.com
+Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
+Signed-off-by: Thomas Gleixner <tglx@kernel.org>
+Cc: stable@vger.kernel.org
+Closes: https://lore.kernel.org/all/69f114ac.050a0220.ac8b.0003.GAE@google.com/
+Link: https://patch.msgid.link/20260507112913.1019537-1-dave@stgolabs.net
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/locking/rtmutex.c | 3 +++
+ kernel/locking/rtmutex_api.c | 2 +-
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1550,6 +1550,9 @@ static void __sched remove_waiter(struct
+
+ lockdep_assert_held(&lock->wait_lock);
+
++ if (!waiter_task) /* never enqueued */
++ return;
++
+ scoped_guard(raw_spinlock, &waiter_task->pi_lock) {
+ rt_mutex_dequeue(lock, waiter);
+ waiter_task->pi_blocked_on = NULL;
+--- a/kernel/locking/rtmutex_api.c
++++ b/kernel/locking/rtmutex_api.c
+@@ -347,7 +347,7 @@ int __sched rt_mutex_start_proxy_lock(st
+
+ raw_spin_lock_irq(&lock->wait_lock);
+ ret = __rt_mutex_start_proxy_lock(lock, waiter, task, &wake_q);
+- if (unlikely(ret))
++ if (unlikely(ret < 0))
+ remove_waiter(lock, waiter);
+ preempt_disable();
+ raw_spin_unlock_irq(&lock->wait_lock);
--- /dev/null
+From 3wnExagcKBsMtykn007pxxpun.lxvp0nptqurw36ox3wmj2rxw.x0p@flex--kpberry.bounces.google.com Tue Jun 16 16:54:43 2026
+From: Kevin Berry <kpberry@google.com>
+Date: Tue, 16 Jun 2026 15:54:25 +0000
+Subject: net: bonding: add broadcast_neighbor option for 802.3ad
+To: stable@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, bestswngs@gmail.com, chenglongtang@google.com, joneslee@google.com, kpberry@google.com, pabeni@redhat.com, rnj@google.com, sashal@kernel.org, xmei5@asu.edu, Tonghao Zhang <tonghao@bamaicloud.com>, Jay Vosburgh <jv@jvosburgh.net>, "David S. Miller" <davem@davemloft.net>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>, Simon Horman <horms@kernel.org>, Jonathan Corbet <corbet@lwn.net>, Andrew Lunn <andrew+netdev@lunn.ch>, Steven Rostedt <rostedt@goodmis.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Nikolay Aleksandrov <razor@blackwall.org>, Zengbing Tu <tuzengbing@didiglobal.com>
+Message-ID: <20260616155432.2093908-3-kpberry@google.com>
+
+From: Tonghao Zhang <tonghao@bamaicloud.com>
+
+[ Upstream commit ce7a381697cb3958ffe0b45e5028ac69444e9288 ]
+
+Stacking technology is a type of technology used to expand ports on
+Ethernet switches. It is widely used as a common access method in
+large-scale Internet data center architectures. Years of practice
+have proved that stacking technology has advantages and disadvantages
+in high-reliability network architecture scenarios. For instance,
+in stacking networking arch, conventional switch system upgrades
+require multiple stacked devices to restart at the same time.
+Therefore, it is inevitable that the business will be interrupted
+for a while. It is for this reason that "no-stacking" in data centers
+has become a trend. Additionally, when the stacking link connecting
+the switches fails or is abnormal, the stack will split. Although it is
+not common, it still happens in actual operation. The problem is that
+after the split, it is equivalent to two switches with the same
+configuration appearing in the network, causing network configuration
+conflicts and ultimately interrupting the services carried by the
+stacking system.
+
+To improve network stability, "non-stacking" solutions have been
+increasingly adopted, particularly by public cloud providers and
+tech companies like Alibaba, Tencent, and Didi. "non-stacking" is
+a method of mimicking switch stacking that convinces a LACP peer,
+bonding in this case, connected to a set of "non-stacked" switches
+that all of its ports are connected to a single switch
+(i.e., LACP aggregator), as if those switches were stacked. This
+enables the LACP peer's ports to aggregate together, and requires
+(a) special switch configuration, described in the linked article,
+and (b) modifications to the bonding 802.3ad (LACP) mode to send
+all ARP/ND packets across all ports of the active aggregator.
+
+Note that, with multiple aggregators, the current broadcast mode
+logic will send only packets to the selected aggregator(s).
+
+ +-----------+ +-----------+
+ | switch1 | | switch2 |
+ +-----------+ +-----------+
+ ^ ^
+ | |
+ +-----------------+
+ | bond4 lacp |
+ +-----------------+
+ | |
+ | NIC1 | NIC2
+ +-----------------+
+ | server |
+ +-----------------+
+
+- https://www.ruijie.com/fr-fr/support/tech-gallery/de-stack-data-center-network-architecture/
+
+Cc: Jay Vosburgh <jv@jvosburgh.net>
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: Simon Horman <horms@kernel.org>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Andrew Lunn <andrew+netdev@lunn.ch>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Nikolay Aleksandrov <razor@blackwall.org>
+Signed-off-by: Tonghao Zhang <tonghao@bamaicloud.com>
+Signed-off-by: Zengbing Tu <tuzengbing@didiglobal.com>
+Link: https://patch.msgid.link/84d0a044514157bb856a10b6d03a1028c4883561.1751031306.git.tonghao@bamaicloud.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Kevin Berry <kpberry@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/networking/bonding.rst | 6 +++
+ drivers/net/bonding/bond_main.c | 66 +++++++++++++++++++++++++++++++----
+ drivers/net/bonding/bond_options.c | 42 ++++++++++++++++++++++
+ include/net/bond_options.h | 1
+ include/net/bonding.h | 3 +
+ 5 files changed, 112 insertions(+), 6 deletions(-)
+
+--- a/Documentation/networking/bonding.rst
++++ b/Documentation/networking/bonding.rst
+@@ -562,6 +562,12 @@ lacp_rate
+
+ The default is slow.
+
++broadcast_neighbor
++
++ Option specifying whether to broadcast ARP/ND packets to all
++ active slaves. This option has no effect in modes other than
++ 802.3ad mode. The default is off (0).
++
+ max_bonds
+
+ Specifies the number of bonding devices to create for this
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -211,6 +211,8 @@ atomic_t netpoll_block_tx = ATOMIC_INIT(
+
+ unsigned int bond_net_id __read_mostly;
+
++DEFINE_STATIC_KEY_FALSE(bond_bcast_neigh_enabled);
++
+ static const struct flow_dissector_key flow_keys_bonding_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+@@ -4445,6 +4447,9 @@ static int bond_open(struct net_device *
+
+ bond_for_each_slave(bond, slave, iter)
+ dev_mc_add(slave->dev, lacpdu_mcast_addr);
++
++ if (bond->params.broadcast_neighbor)
++ static_branch_inc(&bond_bcast_neigh_enabled);
+ }
+
+ if (bond_mode_can_use_xmit_hash(bond))
+@@ -4468,6 +4473,10 @@ static int bond_close(struct net_device
+ if (bond_is_lb(bond))
+ bond_alb_deinitialize(bond);
+
++ if (BOND_MODE(bond) == BOND_MODE_8023AD &&
++ bond->params.broadcast_neighbor)
++ static_branch_dec(&bond_bcast_neigh_enabled);
++
+ if (bond_uses_primary(bond)) {
+ rcu_read_lock();
+ slave = rcu_dereference(bond->curr_active_slave);
+@@ -5304,6 +5313,37 @@ static struct slave *bond_xdp_xmit_3ad_x
+ return slaves->arr[hash % count];
+ }
+
++static bool bond_should_broadcast_neighbor(struct sk_buff *skb,
++ struct net_device *dev)
++{
++ struct bonding *bond = netdev_priv(dev);
++ struct {
++ struct ipv6hdr ip6;
++ struct icmp6hdr icmp6;
++ } *combined, _combined;
++
++ if (!static_branch_unlikely(&bond_bcast_neigh_enabled))
++ return false;
++
++ if (!bond->params.broadcast_neighbor)
++ return false;
++
++ if (skb->protocol == htons(ETH_P_ARP))
++ return true;
++
++ if (skb->protocol == htons(ETH_P_IPV6)) {
++ combined = skb_header_pointer(skb, skb_mac_header_len(skb),
++ sizeof(_combined),
++ &_combined);
++ if (combined && combined->ip6.nexthdr == NEXTHDR_ICMP &&
++ (combined->icmp6.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
++ combined->icmp6.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT))
++ return true;
++ }
++
++ return false;
++}
++
+ /* Use this Xmit function for 3AD as well as XOR modes. The current
+ * usable slave array is formed in the control path. The xmit function
+ * just calculates hash and sends the packet out.
+@@ -5323,17 +5363,27 @@ static netdev_tx_t bond_3ad_xor_xmit(str
+ return bond_tx_drop(dev, skb);
+ }
+
+-/* in broadcast mode, we send everything to all usable interfaces. */
++/* in broadcast mode, we send everything to all or usable slave interfaces.
++ * under rcu_read_lock when this function is called.
++ */
+ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb,
+- struct net_device *bond_dev)
++ struct net_device *bond_dev,
++ bool all_slaves)
+ {
+ struct bonding *bond = netdev_priv(bond_dev);
+- struct slave *slave = NULL;
+- struct list_head *iter;
++ struct bond_up_slave *slaves;
+ bool xmit_suc = false;
+ bool skb_used = false;
++ int slaves_count, i;
+
+- bond_for_each_slave_rcu(bond, slave, iter) {
++ if (all_slaves)
++ slaves = rcu_dereference(bond->all_slaves);
++ else
++ slaves = rcu_dereference(bond->usable_slaves);
++
++ slaves_count = slaves ? READ_ONCE(slaves->count) : 0;
++ for (i = 0; i < slaves_count; i++) {
++ struct slave *slave = slaves->arr[i];
+ struct sk_buff *skb2;
+
+ if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP))
+@@ -5571,10 +5621,13 @@ static netdev_tx_t __bond_start_xmit(str
+ case BOND_MODE_ACTIVEBACKUP:
+ return bond_xmit_activebackup(skb, dev);
+ case BOND_MODE_8023AD:
++ if (bond_should_broadcast_neighbor(skb, dev))
++ return bond_xmit_broadcast(skb, dev, false);
++ fallthrough;
+ case BOND_MODE_XOR:
+ return bond_3ad_xor_xmit(skb, dev);
+ case BOND_MODE_BROADCAST:
+- return bond_xmit_broadcast(skb, dev);
++ return bond_xmit_broadcast(skb, dev, true);
+ case BOND_MODE_ALB:
+ return bond_alb_xmit(skb, dev);
+ case BOND_MODE_TLB:
+@@ -6450,6 +6503,7 @@ static int __init bond_check_params(stru
+ eth_zero_addr(params->ad_actor_system);
+ params->ad_user_port_key = ad_user_port_key;
+ params->coupled_control = 1;
++ params->broadcast_neighbor = 0;
+ if (packets_per_slave > 0) {
+ params->reciprocal_packets_per_slave =
+ reciprocal_value(packets_per_slave);
+--- a/drivers/net/bonding/bond_options.c
++++ b/drivers/net/bonding/bond_options.c
+@@ -87,6 +87,8 @@ static int bond_option_missed_max_set(st
+ const struct bond_opt_value *newval);
+ static int bond_option_coupled_control_set(struct bonding *bond,
+ const struct bond_opt_value *newval);
++static int bond_option_broadcast_neigh_set(struct bonding *bond,
++ const struct bond_opt_value *newval);
+
+ static const struct bond_opt_value bond_mode_tbl[] = {
+ { "balance-rr", BOND_MODE_ROUNDROBIN, BOND_VALFLAG_DEFAULT},
+@@ -240,6 +242,12 @@ static const struct bond_opt_value bond_
+ { NULL, -1, 0},
+ };
+
++static const struct bond_opt_value bond_broadcast_neigh_tbl[] = {
++ { "off", 0, BOND_VALFLAG_DEFAULT},
++ { "on", 1, 0},
++ { NULL, -1, 0}
++};
++
+ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
+ [BOND_OPT_MODE] = {
+ .id = BOND_OPT_MODE,
+@@ -513,6 +521,14 @@ static const struct bond_option bond_opt
+ .flags = BOND_OPTFLAG_IFDOWN,
+ .values = bond_coupled_control_tbl,
+ .set = bond_option_coupled_control_set,
++ },
++ [BOND_OPT_BROADCAST_NEIGH] = {
++ .id = BOND_OPT_BROADCAST_NEIGH,
++ .name = "broadcast_neighbor",
++ .desc = "Broadcast neighbor packets to all active slaves",
++ .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)),
++ .values = bond_broadcast_neigh_tbl,
++ .set = bond_option_broadcast_neigh_set,
+ }
+ };
+
+@@ -894,6 +910,13 @@ static int bond_option_mode_set(struct b
+ bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
+ bond->params.mode = newval->value;
+
++ /* When changing mode, the bond device is down, we may reduce
++ * the bond_bcast_neigh_enabled in bond_close() if broadcast_neighbor
++ * enabled in 8023ad mode. Therefore, only clear broadcast_neighbor
++ * to 0.
++ */
++ bond->params.broadcast_neighbor = 0;
++
+ if (bond->dev->reg_state == NETREG_REGISTERED) {
+ bool update = false;
+
+@@ -1843,3 +1866,22 @@ static int bond_option_coupled_control_s
+ bond->params.coupled_control = newval->value;
+ return 0;
+ }
++
++static int bond_option_broadcast_neigh_set(struct bonding *bond,
++ const struct bond_opt_value *newval)
++{
++ if (bond->params.broadcast_neighbor == newval->value)
++ return 0;
++
++ bond->params.broadcast_neighbor = newval->value;
++ if (bond->dev->flags & IFF_UP) {
++ if (bond->params.broadcast_neighbor)
++ static_branch_inc(&bond_bcast_neigh_enabled);
++ else
++ static_branch_dec(&bond_bcast_neigh_enabled);
++ }
++
++ netdev_dbg(bond->dev, "Setting broadcast_neighbor to %s (%llu)\n",
++ newval->string, newval->value);
++ return 0;
++}
+--- a/include/net/bond_options.h
++++ b/include/net/bond_options.h
+@@ -77,6 +77,7 @@ enum {
+ BOND_OPT_NS_TARGETS,
+ BOND_OPT_PRIO,
+ BOND_OPT_COUPLED_CONTROL,
++ BOND_OPT_BROADCAST_NEIGH,
+ BOND_OPT_LAST
+ };
+
+--- a/include/net/bonding.h
++++ b/include/net/bonding.h
+@@ -115,6 +115,8 @@ static inline int is_netpoll_tx_blocked(
+ #define is_netpoll_tx_blocked(dev) (0)
+ #endif
+
++DECLARE_STATIC_KEY_FALSE(bond_bcast_neigh_enabled);
++
+ struct bond_params {
+ int mode;
+ int xmit_policy;
+@@ -149,6 +151,7 @@ struct bond_params {
+ struct in6_addr ns_targets[BOND_MAX_NS_TARGETS];
+ #endif
+ int coupled_control;
++ int broadcast_neighbor;
+
+ /* 2 bytes of padding : see ether_addr_equal_64bits() */
+ u8 ad_actor_system[ETH_ALEN + 2];
--- /dev/null
+From 3yHExagcKBskz4qt66Dv33v0t.r31v6tvzw0x29Cu392sp8x32.36v@flex--kpberry.bounces.google.com Tue Jun 16 16:54:49 2026
+From: Kevin Berry <kpberry@google.com>
+Date: Tue, 16 Jun 2026 15:54:29 +0000
+Subject: net: bonding: fix use-after-free in bond_xmit_broadcast()
+To: stable@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, bestswngs@gmail.com, chenglongtang@google.com, joneslee@google.com, kpberry@google.com, pabeni@redhat.com, rnj@google.com, sashal@kernel.org, xmei5@asu.edu
+Message-ID: <20260616155432.2093908-7-kpberry@google.com>
+
+From: Xiang Mei <xmei5@asu.edu>
+
+[ Upstream commit 2884bf72fb8f03409e423397319205de48adca16 ]
+
+bond_xmit_broadcast() reuses the original skb for the last slave
+(determined by bond_is_last_slave()) and clones it for others.
+Concurrent slave enslave/release can mutate the slave list during
+RCU-protected iteration, changing which slave is "last" mid-loop.
+This causes the original skb to be double-consumed (double-freed).
+
+Replace the racy bond_is_last_slave() check with a simple index
+comparison (i + 1 == slaves_count) against the pre-snapshot slave
+count taken via READ_ONCE() before the loop. This preserves the
+zero-copy optimization for the last slave while making the "last"
+determination stable against concurrent list mutations.
+
+The UAF can trigger the following crash:
+
+==================================================================
+BUG: KASAN: slab-use-after-free in skb_clone
+Read of size 8 at addr ffff888100ef8d40 by task exploit/147
+
+CPU: 1 UID: 0 PID: 147 Comm: exploit Not tainted 7.0.0-rc3+ #4 PREEMPTLAZY
+Call Trace:
+ <TASK>
+ dump_stack_lvl (lib/dump_stack.c:123)
+ print_report (mm/kasan/report.c:379 mm/kasan/report.c:482)
+ kasan_report (mm/kasan/report.c:597)
+ skb_clone (include/linux/skbuff.h:1724 include/linux/skbuff.h:1792 include/linux/skbuff.h:3396 net/core/skbuff.c:2108)
+ bond_xmit_broadcast (drivers/net/bonding/bond_main.c:5334)
+ bond_start_xmit (drivers/net/bonding/bond_main.c:5567 drivers/net/bonding/bond_main.c:5593)
+ dev_hard_start_xmit (include/linux/netdevice.h:5325 include/linux/netdevice.h:5334 net/core/dev.c:3871 net/core/dev.c:3887)
+ __dev_queue_xmit (include/linux/netdevice.h:3601 net/core/dev.c:4838)
+ ip6_finish_output2 (include/net/neighbour.h:540 include/net/neighbour.h:554 net/ipv6/ip6_output.c:136)
+ ip6_finish_output (net/ipv6/ip6_output.c:208 net/ipv6/ip6_output.c:219)
+ ip6_output (net/ipv6/ip6_output.c:250)
+ ip6_send_skb (net/ipv6/ip6_output.c:1985)
+ udp_v6_send_skb (net/ipv6/udp.c:1442)
+ udpv6_sendmsg (net/ipv6/udp.c:1733)
+ __sys_sendto (net/socket.c:730 net/socket.c:742 net/socket.c:2206)
+ __x64_sys_sendto (net/socket.c:2209)
+ do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94)
+ entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
+ </TASK>
+
+Allocated by task 147:
+
+Freed by task 147:
+
+The buggy address belongs to the object at ffff888100ef8c80
+ which belongs to the cache skbuff_head_cache of size 224
+The buggy address is located 192 bytes inside of
+ freed 224-byte region [ffff888100ef8c80, ffff888100ef8d60)
+
+Memory state around the buggy address:
+ ffff888100ef8c00: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff888100ef8c80: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+>ffff888100ef8d00: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
+ ^
+ ffff888100ef8d80: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb
+ ffff888100ef8e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+==================================================================
+
+Fixes: 4e5bd03ae346 ("net: bonding: fix bond_xmit_broadcast return value error bug")
+Reported-by: Weiming Shi <bestswngs@gmail.com>
+Signed-off-by: Xiang Mei <xmei5@asu.edu>
+Link: https://patch.msgid.link/20260326075553.3960562-1-xmei5@asu.edu
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Kevin Berry <kpberry@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -5391,7 +5391,7 @@ static netdev_tx_t bond_xmit_broadcast(s
+ if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP))
+ continue;
+
+- if (bond_is_last_slave(bond, slave)) {
++ if (i + 1 == slaves_count) {
+ skb2 = skb;
+ skb_used = true;
+ } else {
--- /dev/null
+From stable+bounces-266792-greg=kroah.com@vger.kernel.org Wed Jun 17 15:28:38 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Jun 2026 10:28:30 -0400
+Subject: net: phonet: free phonet_device after RCU grace period
+To: stable@vger.kernel.org
+Cc: "Santosh Kalluri" <santosh.kalluri129@gmail.com>, "Rémi Denis-Courmont" <remi@remlab.net>, "Simon Horman" <horms@kernel.org>, "Jakub Kicinski" <kuba@kernel.org>, "Sasha Levin" <sashal@kernel.org>
+Message-ID: <20260617142830.3939916-3-sashal@kernel.org>
+
+From: Santosh Kalluri <santosh.kalluri129@gmail.com>
+
+[ Upstream commit 71de0177b28da751f407581a4515cf4d762f6296 ]
+
+phonet_device_destroy() removes a phonet_device from the per-net device
+list with list_del_rcu(), but frees it immediately. RCU readers walking
+the same list can still hold a pointer to the object after it has been
+removed, leading to a slab-use-after-free.
+
+Use kfree_rcu(), matching the lifetime rule already used by
+phonet_address_del() for the same object type.
+
+Fixes: eeb74a9d45f7 ("Phonet: convert devices list to RCU")
+Cc: stable@vger.kernel.org
+Signed-off-by: Santosh Kalluri <santosh.kalluri129@gmail.com>
+Acked-by: Rémi Denis-Courmont <remi@remlab.net>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/phonet/pn_dev.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/phonet/pn_dev.c
++++ b/net/phonet/pn_dev.c
+@@ -105,7 +105,7 @@ static void phonet_device_destroy(struct
+ for_each_set_bit(addr, pnd->addrs, 64)
+ phonet_address_notify(net, RTM_DELADDR, ifindex, addr);
+
+- kfree(pnd);
++ kfree_rcu(pnd, rcu);
+ }
+ }
+
--- /dev/null
+From stable+bounces-266790-greg=kroah.com@vger.kernel.org Wed Jun 17 15:29:18 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Jun 2026 10:28:28 -0400
+Subject: phonet: Pass ifindex to fill_addr().
+To: stable@vger.kernel.org
+Cc: Kuniyuki Iwashima <kuniyu@amazon.com>, Eric Dumazet <edumazet@google.com>, Paolo Abeni <pabeni@redhat.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260617142830.3939916-1-sashal@kernel.org>
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 08a9572be36819b5d9011604edfa5db6c5062a7a ]
+
+We will convert addr_doit() and getaddr_dumpit() to RCU, both
+of which call fill_addr().
+
+The former will call phonet_address_notify() outside of RCU
+due to GFP_KERNEL, so dev will not be available in fill_addr().
+
+Let's pass ifindex directly to fill_addr().
+
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 71de0177b28d ("net: phonet: free phonet_device after RCU grace period")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/phonet/pn_netlink.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/net/phonet/pn_netlink.c
++++ b/net/phonet/pn_netlink.c
+@@ -19,7 +19,7 @@
+
+ /* Device address handling */
+
+-static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
++static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr,
+ u32 portid, u32 seq, int event);
+
+ void phonet_address_notify(int event, struct net_device *dev, u8 addr)
+@@ -31,7 +31,8 @@ void phonet_address_notify(int event, st
+ nla_total_size(1), GFP_KERNEL);
+ if (skb == NULL)
+ goto errout;
+- err = fill_addr(skb, dev, addr, 0, 0, event);
++
++ err = fill_addr(skb, dev->ifindex, addr, 0, 0, event);
+ if (err < 0) {
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+@@ -92,8 +93,8 @@ static int addr_doit(struct sk_buff *skb
+ return err;
+ }
+
+-static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
+- u32 portid, u32 seq, int event)
++static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr,
++ u32 portid, u32 seq, int event)
+ {
+ struct ifaddrmsg *ifm;
+ struct nlmsghdr *nlh;
+@@ -107,7 +108,7 @@ static int fill_addr(struct sk_buff *skb
+ ifm->ifa_prefixlen = 0;
+ ifm->ifa_flags = IFA_F_PERMANENT;
+ ifm->ifa_scope = RT_SCOPE_LINK;
+- ifm->ifa_index = dev->ifindex;
++ ifm->ifa_index = ifindex;
+ if (nla_put_u8(skb, IFA_LOCAL, addr))
+ goto nla_put_failure;
+ nlmsg_end(skb, nlh);
+@@ -140,7 +141,7 @@ static int getaddr_dumpit(struct sk_buff
+ if (addr_idx++ < addr_start_idx)
+ continue;
+
+- if (fill_addr(skb, pnd->netdev, addr << 2,
++ if (fill_addr(skb, pnd->netdev->ifindex, addr << 2,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_NEWADDR) < 0)
+ goto out;
--- /dev/null
+From stable+bounces-266791-greg=kroah.com@vger.kernel.org Wed Jun 17 15:28:37 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Jun 2026 10:28:29 -0400
+Subject: phonet: Pass net and ifindex to phonet_address_notify().
+To: stable@vger.kernel.org
+Cc: Kuniyuki Iwashima <kuniyu@amazon.com>, Eric Dumazet <edumazet@google.com>, Paolo Abeni <pabeni@redhat.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260617142830.3939916-2-sashal@kernel.org>
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 68ed5c38b512b734caf3da1f87db4a99fcfe3002 ]
+
+Currently, phonet_address_notify() fetches netns and ifindex from dev.
+
+Once addr_doit() is converted to RCU, phonet_address_notify() will be
+called outside of RCU due to GFP_KERNEL, and dev will be unavailable
+there.
+
+Let's pass net and ifindex to phonet_address_notify().
+
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 71de0177b28d ("net: phonet: free phonet_device after RCU grace period")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/phonet/pn_dev.h | 2 +-
+ net/phonet/pn_dev.c | 10 +++++++---
+ net/phonet/pn_netlink.c | 12 ++++++------
+ 3 files changed, 14 insertions(+), 10 deletions(-)
+
+--- a/include/net/phonet/pn_dev.h
++++ b/include/net/phonet/pn_dev.h
+@@ -38,7 +38,7 @@ int phonet_address_add(struct net_device
+ int phonet_address_del(struct net_device *dev, u8 addr);
+ u8 phonet_address_get(struct net_device *dev, u8 addr);
+ int phonet_address_lookup(struct net *net, u8 addr);
+-void phonet_address_notify(int event, struct net_device *dev, u8 addr);
++void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr);
+
+ int phonet_route_add(struct net_device *dev, u8 daddr);
+ int phonet_route_del(struct net_device *dev, u8 daddr);
+--- a/net/phonet/pn_dev.c
++++ b/net/phonet/pn_dev.c
+@@ -98,10 +98,13 @@ static void phonet_device_destroy(struct
+ mutex_unlock(&pndevs->lock);
+
+ if (pnd) {
++ struct net *net = dev_net(dev);
++ u32 ifindex = dev->ifindex;
+ u8 addr;
+
+ for_each_set_bit(addr, pnd->addrs, 64)
+- phonet_address_notify(RTM_DELADDR, dev, addr);
++ phonet_address_notify(net, RTM_DELADDR, ifindex, addr);
++
+ kfree(pnd);
+ }
+ }
+@@ -244,8 +247,9 @@ static int phonet_device_autoconf(struct
+ ret = phonet_address_add(dev, req.ifr_phonet_autoconf.device);
+ if (ret)
+ return ret;
+- phonet_address_notify(RTM_NEWADDR, dev,
+- req.ifr_phonet_autoconf.device);
++
++ phonet_address_notify(dev_net(dev), RTM_NEWADDR, dev->ifindex,
++ req.ifr_phonet_autoconf.device);
+ return 0;
+ }
+
+--- a/net/phonet/pn_netlink.c
++++ b/net/phonet/pn_netlink.c
+@@ -22,7 +22,7 @@
+ static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr,
+ u32 portid, u32 seq, int event);
+
+-void phonet_address_notify(int event, struct net_device *dev, u8 addr)
++void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr)
+ {
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+@@ -32,17 +32,17 @@ void phonet_address_notify(int event, st
+ if (skb == NULL)
+ goto errout;
+
+- err = fill_addr(skb, dev->ifindex, addr, 0, 0, event);
++ err = fill_addr(skb, ifindex, addr, 0, 0, event);
+ if (err < 0) {
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+- rtnl_notify(skb, dev_net(dev), 0,
+- RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL);
++
++ rtnl_notify(skb, net, 0, RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL);
+ return;
+ errout:
+- rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err);
++ rtnl_set_sk_err(net, RTNLGRP_PHONET_IFADDR, err);
+ }
+
+ static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = {
+@@ -89,7 +89,7 @@ static int addr_doit(struct sk_buff *skb
+ else
+ err = phonet_address_del(dev, pnaddr);
+ if (!err)
+- phonet_address_notify(nlh->nlmsg_type, dev, pnaddr);
++ phonet_address_notify(net, nlh->nlmsg_type, ifm->ifa_index, pnaddr);
+ return err;
+ }
+
--- /dev/null
+From 3wHExagcKBsErwilyy5nvvnsl.jvtnylnrospu14mv1ukh0pvu.vyn@flex--kpberry.bounces.google.com Tue Jun 16 16:54:41 2026
+From: Kevin Berry <kpberry@google.com>
+Date: Tue, 16 Jun 2026 15:54:24 +0000
+Subject: Revert "net: bonding: fix use-after-free in bond_xmit_broadcast()"
+To: stable@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, bestswngs@gmail.com, chenglongtang@google.com, joneslee@google.com, kpberry@google.com, pabeni@redhat.com, rnj@google.com, sashal@kernel.org, xmei5@asu.edu
+Message-ID: <20260616155432.2093908-2-kpberry@google.com>
+
+From: Kevin Berry <kpberry@google.com>
+
+This reverts commit 3453882f36c40d2339267093676585a89808a73d.
+
+There are two versions of this use-after-free fix commit: this one,
+which was written to avoid taking a dependency on ce7a381697cb3 ("net:
+bonding: add broadcast_neighbor option for 802.3ad"), and the original,
+simpler version 2884bf72fb8f ("net: bonding: fix use-after-free in
+bond_xmit_broadcast()"), which implicitly depends on the slave counting
+changes in ce7a381697cb3. In both the 6.1 and 6.6 stable branches,
+commit ce7a381697cb3 was included as a stable dep of c4f050ce06c56
+("bonding: 3ad: implement proper RCU rules for port->aggregator"), and
+the original version of this fix was subsequently applied.
+
+For consistency, and to be able to apply both bug fixes, we should
+revert this commit, apply the series for ce7a381697cb3 ("net: bonding:
+add broadcast_neighbor option for 802.3ad"), and then apply
+the original version of this fix, 2884bf72fb8f ("net: bonding: fix
+use-after-free in bond_xmit_broadcast()").
+
+Signed-off-by: Kevin Berry <kpberry@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c | 12 ++++--------
+ 1 file changed, 4 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -5328,22 +5328,18 @@ static netdev_tx_t bond_xmit_broadcast(s
+ struct net_device *bond_dev)
+ {
+ struct bonding *bond = netdev_priv(bond_dev);
+- struct bond_up_slave *slaves;
++ struct slave *slave = NULL;
++ struct list_head *iter;
+ bool xmit_suc = false;
+ bool skb_used = false;
+- int slaves_count, i;
+
+- slaves = rcu_dereference(bond->all_slaves);
+-
+- slaves_count = slaves ? READ_ONCE(slaves->count) : 0;
+- for (i = 0; i < slaves_count; i++) {
+- struct slave *slave = slaves->arr[i];
++ bond_for_each_slave_rcu(bond, slave, iter) {
+ struct sk_buff *skb2;
+
+ if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP))
+ continue;
+
+- if (i + 1 == slaves_count) {
++ if (bond_is_last_slave(bond, slave)) {
+ skb2 = skb;
+ skb_used = true;
+ } else {
--- /dev/null
+From stable+bounces-266865-greg=kroah.com@vger.kernel.org Wed Jun 17 18:21:35 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Jun 2026 13:21:26 -0400
+Subject: rxrpc: Fix the ACK parser to extract the SACK table for parsing
+To: stable@vger.kernel.org
+Cc: David Howells <dhowells@redhat.com>, Michael Bommarito <michael.bommarito@gmail.com>, Marc Dionne <marc.dionne@auristor.com>, Jeffrey Altman <jaltman@auristor.com>, Eric Dumazet <edumazet@google.com>, "David S. Miller" <davem@davemloft.net>, Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>, Simon Horman <horms@kernel.org>, linux-afs@lists.infradead.org, netdev@vger.kernel.org, stable@kernel.org, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260617172126.254222-1-sashal@kernel.org>
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 333b6d5bb9f87827ac2639c737bf9613dbae7253 ]
+
+Fix modification of the received skbuff in rxrpc_input_soft_acks() and a
+potential incorrect access of the buffer in a fragmented UDP packet (the
+packet would probably have to be deliberately pre-generated as fragmented)
+when AF_RXRPC tries to extract the contents of the SACK table by copying
+out the contents of the SACK table into a buffer before attempting to parse it.
+
+AF_RXRPC assumes that it can just call skb_condense() and then validly
+access the SACK table from skb->data and that it will be a flat buffer -
+but skb_condense() can silently fail to do anything under some
+circumstances.
+
+Note that whilst rxrpc_input_soft_acks() should be able to parse extended
+ACKs, the rest of AF_RXRPC doesn't currently support that.
+
+Further, there's then no need to call skb_condense() in rxrpc_input_ack(),
+so don't.
+
+Fixes: d57a3a151660 ("rxrpc: Save last ACK's SACK table rather than marking txbufs")
+Reported-by: Michael Bommarito <michael.bommarito@gmail.com>
+Link: https://lore.kernel.org/r/20260513180907.2061972-1-michael.bommarito@gmail.com
+Signed-off-by: David Howells <dhowells@redhat.com>
+cc: Marc Dionne <marc.dionne@auristor.com>
+cc: Jeffrey Altman <jaltman@auristor.com>
+cc: Eric Dumazet <edumazet@google.com>
+cc: "David S. Miller" <davem@davemloft.net>
+cc: Jakub Kicinski <kuba@kernel.org>
+cc: Paolo Abeni <pabeni@redhat.com>
+cc: Simon Horman <horms@kernel.org>
+cc: linux-afs@lists.infradead.org
+cc: netdev@vger.kernel.org
+cc: stable@kernel.org
+Link: https://patch.msgid.link/105362.1780573560@warthog.procyon.org.uk
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rxrpc/input.c | 21 ++++++++++++++++-----
+ 1 file changed, 16 insertions(+), 5 deletions(-)
+
+--- a/net/rxrpc/input.c
++++ b/net/rxrpc/input.c
+@@ -775,9 +775,23 @@ static void rxrpc_input_soft_acks(struct
+ rxrpc_seq_t since)
+ {
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+- unsigned int i, old_nacks = 0;
++ unsigned int i, old_nacks = 0, nsack;
+ rxrpc_seq_t lowest_nak = seq + sp->ack.nr_acks;
+- u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
++ u8 sack[256] __aligned(sizeof(unsigned long));
++ u8 *acks = sack;
++
++ /* AF_RXRPC assumes that it can access the SACK table directly from
++ * skb->data as a flat buffer, but the skb may be non-linear (e.g. a
++ * fragmented UDP packet) and skb_condense() can silently fail to
++ * linearise it. Copy the SACK table out into a local buffer before
++ * parsing it.
++ */
++ memset(sack, 0, sizeof(sack));
++ nsack = umin(sp->ack.nr_acks, 256);
++ if (skb_copy_bits(skb,
++ sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket),
++ sack, nsack) < 0)
++ return;
+
+ for (i = 0; i < sp->ack.nr_acks; i++) {
+ if (acks[i] == RXRPC_ACK_TYPE_ACK) {
+@@ -934,9 +948,6 @@ static void rxrpc_input_ack(struct rxrpc
+ skb_copy_bits(skb, ioffset, &trailer, sizeof(trailer)) < 0)
+ return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_trailer);
+
+- if (nr_acks > 0)
+- skb_condense(skb);
+-
+ if (call->cong_last_nack) {
+ since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack);
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
selftests-bpf-add-test-to-ensure-kprobe_multi-is-not.patch
acpi-scan-use-async-schedule-function-in-acpi_scan_c.patch
regulator-core-fix-locking-in-regulator_resolve_supply-error-path.patch
+dlm-prevent-npd-when-writing-a-positive-value-to-event_done.patch
+xfs-remove-the-expr-argument-to-xfs_test_error.patch
+xfs-fix-error-returns-in-cow-fork-repair.patch
+revert-net-bonding-fix-use-after-free-in-bond_xmit_broadcast.patch
+net-bonding-add-broadcast_neighbor-option-for-802.3ad.patch
+bonding-add-support-for-per-port-lacp-actor-priority.patch
+bonding-print-churn-state-via-netlink.patch
+bonding-3ad-implement-proper-rcu-rules-for-port-aggregator.patch
+net-bonding-fix-use-after-free-in-bond_xmit_broadcast.patch
+bonding-fix-null-pointer-dereference-in-actor_port_prio-setting.patch
+staging-rtl8723bs-fix-buffer-over-read-in-rtw_update_protection.patch
+fhandle-fix-uaf-due-to-unlocked-mnt_ns-read-in-may_decode_fh.patch
+drivers-hv-vmbus-improve-the-logic-of-reserving-fb_mmio-on-gen2-vms.patch
+hv-utils-handle-and-propagate-errors-in-kvp_register.patch
+futex-requeue-prevent-null-pointer-dereference-in-remove_waiter-on-self-deadlock.patch
+locking-mutex-remove-wakeups-from-under-mutex-wait_lock.patch
+locking-rtmutex-skip-remove_waiter-when-waiter-is-not-enqueued.patch
+phonet-pass-ifindex-to-fill_addr.patch
+phonet-pass-net-and-ifindex-to-phonet_address_notify.patch
+net-phonet-free-phonet_device-after-rcu-grace-period.patch
+rxrpc-fix-the-ack-parser-to-extract-the-sack-table-for-parsing.patch
+fuse-re-lock-request-before-replacing-page-cache-folio.patch
--- /dev/null
+From sashal@kernel.org Tue Jun 16 16:55:23 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jun 2026 11:55:20 -0400
+Subject: staging: rtl8723bs: fix buffer over-read in rtw_update_protection
+To: stable@vger.kernel.org
+Cc: Salman Alghamdi <me@cipherat.com>, Luka Gejak <luka.gejak@linux.dev>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260616155520.3322698-1-sashal@kernel.org>
+
+From: Salman Alghamdi <me@cipherat.com>
+
+[ Upstream commit 514ab98364595007d4557ecc85d7e5f012c504d3 ]
+
+rtw_update_protection() is called with a pointer offset into the
+ies buffer but the full ie_length is passed, causing a potential
+buffer over-read.
+
+Fixes: e945c43df60b ("Staging: rtl8723bs: Delete dead code from update_current_network()")
+Fixes: d3fcee1b78a5 ("staging: rtl8723bs: fix camel case in struct wlan_bssid_ex")
+Reported-by: Luka Gejak <luka.gejak@linux.dev>
+Closes: https://lore.kernel.org/linux-staging/DI2H39EAAFBZ.3KI5NWN02AQ2S@linux.dev
+Cc: stable@vger.kernel.org
+Signed-off-by: Salman Alghamdi <me@cipherat.com>
+Reviewed-by: Luka Gejak <luka.gejak@linux.dev>
+Link: https://patch.msgid.link/20260508222649.23989-1-me@cipherat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/staging/rtl8723bs/core/rtw_mlme.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/drivers/staging/rtl8723bs/core/rtw_mlme.c
++++ b/drivers/staging/rtl8723bs/core/rtw_mlme.c
+@@ -449,8 +449,11 @@ static void update_current_network(struc
+
+ if ((check_fwstate(pmlmepriv, _FW_LINKED) == true) && (is_same_network(&(pmlmepriv->cur_network.network), pnetwork, 0))) {
+ update_network(&(pmlmepriv->cur_network.network), pnetwork, adapter, true);
++ if (pmlmepriv->cur_network.network.ie_length < sizeof(struct ndis_802_11_fix_ie))
++ return;
++
+ rtw_update_protection(adapter, (pmlmepriv->cur_network.network.ies) + sizeof(struct ndis_802_11_fix_ie),
+- pmlmepriv->cur_network.network.ie_length);
++ pmlmepriv->cur_network.network.ie_length - sizeof(struct ndis_802_11_fix_ie));
+ }
+ }
+
+@@ -1070,8 +1073,11 @@ static void rtw_joinbss_update_network(s
+ break;
+ }
+
++ if (cur_network->network.ie_length < sizeof(struct ndis_802_11_fix_ie))
++ return;
++
+ rtw_update_protection(padapter, (cur_network->network.ies) + sizeof(struct ndis_802_11_fix_ie),
+- (cur_network->network.ie_length));
++ (cur_network->network.ie_length - sizeof(struct ndis_802_11_fix_ie)));
+
+ rtw_update_ht_cap(padapter, cur_network->network.ies, cur_network->network.ie_length, (u8) cur_network->network.configuration.ds_config);
+ }
--- /dev/null
+From stable+bounces-263757-greg=kroah.com@vger.kernel.org Tue Jun 16 15:05:54 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jun 2026 10:02:14 -0400
+Subject: xfs: fix error returns in CoW fork repair
+To: stable@vger.kernel.org
+Cc: Yingjie Gao <gaoyingjie@uniontech.com>, "Darrick J. Wong" <djwong@kernel.org>, Carlos Maiolino <cem@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260616140214.3285019-2-sashal@kernel.org>
+
+From: Yingjie Gao <gaoyingjie@uniontech.com>
+
+[ Upstream commit fcf4faba9f986b3bb528da11913c9ec5d6e8f689 ]
+
+xrep_cow_find_bad() returns success after the cleanup labels even if
+AG setup, btree queries, or bitmap updates failed. This can make
+repair continue with an incomplete bad-file-offset bitmap instead of
+stopping at the original error.
+
+The force-rebuild path has a related cleanup problem. If
+xrep_cow_mark_file_range() fails, the function returns directly and
+skips the scrub AG context and perag cleanup.
+
+Let the force-rebuild path fall through to the existing cleanup code
+and return the saved error after cleanup.
+
+Fixes: dbbdbd008632 ("xfs: repair problems in CoW forks")
+Cc: <stable@vger.kernel.org> # v6.8
+Signed-off-by: Yingjie Gao <gaoyingjie@uniontech.com>
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/scrub/cow_repair.c | 7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/scrub/cow_repair.c
++++ b/fs/xfs/scrub/cow_repair.c
+@@ -297,18 +297,15 @@ xrep_cow_find_bad(
+ * on the debugging knob, replace everything in the CoW fork.
+ */
+ if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
+- XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
++ XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
+ error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
+ xc->irec.br_blockcount);
+- if (error)
+- return error;
+- }
+
+ out_sa:
+ xchk_ag_free(sc, &sc->sa);
+ out_pag:
+ xfs_perag_put(pag);
+- return 0;
++ return error;
+ }
+
+ /*
--- /dev/null
+From stable+bounces-263756-greg=kroah.com@vger.kernel.org Tue Jun 16 15:05:50 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jun 2026 10:02:13 -0400
+Subject: xfs: remove the expr argument to XFS_TEST_ERROR
+To: stable@vger.kernel.org
+Cc: Christoph Hellwig <hch@lst.de>, "Darrick J. Wong" <djwong@kernel.org>, Carlos Maiolino <cem@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260616140214.3285019-1-sashal@kernel.org>
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 807df3227d7674d7957c576551d552acf15bb96f ]
+
+Don't pass expr to XFS_TEST_ERROR. Most calls pass a constant false,
+and the places that do pass an expression become cleaner by moving it
+out.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Stable-dep-of: fcf4faba9f98 ("xfs: fix error returns in CoW fork repair")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ag_resv.c | 8 ++++----
+ fs/xfs/libxfs/xfs_alloc.c | 5 ++---
+ fs/xfs/libxfs/xfs_attr_leaf.c | 2 +-
+ fs/xfs/libxfs/xfs_bmap.c | 17 ++++++++---------
+ fs/xfs/libxfs/xfs_btree.c | 2 +-
+ fs/xfs/libxfs/xfs_da_btree.c | 2 +-
+ fs/xfs/libxfs/xfs_dir2.c | 2 +-
+ fs/xfs/libxfs/xfs_exchmaps.c | 4 ++--
+ fs/xfs/libxfs/xfs_ialloc.c | 2 +-
+ fs/xfs/libxfs/xfs_inode_buf.c | 4 ++--
+ fs/xfs/libxfs/xfs_inode_fork.c | 3 +--
+ fs/xfs/libxfs/xfs_refcount.c | 5 ++---
+ fs/xfs/libxfs/xfs_rmap.c | 2 +-
+ fs/xfs/scrub/cow_repair.c | 2 +-
+ fs/xfs/scrub/repair.c | 2 +-
+ fs/xfs/xfs_attr_item.c | 2 +-
+ fs/xfs/xfs_buf.c | 4 ++--
+ fs/xfs/xfs_error.c | 5 ++---
+ fs/xfs/xfs_error.h | 10 +++++-----
+ fs/xfs/xfs_inode.c | 28 +++++++++++++---------------
+ fs/xfs/xfs_iomap.c | 2 +-
+ fs/xfs/xfs_log.c | 8 ++++----
+ fs/xfs/xfs_trans_ail.c | 2 +-
+ 23 files changed, 58 insertions(+), 65 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_ag_resv.c
++++ b/fs/xfs/libxfs/xfs_ag_resv.c
+@@ -91,9 +91,9 @@ xfs_ag_resv_critical(
+ trace_xfs_ag_resv_critical(pag, type, avail);
+
+ /* Critically low if less than 10% or max btree height remains. */
+- return XFS_TEST_ERROR(avail < orig / 10 ||
+- avail < pag->pag_mount->m_agbtree_maxlevels,
+- pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
++ return avail < orig / 10 ||
++ avail < pag->pag_mount->m_agbtree_maxlevels ||
++ XFS_TEST_ERROR(pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
+ }
+
+ /*
+@@ -201,7 +201,7 @@ __xfs_ag_resv_init(
+ return -EINVAL;
+ }
+
+- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_AG_RESV_FAIL))
++ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_AG_RESV_FAIL))
+ error = -ENOSPC;
+ else
+ error = xfs_dec_fdblocks(mp, hidden_space, true);
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -3312,7 +3312,7 @@ xfs_agf_read_verify(
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_agf_verify(bp);
+- if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
++ if (fa || XFS_TEST_ERROR(mp, XFS_ERRTAG_ALLOC_READ_AGF))
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
+ }
+@@ -3986,8 +3986,7 @@ __xfs_free_extent(
+ ASSERT(len != 0);
+ ASSERT(type != XFS_AG_RESV_AGFL);
+
+- if (XFS_TEST_ERROR(false, mp,
+- XFS_ERRTAG_FREE_EXTENT))
++ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_FREE_EXTENT))
+ return -EIO;
+
+ error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
+--- a/fs/xfs/libxfs/xfs_attr_leaf.c
++++ b/fs/xfs/libxfs/xfs_attr_leaf.c
+@@ -1225,7 +1225,7 @@ xfs_attr3_leaf_to_node(
+
+ trace_xfs_attr_leaf_to_node(args);
+
+- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_ATTR_LEAF_TO_NODE)) {
++ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_ATTR_LEAF_TO_NODE)) {
+ error = -EIO;
+ goto out;
+ }
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -3766,8 +3766,7 @@ xfs_bmap_btalloc(
+ /* Trim the allocation back to the maximum an AG can fit. */
+ args.maxlen = min(ap->length, mp->m_ag_max_usable);
+
+- if (unlikely(XFS_TEST_ERROR(false, mp,
+- XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
++ if (unlikely(XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
+ error = xfs_bmap_exact_minlen_extent_alloc(ap, &args);
+ else if ((ap->datatype & XFS_ALLOC_USERDATA) &&
+ xfs_inode_is_filestream(ap->ip))
+@@ -3953,7 +3952,7 @@ xfs_bmapi_read(
+ }
+
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
+- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
++ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ xfs_bmap_mark_sick(ip, whichfork);
+ return -EFSCORRUPTED;
+ }
+@@ -4442,7 +4441,7 @@ xfs_bmapi_write(
+ (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
+
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
+- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
++ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ xfs_bmap_mark_sick(ip, whichfork);
+ return -EFSCORRUPTED;
+ }
+@@ -4785,7 +4784,7 @@ xfs_bmapi_remap(
+ (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
+
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
+- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
++ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ xfs_bmap_mark_sick(ip, whichfork);
+ return -EFSCORRUPTED;
+ }
+@@ -5873,7 +5872,7 @@ xfs_bmap_collapse_extents(
+ int logflags = 0;
+
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
+- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
++ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ xfs_bmap_mark_sick(ip, whichfork);
+ return -EFSCORRUPTED;
+ }
+@@ -5988,7 +5987,7 @@ xfs_bmap_insert_extents(
+ int logflags = 0;
+
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
+- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
++ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ xfs_bmap_mark_sick(ip, whichfork);
+ return -EFSCORRUPTED;
+ }
+@@ -6092,7 +6091,7 @@ xfs_bmap_split_extent(
+ int i = 0;
+
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
+- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
++ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ xfs_bmap_mark_sick(ip, whichfork);
+ return -EFSCORRUPTED;
+ }
+@@ -6257,7 +6256,7 @@ xfs_bmap_finish_one(
+
+ trace_xfs_bmap_deferred(bi);
+
+- if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE))
++ if (XFS_TEST_ERROR(tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE))
+ return -EIO;
+
+ switch (bi->bi_type) {
+--- a/fs/xfs/libxfs/xfs_btree.c
++++ b/fs/xfs/libxfs/xfs_btree.c
+@@ -300,7 +300,7 @@ xfs_btree_check_block(
+
+ fa = __xfs_btree_check_block(cur, block, level, bp);
+ if (XFS_IS_CORRUPT(mp, fa != NULL) ||
+- XFS_TEST_ERROR(false, mp, xfs_btree_block_errtag(cur))) {
++ XFS_TEST_ERROR(mp, xfs_btree_block_errtag(cur))) {
+ if (bp)
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_btree_mark_sick(cur);
+--- a/fs/xfs/libxfs/xfs_da_btree.c
++++ b/fs/xfs/libxfs/xfs_da_btree.c
+@@ -565,7 +565,7 @@ xfs_da3_split(
+
+ trace_xfs_da_split(state->args);
+
+- if (XFS_TEST_ERROR(false, state->mp, XFS_ERRTAG_DA_LEAF_SPLIT))
++ if (XFS_TEST_ERROR(state->mp, XFS_ERRTAG_DA_LEAF_SPLIT))
+ return -EIO;
+
+ /*
+--- a/fs/xfs/libxfs/xfs_dir2.c
++++ b/fs/xfs/libxfs/xfs_dir2.c
+@@ -223,7 +223,7 @@ xfs_dir_ino_validate(
+ bool ino_ok = xfs_verify_dir_ino(mp, ino);
+
+ if (XFS_IS_CORRUPT(mp, !ino_ok) ||
+- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DIR_INO_VALIDATE)) {
++ XFS_TEST_ERROR(mp, XFS_ERRTAG_DIR_INO_VALIDATE)) {
+ xfs_warn(mp, "Invalid inode number 0x%Lx",
+ (unsigned long long) ino);
+ return -EFSCORRUPTED;
+--- a/fs/xfs/libxfs/xfs_exchmaps.c
++++ b/fs/xfs/libxfs/xfs_exchmaps.c
+@@ -616,7 +616,7 @@ xfs_exchmaps_finish_one(
+ return error;
+ }
+
+- if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE))
++ if (XFS_TEST_ERROR(tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE))
+ return -EIO;
+
+ /* If we still have work to do, ask for a new transaction. */
+@@ -880,7 +880,7 @@ xmi_ensure_delta_nextents(
+ &new_nextents))
+ return -EFBIG;
+
+- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
++ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
+ new_nextents > 10)
+ return -EFBIG;
+
+--- a/fs/xfs/libxfs/xfs_ialloc.c
++++ b/fs/xfs/libxfs/xfs_ialloc.c
+@@ -2690,7 +2690,7 @@ xfs_agi_read_verify(
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_agi_verify(bp);
+- if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI))
++ if (fa || XFS_TEST_ERROR(mp, XFS_ERRTAG_IALLOC_READ_AGI))
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
+ }
+--- a/fs/xfs/libxfs/xfs_inode_buf.c
++++ b/fs/xfs/libxfs/xfs_inode_buf.c
+@@ -60,8 +60,8 @@ xfs_inode_buf_verify(
+ di_ok = xfs_verify_magic16(bp, dip->di_magic) &&
+ xfs_dinode_good_version(mp, dip->di_version) &&
+ xfs_verify_agino_or_null(bp->b_pag, unlinked_ino);
+- if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
+- XFS_ERRTAG_ITOBP_INOTOBP))) {
++ if (unlikely(!di_ok ||
++ XFS_TEST_ERROR(mp, XFS_ERRTAG_ITOBP_INOTOBP))) {
+ if (readahead) {
+ bp->b_flags &= ~XBF_DONE;
+ xfs_buf_ioerror(bp, -EIO);
+--- a/fs/xfs/libxfs/xfs_inode_fork.c
++++ b/fs/xfs/libxfs/xfs_inode_fork.c
+@@ -795,8 +795,7 @@ xfs_iext_count_extend(
+ if (nr_exts < ifp->if_nextents)
+ return -EFBIG;
+
+- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
+- nr_exts > 10)
++ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) && nr_exts > 10)
+ return -EFBIG;
+
+ if (nr_exts > xfs_iext_max_nextents(has_large, whichfork)) {
+--- a/fs/xfs/libxfs/xfs_refcount.c
++++ b/fs/xfs/libxfs/xfs_refcount.c
+@@ -1073,8 +1073,7 @@ xfs_refcount_still_have_space(
+ * refcount continue update "error" has been injected.
+ */
+ if (cur->bc_refc.nr_ops > 2 &&
+- XFS_TEST_ERROR(false, cur->bc_mp,
+- XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
++ XFS_TEST_ERROR(cur->bc_mp, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
+ return false;
+
+ if (cur->bc_refc.nr_ops == 0)
+@@ -1353,7 +1352,7 @@ xfs_refcount_finish_one(
+
+ trace_xfs_refcount_deferred(mp, ri);
+
+- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
++ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
+ return -EIO;
+
+ /*
+--- a/fs/xfs/libxfs/xfs_rmap.c
++++ b/fs/xfs/libxfs/xfs_rmap.c
+@@ -2579,7 +2579,7 @@ xfs_rmap_finish_one(
+
+ trace_xfs_rmap_deferred(mp, ri);
+
+- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE))
++ if (XFS_TEST_ERROR(mp, XFS_ERRTAG_RMAP_FINISH_ONE))
+ return -EIO;
+
+ /*
+--- a/fs/xfs/scrub/cow_repair.c
++++ b/fs/xfs/scrub/cow_repair.c
+@@ -297,7 +297,7 @@ xrep_cow_find_bad(
+ * on the debugging knob, replace everything in the CoW fork.
+ */
+ if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
+- XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
++ XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
+ error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
+ xc->irec.br_blockcount);
+ if (error)
+--- a/fs/xfs/scrub/repair.c
++++ b/fs/xfs/scrub/repair.c
+@@ -990,7 +990,7 @@ xrep_will_attempt(
+ return true;
+
+ /* Let debug users force us into the repair routines. */
+- if (XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
++ if (XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
+ return true;
+
+ /* Metadata is corrupt or failed cross-referencing. */
+--- a/fs/xfs/xfs_attr_item.c
++++ b/fs/xfs/xfs_attr_item.c
+@@ -490,7 +490,7 @@ xfs_attr_finish_item(
+ /* Reset trans after EAGAIN cycle since the transaction is new */
+ args->trans = tp;
+
+- if (XFS_TEST_ERROR(false, args->dp->i_mount, XFS_ERRTAG_LARP)) {
++ if (XFS_TEST_ERROR(args->dp->i_mount, XFS_ERRTAG_LARP)) {
+ error = -EIO;
+ goto out;
+ }
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -1498,7 +1498,7 @@ xfs_buf_bio_end_io(
+
+ if (!bio->bi_status &&
+ (bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
+- XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
++ XFS_TEST_ERROR(bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
+ bio->bi_status = BLK_STS_IOERR;
+
+ /*
+@@ -2451,7 +2451,7 @@ void xfs_buf_set_ref(struct xfs_buf *bp,
+ * This allows userspace to disrupt buffer caching for debug/testing
+ * purposes.
+ */
+- if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
++ if (XFS_TEST_ERROR(bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
+ lru_ref = 0;
+
+ atomic_set(&bp->b_lru_ref, lru_ref);
+--- a/fs/xfs/xfs_error.c
++++ b/fs/xfs/xfs_error.c
+@@ -292,7 +292,6 @@ xfs_errortag_enabled(
+ bool
+ xfs_errortag_test(
+ struct xfs_mount *mp,
+- const char *expression,
+ const char *file,
+ int line,
+ unsigned int error_tag)
+@@ -318,8 +317,8 @@ xfs_errortag_test(
+ return false;
+
+ xfs_warn_ratelimited(mp,
+-"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
+- expression, file, line, mp->m_super->s_id);
++"Injecting error at file %s, line %d, on filesystem \"%s\"",
++ file, line, mp->m_super->s_id);
+ return true;
+ }
+
+--- a/fs/xfs/xfs_error.h
++++ b/fs/xfs/xfs_error.h
+@@ -41,10 +41,10 @@ extern void xfs_inode_verifier_error(str
+ #ifdef DEBUG
+ extern int xfs_errortag_init(struct xfs_mount *mp);
+ extern void xfs_errortag_del(struct xfs_mount *mp);
+-extern bool xfs_errortag_test(struct xfs_mount *mp, const char *expression,
+- const char *file, int line, unsigned int error_tag);
+-#define XFS_TEST_ERROR(expr, mp, tag) \
+- ((expr) || xfs_errortag_test((mp), #expr, __FILE__, __LINE__, (tag)))
++bool xfs_errortag_test(struct xfs_mount *mp, const char *file, int line,
++ unsigned int error_tag);
++#define XFS_TEST_ERROR(mp, tag) \
++ xfs_errortag_test((mp), __FILE__, __LINE__, (tag))
+ bool xfs_errortag_enabled(struct xfs_mount *mp, unsigned int tag);
+ #define XFS_ERRORTAG_DELAY(mp, tag) \
+ do { \
+@@ -66,7 +66,7 @@ extern int xfs_errortag_clearall(struct
+ #else
+ #define xfs_errortag_init(mp) (0)
+ #define xfs_errortag_del(mp)
+-#define XFS_TEST_ERROR(expr, mp, tag) (expr)
++#define XFS_TEST_ERROR(mp, tag) (false)
+ #define XFS_ERRORTAG_DELAY(mp, tag) ((void)0)
+ #define xfs_errortag_set(mp, tag, val) (ENOSYS)
+ #define xfs_errortag_add(mp, tag) (ENOSYS)
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -2367,37 +2367,35 @@ xfs_iflush(
+ * error handling as the caller will shutdown and fail the buffer.
+ */
+ error = -EFSCORRUPTED;
+- if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
+- mp, XFS_ERRTAG_IFLUSH_1)) {
++ if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC) ||
++ XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_1)) {
+ xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+ "%s: Bad inode %llu magic number 0x%x, ptr "PTR_FMT,
+ __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
+ goto flush_out;
+ }
+ if (S_ISREG(VFS_I(ip)->i_mode)) {
+- if (XFS_TEST_ERROR(
+- ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
+- ip->i_df.if_format != XFS_DINODE_FMT_BTREE,
+- mp, XFS_ERRTAG_IFLUSH_3)) {
++ if ((ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
++ ip->i_df.if_format != XFS_DINODE_FMT_BTREE) ||
++ XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_3)) {
+ xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+ "%s: Bad regular inode %llu, ptr "PTR_FMT,
+ __func__, ip->i_ino, ip);
+ goto flush_out;
+ }
+ } else if (S_ISDIR(VFS_I(ip)->i_mode)) {
+- if (XFS_TEST_ERROR(
+- ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
+- ip->i_df.if_format != XFS_DINODE_FMT_BTREE &&
+- ip->i_df.if_format != XFS_DINODE_FMT_LOCAL,
+- mp, XFS_ERRTAG_IFLUSH_4)) {
++ if ((ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
++ ip->i_df.if_format != XFS_DINODE_FMT_BTREE &&
++ ip->i_df.if_format != XFS_DINODE_FMT_LOCAL) ||
++ XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_4)) {
+ xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+ "%s: Bad directory inode %llu, ptr "PTR_FMT,
+ __func__, ip->i_ino, ip);
+ goto flush_out;
+ }
+ }
+- if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af) >
+- ip->i_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
++ if (ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af) >
++ ip->i_nblocks || XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_5)) {
+ xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+ "%s: detected corrupt incore inode %llu, "
+ "total extents = %llu nblocks = %lld, ptr "PTR_FMT,
+@@ -2406,8 +2404,8 @@ xfs_iflush(
+ ip->i_nblocks, ip);
+ goto flush_out;
+ }
+- if (XFS_TEST_ERROR(ip->i_forkoff > mp->m_sb.sb_inodesize,
+- mp, XFS_ERRTAG_IFLUSH_6)) {
++ if (ip->i_forkoff > mp->m_sb.sb_inodesize ||
++ XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_6)) {
+ xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+ "%s: bad inode %llu, forkoff 0x%x, ptr "PTR_FMT,
+ __func__, ip->i_ino, ip->i_forkoff, ip);
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -993,7 +993,7 @@ xfs_buffered_write_iomap_begin(
+ return error;
+
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(&ip->i_df)) ||
+- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
++ XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ xfs_bmap_mark_sick(ip, XFS_DATA_FORK);
+ error = -EFSCORRUPTED;
+ goto out_unlock;
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -968,8 +968,8 @@ xfs_log_unmount_write(
+ * counters will be recalculated. Refer to xlog_check_unmount_rec for
+ * more details.
+ */
+- if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp,
+- XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
++ if (xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS) ||
++ XFS_TEST_ERROR(mp, XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
+ xfs_alert(mp, "%s: will fix summary counters at next mount",
+ __func__);
+ return;
+@@ -1239,7 +1239,7 @@ xlog_ioend_work(
+ /*
+ * Race to shutdown the filesystem if we see an error.
+ */
+- if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
++ if (error || XFS_TEST_ERROR(log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
+ xfs_alert(log->l_mp, "log I/O error %d", error);
+ xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
+ }
+@@ -1848,7 +1848,7 @@ xlog_sync(
+ * detects the bad CRC and attempts to recover.
+ */
+ #ifdef DEBUG
+- if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
++ if (XFS_TEST_ERROR(log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
+ iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
+ iclog->ic_fail_crc = true;
+ xfs_warn(log->l_mp,
+--- a/fs/xfs/xfs_trans_ail.c
++++ b/fs/xfs/xfs_trans_ail.c
+@@ -385,7 +385,7 @@ xfsaild_push_item(
+ * If log item pinning is enabled, skip the push and track the item as
+ * pinned. This can help induce head-behind-tail conditions.
+ */
+- if (XFS_TEST_ERROR(false, ailp->ail_log->l_mp, XFS_ERRTAG_LOG_ITEM_PIN))
++ if (XFS_TEST_ERROR(ailp->ail_log->l_mp, XFS_ERRTAG_LOG_ITEM_PIN))
+ return XFS_ITEM_PINNED;
+
+ /*