--- /dev/null
+From 07756cb6faa6228c3f491e96f7b0aebd62ec4bc9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Jun 2026 20:39:06 +0000
+Subject: netfilter: ctnetlink: ensure safe access to master conntrack
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit bffcaad9afdfe45d7fc777397d3b83c1e3ebffe5 ]
+
+Holding reference on the expectation is not sufficient, the master
+conntrack object can just go away, making exp->master invalid.
+
+To access exp->master safely:
+
+- Grab the nf_conntrack_expect_lock, this gets serialized with
+ clean_from_lists() which also holds this lock when the master
+ conntrack goes away.
+
+- Hold reference on master conntrack via nf_conntrack_find_get().
+ This is not so easy, since the master tuple needed to look up the
+ master conntrack is not available in the existing problematic paths.
+
+This patch goes for extending the nf_conntrack_expect_lock section
+to address this issue for simplicity, in the cases that are described
+below this is just slightly extending the lock section.
+
+The add expectation command already holds a reference to the master
+conntrack from ctnetlink_create_expect().
+
+However, the delete expectation command needs to grab the spinlock
+before looking up the expectation. Expand the existing spinlock
+section so that it also covers the expectation lookup. Note that
+the nf_ct_expect_iterate_net() calls already grab the spinlock while
+iterating over the expectation table, which is correct.
+
+The get expectation command needs to grab the spinlock to ensure master
+conntrack does not go away. This also expands the existing spinlock
+section to cover the expectation lookup too. I needed to move the
+netlink skb allocation out of the spinlock to keep it GFP_KERNEL.
+
+For the expectation events, the IPEXP_DESTROY event is already delivered
+under the spinlock, just move the delivery of IPEXP_NEW under the
+spinlock too because the master conntrack event cache is reached through
+exp->master.
+
+While at it, add lockdep notations to help identify what codepaths need
+to grab the spinlock.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+[ fix timer_delete -> del_timer in diff context lines since 8fa7292
+("treewide: Switch/rename to timer_delete[_sync]()") landed in 6.15 ]
+Signed-off-by: Mark Bundschuh <mkbund@amazon.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_conntrack_core.h | 5 ++++
+ net/netfilter/nf_conntrack_ecache.c | 2 ++
+ net/netfilter/nf_conntrack_expect.c | 10 +++++++-
+ net/netfilter/nf_conntrack_netlink.c | 28 +++++++++++++++--------
+ 4 files changed, 35 insertions(+), 10 deletions(-)
+
+diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
+index a36f87af415c22..8ea16b0ba1c982 100644
+--- a/include/net/netfilter/nf_conntrack_core.h
++++ b/include/net/netfilter/nf_conntrack_core.h
+@@ -84,6 +84,11 @@ void nf_conntrack_lock(spinlock_t *lock);
+
+ extern spinlock_t nf_conntrack_expect_lock;
+
++static inline void lockdep_nfct_expect_lock_held(void)
++{
++ lockdep_assert_held(&nf_conntrack_expect_lock);
++}
++
+ /* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
+
+ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
+diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
+index 69948e1d6974e3..6526bdcca580fd 100644
+--- a/net/netfilter/nf_conntrack_ecache.c
++++ b/net/netfilter/nf_conntrack_ecache.c
+@@ -237,6 +237,8 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
+ struct nf_ct_event_notifier *notify;
+ struct nf_conntrack_ecache *e;
+
++ lockdep_nfct_expect_lock_held();
++
+ rcu_read_lock();
+ notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
+ if (!notify)
+diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
+index 70bcddfc17ccc2..379711ea5ab67e 100644
+--- a/net/netfilter/nf_conntrack_expect.c
++++ b/net/netfilter/nf_conntrack_expect.c
+@@ -51,6 +51,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
+ struct net *net = nf_ct_exp_net(exp);
+ struct nf_conntrack_net *cnet;
+
++ lockdep_nfct_expect_lock_held();
+ WARN_ON(!master_help);
+ WARN_ON(timer_pending(&exp->timeout));
+
+@@ -118,6 +119,8 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
+
+ bool nf_ct_remove_expect(struct nf_conntrack_expect *exp)
+ {
++ lockdep_nfct_expect_lock_held();
++
+ if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+@@ -177,6 +180,8 @@ nf_ct_find_expectation(struct net *net,
+ struct nf_conntrack_expect *i, *exp = NULL;
+ unsigned int h;
+
++ lockdep_nfct_expect_lock_held();
++
+ if (!cnet->expect_count)
+ return NULL;
+
+@@ -459,6 +464,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
+ unsigned int h;
+ int ret = 0;
+
++ lockdep_nfct_expect_lock_held();
++
+ if (!master_help) {
+ ret = -ESHUTDOWN;
+ goto out;
+@@ -515,8 +522,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
+
+ nf_ct_expect_insert(expect);
+
+- spin_unlock_bh(&nf_conntrack_expect_lock);
+ nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
++ spin_unlock_bh(&nf_conntrack_expect_lock);
++
+ return 0;
+ out:
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
+index bcbd77608365a9..f6e9d9bc18864a 100644
+--- a/net/netfilter/nf_conntrack_netlink.c
++++ b/net/netfilter/nf_conntrack_netlink.c
+@@ -3330,31 +3330,37 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
+ if (err < 0)
+ return err;
+
++ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
++ if (!skb2)
++ return -ENOMEM;
++
++ spin_lock_bh(&nf_conntrack_expect_lock);
+ exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
+- if (!exp)
++ if (!exp) {
++ spin_unlock_bh(&nf_conntrack_expect_lock);
++ kfree_skb(skb2);
+ return -ENOENT;
++ }
+
+ if (cda[CTA_EXPECT_ID]) {
+ __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
+
+ if (id != nf_expect_get_id(exp)) {
+ nf_ct_expect_put(exp);
++ spin_unlock_bh(&nf_conntrack_expect_lock);
++ kfree_skb(skb2);
+ return -ENOENT;
+ }
+ }
+
+- skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+- if (!skb2) {
+- nf_ct_expect_put(exp);
+- return -ENOMEM;
+- }
+-
+ rcu_read_lock();
+ err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
+ exp);
+ rcu_read_unlock();
+ nf_ct_expect_put(exp);
++ spin_unlock_bh(&nf_conntrack_expect_lock);
++
+ if (err <= 0) {
+ kfree_skb(skb2);
+ return -ENOMEM;
+@@ -3401,22 +3407,26 @@ static int ctnetlink_del_expect(struct sk_buff *skb,
+ if (err < 0)
+ return err;
+
++ spin_lock_bh(&nf_conntrack_expect_lock);
++
+ /* bump usage count to 2 */
+ exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
+- if (!exp)
++ if (!exp) {
++ spin_unlock_bh(&nf_conntrack_expect_lock);
+ return -ENOENT;
++ }
+
+ if (cda[CTA_EXPECT_ID]) {
+ __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
+
+ if (id != nf_expect_get_id(exp)) {
+ nf_ct_expect_put(exp);
++ spin_unlock_bh(&nf_conntrack_expect_lock);
+ return -ENOENT;
+ }
+ }
+
+ /* after list removal, usage count == 1 */
+- spin_lock_bh(&nf_conntrack_expect_lock);
+ if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
+ nlmsg_report(info->nlh));
+--
+2.53.0
+
net-mvpp2-add-metadata-support-for-xdp-mode.patch
net-mvpp2-refill-rx-buffers-before-xdp-or-skb-use.patch
net-mvpp2-build-skb-from-xdp-adjusted-data-on-xdp_pa.patch
+netfilter-ctnetlink-ensure-safe-access-to-master-con.patch
--- /dev/null
+From 2cd4803c24ecf6e069ec3ca6b04719906fe45815 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Jun 2026 19:07:05 +0000
+Subject: netfilter: ctnetlink: ensure safe access to master conntrack
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit bffcaad9afdfe45d7fc777397d3b83c1e3ebffe5 ]
+
+Holding reference on the expectation is not sufficient, the master
+conntrack object can just go away, making exp->master invalid.
+
+To access exp->master safely:
+
+- Grab the nf_conntrack_expect_lock, this gets serialized with
+ clean_from_lists() which also holds this lock when the master
+ conntrack goes away.
+
+- Hold reference on master conntrack via nf_conntrack_find_get().
+ This is not so easy, since the master tuple needed to look up the
+ master conntrack is not available in the existing problematic paths.
+
+This patch goes for extending the nf_conntrack_expect_lock section
+to address this issue for simplicity, in the cases that are described
+below this is just slightly extending the lock section.
+
+The add expectation command already holds a reference to the master
+conntrack from ctnetlink_create_expect().
+
+However, the delete expectation command needs to grab the spinlock
+before looking up the expectation. Expand the existing spinlock
+section so that it also covers the expectation lookup. Note that
+the nf_ct_expect_iterate_net() calls already grab the spinlock while
+iterating over the expectation table, which is correct.
+
+The get expectation command needs to grab the spinlock to ensure master
+conntrack does not go away. This also expands the existing spinlock
+section to cover the expectation lookup too. I needed to move the
+netlink skb allocation out of the spinlock to keep it GFP_KERNEL.
+
+For the expectation events, the IPEXP_DESTROY event is already delivered
+under the spinlock, just move the delivery of IPEXP_NEW under the
+spinlock too because the master conntrack event cache is reached through
+exp->master.
+
+While at it, add lockdep notations to help identify what codepaths need
+to grab the spinlock.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+[ fix timer_delete -> del_timer in diff context lines since 8fa7292
+("treewide: Switch/rename to timer_delete[_sync]()") landed in 6.15 ]
+Signed-off-by: Mark Bundschuh <mkbund@amazon.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_conntrack_core.h | 5 ++++
+ net/netfilter/nf_conntrack_ecache.c | 2 ++
+ net/netfilter/nf_conntrack_expect.c | 10 +++++++-
+ net/netfilter/nf_conntrack_netlink.c | 28 +++++++++++++++--------
+ 4 files changed, 35 insertions(+), 10 deletions(-)
+
+diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
+index 3384859a892101..8883575adcc1e7 100644
+--- a/include/net/netfilter/nf_conntrack_core.h
++++ b/include/net/netfilter/nf_conntrack_core.h
+@@ -83,6 +83,11 @@ void nf_conntrack_lock(spinlock_t *lock);
+
+ extern spinlock_t nf_conntrack_expect_lock;
+
++static inline void lockdep_nfct_expect_lock_held(void)
++{
++ lockdep_assert_held(&nf_conntrack_expect_lock);
++}
++
+ /* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
+
+ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
+diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
+index 69948e1d6974e3..6526bdcca580fd 100644
+--- a/net/netfilter/nf_conntrack_ecache.c
++++ b/net/netfilter/nf_conntrack_ecache.c
+@@ -237,6 +237,8 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
+ struct nf_ct_event_notifier *notify;
+ struct nf_conntrack_ecache *e;
+
++ lockdep_nfct_expect_lock_held();
++
+ rcu_read_lock();
+ notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
+ if (!notify)
+diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
+index f5c45989df5736..bb8b87f9ee50da 100644
+--- a/net/netfilter/nf_conntrack_expect.c
++++ b/net/netfilter/nf_conntrack_expect.c
+@@ -51,6 +51,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
+ struct net *net = nf_ct_exp_net(exp);
+ struct nf_conntrack_net *cnet;
+
++ lockdep_nfct_expect_lock_held();
+ WARN_ON(!master_help);
+ WARN_ON(timer_pending(&exp->timeout));
+
+@@ -118,6 +119,8 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
+
+ bool nf_ct_remove_expect(struct nf_conntrack_expect *exp)
+ {
++ lockdep_nfct_expect_lock_held();
++
+ if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+@@ -177,6 +180,8 @@ nf_ct_find_expectation(struct net *net,
+ struct nf_conntrack_expect *i, *exp = NULL;
+ unsigned int h;
+
++ lockdep_nfct_expect_lock_held();
++
+ if (!cnet->expect_count)
+ return NULL;
+
+@@ -459,6 +464,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
+ unsigned int h;
+ int ret = 0;
+
++ lockdep_nfct_expect_lock_held();
++
+ if (!master_help) {
+ ret = -ESHUTDOWN;
+ goto out;
+@@ -515,8 +522,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
+
+ nf_ct_expect_insert(expect);
+
+- spin_unlock_bh(&nf_conntrack_expect_lock);
+ nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
++ spin_unlock_bh(&nf_conntrack_expect_lock);
++
+ return 0;
+ out:
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
+index f51cdfba68fbdb..507f17722f375b 100644
+--- a/net/netfilter/nf_conntrack_netlink.c
++++ b/net/netfilter/nf_conntrack_netlink.c
+@@ -3332,31 +3332,37 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
+ if (err < 0)
+ return err;
+
++ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
++ if (!skb2)
++ return -ENOMEM;
++
++ spin_lock_bh(&nf_conntrack_expect_lock);
+ exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
+- if (!exp)
++ if (!exp) {
++ spin_unlock_bh(&nf_conntrack_expect_lock);
++ kfree_skb(skb2);
+ return -ENOENT;
++ }
+
+ if (cda[CTA_EXPECT_ID]) {
+ __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
+
+ if (id != nf_expect_get_id(exp)) {
+ nf_ct_expect_put(exp);
++ spin_unlock_bh(&nf_conntrack_expect_lock);
++ kfree_skb(skb2);
+ return -ENOENT;
+ }
+ }
+
+- skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+- if (!skb2) {
+- nf_ct_expect_put(exp);
+- return -ENOMEM;
+- }
+-
+ rcu_read_lock();
+ err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
+ exp);
+ rcu_read_unlock();
+ nf_ct_expect_put(exp);
++ spin_unlock_bh(&nf_conntrack_expect_lock);
++
+ if (err <= 0) {
+ kfree_skb(skb2);
+ return -ENOMEM;
+@@ -3403,22 +3409,26 @@ static int ctnetlink_del_expect(struct sk_buff *skb,
+ if (err < 0)
+ return err;
+
++ spin_lock_bh(&nf_conntrack_expect_lock);
++
+ /* bump usage count to 2 */
+ exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
+- if (!exp)
++ if (!exp) {
++ spin_unlock_bh(&nf_conntrack_expect_lock);
+ return -ENOENT;
++ }
+
+ if (cda[CTA_EXPECT_ID]) {
+ __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
+
+ if (id != nf_expect_get_id(exp)) {
+ nf_ct_expect_put(exp);
++ spin_unlock_bh(&nf_conntrack_expect_lock);
+ return -ENOENT;
+ }
+ }
+
+ /* after list removal, usage count == 1 */
+- spin_lock_bh(&nf_conntrack_expect_lock);
+ if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
+ nlmsg_report(info->nlh));
+--
+2.53.0
+
net-mvpp2-refill-rx-buffers-before-xdp-or-skb-use.patch
net-mvpp2-build-skb-from-xdp-adjusted-data-on-xdp_pa.patch
ipv6-fix-a-potential-npd-in-cleanup_prefix_route.patch
+netfilter-ctnetlink-ensure-safe-access-to-master-con.patch
+writeback-avoid-contention-on-wb-list_lock-when-swit.patch
+writeback-fix-use-after-free-in-inode_switch_wbs_wor.patch
+xfrm-hold-device-only-for-the-asynchronous-decryptio.patch
+xfrm-hold-dev-ref-until-after-transport_finish-nf_ho.patch
--- /dev/null
+From cc73bfdc8a332fe69957503aa0ada8507b9fd7bf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Apr 2025 17:12:59 +0200
+Subject: writeback: Avoid contention on wb->list_lock when switching inodes
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit e1b849cfa6b61f1c866a908c9e8dd9b5aaab820b ]
+
+There can be multiple inode switch works that are trying to switch
+inodes to / from the same wb. This can happen in particular if some
+cgroup exits which owns many (thousands) inodes and we need to switch
+them all. In this case several inode_switch_wbs_work_fn() instances will
+be just spinning on the same wb->list_lock while only one of them makes
+forward progress. This wastes CPU cycles and quickly leads to softlockup
+reports and unusable system.
+
+Instead of running several inode_switch_wbs_work_fn() instances in
+parallel switching to the same wb and contending on wb->list_lock, run
+just one work item per wb and manage a queue of isw items switching to
+this wb.
+
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 99 ++++++++++++++++++++------------
+ include/linux/backing-dev-defs.h | 4 ++
+ include/linux/writeback.h | 2 +
+ mm/backing-dev.c | 5 ++
+ 4 files changed, 74 insertions(+), 36 deletions(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 45e90338fbb2df..a8d21a5f354859 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -369,7 +369,8 @@ static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
+ }
+
+ struct inode_switch_wbs_context {
+- struct rcu_work work;
++ /* List of queued switching contexts for the wb */
++ struct llist_node list;
+
+ /*
+ * Multiple inodes can be switched at once. The switching procedure
+@@ -379,7 +380,6 @@ struct inode_switch_wbs_context {
+ * array embedded into struct inode_switch_wbs_context. Otherwise
+ * an inode could be left in a non-consistent state.
+ */
+- struct bdi_writeback *new_wb;
+ struct inode *inodes[];
+ };
+
+@@ -488,13 +488,11 @@ static bool inode_do_switch_wbs(struct inode *inode,
+ return switched;
+ }
+
+-static void inode_switch_wbs_work_fn(struct work_struct *work)
++static void process_inode_switch_wbs(struct bdi_writeback *new_wb,
++ struct inode_switch_wbs_context *isw)
+ {
+- struct inode_switch_wbs_context *isw =
+- container_of(to_rcu_work(work), struct inode_switch_wbs_context, work);
+ struct backing_dev_info *bdi = inode_to_bdi(isw->inodes[0]);
+ struct bdi_writeback *old_wb = isw->inodes[0]->i_wb;
+- struct bdi_writeback *new_wb = isw->new_wb;
+ unsigned long nr_switched = 0;
+ struct inode **inodep;
+
+@@ -554,6 +552,38 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+ atomic_dec(&isw_nr_in_flight);
+ }
+
++void inode_switch_wbs_work_fn(struct work_struct *work)
++{
++ struct bdi_writeback *new_wb = container_of(work, struct bdi_writeback,
++ switch_work);
++ struct inode_switch_wbs_context *isw, *next_isw;
++ struct llist_node *list;
++
++ /*
++ * Grab out reference to wb so that it cannot get freed under us
++ * after we process all the isw items.
++ */
++ wb_get(new_wb);
++ while (1) {
++ list = llist_del_all(&new_wb->switch_wbs_ctxs);
++ /* Nothing to do? */
++ if (!list)
++ break;
++ /*
++ * In addition to synchronizing among switchers, I_WB_SWITCH
++ * tells the RCU protected stat update paths to grab the i_page
++ * lock so that stat transfer can synchronize against them.
++ * Let's continue after I_WB_SWITCH is guaranteed to be
++ * visible.
++ */
++ synchronize_rcu();
++
++ llist_for_each_entry_safe(isw, next_isw, list, list)
++ process_inode_switch_wbs(new_wb, isw);
++ }
++ wb_put(new_wb);
++}
++
+ static bool inode_prepare_wbs_switch(struct inode *inode,
+ struct bdi_writeback *new_wb)
+ {
+@@ -583,6 +613,13 @@ static bool inode_prepare_wbs_switch(struct inode *inode,
+ return true;
+ }
+
++static void wb_queue_isw(struct bdi_writeback *wb,
++ struct inode_switch_wbs_context *isw)
++{
++ if (llist_add(&isw->list, &wb->switch_wbs_ctxs))
++ queue_work(isw_wq, &wb->switch_work);
++}
++
+ /**
+ * inode_switch_wbs - change the wb association of an inode
+ * @inode: target inode
+@@ -596,6 +633,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
+ struct cgroup_subsys_state *memcg_css;
+ struct inode_switch_wbs_context *isw;
++ struct bdi_writeback *new_wb = NULL;
+
+ /* noop if seems to be already in progress */
+ if (inode->i_state & I_WB_SWITCH)
+@@ -620,40 +658,34 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
+ if (!memcg_css)
+ goto out_free;
+
+- isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
++ new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
+ css_put(memcg_css);
+- if (!isw->new_wb)
++ if (!new_wb)
+ goto out_free;
+
+- if (!inode_prepare_wbs_switch(inode, isw->new_wb))
++ if (!inode_prepare_wbs_switch(inode, new_wb))
+ goto out_free;
+
+ isw->inodes[0] = inode;
+
+- /*
+- * In addition to synchronizing among switchers, I_WB_SWITCH tells
+- * the RCU protected stat update paths to grab the i_page
+- * lock so that stat transfer can synchronize against them.
+- * Let's continue after I_WB_SWITCH is guaranteed to be visible.
+- */
+- INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn);
+- queue_rcu_work(isw_wq, &isw->work);
++ wb_queue_isw(new_wb, isw);
+ return;
+
+ out_free:
+ atomic_dec(&isw_nr_in_flight);
+- if (isw->new_wb)
+- wb_put(isw->new_wb);
++ if (new_wb)
++ wb_put(new_wb);
+ kfree(isw);
+ }
+
+-static bool isw_prepare_wbs_switch(struct inode_switch_wbs_context *isw,
++static bool isw_prepare_wbs_switch(struct bdi_writeback *new_wb,
++ struct inode_switch_wbs_context *isw,
+ struct list_head *list, int *nr)
+ {
+ struct inode *inode;
+
+ list_for_each_entry(inode, list, i_io_list) {
+- if (!inode_prepare_wbs_switch(inode, isw->new_wb))
++ if (!inode_prepare_wbs_switch(inode, new_wb))
+ continue;
+
+ isw->inodes[*nr] = inode;
+@@ -677,6 +709,7 @@ bool cleanup_offline_cgwb(struct bdi_writeback *wb)
+ {
+ struct cgroup_subsys_state *memcg_css;
+ struct inode_switch_wbs_context *isw;
++ struct bdi_writeback *new_wb;
+ int nr;
+ bool restart = false;
+
+@@ -689,12 +722,12 @@ bool cleanup_offline_cgwb(struct bdi_writeback *wb)
+
+ for (memcg_css = wb->memcg_css->parent; memcg_css;
+ memcg_css = memcg_css->parent) {
+- isw->new_wb = wb_get_create(wb->bdi, memcg_css, GFP_KERNEL);
+- if (isw->new_wb)
++ new_wb = wb_get_create(wb->bdi, memcg_css, GFP_KERNEL);
++ if (new_wb)
+ break;
+ }
+- if (unlikely(!isw->new_wb))
+- isw->new_wb = &wb->bdi->wb; /* wb_get() is noop for bdi's wb */
++ if (unlikely(!new_wb))
++ new_wb = &wb->bdi->wb; /* wb_get() is noop for bdi's wb */
+
+ nr = 0;
+ spin_lock(&wb->list_lock);
+@@ -706,27 +739,21 @@ bool cleanup_offline_cgwb(struct bdi_writeback *wb)
+ * bandwidth restrictions, as writeback of inode metadata is not
+ * accounted for.
+ */
+- restart = isw_prepare_wbs_switch(isw, &wb->b_attached, &nr);
++ restart = isw_prepare_wbs_switch(new_wb, isw, &wb->b_attached, &nr);
+ if (!restart)
+- restart = isw_prepare_wbs_switch(isw, &wb->b_dirty_time, &nr);
++ restart = isw_prepare_wbs_switch(new_wb, isw, &wb->b_dirty_time,
++ &nr);
+ spin_unlock(&wb->list_lock);
+
+ /* no attached inodes? bail out */
+ if (nr == 0) {
+ atomic_dec(&isw_nr_in_flight);
+- wb_put(isw->new_wb);
++ wb_put(new_wb);
+ kfree(isw);
+ return restart;
+ }
+
+- /*
+- * In addition to synchronizing among switchers, I_WB_SWITCH tells
+- * the RCU protected stat update paths to grab the i_page
+- * lock so that stat transfer can synchronize against them.
+- * Let's continue after I_WB_SWITCH is guaranteed to be visible.
+- */
+- INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn);
+- queue_rcu_work(isw_wq, &isw->work);
++ wb_queue_isw(new_wb, isw);
+
+ return restart;
+ }
+diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
+index 2ad261082bba5f..c5c9d89c73edcc 100644
+--- a/include/linux/backing-dev-defs.h
++++ b/include/linux/backing-dev-defs.h
+@@ -152,6 +152,10 @@ struct bdi_writeback {
+ struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */
+ struct list_head b_attached; /* attached inodes, protected by list_lock */
+ struct list_head offline_node; /* anchored at offline_cgwbs */
++ struct work_struct switch_work; /* work used to perform inode switching
++ * to this wb */
++ struct llist_head switch_wbs_ctxs; /* queued contexts for
++ * writeback switching */
+
+ union {
+ struct work_struct release_work;
+diff --git a/include/linux/writeback.h b/include/linux/writeback.h
+index 641a057e041329..b6bf90a7052599 100644
+--- a/include/linux/writeback.h
++++ b/include/linux/writeback.h
+@@ -293,6 +293,8 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
+ bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css);
+ }
+
++void inode_switch_wbs_work_fn(struct work_struct *work);
++
+ #else /* CONFIG_CGROUP_WRITEBACK */
+
+ static inline void inode_attach_wb(struct inode *inode, struct folio *folio)
+diff --git a/mm/backing-dev.c b/mm/backing-dev.c
+index bf0594ceb3ff87..956a7e23b5d634 100644
+--- a/mm/backing-dev.c
++++ b/mm/backing-dev.c
+@@ -634,6 +634,7 @@ static void cgwb_release_workfn(struct work_struct *work)
+ wb_exit(wb);
+ bdi_put(bdi);
+ WARN_ON_ONCE(!list_empty(&wb->b_attached));
++ WARN_ON_ONCE(work_pending(&wb->switch_work));
+ call_rcu(&wb->rcu, cgwb_free_rcu);
+ }
+
+@@ -710,6 +711,8 @@ static int cgwb_create(struct backing_dev_info *bdi,
+ wb->memcg_css = memcg_css;
+ wb->blkcg_css = blkcg_css;
+ INIT_LIST_HEAD(&wb->b_attached);
++ INIT_WORK(&wb->switch_work, inode_switch_wbs_work_fn);
++ init_llist_head(&wb->switch_wbs_ctxs);
+ INIT_WORK(&wb->release_work, cgwb_release_workfn);
+ set_bit(WB_registered, &wb->state);
+ bdi_get(bdi);
+@@ -840,6 +843,8 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
+ if (!ret) {
+ bdi->wb.memcg_css = &root_mem_cgroup->css;
+ bdi->wb.blkcg_css = blkcg_root_css;
++ INIT_WORK(&bdi->wb.switch_work, inode_switch_wbs_work_fn);
++ init_llist_head(&bdi->wb.switch_wbs_ctxs);
+ }
+ return ret;
+ }
+--
+2.53.0
+
--- /dev/null
+From f765f27fb5a2ce650ddda2e3de068ff0875778e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Apr 2026 11:36:19 +0200
+Subject: writeback: Fix use after free in inode_switch_wbs_work_fn()
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 6689f01d6740cf358932b3e97ee968c6099800d9 ]
+
+inode_switch_wbs_work_fn() has a loop like:
+
+ wb_get(new_wb);
+ while (1) {
+ list = llist_del_all(&new_wb->switch_wbs_ctxs);
+ /* Nothing to do? */
+ if (!list)
+ break;
+ ... process the items ...
+ }
+
+Items are added to the list like this:
+
+wb_queue_isw()
+ if (llist_add(&isw->list, &wb->switch_wbs_ctxs))
+ queue_work(isw_wq, &wb->switch_work);
+
+Because inode_switch_wbs_work_fn() loops when processing isw items, it
+can happen that wb->switch_work is pending while wb->switch_wbs_ctxs is
+empty. This is a problem because in that case wb can get freed (no isw
+items -> no wb reference) while the work is still pending causing
+use-after-free issues.
+
+We cannot just fix this by cancelling work when freeing wb because that
+could still trigger problematic 0 -> 1 transitions on wb refcount due to
+wb_get() in inode_switch_wbs_work_fn(). It could be all handled with
+more careful code but that seems unnecessarily complex so let's avoid
+that until it is proven that the looping actually brings practical
+benefit. Just remove the loop from inode_switch_wbs_work_fn() instead.
+That way when wb_queue_isw() queues work, we are guaranteed we have
+added the first item to wb->switch_wbs_ctxs and nobody is going to
+remove it (and drop the wb reference it holds) until the queued work
+runs.
+
+Fixes: e1b849cfa6b6 ("writeback: Avoid contention on wb->list_lock when switching inodes")
+CC: stable@vger.kernel.org
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://patch.msgid.link/20260413093618.17244-2-jack@suse.cz
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 36 +++++++++++++++++++-----------------
+ 1 file changed, 19 insertions(+), 17 deletions(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index a8d21a5f354859..e8afd4fd26f98e 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -559,28 +559,30 @@ void inode_switch_wbs_work_fn(struct work_struct *work)
+ struct inode_switch_wbs_context *isw, *next_isw;
+ struct llist_node *list;
+
++ list = llist_del_all(&new_wb->switch_wbs_ctxs);
+ /*
+- * Grab out reference to wb so that it cannot get freed under us
++ * Nothing to do? That would be a problem as references held by isw
++ * items protect wb from freeing...
++ */
++ if (WARN_ON_ONCE(!list))
++ return;
++
++ /*
++ * Grab our reference to wb so that it cannot get freed under us
+ * after we process all the isw items.
+ */
+ wb_get(new_wb);
+- while (1) {
+- list = llist_del_all(&new_wb->switch_wbs_ctxs);
+- /* Nothing to do? */
+- if (!list)
+- break;
+- /*
+- * In addition to synchronizing among switchers, I_WB_SWITCH
+- * tells the RCU protected stat update paths to grab the i_page
+- * lock so that stat transfer can synchronize against them.
+- * Let's continue after I_WB_SWITCH is guaranteed to be
+- * visible.
+- */
+- synchronize_rcu();
++ /*
++ * In addition to synchronizing among switchers, I_WB_SWITCH
++ * tells the RCU protected stat update paths to grab the i_page
++ * lock so that stat transfer can synchronize against them.
++ * Let's continue after I_WB_SWITCH is guaranteed to be
++ * visible.
++ */
++ synchronize_rcu();
+
+- llist_for_each_entry_safe(isw, next_isw, list, list)
+- process_inode_switch_wbs(new_wb, isw);
+- }
++ llist_for_each_entry_safe(isw, next_isw, list, list)
++ process_inode_switch_wbs(new_wb, isw);
+ wb_put(new_wb);
+ }
+
+--
+2.53.0
+
--- /dev/null
+From 196fd569f080d6d4b0c74e505c2ca49e9e448e3e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Jun 2026 11:13:27 +0000
+Subject: xfrm: hold dev ref until after transport_finish NF_HOOK
+
+From: Qi Tang <tpluszz77@gmail.com>
+
+[ Upstream commit 1c428b03840094410c5fb6a5db30640486bbbfcb ]
+
+After async crypto completes, xfrm_input_resume() calls dev_put()
+immediately on re-entry before the skb reaches transport_finish.
+The skb->dev pointer is then used inside NF_HOOK and its okfn,
+which can race with device teardown.
+
+Remove the dev_put from the async resumption entry and instead
+drop the reference after the NF_HOOK call in transport_finish,
+using a saved device pointer since NF_HOOK may consume the skb.
+This covers NF_DROP, NF_QUEUE and NF_STOLEN paths that skip
+the okfn.
+
+For non-transport exits (decaps, gro, drop) and secondary
+async return points, release the reference inline when
+async is set.
+
+Suggested-by: Florian Westphal <fw@strlen.de>
+Fixes: acf568ee859f ("xfrm: Reinject transport-mode packets through tasklet")
+Cc: stable@vger.kernel.org
+Signed-off-by: Qi Tang <tpluszz77@gmail.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+[ xfrm_inner_mode_input() always completes synchronously in this kernel
+version and cannot return -EINPROGRESS; returning -EINPROGRESS requires
+7ac64f4598b4 ("xfrm: add mode_cbs module functionality"), which is not
+present. The async dev_put path is therefore unreachable, so that hunk
+was omitted ]
+Signed-off-by: Simon Liebold <simonlie@amazon.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/xfrm4_input.c | 5 ++++-
+ net/ipv6/xfrm6_input.c | 5 ++++-
+ net/xfrm/xfrm_input.c | 12 ++++++++++--
+ 3 files changed, 18 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
+index 12a1a0f421956c..adf21d6b6076c1 100644
+--- a/net/ipv4/xfrm4_input.c
++++ b/net/ipv4/xfrm4_input.c
+@@ -50,6 +50,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
+ {
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct iphdr *iph = ip_hdr(skb);
++ struct net_device *dev = skb->dev;
+
+ iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol;
+
+@@ -73,8 +74,10 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
+ }
+
+ NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+- dev_net(skb->dev), NULL, skb, skb->dev, NULL,
++ dev_net(dev), NULL, skb, dev, NULL,
+ xfrm4_rcv_encap_finish);
++ if (async)
++ dev_put(dev);
+ return 0;
+ }
+
+diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
+index 9005fc156a20e6..699a001ac16629 100644
+--- a/net/ipv6/xfrm6_input.c
++++ b/net/ipv6/xfrm6_input.c
+@@ -43,6 +43,7 @@ static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
+ int xfrm6_transport_finish(struct sk_buff *skb, int async)
+ {
+ struct xfrm_offload *xo = xfrm_offload(skb);
++ struct net_device *dev = skb->dev;
+ int nhlen = -skb_network_offset(skb);
+
+ skb_network_header(skb)[IP6CB(skb)->nhoff] =
+@@ -68,8 +69,10 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
+ }
+
+ NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
+- dev_net(skb->dev), NULL, skb, skb->dev, NULL,
++ dev_net(dev), NULL, skb, dev, NULL,
+ xfrm6_transport_finish2);
++ if (async)
++ dev_put(dev);
+ return 0;
+ }
+
+diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
+index 90a79558dca259..5d3633ce6ba329 100644
+--- a/net/xfrm/xfrm_input.c
++++ b/net/xfrm/xfrm_input.c
+@@ -492,7 +492,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+ /* An encap_type of -1 indicates async resumption. */
+ if (encap_type == -1) {
+ async = 1;
+- dev_put(skb->dev);
+ seq = XFRM_SKB_CB(skb)->seq.input.low;
+ goto resume;
+ }
+@@ -645,8 +644,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+ dev_hold(skb->dev);
+
+ nexthdr = x->type->input(x, skb);
+- if (nexthdr == -EINPROGRESS)
++ if (nexthdr == -EINPROGRESS) {
++ if (async)
++ dev_put(skb->dev);
+ return 0;
++ }
+
+ dev_put(skb->dev);
+ }
+@@ -717,6 +719,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+ sp->olen = 0;
+ if (skb_valid_dst(skb))
+ skb_dst_drop(skb);
++ if (async)
++ dev_put(skb->dev);
+ gro_cells_receive(&gro_cells, skb);
+ return 0;
+ } else {
+@@ -736,6 +740,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+ sp->olen = 0;
+ if (skb_valid_dst(skb))
+ skb_dst_drop(skb);
++ if (async)
++ dev_put(skb->dev);
+ gro_cells_receive(&gro_cells, skb);
+ return err;
+ }
+@@ -746,6 +752,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+ drop_unlock:
+ spin_unlock(&x->lock);
+ drop:
++ if (async)
++ dev_put(skb->dev);
+ xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1);
+ kfree_skb(skb);
+ return 0;
+--
+2.53.0
+
--- /dev/null
+From 84bb266d43de21ea4f6f7a4ea0259e4a453ca999 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Jun 2026 11:13:26 +0000
+Subject: xfrm: hold device only for the asynchronous decryption
+
+From: Jianbo Liu <jianbol@nvidia.com>
+
+[ Upstream commit b05d42eefac737ce3cd80114d3579111023941b8 ]
+
+The dev_hold() on skb->dev during packet reception was originally
+added to prevent the device from being released prematurely during
+asynchronous decryption operations.
+
+As current hardware can offload decryption, this asynchronous path is
+not always utilized. This often results in a pattern of dev_hold()
+immediately followed by dev_put() for each packet, creating
+unnecessary reference counting overhead detrimental to performance.
+
+This patch optimizes this by skipping the dev_hold() and subsequent
+dev_put() when asynchronous decryption is not being performed.
+
+Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
+Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Stable-dep-of: 1c428b038400 ("xfrm: hold dev ref until after transport_finish NF_HOOK")
+Signed-off-by: Simon Liebold <simonlie@amazon.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/xfrm/xfrm_input.c | 17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
+index 8edcb32735e595..90a79558dca259 100644
+--- a/net/xfrm/xfrm_input.c
++++ b/net/xfrm/xfrm_input.c
+@@ -492,6 +492,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+ /* An encap_type of -1 indicates async resumption. */
+ if (encap_type == -1) {
+ async = 1;
++ dev_put(skb->dev);
+ seq = XFRM_SKB_CB(skb)->seq.input.low;
+ goto resume;
+ }
+@@ -638,18 +639,18 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+ XFRM_SKB_CB(skb)->seq.input.low = seq;
+ XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
+
+- dev_hold(skb->dev);
+-
+- if (crypto_done)
++ if (crypto_done) {
+ nexthdr = x->type_offload->input_tail(x, skb);
+- else
++ } else {
++ dev_hold(skb->dev);
++
+ nexthdr = x->type->input(x, skb);
++ if (nexthdr == -EINPROGRESS)
++ return 0;
+
+- if (nexthdr == -EINPROGRESS)
+- return 0;
++ dev_put(skb->dev);
++ }
+ resume:
+- dev_put(skb->dev);
+-
+ spin_lock(&x->lock);
+ if (nexthdr < 0) {
+ if (nexthdr == -EBADMSG) {
+--
+2.53.0
+
--- /dev/null
+From 6bcc61ad9ab1c9a039d5c6601cca2f09b3871e95 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Jun 2026 14:10:01 -0700
+Subject: KVM: VMX: Update SVI during runtime APICv activation
+
+From: Dongli Zhang <dongli.zhang@oracle.com>
+
+commit b2849bec936be642b5420801f902337f2507648e upstream.
+
+The APICv (apic->apicv_active) can be activated or deactivated at runtime,
+for instance, because of APICv inhibit reasons. Intel VMX employs different
+mechanisms to virtualize LAPIC based on whether APICv is active.
+
+When APICv is activated at runtime, GUEST_INTR_STATUS is used to configure
+and report the current pending IRR and ISR states. Unless a specific vector
+is explicitly included in EOI_EXIT_BITMAP, its EOI will not be trapped to
+KVM. Intel VMX automatically clears the corresponding ISR bit based on the
+GUEST_INTR_STATUS.SVI field.
+
+When APICv is deactivated at runtime, the VM_ENTRY_INTR_INFO_FIELD is used
+to specify the next interrupt vector to invoke upon VM-entry. The
+VMX IDT_VECTORING_INFO_FIELD is used to report un-invoked vectors on
+VM-exit. EOIs are always trapped to KVM, so the software can manually clear
+pending ISR bits.
+
+There are scenarios where, with APICv activated at runtime, a guest-issued
+EOI may not be able to clear the pending ISR bit.
+
+Taking vector 236 as an example, here is one scenario.
+
+1. Suppose APICv is inactive. Vector 236 is pending in the IRR.
+2. To handle KVM_REQ_EVENT, KVM moves vector 236 from the IRR to the ISR,
+and configures the VM_ENTRY_INTR_INFO_FIELD via vmx_inject_irq().
+3. After VM-entry, vector 236 is invoked through the guest IDT. At this
+point, the data in VM_ENTRY_INTR_INFO_FIELD is no longer valid. The guest
+interrupt handler for vector 236 is invoked.
+4. Suppose a VM exit occurs very early in the guest interrupt handler,
+before the EOI is issued.
+5. Nothing is reported through the IDT_VECTORING_INFO_FIELD because
+vector 236 has already been invoked in the guest.
+6. Now, suppose APICv is activated. Before the next VM-entry, KVM calls
+kvm_vcpu_update_apicv() to activate APICv.
+7. Unfortunately, GUEST_INTR_STATUS.SVI is not configured, although
+vector 236 is still pending in the ISR.
+8. After VM-entry, the guest finally issues the EOI for vector 236.
+However, because SVI is not configured, vector 236 is not cleared.
+9. ISR is stalled forever on vector 236.
+
+Here is another scenario.
+
+1. Suppose APICv is inactive. Vector 236 is pending in the IRR.
+2. To handle KVM_REQ_EVENT, KVM moves vector 236 from the IRR to the ISR,
+and configures the VM_ENTRY_INTR_INFO_FIELD via vmx_inject_irq().
+3. VM-exit occurs immediately after the next VM-entry. The vector 236 is
+not invoked through the guest IDT. Instead, it is saved to the
+IDT_VECTORING_INFO_FIELD during the VM-exit.
+4. KVM calls kvm_queue_interrupt() to re-queue the un-invoked vector 236
+into vcpu->arch.interrupt. A KVM_REQ_EVENT is requested.
+5. Now, suppose APICv is activated. Before the next VM-entry, KVM calls
+kvm_vcpu_update_apicv() to activate APICv.
+6. Although APICv is now active, KVM still uses the legacy
+VM_ENTRY_INTR_INFO_FIELD to re-inject vector 236. GUEST_INTR_STATUS.SVI is
+not configured.
+7. After the next VM-entry, vector 236 is invoked through the guest IDT.
+Finally, an EOI occurs. However, due to the lack of GUEST_INTR_STATUS.SVI
+configuration, vector 236 is not cleared from the ISR.
+8. ISR is stalled forever on vector 236.
+
+Using QEMU as an example, vector 236 is stuck in ISR forever.
+
+(qemu) info lapic 1
+dumping local APIC state for CPU 1
+
+LVT0 0x00010700 active-hi edge masked ExtINT (vec 0)
+LVT1 0x00010400 active-hi edge masked NMI
+LVTPC 0x00000400 active-hi edge NMI
+LVTERR 0x000000fe active-hi edge Fixed (vec 254)
+LVTTHMR 0x00010000 active-hi edge masked Fixed (vec 0)
+LVTT 0x000400ec active-hi edge tsc-deadline Fixed (vec 236)
+Timer DCR=0x0 (divide by 2) initial_count = 0 current_count = 0
+SPIV 0x000001ff APIC enabled, focus=off, spurious vec 255
+ICR 0x000000fd physical edge de-assert no-shorthand
+ICR2 0x00000000 cpu 0 (X2APIC ID)
+ESR 0x00000000
+ISR 236
+IRR 37(level) 236
+
+The issue isn't applicable to AMD SVM as KVM simply writes vmcb01 directly
+irrespective of whether L1 (vmcb01) or L2 (vmcb02) is active (unlike VMX,
+there is no need/cost to switch between VMCBs). In addition,
+APICV_INHIBIT_REASON_IRQWIN ensures AMD SVM AVIC is not activated until
+the last interrupt is EOI'd.
+
+Fix the bug by configuring Intel VMX GUEST_INTR_STATUS.SVI if APICv is
+activated at runtime.
+
+Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
+Reviewed-by: Chao Gao <chao.gao@intel.com>
+Link: https://patch.msgid.link/20251110063212.34902-1-dongli.zhang@oracle.com
+[sean: call out that SVM writes vmcb01 directly, tweak comment]
+Link: https://patch.msgid.link/20251205231913.441872-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+(cherry picked from commit b2849bec936be642b5420801f902337f2507648e)
+Cc: stable@vger.kernel.org # 6.6.x and above
+Cc: Gulshan Gabel <gulshan.gabel@nutanix.com>
+Signed-off-by: Jon Kohler <jon@nutanix.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/vmx.c | 9 ---------
+ arch/x86/kvm/x86.c | 7 +++++++
+ 2 files changed, 7 insertions(+), 9 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index c084f48e2b0b98..b7798ced7b505c 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6886,15 +6886,6 @@ void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
+ * VM-Exit, otherwise L1 with run with a stale SVI.
+ */
+ if (is_guest_mode(vcpu)) {
+- /*
+- * KVM is supposed to forward intercepted L2 EOIs to L1 if VID
+- * is enabled in vmcs12; as above, the EOIs affect L2's vAPIC.
+- * Note, userspace can stuff state while L2 is active; assert
+- * that VID is disabled if and only if the vCPU is in KVM_RUN
+- * to avoid false positives if userspace is setting APIC state.
+- */
+- WARN_ON_ONCE(vcpu->wants_to_run &&
+- nested_cpu_has_vid(get_vmcs12(vcpu)));
+ to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true;
+ return;
+ }
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index ad2b7158b9c8ea..a21ebe04aa23a8 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -10950,9 +10950,16 @@ void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
+ * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
+ * still active when the interrupt got accepted. Make sure
+ * kvm_check_and_inject_events() is called to check for that.
++ *
++ * Update SVI when APICv gets enabled, otherwise SVI won't reflect the
++ * highest bit in vISR and the next accelerated EOI in the guest won't
++ * be virtualized correctly (the CPU uses SVI to determine which vISR
++ * vector to clear).
+ */
+ if (!apic->apicv_active)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
++ else
++ kvm_apic_update_hwapic_isr(vcpu);
+
+ out:
+ preempt_enable();
+--
+2.53.0
+
net-txgbe-rename-the-sfp-related.patch
net-txgbe-initialize-module-info-buffer.patch
ipv6-fix-a-potential-npd-in-cleanup_prefix_route.patch
+kvm-vmx-update-svi-during-runtime-apicv-activation.patch
--- /dev/null
+From 56ce9e8fa76f29b011850684b9bc25901e6071dc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Jun 2026 20:24:08 +0000
+Subject: netfilter: ctnetlink: ensure safe access to master conntrack
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit bffcaad9afdfe45d7fc777397d3b83c1e3ebffe5 ]
+
+Holding reference on the expectation is not sufficient, the master
+conntrack object can just go away, making exp->master invalid.
+
+To access exp->master safely:
+
+- Grab the nf_conntrack_expect_lock, this gets serialized with
+ clean_from_lists() which also holds this lock when the master
+ conntrack goes away.
+
+- Hold a reference on the master conntrack via nf_conntrack_find_get().
+ This is not so easy, since the master tuple needed to look up the master
+ conntrack is not available in the existing problematic paths.
+
+This patch goes for extending the nf_conntrack_expect_lock section
+to address this issue for simplicity, in the cases that are described
+below this is just slightly extending the lock section.
+
+The add expectation command already holds a reference to the master
+conntrack from ctnetlink_create_expect().
+
+However, the delete expectation command needs to grab the spinlock
+before looking up the expectation. Expand the existing spinlock
+section so that it also covers the expectation lookup. Note that
+the nf_ct_expect_iterate_net() calls already grab the spinlock while
+iterating over the expectation table, which is correct.
+
+The get expectation command needs to grab the spinlock to ensure the
+master conntrack does not go away. This also expands the existing
+spinlock section to cover the expectation lookup. The netlink skb
+allocation is moved out of the spinlock section to keep it GFP_KERNEL.
+
+For the expectation events, the IPEXP_DESTROY event is already delivered
+under the spinlock, just move the delivery of IPEXP_NEW under the
+spinlock too because the master conntrack event cache is reached through
+exp->master.
+
+While at it, add lockdep notations to help identify what codepaths need
+to grab the spinlock.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+[ fix timer_delete -> del_timer in diff context lines since 8fa7292
+("treewide: Switch/rename to timer_delete[_sync]()") landed in 6.15 ]
+Signed-off-by: Mark Bundschuh <mkbund@amazon.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_conntrack_core.h | 5 ++++
+ net/netfilter/nf_conntrack_ecache.c | 2 ++
+ net/netfilter/nf_conntrack_expect.c | 10 +++++++-
+ net/netfilter/nf_conntrack_netlink.c | 28 +++++++++++++++--------
+ 4 files changed, 35 insertions(+), 10 deletions(-)
+
+diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
+index 3384859a892101..8883575adcc1e7 100644
+--- a/include/net/netfilter/nf_conntrack_core.h
++++ b/include/net/netfilter/nf_conntrack_core.h
+@@ -83,6 +83,11 @@ void nf_conntrack_lock(spinlock_t *lock);
+
+ extern spinlock_t nf_conntrack_expect_lock;
+
++static inline void lockdep_nfct_expect_lock_held(void)
++{
++ lockdep_assert_held(&nf_conntrack_expect_lock);
++}
++
+ /* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
+
+ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
+diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
+index 69948e1d6974e3..6526bdcca580fd 100644
+--- a/net/netfilter/nf_conntrack_ecache.c
++++ b/net/netfilter/nf_conntrack_ecache.c
+@@ -237,6 +237,8 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
+ struct nf_ct_event_notifier *notify;
+ struct nf_conntrack_ecache *e;
+
++ lockdep_nfct_expect_lock_held();
++
+ rcu_read_lock();
+ notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
+ if (!notify)
+diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
+index 70bcddfc17ccc2..379711ea5ab67e 100644
+--- a/net/netfilter/nf_conntrack_expect.c
++++ b/net/netfilter/nf_conntrack_expect.c
+@@ -51,6 +51,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
+ struct net *net = nf_ct_exp_net(exp);
+ struct nf_conntrack_net *cnet;
+
++ lockdep_nfct_expect_lock_held();
+ WARN_ON(!master_help);
+ WARN_ON(timer_pending(&exp->timeout));
+
+@@ -118,6 +119,8 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
+
+ bool nf_ct_remove_expect(struct nf_conntrack_expect *exp)
+ {
++ lockdep_nfct_expect_lock_held();
++
+ if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+@@ -177,6 +180,8 @@ nf_ct_find_expectation(struct net *net,
+ struct nf_conntrack_expect *i, *exp = NULL;
+ unsigned int h;
+
++ lockdep_nfct_expect_lock_held();
++
+ if (!cnet->expect_count)
+ return NULL;
+
+@@ -459,6 +464,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
+ unsigned int h;
+ int ret = 0;
+
++ lockdep_nfct_expect_lock_held();
++
+ if (!master_help) {
+ ret = -ESHUTDOWN;
+ goto out;
+@@ -515,8 +522,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
+
+ nf_ct_expect_insert(expect);
+
+- spin_unlock_bh(&nf_conntrack_expect_lock);
+ nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
++ spin_unlock_bh(&nf_conntrack_expect_lock);
++
+ return 0;
+ out:
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
+index 255996f43d854c..eff5008f5e9d4e 100644
+--- a/net/netfilter/nf_conntrack_netlink.c
++++ b/net/netfilter/nf_conntrack_netlink.c
+@@ -3326,31 +3326,37 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
+ if (err < 0)
+ return err;
+
++ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
++ if (!skb2)
++ return -ENOMEM;
++
++ spin_lock_bh(&nf_conntrack_expect_lock);
+ exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
+- if (!exp)
++ if (!exp) {
++ spin_unlock_bh(&nf_conntrack_expect_lock);
++ kfree_skb(skb2);
+ return -ENOENT;
++ }
+
+ if (cda[CTA_EXPECT_ID]) {
+ __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
+
+ if (id != nf_expect_get_id(exp)) {
+ nf_ct_expect_put(exp);
++ spin_unlock_bh(&nf_conntrack_expect_lock);
++ kfree_skb(skb2);
+ return -ENOENT;
+ }
+ }
+
+- skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+- if (!skb2) {
+- nf_ct_expect_put(exp);
+- return -ENOMEM;
+- }
+-
+ rcu_read_lock();
+ err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
+ exp);
+ rcu_read_unlock();
+ nf_ct_expect_put(exp);
++ spin_unlock_bh(&nf_conntrack_expect_lock);
++
+ if (err <= 0) {
+ kfree_skb(skb2);
+ return -ENOMEM;
+@@ -3397,22 +3403,26 @@ static int ctnetlink_del_expect(struct sk_buff *skb,
+ if (err < 0)
+ return err;
+
++ spin_lock_bh(&nf_conntrack_expect_lock);
++
+ /* bump usage count to 2 */
+ exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
+- if (!exp)
++ if (!exp) {
++ spin_unlock_bh(&nf_conntrack_expect_lock);
+ return -ENOENT;
++ }
+
+ if (cda[CTA_EXPECT_ID]) {
+ __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
+
+ if (id != nf_expect_get_id(exp)) {
+ nf_ct_expect_put(exp);
++ spin_unlock_bh(&nf_conntrack_expect_lock);
+ return -ENOENT;
+ }
+ }
+
+ /* after list removal, usage count == 1 */
+- spin_lock_bh(&nf_conntrack_expect_lock);
+ if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
+ nlmsg_report(info->nlh));
+--
+2.53.0
+
net-mvpp2-refill-rx-buffers-before-xdp-or-skb-use.patch
net-mvpp2-build-skb-from-xdp-adjusted-data-on-xdp_pa.patch
ipv6-fix-a-potential-npd-in-cleanup_prefix_route.patch
+netfilter-ctnetlink-ensure-safe-access-to-master-con.patch