--- /dev/null
+From stable+bounces-119651-greg=kroah.com@vger.kernel.org Wed Feb 26 10:40:21 2025
+From: "Ricardo Cañuelo Navarro" <rcn@igalia.com>
+Date: Wed, 26 Feb 2025 10:39:05 +0100
+Subject: net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.
+To: stable@vger.kernel.org, Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: revest@google.com, kernel-dev@igalia.com, "Andrii Nakryiko" <andrii@kernel.org>, "Eduard Zingerman" <eddyz87@gmail.com>, "Hao Luo" <haoluo@google.com>, "Jiri Olsa" <jolsa@kernel.org>, "John Fastabend" <john.fastabend@gmail.com>, "KP Singh" <kpsingh@kernel.org>, "Martin KaFai Lau" <martin.lau@linux.dev>, "Song Liu" <song@kernel.org>, "Stanislav Fomichev" <sdf@google.com>, "Yonghong Song" <yonghong.song@linux.dev>, "Alexei Starovoitov" <ast@kernel.org>, "Jesper Dangaard Brouer" <hawk@kernel.org>, "Toke Høiland-Jørgensen" <toke@redhat.com>, "Jakub Kicinski" <kuba@kernel.org>
+Message-ID: <20250226-20250204-kasan-slab-use-after-free-read-in-dev_map_enqueue__submit-v3-1-360efec441ba@igalia.com>
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+[ Upstream commit 401cb7dae8130fd34eb84648e02ab4c506df7d5e ]
+
+The XDP redirect process is two-staged:
+- bpf_prog_run_xdp() is invoked to run an eBPF program which inspects the
+ packet and makes decisions. While doing that, the per-CPU variable
+ bpf_redirect_info is used.
+
+- Afterwards xdp_do_redirect() is invoked, which accesses bpf_redirect_info
+ and may also access other per-CPU variables like xskmap_flush_list.
+
+At the very end of the NAPI callback, xdp_do_flush() is invoked, which
+does not access bpf_redirect_info but will touch the individual per-CPU
+lists (sketched below).
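+
+In pseudo-C, the NAPI-side pattern in question (function names as in
+mainline, heavily simplified):
+
+    u32 act = bpf_prog_run_xdp(xdp_prog, &xdp); /* may set bpf_redirect_info
+                                                 * via bpf_redirect() */
+    if (act == XDP_REDIRECT)
+        xdp_do_redirect(dev, &xdp, xdp_prog);   /* reads bpf_redirect_info */
+    /* ... */
+    xdp_do_flush();                             /* flushes the per-CPU lists */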
+
+The per-CPU variables are only used in the NAPI callback, hence disabling
+bottom halves is the only protection mechanism. Users from preemptible
+context (like cpu_map_kthread_run()) explicitly disable bottom halves
+for protection reasons.
+Since local_bh_disable() does not provide locking on PREEMPT_RT, this
+data structure requires explicit locking there.
+
+PREEMPT_RT has forced-threaded interrupts enabled and every NAPI
+callback runs in a thread. If each thread has its own data structure
+then locking can be avoided.
+
+Create a struct bpf_net_context which contains struct bpf_redirect_info.
+Define the variable on the stack, use bpf_net_ctx_set() to save a
+pointer to it and bpf_net_ctx_clear() to remove it again.
+bpf_net_ctx_set() calls may nest. For instance, a function can be used
+from within NET_RX_SOFTIRQ/net_rx_action, which uses bpf_net_ctx_set(),
+and from NET_TX_SOFTIRQ, which does not. Therefore only the first
+invocation updates the pointer (see the sketch below).
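+
+A minimal caller sketch (illustrative only; the real call sites are in
+the diff below):
+
+    struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+
+    local_bh_disable();
+    /* Returns NULL if an outer caller already installed a context. */
+    bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
+
+    /* ... run the eBPF program, xdp_do_redirect(), ... */
+
+    /* No-op for the nested (NULL) case. */
+    bpf_net_ctx_clear(bpf_net_ctx);
+    local_bh_enable();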
+Use bpf_net_ctx_get_ri() as a wrapper to retrieve the current struct
+bpf_redirect_info. The returned data structure is zero-initialized to
+ensure nothing is leaked from the stack. This is done on first usage of
+the struct: bpf_net_ctx_set() sets bpf_redirect_info::kern_flags to 0
+to note that initialisation is required, and the first invocation of
+bpf_net_ctx_get_ri() will memset() the data structure and update
+bpf_redirect_info::kern_flags.
+bpf_redirect_info::nh is excluded from the memset() because it is only
+used once BPF_F_NEIGH is set, which also sets the nh member. kern_flags
+is moved past nh to exclude it from the memset() as well.
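+
+Sketched (equivalent to bpf_net_ctx_get_ri() in the diff below; the
+memset() length stops right before ::nh, so ::nh and the trailing
+::kern_flags keep their values):
+
+    if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_RI_INIT)) {
+        memset(&bpf_net_ctx->ri, 0,
+               offsetof(struct bpf_redirect_info, nh));
+        bpf_net_ctx->ri.kern_flags |= BPF_RI_F_RI_INIT;
+    }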
+
+The pointer to the bpf_net_context is saved in the task's task_struct.
+Always using the bpf_net_context approach has the advantage that there
+are almost zero differences between PREEMPT_RT and non-PREEMPT_RT builds.
+
+Cc: Andrii Nakryiko <andrii@kernel.org>
+Cc: Eduard Zingerman <eddyz87@gmail.com>
+Cc: Hao Luo <haoluo@google.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: John Fastabend <john.fastabend@gmail.com>
+Cc: KP Singh <kpsingh@kernel.org>
+Cc: Martin KaFai Lau <martin.lau@linux.dev>
+Cc: Song Liu <song@kernel.org>
+Cc: Stanislav Fomichev <sdf@google.com>
+Cc: Yonghong Song <yonghong.song@linux.dev>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
+Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://patch.msgid.link/20240620132727.660738-15-bigeasy@linutronix.de
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[rcn: the backport addresses the differences in
+net/core/dev.c:napi_threaded_poll(), busy_poll_stop(), napi_busy_loop()
+and net_rx_action() between upstream and stable. This allows the patch
+to be applied on stable without bringing in additional dependencies,
+such as dad6b9770263 ("net: Allow to use SMP threads for backlog
+NAPI.") or the rest of the related series.
+The remaining changes only adapt context lines and are unrelated to the
+purpose of the patch.]
+Signed-off-by: Ricardo Cañuelo Navarro <rcn@igalia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/filter.h | 56 ++++++++++++++++++++++++++++++++++++++++---------
+ include/linux/sched.h | 3 ++
+ kernel/bpf/cpumap.c | 3 ++
+ kernel/bpf/devmap.c | 9 +++++++
+ kernel/fork.c | 1
+ net/bpf/test_run.c | 11 ++++++++-
+ net/core/dev.c | 28 +++++++++++++++++++++++-
+ net/core/filter.c | 44 ++++++++++----------------------------
+ net/core/lwt_bpf.c | 3 ++
+ 9 files changed, 113 insertions(+), 45 deletions(-)
+
+--- a/include/linux/filter.h
++++ b/include/linux/filter.h
+@@ -654,21 +654,59 @@ struct bpf_nh_params {
+ };
+ };
+
++/* flags for bpf_redirect_info kern_flags */
++#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */
++#define BPF_RI_F_RI_INIT BIT(1)
++
+ struct bpf_redirect_info {
+ u64 tgt_index;
+ void *tgt_value;
+ struct bpf_map *map;
+ u32 flags;
+- u32 kern_flags;
+ u32 map_id;
+ enum bpf_map_type map_type;
+ struct bpf_nh_params nh;
++ u32 kern_flags;
+ };
+
+-DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
++struct bpf_net_context {
++ struct bpf_redirect_info ri;
++};
+
+-/* flags for bpf_redirect_info kern_flags */
+-#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */
++static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bpf_net_ctx)
++{
++ struct task_struct *tsk = current;
++
++ if (tsk->bpf_net_context != NULL)
++ return NULL;
++ bpf_net_ctx->ri.kern_flags = 0;
++
++ tsk->bpf_net_context = bpf_net_ctx;
++ return bpf_net_ctx;
++}
++
++static inline void bpf_net_ctx_clear(struct bpf_net_context *bpf_net_ctx)
++{
++ if (bpf_net_ctx)
++ current->bpf_net_context = NULL;
++}
++
++static inline struct bpf_net_context *bpf_net_ctx_get(void)
++{
++ return current->bpf_net_context;
++}
++
++static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void)
++{
++ struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
++
++ if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_RI_INIT)) {
++ memset(&bpf_net_ctx->ri, 0, offsetof(struct bpf_net_context, ri.nh));
++ bpf_net_ctx->ri.kern_flags |= BPF_RI_F_RI_INIT;
++ }
++
++ return &bpf_net_ctx->ri;
++}
+
+ /* Compute the linear packet data range [data, data_end) which
+ * will be accessed by various program types (cls_bpf, act_bpf,
+@@ -929,25 +967,23 @@ struct bpf_prog *bpf_patch_insn_single(s
+ const struct bpf_insn *patch, u32 len);
+ int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt);
+
+-void bpf_clear_redirect_map(struct bpf_map *map);
+-
+ static inline bool xdp_return_frame_no_direct(void)
+ {
+- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
++ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
+
+ return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT;
+ }
+
+ static inline void xdp_set_return_frame_no_direct(void)
+ {
+- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
++ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
+
+ ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT;
+ }
+
+ static inline void xdp_clear_return_frame_no_direct(void)
+ {
+- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
++ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
+
+ ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT;
+ }
+@@ -1503,7 +1539,7 @@ static __always_inline long __bpf_xdp_re
+ u64 flags, const u64 flag_mask,
+ void *lookup_elem(struct bpf_map *map, u32 key))
+ {
+- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
++ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
+ const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX;
+
+ /* Lower bits of the flags are used as return code on lookup failure */
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -45,6 +45,7 @@ struct bio_list;
+ struct blk_plug;
+ struct bpf_local_storage;
+ struct bpf_run_ctx;
++struct bpf_net_context;
+ struct capture_control;
+ struct cfs_rq;
+ struct fs_struct;
+@@ -1494,6 +1495,8 @@ struct task_struct {
+ /* Used for BPF run context */
+ struct bpf_run_ctx *bpf_ctx;
+ #endif
++ /* Used by BPF for per-TASK xdp storage */
++ struct bpf_net_context *bpf_net_context;
+
+ #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ unsigned long lowest_stack;
+--- a/kernel/bpf/cpumap.c
++++ b/kernel/bpf/cpumap.c
+@@ -239,12 +239,14 @@ static int cpu_map_bpf_prog_run(struct b
+ int xdp_n, struct xdp_cpumap_stats *stats,
+ struct list_head *list)
+ {
++ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+ int nframes;
+
+ if (!rcpu->prog)
+ return xdp_n;
+
+ rcu_read_lock_bh();
++ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
+
+ nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, xdp_n, stats);
+
+@@ -254,6 +256,7 @@ static int cpu_map_bpf_prog_run(struct b
+ if (unlikely(!list_empty(list)))
+ cpu_map_bpf_prog_run_skb(rcpu, list, stats);
+
++ bpf_net_ctx_clear(bpf_net_ctx);
+ rcu_read_unlock_bh(); /* resched point, may call do_softirq() */
+
+ return nframes;
+--- a/kernel/bpf/devmap.c
++++ b/kernel/bpf/devmap.c
+@@ -196,7 +196,14 @@ static void dev_map_free(struct bpf_map
+ list_del_rcu(&dtab->list);
+ spin_unlock(&dev_map_lock);
+
+- bpf_clear_redirect_map(map);
++ /* bpf_redirect_info->map is assigned in __bpf_xdp_redirect_map()
++ * during NAPI callback and cleared after the XDP redirect. There is no
++ * explicit RCU read section which protects bpf_redirect_info->map but
++ * local_bh_disable() also marks the beginning an RCU section. This
++ * makes the complete softirq callback RCU protected. Thus after
++ * following synchronize_rcu() there no bpf_redirect_info->map == map
++ * assignment.
++ */
+ synchronize_rcu();
+
+ /* Make sure prior __dev_map_entry_free() have completed. */
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -2466,6 +2466,7 @@ __latent_entropy struct task_struct *cop
+ RCU_INIT_POINTER(p->bpf_storage, NULL);
+ p->bpf_ctx = NULL;
+ #endif
++ p->bpf_net_context = NULL;
+
+ /* Perform scheduler related setup. Assign this task to a CPU. */
+ retval = sched_fork(clone_flags, p);
+--- a/net/bpf/test_run.c
++++ b/net/bpf/test_run.c
+@@ -282,9 +282,10 @@ static int xdp_recv_frames(struct xdp_fr
+ static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
+ u32 repeat)
+ {
+- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
++ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+ int err = 0, act, ret, i, nframes = 0, batch_sz;
+ struct xdp_frame **frames = xdp->frames;
++ struct bpf_redirect_info *ri;
+ struct xdp_page_head *head;
+ struct xdp_frame *frm;
+ bool redirect = false;
+@@ -294,6 +295,8 @@ static int xdp_test_run_batch(struct xdp
+ batch_sz = min_t(u32, repeat, xdp->batch_size);
+
+ local_bh_disable();
++ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
++ ri = bpf_net_ctx_get_ri();
+ xdp_set_return_frame_no_direct();
+
+ for (i = 0; i < batch_sz; i++) {
+@@ -358,6 +361,7 @@ out:
+ }
+
+ xdp_clear_return_frame_no_direct();
++ bpf_net_ctx_clear(bpf_net_ctx);
+ local_bh_enable();
+ return err;
+ }
+@@ -393,6 +397,7 @@ static int bpf_test_run_xdp_live(struct
+ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
+ u32 *retval, u32 *time, bool xdp)
+ {
++ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+ struct bpf_prog_array_item item = {.prog = prog};
+ struct bpf_run_ctx *old_ctx;
+ struct bpf_cg_run_ctx run_ctx;
+@@ -418,10 +423,14 @@ static int bpf_test_run(struct bpf_prog
+ do {
+ run_ctx.prog_item = &item;
+ local_bh_disable();
++ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
++
+ if (xdp)
+ *retval = bpf_prog_run_xdp(prog, ctx);
+ else
+ *retval = bpf_prog_run(prog, ctx);
++
++ bpf_net_ctx_clear(bpf_net_ctx);
+ local_bh_enable();
+ } while (bpf_test_timer_continue(&t, 1, repeat, &ret, time));
+ bpf_reset_run_ctx(old_ctx);
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4050,10 +4050,13 @@ sch_handle_ingress(struct sk_buff *skb,
+ struct net_device *orig_dev, bool *another)
+ {
+ struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
++ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+ int sch_ret;
+
+ if (!entry)
+ return skb;
++
++ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
+ if (*pt_prev) {
+ *ret = deliver_skb(skb, *pt_prev, orig_dev);
+ *pt_prev = NULL;
+@@ -4082,10 +4085,12 @@ ingress_verdict:
+ break;
+ }
+ *ret = NET_RX_SUCCESS;
++ bpf_net_ctx_clear(bpf_net_ctx);
+ return NULL;
+ case TC_ACT_SHOT:
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
+ *ret = NET_RX_DROP;
++ bpf_net_ctx_clear(bpf_net_ctx);
+ return NULL;
+ /* used by tc_run */
+ case TC_ACT_STOLEN:
+@@ -4095,8 +4100,10 @@ ingress_verdict:
+ fallthrough;
+ case TC_ACT_CONSUMED:
+ *ret = NET_RX_SUCCESS;
++ bpf_net_ctx_clear(bpf_net_ctx);
+ return NULL;
+ }
++ bpf_net_ctx_clear(bpf_net_ctx);
+
+ return skb;
+ }
+@@ -4105,11 +4112,14 @@ static __always_inline struct sk_buff *
+ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+ {
+ struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
++ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+ int sch_ret;
+
+ if (!entry)
+ return skb;
+
++ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
++
+ /* qdisc_skb_cb(skb)->pkt_len & tcx_set_ingress() was
+ * already set by the caller.
+ */
+@@ -4125,10 +4135,12 @@ egress_verdict:
+ /* No need to push/pop skb's mac_header here on egress! */
+ skb_do_redirect(skb);
+ *ret = NET_XMIT_SUCCESS;
++ bpf_net_ctx_clear(bpf_net_ctx);
+ return NULL;
+ case TC_ACT_SHOT:
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+ *ret = NET_XMIT_DROP;
++ bpf_net_ctx_clear(bpf_net_ctx);
+ return NULL;
+ /* used by tc_run */
+ case TC_ACT_STOLEN:
+@@ -4138,8 +4150,10 @@ egress_verdict:
+ fallthrough;
+ case TC_ACT_CONSUMED:
+ *ret = NET_XMIT_SUCCESS;
++ bpf_net_ctx_clear(bpf_net_ctx);
+ return NULL;
+ }
++ bpf_net_ctx_clear(bpf_net_ctx);
+
+ return skb;
+ }
+@@ -6242,6 +6256,7 @@ static void __busy_poll_stop(struct napi
+ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll,
+ u16 budget)
+ {
++ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+ bool skip_schedule = false;
+ unsigned long timeout;
+ int rc;
+@@ -6259,6 +6274,7 @@ static void busy_poll_stop(struct napi_s
+ clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
+
+ local_bh_disable();
++ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
+
+ if (prefer_busy_poll) {
+ napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
+@@ -6281,6 +6297,7 @@ static void busy_poll_stop(struct napi_s
+ netpoll_poll_unlock(have_poll_lock);
+ if (rc == budget)
+ __busy_poll_stop(napi, skip_schedule);
++ bpf_net_ctx_clear(bpf_net_ctx);
+ local_bh_enable();
+ }
+
+@@ -6290,6 +6307,7 @@ void napi_busy_loop(unsigned int napi_id
+ {
+ unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
+ int (*napi_poll)(struct napi_struct *napi, int budget);
++ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+ void *have_poll_lock = NULL;
+ struct napi_struct *napi;
+
+@@ -6308,6 +6326,7 @@ restart:
+ int work = 0;
+
+ local_bh_disable();
++ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
+ if (!napi_poll) {
+ unsigned long val = READ_ONCE(napi->state);
+
+@@ -6337,6 +6356,7 @@ count:
+ if (work > 0)
+ __NET_ADD_STATS(dev_net(napi->dev),
+ LINUX_MIB_BUSYPOLLRXPACKETS, work);
++ bpf_net_ctx_clear(bpf_net_ctx);
+ local_bh_enable();
+
+ if (!loop_end || loop_end(loop_end_arg, start_time))
+@@ -6725,6 +6745,7 @@ static void skb_defer_free_flush(struct
+
+ static int napi_threaded_poll(void *data)
+ {
++ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+ struct napi_struct *napi = data;
+ struct softnet_data *sd;
+ void *have;
+@@ -6736,6 +6757,7 @@ static int napi_threaded_poll(void *data
+ bool repoll = false;
+
+ local_bh_disable();
++ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
+ sd = this_cpu_ptr(&softnet_data);
+ sd->in_napi_threaded_poll = true;
+
+@@ -6751,6 +6773,7 @@ static int napi_threaded_poll(void *data
+ net_rps_action_and_irq_enable(sd);
+ }
+ skb_defer_free_flush(sd);
++ bpf_net_ctx_clear(bpf_net_ctx);
+ local_bh_enable();
+
+ if (!repoll)
+@@ -6768,10 +6791,12 @@ static __latent_entropy void net_rx_acti
+ struct softnet_data *sd = this_cpu_ptr(&softnet_data);
+ unsigned long time_limit = jiffies +
+ usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
++ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+ int budget = READ_ONCE(netdev_budget);
+ LIST_HEAD(list);
+ LIST_HEAD(repoll);
+
++ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
+ start:
+ sd->in_net_rx_action = true;
+ local_irq_disable();
+@@ -6824,7 +6849,8 @@ start:
+ sd->in_net_rx_action = false;
+
+ net_rps_action_and_irq_enable(sd);
+-end:;
++end:
++ bpf_net_ctx_clear(bpf_net_ctx);
+ }
+
+ struct netdev_adjacent {
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -2471,12 +2471,9 @@ static const struct bpf_func_proto bpf_c
+ .arg3_type = ARG_ANYTHING,
+ };
+
+-DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
+-EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
+-
+ int skb_do_redirect(struct sk_buff *skb)
+ {
+- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
++ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
+ struct net *net = dev_net(skb->dev);
+ struct net_device *dev;
+ u32 flags = ri->flags;
+@@ -2512,7 +2509,7 @@ out_drop:
+
+ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
+ {
+- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
++ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
+
+ if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
+ return TC_ACT_SHOT;
+@@ -2533,7 +2530,7 @@ static const struct bpf_func_proto bpf_r
+
+ BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags)
+ {
+- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
++ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
+
+ if (unlikely(flags))
+ return TC_ACT_SHOT;
+@@ -2555,7 +2552,7 @@ static const struct bpf_func_proto bpf_r
+ BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
+ int, plen, u64, flags)
+ {
+- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
++ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
+
+ if (unlikely((plen && plen < sizeof(*params)) || flags))
+ return TC_ACT_SHOT;
+@@ -4297,30 +4294,13 @@ void xdp_do_flush(void)
+ }
+ EXPORT_SYMBOL_GPL(xdp_do_flush);
+
+-void bpf_clear_redirect_map(struct bpf_map *map)
+-{
+- struct bpf_redirect_info *ri;
+- int cpu;
+-
+- for_each_possible_cpu(cpu) {
+- ri = per_cpu_ptr(&bpf_redirect_info, cpu);
+- /* Avoid polluting remote cacheline due to writes if
+- * not needed. Once we pass this test, we need the
+- * cmpxchg() to make sure it hasn't been changed in
+- * the meantime by remote CPU.
+- */
+- if (unlikely(READ_ONCE(ri->map) == map))
+- cmpxchg(&ri->map, map, NULL);
+- }
+-}
+-
+ DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+ EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
+
+ u32 xdp_master_redirect(struct xdp_buff *xdp)
+ {
++ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
+ struct net_device *master, *slave;
+- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+ master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
+ slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
+@@ -4392,7 +4372,7 @@ static __always_inline int __xdp_do_redi
+ map = READ_ONCE(ri->map);
+
+ /* The map pointer is cleared when the map is being torn
+- * down by bpf_clear_redirect_map()
++ * down by dev_map_free()
+ */
+ if (unlikely(!map)) {
+ err = -ENOENT;
+@@ -4437,7 +4417,7 @@ err:
+ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+ {
+- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
++ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
+ enum bpf_map_type map_type = ri->map_type;
+
+ if (map_type == BPF_MAP_TYPE_XSKMAP)
+@@ -4451,7 +4431,7 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect);
+ int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp,
+ struct xdp_frame *xdpf, struct bpf_prog *xdp_prog)
+ {
+- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
++ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
+ enum bpf_map_type map_type = ri->map_type;
+
+ if (map_type == BPF_MAP_TYPE_XSKMAP)
+@@ -4468,7 +4448,7 @@ static int xdp_do_generic_redirect_map(s
+ enum bpf_map_type map_type, u32 map_id,
+ u32 flags)
+ {
+- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
++ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
+ struct bpf_map *map;
+ int err;
+
+@@ -4480,7 +4460,7 @@ static int xdp_do_generic_redirect_map(s
+ map = READ_ONCE(ri->map);
+
+ /* The map pointer is cleared when the map is being torn
+- * down by bpf_clear_redirect_map()
++ * down by dev_map_free()
+ */
+ if (unlikely(!map)) {
+ err = -ENOENT;
+@@ -4522,7 +4502,7 @@ err:
+ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
+ struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
+ {
+- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
++ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
+ enum bpf_map_type map_type = ri->map_type;
+ void *fwd = ri->tgt_value;
+ u32 map_id = ri->map_id;
+@@ -4558,7 +4538,7 @@ err:
+
+ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
+ {
+- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
++ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
+
+ if (unlikely(flags))
+ return XDP_ABORTED;
+--- a/net/core/lwt_bpf.c
++++ b/net/core/lwt_bpf.c
+@@ -38,6 +38,7 @@ static inline struct bpf_lwt *bpf_lwt_lw
+ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
+ struct dst_entry *dst, bool can_redirect)
+ {
++ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+ int ret;
+
+ /* Migration disable and BH disable are needed to protect per-cpu
+@@ -45,6 +46,7 @@ static int run_lwt_bpf(struct sk_buff *s
+ */
+ migrate_disable();
+ local_bh_disable();
++ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
+ bpf_compute_data_pointers(skb);
+ ret = bpf_prog_run_save_cb(lwt->prog, skb);
+
+@@ -77,6 +79,7 @@ static int run_lwt_bpf(struct sk_buff *s
+ break;
+ }
+
++ bpf_net_ctx_clear(bpf_net_ctx);
+ local_bh_enable();
+ migrate_enable();
+