--- /dev/null
+From 640a604585aa30f93e39b17d4d6ba69fcb1e66c9 Mon Sep 17 00:00:00 2001
+From: Hou Tao <houtao1@huawei.com>
+Date: Sat, 29 Jul 2023 17:51:06 +0800
+Subject: bpf, cpumap: Make sure kthread is running before map update returns
+
+From: Hou Tao <houtao1@huawei.com>
+
+commit 640a604585aa30f93e39b17d4d6ba69fcb1e66c9 upstream.
+
+The following warning was reported when running stress-mode-enabled
+xdp_redirect_cpu with some RT threads:
+
+ ------------[ cut here ]------------
+ WARNING: CPU: 4 PID: 65 at kernel/bpf/cpumap.c:135
+ CPU: 4 PID: 65 Comm: kworker/4:1 Not tainted 6.5.0-rc2+ #1
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+ Workqueue: events cpu_map_kthread_stop
+ RIP: 0010:put_cpu_map_entry+0xda/0x220
+ ......
+ Call Trace:
+ <TASK>
+ ? show_regs+0x65/0x70
+ ? __warn+0xa5/0x240
+ ......
+ ? put_cpu_map_entry+0xda/0x220
+ cpu_map_kthread_stop+0x41/0x60
+ process_one_work+0x6b0/0xb80
+ worker_thread+0x96/0x720
+ kthread+0x1a5/0x1f0
+ ret_from_fork+0x3a/0x70
+ ret_from_fork_asm+0x1b/0x30
+ </TASK>
+
+The root cause is the same as in commit 436901649731 ("bpf: cpumap: Fix
+memory leak in cpu_map_update_elem"). The kthread is stopped prematurely
+by kthread_stop() in cpu_map_kthread_stop(), and kthread() doesn't call
+cpu_map_kthread_run() at all, but the XDP program has already queued some
+frames or skbs into the ptr_ring. So when __cpu_map_ring_cleanup() checks
+the ptr_ring, it finds that it was not emptied and reports a warning.
+
+An alternative fix would be to use __cpu_map_ring_cleanup() to drop these
+pending frames or skbs when kthread_stop() returns -EINTR, but that may
+confuse the user, because these frames or skbs have already been handled
+correctly by the XDP program. So instead of dropping them, just make sure
+the per-cpu kthread is running before __cpu_map_entry_alloc() returns.
+
+After applying the fix, the error handling for kthread_stop() becomes
+unnecessary because it will always return 0, so just remove it.
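+
+Below is a minimal userspace C sketch of the handshake this patch adds;
+it is illustrative only (pthread primitives stand in for the kernel's
+struct completion, and all names in it are invented for the example):
+
+  /* The creator must not return until the worker has signalled that it
+   * is running, so a later "stop" cannot fire before the worker starts.
+   */
+  #include <pthread.h>
+  #include <stdio.h>
+
+  static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+  static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+  static int running;
+
+  static void *worker(void *arg)
+  {
+          /* analogue of complete(&rcpu->kthread_running) */
+          pthread_mutex_lock(&lock);
+          running = 1;
+          pthread_cond_signal(&cond);
+          pthread_mutex_unlock(&lock);
+          /* ... drain queued frames or skbs here ... */
+          return NULL;
+  }
+
+  int main(void)
+  {
+          pthread_t t;
+
+          pthread_create(&t, NULL, worker, NULL);
+          /* analogue of wait_for_completion(&rcpu->kthread_running) */
+          pthread_mutex_lock(&lock);
+          while (!running)
+                  pthread_cond_wait(&cond, &lock);
+          pthread_mutex_unlock(&lock);
+          pthread_join(t, NULL);
+          printf("worker is running before the creator returns\n");
+          return 0;
+  }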
+
+Fixes: 6710e1126934 ("bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP")
+Signed-off-by: Hou Tao <houtao1@huawei.com>
+Reviewed-by: Pu Lehui <pulehui@huawei.com>
+Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
+Link: https://lore.kernel.org/r/20230729095107.1722450-2-houtao@huaweicloud.com
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/cpumap.c | 21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+--- a/kernel/bpf/cpumap.c
++++ b/kernel/bpf/cpumap.c
+@@ -26,6 +26,7 @@
+ #include <linux/workqueue.h>
+ #include <linux/kthread.h>
+ #include <linux/capability.h>
++#include <linux/completion.h>
+ #include <trace/events/xdp.h>
+
+ #include <linux/netdevice.h> /* netif_receive_skb_list */
+@@ -70,6 +71,7 @@ struct bpf_cpu_map_entry {
+ struct rcu_head rcu;
+
+ struct work_struct kthread_stop_wq;
++ struct completion kthread_running;
+ };
+
+ struct bpf_cpu_map {
+@@ -163,7 +165,6 @@ static void put_cpu_map_entry(struct bpf
+ static void cpu_map_kthread_stop(struct work_struct *work)
+ {
+ struct bpf_cpu_map_entry *rcpu;
+- int err;
+
+ rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
+
+@@ -173,14 +174,7 @@ static void cpu_map_kthread_stop(struct
+ rcu_barrier();
+
+ /* kthread_stop will wake_up_process and wait for it to complete */
+- err = kthread_stop(rcpu->kthread);
+- if (err) {
+- /* kthread_stop may be called before cpu_map_kthread_run
+- * is executed, so we need to release the memory related
+- * to rcpu.
+- */
+- put_cpu_map_entry(rcpu);
+- }
++ kthread_stop(rcpu->kthread);
+ }
+
+ static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
+@@ -308,11 +302,11 @@ static int cpu_map_bpf_prog_run(struct b
+ return nframes;
+ }
+
+-
+ static int cpu_map_kthread_run(void *data)
+ {
+ struct bpf_cpu_map_entry *rcpu = data;
+
++ complete(&rcpu->kthread_running);
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ /* When kthread gives stop order, then rcpu have been disconnected
+@@ -475,6 +469,7 @@ __cpu_map_entry_alloc(struct bpf_map *ma
+ goto free_ptr_ring;
+
+ /* Setup kthread */
++ init_completion(&rcpu->kthread_running);
+ rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
+ "cpumap/%d/map:%d", cpu,
+ map->id);
+@@ -488,6 +483,12 @@ __cpu_map_entry_alloc(struct bpf_map *ma
+ kthread_bind(rcpu->kthread, cpu);
+ wake_up_process(rcpu->kthread);
+
++ /* Make sure kthread has been running, so kthread_stop() will not
++ * stop the kthread prematurely and all pending frames or skbs
++ * will be handled by the kthread before kthread_stop() returns.
++ */
++ wait_for_completion(&rcpu->kthread_running);
++
+ return rcpu;
+
+ free_prog:
--- /dev/null
+From 797964253d358cf8d705614dda394dbe30120223 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Thu, 3 Aug 2023 11:35:53 -0700
+Subject: file: reinstate f_pos locking optimization for regular files
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 797964253d358cf8d705614dda394dbe30120223 upstream.
+
+In commit 20ea1e7d13c1 ("file: always lock position for
+FMODE_ATOMIC_POS") we ended up always taking the file pos lock, because
+pidfd_getfd() could get a reference to the file even when it didn't have
+an elevated file count due to threading or other sharing cases.
+
+But Mateusz Guzik reports that the extra locking is actually measurable,
+so let's re-introduce the optimization, and only force the locking for
+directory traversal.
+
+Directories need the lock for correctness reasons, while regular files
+only need it for "POSIX semantics". Since pidfd_getfd() is for debuggers
+and other special things that are _way_ outside of POSIX, we can relax
+the rules for that case.
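+
+A runnable userspace C model of the new predicate's decision table
+follows; it only mirrors the logic (the struct and helper names below
+are invented for the example, not kernel API):
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  struct file_model {
+          bool atomic_pos;  /* FMODE_ATOMIC_POS set */
+          long count;       /* file_count(file) */
+          bool is_dir;      /* S_ISDIR(file_inode(file)->i_mode) */
+  };
+
+  /* mirrors file_needs_f_pos_lock(): lock only when pos updates must
+   * be atomic AND the file is shared or is a directory
+   */
+  static bool needs_f_pos_lock(const struct file_model *f)
+  {
+          return f->atomic_pos && (f->count > 1 || f->is_dir);
+  }
+
+  int main(void)
+  {
+          const struct file_model cases[] = {
+                  { true,  1, false }, /* unshared regular file: skip */
+                  { true,  2, false }, /* shared regular file: lock */
+                  { true,  1, true  }, /* directory, even unshared: lock */
+                  { false, 2, false }, /* no FMODE_ATOMIC_POS: skip */
+          };
+
+          for (unsigned int i = 0; i < 4; i++)
+                  printf("case %u: lock=%d\n", i,
+                         needs_f_pos_lock(&cases[i]));
+          return 0;
+  }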
+
+Reported-by: Mateusz Guzik <mjguzik@gmail.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Link: https://lore.kernel.org/linux-fsdevel/20230803095311.ijpvhx3fyrbkasul@f/
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/file.c | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -1062,12 +1062,28 @@ unsigned long __fdget_raw(unsigned int f
+ return __fget_light(fd, 0);
+ }
+
++/*
++ * Try to avoid f_pos locking. We only need it if the
++ * file is marked for FMODE_ATOMIC_POS, and it can be
++ * accessed multiple ways.
++ *
++ * Always do it for directories, because pidfd_getfd()
++ * can make a file accessible even if it otherwise would
++ * not be, and for directories this is a correctness
++ * issue, not a "POSIX requirement".
++ */
++static inline bool file_needs_f_pos_lock(struct file *file)
++{
++ return (file->f_mode & FMODE_ATOMIC_POS) &&
++ (file_count(file) > 1 || S_ISDIR(file_inode(file)->i_mode));
++}
++
+ unsigned long __fdget_pos(unsigned int fd)
+ {
+ unsigned long v = __fdget(fd);
+ struct file *file = (struct file *)(v & ~3);
+
+- if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
++ if (file && file_needs_f_pos_lock(file)) {
+ v |= FDPUT_POS_UNLOCK;
+ mutex_lock(&file->f_pos_lock);
+ }