git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 7 Aug 2023 07:57:42 +0000 (09:57 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 7 Aug 2023 07:57:42 +0000 (09:57 +0200)
added patches:
bpf-cpumap-make-sure-kthread-is-running-before-map-update-returns.patch
file-reinstate-f_pos-locking-optimization-for-regular-files.patch

queue-5.15/bpf-cpumap-make-sure-kthread-is-running-before-map-update-returns.patch [new file with mode: 0644]
queue-5.15/file-reinstate-f_pos-locking-optimization-for-regular-files.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/bpf-cpumap-make-sure-kthread-is-running-before-map-update-returns.patch b/queue-5.15/bpf-cpumap-make-sure-kthread-is-running-before-map-update-returns.patch
new file mode 100644 (file)
index 0000000..0220043
--- /dev/null
@@ -0,0 +1,137 @@
+From 640a604585aa30f93e39b17d4d6ba69fcb1e66c9 Mon Sep 17 00:00:00 2001
+From: Hou Tao <houtao1@huawei.com>
+Date: Sat, 29 Jul 2023 17:51:06 +0800
+Subject: bpf, cpumap: Make sure kthread is running before map update returns
+
+From: Hou Tao <houtao1@huawei.com>
+
+commit 640a604585aa30f93e39b17d4d6ba69fcb1e66c9 upstream.
+
+The following warning was reported when running stress-mode enabled
+xdp_redirect_cpu with some RT threads:
+
+  ------------[ cut here ]------------
+  WARNING: CPU: 4 PID: 65 at kernel/bpf/cpumap.c:135
+  CPU: 4 PID: 65 Comm: kworker/4:1 Not tainted 6.5.0-rc2+ #1
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+  Workqueue: events cpu_map_kthread_stop
+  RIP: 0010:put_cpu_map_entry+0xda/0x220
+  ......
+  Call Trace:
+   <TASK>
+   ? show_regs+0x65/0x70
+   ? __warn+0xa5/0x240
+   ......
+   ? put_cpu_map_entry+0xda/0x220
+   cpu_map_kthread_stop+0x41/0x60
+   process_one_work+0x6b0/0xb80
+   worker_thread+0x96/0x720
+   kthread+0x1a5/0x1f0
+   ret_from_fork+0x3a/0x70
+   ret_from_fork_asm+0x1b/0x30
+   </TASK>
+
+The root cause is the same as commit 436901649731 ("bpf: cpumap: Fix memory
+leak in cpu_map_update_elem"). The kthread is stopped prematurely by
+kthread_stop() in cpu_map_kthread_stop(), and kthread() doesn't call
+cpu_map_kthread_run() at all, but the XDP program has already queued some
+frames or skbs into the ptr_ring. So when __cpu_map_ring_cleanup() checks
+the ptr_ring, it finds that it was not emptied and reports a warning.
+
+An alternative fix is to use __cpu_map_ring_cleanup() to drop these
+pending frames or skbs when kthread_stop() returns -EINTR, but it may
+confuse the user, because these frames or skbs have already been handled
+correctly by the XDP program. So instead of dropping these frames or skbs,
+just make sure the per-cpu kthread is running before
+__cpu_map_entry_alloc() returns.
+
+After applying the fix, the error handling for kthread_stop() becomes
+unnecessary because it will always return 0, so just remove it.
+
+Fixes: 6710e1126934 ("bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP")
+Signed-off-by: Hou Tao <houtao1@huawei.com>
+Reviewed-by: Pu Lehui <pulehui@huawei.com>
+Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
+Link: https://lore.kernel.org/r/20230729095107.1722450-2-houtao@huaweicloud.com
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/cpumap.c |   21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+--- a/kernel/bpf/cpumap.c
++++ b/kernel/bpf/cpumap.c
+@@ -26,6 +26,7 @@
+ #include <linux/workqueue.h>
+ #include <linux/kthread.h>
+ #include <linux/capability.h>
++#include <linux/completion.h>
+ #include <trace/events/xdp.h>
+ #include <linux/netdevice.h>   /* netif_receive_skb_list */
+@@ -70,6 +71,7 @@ struct bpf_cpu_map_entry {
+       struct rcu_head rcu;
+       struct work_struct kthread_stop_wq;
++      struct completion kthread_running;
+ };
+ struct bpf_cpu_map {
+@@ -163,7 +165,6 @@ static void put_cpu_map_entry(struct bpf
+ static void cpu_map_kthread_stop(struct work_struct *work)
+ {
+       struct bpf_cpu_map_entry *rcpu;
+-      int err;
+       rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
+@@ -173,14 +174,7 @@ static void cpu_map_kthread_stop(struct
+       rcu_barrier();
+       /* kthread_stop will wake_up_process and wait for it to complete */
+-      err = kthread_stop(rcpu->kthread);
+-      if (err) {
+-              /* kthread_stop may be called before cpu_map_kthread_run
+-               * is executed, so we need to release the memory related
+-               * to rcpu.
+-               */
+-              put_cpu_map_entry(rcpu);
+-      }
++      kthread_stop(rcpu->kthread);
+ }
+ static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
+@@ -308,11 +302,11 @@ static int cpu_map_bpf_prog_run(struct b
+       return nframes;
+ }
+-
+ static int cpu_map_kthread_run(void *data)
+ {
+       struct bpf_cpu_map_entry *rcpu = data;
++      complete(&rcpu->kthread_running);
+       set_current_state(TASK_INTERRUPTIBLE);
+       /* When kthread gives stop order, then rcpu have been disconnected
+@@ -475,6 +469,7 @@ __cpu_map_entry_alloc(struct bpf_map *ma
+               goto free_ptr_ring;
+       /* Setup kthread */
++      init_completion(&rcpu->kthread_running);
+       rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
+                                              "cpumap/%d/map:%d", cpu,
+                                              map->id);
+@@ -488,6 +483,12 @@ __cpu_map_entry_alloc(struct bpf_map *ma
+       kthread_bind(rcpu->kthread, cpu);
+       wake_up_process(rcpu->kthread);
++      /* Make sure kthread has been running, so kthread_stop() will not
++       * stop the kthread prematurely and all pending frames or skbs
++       * will be handled by the kthread before kthread_stop() returns.
++       */
++      wait_for_completion(&rcpu->kthread_running);
++
+       return rcpu;
+ free_prog:
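
The handshake added by the patch above is easier to see in isolation. Below is
a minimal, self-contained sketch of the same completion pattern as a
stand-alone kernel module; the names (worker_fn, worker_running,
"completion-demo") and the demo wait loop are illustrative assumptions, not
part of the patch:

#include <linux/completion.h>
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>

static struct task_struct *worker;
static DECLARE_COMPLETION(worker_running);

static int worker_fn(void *data)
{
	/* Signal the creator that the thread body is running. From this
	 * point on, kthread_stop() cannot return -EINTR, so the thread is
	 * guaranteed a chance to drain its work before it is stopped.
	 */
	complete(&worker_running);

	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);
	return 0;
}

static int __init demo_init(void)
{
	worker = kthread_create(worker_fn, NULL, "completion-demo");
	if (IS_ERR(worker))
		return PTR_ERR(worker);

	wake_up_process(worker);
	/* Without this wait, a concurrent kthread_stop() could stop the
	 * thread before worker_fn() ever executed, which is exactly the
	 * premature-stop race the cpumap patch closes.
	 */
	wait_for_completion(&worker_running);
	return 0;
}

static void __exit demo_exit(void)
{
	kthread_stop(worker);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Because demo_init() does not return before worker_fn() has signalled the
completion, kthread_stop() in demo_exit() always finds a running thread and
returns 0, matching the reasoning for dropping the error handling above.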
diff --git a/queue-5.15/file-reinstate-f_pos-locking-optimization-for-regular-files.patch b/queue-5.15/file-reinstate-f_pos-locking-optimization-for-regular-files.patch
new file mode 100644 (file)
index 0000000..d1d7701
--- /dev/null
@@ -0,0 +1,64 @@
+From 797964253d358cf8d705614dda394dbe30120223 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Thu, 3 Aug 2023 11:35:53 -0700
+Subject: file: reinstate f_pos locking optimization for regular files
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 797964253d358cf8d705614dda394dbe30120223 upstream.
+
+In commit 20ea1e7d13c1 ("file: always lock position for
+FMODE_ATOMIC_POS") we ended up always taking the file pos lock, because
+pidfd_getfd() could get a reference to the file even when it didn't have
+an elevated file count due to threading or other sharing cases.
+
+But Mateusz Guzik reports that the extra locking is actually measurable,
+so let's re-introduce the optimization, and only force the locking for
+directory traversal.
+
+Directories need the lock for correctness reasons, while regular files
+only need it for "POSIX semantics".  Since pidfd_getfd() is about
+debuggers and other special things that are _way_ outside of POSIX, we can
+relax the rules for that case.
+
+Reported-by: Mateusz Guzik <mjguzik@gmail.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Link: https://lore.kernel.org/linux-fsdevel/20230803095311.ijpvhx3fyrbkasul@f/
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/file.c |   18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -1062,12 +1062,28 @@ unsigned long __fdget_raw(unsigned int f
+       return __fget_light(fd, 0);
+ }
++/*
++ * Try to avoid f_pos locking. We only need it if the
++ * file is marked for FMODE_ATOMIC_POS, and it can be
++ * accessed multiple ways.
++ *
++ * Always do it for directories, because pidfd_getfd()
++ * can make a file accessible even if it otherwise would
++ * not be, and for directories this is a correctness
++ * issue, not a "POSIX requirement".
++ */
++static inline bool file_needs_f_pos_lock(struct file *file)
++{
++      return (file->f_mode & FMODE_ATOMIC_POS) &&
++              (file_count(file) > 1 || S_ISDIR(file_inode(file)->i_mode));
++}
++
+ unsigned long __fdget_pos(unsigned int fd)
+ {
+       unsigned long v = __fdget(fd);
+       struct file *file = (struct file *)(v & ~3);
+-      if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
++      if (file && file_needs_f_pos_lock(file)) {
+               v |= FDPUT_POS_UNLOCK;
+               mutex_lock(&file->f_pos_lock);
+       }
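
The predicate introduced above is small enough to model outside the kernel.
Here is a minimal user-space sketch of the same decision logic; struct
file_model and needs_pos_lock() are hypothetical stand-ins for the real
struct file state, not kernel API:

#include <stdbool.h>
#include <stdio.h>

/* Model of file_needs_f_pos_lock(): take the f_pos mutex only when the
 * position must be atomic (FMODE_ATOMIC_POS) and is actually shareable,
 * i.e. more than one reference exists or the file is a directory, where
 * f_pos is the iteration cursor and locking is a correctness issue.
 */
struct file_model {
	bool atomic_pos; /* FMODE_ATOMIC_POS */
	long refcount;   /* file_count(file) */
	bool is_dir;     /* S_ISDIR(file_inode(file)->i_mode) */
};

static bool needs_pos_lock(const struct file_model *f)
{
	return f->atomic_pos && (f->refcount > 1 || f->is_dir);
}

int main(void)
{
	struct file_model reg = { .atomic_pos = true, .refcount = 1, .is_dir = false };
	struct file_model dir = { .atomic_pos = true, .refcount = 1, .is_dir = true };

	printf("regular file, one reference: lock=%d\n", needs_pos_lock(&reg)); /* 0 */
	printf("directory, one reference:    lock=%d\n", needs_pos_lock(&dir)); /* 1 */
	return 0;
}

The case this deliberately leaves open is a single-reference regular file
later grabbed via pidfd_getfd(): it skips the lock, which the commit treats
as acceptable because such debugger-style access is outside POSIX anyway.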
diff --git a/queue-5.15/series b/queue-5.15/series
index b4deb2138cf66746eeca7944cf6d009ee80ef804..91344e15e73f1957aa02afa6d13cbc86e04e9638 100644 (file)
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -69,3 +69,5 @@ rbd-prevent-busy-loop-when-requesting-exclusive-lock.patch
 bpf-disable-preemption-in-bpf_event_output.patch
 open-make-resolve_cached-correctly-test-for-o_tmpfile.patch
 drm-ttm-check-null-pointer-before-accessing-when-swapping.patch
+bpf-cpumap-make-sure-kthread-is-running-before-map-update-returns.patch
+file-reinstate-f_pos-locking-optimization-for-regular-files.patch