]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
pipe_read: don't wake up the writer if the pipe is still full
authorOleg Nesterov <oleg@redhat.com>
Thu, 2 Jan 2025 14:07:15 +0000 (15:07 +0100)
committerChristian Brauner <brauner@kernel.org>
Sat, 4 Jan 2025 09:12:19 +0000 (10:12 +0100)
wake_up(pipe->wr_wait) makes no sense if pipe_full() is still true after
the reading, the writer sleeping in wait_event(wr_wait, pipe_writable())
will check the pipe_writable() == !pipe_full() condition and sleep again.

Only wake the writer if we actually released a pipe buf, and the pipe was
full before we did so.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Link: https://lore.kernel.org/all/20241229135737.GA3293@redhat.com/
Link: https://lore.kernel.org/r/20250102140715.GA7091@redhat.com
Reported-by: WangYuli <wangyuli@uniontech.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/pipe.c

index 12b22c2723b7ead48f8ffcaecee869631c68dd05..82fede0f2111e8f597de73b28d9fe4bb3ffd110a 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -253,7 +253,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
        size_t total_len = iov_iter_count(to);
        struct file *filp = iocb->ki_filp;
        struct pipe_inode_info *pipe = filp->private_data;
-       bool was_full, wake_next_reader = false;
+       bool wake_writer = false, wake_next_reader = false;
        ssize_t ret;
 
        /* Null read succeeds. */
@@ -264,14 +264,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
        mutex_lock(&pipe->mutex);
 
        /*
-        * We only wake up writers if the pipe was full when we started
-        * reading in order to avoid unnecessary wakeups.
+        * We only wake up writers if the pipe was full when we started reading
+        * and it is no longer full after reading to avoid unnecessary wakeups.
         *
         * But when we do wake up writers, we do so using a sync wakeup
         * (WF_SYNC), because we want them to get going and generate more
         * data for us.
         */
-       was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
        for (;;) {
                /* Read ->head with a barrier vs post_one_notification() */
                unsigned int head = smp_load_acquire(&pipe->head);
@@ -340,8 +339,10 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
                                buf->len = 0;
                        }
 
-                       if (!buf->len)
+                       if (!buf->len) {
+                               wake_writer |= pipe_full(head, tail, pipe->max_usage);
                                tail = pipe_update_tail(pipe, buf, tail);
+                       }
                        total_len -= chars;
                        if (!total_len)
                                break;  /* common path: read succeeded */
@@ -377,7 +378,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
                 * _very_ unlikely case that the pipe was full, but we got
                 * no data.
                 */
-               if (unlikely(was_full))
+               if (unlikely(wake_writer))
                        wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
                kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 
@@ -390,15 +391,15 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
                if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
                        return -ERESTARTSYS;
 
-               mutex_lock(&pipe->mutex);
-               was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
+               wake_writer = false;
                wake_next_reader = true;
+               mutex_lock(&pipe->mutex);
        }
        if (pipe_empty(pipe->head, pipe->tail))
                wake_next_reader = false;
        mutex_unlock(&pipe->mutex);
 
-       if (was_full)
+       if (wake_writer)
                wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
        if (wake_next_reader)
                wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);