From: Greg Kroah-Hartman Date: Thu, 13 Jul 2006 00:04:16 +0000 (-0700) Subject: added splice patch X-Git-Tag: v2.6.17.7~11 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c73a5d6c0a937a8db9131a06192cb80aa19dac91;p=thirdparty%2Fkernel%2Fstable-queue.git added splice patch --- diff --git a/queue-2.6.17/series b/queue-2.6.17/series index 1b78bd96776..da85d2d9f45 100644 --- a/queue-2.6.17/series +++ b/queue-2.6.17/series @@ -23,3 +23,4 @@ make-powernow-k7-work-on-smp-kernels.patch fix-ondemand-vs-suspend-deadlock.patch fix-powernow-k8-smp-kernel-on-up-hardware-bug.patch cdrom-fix-bad-cgc.buflen-assignment.patch +splice-fix-problems-with-sys_tee.patch diff --git a/queue-2.6.17/splice-fix-problems-with-sys_tee.patch b/queue-2.6.17/splice-fix-problems-with-sys_tee.patch new file mode 100644 index 00000000000..c07963c5865 --- /dev/null +++ b/queue-2.6.17/splice-fix-problems-with-sys_tee.patch @@ -0,0 +1,315 @@ +From aadd06e5c56b9ff5117ec77e59eada43dc46e2fc Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Mon, 10 Jul 2006 11:00:01 +0200 +Subject: [PATCH] [PATCH] splice: fix problems with sys_tee() + +Several issues noticed/fixed: + +- We cannot reliably block in link_pipe() while holding both input and output + mutexes. So do preparatory checks before locking down both mutexes and doing + the link. + +- The ipipe->nrbufs vs i check was bad, because we could have dropped the + ipipe lock in-between. This causes us to potentially look at unknown + buffers if we were racing with someone else reading this pipe. + +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/splice.c | 238 +++++++++++++++++++++++++++++++++--------------------------- + 1 file changed, 133 insertions(+), 105 deletions(-) + +--- linux-2.6.17.4.orig/fs/splice.c ++++ linux-2.6.17.4/fs/splice.c +@@ -1295,6 +1295,85 @@ asmlinkage long sys_splice(int fd_in, lo + } + + /* ++ * Make sure there's data to read. Wait for input if we can, otherwise ++ * return an appropriate error. ++ */ ++static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) ++{ ++ int ret; ++ ++ /* ++ * Check ->nrbufs without the inode lock first. This function ++ * is speculative anyways, so missing one is ok. ++ */ ++ if (pipe->nrbufs) ++ return 0; ++ ++ ret = 0; ++ mutex_lock(&pipe->inode->i_mutex); ++ ++ while (!pipe->nrbufs) { ++ if (signal_pending(current)) { ++ ret = -ERESTARTSYS; ++ break; ++ } ++ if (!pipe->writers) ++ break; ++ if (!pipe->waiting_writers) { ++ if (flags & SPLICE_F_NONBLOCK) { ++ ret = -EAGAIN; ++ break; ++ } ++ } ++ pipe_wait(pipe); ++ } ++ ++ mutex_unlock(&pipe->inode->i_mutex); ++ return ret; ++} ++ ++/* ++ * Make sure there's writeable room. Wait for room if we can, otherwise ++ * return an appropriate error. ++ */ ++static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) ++{ ++ int ret; ++ ++ /* ++ * Check ->nrbufs without the inode lock first. This function ++ * is speculative anyways, so missing one is ok. ++ */ ++ if (pipe->nrbufs < PIPE_BUFFERS) ++ return 0; ++ ++ ret = 0; ++ mutex_lock(&pipe->inode->i_mutex); ++ ++ while (pipe->nrbufs >= PIPE_BUFFERS) { ++ if (!pipe->readers) { ++ send_sig(SIGPIPE, current, 0); ++ ret = -EPIPE; ++ break; ++ } ++ if (flags & SPLICE_F_NONBLOCK) { ++ ret = -EAGAIN; ++ break; ++ } ++ if (signal_pending(current)) { ++ ret = -ERESTARTSYS; ++ break; ++ } ++ pipe->waiting_writers++; ++ pipe_wait(pipe); ++ pipe->waiting_writers--; ++ } ++ ++ mutex_unlock(&pipe->inode->i_mutex); ++ return ret; ++} ++ ++/* + * Link contents of ipipe to opipe. + */ + static int link_pipe(struct pipe_inode_info *ipipe, +@@ -1302,9 +1381,7 @@ static int link_pipe(struct pipe_inode_i + size_t len, unsigned int flags) + { + struct pipe_buffer *ibuf, *obuf; +- int ret, do_wakeup, i, ipipe_first; +- +- ret = do_wakeup = ipipe_first = 0; ++ int ret = 0, i = 0, nbuf; + + /* + * Potential ABBA deadlock, work around it by ordering lock +@@ -1312,126 +1389,62 @@ static int link_pipe(struct pipe_inode_i + * could deadlock (one doing tee from A -> B, the other from B -> A). + */ + if (ipipe->inode < opipe->inode) { +- ipipe_first = 1; +- mutex_lock(&ipipe->inode->i_mutex); +- mutex_lock(&opipe->inode->i_mutex); ++ mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT); ++ mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD); + } else { +- mutex_lock(&opipe->inode->i_mutex); +- mutex_lock(&ipipe->inode->i_mutex); ++ mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT); ++ mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD); + } + +- for (i = 0;; i++) { ++ do { + if (!opipe->readers) { + send_sig(SIGPIPE, current, 0); + if (!ret) + ret = -EPIPE; + break; + } +- if (ipipe->nrbufs - i) { +- ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); + +- /* +- * If we have room, fill this buffer +- */ +- if (opipe->nrbufs < PIPE_BUFFERS) { +- int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); +- +- /* +- * Get a reference to this pipe buffer, +- * so we can copy the contents over. +- */ +- ibuf->ops->get(ipipe, ibuf); +- +- obuf = opipe->bufs + nbuf; +- *obuf = *ibuf; +- +- /* +- * Don't inherit the gift flag, we need to +- * prevent multiple steals of this page. +- */ +- obuf->flags &= ~PIPE_BUF_FLAG_GIFT; +- +- if (obuf->len > len) +- obuf->len = len; +- +- opipe->nrbufs++; +- do_wakeup = 1; +- ret += obuf->len; +- len -= obuf->len; +- +- if (!len) +- break; +- if (opipe->nrbufs < PIPE_BUFFERS) +- continue; +- } +- +- /* +- * We have input available, but no output room. +- * If we already copied data, return that. If we +- * need to drop the opipe lock, it must be ordered +- * last to avoid deadlocks. +- */ +- if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) { +- if (!ret) +- ret = -EAGAIN; +- break; +- } +- if (signal_pending(current)) { +- if (!ret) +- ret = -ERESTARTSYS; +- break; +- } +- if (do_wakeup) { +- smp_mb(); +- if (waitqueue_active(&opipe->wait)) +- wake_up_interruptible(&opipe->wait); +- kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); +- do_wakeup = 0; +- } ++ /* ++ * If we have iterated all input buffers or ran out of ++ * output room, break. ++ */ ++ if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) ++ break; + +- opipe->waiting_writers++; +- pipe_wait(opipe); +- opipe->waiting_writers--; +- continue; +- } ++ ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); ++ nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); + + /* +- * No input buffers, do the usual checks for available +- * writers and blocking and wait if necessary ++ * Get a reference to this pipe buffer, ++ * so we can copy the contents over. + */ +- if (!ipipe->writers) +- break; +- if (!ipipe->waiting_writers) { +- if (ret) +- break; +- } ++ ibuf->ops->get(ipipe, ibuf); ++ ++ obuf = opipe->bufs + nbuf; ++ *obuf = *ibuf; ++ + /* +- * pipe_wait() drops the ipipe mutex. To avoid deadlocks +- * with another process, we can only safely do that if +- * the ipipe lock is ordered last. ++ * Don't inherit the gift flag, we need to ++ * prevent multiple steals of this page. + */ +- if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) { +- if (!ret) +- ret = -EAGAIN; +- break; +- } +- if (signal_pending(current)) { +- if (!ret) +- ret = -ERESTARTSYS; +- break; +- } ++ obuf->flags &= ~PIPE_BUF_FLAG_GIFT; + +- if (waitqueue_active(&ipipe->wait)) +- wake_up_interruptible_sync(&ipipe->wait); +- kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT); ++ if (obuf->len > len) ++ obuf->len = len; + +- pipe_wait(ipipe); +- } ++ opipe->nrbufs++; ++ ret += obuf->len; ++ len -= obuf->len; ++ i++; ++ } while (len); + + mutex_unlock(&ipipe->inode->i_mutex); + mutex_unlock(&opipe->inode->i_mutex); + +- if (do_wakeup) { ++ /* ++ * If we put data in the output pipe, wakeup any potential readers. ++ */ ++ if (ret > 0) { + smp_mb(); + if (waitqueue_active(&opipe->wait)) + wake_up_interruptible(&opipe->wait); +@@ -1452,14 +1465,29 @@ static long do_tee(struct file *in, stru + { + struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe; + struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe; ++ int ret = -EINVAL; + + /* +- * Link ipipe to the two output pipes, consuming as we go along. ++ * Duplicate the contents of ipipe to opipe without actually ++ * copying the data. + */ +- if (ipipe && opipe) +- return link_pipe(ipipe, opipe, len, flags); ++ if (ipipe && opipe && ipipe != opipe) { ++ /* ++ * Keep going, unless we encounter an error. The ipipe/opipe ++ * ordering doesn't really matter. ++ */ ++ ret = link_ipipe_prep(ipipe, flags); ++ if (!ret) { ++ ret = link_opipe_prep(opipe, flags); ++ if (!ret) { ++ ret = link_pipe(ipipe, opipe, len, flags); ++ if (!ret && (flags & SPLICE_F_NONBLOCK)) ++ ret = -EAGAIN; ++ } ++ } ++ } + +- return -EINVAL; ++ return ret; + } + + asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)