]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
.27 patch
authorGreg Kroah-Hartman <gregkh@suse.de>
Sat, 14 Feb 2009 22:01:00 +0000 (14:01 -0800)
committerGreg Kroah-Hartman <gregkh@suse.de>
Sat, 14 Feb 2009 22:01:00 +0000 (14:01 -0800)
review-2.6.27/net-fix-data-corruption-when-splicing-from-sockets.patch [new file with mode: 0644]
review-2.6.27/series

diff --git a/review-2.6.27/net-fix-data-corruption-when-splicing-from-sockets.patch b/review-2.6.27/net-fix-data-corruption-when-splicing-from-sockets.patch
new file mode 100644 (file)
index 0000000..86755bc
--- /dev/null
@@ -0,0 +1,202 @@
+From 8b9d3728977760f6bd1317c4420890f73695354e Mon Sep 17 00:00:00 2001
+From: Jarek Poplawski <jarkao2@gmail.com>
+Date: Mon, 19 Jan 2009 17:03:56 -0800
+Subject: net: Fix data corruption when splicing from sockets.
+
+From: Jarek Poplawski <jarkao2@gmail.com>
+
+[ Upstream commit 8b9d3728977760f6bd1317c4420890f73695354e ]
+
+The trick in socket splicing where we try to convert the skb->data
+into a page based reference using virt_to_page() does not work so
+well.
+
+The idea is to pass the virt_to_page() reference via the pipe
+buffer, and refcount the buffer using a SKB reference.
+
+But if we are splicing from a socket to a socket (via sendpage)
+this doesn't work.
+
+The from side processing will grab the page (and SKB) references.
+The sendpage() calls will grab page references only, return, and
+then the from side processing completes and drops the SKB ref.
+
+The page based reference to skb->data is not enough to keep the
+kmalloc() buffer backing it from being reused.  Yet, that is
+all that the socket send side has at this point.
+
+This leads to data corruption if the skb->data buffer is reused
+by SLAB before the send side socket actually gets the TX packet
+out to the device.
+
+The fix employed here is to simply allocate a page and copy the
+skb->data bytes into that page.
+
+This will hurt performance, but there is no clear way to fix this
+properly without a copy at the present time, and it is important
+to get rid of the data corruption.
+
+With fixes from Herbert Xu.
+
+Tested-by: Willy Tarreau <w@1wt.eu>
+Foreseen-by: Changli Gao <xiaosuo@gmail.com>
+Diagnosed-by: Willy Tarreau <w@1wt.eu>
+Reported-by: Willy Tarreau <w@1wt.eu>
+Fixed-by: Jens Axboe <jens.axboe@oracle.com>
+Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ net/core/skbuff.c |   61 +++++++++++++++++++++++++-----------------------------
+ 1 file changed, 29 insertions(+), 32 deletions(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -73,17 +73,13 @@ static struct kmem_cache *skbuff_fclone_
+ static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
+                                 struct pipe_buffer *buf)
+ {
+-      struct sk_buff *skb = (struct sk_buff *) buf->private;
+-
+-      kfree_skb(skb);
++      put_page(buf->page);
+ }
+ static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
+                               struct pipe_buffer *buf)
+ {
+-      struct sk_buff *skb = (struct sk_buff *) buf->private;
+-
+-      skb_get(skb);
++      get_page(buf->page);
+ }
+ static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
+@@ -1262,9 +1258,19 @@ fault:
+  */
+ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
+ {
+-      struct sk_buff *skb = (struct sk_buff *) spd->partial[i].private;
++      put_page(spd->pages[i]);
++}
+-      kfree_skb(skb);
++static inline struct page *linear_to_page(struct page *page, unsigned int len,
++                                        unsigned int offset)
++{
++      struct page *p = alloc_pages(GFP_KERNEL, 0);
++
++      if (!p)
++              return NULL;
++      memcpy(page_address(p) + offset, page_address(page) + offset, len);
++
++      return p;
+ }
+ /*
+@@ -1272,16 +1278,23 @@ static void sock_spd_release(struct spli
+  */
+ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
+                               unsigned int len, unsigned int offset,
+-                              struct sk_buff *skb)
++                              struct sk_buff *skb, int linear)
+ {
+       if (unlikely(spd->nr_pages == PIPE_BUFFERS))
+               return 1;
++      if (linear) {
++              page = linear_to_page(page, len, offset);
++              if (!page)
++                      return 1;
++      } else
++              get_page(page);
++
+       spd->pages[spd->nr_pages] = page;
+       spd->partial[spd->nr_pages].len = len;
+       spd->partial[spd->nr_pages].offset = offset;
+-      spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb);
+       spd->nr_pages++;
++
+       return 0;
+ }
+@@ -1297,7 +1310,7 @@ static inline void __segment_seek(struct
+ static inline int __splice_segment(struct page *page, unsigned int poff,
+                                  unsigned int plen, unsigned int *off,
+                                  unsigned int *len, struct sk_buff *skb,
+-                                 struct splice_pipe_desc *spd)
++                                 struct splice_pipe_desc *spd, int linear)
+ {
+       if (!*len)
+               return 1;
+@@ -1320,7 +1333,7 @@ static inline int __splice_segment(struc
+               /* the linear region may spread across several pages  */
+               flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
+-              if (spd_fill_page(spd, page, flen, poff, skb))
++              if (spd_fill_page(spd, page, flen, poff, skb, linear))
+                       return 1;
+               __segment_seek(&page, &poff, &plen, flen);
+@@ -1347,7 +1360,7 @@ static int __skb_splice_bits(struct sk_b
+       if (__splice_segment(virt_to_page(skb->data),
+                            (unsigned long) skb->data & (PAGE_SIZE - 1),
+                            skb_headlen(skb),
+-                           offset, len, skb, spd))
++                           offset, len, skb, spd, 1))
+               return 1;
+       /*
+@@ -1357,7 +1370,7 @@ static int __skb_splice_bits(struct sk_b
+               const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
+               if (__splice_segment(f->page, f->page_offset, f->size,
+-                                   offset, len, skb, spd))
++                                   offset, len, skb, spd, 0))
+                       return 1;
+       }
+@@ -1370,7 +1383,7 @@ static int __skb_splice_bits(struct sk_b
+  * the frag list, if such a thing exists. We'd probably need to recurse to
+  * handle that cleanly.
+  */
+-int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
++int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
+                   struct pipe_inode_info *pipe, unsigned int tlen,
+                   unsigned int flags)
+ {
+@@ -1383,16 +1396,6 @@ int skb_splice_bits(struct sk_buff *__sk
+               .ops = &sock_pipe_buf_ops,
+               .spd_release = sock_spd_release,
+       };
+-      struct sk_buff *skb;
+-
+-      /*
+-       * I'd love to avoid the clone here, but tcp_read_sock()
+-       * ignores reference counts and unconditonally kills the sk_buff
+-       * on return from the actor.
+-       */
+-      skb = skb_clone(__skb, GFP_KERNEL);
+-      if (unlikely(!skb))
+-              return -ENOMEM;
+       /*
+        * __skb_splice_bits() only fails if the output has no room left,
+@@ -1416,15 +1419,9 @@ int skb_splice_bits(struct sk_buff *__sk
+       }
+ done:
+-      /*
+-       * drop our reference to the clone, the pipe consumption will
+-       * drop the rest.
+-       */
+-      kfree_skb(skb);
+-
+       if (spd.nr_pages) {
++              struct sock *sk = skb->sk;
+               int ret;
+-              struct sock *sk = __skb->sk;
+               /*
+                * Drop the socket lock, otherwise we have reverse
index 50ba6a2665a5d0722829c152a6ee80a0257fa2d4..4a8c8546d560320c9c813321e8c13d2af96d9050 100644 (file)
@@ -45,3 +45,4 @@ ide-libata-fix-ata_id_is_cfa.patch
 x86-fixup-config-space-size-of-cpu-functions-for-amd-family-11h.patch
 mac80211-fix-a-buffer-overrun-in-station-debug-code.patch
 alsa-mtpav-fix-initial-value-for-input-hwport.patch
+net-fix-data-corruption-when-splicing-from-sockets.patch