git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 26 Nov 2013 22:35:59 +0000 (14:35 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 26 Nov 2013 22:35:59 +0000 (14:35 -0800)
added patches:
mei-nfc-fix-memory-leak-in-error-path.patch
powerpc-52xx-fix-build-breakage-for-mpc5200-lpbfifo-module.patch
powerpc-powernv-add-pe-to-its-own-peltv.patch
powerpc-ppc64-address-space-capped-at-32tb-mmap-randomisation-disabled.patch
powerpc-signals-mark-vsx-not-saved-with-small-contexts.patch
powerpc-vio-use-strcpy-in-modalias_show.patch
slub-handle-null-parameter-in-kmem_cache_flags.patch
sunrpc-fix-a-data-corruption-issue-when-retransmitting-rpc-calls.patch

queue-3.10/mei-nfc-fix-memory-leak-in-error-path.patch [new file with mode: 0644]
queue-3.10/powerpc-52xx-fix-build-breakage-for-mpc5200-lpbfifo-module.patch [new file with mode: 0644]
queue-3.10/powerpc-powernv-add-pe-to-its-own-peltv.patch [new file with mode: 0644]
queue-3.10/powerpc-ppc64-address-space-capped-at-32tb-mmap-randomisation-disabled.patch [new file with mode: 0644]
queue-3.10/powerpc-signals-mark-vsx-not-saved-with-small-contexts.patch [new file with mode: 0644]
queue-3.10/powerpc-vio-use-strcpy-in-modalias_show.patch [new file with mode: 0644]
queue-3.10/series
queue-3.10/slub-handle-null-parameter-in-kmem_cache_flags.patch [new file with mode: 0644]
queue-3.10/sunrpc-fix-a-data-corruption-issue-when-retransmitting-rpc-calls.patch [new file with mode: 0644]

diff --git a/queue-3.10/mei-nfc-fix-memory-leak-in-error-path.patch b/queue-3.10/mei-nfc-fix-memory-leak-in-error-path.patch
new file mode 100644 (file)
index 0000000..da1567e
--- /dev/null
@@ -0,0 +1,50 @@
+From 4bff7208f332b2b1d7cf1338e50527441283a198 Mon Sep 17 00:00:00 2001
+From: Tomas Winkler <tomas.winkler@intel.com>
+Date: Mon, 21 Oct 2013 22:05:38 +0300
+Subject: mei: nfc: fix memory leak in error path
+
+From: Tomas Winkler <tomas.winkler@intel.com>
+
+commit 4bff7208f332b2b1d7cf1338e50527441283a198 upstream.
+
+The flow may reach the err label without freeing cl and cl_info
+
+cl and cl_info weren't assigned to ndev->cl and cl_info
+so they weren't freed in mei_nfc_free called on error path
+
+Cc: Samuel Ortiz <sameo@linux.intel.com>
+Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/misc/mei/nfc.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/drivers/misc/mei/nfc.c
++++ b/drivers/misc/mei/nfc.c
+@@ -485,8 +485,11 @@ int mei_nfc_host_init(struct mei_device
+       if (ndev->cl_info)
+               return 0;
+-      cl_info = mei_cl_allocate(dev);
+-      cl = mei_cl_allocate(dev);
++      ndev->cl_info = mei_cl_allocate(dev);
++      ndev->cl = mei_cl_allocate(dev);
++
++      cl = ndev->cl;
++      cl_info = ndev->cl_info;
+       if (!cl || !cl_info) {
+               ret = -ENOMEM;
+@@ -527,10 +530,9 @@ int mei_nfc_host_init(struct mei_device
+       cl->device_uuid = mei_nfc_guid;
++
+       list_add_tail(&cl->device_link, &dev->device_list);
+-      ndev->cl_info = cl_info;
+-      ndev->cl = cl;
+       ndev->req_id = 1;
+       INIT_WORK(&ndev->init_work, mei_nfc_init);
diff --git a/queue-3.10/powerpc-52xx-fix-build-breakage-for-mpc5200-lpbfifo-module.patch b/queue-3.10/powerpc-52xx-fix-build-breakage-for-mpc5200-lpbfifo-module.patch
new file mode 100644 (file)
index 0000000..68ace9c
--- /dev/null
@@ -0,0 +1,29 @@
+From 2bf75084f6d9f9a91ba6e30a501ff070d8a1acf6 Mon Sep 17 00:00:00 2001
+From: Anatolij Gustschin <agust@denx.de>
+Date: Wed, 16 Oct 2013 13:11:27 +0200
+Subject: powerpc/52xx: fix build breakage for MPC5200 LPBFIFO module
+
+From: Anatolij Gustschin <agust@denx.de>
+
+commit 2bf75084f6d9f9a91ba6e30a501ff070d8a1acf6 upstream.
+
+The MPC5200 LPBFIFO driver requires the bestcomm module to be
+enabled, otherwise building will fail. Fix it.
+
+Reported-by: Wolfgang Denk <wd@denx.de>
+Signed-off-by: Anatolij Gustschin <agust@denx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/platforms/52xx/Kconfig |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/powerpc/platforms/52xx/Kconfig
++++ b/arch/powerpc/platforms/52xx/Kconfig
+@@ -57,5 +57,5 @@ config PPC_MPC5200_BUGFIX
+ config PPC_MPC5200_LPBFIFO
+       tristate "MPC5200 LocalPlus bus FIFO driver"
+-      depends on PPC_MPC52xx
++      depends on PPC_MPC52xx && PPC_BESTCOMM
+       select PPC_BESTCOMM_GEN_BD
diff --git a/queue-3.10/powerpc-powernv-add-pe-to-its-own-peltv.patch b/queue-3.10/powerpc-powernv-add-pe-to-its-own-peltv.patch
new file mode 100644 (file)
index 0000000..8a0d6fa
--- /dev/null
@@ -0,0 +1,50 @@
+From 631ad691b5818291d89af9be607d2fe40be0886e Mon Sep 17 00:00:00 2001
+From: Gavin Shan <shangw@linux.vnet.ibm.com>
+Date: Mon, 4 Nov 2013 16:32:46 +0800
+Subject: powerpc/powernv: Add PE to its own PELTV
+
+From: Gavin Shan <shangw@linux.vnet.ibm.com>
+
+commit 631ad691b5818291d89af9be607d2fe40be0886e upstream.
+
+We need to add the PE to its own PELTV. Otherwise, errors originating
+from the PE might contribute to other PEs. As a result, we can't
+clear the error successfully even though we check for and clear
+errors during access to PCI config space.
+
+Reported-by: kalshett@in.ibm.com
+Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/platforms/powernv/pci-ioda.c |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/platforms/powernv/pci-ioda.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda.c
+@@ -151,13 +151,23 @@ static int pnv_ioda_configure_pe(struct
+               rid_end = pe->rid + 1;
+       }
+-      /* Associate PE in PELT */
++      /*
++       * Associate PE in PELT. We need add the PE into the
++       * corresponding PELT-V as well. Otherwise, the error
++       * originated from the PE might contribute to other
++       * PEs.
++       */
+       rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
+                            bcomp, dcomp, fcomp, OPAL_MAP_PE);
+       if (rc) {
+               pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
+               return -ENXIO;
+       }
++
++      rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
++                              pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
++      if (rc)
++              pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc);
+       opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
+                                 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
diff --git a/queue-3.10/powerpc-ppc64-address-space-capped-at-32tb-mmap-randomisation-disabled.patch b/queue-3.10/powerpc-ppc64-address-space-capped-at-32tb-mmap-randomisation-disabled.patch
new file mode 100644 (file)
index 0000000..d34c2e6
--- /dev/null
@@ -0,0 +1,39 @@
+From 5a049f14902982c26538250bdc8d54156d357252 Mon Sep 17 00:00:00 2001
+From: Anton Blanchard <anton@samba.org>
+Date: Mon, 18 Nov 2013 14:55:28 +1100
+Subject: powerpc: ppc64 address space capped at 32TB, mmap randomisation disabled
+
+From: Anton Blanchard <anton@samba.org>
+
+commit 5a049f14902982c26538250bdc8d54156d357252 upstream.
+
+Commit fba2369e6ceb (mm: use vm_unmapped_area() on powerpc architecture)
+has a bug in slice_scan_available() where we compare an unsigned long
+(high_slices) against a shifted int. As a result, comparisons against
+the top 32 bits of high_slices (representing the top 32TB) always
+return 0 and the top of our mmap region is clamped at 32TB.
+
+This also breaks mmap randomisation since the randomised address is
+always up near the top of the address space and it gets clamped down
+to 32TB.
+
+Signed-off-by: Anton Blanchard <anton@samba.org>
+Acked-by: Michel Lespinasse <walken@google.com>
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/mm/slice.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/powerpc/mm/slice.c
++++ b/arch/powerpc/mm/slice.c
+@@ -258,7 +258,7 @@ static bool slice_scan_available(unsigne
+               slice = GET_HIGH_SLICE_INDEX(addr);
+               *boundary_addr = (slice + end) ?
+                       ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
+-              return !!(available.high_slices & (1u << slice));
++              return !!(available.high_slices & (1ul << slice));
+       }
+ }
diff --git a/queue-3.10/powerpc-signals-mark-vsx-not-saved-with-small-contexts.patch b/queue-3.10/powerpc-signals-mark-vsx-not-saved-with-small-contexts.patch
new file mode 100644 (file)
index 0000000..03844c6
--- /dev/null
@@ -0,0 +1,58 @@
+From c13f20ac48328b05cd3b8c19e31ed6c132b44b42 Mon Sep 17 00:00:00 2001
+From: Michael Neuling <mikey@neuling.org>
+Date: Wed, 20 Nov 2013 16:18:54 +1100
+Subject: powerpc/signals: Mark VSX not saved with small contexts
+
+From: Michael Neuling <mikey@neuling.org>
+
+commit c13f20ac48328b05cd3b8c19e31ed6c132b44b42 upstream.
+
+The VSX MSR bit in the user context indicates if the context contains VSX
+state.  Currently we set this when the process has touched VSX at any stage.
+
+Unfortunately, if the user has not provided enough space to save the VSX state,
+we can't save it but we currently still set the MSR VSX bit.
+
+This patch changes this to clear the MSR VSX bit when the user doesn't provide
+enough space.  This indicates that there is no valid VSX state in the user
+context.
+
+This is needed to support get/set/make/swapcontext for applications that use
+VSX but only provide a small context.  For example, getcontext in glibc
+provides a smaller context since the VSX registers don't need to be saved over
+the glibc function call.  But since the program calling getcontext may have
+used VSX, the kernel currently says the VSX state is valid when it's not.  If
+the returned context is then used in setcontext (ie. a small context without
+VSX but with MSR VSX set), the kernel will refuse the context.  This situation
+has been reported by the glibc community.
+
+Based on patch from Carlos O'Donell.
+
+Tested-by: Haren Myneni <haren@linux.vnet.ibm.com>
+Signed-off-by: Michael Neuling <mikey@neuling.org>
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kernel/signal_32.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/kernel/signal_32.c
++++ b/arch/powerpc/kernel/signal_32.c
+@@ -454,7 +454,15 @@ static int save_user_regs(struct pt_regs
+               if (copy_vsx_to_user(&frame->mc_vsregs, current))
+                       return 1;
+               msr |= MSR_VSX;
+-      }
++      } else if (!ctx_has_vsx_region)
++              /*
++               * With a small context structure we can't hold the VSX
++               * registers, hence clear the MSR value to indicate the state
++               * was not saved.
++               */
++              msr &= ~MSR_VSX;
++
++
+ #endif /* CONFIG_VSX */
+ #ifdef CONFIG_SPE
+       /* save spe registers */
diff --git a/queue-3.10/powerpc-vio-use-strcpy-in-modalias_show.patch b/queue-3.10/powerpc-vio-use-strcpy-in-modalias_show.patch
new file mode 100644 (file)
index 0000000..ace89b4
--- /dev/null
@@ -0,0 +1,39 @@
+From 411cabf79e684171669ad29a0628c400b4431e95 Mon Sep 17 00:00:00 2001
+From: Prarit Bhargava <prarit@redhat.com>
+Date: Thu, 17 Oct 2013 08:00:11 -0400
+Subject: powerpc/vio: use strcpy in modalias_show
+
+From: Prarit Bhargava <prarit@redhat.com>
+
+commit 411cabf79e684171669ad29a0628c400b4431e95 upstream.
+
+Commit e82b89a6f19bae73fb064d1b3dd91fcefbb478f4 used strcat instead of
+strcpy which can result in an overflow of newlines on the buffer.
+
+Signed-off-by: Prarit Bhargava
+Cc: benh@kernel.crashing.org
+Cc: ben@decadent.org.uk
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kernel/vio.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/kernel/vio.c
++++ b/arch/powerpc/kernel/vio.c
+@@ -1530,12 +1530,12 @@ static ssize_t modalias_show(struct devi
+       dn = dev->of_node;
+       if (!dn) {
+-              strcat(buf, "\n");
++              strcpy(buf, "\n");
+               return strlen(buf);
+       }
+       cp = of_get_property(dn, "compatible", NULL);
+       if (!cp) {
+-              strcat(buf, "\n");
++              strcpy(buf, "\n");
+               return strlen(buf);
+       }
diff --git a/queue-3.10/series b/queue-3.10/series
index 9095e32d6283f962ed9f2c5dff41d666d4eae993..6ab1871defd5f4355fdb5745afb9e69e785f9a4d 100644 (file)
@@ -62,3 +62,11 @@ hwmon-lm90-fix-max6696-alarm-handling.patch
 block-fix-race-between-request-completion-and-timeout-handling.patch
 block-fix-a-probe-argument-to-blk_register_region.patch
 block-properly-stack-underlying-max_segment_size-to-dm-device.patch
+powerpc-52xx-fix-build-breakage-for-mpc5200-lpbfifo-module.patch
+powerpc-vio-use-strcpy-in-modalias_show.patch
+powerpc-powernv-add-pe-to-its-own-peltv.patch
+powerpc-ppc64-address-space-capped-at-32tb-mmap-randomisation-disabled.patch
+powerpc-signals-mark-vsx-not-saved-with-small-contexts.patch
+slub-handle-null-parameter-in-kmem_cache_flags.patch
+sunrpc-fix-a-data-corruption-issue-when-retransmitting-rpc-calls.patch
+mei-nfc-fix-memory-leak-in-error-path.patch
diff --git a/queue-3.10/slub-handle-null-parameter-in-kmem_cache_flags.patch b/queue-3.10/slub-handle-null-parameter-in-kmem_cache_flags.patch
new file mode 100644 (file)
index 0000000..2afadad
--- /dev/null
@@ -0,0 +1,92 @@
+From c6f58d9b362b45c52afebe4342c9137d0dabe47f Mon Sep 17 00:00:00 2001
+From: Christoph Lameter <cl@linux.com>
+Date: Thu, 7 Nov 2013 16:29:15 +0000
+Subject: slub: Handle NULL parameter in kmem_cache_flags
+
+From: Christoph Lameter <cl@linux.com>
+
+commit c6f58d9b362b45c52afebe4342c9137d0dabe47f upstream.
+
+Andreas Herrmann writes:
+
+  When I've used slub_debug kernel option (e.g.
+  "slub_debug=,skbuff_fclone_cache" or similar) on a debug session I've
+  seen a panic like:
+
+    Highbank #setenv bootargs console=ttyAMA0 root=/dev/sda2 kgdboc.kgdboc=ttyAMA0,115200 slub_debug=,kmalloc-4096 earlyprintk=ttyAMA0
+    ...
+    Unable to handle kernel NULL pointer dereference at virtual address 00000000
+    pgd = c0004000
+    [00000000] *pgd=00000000
+    Internal error: Oops: 5 [#1] SMP ARM
+    Modules linked in:
+    CPU: 0 PID: 0 Comm: swapper Tainted: G        W    3.12.0-00048-gbe408cd #314
+    task: c0898360 ti: c088a000 task.ti: c088a000
+    PC is at strncmp+0x1c/0x84
+    LR is at kmem_cache_flags.isra.46.part.47+0x44/0x60
+    pc : [<c02c6da0>]    lr : [<c0110a3c>]    psr: 200001d3
+    sp : c088bea8  ip : c088beb8  fp : c088beb4
+    r10: 00000000  r9 : 413fc090  r8 : 00000001
+    r7 : 00000000  r6 : c2984a08  r5 : c0966e78  r4 : 00000000
+    r3 : 0000006b  r2 : 0000000c  r1 : 00000000  r0 : c2984a08
+    Flags: nzCv  IRQs off  FIQs off  Mode SVC_32  ISA ARM  Segment kernel
+    Control: 10c5387d  Table: 0000404a  DAC: 00000015
+    Process swapper (pid: 0, stack limit = 0xc088a248)
+    Stack: (0xc088bea8 to 0xc088c000)
+    bea0:                   c088bed4 c088beb8 c0110a3c c02c6d90 c0966e78 00000040
+    bec0: ef001f00 00000040 c088bf14 c088bed8 c0112070 c0110a04 00000005 c010fac8
+    bee0: c088bf5c c088bef0 c010fac8 ef001f00 00000040 00000000 00000040 00000001
+    bf00: 413fc090 00000000 c088bf34 c088bf18 c0839190 c0112040 00000000 ef001f00
+    bf20: 00000000 00000000 c088bf54 c088bf38 c0839200 c083914c 00000006 c0961c4c
+    bf40: c0961c28 00000000 c088bf7c c088bf58 c08392ac c08391c0 c08a2ed8 c0966e78
+    bf60: c086b874 c08a3f50 c0961c28 00000001 c088bfb4 c088bf80 c083b258 c0839248
+    bf80: 2f800000 0f000000 c08935b4 ffffffff c08cd400 ffffffff c08cd400 c0868408
+    bfa0: c29849c0 00000000 c088bff4 c088bfb8 c0824974 c083b1e4 ffffffff ffffffff
+    bfc0: c08245c0 00000000 00000000 c0868408 00000000 10c5387d c0892bcc c0868404
+    bfe0: c0899440 0000406a 00000000 c088bff8 00008074 c0824824 00000000 00000000
+    [<c02c6da0>] (strncmp+0x1c/0x84) from [<c0110a3c>] (kmem_cache_flags.isra.46.part.47+0x44/0x60)
+    [<c0110a3c>] (kmem_cache_flags.isra.46.part.47+0x44/0x60) from [<c0112070>] (__kmem_cache_create+0x3c/0x410)
+    [<c0112070>] (__kmem_cache_create+0x3c/0x410) from [<c0839190>] (create_boot_cache+0x50/0x74)
+    [<c0839190>] (create_boot_cache+0x50/0x74) from [<c0839200>] (create_kmalloc_cache+0x4c/0x88)
+    [<c0839200>] (create_kmalloc_cache+0x4c/0x88) from [<c08392ac>] (create_kmalloc_caches+0x70/0x114)
+    [<c08392ac>] (create_kmalloc_caches+0x70/0x114) from [<c083b258>] (kmem_cache_init+0x80/0xe0)
+    [<c083b258>] (kmem_cache_init+0x80/0xe0) from [<c0824974>] (start_kernel+0x15c/0x318)
+    [<c0824974>] (start_kernel+0x15c/0x318) from [<00008074>] (0x8074)
+    Code: e3520000 01a00002 089da800 e5d03000 (e5d1c000)
+    ---[ end trace 1b75b31a2719ed1d ]---
+    Kernel panic - not syncing: Fatal exception
+
+  Problem is that slub_debug option is not parsed before
+  create_boot_cache is called. Solve this by changing slub_debug to
+  early_param.
+
+  Kernels 3.11, 3.10 are also affected.  I am not sure about older
+  kernels.
+
+Christoph Lameter explains:
+
+  kmem_cache_flags may be called with NULL parameter during early boot.
+  Skip the test in that case.
+
+Reported-by: Andreas Herrmann <andreas.herrmann@calxeda.com>
+Signed-off-by: Christoph Lameter <cl@linux.com>
+Signed-off-by: Pekka Enberg <penberg@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/slub.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -1201,8 +1201,8 @@ static unsigned long kmem_cache_flags(un
+       /*
+        * Enable debugging if selected on the kernel commandline.
+        */
+-      if (slub_debug && (!slub_debug_slabs ||
+-              !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
++      if (slub_debug && (!slub_debug_slabs || (name &&
++              !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))))
+               flags |= slub_debug;
+       return flags;
diff --git a/queue-3.10/sunrpc-fix-a-data-corruption-issue-when-retransmitting-rpc-calls.patch b/queue-3.10/sunrpc-fix-a-data-corruption-issue-when-retransmitting-rpc-calls.patch
new file mode 100644 (file)
index 0000000..13d6d64
--- /dev/null
@@ -0,0 +1,152 @@
+From a6b31d18b02ff9d7915c5898c9b5ca41a798cd73 Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+Date: Fri, 8 Nov 2013 16:03:50 -0500
+Subject: SUNRPC: Fix a data corruption issue when retransmitting RPC calls
+
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+
+commit a6b31d18b02ff9d7915c5898c9b5ca41a798cd73 upstream.
+
+The following scenario can cause silent data corruption when doing
+NFS writes. It has mainly been observed when doing database writes
+using O_DIRECT.
+
+1) The RPC client uses sendpage() to do zero-copy of the page data.
+2) Due to networking issues, the reply from the server is delayed,
+   and so the RPC client times out.
+
+3) The client issues a second sendpage of the page data as part of
+   an RPC call retransmission.
+
+4) The reply to the first transmission arrives from the server
+   _before_ the client hardware has emptied the TCP socket send
+   buffer.
+5) After processing the reply, the RPC state machine rules that
+   the call is done, and triggers the completion callbacks.
+6) The application notices the RPC call is done, and reuses the
+   pages to store something else (e.g. a new write).
+
+7) The client NIC drains the TCP socket send buffer. Since the
+   page data has now changed, it reads a corrupted version of the
+   initial RPC call, and puts it on the wire.
+
+This patch fixes the problem in the following manner:
+
+The ordering guarantees of TCP ensure that when the server sends a
+reply, then we know that the _first_ transmission has completed. Using
+zero-copy in that situation is therefore safe.
+If a timeout occurs, we then send the retransmission using sendmsg()
+(i.e. no zero-copy). We then know that the socket contains a full copy of
+the data, and so it will retransmit a faithful reproduction even if the
+RPC call completes, and the application reuses the O_DIRECT buffer in
+the meantime.
+
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/sunrpc/xprtsock.c |   28 +++++++++++++++++++++-------
+ 1 file changed, 21 insertions(+), 7 deletions(-)
+
+--- a/net/sunrpc/xprtsock.c
++++ b/net/sunrpc/xprtsock.c
+@@ -391,8 +391,10 @@ static int xs_send_kvec(struct socket *s
+       return kernel_sendmsg(sock, &msg, NULL, 0, 0);
+ }
+-static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more)
++static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy)
+ {
++      ssize_t (*do_sendpage)(struct socket *sock, struct page *page,
++                      int offset, size_t size, int flags);
+       struct page **ppage;
+       unsigned int remainder;
+       int err, sent = 0;
+@@ -401,6 +403,9 @@ static int xs_send_pagedata(struct socke
+       base += xdr->page_base;
+       ppage = xdr->pages + (base >> PAGE_SHIFT);
+       base &= ~PAGE_MASK;
++      do_sendpage = sock->ops->sendpage;
++      if (!zerocopy)
++              do_sendpage = sock_no_sendpage;
+       for(;;) {
+               unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder);
+               int flags = XS_SENDMSG_FLAGS;
+@@ -408,7 +413,7 @@ static int xs_send_pagedata(struct socke
+               remainder -= len;
+               if (remainder != 0 || more)
+                       flags |= MSG_MORE;
+-              err = sock->ops->sendpage(sock, *ppage, base, len, flags);
++              err = do_sendpage(sock, *ppage, base, len, flags);
+               if (remainder == 0 || err != len)
+                       break;
+               sent += err;
+@@ -429,9 +434,10 @@ static int xs_send_pagedata(struct socke
+  * @addrlen: UDP only -- length of destination address
+  * @xdr: buffer containing this request
+  * @base: starting position in the buffer
++ * @zerocopy: true if it is safe to use sendpage()
+  *
+  */
+-static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base)
++static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy)
+ {
+       unsigned int remainder = xdr->len - base;
+       int err, sent = 0;
+@@ -459,7 +465,7 @@ static int xs_sendpages(struct socket *s
+       if (base < xdr->page_len) {
+               unsigned int len = xdr->page_len - base;
+               remainder -= len;
+-              err = xs_send_pagedata(sock, xdr, base, remainder != 0);
++              err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy);
+               if (remainder == 0 || err != len)
+                       goto out;
+               sent += err;
+@@ -562,7 +568,7 @@ static int xs_local_send_request(struct
+                       req->rq_svec->iov_base, req->rq_svec->iov_len);
+       status = xs_sendpages(transport->sock, NULL, 0,
+-                                              xdr, req->rq_bytes_sent);
++                                              xdr, req->rq_bytes_sent, true);
+       dprintk("RPC:       %s(%u) = %d\n",
+                       __func__, xdr->len - req->rq_bytes_sent, status);
+       if (likely(status >= 0)) {
+@@ -618,7 +624,7 @@ static int xs_udp_send_request(struct rp
+       status = xs_sendpages(transport->sock,
+                             xs_addr(xprt),
+                             xprt->addrlen, xdr,
+-                            req->rq_bytes_sent);
++                            req->rq_bytes_sent, true);
+       dprintk("RPC:       xs_udp_send_request(%u) = %d\n",
+                       xdr->len - req->rq_bytes_sent, status);
+@@ -689,6 +695,7 @@ static int xs_tcp_send_request(struct rp
+       struct rpc_xprt *xprt = req->rq_xprt;
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+       struct xdr_buf *xdr = &req->rq_snd_buf;
++      bool zerocopy = true;
+       int status;
+       xs_encode_stream_record_marker(&req->rq_snd_buf);
+@@ -696,13 +703,20 @@ static int xs_tcp_send_request(struct rp
+       xs_pktdump("packet data:",
+                               req->rq_svec->iov_base,
+                               req->rq_svec->iov_len);
++      /* Don't use zero copy if this is a resend. If the RPC call
++       * completes while the socket holds a reference to the pages,
++       * then we may end up resending corrupted data.
++       */
++      if (task->tk_flags & RPC_TASK_SENT)
++              zerocopy = false;
+       /* Continue transmitting the packet/record. We must be careful
+        * to cope with writespace callbacks arriving _after_ we have
+        * called sendmsg(). */
+       while (1) {
+               status = xs_sendpages(transport->sock,
+-                                      NULL, 0, xdr, req->rq_bytes_sent);
++                                      NULL, 0, xdr, req->rq_bytes_sent,
++                                      zerocopy);
+               dprintk("RPC:       xs_tcp_send_request(%u) = %d\n",
+                               xdr->len - req->rq_bytes_sent, status);